Beispiel #1
0
def test_write_constraint():
    """Check the 'constraint' argument to writer.write_dataset.

    The same data message is converted to pandas twice, without and with a
    ContentConstraint on CURRENCY, and the number of observations and the
    set of CURRENCY values are compared.
    """
    with specimen("ng-ts.xml") as f:
        msg = pandasdmx.read_sdmx(f)

    # Fetch the message's DSD
    assert msg.structure.is_external_reference
    # NB the specimen included in tests/data has 'ECB_EXR_NG' as the
    #    data structure ID; but a query against the web service gives
    #    'ECB_EXR1' for the same data structure.
    # Named `dsd_id` rather than `id` so the builtin id() is not shadowed.
    dsd_id = "ECB_EXR1"
    dsd = (
        pandasdmx.Request(msg.structure.maintainer.id)
        .get("datastructure", dsd_id)
        .structure[dsd_id]
    )

    # Create a ContentConstraint
    cc = dsd.make_constraint({"CURRENCY": "JPY+USD"})

    # Write the message without constraint
    s1 = pandasdmx.to_pandas(msg)
    assert len(s1) == 12
    assert set(s1.index.to_frame()["CURRENCY"]) == {"CHF", "GBP", "JPY", "USD"}

    # Writing using the constraint produces fewer items; only those matching
    # the constraint
    s2 = pandasdmx.to_pandas(msg, constraint=cc)
    assert len(s2) == 6
    assert set(s2.index.to_frame()["CURRENCY"]) == {"JPY", "USD"}
Beispiel #2
0
def test_write_conceptscheme():
    """Convert a structure message to pandas and look up a concept's name."""
    with specimen("common-structure.xml") as stream:
        structure_msg = pandasdmx.read_sdmx(stream)
        converted = pandasdmx.to_pandas(structure_msg)

    # Concepts of one scheme are addressable by (concept ID, column)
    concepts = converted["concept_scheme"]["CROSS_DOMAIN_CONCEPTS"]
    unit_measure_name = concepts.loc["UNIT_MEASURE", "name"]
    assert unit_measure_name == "Unit of Measure"
Beispiel #3
0
def test_write_categoryscheme():
    """Convert a category scheme to pandas; check a name and a parent link."""
    with specimen("IPI-2010-A21-structure.xml") as stream:
        structure_msg = pandasdmx.read_sdmx(stream)
        converted = pandasdmx.to_pandas(structure_msg)

    categories = converted["category_scheme"]["CLASSEMENT_DATAFLOWS"]

    # Category names are available by category ID
    national_accounts = categories.loc["COMPTA-NAT", "name"]
    assert national_accounts == "National accounts (GDP, consumption...)"

    # Child categories are present and record the ID of their parent
    parent_of_child = categories.loc["CNA-PIB-2005", "parent"]
    assert parent_of_child == "CNA-PIB"
def test_read_xml_structure_insee():
    """Read a structure message and check its internal references.

    The data structure referenced by the dataflow must be the very same
    object as the one stored in the message's ``structure`` collection, and
    that structure must have the expected number of dimensions.
    """
    with specimen("IPI-2010-A21-structure.xml") as f:
        msg = pandasdmx.read_sdmx(f)

    dsd = msg.structure["IPI-2010-A21"]

    # Same objects referenced. Use `is` rather than comparing id() values:
    # id() is only guaranteed unique among simultaneously existing objects,
    # so `is` is the reliable identity test.
    assert msg.dataflow["IPI-2010-A21"].structure is dsd

    # Number of dimensions loaded correctly
    assert len(dsd.dimensions) == 4
def test_data_roundtrip(pytestconfig, data_id, structure_id, tmp_path):
    """Write an SDMX-ML DataMessage to XML, read it back, and compare.

    The specimen named by `structure_id` supplies the data structure
    definition; the specimen named by `data_id` supplies the data. On
    failure the assertion message is the written XML text when pytest runs
    verbosely, otherwise the path of the written file.
    """
    # Structure: the first data structure definition in the specimen
    with specimen(structure_id) as structure_file:
        structure_msg = pandasdmx.read_sdmx(structure_file)
    dsd = structure_msg.structure[0]

    # Data, interpreted using that DSD
    with specimen(data_id) as data_file:
        original = pandasdmx.read_sdmx(data_file, dsd=dsd)

    # Serialise to a temporary file
    output_path = tmp_path / "output.xml"
    xml_bytes = pandasdmx.to_xml(original, pretty_print=True)
    output_path.write_bytes(xml_bytes)

    # Parse the written file with the same DSD
    reread = pandasdmx.read_sdmx(output_path, dsd=dsd)

    # Both messages must compare equal under strict comparison; the message
    # expression is only evaluated if the comparison fails
    identical = original.compare(reread, strict=True)
    assert identical, (
        output_path.read_text()
        if pytestconfig.getoption("verbose")
        else output_path
    )
Beispiel #6
0
def test_write_codelist():
    """Convert flat and hierarchical code lists to pandas objects."""
    # --- Non-hierarchical code lists, converted as part of a whole message
    with specimen("common-structure.xml") as stream:
        common_msg = pandasdmx.read_sdmx(stream)
    all_codelists = pandasdmx.to_pandas(common_msg)["codelist"]

    # The file contains 5 code lists
    assert len(all_codelists) == 5

    # A code list has the expected number of items
    cl_freq = all_codelists["CL_FREQ"]
    assert len(cl_freq) == 8

    # Item names can be retrieved by ID
    assert cl_freq["A"] == "Annual"

    # A non-hierarchical code list has a string name
    assert cl_freq.name == "Code list for Frequency (FREQ)"

    # --- Hierarchical code list, converted on its own
    with specimen("codelist_partial.xml") as stream:
        partial_msg = pandasdmx.read_sdmx(stream)

    cl_area = pandasdmx.to_pandas(partial_msg.codelist["CL_AREA"])

    # A hierarchical list has a 'parent' column; parent of Africa is the World
    assert cl_area.loc["002", "parent"] == "001"

    # Pandas features can be used to merge parent names: join the table to
    # itself, matching each row's 'parent' value against the index
    merge_options = dict(
        how="left",
        left_on="parent",
        right_index=True,
        suffixes=("", "_parent"),
    )
    with_parent_names = pd.merge(cl_area, cl_area, **merge_options)
    assert with_parent_names.loc["002", "name_parent"] == "World"
def test_structure_roundtrip(pytestconfig, specimen_id, strict, tmp_path):
    """Write an SDMX-ML StructureMessage to XML, read it back, and compare.

    `specimen_id` names the specimen to read and `strict` is passed on to
    the message comparison. On failure the assertion message is the written
    XML text when pytest runs verbosely, otherwise the path of the file.
    """
    # Parse the specimen
    with specimen(specimen_id) as stream:
        original = pandasdmx.read_sdmx(stream)

    # Serialise to a temporary file
    output_path = tmp_path / "output.xml"
    xml_bytes = pandasdmx.to_xml(original, pretty_print=True)
    output_path.write_bytes(xml_bytes)

    # Parse what was just written
    reread = pandasdmx.read_sdmx(output_path)

    # The two messages must compare equal; the message expression is only
    # evaluated if the comparison fails
    identical = original.compare(reread, strict)
    assert identical, (
        output_path.read_text()
        if pytestconfig.getoption("verbose")
        else output_path
    )
Beispiel #8
0
def test_write_agencyscheme():
    """Convert an agency scheme to pandas; check item and attribute access."""
    with specimen("ECB/orgscheme.xml") as stream:
        org_msg = pandasdmx.read_sdmx(stream)
        data = pandasdmx.to_pandas(org_msg)

    # Item access
    agencies = data["organisation_scheme"]["AGENCIES"]
    assert agencies["ESTAT"] == "Eurostat"

    # to_pandas only returns keys for non-empty attributes of StructureMessage
    # https://github.com/dr-leo/pandaSDMX/issues/90
    assert set(data.keys()) == {"organisation_scheme"}

    # Attribute access reaches the same value
    assert data.organisation_scheme.AGENCIES.ESTAT == "Eurostat"

    # Collections absent from the result are not available as attributes
    for absent in ("codelist", "dataflow", "structure"):
        with pytest.raises(AttributeError):
            getattr(data, absent)
Beispiel #9
0
def test_write_dataflow():
    """Convert the dataflows of the INSEE specimen to a pandas object."""
    # Read the INSEE dataflow definition
    with specimen("INSEE/dataflow") as stream:
        flow_msg = pandasdmx.read_sdmx(stream)

    # Convert only the dataflows
    converted = pandasdmx.to_pandas(flow_msg, include="dataflow")
    dataflows = converted["dataflow"]

    # Number of Dataflows described in the file
    assert len(dataflows) == 663

    # IDs and names of the first Dataflows; several names share a prefix
    prefix = "Monthly Balance of Payments - "
    first_names = {
        "ACT-TRIM-ANC": "Activity by sex and age - Quarterly series",
        "BPM6-CCAPITAL": prefix + "Capital account",
        "BPM6-CFINANCIER": prefix + "Financial account",
        "BPM6-CTRANSACTION": prefix + "Current transactions account",
        "BPM6-TOTAL": prefix + "Overall total and main headings",
    }
    expected = pd.Series(first_names)
    assert_pd_equal(dataflows.head(), expected)
def test_read_ss_xml():
    """Read a data message with a DSD and check links back to that DSD."""
    # The specimen is requested unopened, so it can be used to locate the
    # structure file in the same directory
    with specimen("M.USD.EUR.SP00.A.xml", opened=False) as located:
        msg_path = located
        dsd_path = located.parent / "structure.xml"

    # Read the DSD
    structure_msg = pandasdmx.read_sdmx(dsd_path)
    dsd = structure_msg.structure["ECB_EXR1"]

    # Read the data message, structured by the DSD
    data_msg = pandasdmx.read_sdmx(msg_path, dsd=dsd)
    dataset = data_msg.data[0]

    # The dataset in the message is structured by the DSD
    assert dataset.structured_by is dsd

    # Structures referenced in the dataset are from the DSD; checked below
    # on the first series key and the first observation
    series_keys = list(dataset.series.keys())
    first_series_key = series_keys[0]
    first_obs_key = dataset.obs[0].key

    # AttributeValue.value_for
    decimals = first_series_key.attrib["DECIMALS"]
    assert decimals.value_for is dsd.attributes.get("DECIMALS")

    # SeriesKey.described_by
    assert first_series_key.described_by is dsd.dimensions

    # Key.described_by
    assert first_obs_key.described_by is dsd.dimensions

    # KeyValue.value_for
    assert first_obs_key.values[0].value_for is dsd.dimensions.get("FREQ")

    # DSD information that is not in the data message can be looked up through
    # navigating object relationships
    time_format = first_series_key.attrib["TIME_FORMAT"].value_for
    assert len(time_format.related_to.dimensions) == 5
Beispiel #11
0
def test_header():
    """The header ID of the 'flat.json' specimen is read correctly."""
    with specimen("flat.json") as stream:
        message = pandasdmx.read_sdmx(stream)

    header_id = message.header.id
    assert header_id == "62b5f19d-f1c9-495d-8446-a3661ed24753"
Beispiel #12
0
def test_write_dataset_datetime():
    """Test the forms of the 'datetime' argument to write_dataset().

    The argument is given as a dimension ID, a dimension object, ``True``
    and a dict with 'dim', 'axis' and 'freq' keys; invalid values must raise
    ValueError. The order of the steps matters: the test mutates both the
    data set's observations and the list of expected dimension names.
    """
    # Load the structure and pick out the two dimensions used below
    with specimen("IPI-2010-A21-structure.xml") as f:
        dsd = pandasdmx.read_sdmx(f).structure["IPI-2010-A21"]
    TIME_PERIOD = dsd.dimensions.get("TIME_PERIOD")
    FREQ = dsd.dimensions.get("FREQ")

    assert isinstance(TIME_PERIOD, TimeDimension)

    # Load the data two ways: structured by the DSD, and without structure
    with specimen("IPI-2010-A21.xml") as f:
        msg = pandasdmx.read_sdmx(f, dsd=dsd)
    ds = msg.data[0]
    with specimen("IPI-2010-A21.xml") as f:
        msg_no_structure = pandasdmx.read_sdmx(f)

    # IDs of all dimensions other than the time dimension
    other_dims = [
        dim.id for dim in dsd.dimensions.components if dim.id != "TIME_PERIOD"
    ]

    def check_axes(frame, axis=0, cls=pd.DatetimeIndex):
        """Assert which axis holds the time index and which the other dims.

        With a falsy `axis` the time values are on the index and the other
        dimensions on the columns; with a truthy `axis` it is the reverse.
        Reads `other_dims` at call time, so later changes to it apply.
        """
        if axis:
            dims_attr, time_attr = "index", "columns"
        else:
            dims_attr, time_attr = "columns", "index"
        assert getattr(frame, dims_attr).names == other_dims
        assert isinstance(getattr(frame, time_attr), cls)

    # datetime given as a dimension ID string
    result = pandasdmx.to_pandas(ds, datetime="TIME_PERIOD")
    check_axes(result)

    # datetime given as a Dimension instance
    result = pandasdmx.to_pandas(ds, datetime=TIME_PERIOD)
    check_axes(result)

    # datetime=True fails because the data message contains no actual
    # structure information
    with pytest.raises(ValueError, match=r"no TimeDimension in \[.*\]"):
        pandasdmx.to_pandas(msg_no_structure, datetime=True)
    with pytest.raises(ValueError, match=r"no TimeDimension in \[.*\]"):
        pandasdmx.to_pandas(msg_no_structure.data[0], datetime=True)

    # A DataMessage parsed with a DSD allows write_dataset to infer the
    # TimeDimension
    result = pandasdmx.to_pandas(msg, datetime=True)
    check_axes(result)
    # Same for the DataSet
    result = pandasdmx.to_pandas(ds, datetime=True)
    check_axes(result)

    # As above, with axis=1
    result = pandasdmx.to_pandas(ds, datetime=dict(dim="TIME_PERIOD", axis=1))
    check_axes(result, axis=1)
    result = pandasdmx.to_pandas(ds, datetime=dict(dim=TIME_PERIOD, axis=1))
    check_axes(result, axis=1)
    ds.structured_by = dsd
    result = pandasdmx.to_pandas(ds, datetime=dict(axis=1))
    check_axes(result, axis=1)
    result = pandasdmx.to_pandas(msg, datetime=dict(axis=1))
    check_axes(result, axis=1)

    # freq='M' works
    result = pandasdmx.to_pandas(ds, datetime=dict(dim="TIME_PERIOD", freq="M"))
    check_axes(result, cls=pd.PeriodIndex)

    # freq='A' works
    result = pandasdmx.to_pandas(ds, datetime=dict(dim="TIME_PERIOD", freq="A"))
    check_axes(result, cls=pd.PeriodIndex)
    # …but the index is not unique, because month information was discarded
    assert not result.index.is_unique

    # Specifying the FREQ dimension by name fails
    with pytest.raises(
        ValueError,
        match="cannot convert to PeriodIndex with " r"non-unique freq=\['A', 'M'\]",
    ):
        pandasdmx.to_pandas(ds, datetime=dict(dim="TIME_PERIOD", freq="FREQ"))

    # Remove non-monthly obs
    # TODO use a constraint, when this is supported
    ds.obs = [obs for obs in ds.obs if obs.key.FREQ != "A"]

    # Now specifying the dimension by name works
    result = pandasdmx.to_pandas(
        ds, datetime=dict(dim="TIME_PERIOD", freq="FREQ")
    )

    # and FREQ is no longer in the columns index
    other_dims.remove("FREQ")
    check_axes(result, cls=pd.PeriodIndex)

    # Specifying a Dimension works
    result = pandasdmx.to_pandas(ds, datetime=dict(dim=TIME_PERIOD, freq=FREQ))
    check_axes(result, cls=pd.PeriodIndex)

    # As above, using the DSD attached to the DataMessage
    result = pandasdmx.to_pandas(
        msg, datetime=dict(dim=TIME_PERIOD, freq="FREQ")
    )
    check_axes(result, cls=pd.PeriodIndex)

    # Invalid arguments
    with pytest.raises(ValueError, match="X"):
        pandasdmx.to_pandas(msg, datetime=dict(dim=TIME_PERIOD, freq="X"))
    with pytest.raises(ValueError, match="foo"):
        pandasdmx.to_pandas(ds, datetime=dict(foo="bar"))
    with pytest.raises(ValueError, match="43"):
        pandasdmx.to_pandas(ds, datetime=43)