Exemple #1
0
def test_write_conceptscheme():
    """Concept names can be looked up by ID after converting a structure message."""
    with specimen("common-structure.xml") as source:
        message = sdmx.read_sdmx(source)
        converted = sdmx.to_pandas(message)

    # Pick one concept from the CROSS_DOMAIN_CONCEPTS scheme and check its name
    concepts = converted["concept_scheme"]["CROSS_DOMAIN_CONCEPTS"]
    unit_measure_name = concepts.loc["UNIT_MEASURE", "name"]
    assert unit_measure_name == "Unit of Measure"
Exemple #2
0
 def test_freq_in_series_attribute(self, req):
     """Regression test for Issues #39 and #41.

     INSEE time series provide the FREQ value as an attribute on the series
     instead of as a dimension, which caused a runtime error when writing to a
     pandas dataframe.
     """
     series_path = SERIES["UNEMPLOYMENT_CAT_A_B_C"]["data-fp"]
     message = sdmx.read_sdmx(series_path)
     # The test passes as long as the conversion does not raise
     sdmx.to_pandas(message)
Exemple #3
0
    def test_gh_75(self, req):
        """Test of https://github.com/dr-leo/pandaSDMX/pull/75."""
        flow_id = "47_850"

        # The dataflow is read from a stored specimen here, in place of the
        # reported query `req.dataflow(df_id).dataflow[df_id]`
        with specimen("47_850-structure") as f:
            flow = sdmx.read_sdmx(f).dataflow[flow_id]

        # Key for the query, as a mapping from dimension ID to values
        data_key = {
            "FREQ": ["A"],
            "ITTER107": ["001001"],
            "SETTITOLARE": ["1"],
            "TIPO_DATO": ["AUTP"],
            "TIPO_GESTIONE": ["ALL"],
            "TIPSERVSOC": ["ALL"],
        }

        # Dimension components follow the key order, with TIME_PERIOD last
        actual_ids = [dim.id for dim in flow.structure.dimensions.components]
        expected_ids = [*data_key.keys(), "TIME_PERIOD"]
        assert actual_ids == expected_ids

        # The reported data query, with a string key
        req.data(flow_id, key="A.001001+001002.1.AUTP.ALL.ALL")

        # A mapping key forces Request to make a sub-query for the DSD
        req.data(flow_id, key=data_key)
Exemple #4
0
def test_message_repr(specimen, pattern, expected):
    """Check repr() of a message against an expected string or compiled pattern."""
    with specimen(pattern) as f:
        message = sdmx.read_sdmx(f)

    if not isinstance(expected, re.Pattern):
        # Plain value: __repr__() and __str__() must both produce it
        assert expected == repr(message) == str(message)
        return

    # Compiled pattern: the entire repr() must match
    assert expected.fullmatch(repr(message))
Exemple #5
0
    def test_load_dataset(self, base_path):
        """Read the dataflow list, structure and data files for one dataset.

        Checks the number of dataflows, the non-time dimensions of the data
        structure, the series count, and the first and last observations of
        the first series.
        """
        code = "IPI-2010-A21"

        # All dataflows
        flows = sdmx.read_sdmx(base_path / DATAFLOW_FP).dataflow
        assert len(flows) == 663
        assert code in flows

        # Data structure for this dataset
        structure_path = base_path / DATASETS[code]["datastructure-fp"]
        structure_msg = sdmx.read_sdmx(structure_path)
        assert code in structure_msg.dataflow
        dsd = structure_msg.dataflow[code].structure

        # Dimension IDs, leaving out the time dimension
        time_ids = ["TIME", "TIME_PERIOD"]
        non_time = OrderedDict(
            (dim.id, dim) for dim in dsd.dimensions if dim.id not in time_ids
        )
        assert list(non_time.keys()) == ["FREQ", "PRODUIT", "NATURE"]

        # Data for this dataset
        data_path = base_path / DATASETS[code]["data-fp"]
        data_msg = sdmx.read_sdmx(data_path)

        # Number of series
        all_series = data_msg.data[0].series
        n_series = len(all_series)
        assert n_series == DATASETS[code]["series_count"]

        # First and last observations of the first series
        observations = all_series[0]
        first_obs, last_obs = observations[0], observations[-1]

        assert first_obs.dim == "2015-10"
        assert first_obs.value == "105.61"

        assert last_obs.dim == "1990-01"
        assert last_obs.value == "139.22"
Exemple #6
0
def test_structure_roundtrip(pytestconfig, specimen_id, strict, tmp_path):
    """Test that SDMX-ML StructureMessages can be 'round-tripped'."""
    # Parse the specimen
    with specimen(specimen_id) as f:
        original = sdmx.read_sdmx(f)

    # Serialize it to a temporary file
    out_path = tmp_path / "output.xml"
    xml_bytes = sdmx.to_xml(original, pretty_print=True)
    out_path.write_bytes(xml_bytes)

    # Parse the serialized copy
    reread = sdmx.read_sdmx(out_path)

    # Both messages compare equal; on failure show the XML if verbose, else the path
    identical = original.compare(reread, strict)
    assert identical, (
        out_path.read_text() if pytestconfig.getoption("verbose") else out_path
    )
Exemple #7
0
def test_write_data_arguments():
    """to_pandas() rejects invalid values for the `attributes` argument."""
    first_data_file = test_files(kind="data")["argvalues"][0]
    msg = sdmx.read_sdmx(first_data_file)

    invalid_cases = (
        # Attributes must be a string
        (TypeError, 2),
        # Attributes must contain only 'dgso'
        (ValueError, "foobarbaz"),
    )
    for exc_type, bad_value in invalid_cases:
        with raises(exc_type):
            sdmx.to_pandas(msg, attributes=bad_value)
Exemple #8
0
def test_write_categoryscheme(specimen):
    """Category schemes convert to pandas with names and parent links."""
    with specimen("IPI-2010-A21-structure.xml") as source:
        message = sdmx.read_sdmx(source)
        converted = sdmx.to_pandas(message)

    scheme = converted["category_scheme"]["CLASSEMENT_DATAFLOWS"]

    # Name of one category
    top_level_name = scheme.loc["COMPTA-NAT", "name"]
    assert top_level_name == "National accounts (GDP, consumption...)"

    # Child categories record the ID of their parent
    child_parent = scheme.loc["CNA-PIB-2005", "parent"]
    assert child_parent == "CNA-PIB"
Exemple #9
0
def test_read_xml_structure_insee(specimen):
    """The dataflow and the message refer to the same structure object."""
    with specimen("IPI-2010-A21-structure.xml") as f:
        msg = sdmx.read_sdmx(f)

    # Reaching the structure through the dataflow or directly through the
    # message gives the very same object
    via_dataflow = msg.dataflow["IPI-2010-A21"].structure
    via_message = msg.structure["IPI-2010-A21"]
    assert via_dataflow is via_message

    # Number of dimensions loaded correctly
    dsd = msg.structure["IPI-2010-A21"]
    n_dimensions = len(dsd.dimensions)
    assert n_dimensions == 4
Exemple #10
0
    def test_fixe_key_names(self, base_path):
        """Verify key or attribute contains '-' in name."""
        code = "CNA-2010-CONSO-SI-A17"
        # Expected both for the structure's dimensions and for the series key
        expected_dims = ["SECT-INST", "OPERATION", "PRODUIT", "PRIX"]

        structure_path = base_path / DATASETS[code]["datastructure-fp"]
        structure_msg = sdmx.read_sdmx(structure_path)
        assert code in structure_msg.dataflow
        dsd = structure_msg.dataflow[code].structure

        # Dimension IDs, leaving out the time dimension
        time_ids = ["TIME", "TIME_PERIOD"]
        non_time = OrderedDict(
            (dim.id, dim) for dim in dsd.dimensions if dim.id not in time_ids
        )
        assert list(non_time.keys()) == expected_dims

        data_path = base_path / DATASETS[code]["data-fp"]
        data_msg = sdmx.read_sdmx(data_path)
        all_series = data_msg.data[0].series
        first_key = list(all_series.keys())[0]

        assert list(first_key.values.keys()) == expected_dims

        expected_attributes = [
            "FREQ",
            "IDBANK",
            "TITLE",
            "LAST_UPDATE",
            "UNIT_MEASURE",
            "UNIT_MULT",
            "REF_AREA",
            "DECIMALS",
            "BASE_PER",
            "TIME_PER_COLLECT",
        ]
        assert list(first_key.attrib.keys()) == expected_attributes
Exemple #11
0
def test_write_data(specimen, path):
    """Convert a data message to pandas and compare with the expected data."""
    message = sdmx.read_sdmx(path)
    converted = sdmx.to_pandas(message)

    reference = specimen.expected_data(path)
    if reference is not None:
        # Show both objects to help diagnose a failed comparison
        print(reference, converted, sep="\n")
    assert_pd_equal(reference, converted)

    # TODO incomplete
    allowed_types = (pd.Series, pd.DataFrame, list)
    assert isinstance(converted, allowed_types), type(converted)
Exemple #12
0
def test_write_data_arguments(specimen):
    """to_pandas() rejects invalid values for the `attributes` argument."""
    # Which specimen is used does not matter; any non-empty DataMessage will do
    with specimen("INSEE/CNA-2010-CONSO-SI-A17.xml") as source:
        message = sdmx.read_sdmx(source)

    # A non-string value: attributes must be a string
    with raises(TypeError):
        sdmx.to_pandas(message, attributes=2)

    # A string with other characters: attributes must contain only 'dgso'
    with raises(ValueError):
        sdmx.to_pandas(message, attributes="foobarbaz")
Exemple #13
0
def test_exr_constraints():
    """Check the descriptors of the ECB_EXR1 structure; constraints are xfail.

    The checks on the dimension and attribute descriptors run normally. The
    call to ``pytest.xfail()`` then ends the test, so the statements after it
    are not executed; they are kept as a specification of the constrained-codes
    behaviour that is not implemented yet.
    """
    with specimen("1/structure-full.xml") as f:
        m = sdmx.read_sdmx(f)
    ECB_EXR1 = m.structure["ECB_EXR1"]

    # Test DimensionDescriptor
    dd = ECB_EXR1.dimensions

    # Correct order
    assert dd[0].id == "FREQ"

    # Correct number of dimensions
    assert len(dd.components) == 6

    # Dimensions can be retrieved by name; membership can be tested
    assert "W" in dd.get("FREQ")

    # Similar tests for AttributeDescriptor
    ad = ECB_EXR1.attributes
    assert len(ad.components) == 24
    assert ad[-1].id == "UNIT_MULT"
    assert "5" in ad.get("UNIT_MULT")

    pytest.xfail("constrained codes not implemented")

    # Compare with ==; `assert x, 14` would only test truthiness and use 14 as
    # the failure message
    assert len(m._constrained_codes) == 14

    assert "W" not in m._constrained_codes.FREQ

    key = {"FREQ": ["W"]}

    assert m.in_codes(key)

    assert not m.in_constraints(key, raise_error=False)

    with pytest.raises(ValueError):
        m.in_constraints(key)

    assert m.in_constraints({"CURRENCY": ["CHF"]})

    # test with invalid key
    # NOTE(review): this calls `_in_constraints`, unlike the `in_constraints`
    # calls above -- confirm which name is intended once this is implemented
    with pytest.raises(TypeError):
        m._in_constraints({"FREQ": "A"})

    # structure writer with constraints
    out = sdmx.to_pandas(m)
    cl = out.codelist
    assert cl.shape == (3555, 2)

    # unconstrained codelists
    out = sdmx.to_pandas(m, constraint=False)
    cl = out.codelist
    # Compare with ==, as for the constrained shape above
    assert cl.shape == (4177, 2)
Exemple #14
0
def test_write_codelist(specimen):
    """Code lists convert to pandas, both from a message and individually."""
    # Convert every code list in a test specimen
    with specimen("common-structure.xml") as f:
        common = sdmx.read_sdmx(f)
    codelists = sdmx.to_pandas(common)["codelist"]

    # File contains 5 code lists
    n_codelists = len(codelists)
    assert n_codelists == 5

    # Code lists have expected number of items
    assert len(codelists["CL_FREQ"]) == 8

    # Item names can be retrieved by ID
    freq = codelists["CL_FREQ"]
    assert freq["A"] == "Annual"

    # Non-hierarchical code list has a string name
    assert freq.name == "Code list for Frequency (FREQ)"

    # Hierarchical code list, converted on its own
    with specimen("codelist_partial.xml") as f:
        partial = sdmx.read_sdmx(f)
    area = sdmx.to_pandas(partial.codelist["CL_AREA"])

    # Hierarchical list has a 'parent' column; parent of Africa is the World
    assert area.loc["002", "parent"] == "001"

    # Pandas features can be used to merge parent names
    merge_options = dict(
        how="left",
        left_on="parent",
        right_index=True,
        suffixes=("", "_parent"),
    )
    with_parent_names = pd.merge(area, area, **merge_options)
    assert with_parent_names.loc["002", "name_parent"] == "World"
Exemple #15
0
def test_data_roundtrip(pytestconfig, data_id, structure_id, tmp_path):
    """Test that SDMX-ML DataMessages can be 'round-tripped'."""
    # The first structure in the structure specimen is the DSD for the data
    with specimen(structure_id) as f:
        structure_msg = sdmx.read_sdmx(f)
        dsd = structure_msg.structure[0]

    # Parse the data specimen using that DSD
    with specimen(data_id) as f:
        original = sdmx.read_sdmx(f, dsd=dsd)

    # Serialize to a temporary file
    out_path = tmp_path / "output.xml"
    xml_bytes = sdmx.to_xml(original, pretty_print=True)
    out_path.write_bytes(xml_bytes)

    # Parse the serialized copy, using the same DSD
    reread = sdmx.read_sdmx(out_path, dsd=dsd)

    # Both messages compare equal; on failure show the XML if verbose, else the path
    identical = original.compare(reread, strict=True)
    assert identical, (
        out_path.read_text() if pytestconfig.getoption("verbose") else out_path
    )
Exemple #16
0
def test_flat():
    """A DataMessage built by hand converts to the same pandas object as flat.json."""
    # Start from a bare message and give it the header from exr-flat.json
    msg = DataMessage()
    msg.header = Header(
        id="62b5f19d-f1c9-495d-8446-a3661ed24753",
        prepared="2012-11-29T08:40:26Z",
        sender=model.Agency(id="ECB"),
    )

    ds = DataSet()

    # Key of the first observation; later keys are derived from it
    key = Key(
        FREQ="D",
        CURRENCY="NZD",
        CURRENCY_DENOM="EUR",
        EXR_TYPE="SP00",
        EXR_SUFFIX="A",
        TIME_PERIOD="2013-01-18",
    )

    # The same attribute mapping is attached to every observation
    obs_status = DataAttribute(id="OBS_STATUS")
    attr = {"OBS_STATUS": AttributeValue(value_for=obs_status, value="A")}

    ds.obs.append(Observation(dimension=key, value=1.5931, attached_attribute=attr))

    # Each remaining observation changes some key values of the previous one
    later_observations = (
        ({"TIME_PERIOD": "2013-01-21"}, 1.5925),
        ({"CURRENCY": "RUB", "TIME_PERIOD": "2013-01-18"}, 40.3426),
        ({"TIME_PERIOD": "2013-01-21"}, 40.3000),
    )
    for changes, value in later_observations:
        key = key.copy(**changes)
        ds.obs.append(
            Observation(dimension=key, value=value, attached_attribute=attr)
        )

    msg.data.append(ds)

    # Convert the hand-built message
    built = sdmx.to_pandas(msg)

    # Convert the reference specimen
    with specimen("flat.json") as f:
        ref = sdmx.read_sdmx(f)
    from_file = sdmx.to_pandas(ref)

    assert_pd_equal(built, from_file)
Exemple #17
0
def test_read_ss_xml(specimen):
    """Objects in a data message read with a DSD refer to objects of that DSD."""
    # The structure file is in the same directory as the data specimen
    with specimen("M.USD.EUR.SP00.A.xml", opened=False) as f:
        msg_path = f
        dsd_path = f.parent / "structure.xml"

    # Read the DSD, then the data message structured by it
    dsd = sdmx.read_sdmx(dsd_path).structure["ECB_EXR1"]
    data_set = sdmx.read_sdmx(msg_path, dsd=dsd).data[0]

    # The dataset in the message is structured by the DSD
    assert data_set.structured_by is dsd

    # Structures referenced in the dataset are from the DSD
    first_series_key = list(data_set.series.keys())[0]

    # AttributeValue.value_for
    decimals = first_series_key.attrib["DECIMALS"]
    assert decimals.value_for is dsd.attributes.get("DECIMALS")

    # SeriesKey.described_by
    assert first_series_key.described_by is dsd.dimensions

    # Key.described_by
    assert data_set.obs[0].key.described_by is dsd.dimensions

    # KeyValue.value_for
    first_key_value = data_set.obs[0].key.values[0]
    assert first_key_value.value_for is dsd.dimensions.get("FREQ")

    # DSD information that is not in the data message can be looked up by
    # navigating object relationships
    time_format = first_series_key.attrib["TIME_FORMAT"].value_for
    assert len(time_format.related_to.dimensions) == 5
Exemple #18
0
def test_structuremessage(tmp_path, structuremessage):
    """A StructureMessage can be written as XML to a file and read back."""
    xml_bytes = sdmx.to_xml(structuremessage, pretty_print=True)
    print(xml_bytes.decode())

    # Round-trip the message through a file
    out_path = tmp_path / "output.xml"
    out_path.write_bytes(xml_bytes)
    reread = sdmx.read_sdmx(out_path)

    def collection_a_name(message):
        """Return the English name of code 'A' in CL_COLLECTION."""
        return message.codelist["CL_COLLECTION"]["A"].name["en"]

    # Contents match the original object
    assert collection_a_name(reread) == collection_a_name(structuremessage)

    # A strict comparison fails because `structuremessage` lacks URNs, which
    # are constructed automatically by `to_xml`
    assert not reread.compare(structuremessage, strict=True)
    # A non-strict comparison allows this difference
    assert reread.compare(structuremessage, strict=False)
def test_sdmx_roundtrip(tmp_path):
    """A generated structure can be written as SDMX-ML and read back."""
    path = tmp_path / "structure.xml"

    # Structure can be written
    with open(path, "wb") as out:
        xml_bytes = sdmx.to_xml(generate(), pretty_print=True)
        out.write(xml_bytes)

    # Structure can be read
    sm = sdmx.read_sdmx(path)
    regions = sm.constraint["PRICE_FUEL"].data_content_region

    # One CubeRegion
    assert len(regions) == 1

    # One dimension with a MemberSelection
    cube_region = sm.constraint["PRICE_FUEL"].data_content_region[0]
    member_ids = {d.id for d in cube_region.member.keys()}
    assert member_ids == {"FUEL"}

    # 3 values in the MemberSelection
    n_values = len(cube_region.member["FUEL"].values)
    assert n_values == 3
Exemple #20
0
def test_write_agencyscheme(specimen):
    """An agency scheme converts to pandas; empty message attributes are omitted."""
    with specimen("ECB/orgscheme.xml") as source:
        message = sdmx.read_sdmx(source)
        data = sdmx.to_pandas(message)

    # Item access
    agencies = data["organisation_scheme"]["AGENCIES"]
    assert agencies["ESTAT"] == "Eurostat"

    # to_pandas only returns keys for non-empty attributes of StructureMessage
    # https://github.com/dr-leo/pandaSDMX/issues/90
    assert set(data.keys()) == {"organisation_scheme"}

    # Attribute access works
    assert data.organisation_scheme.AGENCIES.ESTAT == "Eurostat"

    # Attribute access fails for the keys that were not returned
    for absent in ("codelist", "dataflow", "structure"):
        with pytest.raises(AttributeError):
            getattr(data, absent)
Exemple #21
0
def test_read_sdmx(tmp_path, specimen):
    """read_sdmx() infers the message format from content, or uses `format`.

    Covers a file with an unknown suffix, an already-open file, content that
    is neither XML nor JSON, and an explicit ``format=`` argument.
    """
    # Copy the file to a temporary file with an unrecognizable suffix
    target = tmp_path / "foo.badsuffix"
    with specimen("flat.json", opened=False) as original:
        # write_text() closes the file once written; a bare open().write()
        # would leave the handle open
        target.write_text(original.read_text())

    # With unknown file extension, read_sdmx() peeks at the file content
    sdmx.read_sdmx(target)

    # Format can be inferred from an already-open file without extension
    with specimen("flat.json") as f:
        sdmx.read_sdmx(f)

    # Exception raised when the file contents don't allow to guess the format
    bad_file = BytesIO(b"#! neither XML nor JSON")
    exc = (
        "cannot infer SDMX message format from path None, format={}, or content "
        "'#! ne..'")
    with pytest.raises(RuntimeError, match=exc.format("None")):
        sdmx.read_sdmx(bad_file)

    # Using the format= argument forces a certain reader to be used
    with pytest.raises(json.JSONDecodeError):
        sdmx.read_sdmx(bad_file, format="JSON")
Exemple #22
0
def test_write_dataflow(specimen):
    """Dataflow definitions convert to a pandas Series of names indexed by ID."""
    # Parse the INSEE dataflow definition
    with specimen("INSEE/dataflow") as source:
        message = sdmx.read_sdmx(source)

    # Convert only the dataflows
    converted = sdmx.to_pandas(message, include="dataflow")
    flows = converted["dataflow"]

    # Number of Dataflows described in the file
    assert len(flows) == 663

    # IDs and names of the first Dataflows; several share a common prefix
    mbop = "Monthly Balance of Payments - "
    first_names = {
        "ACT-TRIM-ANC": "Activity by sex and age - Quarterly series",
        "BPM6-CCAPITAL": "{}Capital account".format(mbop),
        "BPM6-CFINANCIER": "{}Financial account".format(mbop),
        "BPM6-CTRANSACTION": "{}Current transactions account".format(mbop),
        "BPM6-TOTAL": "{}Overall total and main headings".format(mbop),
    }
    expected = pd.Series(first_names)
    assert_pd_equal(converted["dataflow"].head(), expected)
Exemple #23
0
 def test_msg(self, path, dsd):
     """The message in ``self.filename`` under `path` can be parsed with `dsd`."""
     message_path = path / self.filename
     sdmx.read_sdmx(message_path, dsd=dsd)
Exemple #24
0
 def msg(self, path, dsd):
     """Yield the message read from ``self.filename`` under `path`, using `dsd`."""
     message = sdmx.read_sdmx(path / self.filename, dsd=dsd)
     yield message
Exemple #25
0
 def dsd(self, path):
     """Yield the first structure read from ``self.dsd_filename`` under `path`."""
     structure_msg = sdmx.read_sdmx(path / self.dsd_filename)
     yield structure_msg.structure[0]
Exemple #26
0
def test_bare_series(specimen):
    """The ng-ts.xml specimen can be read without raising."""
    with specimen("ng-ts.xml") as source:
        sdmx.read_sdmx(source)
Exemple #27
0
def get_dsd():
    """Read ``SDG_DSD.xml`` and return the first structure in the message."""
    return sdmx.read_sdmx('SDG_DSD.xml').structure[0]
 def retrieve_dsd(self, dsd):
     """Read a structure message and return its first structure.

     If `dsd` starts with ``http`` it is first downloaded to ``SDG_DSD.xml``
     and read from there; otherwise `dsd` is passed to ``read_sdmx()`` as is.
     """
     source = dsd
     if source.startswith('http'):
         local_copy = 'SDG_DSD.xml'
         urlretrieve(source, local_copy)
         source = local_copy
     return sdmx.read_sdmx(source).structure[0]
Exemple #29
0
def test_writer_structure(path):
    """The message at `path` can be read and converted to pandas without raising."""
    message = sdmx.read_sdmx(path)
    sdmx.to_pandas(message)
Exemple #30
0
def test_write_data_attributes(path):
    """Converting with attributes="osgd" gives a Series, DataFrame or list."""
    message = sdmx.read_sdmx(path)
    converted = sdmx.to_pandas(message, attributes="osgd")

    # TODO incomplete
    allowed_types = (pd.Series, pd.DataFrame, list)
    assert isinstance(converted, allowed_types), type(converted)