def test_write_constraint():
    """Check the 'constraint' argument to writer.write_dataset.

    The same data message is converted to pandas without and with a
    ContentConstraint on CURRENCY; the constrained result must contain only
    the observations matching the constraint.
    """
    with specimen("ng-ts.xml") as f:
        msg = pandasdmx.read_sdmx(f)

    # The message only refers to its DSD, so the DSD is fetched separately
    assert msg.structure.is_external_reference

    # NB the specimen included in tests/data has 'ECB_EXR_NG' as the data
    #    structure ID; but a query against the web service gives 'ECB_EXR1'
    #    for the same data structure.
    # Named `dsd_id` rather than `id` to avoid shadowing the builtin.
    dsd_id = "ECB_EXR1"
    dsd = (
        pandasdmx.Request(msg.structure.maintainer.id)
        .get("datastructure", dsd_id)
        .structure[dsd_id]
    )

    # Create a ContentConstraint
    cc = dsd.make_constraint({"CURRENCY": "JPY+USD"})

    # Write the message without constraint
    s1 = pandasdmx.to_pandas(msg)
    assert len(s1) == 12
    assert set(s1.index.to_frame()["CURRENCY"]) == {"CHF", "GBP", "JPY", "USD"}

    # Writing with the constraint produces fewer items: only those matching
    # the constraint
    s2 = pandasdmx.to_pandas(msg, constraint=cc)
    assert len(s2) == 6
    assert set(s2.index.to_frame()["CURRENCY"]) == {"JPY", "USD"}
def test_write_conceptscheme():
    """Concept schemes of a structure message are converted to pandas."""
    with specimen("common-structure.xml") as f:
        structure_msg = pandasdmx.read_sdmx(f)
        converted = pandasdmx.to_pandas(structure_msg)

    # Look up the name of a single concept in the cross-domain scheme
    concepts = converted["concept_scheme"]["CROSS_DOMAIN_CONCEPTS"]
    concept_name = concepts.loc["UNIT_MEASURE", "name"]
    assert concept_name == "Unit of Measure"
def test_write_categoryscheme():
    """Category schemes are converted to pandas, including parent IDs."""
    with specimen("IPI-2010-A21-structure.xml") as f:
        structure_msg = pandasdmx.read_sdmx(f)
        converted = pandasdmx.to_pandas(structure_msg)

    scheme = converted["category_scheme"]["CLASSEMENT_DATAFLOWS"]

    # A top-level category is present with its name
    expected_name = "National accounts (GDP, consumption...)"
    assert scheme.loc["COMPTA-NAT", "name"] == expected_name

    # Child categories appear too, and refer to their parent by ID
    assert scheme.loc["CNA-PIB-2005", "parent"] == "CNA-PIB"
def test_read_xml_structure_insee():
    """Check object identity and dimension count in an INSEE structure file.

    The dataflow's ``structure`` must be the very same object as the entry
    stored under the same ID in the message's ``structure`` collection.
    """
    with specimen("IPI-2010-A21-structure.xml") as f:
        msg = pandasdmx.read_sdmx(f)

    dsd = msg.structure["IPI-2010-A21"]

    # Same objects referenced. Use `is` instead of comparing id() values:
    # id() is only unique among objects that exist at the same time, so
    # comparing the ids of two temporaries can give a false match.
    assert msg.dataflow["IPI-2010-A21"].structure is dsd

    # Number of dimensions loaded correctly
    assert len(dsd.dimensions) == 4
def test_data_roundtrip(pytestconfig, data_id, structure_id, tmp_path):
    """Write an SDMX-ML DataMessage to file and read back an equal message."""
    # The data structure definition is used for both reads below
    with specimen(structure_id) as structure_file:
        structure = pandasdmx.read_sdmx(structure_file).structure[0]

    # Parse the data specimen against that structure
    with specimen(data_id) as data_file:
        original = pandasdmx.read_sdmx(data_file, dsd=structure)

    # Serialise to a temporary file
    output = tmp_path / "output.xml"
    xml_bytes = pandasdmx.to_xml(original, pretty_print=True)
    output.write_bytes(xml_bytes)

    # Parse the written file with the same structure
    reread = pandasdmx.read_sdmx(output, dsd=structure)

    # Contents are identical. On failure, show the file's contents when pytest
    # runs verbosely, otherwise only its path.
    assert original.compare(reread, strict=True), (
        output.read_text() if pytestconfig.getoption("verbose") else output
    )
def test_write_codelist():
    """Code lists convert to pandas, both flat and hierarchical ones."""
    # --- Flat code lists from a structure message ---
    with specimen("common-structure.xml") as f:
        common_msg = pandasdmx.read_sdmx(f)
    all_codelists = pandasdmx.to_pandas(common_msg)["codelist"]

    # The file contains 5 code lists
    assert len(all_codelists) == 5

    # CL_FREQ has the expected number of items
    frequency = all_codelists["CL_FREQ"]
    assert len(all_codelists["CL_FREQ"]) == 8

    # An item's name can be retrieved by its ID
    assert frequency["A"] == "Annual"

    # A non-hierarchical code list carries a string name
    assert frequency.name == "Code list for Frequency (FREQ)"

    # --- Hierarchical code list, converted on its own ---
    with specimen("codelist_partial.xml") as f:
        partial_msg = pandasdmx.read_sdmx(f)
    areas = pandasdmx.to_pandas(partial_msg.codelist["CL_AREA"])

    # A hierarchical list has a 'parent' column; parent of Africa is the World
    assert areas.loc["002", "parent"] == "001"

    # pandas features can be used to merge in the parents' names
    with_parents = pd.merge(
        areas,
        areas,
        how="left",
        left_on="parent",
        right_index=True,
        suffixes=("", "_parent"),
    )
    assert with_parents.loc["002", "name_parent"] == "World"
def test_structure_roundtrip(pytestconfig, specimen_id, strict, tmp_path):
    """Test that SDMX-ML StructureMessages can be 'round-tripped'.

    The specimen is read, written to a temporary XML file, and read again;
    the two messages are then compared using the given `strict` setting.
    """
    # Read a specimen file
    with specimen(specimen_id) as f:
        msg0 = pandasdmx.read_sdmx(f)

    # Write to file
    path = tmp_path / "output.xml"
    path.write_bytes(pandasdmx.to_xml(msg0, pretty_print=True))

    # Read again
    msg1 = pandasdmx.read_sdmx(path)

    # Contents are identical. `strict` is passed by keyword, consistent with
    # test_data_roundtrip. On failure, show the file's contents when pytest
    # runs verbosely, otherwise only its path.
    assert msg0.compare(msg1, strict=strict), (
        path.read_text() if pytestconfig.getoption("verbose") else path
    )
def test_write_agencyscheme():
    """An agency scheme converts to pandas; empty collections are omitted."""
    with specimen("ECB/orgscheme.xml") as f:
        msg = pandasdmx.read_sdmx(f)
        converted = pandasdmx.to_pandas(msg)

    # Item access by key
    agencies = converted["organisation_scheme"]["AGENCIES"]
    assert agencies["ESTAT"] == "Eurostat"

    # to_pandas only returns keys for non-empty attributes of StructureMessage
    # https://github.com/dr-leo/pandaSDMX/issues/90
    assert set(converted.keys()) == {"organisation_scheme"}

    # The same value is reachable through attribute access
    assert converted.organisation_scheme.AGENCIES.ESTAT == "Eurostat"

    # Attributes for the omitted collections raise
    for absent in ("codelist", "dataflow", "structure"):
        with pytest.raises(AttributeError):
            getattr(converted, absent)
def test_write_dataflow():
    """Dataflow definitions from the INSEE specimen convert to pandas."""
    with specimen("INSEE/dataflow") as f:
        msg = pandasdmx.read_sdmx(f)

    # Convert only the dataflows
    converted = pandasdmx.to_pandas(msg, include="dataflow")
    flows = converted["dataflow"]

    # Number of Dataflows described in the file
    assert len(flows) == 663

    # IDs and names of the first Dataflows; several share a common prefix
    prefix = "Monthly Balance of Payments - "
    first_flows = pd.Series(
        {
            "ACT-TRIM-ANC": "Activity by sex and age - Quarterly series",
            "BPM6-CCAPITAL": prefix + "Capital account",
            "BPM6-CFINANCIER": prefix + "Financial account",
            "BPM6-CTRANSACTION": prefix + "Current transactions account",
            "BPM6-TOTAL": prefix + "Overall total and main headings",
        }
    )
    assert_pd_equal(flows.head(), first_flows)
def test_read_ss_xml():
    """A data message read with a DSD refers to that DSD's own objects."""
    # Only the paths are needed here; the structure file sits next to the
    # data file
    with specimen("M.USD.EUR.SP00.A.xml", opened=False) as f:
        data_path = f
        structure_path = f.parent / "structure.xml"

    # Read the DSD, then the data message structured by it
    dsd = pandasdmx.read_sdmx(structure_path).structure["ECB_EXR1"]
    msg = pandasdmx.read_sdmx(data_path, dsd=dsd)
    dataset = msg.data[0]

    # The dataset in the message is structured by the DSD
    assert dataset.structured_by is dsd

    # Structures referenced in the dataset come from the DSD
    series_keys = list(dataset.series.keys())
    first_series_key = series_keys[0]
    first_obs_key = dataset.obs[0].key

    # AttributeValue.value_for
    decimals = first_series_key.attrib["DECIMALS"]
    assert decimals.value_for is dsd.attributes.get("DECIMALS")

    # SeriesKey.described_by
    assert first_series_key.described_by is dsd.dimensions

    # Key.described_by
    assert first_obs_key.described_by is dsd.dimensions

    # KeyValue.value_for
    assert first_obs_key.values[0].value_for is dsd.dimensions.get("FREQ")

    # DSD information that is not in the data message can be looked up by
    # navigating object relationships
    time_format = first_series_key.attrib["TIME_FORMAT"].value_for
    assert len(time_format.related_to.dimensions) == 5
def test_header():
    """The header ID of the flat JSON specimen is read."""
    with specimen("flat.json") as f:
        message = pandasdmx.read_sdmx(f)

    expected_id = "62b5f19d-f1c9-495d-8446-a3661ed24753"
    assert message.header.id == expected_id
def test_write_dataset_datetime():
    """Exercise the forms of the `datetime` argument accepted by to_pandas()."""
    # Load the structure and pick out the two dimensions used below
    with specimen("IPI-2010-A21-structure.xml") as f:
        dsd = pandasdmx.read_sdmx(f).structure["IPI-2010-A21"]
        time_dim = dsd.dimensions.get("TIME_PERIOD")
        freq_dim = dsd.dimensions.get("FREQ")

    assert isinstance(time_dim, TimeDimension)

    # Load the data two ways: with and without the DSD
    with specimen("IPI-2010-A21.xml") as f:
        msg = pandasdmx.read_sdmx(f, dsd=dsd)
        ds = msg.data[0]

    with specimen("IPI-2010-A21.xml") as f:
        msg_no_structure = pandasdmx.read_sdmx(f)

    # IDs of every dimension except the time dimension. This list is modified
    # in place further down, and check() reads it at call time.
    other_dims = [
        dim.id for dim in dsd.dimensions.components if dim.id != "TIME_PERIOD"
    ]

    def check(frame, axis=0, cls=pd.DatetimeIndex):
        # `axis` selects which axis is expected to hold the datetime values;
        # the opposite axis must be labelled by the remaining dimensions
        if axis:
            dims_attr, time_attr = "index", "columns"
        else:
            dims_attr, time_attr = "columns", "index"
        assert getattr(frame, dims_attr).names == other_dims
        assert isinstance(getattr(frame, time_attr), cls)

    # datetime given as a dimension ID
    check(pandasdmx.to_pandas(ds, datetime="TIME_PERIOD"))

    # datetime given as a Dimension instance
    check(pandasdmx.to_pandas(ds, datetime=time_dim))

    # datetime=True fails because the data message read without a DSD
    # contains no actual structure information
    no_time_dim = r"no TimeDimension in \[.*\]"
    with pytest.raises(ValueError, match=no_time_dim):
        pandasdmx.to_pandas(msg_no_structure, datetime=True)

    with pytest.raises(ValueError, match=no_time_dim):
        pandasdmx.to_pandas(msg_no_structure.data[0], datetime=True)

    # A DataMessage parsed with a DSD allows write_dataset to infer the
    # TimeDimension
    check(pandasdmx.to_pandas(msg, datetime=True))

    # Same for a DataSet
    check(pandasdmx.to_pandas(ds, datetime=True))

    # As above, with axis=1
    frame = pandasdmx.to_pandas(ds, datetime={"dim": "TIME_PERIOD", "axis": 1})
    check(frame, axis=1)
    frame = pandasdmx.to_pandas(ds, datetime={"dim": time_dim, "axis": 1})
    check(frame, axis=1)
    ds.structured_by = dsd
    frame = pandasdmx.to_pandas(ds, datetime={"axis": 1})
    check(frame, axis=1)
    frame = pandasdmx.to_pandas(msg, datetime={"axis": 1})
    check(frame, axis=1)

    # Write with freq='M' works
    frame = pandasdmx.to_pandas(ds, datetime={"dim": "TIME_PERIOD", "freq": "M"})
    check(frame, cls=pd.PeriodIndex)

    # Write with freq='A' works
    frame = pandasdmx.to_pandas(ds, datetime={"dim": "TIME_PERIOD", "freq": "A"})
    check(frame, cls=pd.PeriodIndex)
    # …but the index is not unique, because month information was discarded
    assert not frame.index.is_unique

    # Write specifying the FREQ dimension by name fails
    with pytest.raises(
        ValueError,
        match="cannot convert to PeriodIndex with " r"non-unique freq=\['A', 'M'\]",
    ):
        pandasdmx.to_pandas(ds, datetime={"dim": "TIME_PERIOD", "freq": "FREQ"})

    # Remove non-monthly obs
    # TODO use a constraint, when this is supported
    ds.obs = [obs for obs in ds.obs if obs.key.FREQ != "A"]

    # Now specifying the dimension by name works
    frame = pandasdmx.to_pandas(ds, datetime={"dim": "TIME_PERIOD", "freq": "FREQ"})

    # and FREQ is no longer in the columns index
    other_dims.remove("FREQ")
    check(frame, cls=pd.PeriodIndex)

    # Specifying a Dimension works
    frame = pandasdmx.to_pandas(ds, datetime={"dim": time_dim, "freq": freq_dim})
    check(frame, cls=pd.PeriodIndex)

    # As above, using the DSD attached to the DataMessage
    frame = pandasdmx.to_pandas(msg, datetime={"dim": time_dim, "freq": "FREQ"})
    check(frame, cls=pd.PeriodIndex)

    # Invalid arguments
    with pytest.raises(ValueError, match="X"):
        pandasdmx.to_pandas(msg, datetime={"dim": time_dim, "freq": "X"})

    with pytest.raises(ValueError, match="foo"):
        pandasdmx.to_pandas(ds, datetime={"foo": "bar"})

    with pytest.raises(ValueError, match="43"):
        pandasdmx.to_pandas(ds, datetime=43)