def test_ContentConstraint(dsd, dks):
    """Serialize a :class:`.ContentConstraint` to XML.

    The constraint is built with an "allowable" role, `dsd` as its only
    content item and `dks` as its data content keys. The XML output is not
    inspected; the test passes if no exception is raised.
    """
    allowable_role = m.ConstraintRole(role=m.ConstraintRoleType.allowable)
    constraint = m.ContentConstraint(
        role=allowable_role,
        content=[dsd],
        data_content_keys=dks,
    )
    sdmx.to_xml(constraint)
def test_ds(dsd, obs):
    """Write a DataSet holding an Observation that is not in a Series.

    The pretty-printed XML is printed; nothing is asserted about it.
    """
    data_set = DataSet(structured_by=dsd)
    data_set.obs.append(obs)

    xml_bytes = sdmx.to_xml(data_set, pretty_print=True)
    print(xml_bytes.decode())
def test_obs(obs):
    """A <gen:ObsKey> element is generated for a 2+-dimensional Observation.dimension."""
    expected_fragment = (
        '<gen:ObsKey><gen:Value id="FOO" value="1"/>'
        '<gen:Value id="BAR" value="2"/></gen:ObsKey>'
    )
    serialized = sdmx.to_xml(obs).decode()
    assert expected_fragment in serialized
def update_dsd():
    """Generate the iTEM SDMX data structures.

    The file item/data/structure.xml is updated.

    The XML is serialized completely in memory before the target file is
    opened. Opening with mode "wb" truncates the file immediately, so doing
    the generation first means an error in ``generate()`` or ``to_xml()``
    leaves the existing file untouched rather than empty.
    """
    import sdmx

    from item.structure import generate

    target = Path(__file__).parent / "data" / "structure.xml"
    content = sdmx.to_xml(generate(), pretty_print=True)
    target.write_bytes(content)
def test_structuremessage(tmp_path, structuremessage):
    """Write a StructureMessage to XML, read it back and compare the two."""
    xml_bytes = sdmx.to_xml(structuremessage, pretty_print=True)
    print(xml_bytes.decode())

    # Round-trip the message through a file
    out_file = tmp_path / "output.xml"
    out_file.write_bytes(xml_bytes)
    reread = sdmx.read_sdmx(out_file)

    # A sample code name matches the original object
    reread_name = reread.codelist["CL_COLLECTION"]["A"].name["en"]
    original_name = structuremessage.codelist["CL_COLLECTION"]["A"].name["en"]
    assert reread_name == original_name

    # Strict comparison is False because `structuremessage` lacks URNs, which
    # are constructed automatically by `to_xml`
    assert not reread.compare(structuremessage, strict=True)

    # Non-strict comparison allows this difference
    assert reread.compare(structuremessage, strict=False)
def test_structure_roundtrip(pytestconfig, specimen_id, strict, tmp_path):
    """Check that an SDMX-ML StructureMessage survives a write/read round trip.

    On failure the assertion message is the written file's text when pytest
    runs with the "verbose" option, and the file's path otherwise.
    """
    # Load the specimen
    with specimen(specimen_id) as stream:
        original = sdmx.read_sdmx(stream)

    # Serialize it to a temporary file
    out_file = tmp_path / "output.xml"
    xml_bytes = sdmx.to_xml(original, pretty_print=True)
    out_file.write_bytes(xml_bytes)

    # Parse what was just written
    reread = sdmx.read_sdmx(out_file)

    # Both messages must compare equal at the requested strictness
    assert original.compare(reread, strict), (
        out_file.read_text() if pytestconfig.getoption("verbose") else out_file
    )
def test_sdmx_roundtrip(tmp_path):
    """Write the generated structures to XML, read them back, check a constraint."""
    target = tmp_path / "structure.xml"

    # Structure can be written
    with open(target, "wb") as handle:
        xml_bytes = sdmx.to_xml(generate(), pretty_print=True)
        handle.write(xml_bytes)

    # Structure can be read
    message = sdmx.read_sdmx(target)

    # The PRICE_FUEL constraint has exactly one CubeRegion
    regions = message.constraint["PRICE_FUEL"].data_content_region
    assert 1 == len(regions)

    # That region selects members on a single dimension, FUEL
    region = regions[0]
    assert {"FUEL"} == {dim.id for dim in region.member.keys()}

    # The FUEL MemberSelection holds 3 values
    assert 3 == len(region.member["FUEL"].values)
def test_data_roundtrip(pytestconfig, data_id, structure_id, tmp_path):
    """Check that an SDMX-ML DataMessage survives a write/read round trip.

    On failure the assertion message is the written file's text when pytest
    runs with the "verbose" option, and the file's path otherwise.
    """
    # Load the structure specimen and take its first structure as the DSD
    with specimen(structure_id) as stream:
        structure_message = sdmx.read_sdmx(stream)
        dsd = structure_message.structure[0]

    # Load the data specimen against that DSD
    with specimen(data_id) as stream:
        original = sdmx.read_sdmx(stream, dsd=dsd)

    # Serialize to a temporary file
    out_file = tmp_path / "output.xml"
    xml_bytes = sdmx.to_xml(original, pretty_print=True)
    out_file.write_bytes(xml_bytes)

    # Parse the written file with the same DSD
    reread = sdmx.read_sdmx(out_file, dsd=dsd)

    # Both messages must be strictly identical
    assert original.compare(reread, strict=True), (
        out_file.read_text() if pytestconfig.getoption("verbose") else out_file
    )
def build(self, language=None):
    """Write the SDMX output. Overrides parent.

    For each indicator, an SDMX-ML data file is written to
    ``self.sdmx_folder`` when the indicator has data, and a metadata file is
    written to ``self.meta_folder`` when concepts, a reference area and a
    reporting type are all available. Afterwards, combined ``all.xml`` files
    are written to both folders.

    Args:
        language: The language being built. Any value other than ``None`` or
            ``'untranslated'`` makes this call a no-op.

    Returns:
        bool: ``True`` when the build is skipped, or when every file write
        reported a non-zero number of bytes/characters; ``False`` otherwise.
    """
    status = True
    # The SDMX output is a special case, and does not need to be
    # translated separately for each language. So we only continue
    # if this is an untranslated or language-agnostic build.
    if language is not None and language != 'untranslated':
        return status

    all_serieses = {}
    all_metadata_serieses = []
    metadata_template = Template(self.get_metadata_template())
    dfd = DataflowDefinition(id="OPEN_SDG_DFD", structure=self.dsd)
    time_period = next(dim for dim in self.dsd.dimensions if dim.id == 'TIME_PERIOD')
    header_info = self.get_header_info()
    header = self.create_header(header_info)
    metadata_base_vars = header_info.copy()

    for indicator_id in self.get_indicator_ids():
        indicator = self.get_indicator_by_id(indicator_id)
        data = indicator.data.copy()

        # Some hardcoded dataframe changes.
        data = data.rename(columns={
            'Value': 'OBS_VALUE',
            'Units': 'UNIT_MEASURE',
            'Series': 'SERIES',
            'Year': 'TIME_PERIOD',
        })
        # Any user-specified dataframe changes.
        self.apply_column_map(data)
        self.apply_code_map(data)
        if self.constrain_data:
            before = len(data.index)
            data = indicator.get_data_matching_schema(self.data_schema, data=data)
            after = len(data.index)
            message = '{indicator_id} - Removed {difference} rows while constraining data (out of {total}).'
            difference = str(before - after)
            self.warn(message, indicator_id=indicator_id, difference=difference, total=before)
        data = data.replace(np.nan, '', regex=True)

        if not data.empty:
            serieses = {}
            # The measure does not change from row to row, so look it up once.
            measure = self.dsd.measures[0]
            for _, row in data.iterrows():
                series_key = self.dsd.make_key(SeriesKey, self.get_dimension_values(row, indicator))
                series_key.attrib = self.get_series_attribute_values(row, indicator)
                attributes = self.get_observation_attribute_values(row, indicator)
                dimension_key = self.dsd.make_key(Key, values={
                    'TIME_PERIOD': str(row['TIME_PERIOD']),
                })
                observation = Observation(
                    series_key=series_key,
                    dimension=dimension_key,
                    attached_attribute=attributes,
                    value_for=measure,
                    value=row[measure.id],
                )
                # Group the observations by their series key.
                serieses.setdefault(series_key, []).append(observation)

            dataset = self.create_dataset(serieses)
            msg = DataMessage(data=[dataset], dataflow=dfd, header=header, observation_dimension=time_period)
            sdmx_path = os.path.join(self.sdmx_folder, indicator_id + '.xml')
            with open(sdmx_path, 'wb') as f:
                written = f.write(sdmx.to_xml(msg))
            # write() returns a count; a bitwise `&` with it would make the
            # status depend on whether that count is odd or even.
            status = status and written > 0
            all_serieses.update(serieses)

        concepts = indicator.meta
        if self.constrain_meta and self.msd is not None:
            concepts = indicator.get_meta_matching_schema(self.msd)

        # Fall back to the first value in the data when no fixed value is configured.
        reporting_type = self.meta_reporting_type
        if reporting_type is None and 'REPORTING_TYPE' in data.columns and len(data) > 0:
            reporting_type = self.get_first_value_from_data_column(data, 'REPORTING_TYPE')
        ref_area = self.meta_ref_area
        if ref_area is None and 'REF_AREA' in data.columns and len(data) > 0:
            ref_area = self.get_first_value_from_data_column(data, 'REF_AREA')

        if concepts and ref_area is not None and reporting_type is not None:
            series_codes = helpers.sdmx.get_all_series_codes_from_indicator_id(
                indicator_id,
                dsd_path=self.dsd_path,
                request_params=self.request_params,
            )
            # Make sure the indicator is fully translated.
            # (`lang` rather than `language`, to avoid shadowing the parameter.)
            for lang in self.all_languages:
                indicator.translate(lang, self.translation_helper)

            concept_items = []
            for key in concepts:
                translation_items = [
                    {
                        'language': lang,
                        'value': indicator.language(lang).get_meta_field_value(key),
                    }
                    for lang in self.all_languages
                ]
                concept_items.append({
                    'key': key,
                    'translations': translation_items,
                })

            # One metadata set per series code, all sharing the same concepts.
            metadata_serieses = [
                {
                    'set_id': uuid.uuid4(),
                    'series': code,
                    'reporting_type': reporting_type,
                    'ref_area': ref_area,
                    'concepts': concept_items,
                }
                for code in series_codes
            ]

            metadata = metadata_base_vars.copy()
            metadata['serieses'] = metadata_serieses
            metadata_sdmx = metadata_template.render(metadata)
            meta_path = os.path.join(self.meta_folder, indicator_id + '.xml')
            with open(meta_path, 'w') as f:
                written = f.write(metadata_sdmx)
            status = status and written > 0
            all_metadata_serieses.extend(metadata_serieses)

    # Combined data file covering every indicator.
    dataset = self.create_dataset(all_serieses)
    msg = DataMessage(data=[dataset], dataflow=dfd, header=header, observation_dimension=time_period)
    all_sdmx_path = os.path.join(self.sdmx_folder, 'all.xml')
    with open(all_sdmx_path, 'wb') as f:
        written = f.write(sdmx.to_xml(msg))
    status = status and written > 0

    # Combined metadata file covering every indicator.
    metadata = metadata_base_vars.copy()
    metadata['serieses'] = all_metadata_serieses
    metadata_sdmx = metadata_template.render(metadata)
    meta_path = os.path.join(self.meta_folder, 'all.xml')
    with open(meta_path, 'w') as f:
        written = f.write(metadata_sdmx)
    status = status and written > 0

    return status
def test_DataKeySet(dks):
    """Serialize a :class:`.DataKeySet` to XML.

    The output is discarded; the test passes if no exception is raised.
    """
    sdmx.to_xml(dks)
def test_codelist(tmp_path, codelist):
    """Serialize `codelist` to pretty-printed XML and print the decoded text."""
    print(sdmx.to_xml(codelist, pretty_print=True).decode())