def parse_header(self, elem):
    """Parse a message <Header> element.

    Returns a list whose first item is the :class:`Header`; if the header
    carries a DataStructureDefinition reference, a DataflowDefinition
    wrapping that DSD is appended as a second item.
    """
    values = self._collect('header', elem)

    # Pull any DataStructureDefinition reference fields out of *values*
    dsd_attrs = {}
    for field in ('id', 'agencyid', 'version', 'urn'):
        ref = values.pop('structure_ref_' + field, None)
        if not ref:
            continue
        if field == 'agencyid':
            dsd_attrs['maintainer'] = Agency(id=ref)
        else:
            dsd_attrs[field] = ref

    # URN-only reference: fall back to the message-level structure ID
    if set(dsd_attrs) == {'urn'}:
        dsd_attrs['id'] = values['structure_id']

    extra = []
    if 'id' in dsd_attrs:
        # Create (or retrieve) the DSD and wrap it in a DFD
        dsd = self._maintained(DataStructureDefinition, **dsd_attrs)
        extra.append(
            DataflowDefinition(id=values.pop('structure_id'), structure=dsd)
        )

        # Also store the dimension at observation
        self._set_obs_dim(values.pop('dim_at_obs'))

    # Maybe return the DFD; see .initialize()
    return [Header(**values)] + extra
def parse_header(self, elem):
    """Parse a message <Header> element via the HEADER_XPATH expressions.

    Returns a list whose first item is the :class:`Header`; if the header
    references a DataStructureDefinition, a DataflowDefinition wrapping
    that DSD is appended as a second item (consumed by .initialize()).
    """
    # Collect values from *elem* and its children using XPath. A key maps
    # to a single match when there is exactly one, else the list of matches.
    values = {}
    for key, xpath in HEADER_XPATH.items():
        matches = xpath(elem)
        if len(matches) == 0:
            continue
        values[key] = matches[0] if len(matches) == 1 else matches

    # Handle a reference to a DataStructureDefinition: pop the
    # structure_ref_* keys out of *values* into DSD constructor attributes
    attrs = {}
    for k in ['id', 'agencyid', 'version', 'urn']:
        value = values.pop('structure_ref_' + k, None)
        if not value:
            continue
        elif k == 'agencyid':
            # The agency ID becomes the maintainer of the referenced DSD
            attrs['maintainer'] = Agency(id=value)
        else:
            attrs[k] = value

    # URN-only reference: fall back to the message-level structure ID
    if set(attrs.keys()) == {'urn'}:
        attrs['id'] = values['structure_id']

    extra = []

    if 'id' in attrs:
        # Create or retrieve the DSD. NB if the dsd argument was provided
        # to read_message(), this should be the same DSD
        dsd = self._maintained(DataStructureDefinition, **attrs)

        if 'structure_id' in values:
            # Add the DSD to the index a second time, using the message
            # -specific structure ID (rather than the DSD's own ID).
            key = ('DataStructureDefinition', values['structure_id'])
            self._index[key] = dsd

        # Create a DataflowDefinition
        # NOTE(review): this pop is unguarded — if 'structure_id' is absent
        # from *values* it raises KeyError; presumably an invariant of
        # messages that carry a DSD reference. TODO confirm.
        dfd = DataflowDefinition(id=values.pop('structure_id'),
                                 structure=dsd)
        extra.append(dfd)

        # Store the dimension at the observation level
        dim_at_obs = values.pop('dim_at_obs')
        if dim_at_obs == 'AllDimensions':
            self._obs_dim = AllDimensions
        else:
            # Retrieve or create the Dimension; order=1e9 sorts it last
            args = dict(id=dim_at_obs, order=1e9)
            if 'TimeSeries' in self._stack[0]:
                # {,StructureSpecific}TimeSeriesData message → the
                # dimension at observation level is a TimeDimension
                args['cls'] = TimeDimension
            self._obs_dim = dsd.dimensions.get(**args)

    # Maybe return the DFD; see .initialize()
    return [Header(**values)] + extra
def test_flat():
    """Hand-build the exr-flat.json content and compare pandas output."""
    msg = DataMessage()

    # Header matching the specimen file
    msg.header = Header(
        id="62b5f19d-f1c9-495d-8446-a3661ed24753",
        prepared="2012-11-29T08:40:26Z",
        sender=model.Agency(id="ECB"),
    )

    ds = DataSet()

    # Every observation carries the same OBS_STATUS attribute
    obs_status = DataAttribute(id="OBS_STATUS")
    attr = {"OBS_STATUS": AttributeValue(value_for=obs_status, value="A")}

    # Initial key; each later row updates it via Key.copy()
    key = Key(
        FREQ="D",
        CURRENCY="NZD",
        CURRENCY_DENOM="EUR",
        EXR_TYPE="SP00",
        EXR_SUFFIX="A",
        TIME_PERIOD="2013-01-18",
    )
    rows = (
        ({}, 1.5931),
        (dict(TIME_PERIOD="2013-01-21"), 1.5925),
        (dict(CURRENCY="RUB", TIME_PERIOD="2013-01-18"), 40.3426),
        (dict(TIME_PERIOD="2013-01-21"), 40.3000),
    )
    for updates, value in rows:
        if updates:
            key = key.copy(**updates)
        ds.obs.append(
            Observation(dimension=key, value=value, attached_attribute=attr))

    msg.data.append(ds)

    # Convert the hand-built message to pd.DataFrame
    df1 = pandasdmx.to_pandas(msg)

    # Parse the specimen and convert it the same way
    with specimen("flat.json") as f:
        df2 = pandasdmx.to_pandas(pandasdmx.read_sdmx(f))

    assert_pd_equal(df1, df2)
def test_flat():
    """Recreate exr-flat.json by hand; its pandas form must match the file."""
    msg = DataMessage()

    # Header matching the specimen
    msg.header = Header(
        id='62b5f19d-f1c9-495d-8446-a3661ed24753',
        prepared='2012-11-29T08:40:26Z',
        sender='ECB',
    )

    ds = DataSet()

    # Shared OBS_STATUS attribute for all observations
    obs_status = DataAttribute(id='OBS_STATUS')
    attr = {'OBS_STATUS': AttributeValue(value_for=obs_status, value='A')}

    key = Key(FREQ='D', CURRENCY='NZD', CURRENCY_DENOM='EUR',
              EXR_TYPE='SP00', EXR_SUFFIX='A', TIME_PERIOD='2013-01-18')

    def add(value):
        # Append one Observation for the current *key*
        ds.obs.append(
            Observation(dimension=key, value=value, attached_attribute=attr))

    add(1.5931)
    key = key.copy(TIME_PERIOD='2013-01-21')
    add(1.5925)
    key = key.copy(CURRENCY='RUB', TIME_PERIOD='2013-01-18')
    add(40.3426)
    key = key.copy(TIME_PERIOD='2013-01-21')
    add(40.3000)

    msg.data.append(ds)

    # Convert to pd.DataFrame and compare against the parsed specimen
    df1 = sdmx.to_pandas(msg)
    with specimen('flat.json') as f:
        df2 = sdmx.to_pandas(sdmx.read_sdmx(f))

    assert_pd_equal(df1, df2)
def read_message(self, source):
    """Parse *source* (SDMX-ML) and return a Message instance.

    The concrete message class is chosen from the root element tag via
    MESSAGE; raises ParseError for an unrecognized root element.
    """
    # Root XML element
    root = etree.parse(source).getroot()

    # Message class
    try:
        cls = MESSAGE[root.tag]
    except KeyError:
        msg = 'Unrecognized message root element {!r}'.format(root.tag)
        raise ParseError(msg) from None

    # Reset per-message parser state
    self._stack = []
    self._index = {}
    self._current = {}

    # Parse the tree
    values = self._parse(root)

    # Instantiate the message object
    msg = cls()

    # Store the header
    header = values.pop('header', None)
    if header is None and 'errormessage' in values:
        # An error message: synthesize an empty header
        msg.header = Header()

        # Error message attributes resemble footer attributes
        values['footer'] = Footer(**values.pop('errormessage'))
    elif len(header) == 2:
        # Length-2 list includes DFD/DSD reference (see parse_header)
        msg.header, msg.dataflow = header
        msg.observation_dimension = self._obs_dim
    else:
        # No DFD in the header, e.g. for a StructureMessage
        # NOTE(review): if *header* is None here (no header AND no
        # errormessage) the len() above raises TypeError — presumably an
        # invariant of well-formed messages. TODO confirm.
        msg.header = header[0]

    # Store the footer
    msg.footer = values.pop('footer', None)

    # Finalize according to the message type
    if cls is DataMessage:
        # Simply store the datasets
        msg.data.extend(wrap(values.pop('dataset', [])))
    elif cls is StructureMessage:
        structures = values.pop('structures')

        # Populate the message's per-kind dictionaries, keyed by object ID
        for attr, name in (
                ('dataflow', 'dataflows'),
                ('codelist', 'codelists'),
                ('constraint', 'constraints'),
                ('structure', 'datastructures'),
                ('category_scheme', 'categoryschemes'),
                ('concept_scheme', 'concepts'),
                ('organisation_scheme', 'organisationschemes'),
                ('provisionagreement', 'provisionagreements'),
                ):
            for obj in structures.pop(name, []):
                getattr(msg, attr)[obj.id] = obj

        # Check, but do not store, Categorisations

        # Assemble a list of external categoryschemes from the parse index
        ext_cs = []
        for key, cs in self._index.items():
            if key[0] == 'CategoryScheme' and cs.is_external_reference:
                ext_cs.append(cs)

        for c in structures.pop('categorisations', []):
            # Only Categorisations of DataflowDefinitions are checked
            if not isinstance(c.artefact, DataflowDefinition):
                continue
            assert c.artefact in msg.dataflow.values()

            # The categorised Category must appear in some known scheme
            missing_cs = True
            for cs in chain(msg.category_scheme.values(), ext_cs):
                if c.category in cs:
                    missing_cs = False
                    if cs.is_external_reference:
                        # Store the externally-referred CategoryScheme
                        msg.category_scheme[cs.id] = cs
                    break

            assert not missing_cs

        # Everything in *structures* must have been consumed above
        assert len(structures) == 0, structures

    # Likewise all collected values must have been consumed
    assert len(values) == 0, values

    return msg
def read_message(self, source):
    """Parse *source* (SDMX-JSON) and return a DataMessage.

    Reads the header, the 'structure' section (dimensions and attributes
    with their levels and candidate values), then each entry of
    'dataSets' via read_dataset().
    """
    # Initialize message instance
    msg = DataMessage()

    # Read JSON
    tree = json.load(source)

    # Read the header
    # FIXME KeyError: 'header'
    elem = tree['header']
    msg.header = Header(id=elem['id'], prepared=elem['prepared'],
                        sender=Item(**elem['sender']))

    # pre-fetch some structures for efficient use in series and obs
    structure = tree['structure']

    # Read dimensions and values; record, per Dimension, the level
    # ('dataSet', 'series' or 'observation') it appears at and the
    # KeyValues it can take. These maps drive _make_key()/read_dataset().
    self._dim_level = dict()
    self._dim_values = dict()
    for level_name, level in structure['dimensions'].items():
        for elem in level:
            # Create the Dimension
            d = msg.structure.dimension(id=elem['id'],
                                        order=elem.get('keyPosition', -1))

            # Record the level it appears at
            self._dim_level[d] = level_name

            # Record values
            self._dim_values[d] = list()
            for value in elem.get('values', []):
                self._dim_values[d].append(
                    KeyValue(id=d.id, value=value['id']))

    # Assign an order to an implicit dimension (one without keyPosition)
    # NOTE(review): if several dimensions lack keyPosition they all get
    # the same order — presumably at most one is implicit. TODO confirm.
    for d in msg.structure.dimensions:
        if d.order == -1:
            d.order = len(msg.structure.dimensions)

    # Determine the dimension at the observation level
    if all([level == 'observation' for level in self._dim_level.values()]):
        dim_at_obs = AllDimensions
    else:
        dim_at_obs = [
            dim for dim, level in self._dim_level.items()
            if level == 'observation'
        ]
    msg.observation_dimension = dim_at_obs

    # Read attributes and values, mirroring the dimension handling above
    self._attr_level = dict()
    self._attr_values = dict()
    for level_name, level in structure['attributes'].items():
        for attr in level:
            # Create a DataAttribute in the DSD
            a = msg.structure.attribute(
                id=attr['id'],
                concept_identity=Concept(name=attr['name']),
            )

            # Record the level it appears at
            self._attr_level[a] = level_name

            # Record its values
            self._attr_values[a] = list()
            for value in attr.get('values', []):
                self._attr_values[a].append(
                    AttributeValue(value=value['name'], value_for=a))

    # Expose the message to helper methods
    self.msg = msg

    # Make a SeriesKey for Observations in this DataSet
    ds_key = self._make_key('dataSet')

    # Read DataSets
    for ds in tree['dataSets']:
        msg.data.append(self.read_dataset(ds, ds_key))

    return msg
def read_message(self, source, dsd=None):
    """Parse *source* (SDMX-JSON) and return a DataMessage.

    Parameters
    ----------
    source :
        File-like object containing an SDMX-JSON message.
    dsd :
        Optional existing DataStructureDefinition to attach to the
        message's dataflow before parsing.
    """
    # Initialize message instance
    msg = DataMessage()

    if dsd:  # pragma: no cover
        # Store explicit DSD, if any
        msg.dataflow.structure = dsd

    # Read JSON
    # NOTE(review): presumably disables chunked/size-limited reads on the
    # wrapped source object so json.load sees the whole body — confirm.
    source.default_size = -1
    tree = json.load(source)

    # Read the header
    # TODO handle KeyError here
    elem = tree["header"]
    msg.header = Header(
        id=elem["id"],
        prepared=elem["prepared"],
        sender=model.Agency(**elem["sender"]),
    )

    # pre-fetch some structures for efficient use in series and obs
    structure = tree["structure"]

    # Read dimensions and values; record, per Dimension, the level it
    # appears at and its candidate KeyValues for later key construction
    self._dim_level = dict()
    self._dim_values = dict()
    for level_name, level in structure["dimensions"].items():
        for elem in level:
            # Create (or fetch) the Dimension in the message's DSD
            d = msg.structure.dimensions.getdefault(
                id=elem["id"], order=elem.get("keyPosition", -1)
            )

            # Record the level it appears at
            self._dim_level[d] = level_name

            # Record values
            self._dim_values[d] = list()
            for value in elem.get("values", []):
                self._dim_values[d].append(
                    KeyValue(id=d.id, value=value["id"]))

    # Assign an order to an implicit dimension (one without keyPosition)
    # NOTE(review): several implicit dimensions would share one order —
    # presumably at most one occurs in practice. TODO confirm.
    for d in msg.structure.dimensions:
        if d.order == -1:
            d.order = len(msg.structure.dimensions)

    # Determine the dimension at the observation level
    if all([level == "observation" for level in self._dim_level.values()]):
        dim_at_obs = AllDimensions
    else:
        dim_at_obs = [
            dim for dim, level in self._dim_level.items()
            if level == "observation"
        ]

    msg.observation_dimension = dim_at_obs

    # Read attributes and values, mirroring the dimension handling above
    self._attr_level = dict()
    self._attr_values = dict()
    for level_name, level in structure["attributes"].items():
        for attr in level:
            # Create a DataAttribute in the DSD
            a = msg.structure.attributes.getdefault(
                id=attr["id"], concept_identity=Concept(name=attr["name"])
            )

            # Record the level it appears at
            self._attr_level[a] = level_name

            # Record its values
            self._attr_values[a] = list()
            for value in attr.get("values", []):
                self._attr_values[a].append(
                    AttributeValue(value=value["name"], value_for=a)
                )

    # Expose the message to helper methods
    self.msg = msg

    # Make a SeriesKey for Observations in this DataSet
    ds_key = self._make_key("dataSet")

    # Read DataSets
    for ds in tree["dataSets"]:
        msg.data.append(self.read_dataset(ds, ds_key))

    return msg