예제 #1
0
파일: sdmxml.py 프로젝트: daoluan/pandaSDMX
    def parse_header(self, elem):
        """Parse a message header from *elem*.

        Returns a list whose first item is the Header. If the collected
        values contain a usable reference to a data structure definition, a
        DataflowDefinition wrapping that structure is returned as a second
        item (the original note points to ``.initialize()`` for how the
        second item is consumed).
        """
        values = self._collect('header', elem)

        # Move the parts of a DataStructureDefinition reference out of the
        # collected values; missing or empty parts are ignored
        dsd_args = {}
        for part in ('id', 'agencyid', 'version', 'urn'):
            found = values.pop('structure_ref_' + part, None)
            if not found:
                continue
            if part == 'agencyid':
                # The agency ID is passed on as an Agency under 'maintainer'
                dsd_args['maintainer'] = Agency(id=found)
            else:
                dsd_args[part] = found

        # Only a URN was given: fall back to the structure ID for the DSD ID
        if set(dsd_args) == {'urn'}:
            dsd_args['id'] = values['structure_id']

        if 'id' not in dsd_args:
            # No DSD reference, so the Header is the only result
            return [Header(**values)]

        # Create the DSD and the DFD that refers to it
        dsd = self._maintained(DataStructureDefinition, **dsd_args)
        dfd = DataflowDefinition(id=values.pop('structure_id'), structure=dsd)

        # Also store the dimension at observation. Popping it here keeps it
        # out of the keyword arguments given to Header below
        self._set_obs_dim(values.pop('dim_at_obs'))

        return [Header(**values), dfd]
예제 #2
0
파일: sdmxml.py 프로젝트: osbdr/pandaSDMX
    def parse_header(self, elem):
        """Parse a message header from *elem*.

        Returns a list whose first item is the Header. If the header contains
        a usable reference to a data structure definition, a
        DataflowDefinition wrapping that structure is returned as a second
        item (the original note points to ``.initialize()`` for how the
        second item is consumed). In that case the dimension at observation
        level is also stored on ``self._obs_dim``.
        """
        # Evaluate every XPath in HEADER_XPATH against *elem*. A single match
        # is stored bare, several matches are stored as returned, and
        # expressions without matches leave no entry
        values = {}
        for name, query in HEADER_XPATH.items():
            found = query(elem)
            n_found = len(found)
            if n_found == 0:
                continue
            values[name] = found[0] if n_found == 1 else found

        # Move the parts of a DataStructureDefinition reference out of the
        # collected values; missing or empty parts are ignored
        dsd_args = {}
        for part in ('id', 'agencyid', 'version', 'urn'):
            found = values.pop('structure_ref_' + part, None)
            if not found:
                continue
            if part == 'agencyid':
                # The agency ID is passed on as an Agency under 'maintainer'
                dsd_args['maintainer'] = Agency(id=found)
            else:
                dsd_args[part] = found

        # Only a URN was given: fall back to the structure ID for the DSD ID
        if set(dsd_args) == {'urn'}:
            dsd_args['id'] = values['structure_id']

        if 'id' not in dsd_args:
            # No DSD reference, so the Header is the only result
            return [Header(**values)]

        # Create or retrieve the DSD. NB if the dsd argument was provided to
        # read_message(), this should be the same DSD
        dsd = self._maintained(DataStructureDefinition, **dsd_args)

        if 'structure_id' in values:
            # Index the DSD a second time under the message-specific
            # structure ID (rather than the DSD's own ID)
            self._index['DataStructureDefinition', values['structure_id']] = dsd

        # Create a DataflowDefinition that refers to the DSD
        dfd = DataflowDefinition(id=values.pop('structure_id'), structure=dsd)

        # Store the dimension at observation level
        dim_at_obs = values.pop('dim_at_obs')
        if dim_at_obs == 'AllDimensions':
            self._obs_dim = AllDimensions
        else:
            # Retrieve or create the Dimension
            dim_args = dict(id=dim_at_obs, order=1e9)
            if 'TimeSeries' in self._stack[0]:
                # {,StructureSpecific}TimeSeriesData message → the dimension
                # at observation level is a TimeDimension
                dim_args['cls'] = TimeDimension
            self._obs_dim = dsd.dimensions.get(**dim_args)

        return [Header(**values), dfd]
예제 #3
0
def test_flat():
    """Compare a hand-built DataMessage with the "flat.json" specimen.

    Both messages are converted with ``to_pandas()`` and the results must be
    equal according to ``assert_pd_equal()``.
    """
    # Start from a bare message and give it a header
    msg = DataMessage()
    msg.header = Header(
        id="62b5f19d-f1c9-495d-8446-a3661ed24753",
        prepared="2012-11-29T08:40:26Z",
        sender=model.Agency(id="ECB"),
    )

    ds = DataSet()

    # Key of the first observation; later keys are derived from it
    key = Key(
        FREQ="D",
        CURRENCY="NZD",
        CURRENCY_DENOM="EUR",
        EXR_TYPE="SP00",
        EXR_SUFFIX="A",
        TIME_PERIOD="2013-01-18",
    )

    # Every observation carries the same attribute mapping
    obs_status = DataAttribute(id="OBS_STATUS")
    attr = {"OBS_STATUS": AttributeValue(value_for=obs_status, value="A")}

    # Each entry: (changes applied to the previous key, observation value).
    # An empty mapping means the key is used as it is, without copying
    steps = (
        ({}, 1.5931),
        ({"TIME_PERIOD": "2013-01-21"}, 1.5925),
        ({"CURRENCY": "RUB", "TIME_PERIOD": "2013-01-18"}, 40.3426),
        ({"TIME_PERIOD": "2013-01-21"}, 40.3000),
    )
    for changes, value in steps:
        if changes:
            key = key.copy(**changes)
        ds.obs.append(
            Observation(dimension=key, value=value, attached_attribute=attr)
        )

    msg.data.append(ds)

    # Convert the hand-built message
    built = pandasdmx.to_pandas(msg)

    # Read the reference message from the specimen file and convert it too
    with specimen("flat.json") as f:
        ref = pandasdmx.read_sdmx(f)
    expected = pandasdmx.to_pandas(ref)

    assert_pd_equal(built, expected)
예제 #4
0
def test_flat():
    """Compare a hand-built DataMessage with the 'flat.json' specimen.

    Both messages are converted with ``to_pandas()`` and the results must be
    equal according to ``assert_pd_equal()``.
    """
    # Start from a bare message and give it a header
    msg = DataMessage()
    msg.header = Header(
        id='62b5f19d-f1c9-495d-8446-a3661ed24753',
        prepared='2012-11-29T08:40:26Z',
        sender='ECB',
    )

    ds = DataSet()

    # Key of the first observation; later keys are derived from it
    key = Key(
        FREQ='D',
        CURRENCY='NZD',
        CURRENCY_DENOM='EUR',
        EXR_TYPE='SP00',
        EXR_SUFFIX='A',
        TIME_PERIOD='2013-01-18',
    )

    # Every observation carries the same attribute mapping
    obs_status = DataAttribute(id='OBS_STATUS')
    attr = {'OBS_STATUS': AttributeValue(value_for=obs_status, value='A')}

    def record(obs_key, value):
        # Append one observation for *obs_key* to the data set
        ds.obs.append(
            Observation(dimension=obs_key, value=value,
                        attached_attribute=attr))

    record(key, 1.5931)

    key = key.copy(TIME_PERIOD='2013-01-21')
    record(key, 1.5925)

    key = key.copy(CURRENCY='RUB', TIME_PERIOD='2013-01-18')
    record(key, 40.3426)

    key = key.copy(TIME_PERIOD='2013-01-21')
    record(key, 40.3000)

    msg.data.append(ds)

    # Convert the hand-built message
    built = sdmx.to_pandas(msg)

    # Read the reference message from the specimen file and convert it too
    with specimen('flat.json') as f:
        ref = sdmx.read_sdmx(f)
    expected = sdmx.to_pandas(ref)

    assert_pd_equal(built, expected)
예제 #5
0
파일: sdmxml.py 프로젝트: daoluan/pandaSDMX
    def read_message(self, source):
        """Parse the XML document in *source* and return a message object.

        *source* is passed directly to ``etree.parse()``. The returned object
        is an instance of the class that ``MESSAGE`` maps the root element's
        tag to.

        Raises ParseError if the root tag is not a key of ``MESSAGE``, or if
        the parsed document yields neither a header nor an error message.
        The remaining consistency checks are ``assert`` statements and are
        therefore skipped when Python runs with ``-O``.
        """
        # Root XML element
        root = etree.parse(source).getroot()

        # Message class
        try:
            cls = MESSAGE[root.tag]
        except KeyError:
            raise ParseError(
                'Unrecognized message root element {!r}'.format(root.tag)
            ) from None

        # Reset state
        self._stack = []
        self._index = {}
        self._current = {}

        # Parse the tree
        values = self._parse(root)

        # Instantiate the message object
        msg = cls()

        # Store the header
        header = values.pop('header', None)
        if header is None:
            if 'errormessage' not in values:
                # Without this check, len(None) below would raise a
                # TypeError that says nothing about the actual problem
                raise ParseError(
                    'Message contains neither a header nor an error message'
                )

            # An error message
            msg.header = Header()

            # Error message attributes resemble footer attributes
            values['footer'] = Footer(**values.pop('errormessage'))
        elif len(header) == 2:
            # Length-2 list includes DFD/DSD reference
            msg.header, msg.dataflow = header
            msg.observation_dimension = self._obs_dim
        else:
            # No DFD in the header, e.g. for a StructureMessage
            msg.header = header[0]

        # Store the footer
        msg.footer = values.pop('footer', None)

        # Finalize according to the message type
        if cls is DataMessage:
            # Simply store the datasets
            msg.data.extend(wrap(values.pop('dataset', [])))
        elif cls is StructureMessage:
            structures = values.pop('structures')

            # Populate dictionaries by ID
            for attr, name in (
                ('dataflow', 'dataflows'),
                ('codelist', 'codelists'),
                ('constraint', 'constraints'),
                ('structure', 'datastructures'),
                ('category_scheme', 'categoryschemes'),
                ('concept_scheme', 'concepts'),
                ('organisation_scheme', 'organisationschemes'),
                ('provisionagreement', 'provisionagreements'),
            ):
                for obj in structures.pop(name, []):
                    getattr(msg, attr)[obj.id] = obj

            # Check, but do not store, Categorisations

            # Assemble a list of external categoryschemes
            ext_cs = [
                cs for key, cs in self._index.items()
                if key[0] == 'CategoryScheme' and cs.is_external_reference
            ]

            for c in structures.pop('categorisations', []):
                if not isinstance(c.artefact, DataflowDefinition):
                    continue
                assert c.artefact in msg.dataflow.values()

                # Look for the scheme containing the category, first among
                # those in the message, then among the external ones
                missing_cs = True
                for cs in chain(msg.category_scheme.values(), ext_cs):
                    if c.category in cs:
                        missing_cs = False
                        if cs.is_external_reference:
                            # Store the externally-referred CategoryScheme
                            msg.category_scheme[cs.id] = cs
                        # Stop here: the mapping being iterated may just
                        # have been modified
                        break

                assert not missing_cs

            # Everything parsed under 'structures' must have been consumed
            assert len(structures) == 0, structures

        # Every parsed value must have been consumed
        assert len(values) == 0, values
        return msg
예제 #6
0
    def read_message(self, source):
        """Read a JSON data message from *source* and return a DataMessage.

        *source* is passed to ``json.load()``. Besides building the message,
        this sets ``self.msg`` and the lookup tables ``self._dim_level``,
        ``self._dim_values``, ``self._attr_level`` and ``self._attr_values``.

        A document without 'header', 'structure' or 'dataSets' raises
        KeyError.
        """
        # Initialize message instance
        msg = DataMessage()

        # Read JSON
        tree = json.load(source)

        # Read the header
        # FIXME KeyError: 'header'
        header = tree['header']
        msg.header = Header(
            id=header['id'],
            prepared=header['prepared'],
            sender=Item(**header['sender']),
        )

        # Pre-fetch the structure description, used for dimensions and
        # attributes below
        structure = tree['structure']

        # Read dimensions and their values
        self._dim_level = {}
        self._dim_values = {}
        for level_name, dims in structure['dimensions'].items():
            for info in dims:
                # Create the Dimension; -1 marks a missing 'keyPosition'
                dim = msg.structure.dimension(
                    id=info['id'], order=info.get('keyPosition', -1))

                # Record the level it appears at
                self._dim_level[dim] = level_name

                # Record its values
                self._dim_values[dim] = [
                    KeyValue(id=dim.id, value=entry['id'])
                    for entry in info.get('values', [])
                ]

        # Assign an order to an implicit dimension
        for dim in msg.structure.dimensions:
            if dim.order == -1:
                dim.order = len(msg.structure.dimensions)

        # Determine the dimension at the observation level: AllDimensions
        # when every dimension is at that level, otherwise the list of those
        # which are
        if all(lvl == 'observation' for lvl in self._dim_level.values()):
            msg.observation_dimension = AllDimensions
        else:
            msg.observation_dimension = [
                dim for dim, lvl in self._dim_level.items()
                if lvl == 'observation'
            ]

        # Read attributes and their values
        self._attr_level = {}
        self._attr_values = {}
        for level_name, attrs in structure['attributes'].items():
            for info in attrs:
                # Create a DataAttribute in the DSD
                da = msg.structure.attribute(
                    id=info['id'],
                    concept_identity=Concept(name=info['name']),
                )

                # Record the level it appears at
                self._attr_level[da] = level_name

                # Record its values
                self._attr_values[da] = [
                    AttributeValue(value=entry['name'], value_for=da)
                    for entry in info.get('values', [])
                ]

        self.msg = msg

        # Make a SeriesKey for Observations in this DataSet
        ds_key = self._make_key('dataSet')

        # Read DataSets
        for raw_ds in tree['dataSets']:
            msg.data.append(self.read_dataset(raw_ds, ds_key))

        return msg
예제 #7
0
    def read_message(self, source, dsd=None):
        """Read a JSON data message from *source* and return a DataMessage.

        *source* is given a ``default_size`` attribute of -1 and then passed
        to ``json.load()``. If *dsd* is truthy, it is stored as the structure
        of the message's dataflow before anything is read.

        Besides building the message, this sets ``self.msg`` and the lookup
        tables ``self._dim_level``, ``self._dim_values``,
        ``self._attr_level`` and ``self._attr_values``.

        A document without "header", "structure" or "dataSets" raises
        KeyError.
        """
        # Initialize message instance
        msg = DataMessage()

        if dsd:  # pragma: no cover
            # Store explicit DSD, if any
            msg.dataflow.structure = dsd

        # Read JSON
        source.default_size = -1
        tree = json.load(source)

        # Read the header
        # TODO handle KeyError here
        header = tree["header"]
        msg.header = Header(
            id=header["id"],
            prepared=header["prepared"],
            sender=model.Agency(**header["sender"]),
        )

        # Pre-fetch the structure description, used for dimensions and
        # attributes below
        structure = tree["structure"]

        # Read dimensions and their values
        self._dim_level = {}
        self._dim_values = {}
        for level_name, dims in structure["dimensions"].items():
            for info in dims:
                # Create the Dimension; -1 marks a missing "keyPosition"
                dim = msg.structure.dimensions.getdefault(
                    id=info["id"], order=info.get("keyPosition", -1)
                )

                # Record the level it appears at
                self._dim_level[dim] = level_name

                # Record its values
                self._dim_values[dim] = [
                    KeyValue(id=dim.id, value=entry["id"])
                    for entry in info.get("values", [])
                ]

        # Assign an order to an implicit dimension
        for dim in msg.structure.dimensions:
            if dim.order == -1:
                dim.order = len(msg.structure.dimensions)

        # Determine the dimension at the observation level: AllDimensions
        # when every dimension is at that level, otherwise the list of those
        # which are
        if all(lvl == "observation" for lvl in self._dim_level.values()):
            msg.observation_dimension = AllDimensions
        else:
            msg.observation_dimension = [
                dim for dim, lvl in self._dim_level.items() if lvl == "observation"
            ]

        # Read attributes and their values
        self._attr_level = {}
        self._attr_values = {}
        for level_name, attrs in structure["attributes"].items():
            for info in attrs:
                # Create a DataAttribute in the DSD
                da = msg.structure.attributes.getdefault(
                    id=info["id"], concept_identity=Concept(name=info["name"])
                )

                # Record the level it appears at
                self._attr_level[da] = level_name

                # Record its values
                self._attr_values[da] = [
                    AttributeValue(value=entry["name"], value_for=da)
                    for entry in info.get("values", [])
                ]

        self.msg = msg

        # Make a SeriesKey for Observations in this DataSet
        ds_key = self._make_key("dataSet")

        # Read DataSets
        for raw_ds in tree["dataSets"]:
            msg.data.append(self.read_dataset(raw_ds, ds_key))

        return msg