class DataSet(AnnotableArtefact):
    """SDMX-IM DataSet.

    Observations are stored flat in :attr:`obs` and additionally indexed by
    series key and group key in :attr:`series` and :attr:`group`.
    """

    # SDMX-IM features
    #: Action (e.g. append/replace) the receiver of the data set should take.
    action: ActionType = None
    #: Attribute values attached at the data-set level.
    attrib: DictLike[str, AttributeValue] = DictLike()
    #: NOTE(review): annotated ``str`` but defaults to None — pydantic treats
    #: this as optional.
    valid_from: str = None
    #: The DataStructureDefinition that structures this data set.
    structured_by: DataStructureDefinition = None
    #: All observations, in insertion order.
    obs: List[Observation] = []

    #: Map of series key → list of observations.
    #: :mod:`pandaSDMX` extension not in the IM.
    series: DictLike[SeriesKey, List[Observation]] = DictLike()

    #: Map of group key → list of observations.
    #: :mod:`pandaSDMX` extension not in the IM.
    group: DictLike[GroupKey, List[Observation]] = DictLike()

    def _add_group_refs(self, target):
        """Associate *target* with groups in this dataset.

        *target* may be an instance of SeriesKey or Observation.
        """
        for group_key in self.group:
            # Key.__contains__: the group key must be a subset of the
            # target's key
            if group_key in (target if isinstance(target, SeriesKey)
                             else target.key):
                target.group_keys.add(group_key)
                if isinstance(target, Observation):
                    # Also index the Observation under the matching group
                    self.group[group_key].append(target)

    def add_obs(self, observations, series_key=None):
        """Add *observations* to a series with *series_key*.

        Checks consistency and adds group associations."""
        if series_key:
            # Associate series_key with any GroupKeys that apply to it
            self._add_group_refs(series_key)

            if series_key not in self.series:
                # Initialize empty series
                self.series[series_key] = []

        for obs in observations:
            # Associate the observation with any GroupKeys that contain it
            self._add_group_refs(obs)

            # Store a reference to the observation
            self.obs.append(obs)

            if series_key:
                # Check that the Observation is not associated with a
                # different SeriesKey. Identity (``is``) is required here,
                # not mere equality; the assert message includes id() values
                # for debugging.
                assert obs.series_key is series_key, \
                    (obs.series_key, id(obs.series_key), series_key,
                     id(series_key))

                # Store a reference to the observation
                self.series[series_key].append(obs)

    @validator('action')
    def _validate_action(cls, value):
        # Accept either an ActionType member directly, or its name
        # (e.g. 'append'), which is converted via ActionType[...]
        if value in ActionType:
            return value
        else:
            return ActionType[value]
class StructureMessage(Message):
    """Message carrying SDMX structure artefacts, one collection per type."""

    #: Collection of :class:`.Categorisation`.
    categorisation: DictLike[str, model.Categorisation] = DictLike()
    #: Collection of :class:`.CategoryScheme`.
    category_scheme: DictLike[str, model.CategoryScheme] = DictLike()
    #: Collection of :class:`.Codelist`.
    codelist: DictLike[str, model.Codelist] = DictLike()
    #: Collection of :class:`.ConceptScheme`.
    concept_scheme: DictLike[str, model.ConceptScheme] = DictLike()
    #: Collection of :class:`.ContentConstraint`.
    constraint: DictLike[str, model.ContentConstraint] = DictLike()
    #: Collection of :class:`.DataflowDefinition`.
    dataflow: DictLike[str, model.DataflowDefinition] = DictLike()
    #: Collection of :class:`.DataStructureDefinition`.
    structure: DictLike[str, model.DataStructureDefinition] = DictLike()
    #: Collection of :class:`.AgencyScheme`.
    organisation_scheme: DictLike[str, model.AgencyScheme] = DictLike()
    #: Collection of :class:`.ProvisionAgreement`.
    provisionagreement: DictLike[str, model.ProvisionAgreement] = DictLike()

    def compare(self, other, strict=True):
        """Return :obj:`True` if `self` is the same as `other`.

        Two StructureMessages compare equal if :meth:`.DictLike.compare` is
        :obj:`True` for each of the object collection attributes.

        Parameters
        ----------
        strict : bool, optional
            Passed to :meth:`.DictLike.compare`.
        """
        collections = (
            "categorisation",
            "category_scheme",
            "codelist",
            "concept_scheme",
            "constraint",
            "dataflow",
            "structure",
            "organisation_scheme",
            "provisionagreement",
        )
        # Short-circuit on the first collection that differs
        for name in collections:
            if not getattr(self, name).compare(getattr(other, name), strict):
                return False
        return True

    def __repr__(self):
        """String representation."""
        lines = [super().__repr__()]

        # One summary line for each non-empty collection
        lines.extend(
            summarize_dictlike(value)
            for value in self.__dict__.values()
            if isinstance(value, DictLike) and value
        )

        return "\n ".join(lines)
class StructureMessage(Message):
    """Message carrying SDMX structure artefacts, one collection per type."""

    #: Collection of :class:`.CategoryScheme`.
    category_scheme: DictLike[str, CategoryScheme] = DictLike()
    #: Collection of :class:`.Codelist`.
    codelist: DictLike[str, Codelist] = DictLike()
    #: Collection of :class:`.ConceptScheme`.
    concept_scheme: DictLike[str, ConceptScheme] = DictLike()
    #: Collection of :class:`.ContentConstraint`.
    constraint: DictLike[str, ContentConstraint] = DictLike()
    #: Collection of :class:`.DataflowDefinition`.
    dataflow: DictLike[str, DataflowDefinition] = DictLike()
    #: Collection of :class:`.DataStructureDefinition`.
    structure: DictLike[str, DataStructureDefinition] = DictLike()
    #: Collection of :class:`.AgencyScheme`.
    organisation_scheme: DictLike[str, AgencyScheme] = DictLike()
    #: Collection of :class:`.ProvisionAgreement`.
    provisionagreement: DictLike[str, ProvisionAgreement] = DictLike()

    def __repr__(self):
        """String representation."""
        lines = [super().__repr__()]

        # One summary line for each non-empty collection
        lines.extend(
            summarize_dictlike(value)
            for value in self.__dict__.values()
            if isinstance(value, DictLike) and value
        )

        return '\n '.join(lines)
def write_cuberegion(obj, **kwargs):
    """Convert :class:`.CubeRegion`.

    Returns a DictLike mapping each dimension to a :class:`pd.Series` of the
    allowed member values, named with the dimension's id.
    """
    result = DictLike()
    for dim, selection in obj.member.items():
        member_values = [member.value for member in selection.values]
        result[dim] = pd.Series(member_values, name=dim.id)
    return result
def write_dict(obj, *args, **kwargs):
    """Convert mappings.

    The values of the mapping are write()'d individually. If the resulting
    values are :class:`str` or :class:`pd.Series` *with indexes that share
    the same name*, then they are converted to a pd.Series, possibly with a
    pd.MultiIndex. Otherwise, a DictLike is returned.

    An empty mapping is converted to an empty :class:`pd.Series`.

    Raises
    ------
    ValueError
        If the write()'d values are of mixed or unhandled types.
    """
    result = {k: write(v, *args, **kwargs) for k, v in obj.items()}

    result_type = set(type(v) for v in result.values())

    if not result_type:
        # Empty mapping → empty Series. This must be checked *first*: the
        # empty set is a subset of every set, so the pandas branch below
        # would otherwise capture this case and the empty-Series return
        # would be unreachable (returning DictLike({}) instead).
        return pd.Series()
    elif result_type <= {pd.Series, pd.DataFrame}:
        if (len(set(map(lambda s: s.index.name, result.values()))) == 1
                and len(result) > 1):
            # Can safely concatenate these to a pd.MultiIndex'd Series.
            return pd.concat(result)
        else:
            # The individual pd.Series are indexed by different dimensions;
            # do not concatenate.
            return DictLike(result)
    elif result_type == {str}:
        return pd.Series(result)
    else:
        raise ValueError(result_type)
def group_attrib(self):
    """Return a view of combined group attributes."""
    # Needed to pass existing tests
    combined = DictLike()
    for group_key in self.group_keys:
        combined.update(group_key.attrib)
    return combined
def _cr(obj: model.CubeRegion, **kwargs):
    """Convert :class:`.CubeRegion`.

    Returns a DictLike keyed by dimension id; each value is a
    :class:`pd.Series` of the allowed member values, named with the same id.
    """
    result: DictLike[str, pd.Series] = DictLike()
    for dim, selection in obj.member.items():
        member_values = [member.value for member in selection.values]
        result[dim.id] = pd.Series(member_values, name=dim.id)
    return result
def write_structuremessage(obj: message.StructureMessage, include=None,
                           **kwargs):
    """Convert :class:`.StructureMessage`.

    Parameters
    ----------
    obj : .StructureMessage
    include : iterable of str or str, optional
        One or more of the attributes of the StructureMessage (
        'category_scheme', 'codelist', etc.) to transform.
    kwargs :
        Passed to :meth:`write` for each attribute.

    Returns
    -------
    .DictLike
        Keys are StructureMessage attributes; values are pandas objects.
    """
    all_contents = {
        "category_scheme",
        "codelist",
        "concept_scheme",
        "constraint",
        "dataflow",
        "structure",
        "organisation_scheme",
    }

    # Handle arguments
    if include is None:
        attr_set = all_contents
    else:
        names = [include] if isinstance(include, str) else include
        # Silently discard invalid names
        attr_set = set(names) & all_contents

    result: DictLike[str, Union[pd.Series, pd.DataFrame]] = DictLike()
    for name in sorted(attr_set):
        converted = writer.recurse(getattr(obj, name), **kwargs)
        # Only add non-empty elements
        if len(converted):
            result[name] = converted

    return result
def write_structuremessage(obj, include=None, **kwargs):
    """Convert :class:`StructureMessage <pandasdmx.message.StructureMessage>`.

    Parameters
    ----------
    obj : pandasdmx.message.StructureMessage
    include : iterable of str or str, optional
        One or more of the attributes of the StructureMessage (
        'category_scheme', 'codelist', etc.) to transform.
    kwargs :
        Passed to :meth:`write` for each attribute.

    Returns
    -------
    :class:`pandasdmx.util.DictLike`
        Keys are StructureMessage attributes; values are pandas objects.
    """
    all_contents = {
        'category_scheme',
        'codelist',
        'concept_scheme',
        'constraint',
        'dataflow',
        'structure',
        'organisation_scheme',
    }

    # Handle arguments
    if include is None:
        selected = all_contents
    else:
        names = [include] if isinstance(include, str) else include
        # Silently discard invalid names
        selected = set(names) & all_contents

    result = DictLike()
    for name in sorted(selected):
        converted = write(getattr(obj, name), **kwargs)
        # Only add non-empty elements
        if len(converted):
            result[name] = converted

    return result
class Observation(BaseModel):
    """SDMX-IM Observation.

    This class also implements the spec classes ObservationValue,
    UncodedObservationValue, and CodedObservation.
    """
    #:
    attached_attribute: DictLike[str, AttributeValue] = DictLike()
    #:
    series_key: SeriesKey = None
    #: Key for dimension(s) varying at the observation level.
    dimension: Key = None
    #: Data value.
    value: Union[Any, Code] = None
    #:
    value_for: PrimaryMeasure = None
    #: :mod:`pandaSDMX` extension not in the IM.
    group_keys: Set[GroupKey] = set()

    @property
    def attrib(self):
        """Return a view of combined observation, series & group attributes."""
        combined = self.attached_attribute.copy()
        combined.update(getattr(self.series_key, 'attrib', {}))
        for group_key in self.group_keys:
            combined.update(group_key.attrib)
        return combined

    @property
    def dim(self):
        """Alias for :attr:`dimension`."""
        return self.dimension

    @property
    def key(self):
        """Return the entire key, including KeyValues at the series level."""
        return self.series_key + self.dimension

    def __len__(self):
        # FIXME this is unintuitive; maybe deprecate/remove?
        return len(self.key)

    def __str__(self):
        return f'{self.key}: {self.value}'
class StructureMessage(Message):
    """Message carrying SDMX structure artefacts, one collection per type."""

    category_scheme: DictLike[str, CategoryScheme] = DictLike()
    codelist: DictLike[str, Codelist] = DictLike()
    concept_scheme: DictLike[str, ConceptScheme] = DictLike()
    constraint: DictLike[str, ContentConstraint] = DictLike()
    dataflow: DictLike[str, DataflowDefinition] = DictLike()
    structure: DictLike[str, DataStructureDefinition] = DictLike()
    organisation_scheme: DictLike[str, AgencyScheme] = DictLike()
    provisionagreement: DictLike[str, ProvisionAgreement] = DictLike()

    def __repr__(self):
        """String representation."""
        lines = [super().__repr__()]

        # dir() yields attribute names in sorted order; summarize every
        # non-empty DictLike collection
        for name in dir(self):
            value = getattr(self, name)
            if isinstance(value, DictLike) and len(value):
                lines.append(summarize_dictlike(value))

        return '\n '.join(lines)
def test_dictlike_anno():
    """Test DictLike-annotated fields on pydantic models via validate_dictlike."""
    @validate_dictlike("items")
    class Foo(BaseModel):
        items: DictLike[StrictStr, int] = DictLike()

    f = Foo()
    assert type(f.items) == DictLike

    # Can be set with DictLike
    f.items = DictLike(a=1, b=2)
    assert type(f.items) == DictLike

    # Can be set with dict()
    f.items = {"a": 1, "b": 2}
    assert type(f.items) == DictLike

    # Type checking on creation
    with pytest.raises(pydantic.ValidationError):
        f = Foo(items={1: "a"})

    # Type checking on assignment
    f = Foo()
    with pytest.raises(pydantic.ValidationError):
        f.items = {1: "a"}

    # Type checking on setting elements
    f = Foo(items={"a": 1})
    with pytest.raises(pydantic.ValidationError):
        f.items[123] = 456

    # commented: this does not work, since validate_dictlike does not operate
    # until initial values are assigned to the field
    # f = Foo()
    # with pytest.raises(pydantic.ValidationError):
    #     f.items[123] = 456

    # Use validate_dictlike() twice — must not conflict with the first use
    @validate_dictlike("elems")
    class Bar(BaseModel):
        elems: DictLike[StrictStr, float] = DictLike()
def test_dictlike():
    d = DictLike()

    # Set by item name
    d['TIME_PERIOD'] = 3
    d['CURRENCY'] = 'USD'

    # Access by attribute name
    assert d.TIME_PERIOD == 3
    # Access by item index
    assert d[1] == 'USD'

    # Accessing a missing key, index, or attribute raises the
    # corresponding exception type
    with pytest.raises(KeyError):
        d['FOO']
    with pytest.raises(IndexError):
        d[2]
    with pytest.raises(AttributeError):
        d.FOO
def test_dictlike():
    d = DictLike()

    # Set by item name
    d["TIME_PERIOD"] = 3
    d["CURRENCY"] = "USD"

    # Access by attribute name
    assert d.TIME_PERIOD == 3
    # Access by item index
    assert d[1] == "USD"

    # Accessing a missing key, index, or attribute raises the
    # corresponding exception type
    with pytest.raises(KeyError):
        d["FOO"]
    with pytest.raises(IndexError):
        d[2]
    with pytest.raises(AttributeError):
        d.FOO
def _dict(obj: dict, *args, **kwargs):
    """Convert mappings.

    Values are converted individually via :meth:`writer.recurse`. Depending
    on the converted value types, returns:

    - an empty :class:`pd.Series`, if *obj* is empty;
    - a single (possibly pd.MultiIndex'd) :class:`pd.Series`, if all values
      are pd.Series/pd.DataFrame whose indexes share one name;
    - a :class:`DictLike` of the converted values otherwise;
    - a :class:`pd.Series`, if all values are :class:`str`;
    - the mapping of converted values, if all values are :class:`DictLike`.

    Raises
    ------
    ValueError
        For mixed or unhandled value types.
    """
    result = {k: writer.recurse(v, *args, **kwargs) for k, v in obj.items()}

    result_type = set(type(v) for v in result.values())

    if not result_type:
        # No results → empty Series. This must be checked *first*: the empty
        # set is a subset of every set, so the pandas branch below would
        # otherwise capture this case and the empty-Series return would be
        # unreachable (returning DictLike({}) instead).
        return pd.Series()
    elif result_type <= {pd.Series, pd.DataFrame}:
        if (len(set(map(lambda s: s.index.name, result.values()))) == 1
                and len(result) > 1):
            # Can safely concatenate these to a pd.MultiIndex'd Series.
            return pd.concat(result)
        else:
            # The individual pd.Series are indexed by different dimensions;
            # do not concatenate.
            return DictLike(result)
    elif result_type == {str}:
        return pd.Series(result)
    elif result_type == {DictLike}:
        return result
    else:
        raise ValueError(result_type)
class Foo(BaseModel):
    # Mapping with string keys and int values; validated by pydantic.
    items: DictLike[StrictStr, int] = DictLike()
class Key(BaseModel):
    """SDMX Key class.

    The constructor takes an optional list of keyword arguments; the keywords
    are used as Dimension or Attribute IDs, and the values as KeyValues.

    For convience, the values of the key may be accessed directly:

    >>> k = Key(foo=1, bar=2)
    >>> k.values['foo']
    1
    >>> k['foo']
    1

    Parameters
    ----------
    dsd : DataStructureDefinition
        If supplied, the :attr:`~.DataStructureDefinition.dimensions` and
        :attr:`~.DataStructureDefinition.attributes` are used to separate the
        *kwargs* into :class:`KeyValues <.KeyValue>` and
        :class:`AttributeValues <.AttributeValue>`. The *kwarg* for
        :attr:`described_by`, if any, must be
        :attr:`~.DataStructureDefinition.dimensions` or appear in
        :attr:`~.DataStructureDefinition.group_dimensions`.
    kwargs
        Dimension and Attribute IDs, and/or the class properties.
    """
    #:
    attrib: DictLike[str, AttributeValue] = DictLike()
    #:
    described_by: DimensionDescriptor = None
    #: Individual KeyValues that describe the key.
    values: DictLike[str, KeyValue] = DictLike()

    def __init__(self, arg=None, **kwargs):
        super().__init__()
        # A single mapping argument and keyword arguments are exclusive
        if arg:
            if len(kwargs):
                raise ValueError("Key() accepts either a single argument, or "
                                 "keyword arguments; not both.")
            kwargs.update(arg)

        # DSD argument
        dsd = kwargs.pop('dsd', None)
        # DimensionDescriptor
        dd = kwargs.pop('described_by', None)
        if dsd:
            if not dd:
                # Default to the DSD's (top-level) dimensions
                dd = dsd.dimensions
            # DD must appear in the DSD if both are supplied
            if (dd is not dsd.dimensions
                    and dd not in dsd.group_dimensions.values()):
                raise ValueError(f'described_by={dd} is not a [Group]'
                                 f'DimensionDescriptor of dsd={dsd}')

        # NOTE(review): placement of this try/except relative to the
        # ``if dsd`` block reconstructed from mangled whitespace — confirm.
        try:
            self.described_by = dd
        except Exception:
            dd = None

        # Convert keyword arguments to either KeyValue or AttributeValue
        values = []
        for order, (id, value) in enumerate(kwargs.items()):
            args = dict(id=id, value=value)
            if dsd and id in dsd.attributes:
                # Reference a DataAttribute from the AttributeDescriptor
                da = dsd.attributes.get(id)
                # Store the attribute value
                self.attrib[da.id] = AttributeValue(**args, value_for=da)
                continue

            if dd:
                # Reference a Dimension from the DimensionDescriptor
                args['value_for'] = dd.get(id)
                # Retrieve the order
                order = args['value_for'].order

            # Store a KeyValue, to be sorted later
            values.append((order, KeyValue(**args)))

        # Sort the values according to *order*
        self.values.update({kv.id: kv for _, kv in sorted(values)})

    def __len__(self):
        """The length of the Key is the number of KeyValues it contains."""
        return len(self.values)

    def __contains__(self, other):
        """A Key contains another if it is a superset."""
        try:
            return all([self.values[k] == v for k, v in
                        other.values.items()])
        except KeyError:
            # 'k' in other does not appear in this Key()
            return False

    def __iter__(self):
        yield from self.values.values()

    # Convenience access to values by name
    def __getitem__(self, name):
        return self.values[name]

    def __setitem__(self, name, value):
        # Convert a bare string or other Python object to a KeyValue instance
        if not isinstance(value, KeyValue):
            value = KeyValue(id=name, value=value)
        self.values[name] = value

    # Convenience access to values by attribute
    def __getattr__(self, name):
        try:
            return self.__getitem__(name)
        except KeyError as e:
            raise e

    # Copying
    def __copy__(self):
        result = Key()
        if self.described_by:
            result.described_by = self.described_by
        for kv in self.values.values():
            result[kv.id] = kv
        return result

    def copy(self, arg=None, **kwargs):
        """Return a copy, with any *kwargs* overriding individual values."""
        result = copy(self)
        for id, value in kwargs.items():
            result[id] = value
        return result

    def __add__(self, other):
        # Union of the two keys; values in *other* take precedence
        if not isinstance(other, Key):
            raise NotImplementedError
        result = copy(self)
        for id, value in other.values.items():
            result[id] = value
        return result

    def __radd__(self, other):
        # Support ``None + key`` → copy of the key
        if other is None:
            return copy(self)
        else:
            raise NotImplementedError

    def __eq__(self, other):
        if hasattr(other, 'values'):
            # Compare KeyValues pairwise
            return all([a == b for a, b in zip(self.values, other.values)])
        elif isinstance(other, str) and len(self.values) == 1:
            # A 1-length Key compares equal to a bare string value
            return self.values[0] == other
        else:
            raise ValueError(other)

    def __hash__(self):
        # Hash of the individual KeyValues, in order
        return hash(tuple(hash(kv) for kv in self.values.values()))

    # Representations
    def __str__(self):
        return '({})'.format(', '.join(map(str, self.values.values())))

    def __repr__(self):
        return '<{}: {}>'.format(self.__class__.__name__,
                                 ', '.join(map(str, self.values.values())))

    def order(self, value=None):
        """Return *value* (default: self) ordered by the DimensionDescriptor.

        If :attr:`described_by` is not set, *value* is returned unchanged.
        """
        if value is None:
            value = self
        try:
            return self.described_by.order_key(value)
        except AttributeError:
            return value

    def get_values(self):
        """Return the plain values of all KeyValues, as a tuple."""
        return tuple([kv.value for kv in self.values.values()])
class DataStructureDefinition(Structure, ConstrainableArtefact):
    """Defines a data structure. Referred to as “DSD”."""

    #: A :class:`AttributeDescriptor` that describes the attributes of the
    #: data structure.
    attributes: AttributeDescriptor = AttributeDescriptor()
    #: A :class:`DimensionDescriptor` that describes the dimensions of the
    #: data structure.
    dimensions: DimensionDescriptor = DimensionDescriptor()
    #: A :class:`.MeasureDescriptor`.
    measures: MeasureDescriptor = None
    #: A :class:`.GroupDimensionDescriptor`.
    group_dimensions: DictLike[str, GroupDimensionDescriptor] = DictLike()

    # Convenience methods
    def attribute(self, id, **kwargs):
        """Call :meth:`ComponentList.get` on :attr:`attributes`."""
        return self.attributes.get(id, **kwargs)

    def dimension(self, id, **kwargs):
        """Call :meth:`ComponentList.get` on :attr:`dimensions`."""
        return self.dimensions.get(id, **kwargs)

    def make_constraint(self, key):
        """Return a constraint for *key*.

        *key* is a :class:`dict` wherein:

        - keys are :class:`str` ids of Dimensions appearing in this DSD's
          :attr:`dimensions`, and
        - values are '+'-delimited :class:`str` containing allowable values,
          _or_ iterables of :class:`str`, each an allowable value.

        For example::

            cc2 = dsd.make_constraint({'foo': 'bar+baz', 'qux': 'q1+q2+q3'})

        ``cc2`` includes any key where the 'foo' dimension is 'bar' *or*
        'baz', *and* the 'qux' dimension is one of 'q1', 'q2', or 'q3'.

        Returns
        -------
        ContentConstraint
            A constraint with one :class:`CubeRegion` in its
            :attr:`data_content_region
            <ContentConstraint.data_content_region>`, including only the
            values appearing in *keys*.

        Raises
        ------
        ValueError
            if *key* contains a dimension IDs not appearing in
            :attr:`dimensions`.
        """
        # Make a copy to avoid pop()'ing off the object in the calling scope
        key = key.copy()

        cr = CubeRegion()
        for dim in self.dimensions:
            mvs = set()
            try:
                values = key.pop(dim.id)
            except KeyError:
                # No constraint on this dimension
                continue

            # Accept either a '+'-delimited str, or an iterable of str
            values = values.split('+') if isinstance(values, str) else values
            for value in values:
                # TODO validate values
                mvs.add(MemberValue(value=value))
            cr.member[dim] = MemberSelection(included=True, values_for=dim,
                                             values=mvs)

        if len(key):
            # Leftover entries did not match any dimension of this DSD
            raise ValueError('Dimensions {!r} not in {!r}'.format(
                list(key.keys()), self.dimensions))

        return ContentConstraint(
            data_content_region=[cr],
            role=ConstraintRole(role=ConstraintRoleType.allowable))

    @classmethod
    def from_keys(cls, keys):
        """Return a new DSD given some *keys*.

        The DSD's :attr:`dimensions` refers to a set of new :class:`Concepts
        <Concept>` and :class:`Codelists <Codelist>`, created to represent
        all the values observed across *keys* for each dimension.

        Parameters
        ----------
        keys : iterable of :class:`Key`
            or of subclasses such as :class:`SeriesKey` or :class:`GroupKey`.
        """
        iter_keys = iter(keys)
        # The first key establishes the dimensions themselves
        dd = DimensionDescriptor.from_key(next(iter_keys))

        # Remaining keys only contribute their observed values as Codes
        for k in iter_keys:
            for i, (id, kv) in enumerate(k.values.items()):
                dd[i].local_representation.enumerated.append(Code(id=kv.value))

        return cls(dimensions=dd)
class Key(BaseModel):
    """SDMX Key class.

    The constructor takes an optional list of keyword arguments; the keywords
    are used as Dimension IDs, and the values as KeyValues.

    For convience, the values of the key may be accessed directly:

    >>> k = Key(foo=1, bar=2)
    >>> k.values['foo']
    1
    >>> k['foo']
    1
    """
    # Attribute values associated with the key.
    attrib: DictLike[str, AttributeValue] = DictLike()
    # Individual KeyValues that describe the key.
    values: DictLike[str, KeyValue] = DictLike()
    # Optional DimensionDescriptor used to resolve/order dimensions.
    described_by: DimensionDescriptor = None

    def __init__(self, arg=None, **kwargs):
        super().__init__()
        # A single mapping argument and keyword arguments are exclusive
        if arg:
            if len(kwargs):
                raise ValueError("Key() accepts either a single argument, or "
                                 "keyword arguments; not both.")
            kwargs.update(arg)

        # DimensionDescriptor
        self.described_by = kwargs.pop('described_by', None)

        # Convert keyword arguments to KeyValues
        for id, value in kwargs.items():
            kv = KeyValue(id=id, value=value)
            try:
                # Reference a Dimension from the DimensionDescriptor
                kv.value_for = self.described_by.get(id)
            except AttributeError:
                # described_by is None; leave value_for unset
                pass
            self.values[id] = kv

    def __len__(self):
        """The length of the Key is the number of KeyValues it contains."""
        return len(self.values)

    def __contains__(self, other):
        """A Key contains another if it is a superset."""
        try:
            return all([self.values[k] == v for k, v in
                        other.values.items()])
        except KeyError:
            # 'k' in other does not appear in this Key()
            return False

    def __iter__(self):
        yield from self.values.values()

    # Convenience access to values by name
    def __getitem__(self, name):
        return self.values[name]

    def __setitem__(self, name, value):
        # Convert a bare string or other Python object to a KeyValue instance
        if not isinstance(value, KeyValue):
            value = KeyValue(id=name, value=value)
        self.values[name] = value

    # Convenience access to values by attribute
    def __getattr__(self, name):
        try:
            return self.__getitem__(name)
        except KeyError as e:
            raise e

    # Copying
    def __copy__(self):
        result = Key()
        if self.described_by:
            result.described_by = self.described_by
        for kv in self.values.values():
            result[kv.id] = kv
        return result

    def copy(self, arg=None, **kwargs):
        """Return a copy, with any *kwargs* overriding individual values."""
        result = copy(self)
        for id, value in kwargs.items():
            result[id] = value
        return result

    def __add__(self, other):
        # Union of the two keys; values in *other* take precedence
        if not isinstance(other, Key):
            raise NotImplementedError
        result = copy(self)
        for id, value in other.values.items():
            result[id] = value
        return result

    def __radd__(self, other):
        # Support ``None + key`` → copy of the key
        if other is None:
            return copy(self)
        else:
            raise NotImplementedError

    def __eq__(self, other):
        if hasattr(other, 'values'):
            # Compare KeyValues pairwise
            return all([a == b for a, b in zip(self.values, other.values)])
        elif isinstance(other, str) and len(self.values) == 1:
            # A 1-length Key compares equal to a bare string value
            return self.values[0] == other
        else:
            raise ValueError(other)

    def __hash__(self):
        # Hash of the individual KeyValues, in order
        return hash(tuple(hash(kv) for kv in self.values.values()))

    # Representations
    def __str__(self):
        return '({})'.format(', '.join(map(str, self.values.values())))

    def __repr__(self):
        return '<{}: {}>'.format(self.__class__.__name__,
                                 ', '.join(map(str, self.values.values())))

    def order(self, value=None):
        """Return *value* (default: self) ordered by the DimensionDescriptor.

        If :attr:`described_by` is not set, *value* is returned unchanged.
        """
        if value is None:
            value = self
        try:
            return self.described_by.order_key(value)
        except AttributeError:
            return value

    def get_values(self):
        """Return the plain values of all KeyValues, as a tuple."""
        return tuple([kv.value for kv in self.values.values()])
class DataSet(AnnotableArtefact):
    """DataSet backed internally by a :class:`pd.DataFrame`."""

    # SDMX-IM features
    action: ActionType = None
    attrib: DictLike[str, AttributeValue] = DictLike()
    valid_from: Text = None
    structured_by: DataStructureDefinition = None

    # Internal storage: a pd.DataFrame with columns:
    # - 'value': the Observation value.
    # - ('attr_obs', *id*): value for Observation.attached_attribute[id].
    # - TODO 'series_key': integer index of the SeriesKey associated with the
    #   Observation.
    # - TODO 'group_keys': integer indices of the GroupKey(s) associated with
    #   the Observation.
    _data = None

    def add_obs(self, observations, series_key=None):
        """Add *observations* to a series with *series_key*.

        Only series_key=None is currently supported.
        """
        if series_key:
            raise NotImplementedError

        # dict of dicts: key → {value, attributes, etc. }
        obs_dict = {}
        for obs in observations:
            # DataFrame row for this Observation
            row = {'value': obs.value}

            # Store attributes under ('attr_obs', id) column labels
            for attr_id, av in obs.attached_attribute.items():
                row[('attr_obs', attr_id)] = av.value

            # Store the row, indexed by the ordered key values
            obs_dict[obs.key.order().get_values()] = row

        # Convert to pd.DataFrame. Note similarity to pandasdmx.writer
        self._data = pd.DataFrame.from_dict(obs_dict, orient='index')

        if len(obs_dict):
            # *obs* is the last Observation from the loop above; its key
            # supplies the dimension names for the index
            self._data.index.names = obs.key.order().values.keys()

    @property
    def obs(self):
        # In pandasdmx.model.DataSet, .obs is typed as List[Observation].
        # Here, the Observations are generated on request.
        for key, data in self._data.iterrows():
            yield self._make_obs(key, data)

    def _make_obs(self, key, data):
        """Create an Observation from tuple *key* and pd.Series *data*."""
        # Create the Key
        key = Key(
            {dim: value for dim, value in zip(self._data.index.names, key)})

        attrs = {}
        # Handle columns of ._data
        for col, value in data.items():
            try:
                # A tuple column label is ('attr_obs', attr_id)
                group, attr_id = col
            except ValueError:
                # Not a tuple → the 'value' column, handled below
                continue
            if group == 'attr_obs':
                # Create a DataAttribute
                attrs[attr_id] = AttributeValue(
                    value_for=DataAttribute(id=attr_id), value=value)

        return Observation(dimension=key, value=data['value'],
                           attached_attribute=attrs)
class Bar(BaseModel):
    # Mapping with string keys and float values; validated by pydantic.
    elems: DictLike[StrictStr, float] = DictLike()