def __init__(self, cache, http_cfg):
    '''
    Store the HTTP configuration and optionally switch on response caching.

    Args:
        cache(dict): keyword arguments handed to
            ``requests_cache.install_cache``. If falsy, no cache is
            installed.
        http_cfg(dict): configuration stored in ``self.config``. Missing
            ``stream`` and ``timeout`` entries are filled in, in place,
            with ``True`` and ``30.1``.
    '''
    for option, fallback in (('stream', True), ('timeout', 30.1)):
        # setdefault keeps whatever value the caller already supplied.
        http_cfg.setdefault(option, fallback)
    self.config = DictLike(http_cfg)
    if cache:
        requests_cache.install_cache(**cache)
def write(self, source=None, rows=None, **kwargs):
    '''
    Transform structural metadata (codelists, concept schemes, lists of
    dataflow definitions or category schemes) from a
    :class:`pandasdmx.model.StructureMessage` instance into one or more
    pandas DataFrames.

    This method is called by :meth:`pandasdmx.api.Response.write`. It is
    not part of the public-facing API, yet certain kwargs are propagated
    from there.

    Args:
        source(pandasdmx.model.StructureMessage): the message to read from.

        rows(str, list, tuple): name(s) of the attribute(s) of the
            StructureMessage to extract. Each attribute must be an
            instance of `dict` whose keys are strings. These are
            interpreted as IDs and used for the MultiIndex of the
            resulting DataFrame. Values can be either instances of
            `dict`, such as for codelists and category schemes, or simple
            nameable objects, such as for dataflows. In the latter case
            the DataFrame has a flat index.
            (default: every name in ``self._row_content`` that is an
            attribute of the message. Common is 'codelist')

        columns(str, list): if str, the attribute of the values
            (nameable SDMX objects such as Code or ConceptScheme) that is
            stored in the DataFrame. If a list, it must contain strings
            that are valid attribute names.
            Defaults to: ['name', 'description']

        constraint(bool): if True (default), apply any constraints to
            codelists, i.e. only the codes allowed by the constraints
            attached to the DSD, dataflow and provision agreements
            contained in the message are written to the DataFrame.
            Otherwise, the entire codelist is written.

        lang(str): locale identifier. Specifies the preferred language
            for international strings such as names. Default is 'en'.

    Returns:
        A single DataFrame if ``rows`` is one name or a one-element list
        or tuple; otherwise a DictLike mapping each content-type name to
        its DataFrame.
    '''
    if rows is None:
        # Nothing requested: take every known content type that the
        # message actually carries, and always answer with the mapping.
        single_frame = False
        rows = [name for name in self._row_content if hasattr(source, name)]
    elif isinstance(rows, (list, tuple)):
        single_frame = len(rows) == 1
    else:
        # A bare value (typically a string) names exactly one content type.
        single_frame = True
        rows = [rows]

    # All keyword arguments are forwarded unchanged to the frame builder.
    built = {}
    for content_type in rows:
        built[content_type] = self._make_dataframe(
            source, content_type, **kwargs)
    frames = DictLike(built)

    # With one item only, hand back the frame itself rather than the mapping.
    return frames.any() if single_frame else frames
def write(self, source=None, rows=None, **kwargs):
    '''
    Transform structural metadata (codelists, concept schemes, lists of
    dataflow definitions or category schemes) from a
    :class:`pandasdmx.model.StructureMessage` instance into one or more
    pandas DataFrames.

    This method is called by :meth:`pandasdmx.api.Response.write`. It is
    not part of the public-facing API, yet certain kwargs are propagated
    from there.

    Args:
        source(pandasdmx.model.StructureMessage): the message to read from.

        rows(str, list, tuple): name(s) of the attribute(s) of the
            StructureMessage to extract. Each attribute must be an
            instance of `dict` whose keys are strings. These are
            interpreted as IDs and used for the MultiIndex of the
            resulting DataFrame. Values can be either instances of
            `dict`, such as for codelists and category schemes, or simple
            nameable objects, such as for dataflows. In the latter case
            the DataFrame has a flat index.
            (default: every name in ``self._row_content`` that is an
            attribute of the message. Common is 'codelist')

        columns(str, list): if str, the attribute of the values
            (nameable SDMX objects such as Code or ConceptScheme) that is
            stored in the DataFrame. If a list, it must contain strings
            that are valid attribute names.
            Defaults to: ['name', 'description']

        lang(str): locale identifier. Specifies the preferred language
            for international strings such as names. Default is 'en'.

    Returns:
        A single DataFrame if ``rows`` is one name or a one-element list
        or tuple; otherwise a DictLike mapping each content-type name to
        its DataFrame.
    '''
    rows_is_sequence = isinstance(rows, (list, tuple))
    if rows is not None and not rows_is_sequence:
        # A bare value (typically a string) names exactly one content type.
        rows = [rows]
        wants_single = True
    else:
        wants_single = rows_is_sequence and len(rows) == 1

    if rows is None:
        # Default: every known content type the message actually carries.
        rows = [name for name in self._row_content if hasattr(source, name)]

    # One DataFrame per content type, keyed by the content-type name.
    # Keyword arguments are forwarded unchanged to the frame builder.
    frames = DictLike({
        content_type: self._make_dataframe(source, content_type, **kwargs)
        for content_type in rows
    })

    if not wants_single:
        return frames
    # There is only one item, so return the only value.
    return frames.any()
def international_str(self, name, sdmxobj):
    '''
    Return a DictLike pairing the xml:lang tokens ('en', 'fr', ...) found
    for ``name`` with the corresponding string values.

    Both lists are obtained by evaluating the 'int_str_names' and
    'int_str_values' entries of ``self._paths`` on ``sdmxobj._elem``.
    If no language tokens are found, the language is assumed to be 'en'.
    '''
    element = sdmxobj._elem
    languages = self._paths['int_str_names'](element, name=name)
    texts = self._paths['int_str_values'](element, name=name)
    # Unilingual strings carry no language attribute; fall back to 'en'.
    return DictLike(zip(languages or ['en'], texts))
def read_identifiables(self, cls, sdmxobj, offset=None):
    '''
    Build ``cls`` instances for the elements found below ``sdmxobj`` and
    key them by their 'id'.

    Args:
        cls: callable invoked as ``cls(self, element)`` for each element
            found; it is also the key into ``self._paths`` that selects
            the element finder.
        sdmxobj: object whose ``_elem`` is searched. If it inherits from
            dict, it is updated in place with the results.
        offset: optional key into ``self._paths``. When given, the search
            starts at the first element that path yields for
            ``sdmxobj._elem`` instead of at ``sdmxobj._elem`` itself.

    Returns:
        None if ``sdmxobj`` inherits from dict (it is updated instead) or
        if the ``offset`` path yields nothing; otherwise a new DictLike
        mapping IDs to the ``cls`` instances.
    '''
    find_elements = self._paths[cls]
    if not offset:
        base = sdmxobj._elem
    else:
        try:
            base = self._paths[offset](sdmxobj._elem)[0]
        except IndexError:
            # The offset element is absent, so there is nothing to read.
            return None

    found = {}
    for element in find_elements(base):
        found[element.get('id')] = cls(self, element)

    if not isinstance(sdmxobj, dict):
        return DictLike(found)
    sdmxobj.update(found)
    return None
def _constrained_codes(self):
    '''
    Cached property returning a DictLike mapping dimension and attribute
    IDs from the DSD to the code IDs that remain after applying all
    content constraints to the codelists.

    The constraints are applied by calling ``apply`` on each item of
    ``self._constrainables`` in order, feeding the result of one call
    into the next. This implements the cascading mechanism described in
    the SDMX 2.1 Technical Guidelines (Part 6); hence constraints may
    constrain the DSD, dataflow definition or provision agreement.

    Returns:
        DictLike: the constrained dimension codes, updated with the
        constrained attribute codes.
    '''
    # Read the cache through normal attribute access rather than
    # hasattr(self, '__constrained_codes'): inside a class body the
    # attribute name is mangled, so a check against the literal string
    # never finds it and the cascade would be recomputed on every access.
    try:
        return self.__constrained_codes
    except AttributeError:
        pass

    dim_codes, attr_codes = self._dim_codes, self._attr_codes
    for constrainable in self._constrainables:
        dim_codes, attr_codes = constrainable.apply(dim_codes, attr_codes)

    constrained = DictLike(dim_codes)
    constrained.update(attr_codes)
    # Store only the finished result so a failure above leaves no
    # half-built cache behind.
    self.__constrained_codes = constrained
    return constrained
def header_error(self, sdmxobj):
    '''
    Return the attributes of the ``Error`` child of ``sdmxobj._elem`` as
    a DictLike, or None if an AttributeError occurs while obtaining them.
    '''
    try:
        error_attributes = sdmxobj._elem.Error.attrib
        outcome = DictLike(error_attributes)
    except AttributeError:
        outcome = None
    return outcome
class REST:
    """
    Query SDMX resources via REST or from a file

    The HTTP configuration given to the constructor is stored in
    self.config and passed as keyword arguments to requests.get on each
    call. This makes the REST class somewhat similar to a
    requests.Session: e.g., proxies or authorisation data need only be
    provided once. Modify self.config to issue the next request with
    changed arguments.
    """

    # Upper bound for the in-memory temp file. Larger files will be
    # spooled from disc.
    max_size = 2 ** 24

    def __init__(self, cache, http_cfg):
        '''
        Args:
            cache(dict): keyword arguments for
                ``requests_cache.install_cache``. If falsy, no cache is
                installed.
            http_cfg(dict): keyword arguments for ``requests.get``.
                Missing ``stream`` and ``timeout`` entries are filled in,
                in place, with ``True`` and ``30.1``.
        '''
        default_cfg = dict(stream=True, timeout=30.1)
        for it in default_cfg.items():
            http_cfg.setdefault(*it)
        self.config = DictLike(http_cfg)
        if cache:
            requests_cache.install_cache(**cache)

    def get(self, url, fromfile=None, params=None, headers=None):
        '''Get SDMX message from REST service or local file

        Args:

            url(str): URL of the REST service without the query part.
                Ignored if fromfile is set.
            fromfile(str): path to a file containing an SDMX message, or
                an already open file-like object. Paths are opened in
                binary mode; the caller is responsible for closing the
                returned file.
            params(dict): will be appended as query part to the URL after
                a '?'. Default is None, meaning no parameters.
            headers(dict): http headers. Overwrite instance-wide headers.
                Default is None, meaning no extra headers.

        Returns:
            tuple: four objects:

                0. file-like object containing the SDMX message
                1. the complete URL, if any, including the query part
                   constructed from params
                2. the response headers, if any
                3. the status code, if any

            Items 1 to 3 are None when reading from a file.

        Raises:
            HTTPError if the SDMX service responded with a 4xx status
            code. Otherwise, the status code is returned.
        '''
        # None instead of {} as default: a dict default would be shared
        # between all calls.
        params = {} if params is None else params
        headers = {} if headers is None else headers
        if fromfile:
            try:
                # Load data from local file
                source = open(fromfile, 'rb')
            except TypeError:
                # so fromfile must be file-like
                source = fromfile
            final_url = resp_headers = status_code = None
        else:
            source, final_url, resp_headers, status_code = self.request(
                url, params=params, headers=headers)
        return source, final_url, resp_headers, status_code

    def request(self, url, params=None, headers=None):
        """
        Retrieve SDMX messages.
        If needed, override in subclasses to support other data providers.

        :param url: The URL of the message.
        :type url: str
        :param params: query parameters; None means no parameters.
        :type params: dict
        :param headers: http headers merged over those in self.config;
            None means no extra headers.
        :type headers: dict
        :return: tuple of the message as file-like object (None unless
            the status code is OK), the final URL, the response headers
            and the status code
        :raises: HTTPError on a 4xx status code
        """
        params = {} if params is None else params
        headers = {} if headers is None else headers
        # Generate current config. Merge in any given headers
        cur_config = self.config.copy()
        if 'headers' in cur_config:
            cur_config['headers'] = cur_config['headers'].copy()
            cur_config['headers'].update(headers)
        else:
            # Copy so that the caller's dict is never aliased by the
            # request configuration.
            cur_config['headers'] = dict(headers)

        with closing(requests.get(url, params=params, **cur_config)) as response:
            if response.status_code == requests.codes.OK:
                source = STF(max_size=self.max_size)
                for chunk in response.iter_content(chunk_size=1000000):
                    source.write(chunk)
            else:
                source = None
            code = int(response.status_code)
            if 400 <= code <= 499:
                # raise_for_status() raises HTTPError itself; wrapping it
                # in `raise` would raise its None return value instead.
                response.raise_for_status()
            return source, response.url, response.headers, code
class CodelistHandler(KeyValidatorMixin):
    '''
    High-level API implementing the application of content constraints
    to codelists. It is primarily used as a mixin to StructureMessage
    instances containing codelists, a DSD, Dataflow and related
    constraints. However, it may also be used stand-alone. It computes
    the constrained codelists in collaboration with Constrainable,
    ContentConstraint and Cube Region classes.

    All cached properties read their cache through plain attribute access
    (``self.__name``). Checking ``hasattr(self, '__name')`` instead would
    never find the attribute, because inside a class body the attribute
    name is mangled while the string literal is not.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Prepare computation of constrained codelists using the
        cascading mechanism described in Chap. 8 of the Technical
        Guideline (Part 6 of the SDMX 2.1 standard)

        args:

            constrainables(list of model.Constrainable instances):
                Constrainable artefacts in descending order sorted by
                cascading level (e.g., `[DSD, Dataflow]`). At position 0
                there must be the DSD. Defaults to []. If not given, try
                to collect the constrainables from the StructureMessage.
                This will be the most common use case.
        '''
        super(CodelistHandler, self).__init__(*args, **kwargs)
        constrainables = kwargs.get('constrainables', [])
        if constrainables:
            self.__constrainables = constrainables
        elif (hasattr(self, 'datastructure') and
              hasattr(self, 'codelist')):
            self.in_codes = self._in_codes
            if hasattr(self, 'constraint'):
                self.in_constraints = self._in_constraints
            else:
                self.in_constraints = self.in_codes

    @property
    def _constrainables(self):
        '''
        Cached property returning the list of constrainable artefacts.

        Returns the list given to the constructor, if any. Otherwise the
        list is collected once from the message: the DSD, then the first
        dataflow, then the first provision agreement constrained by that
        dataflow.
        '''
        try:
            return self.__constrainables
        except AttributeError:
            pass

        collected = []
        # Collecting any constrainables from the StructureMessage
        # is only meaningful if the Message contains but one DataFlow and
        # DSD.
        if (hasattr(self, 'datastructure') and
                len(self.datastructure) == 1):
            collected.append(self.datastructure.aslist()[0])
            if hasattr(self, 'dataflow'):
                flow = self.dataflow.aslist()[0]
                collected.append(flow)
                if hasattr(self, 'provisionagreement'):
                    for agreement in self.provisionagreement.values():
                        if flow in agreement.constrained_by:
                            collected.append(agreement)
                            break
        self.__constrainables = collected
        return collected

    @property
    def _dim_ids(self):
        '''
        Collect the IDs of dimensions which are
        represented by codelists (this excludes TIME_PERIOD etc.).
        Returns an empty tuple if there are no constrainables.
        '''
        try:
            return self.__dim_ids
        except AttributeError:
            pass

        constrainables = self._constrainables
        if constrainables:
            ids = tuple(
                d.id for d in constrainables[0].dimensions.aslist()
                if d.local_repr.enum)
        else:
            # Same fallback as _dim_codes: no DSD means no dimensions.
            ids = ()
        self.__dim_ids = ids
        return ids

    @property
    def _attr_ids(self):
        '''
        Collect the IDs of attributes which are
        represented by codelists.
        Returns an empty tuple if there are no constrainables.
        '''
        try:
            return self.__attr_ids
        except AttributeError:
            pass

        constrainables = self._constrainables
        if constrainables:
            ids = tuple(
                d.id for d in constrainables[0].attributes.aslist()
                if d.local_repr.enum)
        else:
            # Same fallback as _attr_codes: no DSD means no attributes.
            ids = ()
        self.__attr_ids = ids
        return ids

    @property
    def _dim_codes(self):
        '''
        Cached property returning a DictLike mapping dim ID's from the DSD
        to frozensets containing all code IDs from the codelist
        referenced by the Concept describing the respective dimensions.
        Returns an empty dict if there are no constrainables.
        '''
        try:
            return self.__dim_codes
        except AttributeError:
            pass

        constrainables = self._constrainables
        if constrainables:
            enum_components = [
                d for d in constrainables[0].dimensions.aslist()
                if d.local_repr.enum]
            codes = DictLike({d.id: frozenset(d.local_repr.enum())
                              for d in enum_components})
        else:
            codes = {}
        self.__dim_codes = codes
        return codes

    @property
    def _attr_codes(self):
        '''
        Cached property returning a DictLike mapping attribute ID's from
        the DSD to frozensets containing all code IDs from the codelist
        referenced by the Concept describing the respective attributes.
        Returns an empty dict if there are no constrainables.
        '''
        try:
            return self.__attr_codes
        except AttributeError:
            pass

        constrainables = self._constrainables
        if constrainables:
            enum_components = [
                d for d in constrainables[0].attributes.aslist()
                if d.local_repr.enum]
            codes = DictLike({d.id: frozenset(d.local_repr.enum())
                              for d in enum_components})
        else:
            codes = {}
        self.__attr_codes = codes
        return codes

    @property
    def _constrained_codes(self):
        '''
        Cached property returning a DictLike mapping dimension and
        attribute ID's from the DSD to the code IDs that remain after
        applying all content constraints to the codelists.

        The constraints are applied by calling ``apply`` on each
        constrainable in order, feeding the result of one call into the
        next. This implements the cascading mechanism described in the
        SDMX 2.1 Technical Guidelines (Part 6). Hence, constraints may
        constrain the DSD, dataflow definition or provision-agreement.
        '''
        try:
            return self.__constrained_codes
        except AttributeError:
            pass

        dim_codes, attr_codes = self._dim_codes, self._attr_codes
        for constrainable in self._constrainables:
            dim_codes, attr_codes = constrainable.apply(
                dim_codes, attr_codes)
        constrained = DictLike(dim_codes)
        constrained.update(attr_codes)
        # Store only the finished result so a failure above leaves no
        # half-built cache behind.
        self.__constrained_codes = constrained
        return constrained
class REST:
    """
    Query SDMX resources via REST or from a file

    The HTTP configuration given to the constructor is stored in
    self.config and passed as keyword arguments to requests.get on each
    call. This makes the REST class somewhat similar to a
    requests.Session: e.g., proxies or authorisation data need only be
    provided once. Modify self.config to issue the next request with
    changed arguments.
    """

    # Upper bound for the in-memory temp file. Larger files will be
    # spooled from disc.
    max_size = 2**24

    def __init__(self, cache, http_cfg):
        '''
        Args:
            cache(dict): keyword arguments for
                ``requests_cache.install_cache``. If falsy, no cache is
                installed.
            http_cfg(dict): keyword arguments for ``requests.get``.
                Missing ``stream`` and ``timeout`` entries are filled in,
                in place, with ``True`` and ``30.1``.
        '''
        default_cfg = dict(stream=True, timeout=30.1)
        for it in default_cfg.items():
            http_cfg.setdefault(*it)
        self.config = DictLike(http_cfg)
        if cache:
            requests_cache.install_cache(**cache)

    def get(self, url, fromfile=None, params=None, headers=None):
        '''Get SDMX message from REST service or local file

        Args:

            url(str): URL of the REST service without the query part.
                Ignored if fromfile is set.
            fromfile(str): path to a file containing an SDMX message, or
                an already open file-like object. Paths ending in '.json'
                are opened in text mode, all others in binary mode. The
                caller is responsible for closing the returned file.
            params(dict): will be appended as query part to the URL after
                a '?'. Default is None, meaning no parameters.
            headers(dict): http headers. Overwrite instance-wide headers.
                Default is None, meaning no extra headers.

        Returns:
            tuple: four objects:

                0. file-like object containing the SDMX message
                1. the complete URL, if any, including the query part
                   constructed from params
                2. the response headers, if any
                3. the status code, if any

            Items 1 to 3 are None when reading from a file.

        Raises:
            HTTPError if the SDMX service responded with a 4xx status
            code. Otherwise, the status code is returned.
        '''
        # None instead of {} as default: a dict default would be shared
        # between all calls.
        params = {} if params is None else params
        headers = {} if headers is None else headers
        if fromfile:
            # json files must be opened in text mode, all others in
            # binary as they may be zip files or xml.
            # Test the suffix on str(fromfile): calling .endswith on the
            # argument itself raises AttributeError for file-like and
            # path objects, which would bypass the fallback below.
            if str(fromfile).endswith('.json'):
                mode_str = 'r'
            else:
                mode_str = 'rb'
            try:
                # Load data from local file
                source = open(fromfile, mode_str)
            except TypeError:
                # so fromfile must be file-like
                source = fromfile
            final_url = resp_headers = status_code = None
        else:
            source, final_url, resp_headers, status_code = self.request(
                url, params=params, headers=headers)
        return source, final_url, resp_headers, status_code

    def request(self, url, params=None, headers=None):
        """
        Retrieve SDMX messages.
        If needed, override in subclasses to support other data providers.

        :param url: The URL of the message.
        :type url: str
        :param params: query parameters; None means no parameters.
        :type params: dict
        :param headers: http headers merged over those in self.config;
            None means no extra headers.
        :type headers: dict
        :return: tuple of the message as file-like object (None unless
            the status code is OK), the final URL, the response headers
            and the status code
        :raises: HTTPError on a 4xx status code
        """
        params = {} if params is None else params
        headers = {} if headers is None else headers
        # Generate current config. Merge in any given headers
        cur_config = self.config.copy()
        if 'headers' in cur_config:
            cur_config['headers'] = cur_config['headers'].copy()
            cur_config['headers'].update(headers)
        else:
            # Copy so that the caller's dict is never aliased by the
            # request configuration.
            cur_config['headers'] = dict(headers)

        with closing(requests.get(url, params=params, **cur_config)) as response:
            if response.status_code == requests.codes.OK:
                source = STF(max_size=self.max_size)
                for chunk in response.iter_content(chunk_size=1000000):
                    source.write(chunk)
            else:
                source = None
            code = int(response.status_code)
            if 400 <= code <= 499:
                # raise_for_status() raises HTTPError itself; wrapping it
                # in `raise` would raise its None return value instead.
                response.raise_for_status()
            return source, response.url, response.headers, code