Esempio n. 1
0
 def __init__(self, cache, http_cfg):
     '''
     Store the HTTP configuration and optionally install a cache.

     Args:
         cache(dict): keyword arguments for
             ``requests_cache.install_cache``. If falsy, no cache
             is installed.
         http_cfg(dict): configuration stored as a DictLike in
             ``self.config``. ``stream=True`` and ``timeout=30.1`` are
             added unless the caller supplied these keys. The dict
             passed in is not modified.
     '''
     # Merge the defaults into a copy so that they do not leak into the
     # caller's dict.
     merged_cfg = dict(http_cfg)
     merged_cfg.setdefault('stream', True)
     merged_cfg.setdefault('timeout', 30.1)
     self.config = DictLike(merged_cfg)
     if cache:
         requests_cache.install_cache(**cache)
Esempio n. 2
0
    def write(self, source=None, rows=None, **kwargs):
        '''
        Transform structural metadata, i.e. codelists, concept-schemes,
        lists of dataflow definitions or category-schemes
        from a :class:`pandasdmx.model.StructureMessage` instance into
        one or more pandas DataFrames.
        This method is called by :meth:`pandasdmx.api.Response.write`. It is
        not part of the public-facing API. Yet, certain kwargs are
        propagated from there.

        For each requested content type, ``self._make_dataframe`` is called
        with `source`, the content-type name and the remaining keyword
        arguments. The results are collected in a DictLike keyed by
        content-type name.

        Args:
            source(pandasdmx.model.StructureMessage): the message to read
                from.

            rows(str, list, tuple): name(s) of the attribute(s) of the
                StructureMessage to be extracted. Each attribute must
                be an instance of `dict` whose keys are strings. These will
                be interpreted as IDs and used for the MultiIndex of the
                DataFrame to be returned. Values can be either instances of
                `dict` such as for codelists and categoryscheme, or simple
                nameable objects such as for dataflows. In the latter case,
                the DataFrame will have a flat index.
                If None (default), every name in ``self._row_content`` that
                `source` has as an attribute is used.
                Common is 'codelist'.
            columns(str, list): if str, it denotes the attribute of the
                values (nameable SDMX objects such as Code or ConceptScheme)
                that will be stored in the DataFrame. If a list, it must
                contain strings that are valid attribute names.
                Defaults to: ['name', 'description'].
                Forwarded to ``self._make_dataframe``.
            constraint(bool): if True (default), apply any constraints to
                codelists, i.e. only the codes allowed by the constraints
                attached to the DSD, dataflow and provision agreements
                contained in the message are written to the DataFrame.
                Otherwise, the entire codelist is written.
                Forwarded to ``self._make_dataframe``.
            lang(str): locale identifier. Specifies the preferred
                language for international strings such as names.
                Default is 'en'. Forwarded to ``self._make_dataframe``.

        Returns:
            The single result (obtained via ``DictLike.any()``) if `rows`
            was given as a single value or as a list/tuple of length one.
            Otherwise the DictLike holding all results.
        '''
        if rows is None:
            # Nothing requested explicitly: use every known content type
            # that is present on the message. The result is always a
            # DictLike in this case, even if only one type was found.
            rows = [name for name in self._row_content
                    if hasattr(source, name)]
            single_result = False
        elif isinstance(rows, (list, tuple)):
            single_result = len(rows) == 1
        else:
            # A bare value (typically a string) names exactly one type.
            rows = [rows]
            single_result = True

        built = {}
        for name in rows:
            built[name] = self._make_dataframe(source, name, **kwargs)
        frames = DictLike(built)

        # With exactly one requested item, hand back the item itself.
        return frames.any() if single_result else frames
Esempio n. 3
0
    def write(self, source=None, rows=None, **kwargs):
        '''
        Transform structural metadata, i.e. codelists, concept-schemes,
        lists of dataflow definitions or category-schemes
        from a :class:`pandasdmx.model.StructureMessage` instance into
        one or more pandas DataFrames.
        This method is called by :meth:`pandasdmx.api.Response.write`. It is
        not part of the public-facing API. Yet, certain kwargs are
        propagated from there.

        Every requested content type is converted by
        ``self._make_dataframe(source, name, **kwargs)``; the results are
        stored in a DictLike with the content-type names as keys.

        Args:
            source(pandasdmx.model.StructureMessage): the message to read
                from.

            rows(str, list, tuple): name(s) of the attribute(s) of the
                StructureMessage to be extracted. Each attribute must
                be an instance of `dict` whose keys are strings. These will
                be interpreted as IDs and used for the MultiIndex of the
                DataFrame to be returned. Values can be either instances of
                `dict` such as for codelists and categoryscheme, or simple
                nameable objects such as for dataflows. In the latter case,
                the DataFrame will have a flat index.
                If None (default), every name in ``self._row_content`` that
                `source` has as an attribute is used.
                Common is 'codelist'.
            columns(str, list): if str, it denotes the attribute of the
                values (nameable SDMX objects such as Code or ConceptScheme)
                that will be stored in the DataFrame. If a list, it must
                contain strings that are valid attribute names.
                Defaults to: ['name', 'description'].
                Forwarded to ``self._make_dataframe``.
            lang(str): locale identifier. Specifies the preferred
                language for international strings such as names.
                Default is 'en'. Forwarded to ``self._make_dataframe``.

        Returns:
            The single result (obtained via ``DictLike.any()``) if `rows`
            was given as a single value or as a list/tuple of length one.
            Otherwise the DictLike holding all results.
        '''
        explicit_sequence = isinstance(rows, (list, tuple))
        if explicit_sequence:
            unwrap = len(rows) == 1
        elif rows is not None:
            # A bare value (typically a string) names exactly one type.
            rows, unwrap = [rows], True
        else:
            # Default: all known content types present on the message.
            # The caller always gets a DictLike back in this case.
            unwrap = False
            rows = [candidate for candidate in self._row_content
                    if hasattr(source, candidate)]

        collected = {}
        for content_type in rows:
            collected[content_type] = self._make_dataframe(
                source, content_type, **kwargs)
        frames = DictLike(collected)

        if not unwrap:
            return frames
        # Only one item was requested, so return that item directly.
        return frames.any()
Esempio n. 4
0
 def __init__(self, cache, http_cfg):
     '''
     Store the HTTP configuration and optionally install a cache.

     Args:
         cache(dict): keyword arguments for
             ``requests_cache.install_cache``. If falsy, no cache
             is installed.
         http_cfg(dict): configuration stored as a DictLike in
             ``self.config``. ``stream=True`` and ``timeout=30.1`` are
             added unless the caller supplied these keys. The dict
             passed in is not modified.
     '''
     # Apply the defaults to a copy; the original loop used setdefault()
     # directly on the caller's dict and thereby modified it.
     merged_cfg = dict(http_cfg)
     for key, value in (('stream', True), ('timeout', 30.1)):
         merged_cfg.setdefault(key, value)
     self.config = DictLike(merged_cfg)
     if cache:
         requests_cache.install_cache(**cache)
Esempio n. 5
0
 def international_str(self, name, sdmxobj):
     '''
     Return a DictLike that pairs language tokens with the strings found
     for `name` on ``sdmxobj._elem``.

     The tokens come from the ``'int_str_names'`` path and the strings
     from the ``'int_str_values'`` path in ``self._paths``. If no tokens
     are found (unilingual strings carry no xml:lang attribute), the
     language is assumed to be 'en'.
     '''
     elem = sdmxobj._elem
     # Language tokens like 'en', 'fr'...; fall back to 'en' if there are none.
     languages = self._paths['int_str_names'](elem, name=name) or ['en']
     texts = self._paths['int_str_values'](elem, name=name)
     return DictLike(zip(languages, texts))
Esempio n. 6
0
 def read_identifiables(self, cls,  sdmxobj, offset=None):
     '''
     Instantiate `cls` for every element found by ``self._paths[cls]``
     and key the instances by the 'id' attribute of their elements.

     Args:
         cls: model class; used both as key into ``self._paths`` and as
             factory, called as ``cls(self, element)``.
         sdmxobj: object whose ``_elem`` is the starting point of the
             search.
         offset: optional key into ``self._paths``. If given, the search
             starts at the first element this path yields for
             ``sdmxobj._elem``.

     Returns:
         None if `offset` is given but yields no element.
         None if `sdmxobj` is a dict; in that case `sdmxobj` is updated
         in place with the new items.
         Otherwise a new DictLike containing the items.
     '''
     finder = self._paths[cls]
     base = sdmxobj._elem
     if offset:
         try:
             base = self._paths[offset](base)[0]
         except IndexError:
             # The offset path matched nothing, so there is nothing to read.
             return None

     found = {}
     for elem in finder(base):
         key = elem.get('id')
         found[key] = cls(self, elem)

     if not isinstance(sdmxobj, dict):
         return DictLike(found)
     sdmxobj.update(found)
     return None
Esempio n. 7
0
 def _constrained_codes(self):
     '''
     Cached value: a DictLike mapping dimension and attribute IDs to the
     code sets that remain after every constrainable artefact in
     ``self._constrainables`` has been applied in order to
     ``self._dim_codes`` and ``self._attr_codes``.

     The original notes describe this as the cascading mechanism of the
     SDMX 2.1 Technical Guidelines (Part 6), by which constraints may
     constrain the DSD, dataflow definition or provision-agreement.
     NOTE(review): the original text cites both Chap. 8 and Chap. 9 for
     this mechanism -- confirm the correct chapter.

     The result is computed on first access and then reused; later
     changes to the constrainables are not reflected.
     '''
     # Inside a class, ``self.__constrained_codes`` is name-mangled, so a
     # string-based ``hasattr(self, '__constrained_codes')`` never finds
     # it and the value would be recomputed on every access. Reading the
     # attribute directly uses the same mangled name as the assignment
     # below.
     try:
         return self.__constrained_codes
     except AttributeError:
         pass
     cur_dim_codes, cur_attr_codes = self._dim_codes, self._attr_codes
     for c in self._constrainables:
         cur_dim_codes, cur_attr_codes = c.apply(
             cur_dim_codes, cur_attr_codes)
     merged = DictLike(cur_dim_codes)
     merged.update(cur_attr_codes)
     self.__constrained_codes = merged
     return merged
Esempio n. 8
0
 def header_error(self, sdmxobj):
     '''
     Return the attributes of ``sdmxobj._elem.Error`` wrapped in a
     DictLike, or None if an AttributeError occurs while obtaining them
     (e.g. because there is no ``Error`` child).
     '''
     try:
         error_attrs = sdmxobj._elem.Error.attrib
         result = DictLike(error_attrs)
     except AttributeError:
         result = None
     return result
Esempio n. 9
0
class REST:

    """
    Query SDMX resources via REST or from a file

    The HTTP configuration passed to the constructor as ``http_cfg`` is
    stored in ``self.config`` and passed as keyword arguments to
    ``requests.get`` on each call. This makes the REST class somewhat
    similar to a requests.Session. E.g., proxies or authorisation data need
    only be provided once. Modify ``self.config`` to issue the next 'get'
    request with changed arguments.
    """

    max_size = 2 ** 24
    '''upper bound for in-memory temp file. Larger files will be spooled from disc'''

    def __init__(self, cache, http_cfg):
        '''
        Store the HTTP configuration and optionally install a cache.

        Args:
            cache(dict): keyword arguments for
                ``requests_cache.install_cache``. If falsy, no cache
                is installed.
            http_cfg(dict): keyword arguments for ``requests.get``.
                ``stream=True`` and ``timeout=30.1`` are added unless the
                caller supplied these keys. The dict passed in is not
                modified.
        '''
        # Merge the defaults into a copy so that they do not leak into the
        # caller's dict.
        merged_cfg = dict(http_cfg)
        merged_cfg.setdefault('stream', True)
        merged_cfg.setdefault('timeout', 30.1)
        self.config = DictLike(merged_cfg)
        if cache:
            requests_cache.install_cache(**cache)

    def get(self, url, fromfile=None, params={}, headers={}):
        '''Get SDMX message from REST service or local file

        Args:

            url(str): URL of the REST service without the query part.
                Ignored if fromfile is given.
            params(dict): will be appended as query part to the URL after a '?'
            fromfile(str): path to a file containing an SDMX message, or a
                file-like object. A path is opened in binary mode; a
                file-like object is returned as is. It will be passed on
                to the reader for parsing.
            headers(dict): http headers. Overwrite instance-wide headers.
                Default is {}

        Returns:
            tuple: four objects:

                0. file-like object containing the SDMX message
                1. the complete URL including the query part
                   constructed from params, or None if read from file
                2. the response headers, or None if read from file
                3. the status code, or None if read from file

        Raises:
            HTTPError if the SDMX service responded with a
                status code in the range 400-499. Otherwise, the
                status code is returned.
        '''
        # NOTE: the mutable default arguments are kept for interface
        # compatibility; they are never modified by this class.
        if fromfile:
            try:
                # Load data from local file
                source = open(fromfile, 'rb')
            except TypeError:
                # open() rejects non-path arguments,
                # so fromfile must be file-like
                source = fromfile
            final_url = resp_headers = status_code = None
        else:
            source, final_url, resp_headers, status_code = self.request(
                url, params=params, headers=headers)
        return source, final_url, resp_headers, status_code

    def request(self, url, params={}, headers={}):
        """
        Retrieve SDMX messages.
        If needed, override in subclasses to support other data providers.

        :param url: The URL of the message.
        :type url: str
        :param params: query parameters passed to ``requests.get``.
        :param headers: http headers; merged over any headers stored
            in ``self.config``.
        :return: tuple of (file-like object with the response body or None
            if the status code is not OK, final URL, response headers,
            status code as int)
        :raises: HTTPError for status codes in the range 400-499.
        """
        # Generate current config. Merge in any given headers
        cur_config = self.config.copy()
        if 'headers' in cur_config:
            # Copy before updating so the instance-wide headers stay intact.
            cur_config['headers'] = cur_config['headers'].copy()
            cur_config['headers'].update(headers)
        else:
            cur_config['headers'] = headers

        with closing(requests.get(url, params=params, **cur_config)) as response:
            if response.status_code == requests.codes.OK:
                source = STF(max_size=self.max_size)
                for chunk in response.iter_content(chunk_size=1000000):
                    source.write(chunk)
            else:
                source = None
            code = int(response.status_code)
            if 400 <= code <= 499:
                # raise_for_status() raises the HTTPError itself; it must
                # not be used as the operand of a ``raise`` statement.
                response.raise_for_status()
            return source, response.url, response.headers, code
Esempio n. 10
0
class CodelistHandler(KeyValidatorMixin):
    '''
    High-level API implementing the
    application of content constraints to codelists. It is primarily
    used as a mixin to StructureMessage instances containing codelists,
    a DSD, Dataflow and related constraints. However, it
    may also be used stand-alone. It computes
    the constrained codelists in collaboration with
    Constrainable, ContentConstraint and Cube Region classes.

    All ``_``-prefixed properties below cache their result in a private
    (name-mangled) attribute on first access. The cache lookups read the
    attribute directly instead of using ``hasattr(self, '__name')``: the
    latter never matches a name-mangled attribute, which would disable
    the caches and discard constrainables passed to the constructor.
    '''

    def __init__(self, *args, **kwargs):
        '''
        Prepare computation of constrained codelists using the
        cascading mechanism described in Chap. 8 of the Technical Guideline (Part 6 of the SDMX 2.1 standard)

        All positional and keyword arguments are passed on to the next
        ``__init__`` in the MRO.

        kwargs:

            constrainables(list of model.Constrainable instances):
                Constrainable artefacts in descending order sorted by
                cascading level (e.g., `[DSD, Dataflow]`). At position 0
                there must be the DSD. Defaults to [].
                If not given, the constrainables are
                collected from the StructureMessage on first use.
                This will be the most common use case.
        '''
        super(CodelistHandler, self).__init__(*args, **kwargs)
        constrainables = kwargs.get('constrainables', [])
        if constrainables:
            self.__constrainables = constrainables
        elif (hasattr(self, 'datastructure')
              and hasattr(self, 'codelist')):
            # ``_in_codes`` / ``_in_constraints`` are not defined in this
            # class; presumably they come from KeyValidatorMixin -- confirm.
            self.in_codes = self._in_codes
            if hasattr(self, 'constraint'):
                self.in_constraints = self._in_constraints
            else:
                self.in_constraints = self.in_codes

    @property
    def _constrainables(self):
        '''
        Cached list of constrainable artefacts, DSD first.

        If no list was passed to the constructor, the artefacts are
        collected from the message: the DSD, then the first dataflow (if
        any), then the first provision agreement whose ``constrained_by``
        contains that dataflow. The list is empty unless the message
        contains exactly one DSD.
        '''
        try:
            return self.__constrainables
        except AttributeError:
            pass
        found = []
        # Collecting any constrainables from the StructureMessage
        # is only meaningful if the Message contains but one DataFlow and
        # DSD.
        if hasattr(self, 'datastructure') and len(self.datastructure) == 1:
            found.append(self.datastructure.aslist()[0])
            flow = None
            if hasattr(self, 'dataflow'):
                flows = self.dataflow.aslist()
                if flows:
                    flow = flows[0]
                    found.append(flow)
            # Provision agreements are matched against the dataflow, so
            # they can only be considered when a dataflow was found.
            if flow is not None and hasattr(self, 'provisionagreement'):
                for p in self.provisionagreement.values():
                    if flow in p.constrained_by:
                        found.append(p)
                        break
        self.__constrainables = found
        return found

    @property
    def _dim_ids(self):
        '''
        Cached tuple of the IDs of dimensions which are
        represented by codelists (this excludes TIME_PERIOD etc.).
        The dimensions are taken from the first constrainable (the DSD).
        '''
        try:
            return self.__dim_ids
        except AttributeError:
            pass
        self.__dim_ids = tuple(
            d.id for d in self._constrainables[0].dimensions.aslist()
            if d.local_repr.enum)
        return self.__dim_ids

    @property
    def _attr_ids(self):
        '''
        Cached tuple of the IDs of attributes which are
        represented by codelists.
        The attributes are taken from the first constrainable (the DSD).
        '''
        try:
            return self.__attr_ids
        except AttributeError:
            pass
        self.__attr_ids = tuple(
            d.id for d in self._constrainables[0].attributes.aslist()
            if d.local_repr.enum)
        return self.__attr_ids

    @property
    def _dim_codes(self):
        '''
        Cached property returning a DictLike mapping dim ID's from the DSD to
        frozensets containing all code IDs from the codelist
        referenced by the Concept describing the respective dimensions.
        An empty dict is returned if there are no constrainables.
        '''
        try:
            return self.__dim_codes
        except AttributeError:
            pass
        if self._constrainables:
            enum_components = [
                d for d in self._constrainables[0].dimensions.aslist()
                if d.local_repr.enum]
            self.__dim_codes = DictLike(
                {d.id: frozenset(d.local_repr.enum())
                 for d in enum_components})
        else:
            self.__dim_codes = {}
        return self.__dim_codes

    @property
    def _attr_codes(self):
        '''
        Cached property returning a DictLike mapping attribute ID's from the DSD to
        frozensets containing all code IDs from the codelist
        referenced by the Concept describing the respective attributes.
        An empty dict is returned if there are no constrainables.
        '''
        try:
            return self.__attr_codes
        except AttributeError:
            pass
        if self._constrainables:
            enum_components = [
                d for d in self._constrainables[0].attributes.aslist()
                if d.local_repr.enum]
            self.__attr_codes = DictLike(
                {d.id: frozenset(d.local_repr.enum())
                 for d in enum_components})
        else:
            self.__attr_codes = {}
        return self.__attr_codes

    @property
    def _constrained_codes(self):
        '''
        Cached property returning a DictLike mapping dimension and
        attribute IDs from the DSD to the code sets that remain after
        every artefact in ``self._constrainables`` has been applied, in
        order, to ``self._dim_codes`` and ``self._attr_codes``.

        The original notes describe this as the cascading mechanism of
        the SDMX 2.1 Technical Guidelines (Part 6), by which constraints
        may constrain the DSD, dataflow definition or provision-agreement.
        NOTE(review): the original text cites both Chap. 8 and Chap. 9 --
        confirm the correct chapter.
        '''
        try:
            return self.__constrained_codes
        except AttributeError:
            pass
        cur_dim_codes, cur_attr_codes = self._dim_codes, self._attr_codes
        for c in self._constrainables:
            cur_dim_codes, cur_attr_codes = c.apply(
                cur_dim_codes, cur_attr_codes)
        merged = DictLike(cur_dim_codes)
        merged.update(cur_attr_codes)
        self.__constrained_codes = merged
        return merged
Esempio n. 11
0
class REST:
    """
    Query SDMX resources via REST or from a file

    The HTTP configuration passed to the constructor as ``http_cfg`` is
    stored in ``self.config`` and passed as keyword arguments to
    ``requests.get`` on each call. This makes the REST class somewhat
    similar to a requests.Session. E.g., proxies or authorisation data need
    only be provided once. Modify ``self.config`` to issue the next 'get'
    request with changed arguments.
    """

    max_size = 2**24
    '''upper bound for in-memory temp file. Larger files will be spooled from disc'''

    def __init__(self, cache, http_cfg):
        '''
        Store the HTTP configuration and optionally install a cache.

        Args:
            cache(dict): keyword arguments for
                ``requests_cache.install_cache``. If falsy, no cache
                is installed.
            http_cfg(dict): keyword arguments for ``requests.get``.
                ``stream=True`` and ``timeout=30.1`` are added unless the
                caller supplied these keys. The dict passed in is not
                modified.
        '''
        # Merge the defaults into a copy so that they do not leak into the
        # caller's dict.
        merged_cfg = dict(http_cfg)
        merged_cfg.setdefault('stream', True)
        merged_cfg.setdefault('timeout', 30.1)
        self.config = DictLike(merged_cfg)
        if cache:
            requests_cache.install_cache(**cache)

    def get(self, url, fromfile=None, params={}, headers={}):
        '''Get SDMX message from REST service or local file

        Args:

            url(str): URL of the REST service without the query part.
                Ignored if fromfile is given.
            params(dict): will be appended as query part to the URL after a '?'
            fromfile(str): path to a file containing an SDMX message, or a
                file-like object. A str path ending in '.json' is opened
                in text mode, any other path in binary mode; a file-like
                object is returned as is. It will be passed on to the
                reader for parsing.
            headers(dict): http headers. Overwrite instance-wide headers.
                Default is {}

        Returns:
            tuple: four objects:

                0. file-like object containing the SDMX message
                1. the complete URL including the query part
                   constructed from params, or None if read from file
                2. the response headers, or None if read from file
                3. the status code, or None if read from file

        Raises:
            HTTPError if the SDMX service responded with a
                status code in the range 400-499. Otherwise, the
                status code is returned.
        '''
        # NOTE: the mutable default arguments are kept for interface
        # compatibility; they are never modified by this class.
        if fromfile:
            # json files must be opened in text mode, all others in binary as
            # they may be zip files or xml.
            # The suffix is only inspected for str paths: file-like objects
            # have no endswith(), and calling it on them raised an
            # AttributeError that the TypeError handler below did not catch.
            is_json = isinstance(fromfile, str) and fromfile.endswith('.json')
            mode_str = 'r' if is_json else 'rb'
            try:
                # Load data from local file
                source = open(fromfile, mode_str)
            except TypeError:
                # open() rejects non-path arguments,
                # so fromfile must be file-like
                source = fromfile
            final_url = resp_headers = status_code = None
        else:
            source, final_url, resp_headers, status_code = self.request(
                url, params=params, headers=headers)
        return source, final_url, resp_headers, status_code

    def request(self, url, params={}, headers={}):
        """
        Retrieve SDMX messages.
        If needed, override in subclasses to support other data providers.

        :param url: The URL of the message.
        :type url: str
        :param params: query parameters passed to ``requests.get``.
        :param headers: http headers; merged over any headers stored
            in ``self.config``.
        :return: tuple of (file-like object with the response body or None
            if the status code is not OK, final URL, response headers,
            status code as int)
        :raises: HTTPError for status codes in the range 400-499.
        """
        # Generate current config. Merge in any given headers
        cur_config = self.config.copy()
        if 'headers' in cur_config:
            # Copy before updating so the instance-wide headers stay intact.
            cur_config['headers'] = cur_config['headers'].copy()
            cur_config['headers'].update(headers)
        else:
            cur_config['headers'] = headers

        with closing(requests.get(url, params=params,
                                  **cur_config)) as response:
            if response.status_code == requests.codes.OK:
                source = STF(max_size=self.max_size)
                for chunk in response.iter_content(chunk_size=1000000):
                    source.write(chunk)
            else:
                source = None
            code = int(response.status_code)
            if 400 <= code <= 499:
                # raise_for_status() raises the HTTPError itself; it must
                # not be used as the operand of a ``raise`` statement.
                response.raise_for_status()
            return source, response.url, response.headers, code