예제 #1
0
    def _lbl2pos(self, lbl):
        """from label to position

        :param lbl: index for ex.: "2013"
        :returns: integer
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))
        if lbl not in self.__idx2cat:
            raise JsonStatException("dimension '{}': do not have label {}".format(self.__did, lbl))
        return self.__lbl2cat[lbl].pos
예제 #2
0
    def _idx2pos(self, idx):
        """from index to position

        :param idx: index for ex.: "2013"
        :returns: integer
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))
        if idx not in self.__idx2cat:
            raise JsonStatException("dimension '{}': do not have index '{}'".format(self.__did, idx))
        return self.__idx2cat[idx].pos
예제 #3
0
    def all_pos(self, blocked_dims=None, order=None):
        """Generate every position vector of the dataset.

        Each yielded item is a fresh list holding one integer per dimension.
        Dimensions listed in ``blocked_dims`` stay fixed on the requested
        category while the remaining ones are enumerated like the digits of
        an odometer.

        :param blocked_dims: mapping from a dimension (as accepted by
            ``self.dimension``) to a category index, for ex.:
            ``{"year": "2013", "country": "IT"}``; ``None`` (the default) or
            an empty mapping blocks nothing
        :param order: optional sequence, as long as the number of dimensions,
            containing either dimension positions (integers) or dimension
            ids; the last entry is the dimension that changes fastest
        :returns: a generator of lists of integers
        :raises JsonStatException: if ``order`` has the wrong length
        """
        if blocked_dims is None:
            blocked_dims = {}

        nr_dim = len(self.__pos2dim)
        if order is not None:
            if len(order) != nr_dim:
                msg = "length of the order vector is different from number of dimension {}".format(
                    nr_dim)
                raise JsonStatException(msg)
            # Probe the first entry to find out whether ids were given; the
            # first entry exists for every non-empty order, so datasets with a
            # single dimension are handled too.
            if order and not isinstance(order[0], int):
                order = [self.__did2dim[iid].pos for iid in order]

        vec_pos_blocked = nr_dim * [False]
        vec_pos = nr_dim * [0]

        for (cat, idx) in blocked_dims.items():
            d = self.dimension(cat)
            vec_pos_blocked[d.pos] = True
            vec_pos[d.pos] = d._idx2pos(idx)

        pos2size = self.__pos2size

        if order is None:
            vec_dimension_reorder = range(nr_dim)
        else:
            vec_dimension_reorder = order

        nrd = nr_dim - 1
        while nrd >= 0:

            yield list(vec_pos)  # make a shallow copy of vec_pos

            nrd = nr_dim - 1
            cur_dim = vec_dimension_reorder[nrd]
            # if the position is not blocked the digit can move forward
            if not vec_pos_blocked[cur_dim]:
                vec_pos[cur_dim] += 1

            # while there are still dimensions to examine and the current one
            # either overflowed its size or is blocked, carry to the next one
            while nrd >= 0 and \
                    (vec_pos[cur_dim] == pos2size[cur_dim] or vec_pos_blocked[cur_dim]):

                # if the position is not blocked restart its value from zero
                if not vec_pos_blocked[cur_dim]:
                    vec_pos[cur_dim] = 0

                # examine the next position
                nrd -= 1
                # if there still is a dimension to examine
                if nrd >= 0:
                    cur_dim = vec_dimension_reorder[nrd]
                    # if that dimension is not blocked it can move forward
                    if not vec_pos_blocked[cur_dim]:
                        vec_pos[cur_dim] += 1
예제 #4
0
    def dcat_to_lint(self, dims):
        """Convert a ``{dimension: category}`` dict into a list of positions.

        The result has one entry per dimension of the dataset; dimensions not
        mentioned in ``dims`` keep position 0::

            {"country": "AU", "year": "2014"} -> [<pos of "AU">, <pos of "2014">]

        :param dims: keys are dimensions (id or label), values are categories
             "country" is the id of a dimension
             "AU" is a category of that dimension
        :returns: a list of integers
        :raises JsonStatException: if a key is neither the id nor the label of
            a dimension
        """
        positions = [0] * len(self.__pos2dim)
        for key, spec in dims.items():
            # a key matching neither an id nor a label is an error
            if key not in self.__did2dim and key not in self.__lbl2dim:
                known = ", ".join(
                    ["'{}'".format(d.did) for d in self.__pos2dim])
                template = "dataset '{}': category '{}' don't exists allowed categories are: {}"
                raise JsonStatException(template.format(self.__name, key, known))

            # ids take precedence over labels
            if key in self.__did2dim:
                owner = self.__did2dim[key]
            else:
                owner = self.__lbl2dim[key]

            positions[owner.pos] = owner.category(spec).pos
        return positions
예제 #5
0
    def _2idx(self, *args, **kargs):
        """convert args to integer index """

        if len(args) == 1:
            # data(int)
            if isinstance(args[0], int):
                return args[0]
            # data([i1,i2,i3])
            elif isinstance(args[0], list):
                idx = self.lint_as_idx(args[0])
                return idx
            # data({k1:v1, k2:v2})
            elif isinstance(args[0], dict):
                dims = args[0]
                apos = self.dcat_to_lint(dims)
                idx = self.lint_as_idx(apos)
                return idx
        elif len(args) == 0:
            # data(k1:v1, k2:v2)
            dims = kargs
            # print(dims)
            apos = self.dcat_to_lint(dims)
            # print(apos)
            idx = self.lint_as_idx(apos)
            # print(idx)
            return idx

        msg = "unexpected parameters"
        raise JsonStatException(msg)
예제 #6
0
    def __parse_json_index_helper(self, idx, pos):
        """Register the category with index ``idx`` at position ``pos``.

        A JsonStatCategory without label is created and stored both in the
        position table and in the index table of the dimension.

        :param idx: index of the category
        :param pos: position of the category inside the dimension
        :raises JsonStatException: if ``pos`` does not fit the dimension size
        """
        if self.__size <= pos:
            raise JsonStatException(
                "dimension '{}': index {} is greater than size {}".format(
                    self.__did, pos, self.__size))

        # targets are assigned left to right: position table first, then index table
        self.__pos2cat[pos] = self.__idx2cat[idx] = JsonStatCategory(pos=pos, index=idx, label=None)
예제 #7
0
    def data(self, *args, **kargs):
        """Return a JsonStatValue containing value and status of a datapoint.

        The datapoint is retrieved according to the parameters.

        :param args:
            - data(<int>)  where the integer is the index of the datapoint
            - data(<list>) where lst = [i1,i2,i3,...]) each i indicate the dimension len(lst) == number of dimension
            - data(<dict>) where dict is {k1:v1, k2:v2, ...} dimension of size 1 can be omitted

        :param kargs:
            - data(k1=v1,k2=v2,...) where **ki** are the id or label of dimension
              **vi** are the index or label of the category dimension of size 1 can be omitted

        :returns: a JsonStatValue object
        :raises JsonStatException: if the dataset is not initialized

        kargs { cat1:value1, ..., cati:valuei, ... }
        cati can be the id of the dimension or the label of dimension
        valuei can be the index or label of category
        ex.:{country:"AU", "year":"2014"}

        >>> import os, jsonstat  # doctest: +ELLIPSIS
        >>> filename = os.path.join(jsonstat._examples_dir, "www.json-stat.org", "oecd-canada-col.json")
        >>> dataset = jsonstat.from_file(filename).dataset(0)
        >>> dataset.data(0)
        JsonStatValue(idx=0, value=5.943826289, status=None)
        >>> dataset.data(concept='UNR', area='AU', year='2003')
        JsonStatValue(idx=0, value=5.943826289, status=None)
        >>> dataset.data(area='AU', year='2003')
        JsonStatValue(idx=0, value=5.943826289, status=None)
        >>> dataset.data({'area':'AU', 'year':'2003'})
        JsonStatValue(idx=0, value=5.943826289, status=None)
        """
        if not self.__valid:
            raise JsonStatException('dataset not initialized')

        # decoding args
        pos = self._2idx(*args, **kargs)
        # string form of the index, used as key for mapping-style storage
        # NOTE(review): the doctest above shows an integer idx while a string
        # is handed to JsonStatValue here -- confirm which one is intended
        idx = str(pos)

        #
        # value
        #
        # a sequence can only be addressed by the integer position (a string
        # subscript always raises TypeError); anything else is looked up by key
        if isinstance(self.__value, (list, tuple)):
            value = self.__value[pos]
        else:
            value = self.__value[idx]

        #
        # status
        #
        if self.__status is None:
            status = None
        elif isinstance(self.__status, str):
            status = self.__status
        elif isinstance(self.__status, list) and len(self.__status) == 1:
            status = self.__status[0]
        elif isinstance(self.__status, list):
            # longer lists hold one status per datapoint: integer subscript
            status = self.__status[pos]
        elif isinstance(self.__status, dict) and idx not in self.__status:
            status = None
        else:
            status = self.__status[idx]
        return JsonStatValue(idx, value, status)
예제 #8
0
    def _pos2cat(self, pos):
        """get the category associated with the position (integer)

        :param pos: integer
        :returns: the label or None if the label not exists at position pos
            ex.: JsonStatCategory(index='2013', label='2013', pos=pos)
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))
        if self.__pos2cat is None:
            return None
        return self.__pos2cat[pos]
예제 #9
0
    def category(self, spec):
        """Look up a JsonStatCategory by position, index or label.

        The lookups are attempted in this order: integer position (when it is
        smaller than the number of categories), index, label.

        :param spec: can be index (string) or label (string) or a position (integer)
        :returns: a JsonStatCategory
        :raises JsonStatException: if the dimension is not initialized or no
            category matches ``spec``
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))

        # integer inside the table: plain positional access
        if isinstance(spec, int) and spec < len(self.__pos2cat):
            return self.__pos2cat[spec]

        # indexes win over labels
        if spec in self.__idx2cat:
            return self.__idx2cat[spec]

        # labels are optional, the table may be missing
        if self.__lbl2cat is not None and spec in self.__lbl2cat:
            return self.__lbl2cat[spec]

        message = "dimension '{}': unknown index or label '{}'".format(self.__did, spec)
        raise JsonStatException(message)
예제 #10
0
    def dimension(self, spec):
        """Get a JsonStatDimension by spec.

        :param spec: spec can be:
         - (string) the id of the dimension
         - (int) the position of the dimension
        :returns: a JsonStatDimension
        :raises JsonStatException: if ``spec`` is not a position and does not
            match the id of any dimension
        """
        # isinstance also accepts int subclasses as positions; bool is left
        # out so that True/False keep being treated as (unknown) ids
        if isinstance(spec, int) and not isinstance(spec, bool):
            return self.__pos2dim[spec]
        if spec not in self.__did2dim:
            msg = "dataset '{}': unknown dimension '{}' know dimensions ids are: {}"
            msg = msg.format(self.__name, spec,
                             ", ".join([dim.did for dim in self.__pos2dim]))
            raise JsonStatException(msg)
        return self.__did2dim[spec]
예제 #11
0
    def __write_page_to_cache(self, pathname, content):
        """Store ``content`` in the file ``pathname``.

        Nothing happens when ``pathname`` is None. The cache directory is
        created on demand before the file is written.

        :param pathname: file to write, or None to skip writing
        :param content: text to write
        :raises JsonStatException: if the cache directory path exists but is
            not a directory
        """
        if pathname is not None:
            # the cache directory is created lazily, the first time it is needed
            if not os.path.exists(self.__cache_dir):
                os.makedirs(self.__cache_dir)
            if not os.path.isdir(self.__cache_dir):
                raise JsonStatException(
                    "cache_dir '{}' is not a directory".format(self.__cache_dir))

            # note:
            # in python 3 the file must be opened in text mode (no 'b') to
            # write a string, otherwise:
            # TypeError: a bytes-like object is required, not 'str'
            with open(pathname, 'w') as cache_file:
                cache_file.write(content)
예제 #12
0
    def __parse_dimensions(self, json_data_dimension, json_data_roles,
                           pos2iid):
        """Build the dimensions of the dataset from the json stat data.

        It is used for format v1 and v2.

        :param json_data_dimension: mapping dimension id -> json of the dimension
        :param json_data_roles: mapping role -> list of dimension ids, or None
        :param pos2iid: dimension ids listed by position
        :raises JsonStatException: if an id of ``pos2iid`` is missing from
            ``json_data_dimension``
        """

        # invert the role table: dimension id -> role
        role_of = {}
        if json_data_roles is not None:
            for role_name, dim_ids in json_data_roles.items():
                for dim_id in dim_ids:
                    role_of[dim_id] = role_name

        # build the dimensions one by one, in positional order
        self.__pos2dim = [None] * self.__dim_nr
        for position, dim_id in enumerate(pos2iid):
            size = self.__pos2size[position]

            if dim_id not in json_data_dimension:
                raise JsonStatException(
                    "dataset '{}': malformed json: missing key {} in dimension".format(
                        self.__name, dim_id))

            dim = JsonStatDimension(dim_id, size, position, role_of.get(dim_id))
            dim.from_json(json_data_dimension[dim_id])

            # register the dimension by id, by position and (if any) by label
            self.__did2dim[dim_id] = dim
            self.__pos2dim[position] = dim
            if dim.label is not None:
                self.__lbl2dim[dim.label] = dim
예제 #13
0
    def __parse_category(self, json_data_category):
        """Parse the 'category' member, which describes the possible values of a dimension.

        See https://json-stat.org/format/#category

        :param json_data_category: the json object stored under 'category'
        :returns: None
        :raises JsonStatMalformedJson: if neither 'index' nor 'label' is
            present, if there are more labels than indexes, if the number of
            indexes does not match the size, or if a position has no category
        :raises JsonStatException: if 'unit' is used by a dimension whose role
            is not 'metric'

        jsonschema for dimension is about::

            "category": {
                "type": "object",
                "properties": {
                    "index": {"$ref": "#/definitions/category_index"},
                    "label": {"type": "object"},

                    "unit": {"$ref": "#/definitions/category_index"},
                    "child": {"type": "object", "properties": {"additionalProperties": {"type": "array"}}},
                    "coordinates": {"type": "object",
                                    "properties": {"additionalProperties": {"type": "array"}}},
                    "note": {"type": "array"}

                },
                "additionalProperties": false
            },

        """

        # validate: label or index must be present
        if 'index' not in json_data_category and 'label' not in json_data_category:
            msg = "dimension '{}': one of keys 'label' or 'index' must be presents"
            # fill in the dimension id, otherwise the message shows a literal '{}'
            raise JsonStatMalformedJson(msg.format(self.__did))

        if 'index' in json_data_category:
            self.__parse_json_index(json_data_category)

        if 'label' in json_data_category:
            self.__parse_json_label(json_data_category)

        # validate: number of indexes and labels must the same??
        if self.__idx2cat is not None and self.__lbl2cat is not None:
            # TODO: a mere difference between the two counts cannot raise an
            # exception (see hierarchy.json); a warning could be emitted.
            # Only having more labels than indexes is rejected.
            if len(self.__idx2cat) < len(self.__lbl2cat):
                msg = "dimension '{}': the number of labels ({}) are greater than number of indexes ({})"
                msg = msg.format(self.__did, len(self.__lbl2cat), len(self.__idx2cat))
                raise JsonStatMalformedJson(msg)

        # validate: indexes must be consistent with size
        if self.__size != len(self.__idx2cat):
            msg = "dimension '{}': malformed json: number of indexes {} not match with size {}"
            msg = msg.format(self.__did, len(self.__idx2cat), self.__size)
            raise JsonStatMalformedJson(msg)

        # validate: no hole in the indexes
        if any(v is None for v in self.__pos2cat):
            msg = "dimension '{}':hole in index".format(self.__did)
            raise JsonStatMalformedJson(msg)

        # "category_unit": {
        #                      "type": "object",
        #                      "properties": {
        #                          "additionalProperties": {
        #                              "type": "object",
        #                              "properties": {"label": {"type": "string"},
        #                                             "decimals": {"type": "number"},
        #                                             "type": {"type": "string"},
        #                                             "base": {"type": "string"},
        #                                             "multiplier": {"type": "number"},
        #                                             "position": {"type": "string"}},
        #                              "additionalProperties": false
        #                          }
        #                      }
        #                  },

        # TODO: parse 'unit'
        # "unit" : {
        #     "exp" : {
        #         "decimals": 1,
        #         "label" : "millions",
        #         "symbol" : "$",
        #         "position" : "start"
        #     }
        # }
        # "category" : {
        #     "label" : {
        #         "UNR" : "unemployment rate"
        #     },
        #     "unit" : {
        #         "UNR" : {
        #             "label" : "%",
        #             "decimals" : 9,
        #             "type" : "ratio",
        #             "base" : "per cent",
        #             "multiplier" : 0
        #         }
        #     }
        # }
        if 'unit' in json_data_category:
            if self.__role != "metric":
                msg = "dimension {}: 'unit' can be used only when role is 'metric'"
                msg = msg.format(self.__did)
                # the exception has to be raised: building it alone has no effect
                raise JsonStatException(msg)
            # the unit description is stored as-is, it is not parsed yet
            self.__unit = json_data_category['unit']