Example #1
0
File: stark.py Project: oberix/star
 def __getitem__(self, key):
     ''' Select one or more columns and return them as a new Stark.

     @ param key: a column name or a list of column names; anything
         that is not a list is wrapped into a one-element list.
     @ return: a new Stark instance built from the selected columns
         and the matching subset of the 'vars' metadata.

     A selected variable whose type is 'E' is switched to type 'N'
     in the new metadata when at least one of the terms returned by
     _find_elab_vars() is not among the selected columns.
     '''
     columns = key if isinstance(key, list) else [key]
     frame = self._df[columns]
     meta = Meta()
     meta['vars'] = utils.filter_tree(self._md['vars'], columns)
     for name in columns:
         if meta['vars'][name]['type'] != 'E':
             continue
         # Terms are compared after stripping leading/trailing
         # backslash and dollar characters.
         terms = self._find_elab_vars(name, md=meta)
         if any(term.strip(r'\$') not in columns for term in terms):
             meta['vars'][name]['type'] = 'N'
     return Stark(frame, md=meta, currency=self._currency,
                  currdata=self._currdata)
Example #2
0
File: stark.py Project: oberix/star
    def aggregate(self, func='sum', dim=None, var=None, inplace=False):
        ''' Apply an aggregation function to the DataFrame. If the
        DataFrame contains data that is calculated as a
        transformation of other columns from the same DataFrame, it
        will be re-calculated in the output one.

        The user can specify which dimensions should be used in the
        grouping operation and which columns must appear in the
        output DataFrame.

        @ param func: function used to aggregate, can be either a
            string or a function name.
        @ param dim: name, or list of names, of DataFrame's columns
            that act as dimensions (can be used as indexes, from
            pandas point of view). Defaults to self._dim.
        @ param var: name, or list of names, of DataFrame's columns
            that we want to be part of the resulting DataFrame. If
            calculated columns are in this list, also those from which
            they are evaluated must be present. Defaults to all the
            numeric, immutable, elaborated, rate and currency columns.
        @ param inplace: if True, store the aggregated DataFrame and
            the filtered metadata on this instance and return None;
            if False (default), work on a copy of the DataFrame and
            return a new Stark instance.
        @ return: a new Stark instance with aggregated data, or None
            when inplace is True.

        '''
        # Some defaults
        if dim is None:
            dim = self._dim
        if var is None:
            var = self._num + self._imm + self._elab + self._rate + \
                  self._curr
        # var and dim may be single column's name
        if isinstance(var, (str, unicode)):
            var = [var]
        if isinstance(dim, (str, unicode)):
            dim = [dim]
        outkeys = dim + var

        if not inplace:
            df = self._df.copy()
        else:
            df = self._df
        md = Meta()
        md['vars'] = utils.filter_tree(self._md['vars'], outkeys)

        # Prepare operation dictionary: for each variable set the
        # appropriate aggregation function based on its type
        operations = {}
        for name in self._num + self._curr:
            operations[name] = func
        for name in self._imm:
            operations[name] = self._set_unique
        for name in self._rate:
            operations[name] = self._gr_cum
        for name in self._elab:
            # Some elaboration need to become numeric before the
            # aggregation, others must be re-evaluated
            if md['vars'][name].get('rlp') == 'N':
                md['vars'][name]['type'] = 'N'
            # XXX: This is not needed if 'rlp' != 'N', but any other
            # operation seems to introduce a greater overhead to the
            # computation. This should be investigated further.
            operations[name] = func

        # groupby/aggregate/reset_index build a brand new DataFrame:
        # the frame referenced by self._df is not modified here.
        df = df.groupby(dim).aggregate(operations)[var].reset_index()

        if inplace:
            # The aggregated frame must be stored explicitly, otherwise
            # the instance would keep the old data together with the
            # new metadata.
            self._df = df
            self._md = md
            self._update()
            return
        return Stark(df, md=md, currency=self._currency,
                     currdata=self._currdata)
Example #3
0
File: stark.py Project: oberix/star
 def __delitem__(self, key):
     ''' Delete column `key` from the DataFrame and drop it from the
     metadata.

     @ param key: name of the column to delete.

     The metadata is rebuilt by filtering self._md on the names
     returned by utils.unroll(self._md) minus `key` (presumably the
     full list of metadata keys -- verify against utils.unroll).
     An error is raised by remove() if `key` is not among them.
     '''
     del self._df[key]
     remaining = utils.unroll(self._md)
     remaining.remove(key)
     self._md = utils.filter_tree(self._md, remaining)