def __getitem__(self, key):
    """Return a new Stark restricted to the requested column(s).

    ``key`` may be a single column name or a list of names; a single
    name is wrapped in a list, so the underlying DataFrame is always
    indexed with a list.

    The metadata of the result is the ``'vars'`` subtree filtered down to
    the selected names.  An elaboration (type ``'E'``) whose source
    variables, as reported by ``_find_elab_vars``, are not all part of
    the selection is downgraded to type ``'N'``.
    """
    columns = key if isinstance(key, list) else [key]
    frame = self._df[columns]

    meta = Meta()
    meta['vars'] = utils.filter_tree(self._md['vars'], columns)

    for name in columns:
        if meta['vars'][name]['type'] != 'E':
            continue
        sources = self._find_elab_vars(name, md=meta)
        # Source names may carry leading/trailing '\' or '$' characters;
        # drop them before checking membership in the selection.
        if any(src.strip(r'\$') not in columns for src in sources):
            meta['vars'][name]['type'] = 'N'

    return Stark(
        frame,
        md=meta,
        currency=self._currency,
        currdata=self._currdata,
    )
def aggregate(self, func='sum', dim=None, var=None, inplace=False):
    ''' Apply an aggregation function to the DataFrame.

    Rows are grouped by ``dim`` and every requested variable is reduced
    with a function picked from the type list it belongs to on this
    instance:

      - ``_num`` and ``_curr`` columns use ``func``;
      - ``_imm`` columns use ``self._set_unique``;
      - ``_rate`` columns use ``self._gr_cum``;
      - ``_elab`` columns use ``func``; those whose metadata has
        ``'rlp' == 'N'`` are additionally re-typed to ``'N'`` in the
        output metadata.

    If the DataFrame contains data calculated as a transformation of
    other columns of the same DataFrame, it is meant to be re-calculated
    in the output one (NOTE(review): the re-evaluation itself is not
    performed here; presumably it happens in ``Stark()`` / ``_update()``
    -- confirm).

    The user can specify which dimensions should be used in the grouping
    operation and which columns must appear in the output DataFrame.

    @ param func: function used to aggregate, can be either a string or
        a function name.
    @ param dim: name, or list of names, of DataFrame's columns that act
        as dimensions (can be used as indexes, from pandas point of
        view). Defaults to ``self._dim``.
    @ param var: name, or list of names, of DataFrame's columns that we
        want to be part of the resulting DataFrame. If calculated
        columns are in this list, also those from which they are
        evaluated must be present. Defaults to every numeric, immutable,
        elaboration, rate and currency column.
    @ param inplace: if True, store the aggregated DataFrame and the
        filtered metadata on this instance, call ``_update()`` and
        return None instead of building a new object.
    @ return: a new Stark instance with aggregated data, or None when
        ``inplace`` is True.

    '''
    # Some defaults
    if dim is None:
        dim = self._dim
    if var is None:
        var = (self._num + self._imm + self._elab + self._rate +
               self._curr)
    # var and dim may be a single column's name
    if isinstance(var, (str, unicode)):
        var = [var]
    if isinstance(dim, (str, unicode)):
        dim = [dim]
    outkeys = dim + var

    df = self._df if inplace else self._df.copy()
    md = Meta()
    md['vars'] = utils.filter_tree(self._md['vars'], outkeys)

    # Only columns that were asked for are aggregated: anything else is
    # dropped by the final ``[var]`` selection anyway, and its metadata
    # has already been filtered out of ``md``.
    requested = set(var)

    # Prepare operation dictionary: for each variable set the
    # appropriate aggregation function based on its type
    operations = {}
    for name in self._num + self._curr:
        if name in requested:
            operations[name] = func
    for name in self._imm:
        if name in requested:
            operations[name] = self._set_unique
    for name in self._rate:
        if name in requested:
            operations[name] = self._gr_cum
    for name in self._elab:
        if name not in requested:
            continue
        # Some elaborations need to become numeric before the
        # aggregation, others must be re-evaluated
        if md['vars'][name].get('rlp') == 'N':
            md['vars'][name]['type'] = 'N'
        # XXX: This is not needed if 'rlp' != 'N', but any other
        # operation seems to introduce a greater overhead to the
        # computation. This should be investigated further.
        operations[name] = func

    df = df.groupby(dim).aggregate(operations)[var].reset_index()

    if inplace:
        # groupby/aggregate build a new frame, so it has to be stored
        # explicitly: without this the instance would keep its old data
        # next to the new, filtered metadata.
        self._df = df
        self._md = md
        self._update()
        return
    return Stark(df, md=md, currency=self._currency,
                 currdata=self._currdata)
def __delitem__(self, key):
    """Remove column ``key`` from both the DataFrame and the metadata.

    The column is deleted from ``self._df`` first; the metadata tree is
    then rebuilt from the names returned by ``utils.unroll`` with
    ``key`` taken out.  Errors raised by either removal (e.g. for an
    unknown ``key``) propagate to the caller.
    """
    del self._df[key]

    # Names to keep: everything currently in the metadata except ``key``.
    surviving = utils.unroll(self._md)
    surviving.remove(key)

    self._md = utils.filter_tree(self._md, surviving)