def _apply_basic_agg(self, agg_type, sort_results=False):
    """
    Run a single aggregation over the selected value columns.

    Parameters
    ----------
    agg_type : str
        The aggregation function to run.
    sort_results : bool, default False
        Forwarded to ``_apply_agg`` as its ``sort_result`` argument.

    Returns
    -------
    result : Series or DataFrame
        When ``as_index`` is set and the value-column selection is a
        single label (``str`` or ``Number``), the aggregated column is
        returned on its own, indexed by the first group key. Otherwise
        a DataFrame is returned; with ``as_index`` set, the first group
        key is moved out of the columns and into the index.

    Notes
    -----
    Only the first ``by`` column is used as the index; multi-key
    indexes are not built here.
    """
    ctx = ffi.new('gdf_context*')
    ctx.flag_sorted = 0
    ctx.flag_method = self._method
    ctx.flag_distinct = 0

    val_columns = self._val_columns

    # Input and output value columns are the same here: a basic
    # aggregation keeps the original column names.
    result = self._apply_agg(agg_type, DataFrame(), True, ctx,
                             val_columns, val_columns,
                             sort_result=sort_results)

    # TODO: Do MultiIndex here
    if self._as_index:
        key = self._by[0]
        idx = index.as_index(result[key])

        # If a Groupby has one index column and one value column
        # and as_index is set, return a Series instead of a df
        single_column = isinstance(val_columns, (str, Number))
        if not single_column:
            # The key becomes the index, so it must not also remain
            # as a regular column of the frame.
            result.drop_column(key)

        # Grouping on index level 0 keeps the original index name;
        # otherwise the index is named after the group key.
        if self.level == 0:
            idx.name = self._original_index_name
        else:
            idx.name = key

        if single_column:
            result = result[val_columns].set_index(idx)
        else:
            result = result.set_index(idx)

    # Pop the profiling range on every return path (Series as well as
    # DataFrame) so that range pushes and pops stay balanced.
    # NOTE(review): the matching push is not visible in this method;
    # presumably it happens inside ``_apply_agg`` - confirm.
    nvtx_range_pop()

    return result
def agg(self, args):
    """
    Invoke aggregation functions on the groups.

    Parameters
    ----------
    args : dict, list, str
        - str
            The aggregate function name.
        - list
            List of *str* of the aggregate function.
        - dict
            key-value pairs of source column name and list of
            aggregate functions as *str*.

        Any other single value is wrapped in a one-element list and
        handled like the list form.

    Returns
    -------
    result : DataFrame

    Notes
    -----
    Since multi-indexes aren't supported aggregation results are
    returned in columns using the naming scheme of
    `aggregation_columnname`. The prefix is only applied when there is
    more than one value column or more than one entry in `args`.

    The group key is moved into the index only when ``as_index`` is set
    and there is exactly one ``by`` column.
    """
    is_sequence = (not isinstance(args, str)
                   and isinstance(args, collections.abc.Sequence))
    is_mapping = isinstance(args, collections.abc.Mapping)

    if not (is_sequence or is_mapping):
        # Single aggregate: delegate to the list form and return its
        # result as-is. The delegated call already runs the trailing
        # nvtx_range_pop(), so falling through would pop a second
        # range. Dispatching before any other work also avoids calling
        # len() on a str (character count) or on an unsized object.
        # NOTE(review): the list form builds names by str
        # concatenation, so non-str aggregates (e.g. callables) are
        # still expected to fail there - confirm intended support.
        return self.agg([args])

    result = DataFrame()
    add_col_values = True

    ctx = ffi.new('gdf_context*')
    ctx.flag_sorted = 0
    ctx.flag_method = self._method
    ctx.flag_distinct = 0

    sort_result = True

    # TODO: Use MultiColumn here instead of use_prefix
    # use_prefix enables old functionality - prefixing column
    # groupby names since we don't support MultiColumn quite yet
    use_prefix = 1 < len(self._val_columns) or 1 < len(args)

    if is_sequence:
        for agg_type in args:
            val_columns_out = [
                agg_type + '_' + val for val in self._val_columns
            ]
            if not use_prefix:
                val_columns_out = self._val_columns

            result = self._apply_agg(agg_type, result, add_col_values,
                                     ctx, self._val_columns,
                                     val_columns_out,
                                     sort_result=sort_result)
            add_col_values = False  # we only want to add them once
    else:
        # Sorting is skipped only for exactly one source column whose
        # value has length 1. For a bare str value len() is its
        # character count, so a plain aggregate name keeps sorting on.
        if len(args) == 1 and len(list(args.values())[0]) == 1:
            sort_result = False

        for val, agg_type in args.items():
            if isinstance(agg_type, str):
                agg_names = [agg_type]
            elif isinstance(agg_type, collections.abc.Sequence):
                agg_names = agg_type
            else:
                # Values that are neither a str nor a sequence are
                # skipped without running any aggregation.
                agg_names = ()

            for agg_name in agg_names:
                val_columns_out = [agg_name + '_' + val]
                if not use_prefix:
                    val_columns_out = self._val_columns

                result = self._apply_agg(agg_name, result,
                                         add_col_values, ctx, [val],
                                         val_columns_out,
                                         sort_result=sort_result)

            add_col_values = False  # we only want to add them once

    # TODO: Do multindex here
    if self._as_index and 1 == len(self._by):
        idx = index.as_index(result[self._by[0]])
        idx.name = self._by[0]
        result = result.set_index(idx)
        result.drop_column(idx.name)

    nvtx_range_pop()

    return result