def _reconstruct_func(func, **kwargs):
    """
    Normalize an aggregation spec and swap known callables for their names.

    The arguments are forwarded unchanged to ``reconstruct_func``. Every
    callable value of the returned mapping whose ``__name__`` is listed in
    ``dir(self)`` is then replaced by that name; all other values are kept
    as they are.

    Parameters
    ----------
    func : object
        Aggregation specification passed straight to ``reconstruct_func``.
    **kwargs : dict
        Keyword arguments passed straight to ``reconstruct_func``.

    Returns
    -------
    tuple
        ``(relabeling_required, func, new_columns, order)`` where ``func`` is
        the converted mapping and the other three items are exactly what
        ``reconstruct_func`` returned.

    Notes
    -----
    NOTE(review): ``self`` is not a parameter of this function, so it has to
    be provided by an enclosing scope - confirm against the definition site.
    """
    relabeling_required, func, new_columns, order = reconstruct_func(func, **kwargs)

    def to_method_name(fn):
        # Non-callables (strings, lists, ...) are passed through untouched.
        if not callable(fn):
            return fn
        # Callables such as ``functools.partial`` objects or callable class
        # instances have no ``__name__``; reading it directly would raise
        # AttributeError, so those are kept as they are.
        name = getattr(fn, "__name__", None)
        return name if name is not None and name in dir(self) else fn

    # We convert to the string version of the function for simplicity.
    func = {key: to_method_name(value) for key, value in func.items()}
    return relabeling_required, func, new_columns, order
def aggregate(self, func=None, *args, **kwargs):
    """
    Aggregate the grouped data with the given function specification.

    Parameters
    ----------
    func : str, callable, dict, list-like or None, default: None
        How to aggregate:

        - a builtin function whose name is listed in ``dir(self)`` is first
          replaced by that name and then handled like a string;
        - a dict, or None (specification given through ``**kwargs``), is
          normalized with ``reconstruct_func`` and executed through
          ``_apply_agg_function``;
        - any other list-like is executed through ``_default_to_pandas``;
        - a callable is applied to every group through
          ``_apply_agg_function``;
        - a string is looked up with ``__getattr__`` and, when the attribute
          is callable, called with ``*args`` and ``**kwargs``.
    *args : list
        Positional arguments forwarded to the aggregation.
    **kwargs : dict
        Keyword arguments forwarded to the aggregation. When `func` is None
        they are consumed by ``reconstruct_func`` and not forwarded further.

    Returns
    -------
    object
        The value produced by the selected execution path. If
        ``reconstruct_func`` reports that relabeling is required, the columns
        of the result are reordered and renamed accordingly.

    Raises
    ------
    NotImplementedError
        If ``self._axis`` is not 0.
    SpecificationError
        If a key of the aggregation dictionary is not in ``self._df.columns``.
    """
    if self._axis != 0:
        # This is not implemented in pandas,
        # so we throw a different message
        raise NotImplementedError("axis other than 0 is not supported")

    if (
        callable(func)
        and isinstance(func, BuiltinFunctionType)
        and func.__name__ in dir(self)
    ):
        func = func.__name__

    relabeling_required = False
    if isinstance(func, dict) or func is None:

        def try_get_str_func(fn):
            if not isinstance(fn, str) and isinstance(fn, Iterable):
                return [try_get_str_func(f) for f in fn]
            if not callable(fn):
                return fn
            # ``functools.partial`` objects and callable instances have no
            # ``__name__``; keep them as they are instead of failing with
            # AttributeError.
            name = getattr(fn, "__name__", None)
            return name if name is not None and name in dir(self) else fn

        relabeling_required, func_dict, new_columns, order = reconstruct_func(
            func, **kwargs
        )
        func_dict = {col: try_get_str_func(fn) for col, fn in func_dict.items()}

        if any(col not in self._df.columns for col in func_dict):
            # Prefer ``pandas.errors``; fall back to the older location in
            # ``pandas.core.base`` when it is not available there.
            try:
                from pandas.errors import SpecificationError
            except ImportError:
                from pandas.core.base import SpecificationError

            raise SpecificationError("nested renamer is not supported")
        if func is None:
            # The keyword arguments were the aggregation spec itself and were
            # already consumed by ``reconstruct_func``.
            kwargs = {}
        func = func_dict
    elif is_list_like(func):
        return self._default_to_pandas(
            lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs),
            *args,
            **kwargs,
        )
    elif callable(func):
        return self._apply_agg_function(
            lambda grp, *args, **kwargs: grp.aggregate(func, *args, **kwargs),
            *args,
            **kwargs,
        )
    elif isinstance(func, str):
        # Using "getattr" here masks possible AttributeError which we throw
        # in __getattr__, so we should call __getattr__ directly instead.
        agg_func = self.__getattr__(func)
        if callable(agg_func):
            return agg_func(*args, **kwargs)

    result = self._apply_agg_function(
        func,
        *args,
        **kwargs,
    )

    if relabeling_required:
        result = result.iloc[:, order]
        result.columns = new_columns
    return result
def aggregate(self, func=None, *args, **kwargs):
    """
    Aggregate the grouped data with the given function specification.

    Parameters
    ----------
    func : str, callable, dict, list-like or None, default: None
        How to aggregate:

        - a builtin function whose name is listed in ``dir(self)`` is first
          replaced by that name and then handled like a string;
        - a dict, or None (specification given through ``**kwargs``), is
          normalized with ``reconstruct_func`` and executed through
          ``_apply_agg_function``;
        - any other list-like is executed through ``_default_to_pandas``;
        - a callable is applied to every group through
          ``_apply_agg_function``;
        - a string is looked up with ``__getattr__`` and, when the attribute
          is callable, called with ``*args`` and ``**kwargs``.
    *args : list
        Positional arguments forwarded to the aggregation.
    **kwargs : dict
        Keyword arguments forwarded to the aggregation. When `func` is None
        they are consumed by ``reconstruct_func`` and not forwarded further.

    Returns
    -------
    object
        The value produced by the selected execution path. If
        ``reconstruct_func`` reports that relabeling is required, the columns
        of the result are reordered and renamed; with ``self._as_index``
        being false the leading columns that are not part of the new labels
        keep their position and names.

    Raises
    ------
    NotImplementedError
        If ``self._axis`` is not 0.
    SpecificationError
        If a key of the aggregation dictionary is not in ``self._df.columns``.
    """
    if self._axis != 0:
        # This is not implemented in pandas,
        # so we throw a different message
        raise NotImplementedError("axis other than 0 is not supported")

    if (
        callable(func)
        and isinstance(func, BuiltinFunctionType)
        and func.__name__ in dir(self)
    ):
        func = func.__name__

    relabeling_required = False
    if isinstance(func, dict) or func is None:

        def try_get_str_func(fn):
            if not isinstance(fn, str) and isinstance(fn, Iterable):
                return [try_get_str_func(f) for f in fn]
            if not callable(fn):
                return fn
            # ``functools.partial`` objects and callable instances have no
            # ``__name__``; keep them as they are instead of failing with
            # AttributeError.
            name = getattr(fn, "__name__", None)
            return name if name is not None and name in dir(self) else fn

        relabeling_required, func_dict, new_columns, order = reconstruct_func(
            func, **kwargs
        )
        func_dict = {col: try_get_str_func(fn) for col, fn in func_dict.items()}

        if any(col not in self._df.columns for col in func_dict):
            # Prefer ``pandas.errors``; fall back to the older location in
            # ``pandas.core.base`` when it is not available there.
            try:
                from pandas.errors import SpecificationError
            except ImportError:
                from pandas.core.base import SpecificationError

            raise SpecificationError("nested renamer is not supported")
        if func is None:
            # The keyword arguments were the aggregation spec itself and were
            # already consumed by ``reconstruct_func``.
            kwargs = {}
        func = func_dict
    elif is_list_like(func):
        return self._default_to_pandas(
            lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs),
            *args,
            **kwargs,
        )
    elif callable(func):
        return self._apply_agg_function(
            lambda grp, *args, **kwargs: grp.aggregate(func, *args, **kwargs),
            *args,
            **kwargs,
        )
    elif isinstance(func, str):
        # Using "getattr" here masks possible AttributeError which we throw
        # in __getattr__, so we should call __getattr__ directly instead.
        agg_func = self.__getattr__(func)
        if callable(agg_func):
            return agg_func(*args, **kwargs)

    result = self._apply_agg_function(
        func,
        *args,
        **kwargs,
    )

    if relabeling_required:
        if not self._as_index:
            # The result has more columns than there are new labels; the
            # surplus leading columns (presumably the 'by' columns - confirm)
            # must stay in front, so the requested order is shifted past them.
            nby_cols = len(result.columns) - len(new_columns)
            order = np.concatenate([np.arange(nby_cols), order + nby_cols])
            by_cols = result.columns[:nby_cols]
            new_columns = pandas.Index(new_columns)
            if by_cols.nlevels != new_columns.nlevels:
                # Drop the levels that only hold the empty-string label so
                # the leading labels can be appended to the new ones.
                by_cols = by_cols.remove_unused_levels()
                empty_levels = [
                    i
                    for i, level in enumerate(by_cols.levels)
                    if len(level) == 1 and level[0] == ""
                ]
                by_cols = by_cols.droplevel(empty_levels)
            new_columns = by_cols.append(new_columns)
        result = result.iloc[:, order]
        result.columns = new_columns
    return result
def aggregate(self, func=None, *args, **kwargs):
    """
    Aggregate the grouped data with the given function specification.

    Parameters
    ----------
    func : str, callable, dict, list-like or None, default: None
        How to aggregate:

        - a builtin function whose name is listed in ``dir(self)`` is first
          replaced by that name and then handled like a string;
        - a dict, or None (specification given through ``**kwargs``), is
          normalized with ``reconstruct_func`` and executed through
          ``_wrap_aggregation``;
        - any other list-like is executed through ``_default_to_pandas``;
        - a callable is executed through ``_wrap_aggregation`` and the
          outcome is passed to ``_check_index``;
        - a string is looked up with ``__getattr__`` and, when the attribute
          is callable, called with ``*args`` and ``**kwargs``.
    *args : list
        Positional arguments forwarded to the aggregation.
    **kwargs : dict
        Keyword arguments forwarded to the aggregation. When `func` is None
        they are consumed by ``reconstruct_func`` and not forwarded further.

    Returns
    -------
    object
        The value produced by the selected execution path. If
        ``reconstruct_func`` reports that relabeling is required, the columns
        of the result are reordered and renamed; with ``self._as_index``
        being false the leading columns that are not part of the new labels
        keep their position and names.

    Raises
    ------
    NotImplementedError
        If ``self._axis`` is not 0.
    SpecificationError
        If a key of the aggregation dictionary is not in ``self._df.columns``.
    """
    if self._axis != 0:
        # This is not implemented in pandas,
        # so we throw a different message
        raise NotImplementedError("axis other than 0 is not supported")

    if (
        callable(func)
        and isinstance(func, BuiltinFunctionType)
        and func.__name__ in dir(self)
    ):
        func = func.__name__

    relabeling_required = False
    if isinstance(func, dict) or func is None:

        def try_get_str_func(fn):
            if not isinstance(fn, str) and isinstance(fn, Iterable):
                return [try_get_str_func(f) for f in fn]
            if not callable(fn):
                return fn
            # ``functools.partial`` objects and callable instances have no
            # ``__name__``; keep them as they are instead of failing with
            # AttributeError.
            name = getattr(fn, "__name__", None)
            return name if name is not None and name in dir(self) else fn

        relabeling_required, func_dict, new_columns, order = reconstruct_func(
            func, **kwargs
        )
        func_dict = {col: try_get_str_func(fn) for col, fn in func_dict.items()}

        if (
            relabeling_required
            and not self._as_index
            and any(col in func_dict for col in self._internal_by)
        ):
            # Only a notification: execution continues after this call.
            ErrorMessage.missmatch_with_pandas(
                operation="GroupBy.aggregate(**dictionary_renaming_aggregation)",
                message=(
                    "intersection of the columns to aggregate and 'by' is not yet supported when 'as_index=False', "
                    "columns with group names of the intersection will not be presented in the result. "
                    "To achieve the desired result rewrite the original code from:\n"
                    "df.groupby('by_column', as_index=False).agg(agg_func=('by_column', agg_func))\n"
                    "to the:\n"
                    "df.groupby('by_column').agg(agg_func=('by_column', agg_func)).reset_index()"
                ),
            )

        if any(col not in self._df.columns for col in func_dict):
            # Prefer ``pandas.errors``; fall back to the older location in
            # ``pandas.core.base`` when it is not available there.
            try:
                from pandas.errors import SpecificationError
            except ImportError:
                from pandas.core.base import SpecificationError

            raise SpecificationError("nested renamer is not supported")
        if func is None:
            # The keyword arguments were the aggregation spec itself and were
            # already consumed by ``reconstruct_func``.
            kwargs = {}
        func = func_dict
    elif is_list_like(func):
        return self._default_to_pandas(
            lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs),
            *args,
            **kwargs,
        )
    elif callable(func):
        return self._check_index(
            self._wrap_aggregation(
                qc_method=type(self._query_compiler).groupby_agg,
                numeric_only=False,
                agg_func=func,
                agg_args=args,
                agg_kwargs=kwargs,
                how="axis_wise",
            )
        )
    elif isinstance(func, str):
        # Using "getattr" here masks possible AttributeError which we throw
        # in __getattr__, so we should call __getattr__ directly instead.
        agg_func = self.__getattr__(func)
        if callable(agg_func):
            return agg_func(*args, **kwargs)

    result = self._wrap_aggregation(
        qc_method=type(self._query_compiler).groupby_agg,
        numeric_only=False,
        agg_func=func,
        agg_args=args,
        agg_kwargs=kwargs,
        how="axis_wise",
    )

    if relabeling_required:
        if not self._as_index:
            # The result has more columns than there are new labels; the
            # surplus leading columns (presumably the 'by' columns - confirm)
            # must stay in front, so the requested order is shifted past them.
            nby_cols = len(result.columns) - len(new_columns)
            order = np.concatenate([np.arange(nby_cols), order + nby_cols])
            by_cols = result.columns[:nby_cols]
            new_columns = pandas.Index(new_columns)
            if by_cols.nlevels != new_columns.nlevels:
                # Drop the levels that only hold the empty-string label so
                # the leading labels can be appended to the new ones.
                by_cols = by_cols.remove_unused_levels()
                empty_levels = [
                    i
                    for i, level in enumerate(by_cols.levels)
                    if len(level) == 1 and level[0] == ""
                ]
                by_cols = by_cols.droplevel(empty_levels)
            new_columns = by_cols.append(new_columns)
        result = result.iloc[:, order]
        result.columns = new_columns
    return result