def _sdc_pandas_groupby_generic_func_codegen(func_name, columns, func_params, defaults, impl_params):
    """Build the source text of a DataFrame.groupby aggregation implementation.

    The generated function is named ``_dataframe_groupby_{func_name}_impl``. For
    every selected column it allocates a result array, then for each group takes
    the group's rows out of the column, wraps them in a ``pandas.Series`` and
    stores the value of ``Series.{func_name}(...)``. The per-column results are
    returned as a ``pandas.DataFrame`` indexed by the group keys.

    Parameters
    ----------
    func_name : str
        Name of the Series method called on each group; also embedded in the
        generated function's name.
    columns : sequence of (column_name, column_id) pairs
        Columns to aggregate. Must be non-empty: the pairs are unzipped into
        two tuples and an empty sequence fails to unpack.
    func_params : sequence of str
        Parameter names of the generated function; ``func_params[0]`` is the
        name of the groupby object argument.
    defaults
        Forwarded to ``sigparams2list`` together with ``func_params`` to build
        the generated signature (presumably name -> default value; confirm
        against ``sigparams2list``).
    impl_params
        Forwarded to ``kwsparams2list`` to build the arguments passed to the
        Series method (presumably keyword name -> value text; confirm against
        ``kwsparams2list``).

    Returns
    -------
    tuple
        ``(func_text, global_vars)``: the generated source and the globals it
        should be executed with. The generated code also reads
        ``res_arrays_dtypes``, which is NOT part of ``global_vars`` here and
        has to be supplied by the caller before executing the text.
    """
    all_params_as_str = ', '.join(sigparams2list(func_params, defaults))
    extra_impl_params = ', '.join(kwsparams2list(impl_params))

    # Expressions (as source text) that the generated code evaluates on the
    # groupby object passed as its first argument.
    groupby_obj = f'{func_params[0]}'
    df = f'{groupby_obj}._parent'
    groupby_dict = f'{groupby_obj}._data'
    groupby_param_sort = f'{groupby_obj}._sort'
    column_names, column_ids = tuple(zip(*columns))

    # The generated body uses 2 spaces per nesting level; the nesting is
    # significant because this text is compiled as Python source.
    func_lines = [
        f'def _dataframe_groupby_{func_name}_impl({all_params_as_str}):',
        f'  group_keys = _sdc_asarray([key for key in {groupby_dict}])',
        '  res_index_len = len(group_keys)',
        f'  if {groupby_param_sort}:',
        '    argsorted_index = sdc_arrays_argsort(group_keys, kind=\'mergesort\')',
    ]

    # TODO: remove conversion from Numba typed.List to reflected one while creating group_arr_{i}
    for i, column_id in enumerate(column_ids):
        func_lines.extend([
            f'  result_data_{i} = numpy.empty(res_index_len, dtype=res_arrays_dtypes[{i}])',
            '  for j in numba.prange(res_index_len):',
            f'    column_data_{i} = get_dataframe_data({df}, {column_id})',
            # Gather, not scatter: output slot j must hold the aggregate of the
            # group whose key is group_keys[argsorted_index[j]], because the
            # result index below is _sdc_take(group_keys, argsorted_index).
            f'    idx = argsorted_index[j] if {groupby_param_sort} else j',
            f'    group_arr_{i} = _sdc_take(column_data_{i}, list({groupby_dict}[group_keys[idx]]))',
            f'    group_series_{i} = pandas.Series(group_arr_{i})',
            f'    result_data_{i}[j] = group_series_{i}.{func_name}({extra_impl_params})',
        ])

    # repr() yields a correctly quoted/escaped string literal, so column names
    # containing quotes or backslashes still produce a valid dict literal.
    data = ', '.join(
        f'{str(column_name)!r}: result_data_{i}'
        for i, column_name in enumerate(column_names)
    )
    func_lines.extend([
        f'  if {groupby_param_sort}:',
        '    res_index = _sdc_take(group_keys, argsorted_index)',
        '  else:',
        '    res_index = group_keys',
        f'  return pandas.DataFrame({{{data}}}, index=res_index)',
    ])

    func_text = '\n'.join(func_lines)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'numba': numba,
        '_sdc_asarray': _sdc_asarray,
        '_sdc_take': _sdc_take,
        'sdc_arrays_argsort': sdc_arrays_argsort,
        'get_dataframe_data': get_dataframe_data,
    }

    return func_text, global_vars
def _sdc_pandas_series_groupby_generic_func_codegen(func_name, func_params, defaults, impl_params):
    """Build the source text of a Series.groupby aggregation implementation.

    The generated function is named ``_series_groupby_{func_name}_impl``. For
    each group it takes the group's rows out of the parent series data, wraps
    them in a ``pandas.Series`` and stores the value of
    ``Series.{func_name}(...)``. The result is returned as a ``pandas.Series``
    indexed by the group keys and carrying the parent series' name.

    Parameters
    ----------
    func_name : str
        Name of the Series method called on each group; also embedded in the
        generated function's name.
    func_params : sequence of str
        Parameter names of the generated function; ``func_params[0]`` is the
        name of the groupby object argument.
    defaults
        Forwarded to ``sigparams2list`` together with ``func_params`` to build
        the generated signature (presumably name -> default value; confirm
        against ``sigparams2list``).
    impl_params
        Forwarded to ``kwsparams2list`` to build the arguments passed to the
        Series method (presumably keyword name -> value text; confirm against
        ``kwsparams2list``).

    Returns
    -------
    tuple
        ``(func_text, global_vars)``: the generated source and the globals it
        should be executed with. The generated code also reads ``res_dtype``,
        which is NOT part of ``global_vars`` here and has to be supplied by
        the caller before executing the text.
    """
    all_params_as_str = ', '.join(sigparams2list(func_params, defaults))
    extra_impl_params = ', '.join(kwsparams2list(impl_params))

    # Expressions (as source text) that the generated code evaluates on the
    # groupby object passed as its first argument.
    groupby_obj = f'{func_params[0]}'
    series = f'{groupby_obj}._parent'
    groupby_dict = f'{groupby_obj}._data'
    groupby_param_sort = f'{groupby_obj}._sort'

    # The generated body uses 2 spaces per nesting level; the nesting is
    # significant because this text is compiled as Python source.
    # TODO: remove conversion from Numba typed.List to reflected one while creating group_arr_{i}
    func_lines = [
        f'def _series_groupby_{func_name}_impl({all_params_as_str}):',
        f'  group_keys = _sdc_asarray([key for key in {groupby_dict}])',
        '  res_index_len = len(group_keys)',
        f'  if {groupby_param_sort}:',
        '    argsorted_index = sdc_arrays_argsort(group_keys, kind=\'mergesort\')',
        '  result_data = numpy.empty(res_index_len, dtype=res_dtype)',
        '  for j in numba.prange(res_index_len):',
        # Gather, not scatter: output slot j must hold the aggregate of the
        # group whose key is group_keys[argsorted_index[j]], because the
        # result index below is _sdc_take(group_keys, argsorted_index).
        f'    idx = argsorted_index[j] if {groupby_param_sort} else j',
        f'    group_arr = _sdc_take({series}._data, list({groupby_dict}[group_keys[idx]]))',
        '    group_series = pandas.Series(group_arr)',
        f'    result_data[j] = group_series.{func_name}({extra_impl_params})',
        f'  if {groupby_param_sort}:',
        '    res_index = _sdc_take(group_keys, argsorted_index)',
        '  else:',
        '    res_index = group_keys',
        f'  return pandas.Series(data=result_data, index=res_index, name={series}._name)',
    ]

    func_text = '\n'.join(func_lines)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'numba': numba,
        '_sdc_asarray': _sdc_asarray,
        '_sdc_take': _sdc_take,
        'sdc_arrays_argsort': sdc_arrays_argsort,
    }

    return func_text, global_vars