def df_rolling_method_other_none_codegen(method_name, self, args=None, kws=None, rewrite_name=None):
    """Generate the source of a DataFrame rolling method implementation for ``other=None``.

    Parameters
    ----------
    method_name : str
        Name forwarded to ``df_rolling_method_main_codegen``.
    self
        Rolling object type; ``self.data.columns`` and ``self.data.column_loc``
        are forwarded to ``df_rolling_method_main_codegen``.
    args : list of str, optional
        Names of positional parameters of the generated function.
    kws : dict, optional
        Keyword parameters of the generated function. Keys are parameter
        names; the dict is rendered into the signature by ``kwsparams2list``.
    rewrite_name : str, optional
        When given, used instead of ``method_name`` in the generated function
        name and in the error message.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``: the generated source and the globals
        it must be executed with.
    """
    _method_name = rewrite_name or method_name
    args = args or []
    kwargs = kws or {}

    impl_params = ['self'] + args + kwsparams2list(kwargs)
    impl_params_as_str = ', '.join(impl_params)

    impl_name = f'_df_rolling_{_method_name}_other_none_impl'
    func_lines = [f'def {impl_name}({impl_params_as_str}):']

    if 'pairwise' in kwargs:
        # With other=None an unspecified pairwise is treated as True, which
        # the generated code rejects: only pairwise=False is accepted.
        # NOTE(review): the two-space base indent is assumed to match the lines
        # returned by df_rolling_method_main_codegen -- confirm.
        func_lines += [
            '  if pairwise is None:',
            '    _pairwise = True',
            '  else:',
            '    _pairwise = pairwise',
            '  if _pairwise:',
            f'    raise ValueError("Method rolling.{_method_name}(). The object pairwise\\n expected: False")'
        ]

    # 'other' is known to be None here, so it is not forwarded to the per-column call
    method_params = args + [f'{k}={k}' for k in kwargs if k != 'other']
    # same call shape as df_rolling_method_codegen: columns, column_loc, method name
    func_lines += df_rolling_method_main_codegen(method_params, self.data.columns, self.data.column_loc, method_name)
    func_text = '\n'.join(func_lines)

    global_vars = {'pandas': pandas}

    return func_text, global_vars
def test_range_index_create_defaults(self):
    """Check pd.RangeIndex creation when each constructor argument is omitted in turn."""
    template = '\n'.join([
        'def test_impl():',
        ' return pd.RangeIndex({})',
    ])

    # every parameter has a non-default value; exactly one is dropped per sub-test
    explicit_values = {
        'start': 2,
        'stop': 7,
        'step': 2,
        'name': "'index'",
    }

    for omitted in explicit_values:
        with self.subTest(omitted=omitted):
            passed = dict(explicit_values)
            del passed[omitted]

            call_args = ', '.join(kwsparams2list(passed))
            test_impl = _make_func_from_text(template.format(call_args), global_vars={'pd': pd})
            sdc_func = self.jit(test_impl)

            # compare the jitted result against the plain-Python reference
            result = sdc_func()
            result_ref = test_impl()
            pd.testing.assert_index_equal(result, result_ref)
def _sdc_pandas_groupby_generic_func_codegen(func_name, columns, func_params, defaults, impl_params):
    """Generate the source of a DataFrame groupby aggregation implementation.

    Parameters
    ----------
    func_name : str
        Series method called on every group (also part of the generated
        function name).
    columns : sequence of (name, id) pairs
        Aggregated columns: ``name`` becomes a key of the resulting DataFrame,
        ``id`` is passed to ``get_dataframe_data``.
    func_params : sequence of str
        Parameter names of the generated function; the first one names the
        groupby object.
    defaults
        Passed with ``func_params`` to ``sigparams2list`` to build the signature.
    impl_params : dict
        Rendered by ``kwsparams2list`` into the arguments of the per-group call.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``. The generated code also references
        ``res_arrays_dtypes``, which is not part of ``global_vars`` and has to
        be supplied by the caller.
    """
    all_params_as_str = ', '.join(sigparams2list(func_params, defaults))
    extra_impl_params = ', '.join(kwsparams2list(impl_params))

    groupby_obj = f'{func_params[0]}'
    df = f'{groupby_obj}._parent'
    groupby_dict = f'{groupby_obj}._data'
    groupby_param_sort = f'{groupby_obj}._sort'
    column_names, column_ids = tuple(zip(*columns))

    func_lines = [
        f'def _dataframe_groupby_{func_name}_impl({all_params_as_str}):',
        f'  group_keys = _sdc_asarray([key for key in {groupby_dict}])',
        '  res_index_len = len(group_keys)',
        f'  if {groupby_param_sort}:',
        "    argsorted_index = sdc_arrays_argsort(group_keys, kind='mergesort')",
    ]

    # TODO: remove conversion from Numba typed.List to reflected one while creating group_arr_{i}
    for i, column_id in enumerate(column_ids):
        # Output position j holds the aggregate of the group whose key is
        # group_keys[argsorted_index[j]] (a gather), so the data stays aligned
        # with res_index = _sdc_take(group_keys, argsorted_index) built below.
        func_lines += [
            f'  result_data_{i} = numpy.empty(res_index_len, dtype=res_arrays_dtypes[{i}])',
            '  for j in numba.prange(res_index_len):',
            f'    column_data_{i} = get_dataframe_data({df}, {column_id})',
            f'    idx = argsorted_index[j] if {groupby_param_sort} else j',
            f'    group_arr_{i} = _sdc_take(column_data_{i}, list({groupby_dict}[group_keys[idx]]))',
            f'    group_series_{i} = pandas.Series(group_arr_{i})',
            f'    result_data_{i}[j] = group_series_{i}.{func_name}({extra_impl_params})',
        ]

    data = ', '.join(f"'{name}': result_data_{i}" for i, name in enumerate(column_names))
    func_lines += [
        f'  if {groupby_param_sort}:',
        '    res_index = _sdc_take(group_keys, argsorted_index)',
        '  else:',
        '    res_index = group_keys',
        f'  return pandas.DataFrame({{{data}}}, index=res_index)',
    ]

    func_text = '\n'.join(func_lines)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'numba': numba,
        '_sdc_asarray': _sdc_asarray,
        '_sdc_take': _sdc_take,
        'sdc_arrays_argsort': sdc_arrays_argsort,
        'get_dataframe_data': get_dataframe_data
    }

    return func_text, global_vars
def get_groupby_params(**kwargs):
    """Build the comma-separated argument text for the supported groupby parameters.

    Only ``by`` and ``sort`` are considered: a value given in ``kwargs``
    replaces the built-in default, any other keyword is ignored.
    """
    # defaults of the only groupby parameters supported here
    supported_defaults = (('by', "'A'"), ('sort', 'True'))

    selected = {}
    for name, fallback in supported_defaults:
        selected[name] = kwargs.get(name, fallback)

    return ', '.join(kwsparams2list(selected))
def df_rolling_method_codegen(method_name, self, args=None, kws=None):
    """Generate the source of a DataFrame rolling method implementation.

    ``args`` lists positional parameter names and ``kws`` the keyword
    parameters of the generated function; every one of them is forwarded to
    the per-column call produced by ``df_rolling_method_main_codegen``.
    Returns the ``(func_text, global_vars)`` pair.
    """
    positional = args if args else []
    keywords = kws if kws else {}

    # signature of the generated function: self, positionals, then keywords
    signature = ', '.join(['self'] + positional + kwsparams2list(keywords))
    header = f'def _df_rolling_{method_name}_impl({signature}):'

    # every keyword is passed through under its own name
    forwarded = positional + [f'{name}={name}' for name in keywords]
    body = df_rolling_method_main_codegen(forwarded, self.data.columns, self.data.column_loc, method_name)

    func_text = '\n'.join([header] + body)
    return func_text, {'pandas': pandas}
def _sdc_pandas_series_groupby_generic_func_codegen(func_name, func_params, defaults, impl_params):
    """Generate the source of a Series groupby aggregation implementation.

    Parameters
    ----------
    func_name : str
        Series method called on every group (also part of the generated
        function name).
    func_params : sequence of str
        Parameter names of the generated function; the first one names the
        groupby object.
    defaults
        Passed with ``func_params`` to ``sigparams2list`` to build the signature.
    impl_params : dict
        Rendered by ``kwsparams2list`` into the arguments of the per-group call.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``. The generated code also references
        ``res_dtype``, which is not part of ``global_vars`` and has to be
        supplied by the caller.
    """
    all_params_as_str = ', '.join(sigparams2list(func_params, defaults))
    extra_impl_params = ', '.join(kwsparams2list(impl_params))

    groupby_obj = f'{func_params[0]}'
    series = f'{groupby_obj}._parent'
    groupby_dict = f'{groupby_obj}._data'
    groupby_param_sort = f'{groupby_obj}._sort'

    # TODO: remove conversion from Numba typed.List to reflected one while creating group_arr_{i}
    # Output position j holds the aggregate of the group whose key is
    # group_keys[argsorted_index[j]] (a gather), so the data stays aligned
    # with res_index = _sdc_take(group_keys, argsorted_index).
    func_lines = [
        f'def _series_groupby_{func_name}_impl({all_params_as_str}):',
        f'  group_keys = _sdc_asarray([key for key in {groupby_dict}])',
        '  res_index_len = len(group_keys)',
        f'  if {groupby_param_sort}:',
        "    argsorted_index = sdc_arrays_argsort(group_keys, kind='mergesort')",
        '  result_data = numpy.empty(res_index_len, dtype=res_dtype)',
        '  for j in numba.prange(res_index_len):',
        f'    idx = argsorted_index[j] if {groupby_param_sort} else j',
        f'    group_arr = _sdc_take({series}._data, list({groupby_dict}[group_keys[idx]]))',
        '    group_series = pandas.Series(group_arr)',
        f'    result_data[j] = group_series.{func_name}({extra_impl_params})',
        f'  if {groupby_param_sort}:',
        '    res_index = _sdc_take(group_keys, argsorted_index)',
        '  else:',
        '    res_index = group_keys',
        f'  return pandas.Series(data=result_data, index=res_index, name={series}._name)'
    ]

    func_text = '\n'.join(func_lines)
    global_vars = {
        'pandas': pandas,
        'numpy': numpy,
        'numba': numba,
        '_sdc_asarray': _sdc_asarray,
        '_sdc_take': _sdc_take,
        'sdc_arrays_argsort': sdc_arrays_argsort
    }

    return func_text, global_vars
def df_rolling_method_other_df_codegen(method_name, self, other, args=None, kws=None):
    """Generate the source of a DataFrame rolling method taking another DataFrame.

    Columns present in both ``self.data`` and ``other`` are processed with
    the Series rolling method; columns present in only one of them are filled
    with NaN in the result.

    Parameters
    ----------
    method_name : str
        Series rolling method called per common column (also part of the
        generated function name and error message).
    self
        Rolling object type; ``self.data.columns`` gives the column names.
    other
        DataFrame type; ``other.columns`` gives its column names.
    args : list of str, optional
        Names of positional parameters of the generated function.
    kws : dict, optional
        Keyword parameters of the generated function, rendered into the
        signature by ``kwsparams2list``.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``: the generated source and the globals
        it must be executed with.
    """
    args = args or []
    kwargs = kws or {}

    rolling_params = df_rolling_params_codegen()
    method_kws = {k: k for k in kwargs}
    impl_params = ['self'] + args + kwsparams2list(kwargs)
    impl_params_as_str = ', '.join(impl_params)

    data_columns = {col: idx for idx, col in enumerate(self.data.columns)}
    other_columns = {col: idx for idx, col in enumerate(other.columns)}

    # columns order matters: columns of self first, then those only `other` has
    all_columns = list(data_columns) + [col for col in other_columns if col not in data_columns]

    impl_name = f'_df_rolling_{method_name}_other_df_impl'
    func_lines = [f'def {impl_name}({impl_params_as_str}):']

    if 'pairwise' in kwargs:
        # an unspecified pairwise is treated as False; True is rejected
        func_lines += [
            '  if pairwise is None:',
            '    _pairwise = False',
            '  else:',
            '    _pairwise = pairwise',
            '  if _pairwise:',
            f'    raise ValueError("Method rolling.{method_name}(). The object pairwise\\n expected: False, None")'
        ]

    data_length = 'len(self._data._data[0])' if data_columns else '0'
    other_length = 'len(other._data[0])' if other_columns else '0'
    func_lines += [f'  length = max([{data_length}, {other_length}])']

    results = []
    for col in all_columns:
        res_data = f'result_data_{col}'
        if col in data_columns and col in other_columns:
            other_series = f'other_series_{col}'
            # the per-column call receives the matching column of `other`
            method_kws['other'] = other_series
            method_params = ', '.join(args + kwsparams2list(method_kws))
            func_lines += [
                f'  data_{col} = self._data._data[{data_columns[col]}]',
                f'  other_data_{col} = other._data[{other_columns[col]}]',
                f'  series_{col} = pandas.Series(data_{col})',
                f'  {other_series} = pandas.Series(other_data_{col})',
                f'  rolling_{col} = series_{col}.rolling({rolling_params})',
                f'  result_{col} = rolling_{col}.{method_name}({method_params})',
                f'  {res_data} = result_{col}._data[:length]'
            ]
        else:
            # column missing on one side: the whole result column is NaN
            func_lines += [
                f'  {res_data} = numpy.empty(length, dtype=float64)',
                f'  {res_data}[:] = numpy.nan'
            ]
        results.append((col, res_data))

    data = ', '.join(f'"{col}": {col_data}' for col, col_data in results)
    func_lines += [f'  return pandas.DataFrame({{{data}}})']
    func_text = '\n'.join(func_lines)

    global_vars = {'numpy': numpy, 'pandas': pandas, 'float64': float64}

    return func_text, global_vars