def generic(self, args, kws):
    """Type a ``pivot_table()`` call and return its signature.

    ``args`` must unpack into exactly six types, in this order: the
    dataframe, ``values``, ``index``, ``columns``, ``aggfunc`` and the
    pivot-values meta type.  Keyword arguments are not accepted.

    The element type of the output columns is obtained by running type
    inference on the aggregation function selected by ``aggfunc`` with the
    array type of the ``values`` column as its only argument.  The result is
    a ``DataFrameType`` with one such column per entry of
    ``_pivot_values.meta``.

    Raises:
        ValueError: if ``values``, ``index`` or ``columns`` is not a
            ``types.StringLiteral``.
    """
    assert not kws
    df, values, index, columns, aggfunc, _pivot_values = args

    all_literal = (isinstance(values, types.StringLiteral)
                   and isinstance(index, types.StringLiteral)
                   and isinstance(columns, types.StringLiteral))
    if not all_literal:
        # The trailing space matters: adjacent literals are concatenated
        # verbatim, and without it the message reads "for'values'".
        raise ValueError(
            "pivot_table() only support string constants for "
            "'values', 'index' and 'columns' arguments")

    # Only the 'values' column name is needed for typing; 'index' and
    # 'columns' merely have to be string literals (checked above).
    values = values.literal_value

    # get output data type
    # NOTE(review): assumes df.columns and df.data are parallel sequences;
    # .index() raises if 'values' is not one of the dataframe's columns.
    data = df.data[df.columns.index(values)]
    func = get_agg_func(None, aggfunc.literal_value, None)
    f_ir = numba.ir_utils.get_ir_of_code(
        {'np': np, 'numba': numba, 'sdc': sdc}, func.__code__)
    _, out_dtype, _ = numba.typed_passes.type_inference_stage(
        self.context, f_ir, (data,), None)
    out_arr_typ = _get_series_array_type(out_dtype)

    # One output column per pivot value, all sharing the same array type.
    pivot_vals = _pivot_values.meta
    n_vals = len(pivot_vals)
    out_df = DataFrameType((out_arr_typ,) * n_vals, None, tuple(pivot_vals))

    return signature(out_df, *args)
def _get_agg_typ(self, grp, args, code):
    """Build the signature of a groupby aggregation.

    ``code`` is the code object of the aggregation function.  It is turned
    into IR once, and type inference is run on that IR for every selected
    column to obtain that column's output dtype.

    The result type is a ``DataFrameType`` holding (optionally) the key
    columns followed by the aggregated columns, or a series type when a
    single column was explicitly selected and ``as_index`` is set.
    """
    agg_ir = numba.ir_utils.get_ir_of_code(
        {'np': np, 'numba': numba, 'sdc': sdc}, code)

    def input_array_type(name):
        # Array type of the named column in the grouped dataframe.
        frame = grp.df_type
        return frame.data[frame.columns.index(name)]

    names = []
    arr_types = []

    # Key columns are part of the output only when as_index is off.
    if not grp.as_index:
        for key in grp.keys:
            names.append(key)
            arr_types.append(input_array_type(key))

    # Infer the aggregated type of every selected column.
    for col in grp.selection:
        names.append(col)
        col_arr = input_array_type(col)
        _, result_dtype, _ = numba.typed_passes.type_inference_stage(
            self.context, agg_ir, (col_arr,), None)
        arr_types.append(_get_series_array_type(result_dtype))

    result_type = DataFrameType(tuple(arr_types), None, tuple(names))

    # XXX output becomes series if single output and explicitly selected
    single_explicit = (len(grp.selection) == 1
                       and grp.explicit_select
                       and grp.as_index)
    if single_explicit:
        result_type = arr_to_series_type(arr_types[0])

    return signature(result_type, *args)
def generic_resolve(self, rolling, func_name):
    """Resolve ``rolling.<func_name>`` to a bound method type.

    Raises ``ValueError`` when ``func_name`` is not listed in
    ``supported_rolling_funcs``.  Otherwise a fresh ``AbstractTemplate``
    subclass keyed ``'rolling.' + func_name`` is created and returned
    wrapped in a ``types.BoundFunction`` bound to ``rolling``.
    """
    if func_name not in supported_rolling_funcs:
        raise ValueError("only ({}) supported in rolling".format(
            ", ".join(supported_rolling_funcs)))

    template_key = 'rolling.' + func_name

    # output is always float64
    float_arr = types.Array(types.float64, 1, 'C')

    # TODO: handle Series case (explicit select)
    selected = rolling.selection
    # the 'on' column, when present, is part of the output as well
    if rolling.on is not None:
        selected = selected + (rolling.on,)

    # Pandas sorts the output column names (_flex_binary_moment:
    # res_columns = arg1.columns.union(arg2.columns))
    sorted_names = tuple(sorted(selected))

    col_types = [float_arr] * len(sorted_names)
    if rolling.on is not None:
        # the offset key keeps its input array type instead of float64
        dst = sorted_names.index(rolling.on)
        src = rolling.df_type.columns.index(rolling.on)
        col_types[dst] = rolling.df_type.data[src]

    frame_type = DataFrameType(tuple(col_types), None, sorted_names)

    class MethodTemplate(AbstractTemplate):
        key = template_key

        def generic(self, args, kws):
            # Everything except cov/corr returns the precomputed frame type.
            if func_name not in ('cov', 'corr'):
                return signature(frame_type, *args)

            if len(args) != 1:
                raise ValueError(
                    "rolling {} requires one argument (other)".format(
                        func_name))

            # XXX pandas only accepts variable window cov/corr
            # when both inputs have time index
            # TODO: support variable window rolling cov/corr which is only
            # possible in pandas with time index
            if rolling.on is not None:
                raise ValueError(
                    "variable window rolling {} not supported yet.".format(
                        func_name))

            other = args[0]
            # Output columns are the sorted union of this rolling object's
            # output names and the other frame's columns, all float64.
            # TODO: support pairwise arg
            union_names = tuple(
                sorted(set(sorted_names) | set(other.columns)))
            union_type = DataFrameType(
                (float_arr,) * len(union_names), None, union_names)
            return signature(union_type, *args)

    return types.BoundFunction(MethodTemplate, rolling)
def generic(self, args, kws):
    """Type a frequency-table call and return its signature.

    ``args`` must unpack into exactly three types: ``index``, ``columns``
    and the pivot-values meta type.  Keyword arguments are not accepted.
    The output is a ``DataFrameType`` with one 1-D, C-contiguous int64
    column per entry of the meta type's ``meta`` attribute.
    """
    assert not kws
    # Only the third argument is used; the unpacking also enforces arity.
    _index, _columns, pivot_meta = args

    # TODO: support agg func other than frequency
    count_arr = types.Array(types.int64, 1, 'C')

    labels = pivot_meta.meta
    column_types = (count_arr,) * len(labels)
    frame_type = DataFrameType(column_types, None, tuple(labels))

    return signature(frame_type, *args)
def generic(self, args, kws):
    """Type a rolling method call and return its signature.

    Relies on ``func_name``, ``rolling``, ``columns``, ``out_arr`` and
    ``out_typ`` being available from the enclosing scope.

    For ``cov``/``corr`` exactly one positional argument (``other``) is
    required and ``rolling.on`` must be ``None``; otherwise ``ValueError``
    is raised.  Every other function returns ``out_typ`` unchanged.
    """
    # Everything except cov/corr shares the precomputed output type.
    if func_name not in ('cov', 'corr'):
        return signature(out_typ, *args)

    if len(args) != 1:
        raise ValueError(
            "rolling {} requires one argument (other)".format(func_name))

    # XXX pandas only accepts variable window cov/corr
    # when both inputs have time index
    # TODO: support variable window rolling cov/corr which is only
    # possible in pandas with time index
    if rolling.on is not None:
        raise ValueError(
            "variable window rolling {} not supported yet.".format(
                func_name))

    other = args[0]
    # Output columns are the sorted union of ``columns`` and the other
    # frame's columns; each one uses ``out_arr`` as its array type.
    # TODO: support pairwise arg
    union_names = tuple(sorted(set(columns) | set(other.columns)))
    union_type = DataFrameType(
        (out_arr,) * len(union_names), None, union_names)
    return signature(union_type, *args)
def typeof_pd_dataframe(val, c):
    """Return the ``DataFrameType`` describing the dataframe ``val``.

    Column names come from ``val.columns.tolist()`` and column types from
    ``get_hiframes_dtypes(val)``.  The index slot is ``None`` and the final
    positional flag is passed as ``True``.  ``c`` is not used here.
    """
    names = tuple(val.columns.tolist())
    # TODO: support other types like string and timestamp
    return DataFrameType(get_hiframes_dtypes(val), None, names, True)