Ejemplo n.º 1
0
    def generic(self, args, kws):
        """Type a ``pivot_table()`` call and build its signature.

        ``args`` must unpack to ``(df, values, index, columns, aggfunc,
        _pivot_values)``.  The result type is a ``DataFrameType`` with one
        column per entry of ``_pivot_values.meta``; every column uses the
        array type derived from running type inference on the aggregation
        function with the ``values`` column's data type as its only argument.

        Raises:
            ValueError: if ``values``, ``index`` or ``columns`` is not a
                ``types.StringLiteral``.
        """
        assert not kws
        df, values, index, columns, aggfunc, _pivot_values = args

        if not (isinstance(values, types.StringLiteral)
                and isinstance(index, types.StringLiteral)
                and isinstance(columns, types.StringLiteral)):
            # The trailing space matters: adjacent literals are concatenated
            # verbatim, so without it the message reads "for'values'".
            raise ValueError("pivot_table() only supports string constants for "
                             "'values', 'index' and 'columns' arguments")

        # Only the 'values' column name is needed for typing; 'index' and
        # 'columns' are validated above but not otherwise used here.
        values = values.literal_value

        # get output data type
        data = df.data[df.columns.index(values)]
        # NOTE(review): get_agg_func is defined outside this view; it is
        # presumably returning a Python function for the named aggregation.
        func = get_agg_func(None, aggfunc.literal_value, None)
        f_ir = numba.ir_utils.get_ir_of_code(
            {
                'np': np,
                'numba': numba,
                'hpat': hpat
            }, func.__code__)
        # The second element of the inference result is used as the output
        # dtype of the aggregation.
        _, out_dtype, _ = numba.typed_passes.type_inference_stage(
            self.context, f_ir, (data, ), None)
        out_arr_typ = _get_series_array_type(out_dtype)

        # One output column per pivot value, all sharing the same array type.
        pivot_vals = _pivot_values.meta
        n_vals = len(pivot_vals)
        out_df = DataFrameType((out_arr_typ, ) * n_vals, None,
                               tuple(pivot_vals))

        return signature(out_df, *args)
Ejemplo n.º 2
0
    def _get_agg_typ(self, grp, args, code):
        """Build the signature of a groupby aggregation described by ``code``.

        The output is a ``DataFrameType`` holding (when ``grp.as_index`` is
        false) the key columns with their input types, followed by one column
        per entry of ``grp.selection`` whose type comes from running type
        inference on ``code`` with that column's data type as the argument.
        When exactly one column is explicitly selected and ``grp.as_index``
        is true, the output is the series type of that single column instead.
        """
        frame = grp.df_type
        agg_ir = numba.ir_utils.get_ir_of_code(
            {'np': np, 'numba': numba, 'hpat': hpat}, code)

        col_names = []
        col_types = []

        # key columns are carried through unchanged when not used as index
        if not grp.as_index:
            for key in grp.keys:
                col_names.append(key)
                col_types.append(frame.data[frame.columns.index(key)])

        # infer the aggregated type of every selected column
        for sel in grp.selection:
            col_names.append(sel)
            in_arr = frame.data[frame.columns.index(sel)]
            _typemap, ret_dtype, _calltypes = \
                numba.typed_passes.type_inference_stage(
                    self.context, agg_ir, (in_arr, ), None)
            col_types.append(_get_series_array_type(ret_dtype))

        result = DataFrameType(tuple(col_types), None, tuple(col_names))
        # XXX a single, explicitly selected column yields a series
        single_explicit = len(grp.selection) == 1 and grp.explicit_select
        if single_explicit and grp.as_index:
            result = arr_to_series_type(col_types[0])
        return signature(result, *args)
Ejemplo n.º 3
0
    def generic_resolve(self, rolling, func_name):
        """Resolve ``rolling.<func_name>`` to a bound function type.

        Raises ``ValueError`` when ``func_name`` is not listed in
        ``supported_rolling_funcs``.  Otherwise returns a
        ``types.BoundFunction`` wrapping a template whose ``generic`` yields a
        dataframe of float64 arrays over the sorted selected columns (plus the
        ``on`` column, which keeps its input type).
        """
        if func_name not in supported_rolling_funcs:
            supported = ", ".join(supported_rolling_funcs)
            raise ValueError("only ({}) supported in rolling".format(supported))

        template_key = 'rolling.' + func_name
        # every computed column is float64
        out_arr = types.Array(types.float64, 1, 'C')

        # TODO: handle Series case (explicit select)
        selected = rolling.selection
        # the 'on' column is part of the output as well
        if rolling.on is not None:
            selected = selected + (rolling.on, )

        # Pandas sorts the output column names, see _flex_binary_moment:
        #   res_columns = arg1.columns.union(arg2.columns)
        columns = tuple(sorted(selected))
        out_data = [out_arr for _ in range(len(columns))]

        if rolling.on is not None:
            # the offset key keeps the data type it has in the input frame
            on_out_pos = columns.index(rolling.on)
            on_in_pos = rolling.df_type.columns.index(rolling.on)
            out_data[on_out_pos] = rolling.df_type.data[on_in_pos]

        out_typ = DataFrameType(tuple(out_data), None, columns)

        class MethodTemplate(AbstractTemplate):
            key = template_key

            def generic(self, args, kws):
                # everything except cov/corr uses the precomputed frame type
                if func_name not in ('cov', 'corr'):
                    return signature(out_typ, *args)

                if len(args) != 1:
                    msg = "rolling {} requires one argument (other)"
                    raise ValueError(msg.format(func_name))

                # XXX pandas only accepts variable window cov/corr
                # when both inputs have time index
                # TODO: support variable window rolling cov/corr which is
                # only possible in pandas with time index
                if rolling.on is not None:
                    msg = "variable window rolling {} not supported yet."
                    raise ValueError(msg.format(func_name))

                other = args[0]
                # output columns are the sorted union of both frames' names
                # TODO: support pairwise arg
                merged = set(columns) | set(other.columns)
                out_cols = tuple(sorted(merged))
                pair_typ = DataFrameType(
                    (out_arr, ) * len(out_cols), None, out_cols)
                return signature(pair_typ, *args)

        return types.BoundFunction(MethodTemplate, rolling)
Ejemplo n.º 4
0
    def generic(self, args, kws):
        """Build the signature for a call taking (index, columns, pivot values).

        The result type is a ``DataFrameType`` with one int64 array column
        per entry of the third argument's ``meta`` attribute.  Keyword
        arguments are not accepted.
        """
        assert not kws
        # Unpacking also enforces that exactly three arguments were given;
        # the first two are not needed for typing.
        _index, _columns, pivot_arg = args

        # TODO: support agg func other than frequency
        count_arr = types.Array(types.int64, 1, 'C')

        labels = pivot_arg.meta
        col_types = (count_arr, ) * len(labels)
        result = DataFrameType(col_types, None, tuple(labels))

        return signature(result, *args)
Ejemplo n.º 5
0
 def generic(self, args, kws):
     """Type a rolling method call and return its signature.

     Relies on ``func_name``, ``rolling``, ``columns``, ``out_arr`` and
     ``out_typ`` from an enclosing scope that is not part of this snippet.
     For 'cov'/'corr' exactly one positional argument is required and the
     ``on`` option is rejected; the result is then a frame of ``out_arr``
     columns over the sorted union of both inputs' column names.  Any other
     function returns ``out_typ``.
     """
     # everything except cov/corr uses the precomputed frame type
     if func_name not in ('cov', 'corr'):
         return signature(out_typ, *args)

     if len(args) != 1:
         msg = "rolling {} requires one argument (other)"
         raise ValueError(msg.format(func_name))

     # XXX pandas only accepts variable window cov/corr
     # when both inputs have time index
     # TODO: support variable window rolling cov/corr which is only
     # possible in pandas with time index
     if rolling.on is not None:
         msg = "variable window rolling {} not supported yet."
         raise ValueError(msg.format(func_name))

     other = args[0]
     # output columns are the sorted union of both frames' names
     # TODO: support pairwise arg
     merged = set(columns) | set(other.columns)
     out_cols = tuple(sorted(merged))
     pair_typ = DataFrameType((out_arr, ) * len(out_cols), None, out_cols)
     return signature(pair_typ, *args)
Ejemplo n.º 6
0
def typeof_pd_dataframe(val, c):
    """Return the ``DataFrameType`` describing the dataframe ``val``.

    Column names are taken from ``val.columns`` and column types from
    ``get_hiframes_dtypes(val)``.  ``c`` is not used.
    """
    names = val.columns.tolist()
    names = tuple(names)
    # TODO: support other types like string and timestamp
    dtypes = get_hiframes_dtypes(val)
    return DataFrameType(dtypes, None, names, True)