Python DataFrameType Exemples, sdc.hiframes.pd_dataframe_ext.DataFrameType Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : pd_groupby_ext.py Projet : Sanyam07/sdc

        def generic(self, args, kws):
            """Build the typing signature for a ``pivot_table()`` call.

            ``args`` is unpacked as ``(df, values, index, columns, aggfunc,
            _pivot_values)``; keyword arguments are not accepted.  The
            returned signature's result is a ``DataFrameType`` with one
            column per entry of ``_pivot_values.meta``.  Every column uses the
            array type derived from running type inference on the aggregation
            function, applied to the data type of the ``values`` column.

            Raises:
                ValueError: if ``values``, ``index`` or ``columns`` is not a
                    ``types.StringLiteral``.
            """
            assert not kws
            df, values, index, columns, aggfunc, _pivot_values = args

            if not (isinstance(values, types.StringLiteral)
                    and isinstance(index, types.StringLiteral)
                    and isinstance(columns, types.StringLiteral)):
                # Adjacent string literals are joined verbatim, so the first
                # piece must end with a space to keep the words separated.
                raise ValueError(
                    "pivot_table() only support string constants for "
                    "'values', 'index' and 'columns' arguments")

            values = values.literal_value
            index = index.literal_value
            columns = columns.literal_value

            # get output data type: infer what the aggregation function
            # returns when it is given the 'values' column
            data = df.data[df.columns.index(values)]
            func = get_agg_func(None, aggfunc.literal_value, None)
            f_ir = numba.ir_utils.get_ir_of_code(
                {
                    'np': np,
                    'numba': numba,
                    'sdc': sdc
                }, func.__code__)
            _, out_dtype, _ = numba.typed_passes.type_inference_stage(
                self.context, f_ir, (data, ), None)
            out_arr_typ = _get_series_array_type(out_dtype)

            # one output column, all of the same array type, per pivot value
            pivot_vals = _pivot_values.meta
            n_vals = len(pivot_vals)
            out_df = DataFrameType((out_arr_typ, ) * n_vals, None,
                                   tuple(pivot_vals))

            return signature(out_df, *args)

Exemple #2

0

Afficher le fichier

Fichier : pd_groupby_ext.py Projet : Sanyam07/sdc

        def _get_agg_typ(self, grp, args, code):
            """Return the signature of an aggregation applied to ``grp``.

            ``code`` is the code object of the aggregation function.  It is
            turned into Numba IR once, then type inference is run on it for
            every column in ``grp.selection`` to obtain that column's output
            array type.  When ``grp.as_index`` is false, the key columns are
            placed first in the result with their input types unchanged.

            The result type is a ``DataFrameType``, except when exactly one
            column is selected, the selection is explicit and ``as_index`` is
            set; in that case it is the series type of that single column.
            """
            agg_globals = {'np': np, 'numba': numba, 'sdc': sdc}
            agg_ir = numba.ir_utils.get_ir_of_code(agg_globals, code)

            def input_type_of(name):
                # data type of the input dataframe column called `name`
                position = grp.df_type.columns.index(name)
                return grp.df_type.data[position]

            # key columns lead the output unless they become the index
            key_names = () if grp.as_index else tuple(grp.keys)
            names = list(key_names)
            arr_types = [input_type_of(key) for key in key_names]

            # one inferred output array type per selected column
            for sel in grp.selection:
                _, inferred, _ = numba.typed_passes.type_inference_stage(
                    self.context, agg_ir, (input_type_of(sel), ), None)
                names.append(sel)
                arr_types.append(_get_series_array_type(inferred))

            frame_type = DataFrameType(tuple(arr_types), None, tuple(names))

            # XXX a single, explicitly selected column yields a series instead
            if (len(grp.selection) != 1 or not grp.explicit_select
                    or not grp.as_index):
                return signature(frame_type, *args)
            return signature(arr_to_series_type(arr_types[0]), *args)

Exemple #3

0

Afficher le fichier

    def generic_resolve(self, rolling, func_name):
        """Resolve the method ``func_name`` on a rolling type.

        Returns a ``types.BoundFunction`` wrapping a template whose key is
        ``'rolling.' + func_name``.  For most functions the template's result
        is a ``DataFrameType`` over the sorted selected columns (plus the
        ``on`` column when one is set), all float64 arrays except the ``on``
        column, which keeps its input type.  ``cov`` and ``corr`` are typed
        inside the template from their ``other`` argument.

        Raises:
            ValueError: if ``func_name`` is not in
                ``supported_rolling_funcs``.
        """
        if func_name not in supported_rolling_funcs:
            supported = ", ".join(supported_rolling_funcs)
            raise ValueError(
                "only ({}) supported in rolling".format(supported))
        template_key = 'rolling.' + func_name

        # every computed column is a 1D C-contiguous float64 array
        float_arr = types.Array(types.float64, 1, 'C')

        # TODO: handle Series case (explicit select)
        selected = rolling.selection
        has_on = rolling.on is not None
        if has_on:
            # the 'on' column is part of the output as well
            selected = selected + (rolling.on, )

        # Pandas sorts the output column names, see _flex_binary_moment:
        #   res_columns = arg1.columns.union(arg2.columns)
        sorted_cols = tuple(sorted(selected))
        col_types = [float_arr] * len(sorted_cols)
        if has_on:
            # the offset key keeps the data type it has in the input frame
            dst = sorted_cols.index(rolling.on)
            src = rolling.df_type.columns.index(rolling.on)
            col_types[dst] = rolling.df_type.data[src]
        frame_typ = DataFrameType(tuple(col_types), None, sorted_cols)

        class MethodTemplate(AbstractTemplate):
            key = template_key

            def generic(self, args, kws):
                # everything except cov/corr has the precomputed frame type
                if func_name not in ('cov', 'corr'):
                    return signature(frame_typ, *args)

                if len(args) != 1:
                    raise ValueError(
                        "rolling {} requires one argument (other)".format(
                            func_name))
                # XXX pandas only accepts variable window cov/corr when
                # both inputs have a time index
                # TODO: support variable window rolling cov/corr
                if rolling.on is not None:
                    raise ValueError(
                        "variable window rolling {} not supported yet.".
                        format(func_name))

                other = args[0]
                # output columns are the sorted union of both frames'
                # column names (no pairwise flag)
                # TODO: support pairwise arg
                union_cols = tuple(
                    sorted(set(sorted_cols) | set(other.columns)))
                union_typ = DataFrameType(
                    (float_arr, ) * len(union_cols), None, union_cols)
                return signature(union_typ, *args)

        return types.BoundFunction(MethodTemplate, rolling)

Exemple #4

0

Afficher le fichier

        def generic(self, args, kws):
            """Build the signature for a frequency-count pivot typing call.

            ``args`` must unpack to ``(index, columns, _pivot_values)`` and no
            keyword arguments are accepted.  The result is a
            ``DataFrameType`` with one 1D C-contiguous int64 array column per
            entry of ``_pivot_values.meta``.
            """
            assert not kws
            # the unpack also enforces that exactly three arguments are given
            index, columns, _pivot_values = args

            # TODO: support agg func other than frequency
            count_arr = types.Array(types.int64, 1, 'C')

            names = _pivot_values.meta
            col_types = (count_arr,) * len(names)
            frame_typ = DataFrameType(col_types, None, tuple(names))
            return signature(frame_typ, *args)

Exemple #5

0

Afficher le fichier

 def generic(self, args, kws):
     """Return the signature of a rolling method call.

     Relies on ``func_name``, ``rolling``, ``columns``, ``out_arr`` and
     ``out_typ``, none of which are defined in this snippet (presumably
     closure variables of an enclosing function -- confirm at the definition
     site).  For ``cov``/``corr`` the result type is built from the columns
     of the single ``other`` argument; every other function uses ``out_typ``.

     Raises:
         ValueError: for ``cov``/``corr`` when the call does not have exactly
             one argument, or when ``rolling.on`` is set.
     """
     # everything except cov/corr has the precomputed output type
     if func_name not in ('cov', 'corr'):
         return signature(out_typ, *args)

     if len(args) != 1:
         raise ValueError(
             "rolling {} requires one argument (other)".format(func_name))
     # XXX pandas only accepts variable window cov/corr when both inputs
     # have a time index
     # TODO: support variable window rolling cov/corr
     if rolling.on is not None:
         raise ValueError(
             "variable window rolling {} not supported yet.".format(
                 func_name))

     other = args[0]
     # output columns are the sorted union of both frames' column names
     # (no pairwise flag)
     # TODO: support pairwise arg
     union_cols = tuple(sorted(set(columns) | set(other.columns)))
     union_typ = DataFrameType(
         (out_arr, ) * len(union_cols), None, union_cols)
     return signature(union_typ, *args)

Exemple #6

0

Afficher le fichier

def typeof_pd_dataframe(val, c):
    """Return the ``DataFrameType`` describing the dataframe ``val``.

    Column names are taken from ``val.columns`` and column types from
    ``get_hiframes_dtypes(val)``.  ``c`` is not used here.
    """
    names = tuple(val.columns.tolist())
    # TODO: support other types like string and timestamp
    dtypes = get_hiframes_dtypes(val)
    # NOTE(review): the meaning of the trailing ``True`` is defined by
    # DataFrameType's constructor, which is not visible here -- confirm.
    return DataFrameType(dtypes, None, names, True)