def evaluate(self, verbose=False, passes=None):
    """Evaluate this lazy series and return the result as a ``pd.Series``.

    Args:
        verbose: Forwarded unchanged to ``LazyOpResult.evaluate``.
        passes: Forwarded unchanged to ``LazyOpResult.evaluate``.

    Returns:
        pd.Series: The evaluated column. When ``self.index_type`` is set,
        the series is indexed by the evaluated index and that index is
        named ``self.index_name``.
    """
    if self.index_type is None:
        column = LazyOpResult.evaluate(self, verbose=verbose, passes=passes)
        return pd.Series(column)

    # An indexed series is evaluated as an (index, column) struct.
    struct_type = WeldStruct([WeldVec(self.index_type),
                              WeldVec(self.weld_type)])
    index, column = LazyOpResult(self.expr, struct_type, 0).evaluate(
        verbose=verbose, passes=passes)
    series = pd.Series(column, index)
    # `inplace` is keyword-only in pandas 2.x; passing it positionally
    # raises TypeError there, so spell it out.
    series.index.rename(self.index_name, inplace=True)
    return series
예제 #2
0
    def count(self):
        """Build a lazy count over this object's expression.

        Returns:
            LazyOpResult: Wraps ``grizzly_impl.count(self.expr,
            self.weld_type)`` with result type ``WeldInt()`` and dim 0.
        """
        count_expr = grizzly_impl.count(self.expr, self.weld_type)
        return LazyOpResult(count_expr, WeldInt(), 0)
예제 #3
0
    def unique(self):
        """Build a lazy ``unique`` operation over this object's expression.

        Returns:
            LazyOpResult: Wraps ``grizzly_impl.unique(self.expr,
            self.weld_type)`` and keeps this object's ``weld_type`` and
            ``dim``.
        """
        unique_expr = grizzly_impl.unique(self.expr, self.weld_type)
        return LazyOpResult(unique_expr, self.weld_type, self.dim)
예제 #4
0
    def sum(self):
        """Build a lazy ``+`` aggregation over this object's expression.

        Returns:
            LazyOpResult: Wraps ``grizzly_impl.aggr(self.expr, "+", 0,
            self.weld_type)`` with this object's ``weld_type`` and dim 0.
        """
        aggr_expr = grizzly_impl.aggr(self.expr, "+", 0, self.weld_type)
        return LazyOpResult(aggr_expr, self.weld_type, 0)
예제 #5
0
def group_eval(objs, passes=None):
    """Evaluate several lazy objects in one grouped call.

    Every object in ``objs`` contributes exactly one entry to the grouped
    evaluation, so the i-th result always belongs to ``objs[i]``.

    Args:
        objs: Iterable of lazy objects. ``SeriesWeld`` and
            ``DataFrameWeldExpr`` instances are converted to pandas
            objects; anything else is evaluated but produces no output
            entry (unchanged from the previous behaviour).
        passes: Forwarded to the grouped ``evaluate`` call.

    Returns:
        list: ``pd.Series`` / ``pd.DataFrame`` objects, in input order.
    """
    lazy_results = []
    for ob in objs:
        if isinstance(ob, SeriesWeld) and ob.index_type is not None:
            # Indexed series evaluate to an (index, column) struct.
            struct_type = WeldStruct(
                [WeldVec(ob.index_type),
                 WeldVec(ob.weld_type)])
            lazy_results.append(LazyOpResult(ob.expr, struct_type, 0))
        elif isinstance(ob, SeriesWeld):
            # Un-indexed series used to be skipped here, which shifted every
            # later result onto the wrong object. Use the series' own type
            # and dim, as SeriesWeld.evaluate does for this case.
            lazy_results.append(LazyOpResult(ob.expr, ob.weld_type, ob.dim))
        else:
            lazy_results.append(LazyOpResult(ob.expr, ob.weld_type, 0))

    # NOTE(review): the first positional argument is kept exactly as it was;
    # confirm what LazyOpResult.evaluate expects in that position.
    results = group(lazy_results).evaluate((True, -1), passes=passes)

    pd_results = []
    for ob, result in zip(objs, results):
        if isinstance(ob, SeriesWeld):
            if ob.index_type is not None:
                index, column = result
                series = pd.Series(column, index)
                # `inplace` is keyword-only in pandas 2.x.
                series.index.rename(ob.index_name, inplace=True)
                pd_results.append(series)
            else:
                # Previously appended an unbound/stale `series` variable.
                pd_results.append(pd.Series(result))
        elif isinstance(ob, DataFrameWeldExpr):
            if ob.is_pivot:
                index, pivot, columns = result
                df_dict = {
                    column_name: pivot[pos]
                    for pos, column_name in enumerate(columns)
                }
                pd_results.append(pd.DataFrame(df_dict, index=index))
            else:
                df_dict = {
                    column_name: result[pos]
                    for pos, column_name in enumerate(ob.column_names)
                }
                pd_results.append(pd.DataFrame(df_dict))
    return pd_results
예제 #6
0
    def get_column(self, column_name, column_type, index):
        """Extract one column and evaluate it immediately.

        Args:
            column_name: Not used by this method.
            column_type: Weld type given to the resulting ``LazyOpResult``.
            index: Passed to ``grizzly_impl.get_column`` to select the
                column.

        Returns:
            The value returned by ``LazyOpResult.evaluate()`` for the
            selected column (wrapped with dim 1).
        """
        column_expr = grizzly_impl.get_column(self.expr, self.weld_type, index)
        lazy_column = LazyOpResult(column_expr, column_type, 1)
        return lazy_column.evaluate()
예제 #7
0
    def std(self):
        """Standard deviation of the grouped column.

        Note that it is by default normalized by n - 1
        # TODO, what does pandas do for multiple grouping columns?
        # Currently we are just going to use one grouping column

        Returns:
            SeriesWeld: A ``WeldDouble`` series whose index type and name
            come from the first grouping column.
        """
        std_expr = grizzly_impl.groupby_std([self.column], [self.column_type],
                                            self.grouping_columns,
                                            self.grouping_column_types)
        unzipped_columns = grizzly_impl.unzip_columns(
            std_expr,
            self.grouping_column_types + [WeldDouble()],
        )
        index_expr = LazyOpResult(grizzly_impl.get_field(unzipped_columns, 0),
                                  self.grouping_column_types[0], 1)
        # Field 1 is the std column: its type is WeldDouble (see the type
        # list passed to unzip_columns above), not the grouping column type.
        column_expr = LazyOpResult(grizzly_impl.get_field(unzipped_columns, 1),
                                   WeldDouble(), 1)
        group_expr = utils.group([index_expr, column_expr])
        return SeriesWeld(group_expr.expr,
                          WeldDouble(),
                          index_type=self.grouping_column_types[0],
                          index_name=self.grouping_column_names[0])
예제 #8
0
def group(exprs):
    """Combine several lazy results into one struct-valued lazy result.

    Args:
        exprs: Objects exposing ``weld_type``, ``dim`` and ``expr``; each
            ``expr`` must expose ``obj_id``.

    Returns:
        LazyOpResult: dim 0, typed as a ``WeldStruct`` of the members'
        types, whose Weld code is ``{id1, id2, ...}`` built from the
        members' object ids.
    """
    member_types = [to_weld_type(item.weld_type, item.dim) for item in exprs]
    members = [item.expr for item in exprs]
    grouped = WeldObject(grizzly_impl.encoder_, grizzly_impl.decoder_)
    struct_type = WeldStruct(member_types)

    member_ids = [member.obj_id for member in members]
    for member in members:
        grouped.update(member)
    grouped.weld_code = "{" + ", ".join(member_ids) + "}"
    # Register each member as a dependency of the struct, keyed by its id.
    for member in members:
        grouped.dependencies[member.obj_id] = member

    return LazyOpResult(grouped, struct_type, 0)
    def __sub__(self, other):
        """Lazily subtract ``other`` from this series element-wise.

        Args:
            other: Object exposing ``index_type`` and ``expr``. When its
                ``index_type`` is set, field 1 of its expression is used
                as the values.

        Returns:
            SeriesWeld: The element-wise difference. When this series has
            an index, the result carries that index (type and name);
            otherwise the plain un-indexed difference is returned.
        """
        if self.index_type is not None:
            index = grizzly_impl.get_field(self.expr, 0)
            expr1 = grizzly_impl.get_field(self.expr, 1)
        else:
            index = None
            expr1 = self.expr
        # Only the values of `other` are needed; its index is not used.
        if other.index_type is not None:
            expr2 = grizzly_impl.get_field(other.expr, 1)
        else:
            expr2 = other.expr

        sub_expr = SeriesWeld(
            grizzly_impl.element_wise_op(expr1, expr2, "-", self.weld_type),
            self.weld_type, self.df, self.column_name)
        if index is None:
            # Without an index there is nothing to group with; previously
            # this path failed on the unbound `index` variable.
            return sub_expr

        index_expr = LazyOpResult(index, self.index_type, 0)
        index_sub_expr = utils.group([index_expr, sub_expr])
        return SeriesWeld(index_sub_expr.expr, self.weld_type, self.df,
                          self.column_name, self.index_type, self.index_name)
예제 #10
0
    def __getattr__(self, key):
        """Resolve the ``values`` attribute; reject every other name.

        Args:
            key (str): Name of the attribute that normal lookup did not
                find.

        Returns:
            ``self.df.values`` when ``self.predicates`` is None; otherwise,
            when ``self.df.values`` is a NumPy array, a ``LazyOpResult``
            wrapping ``grizzly_impl.filter`` of those values by
            ``self.predicates.expr``.

        Raises:
            AttributeError: For any other ``key``, or for ``values`` when
                predicates are set and ``self.df.values`` is not a NumPy
                array.
        """
        if key == 'values':
            if self.predicates is None:
                return self.df.values
            values = self.df.values
            if isinstance(values, np.ndarray):
                weld_type = grizzly_impl.numpy_to_weld_type_mapping[str(
                    values.dtype)]
                return LazyOpResult(
                    grizzly_impl.filter(values, self.predicates.expr,
                                        weld_type), weld_type, values.ndim)
        # __getattr__ must signal a missing attribute with AttributeError so
        # hasattr() and getattr(obj, name, default) behave correctly.
        raise AttributeError("Attr %s does not exist" % key)