def generate_lazy_op_list(arrays):
    """Wrap every array in a ``LazyOpResult`` for use with the group operator.

    This is a slightly hacky adapter: it exists only so that the inputs match
    the syntax the group operator expects.

    Args:
        arrays: Iterable of objects exposing ``weldobj`` and ``_weld_type``.

    Returns:
        list: One ``LazyOpResult(weldobj, _weld_type, 1)`` per input, in the
        same order as ``arrays``.
    """
    return [
        LazyOpResult(arr.weldobj, arr._weld_type, 1)
        for arr in arrays
    ]
Example #2
0
 def values(self):
     """Return the frame's values, lazily filtered when predicates are set.

     Returns:
         ``self.df.values`` unchanged when ``self.predicates`` is None.
         Otherwise, for NumPy-backed values, a ``LazyOpResult`` wrapping
         ``grizzly_impl.filter`` applied with ``self.predicates.expr``.
         Any other kind of values yields None.
     """
     if self.predicates is None:
         return self.df.values

     data = self.df.values
     if not isinstance(data, np.ndarray):
         # Only ndarray-backed data is handled when predicates are present.
         return None

     # The Weld element type is looked up by the dtype's string name.
     elem_type = grizzly_impl.numpy_to_weld_type_mapping[str(data.dtype)]
     rank = data.ndim
     filtered = grizzly_impl.filter(data, self.predicates.expr, elem_type)
     return LazyOpResult(filtered, elem_type, rank)
Example #3
0
    def get_column(self, column_name, column_type, index, verbose=True):
        """Extract a single column from this object's expression and evaluate it.

        Args:
            column_name: Name of the requested column. This method does not
                read it; the column is selected by ``index``.
            column_type: Type passed to ``LazyOpResult`` as the type of the
                extracted column.
            index: Column position forwarded to ``grizzly_impl.get_column``.
            verbose (bool): Forwarded to ``LazyOpResult.evaluate``.

        Returns:
            The result of calling ``evaluate`` on the lazily extracted column.
        """
        column_expr = grizzly_impl.get_column(self.expr, self.weld_type, index)
        lazy_column = LazyOpResult(column_expr, column_type, 1)
        return lazy_column.evaluate(verbose=verbose)
Example #4
0
    def std(self):
        """Compute the per-group standard deviation of the selected column.

        Note that the result is by default normalized by n - 1.

        Only the first grouping column is used for the index of the result.
        TODO: what does pandas do for multiple grouping columns?

        Returns:
            SeriesWeld: ``WeldDouble`` values whose index type and name come
            from the first grouping column.
        """
        std_expr = grizzly_impl.groupby_std(
            [self.column],
            [self.column_type],
            self.grouping_columns,
            self.grouping_column_types,
        )
        # The grouped result is split into the grouping columns followed by
        # one double column.
        unzipped_columns = grizzly_impl.unzip_columns(
            std_expr,
            self.grouping_column_types + [WeldDouble()],
        )
        index_type = self.grouping_column_types[0]
        index_expr = LazyOpResult(
            grizzly_impl.get_field(unzipped_columns, 0), index_type, 1)
        # With a single grouping column, field 1 is the double column declared
        # in the unzip above, so it is typed as WeldDouble rather than with the
        # index type.
        column_expr = LazyOpResult(
            grizzly_impl.get_field(unzipped_columns, 1), WeldDouble(), 1)
        group_expr = utils.group([index_expr, column_expr])
        return SeriesWeld(group_expr.expr,
                          WeldDouble(),
                          index_type=index_type,
                          index_name=self.grouping_column_names[0])
Example #5
0
    def __sub__(self, other):
        """Subtract ``other`` from this series element-wise.

        When an operand carries an index (``index_type`` is not None), its
        expression is treated as an (index, values) pair and only the values
        (field 1) take part in the subtraction.

        Args:
            other: Series-like operand exposing ``index_type`` and ``expr``.

        Returns:
            SeriesWeld: The difference. If this series has an index, the
            result is regrouped with that index; otherwise the plain
            element-wise result is returned.
        """
        if self.index_type is not None:
            index = grizzly_impl.get_field(self.expr, 0)
            expr1 = grizzly_impl.get_field(self.expr, 1)
        else:
            index = None
            expr1 = self.expr
        # Only the values of ``other`` are needed; its index is not used.
        if other.index_type is not None:
            expr2 = grizzly_impl.get_field(other.expr, 1)
        else:
            expr2 = other.expr

        sub_expr = SeriesWeld(
            grizzly_impl.element_wise_op(expr1, expr2, "-", self.weld_type),
            self.weld_type, self.df, self.column_name)

        if index is None:
            # Without an index there is nothing to regroup with. Previously
            # this path failed because ``index`` was never assigned.
            return sub_expr

        index_expr = LazyOpResult(index, self.index_type, 0)
        index_sub_expr = utils.group([index_expr, sub_expr])
        return SeriesWeld(index_sub_expr.expr, self.weld_type, self.df,
                          self.column_name, self.index_type, self.index_name)
Example #6
0
def generate_lazy_op_list(arrays):
    """Build a list of ``LazyOpResult`` wrappers, one per input array.

    Args:
        arrays: Iterable of objects exposing ``weldobj`` and ``_weld_type``.

    Returns:
        list: ``LazyOpResult(weldobj, _weld_type, 1)`` for each element of
        ``arrays``, preserving order.
    """
    return [
        LazyOpResult(item.weldobj, item._weld_type, 1)
        for item in arrays
    ]