def generate_lazy_op_list(arrays):
    """Wrap every array in a ``LazyOpResult`` of dimension 1.

    Slightly hacky way to match the group operator syntax.

    Args:
        arrays: Iterable of objects exposing ``weldobj`` and ``_weld_type``
            attributes.

    Returns:
        list: One ``LazyOpResult`` per input element, in input order.
    """
    return [LazyOpResult(arr.weldobj, arr._weld_type, 1) for arr in arrays]
def values(self):
    """Return the frame's values, lazily filtered when predicates are set.

    Returns:
        ``self.df.values`` unchanged when ``self.predicates`` is None.
        When predicates are set and the values are a ``numpy.ndarray``, a
        ``LazyOpResult`` wrapping ``grizzly_impl.filter`` applied with
        ``self.predicates.expr``. When predicates are set but the values
        are not an ndarray, None.
    """
    if self.predicates is None:
        return self.df.values

    raw = self.df.values
    if not isinstance(raw, np.ndarray):
        # Only ndarray-backed values get a filtered representation.
        return None

    # Look up the element type from the dtype name.
    elem_type = grizzly_impl.numpy_to_weld_type_mapping[str(raw.dtype)]
    n_dims = raw.ndim
    filtered = grizzly_impl.filter(raw, self.predicates.expr, elem_type)
    return LazyOpResult(filtered, elem_type, n_dims)
def get_column(self, column_name, column_type, index, verbose=True):
    """Extract a single column by position and evaluate it immediately.

    Args:
        column_name: Not used by this method; kept as part of the signature.
        column_type: Weld type given to the ``LazyOpResult`` that wraps the
            extracted column.
        index: Column position forwarded to ``grizzly_impl.get_column``.
        verbose (bool): Forwarded to ``LazyOpResult.evaluate``.

    Returns:
        The result of evaluating the extracted column (whatever
        ``LazyOpResult.evaluate`` returns).
    """
    column_obj = grizzly_impl.get_column(self.expr, self.weld_type, index)
    lazy_column = LazyOpResult(column_obj, column_type, 1)
    return lazy_column.evaluate(verbose=verbose)
def std(self):
    """Compute the per-group standard deviation of the selected column.

    Note that this is by default normalized by n - 1.

    Only the first grouping column is used for the index of the result.
    TODO: what does pandas do for multiple grouping columns?

    Returns:
        SeriesWeld: A ``WeldDouble`` series whose index type and name are
        taken from the first grouping column.
    """
    index_type = self.grouping_column_types[0]

    std_expr = grizzly_impl.groupby_std(
        [self.column],
        [self.column_type],
        self.grouping_columns,
        self.grouping_column_types,
    )
    # The grouped result is unzipped into the grouping column(s) followed
    # by one double-typed column.
    unzipped_columns = grizzly_impl.unzip_columns(
        std_expr,
        self.grouping_column_types + [WeldDouble()],
    )
    index_expr = LazyOpResult(
        grizzly_impl.get_field(unzipped_columns, 0),
        index_type,
        1,
    )
    # With a single grouping column, field 1 is the column unzipped as
    # WeldDouble above, so it is tagged as a double rather than with the
    # index type.
    column_expr = LazyOpResult(
        grizzly_impl.get_field(unzipped_columns, 1),
        WeldDouble(),
        1,
    )
    group_expr = utils.group([index_expr, column_expr])
    return SeriesWeld(
        group_expr.expr,
        WeldDouble(),
        index_type=index_type,
        index_name=self.grouping_column_names[0],
    )
def __sub__(self, other):
    """Subtract ``other`` from this series element-wise.

    When a series carries an index (``index_type`` is not None) its
    expression is treated as a pair: field 0 is the index and field 1 the
    values. Otherwise the expression itself is used as the values.

    Args:
        other: Series-like operand exposing ``expr`` and ``index_type``.

    Returns:
        SeriesWeld: The element-wise difference. If this series has an
        index, the result is grouped with that index and keeps
        ``index_type`` / ``index_name``; if it has none, the plain
        un-indexed difference is returned.
    """
    has_index = self.index_type is not None

    if has_index:
        index = grizzly_impl.get_field(self.expr, 0)
        expr1 = grizzly_impl.get_field(self.expr, 1)
    else:
        expr1 = self.expr

    # Only the values of ``other`` are needed; its index is not used.
    if other.index_type is not None:
        expr2 = grizzly_impl.get_field(other.expr, 1)
    else:
        expr2 = other.expr

    sub_expr = SeriesWeld(
        grizzly_impl.element_wise_op(expr1, expr2, "-", self.weld_type),
        self.weld_type,
        self.df,
        self.column_name,
    )

    if not has_index:
        # There is no index to re-attach, so return the plain difference
        # rather than referencing an index that was never extracted.
        return sub_expr

    index_expr = LazyOpResult(index, self.index_type, 0)
    index_sub_expr = utils.group([index_expr, sub_expr])
    return SeriesWeld(
        index_sub_expr.expr,
        self.weld_type,
        self.df,
        self.column_name,
        self.index_type,
        self.index_name,
    )
def generate_lazy_op_list(arrays):
    """Build a list of dimension-1 ``LazyOpResult`` wrappers.

    Args:
        arrays: Iterable of objects exposing ``weldobj`` and ``_weld_type``
            attributes.

    Returns:
        list: A ``LazyOpResult`` for each element of ``arrays``, in order.
    """
    return [
        LazyOpResult(item.weldobj, item._weld_type, 1)
        for item in arrays
    ]