Esempio n. 1
0
    def __getitem__(self, key):
        """Select part of this series, by slice or by predicate.

        Args:
            key: Either a ``slice`` whose ``start`` and ``stop`` are handed
                to ``grizzly_impl.slice_vec``, or any other object, which is
                forwarded to ``self.filter`` as the predicates to filter by.

        Returns:
            A new SeriesWeld wrapping the sliced expression when ``key`` is
            a slice; otherwise whatever ``self.filter(key)`` returns.
        """
        if not isinstance(key, slice):
            # By default we return as if the key were predicates to filter by
            return self.filter(key)

        # TODO : We currently do nothing with step (key.step is ignored)
        start = key.start
        stop = key.stop

        if self.index_type is None:
            # Carry the element type, parent frame and column name over to
            # the result, the same way every other SeriesWeld derived from
            # this series is constructed; the bare one-argument call lost
            # all of that metadata.
            return SeriesWeld(
                grizzly_impl.slice_vec(self.expr, start, stop),
                self.weld_type, self.df, self.column_name)

        # Indexed series: field 0 is read as the index and field 1 as the
        # values. Zip them so that both are sliced together, then split the
        # result back into two columns.
        index_expr = grizzly_impl.get_field(self.expr, 0)
        column_expr = grizzly_impl.get_field(self.expr, 1)
        zip_expr = grizzly_impl.zip_columns([index_expr, column_expr])
        sliced_expr = grizzly_impl.slice_vec(zip_expr, start, stop)
        unzip_expr = grizzly_impl.unzip_columns(
            sliced_expr, [self.index_type, self.weld_type])
        return SeriesWeld(unzip_expr, self.weld_type, self.df,
                          self.column_name, self.index_type,
                          self.index_name)
Esempio n. 2
0
    def sort_values(self, ascending=False):
        """Sort the values of this series.

        Args:
            ascending: Direction flag forwarded to ``grizzly_impl.sort`` when
                the series carries an index. Defaults to False.

        Returns:
            A new SeriesWeld wrapping the sorted expression.
        """
        if self.index_type is None:
            # NOTE(review): `ascending` is not forwarded on this path; the
            # call is kept exactly as it was. Confirm which order
            # grizzly_impl.sort applies when called with one argument.
            result_expr = grizzly_impl.sort(self.expr)
            # The sorted expression used to be computed and then dropped,
            # so callers received None for series without an index.
            return SeriesWeld(result_expr, self.weld_type, self.df,
                              self.column_name)

        # Indexed series: field 0 is read as the index and field 1 as the
        # values. Zip them so that the index follows its value through the
        # sort (keyed on field 1), then split back into two columns.
        index_expr = grizzly_impl.get_field(self.expr, 0)
        column_expr = grizzly_impl.get_field(self.expr, 1)
        zip_expr = grizzly_impl.zip_columns([index_expr, column_expr])
        result_expr = grizzly_impl.sort(zip_expr, 1, self.weld_type,
                                        ascending)
        unzip_expr = grizzly_impl.unzip_columns(
            result_expr, [self.index_type, self.weld_type])
        return SeriesWeld(unzip_expr, self.weld_type, self.df,
                          self.column_name, self.index_type,
                          self.index_name)
Esempio n. 3
0
 def __getitem__(self, key):
     """Keep the rows of the wrapped series whose index appears in ``key``.

     Args:
         key: A SeriesWeld whose expression lists the index values to keep.

     Returns:
         A new SeriesWeld with the same types and names as the wrapped
         series, built from the filtered (index, value) pairs.

     Raises:
         Exception: If the wrapped object is not a SeriesWeld, if ``key`` is
             not a SeriesWeld, or if the wrapped series has no index type.
     """
     target = self.grizzly_obj
     selectable = (isinstance(target, SeriesWeld)
                   and isinstance(key, SeriesWeld)
                   and target.index_type is not None)
     if not selectable:
         # TODO : Need to implement for non-pivot tables
         raise Exception("Cannot invoke getitem on non SeriesWeld object")

     # Field 0 is read as the index and field 1 as the values.
     idx = grizzly_impl.get_field(target.expr, 0)
     values = grizzly_impl.get_field(target.expr, 1)
     pairs = grizzly_impl.zip_columns([idx, values])

     # Membership of each index entry in the key drives the filter.
     keep = grizzly_impl.isin(idx, key.expr, target.index_type)
     kept_pairs = grizzly_impl.filter(pairs, keep)
     columns = grizzly_impl.unzip_columns(
         kept_pairs, [target.index_type, target.weld_type])
     return SeriesWeld(columns, target.weld_type, target.df,
                       target.column_name, target.index_type,
                       target.index_name)
Esempio n. 4
0
    def __sub__(self, other):
        """Element-wise subtraction ``self - other``.

        Args:
            other: Series-like object exposing ``index_type`` and ``expr``.
                Only its values take part in the subtraction; its index, if
                any, is not used.

        Returns:
            A SeriesWeld of the differences with this series' weld type.
            When this series has an index, the result is grouped with that
            index and keeps its index type and name; otherwise a plain,
            un-indexed SeriesWeld is returned.
        """
        if self.index_type is not None:
            # Field 0 is read as the index and field 1 as the values.
            index = grizzly_impl.get_field(self.expr, 0)
            expr1 = grizzly_impl.get_field(self.expr, 1)
        else:
            index = None
            expr1 = self.expr

        if other.index_type is not None:
            expr2 = grizzly_impl.get_field(other.expr, 1)
        else:
            expr2 = other.expr

        sub_expr = SeriesWeld(
            grizzly_impl.element_wise_op(expr1, expr2, "-", self.weld_type),
            self.weld_type, self.df, self.column_name)

        if self.index_type is None:
            # Without an index there is nothing to group the result with.
            # This path used to fail on an unbound `index` variable.
            return sub_expr

        index_expr = LazyOpResult(index, self.index_type, 0)
        index_sub_expr = utils.group([index_expr, sub_expr])
        return SeriesWeld(index_sub_expr.expr, self.weld_type, self.df,
                          self.column_name, self.index_type, self.index_name)
Esempio n. 5
0
    def std(self):
        """Standard deviation of the column within each group.

        Note that it is by default normalized by n - 1.
        # TODO, what does pandas do for multiple grouping columns?
        # Currently we are just going to use one grouping column

        Returns:
            A SeriesWeld of WeldDouble values whose index type and index
            name are those of the first grouping column.
        """
        std_expr = grizzly_impl.groupby_std([self.column], [self.column_type],
                                            self.grouping_columns,
                                            self.grouping_column_types)
        # The grouped result is split into the grouping column(s) followed
        # by one WeldDouble column holding the standard deviations.
        unzipped_columns = grizzly_impl.unzip_columns(
            std_expr,
            self.grouping_column_types + [WeldDouble()],
        )
        index_expr = LazyOpResult(grizzly_impl.get_field(unzipped_columns, 0),
                                  self.grouping_column_types[0], 1)
        # Field 1 is the std column, declared as WeldDouble in the unzip
        # above, so it is tagged with that type rather than with the
        # grouping column's type.
        column_expr = LazyOpResult(grizzly_impl.get_field(unzipped_columns, 1),
                                   WeldDouble(), 1)
        group_expr = utils.group([index_expr, column_expr])
        return SeriesWeld(group_expr.expr,
                          WeldDouble(),
                          index_type=self.grouping_column_types[0],
                          index_name=self.grouping_column_names[0])
Esempio n. 6
0
    def __ge__(self, other):
        """Element-wise ``self >= other`` comparison.

        Args:
            other: Right-hand operand, passed unchanged to
                ``grizzly_impl.compare``.

        Returns:
            A SeriesWeld of type WeldBit wrapping the comparison, attached
            to the same frame and column name. No index type or index name
            is passed to the result.
        """
        has_index = self.index_type is not None
        # For an indexed series only field 1 (the values) is compared.
        values = grizzly_impl.get_field(self.expr, 1) if has_index else self.expr
        comparison = grizzly_impl.compare(values, other, ">=", self.weld_type)
        return SeriesWeld(comparison, WeldBit(), self.df, self.column_name)
Esempio n. 7
0
 def __getitem__(self, key):
     """Filter a pivot table down to the rows whose index is in ``key``.

     Args:
         key: A SeriesWeld whose expression lists the index values to keep.

     Returns:
         A DataFrameWeldExpr flagged as a pivot table, with the same column
         names and weld type as the wrapped frame.

     Raises:
         Exception: If ``key`` is not a SeriesWeld, or if the wrapped frame
             is not a pivot table.
     """
     unsupported = "Cannot invoke getitem on an object that is not SeriesWeld"
     if not isinstance(key, SeriesWeld):
         raise Exception(unsupported)

     # We're going to assume that the first column in these dataframes
     # is an index column. This assumption does not hold throughout grizzly,
     # so we should fix that moving forward.
     first_column = grizzly_impl.get_field(self.df.expr, 0)

     if not self.df.is_pivot:
         # TODO : Need to implement for non-pivot tables
         raise Exception(unsupported)

     # A pivot frame is expected to carry exactly three column types; only
     # the first (the index) is needed here.
     index_type, _, _ = self.df.column_types
     membership = grizzly_impl.isin(first_column, key.expr,
                                    index_type.elemType)
     filtered = grizzly_impl.pivot_filter(self.df.expr, membership)
     return DataFrameWeldExpr(filtered,
                              self.df.column_names,
                              self.df.weld_type,
                              is_pivot=True)
Esempio n. 8
0
 def __setitem__(self, key, item):
     """Append ``item`` to a pivot table as the column named ``key``.

     Args:
         key: Name of the column being added.
         item: A SeriesWeld holding the new column. If it carries an index,
             only its values (field 1) are used.

     Raises:
         Exception: If this frame is not a pivot table, or if ``item`` is
             not a SeriesWeld.
     """
     if not self.is_pivot:
         raise Exception("Setitem not implemented for non-pivot table")

     # Note that if this is a pivot table,
     # We have to modify the structure of the pivot table
     # that is append item to nested vector of vectors
     # and update the col_vec field
     # Also setitem appends a new item to the pivot table
     # Modifying an existing item is not implemented yet
     # TODO if pivot table check that the column being added
     # is same type
     if not isinstance(item, SeriesWeld):
         # Fail loudly instead of silently discarding the assignment.
         raise Exception(
             "Setitem on pivot table not implemented for non SeriesWeld item")

     if item.index_type is not None:
         item_expr = grizzly_impl.get_field(item.expr, 1)
     else:
         item_expr = item.expr
     self.expr = grizzly_impl.set_pivot_column(
         self.expr, key, item_expr, self.column_types[1].elemType,
         self.column_types[2].elemType)
Esempio n. 9
0
 def index(self):
     """Return the index of this series as a series of its own.

     Returns:
         A SeriesWeld built from field 0 of this series' expression, typed
         with the index type and named after the index.

     Raises:
         Exception: If this series has no index type.
     """
     if self.index_type is None:
         # TODO : Make all series have a series attribute
         raise Exception("No index present")
     index_values = grizzly_impl.get_field(self.expr, 0)
     return SeriesWeld(index_values, self.index_type, self.df,
                       self.index_name)