Esempio n. 1
0
    def __getattr__(self, key):
        """Resolve attributes that normal lookup did not find.

        Only ``values`` is supported. With no predicates set, the raw
        ``self.df.values`` is returned; otherwise a lazy filter over those
        values is built.

        Args:
            key (str): Name of the attribute being looked up.

        Returns:
            ``self.df.values`` when ``self.predicates`` is None, otherwise a
            ``LazyOpResult`` wrapping ``grizzly_impl.filter`` applied to the
            values with ``self.predicates.expr``.

        Raises:
            AttributeError: If ``key`` is not ``'values'``, or if predicates
                are set but ``self.df.values`` is not a NumPy array.
        """
        if key == 'values':
            if self.predicates is None:
                return self.df.values
            # Read the values once so the dtype, ndim and filtered data all
            # come from the same array object.
            values = self.df.values
            if isinstance(values, np.ndarray):
                weld_type = grizzly_impl.numpy_to_weld_type_mapping[
                    str(values.dtype)]
                return LazyOpResult(
                    grizzly_impl.filter(
                        values,
                        self.predicates.expr,
                        weld_type
                    ),
                    weld_type,
                    values.ndim
                )
        # hasattr() and getattr() with a default only treat AttributeError as
        # "attribute missing"; any other exception type propagates to callers.
        raise AttributeError("Attr %s does not exist" % key)
Esempio n. 2
0
 def filter(self, predicates):
     """Return a new SeriesWeld for this series filtered by `predicates`.

     Args:
         predicates: Either a SeriesWeld, in which case its ``expr`` is
             used, or a value handed to ``grizzly_impl.filter`` unchanged.

     Returns:
         SeriesWeld: Wraps the filter expression and reuses this series'
             ``weld_type``, ``df`` and ``column_name``.
     """
     predicate_expr = (
         predicates.expr if isinstance(predicates, SeriesWeld) else predicates
     )
     filtered_expr = grizzly_impl.filter(
         self.expr, predicate_expr, self.weld_type)
     return SeriesWeld(
         filtered_expr, self.weld_type, self.df, self.column_name)
Esempio n. 3
0
    def __getitem__(self, key):
        """Select a single column, a subset of columns, or apply a predicate.

        Args:
            key (str, list or SeriesWeld): A column name, a list of column
                names, or a SeriesWeld to use as the predicate.

        Returns:
            SeriesWeld for a single column name (the stored un-materialized
            column if one exists under that name); DataFrameWeld for a list
            of names or for a SeriesWeld predicate.

        Raises:
            TypeError: If ``key`` is not a str, list or SeriesWeld.
        """
        if isinstance(key, str):  # Single-key get
            # First check if key corresponds to an un-materialized column
            if key in self.unmaterialized_cols:
                return self.unmaterialized_cols[key]
            raw_column = self.df[key].values
            dtype = str(raw_column.dtype)
            # If column type is "object", then cast as "vec[char]" in Weld
            if dtype == 'object':
                raw_column = self.raw_columns[key]
                weld_type = WeldVec(WeldChar())
            else:
                weld_type = grizzly_impl.numpy_to_weld_type_mapping[dtype]
            if self.predicates is None:
                return SeriesWeld(raw_column, weld_type, self, key)
            # A predicate is attached: wrap the column in a filter expression.
            return SeriesWeld(
                grizzly_impl.filter(
                    raw_column,
                    self.predicates.expr,
                    weld_type
                ),
                weld_type,
                self,
                key
            )
        elif isinstance(key, list):
            # For multi-key get, return type is a dataframe
            return DataFrameWeld(self.df[key], self.predicates)
        elif isinstance(key, SeriesWeld):
            # Can also apply predicate to a dataframe
            # NOTE(review): only `key` is passed on; a predicate already held
            # in self.predicates is not carried over -- confirm this is
            # intended for chained filters.
            return DataFrameWeld(self.df, key)
        # An unsupported key type is a TypeError by Python convention; it is
        # still an Exception subclass, so broad handlers keep working.
        raise TypeError("Invalid type in __getitem__")
 def __getitem__(self, key):
     """Filter the wrapped indexed series using a SeriesWeld key.

     Only one case is handled: ``self.grizzly_obj`` is a SeriesWeld whose
     ``index_type`` is not None and `key` is a SeriesWeld. Field 0 of the
     wrapped expression is treated as the index and field 1 as the column;
     both are zipped, filtered by ``grizzly_impl.isin`` of the index against
     ``key.expr``, and unzipped again.

     Args:
         key (SeriesWeld): Its ``expr`` is passed to ``grizzly_impl.isin``.

     Returns:
         SeriesWeld: Built from the unzipped expression and the wrapped
             series' weld type, df, column name, index type and index name.

     Raises:
         Exception: For every other combination of wrapped object and key.
     """
     wrapped = self.grizzly_obj
     supported = (
         isinstance(wrapped, SeriesWeld)
         and isinstance(key, SeriesWeld)
         and wrapped.index_type is not None
     )
     if not supported:
         # TODO : Need to implement for non-pivot tables
         raise Exception("Cannot invoke getitem on non SeriesWeld object")

     idx_field = grizzly_impl.get_field(wrapped.expr, 0)
     col_field = grizzly_impl.get_field(wrapped.expr, 1)
     zipped = grizzly_impl.zip_columns([idx_field, col_field])
     mask = grizzly_impl.isin(idx_field, key.expr, wrapped.index_type)
     kept = grizzly_impl.filter(zipped, mask)
     unzipped = grizzly_impl.unzip_columns(
         kept, [wrapped.index_type, wrapped.weld_type])
     return SeriesWeld(
         unzipped,
         wrapped.weld_type,
         wrapped.df,
         wrapped.column_name,
         wrapped.index_type,
         wrapped.index_name
     )
Esempio n. 5
0
    def __getitem__(self, key):
        """Select a single column, a subset of columns, or apply a predicate.

        Args:
            key (str, list or SeriesWeld): A column name, a list of column
                names, or a SeriesWeld to use as the predicate.

        Returns:
            SeriesWeld for a single column name (the stored un-materialized
            column if one exists under that name); DataFrameWeld for a list
            of names or for a SeriesWeld predicate.

        Raises:
            TypeError: If ``key`` is not a str, list or SeriesWeld.
        """
        if isinstance(key, str):  # Single-key get
            # First check if key corresponds to an un-materialized column
            if key in self.unmaterialized_cols:
                return self.unmaterialized_cols[key]
            raw_column = self.df[key].values
            dtype = str(raw_column.dtype)
            # If column type is "object", then cast as "vec[char]" in Weld
            if dtype == 'object':
                raw_column = self.raw_columns[key]
                weld_type = WeldVec(WeldChar())
            else:
                weld_type = grizzly_impl.numpy_to_weld_type_mapping[dtype]
            if self.predicates is None:
                return SeriesWeld(raw_column, weld_type, self, key)
            # A predicate is attached: wrap the column in a filter expression.
            return SeriesWeld(
                grizzly_impl.filter(raw_column, self.predicates.expr,
                                    weld_type), weld_type, self, key)
        if isinstance(key, list):
            # For multi-key get, return type is a dataframe
            return DataFrameWeld(self.df[key], self.predicates)
        if isinstance(key, SeriesWeld):
            # Can also apply predicate to a dataframe. When a predicate is
            # already attached, the new one is merged with it through
            # per_element_and rather than replacing it.
            if self.predicates is not None:
                return DataFrameWeld(self.df,
                                     key.per_element_and(self.predicates))
            return DataFrameWeld(self.df, key)
        # An unsupported key type is a TypeError by Python convention; it is
        # still an Exception subclass, so broad handlers keep working.
        raise TypeError("Invalid type in __getitem__")
Esempio n. 6
0
    def __getattr__(self, key):
        """Provide the ``values`` attribute when normal lookup fails.

        Args:
            key (str): Name of the attribute being looked up.

        Returns:
            ``self.df.values`` when no predicates are set; otherwise a
            ``LazyOpResult`` wrapping ``grizzly_impl.filter`` applied to the
            values with ``self.predicates.expr``.

        Raises:
            AttributeError: If ``key`` is anything other than ``'values'``,
                or if predicates are set but ``self.df.values`` is not a
                NumPy array.
        """
        if key == 'values':
            if self.predicates is None:
                return self.df.values
            # Fetch once and reuse for the type lookup, ndim and the filter.
            values = self.df.values
            if isinstance(values, np.ndarray):
                weld_type = grizzly_impl.numpy_to_weld_type_mapping[str(
                    values.dtype)]
                return LazyOpResult(
                    grizzly_impl.filter(values, self.predicates.expr,
                                        weld_type), weld_type, values.ndim)
        # __getattr__ must signal a missing attribute with AttributeError so
        # that hasattr() and getattr() with a default behave correctly.
        raise AttributeError("Attr %s does not exist" % key)
Esempio n. 7
0
 def filter(self, predicates):
     """Apply `predicates` to this series and wrap the result.

     Args:
         predicates: A SeriesWeld (its ``expr`` is used) or a value that is
             forwarded to ``grizzly_impl.filter`` unchanged.

     Returns:
         SeriesWeld: Holds the filter expression together with this series'
             ``weld_type``, ``df`` and ``column_name``.
     """
     if isinstance(predicates, SeriesWeld):
         mask = predicates.expr
     else:
         mask = predicates
     return SeriesWeld(
         grizzly_impl.filter(self.expr, mask, self.weld_type),
         self.weld_type,
         self.df,
         self.column_name
     )