Esempio n. 1
0
    def __getitem__(self, index):
        """Return the entry or entries of this index selected by ``index``.

        Parameters
        ----------
        index : slice, number, list, numpy.ndarray, scalar or column-like
            * ``slice`` -- resolved against ``len(self)``. An empty selection
              returns ``RangeIndex(0, None, self.name)``, a unit-step
              selection returns a ``RangeIndex`` shifted by ``self._start``,
              and any other step is delegated to ``index_from_range``.
            * number -- normalized with ``utils.normalize_index`` and
              returned shifted by ``self._start``.
            * ``list`` / ``numpy.ndarray`` -- converted with ``np.asarray``
              and passed through ``rmm.to_device`` before indexing
              ``self._values``.
            * anything else -- converted with ``column.as_column`` (scalars
              are first cast to ``min_signed_type(index)``) before indexing
              ``self._values``.

        Returns
        -------
        A ``RangeIndex``, the result of ``index_from_range``, a plain number,
        or ``as_index(self._values[index], name=self.name)``.
        """
        from numbers import Number

        if isinstance(index, slice):
            start, stop, step = index.indices(len(self))
            # Exact count of selected positions.  Floor-dividing the span by
            # the step reports 0 whenever the span is shorter than the step
            # (e.g. [0:1:2] selects one element but 1 // 2 == 0), which made
            # non-empty strided slices come back empty.
            sln = len(range(start, stop, step))
            start += self._start
            stop += self._start
            if sln == 0:
                return RangeIndex(0, None, self.name)
            elif step == 1:
                return RangeIndex(start, stop, self.name)
            else:
                return index_from_range(start, stop, step)

        elif isinstance(index, Number):
            index = utils.normalize_index(index, len(self))
            index += self._start
            return index
        elif isinstance(index, (list, np.ndarray)):
            index = np.asarray(index)
            index = rmm.to_device(index)

        else:
            if is_scalar(index):
                index = min_signed_type(index)(index)
            index = column.as_column(index)

        return as_index(self._values[index], name=self.name)
Esempio n. 2
0
 def normalize_binop_value(
     self, other: ScalarLike
 ) -> Union[ColumnBase, ScalarLike]:
     """Coerce ``other`` into an operand usable in a binary op with ``self``.

     Parameters
     ----------
     other : ScalarLike
         ``None``, a ``cudf.Scalar``, a zero-dimensional ``numpy.ndarray``
         or any value accepted by ``np.min_scalar_type``.

     Returns
     -------
     ``None`` when ``other`` is ``None``; the same ``cudf.Scalar`` when its
     dtype already equals ``self.dtype``; a NumPy scalar of the resolved
     dtype when ``np.isscalar(other)`` holds; otherwise a column built from
     ``utils.scalar_broadcast_to`` that reuses ``self.mask``.

     Raises
     ------
     TypeError
         If the minimal dtype of ``other`` is not boolean, integer,
         unsigned or floating point.
     """
     if other is None:
         return other
     if isinstance(other, cudf.Scalar):
         if self.dtype == other.dtype:
             return other
         # expensive device-host transfer just to
         # adjust the dtype
         other = other.value
     elif isinstance(other, np.ndarray) and other.ndim == 0:
         other = other.item()

     other_dtype = np.min_scalar_type(other)
     if other_dtype.kind not in {"b", "i", "u", "f"}:
         raise TypeError(f"cannot broadcast {type(other)}")

     # NOTE: a second ``isinstance(other, cudf.Scalar)`` check used to live
     # here; it could never be true because any Scalar has already been
     # returned or replaced by its host value above.
     other_dtype = np.promote_types(self.dtype, other_dtype)
     if other_dtype == np.dtype("float16"):
         # float16 results are widened to float32.
         other_dtype = np.dtype("float32")
         other = other_dtype.type(other)
     if self.dtype.kind == "b":
         # For boolean columns the dtype comes from the operand's value.
         other_dtype = min_signed_type(other)

     if np.isscalar(other):
         return np.dtype(other_dtype).type(other)

     ary = utils.scalar_broadcast_to(
         other, size=len(self), dtype=other_dtype
     )
     return column.build_column(
         data=Buffer(ary), dtype=ary.dtype, mask=self.mask,
     )
Esempio n. 3
0
 def normalize_binop_value(self, other):
     """Coerce ``other`` into an operand usable in a binary op with ``self``.

     ``None`` is passed through. Otherwise the minimal NumPy dtype of
     ``other`` is promoted against ``self.dtype`` (float16 is widened to
     float32, and boolean columns take ``min_signed_type(other)``). Scalars
     are returned as NumPy scalars of that dtype; non-scalars are broadcast
     to ``len(self)`` and wrapped in a column that reuses ``self.mask``.

     Raises
     ------
     TypeError
         If the minimal dtype of ``other`` is not bool/int/uint/float.
     """
     if other is None:
         return None

     scalar_dtype = np.min_scalar_type(other)
     if scalar_dtype.kind not in {"b", "i", "u", "f"}:
         raise TypeError("cannot broadcast {}".format(type(other)))

     target_dtype = np.promote_types(self.dtype, scalar_dtype)
     if target_dtype == np.dtype("float16"):
         # Widen half precision to single precision.
         other = np.dtype("float32").type(other)
         target_dtype = other.dtype
     if self.dtype.kind == "b":
         target_dtype = min_signed_type(other)

     if np.isscalar(other):
         return np.dtype(target_dtype).type(other)

     broadcast = utils.scalar_broadcast_to(
         other, size=len(self), dtype=target_dtype
     )
     return column.build_column(
         data=Buffer.from_array_lik(broadcast),
         dtype=broadcast.dtype,
         mask=self.mask,
     )
Esempio n. 4
0
 def to_pandas(self, index=None, nullable=False):
     """Convert this categorical column to a ``pandas.Series``.

     The codes are cast to ``min_signed_type(len(self.categories))`` and
     nulls are filled with ``-1`` before being handed to
     ``pd.Categorical.from_codes``. ``index`` becomes the index of the
     returned Series; ``nullable`` is accepted but not used here.
     """
     code_dtype = min_signed_type(len(self.categories))

     # Signed codes let -1 stand in for null entries.
     signed_codes = self.cat().codes.astype(code_dtype)
     host_codes = signed_codes.fillna(-1).to_array()

     host_categories = self.categories.to_pandas()
     categorical = pd.Categorical.from_codes(
         host_codes, categories=host_categories, ordered=self.ordered
     )
     return pd.Series(categorical, index=index)
Esempio n. 5
0
    def _compute_empty_doc_ids(self, count_df, n_doc):
        """
        Return the ids of documents absent from ``count_df['doc_id']``.

        A single-column frame holding ``0 .. n_doc - 1`` is built, the rows
        selected by the ids present in ``count_df`` are subtracted from it,
        and the index values of the rows that come out null are returned.
        """
        present_ids = count_df['doc_id'].unique()
        id_dtype = min_signed_type(n_doc)

        every_id = cp.arange(0, n_doc, dtype=id_dtype)
        all_docs = cudf.DataFrame(data={'all_ids': every_id}, dtype=id_dtype)

        # Presumably the subtraction aligns on the index, so ids with no
        # matching row on the right-hand side become null -- confirm
        # against cudf's binary-op alignment rules.
        difference = all_docs - all_docs.iloc[present_ids]
        is_missing = difference['all_ids'].isnull()
        return difference[is_missing].index.values
Esempio n. 6
0
    def to_arrow(self) -> pa.Array:
        """Convert to a PyArrow ``DictionaryArray``.

        The codes are cast to a signed integer type (``np.int8`` when there
        are no codes, otherwise ``min_signed_type`` of the largest code) and
        used as indices into the converted categories.
        """
        # arrow doesn't support unsigned codes
        if self.codes.size > 0:
            index_dtype = min_signed_type(self.codes.max())
        else:
            index_dtype = np.int8

        signed_codes = self.codes.astype(index_dtype)
        dictionary_source = self.categories

        arrow_indices = signed_codes.to_arrow()
        arrow_dictionary = dictionary_source.to_arrow()
        return pa.DictionaryArray.from_arrays(
            arrow_indices, arrow_dictionary, ordered=self.ordered
        )
Esempio n. 7
0
 def normalize_binop_value(self, other):
     """Coerce ``other`` into an operand usable in a binary op with ``self``.

     The minimal NumPy dtype of ``other`` is promoted against
     ``self.dtype``; a float16 result is widened to float32 and an unsigned
     result is replaced by ``min_signed_type(other)``. Scalars are returned
     as NumPy scalars of that dtype; anything else is broadcast to
     ``len(self)`` and substituted into a copy of ``self`` via
     ``self.replace``.

     Raises
     ------
     TypeError
         If the minimal dtype of ``other`` is not bool/int/uint/float.
     """
     scalar_dtype = np.min_scalar_type(other)
     if scalar_dtype.kind not in "biuf":
         raise TypeError("cannot broadcast {}".format(type(other)))

     result_dtype = np.promote_types(self.dtype, scalar_dtype)
     if result_dtype == np.dtype("float16"):
         # Widen half precision to single precision.
         other = np.dtype("float32").type(other)
         result_dtype = other.dtype
     if result_dtype.kind == "u":
         result_dtype = min_signed_type(other)

     if np.isscalar(other):
         return np.dtype(result_dtype).type(other)

     filled = utils.scalar_broadcast_to(
         other, shape=len(self), dtype=result_dtype
     )
     return self.replace(data=Buffer(filled), dtype=filled.dtype)
Esempio n. 8
0
    def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
        """Convert this categorical column to a ``pandas.Series``.

        When the categories are floating point, the column is first rebuilt
        with a mask derived from ``self.notnull()``. The codes are then cast
        to ``min_signed_type(len(categories))`` with nulls filled as ``-1``,
        the categories are stripped of nulls/NaNs, and both are passed to
        ``pd.Categorical.from_codes``. ``index`` becomes the index of the
        returned Series; extra keyword arguments are accepted but unused.
        """
        source = self
        if self.categories.dtype.kind == "f":
            # Rebuild with an explicit validity mask taken from notnull().
            validity = bools_to_mask(self.notnull())
            source = column.build_categorical_column(
                categories=self.categories,
                codes=column.as_column(self.codes, dtype=self.codes.dtype),
                mask=validity,
                ordered=self.dtype.ordered,
                size=self.codes.size,
            )

        code_dtype = min_signed_type(len(source.categories))

        # Signed codes let -1 stand in for null entries.
        signed_codes = source.cat().codes.astype(code_dtype)
        host_codes = signed_codes.fillna(-1).to_array()

        host_categories = source.categories.dropna(drop_nan=True).to_pandas()
        categorical = pd.Categorical.from_codes(
            host_codes, categories=host_categories, ordered=source.ordered
        )
        return pd.Series(categorical, index=index)