def __getitem__(self, index):
    """Select one or more entries of this range-based index.

    Parameters
    ----------
    index : slice, Number, list, numpy.ndarray, or other column-like
        * ``slice`` -- resolved against ``len(self)``.  An empty selection
          returns ``RangeIndex(0, None, self.name)``; a unit-step selection
          returns a ``RangeIndex`` over the shifted bounds; any other step
          is delegated to ``index_from_range``.
        * ``Number`` -- normalized with ``utils.normalize_index`` and
          returned shifted by ``self._start``.
        * ``list`` / ``numpy.ndarray`` -- moved to the device with
          ``rmm.to_device`` and used to index ``self._values``.
        * anything else -- converted with ``column.as_column`` (scalars are
          first cast with the type returned by ``min_signed_type``) and
          used to index ``self._values``.

    Returns
    -------
    A scalar for a ``Number`` index, otherwise an index object.
    """
    from numbers import Number

    if isinstance(index, slice):
        start, stop, step = index.indices(len(self))
        # Count the selected positions exactly.  Floor division
        # ``(stop - start) // step`` undercounts whenever |step| > 1 and
        # the span is not a multiple of the step (e.g. ``[0:1:2]`` selects
        # one element but floor division yields 0), which made non-empty
        # selections come back empty.
        sln = len(range(start, stop, step))
        if sln == 0:
            return RangeIndex(0, None, self.name)
        # Shift slice-relative bounds by this index's own start offset.
        start += self._start
        stop += self._start
        if step == 1:
            return RangeIndex(start, stop, self.name)
        return index_from_range(start, stop, step)

    if isinstance(index, Number):
        index = utils.normalize_index(index, len(self))
        return index + self._start

    if isinstance(index, (list, np.ndarray)):
        index = rmm.to_device(np.asarray(index))
    else:
        if is_scalar(index):
            index = min_signed_type(index)(index)
        index = column.as_column(index)

    return as_index(self._values[index], name=self.name)
def normalize_binop_value(
    self, other: ScalarLike
) -> Union[ColumnBase, ScalarLike]:
    """Prepare ``other`` as the right-hand operand of a binary operation.

    ``None`` is returned untouched, as is a ``cudf.Scalar`` whose dtype
    already equals ``self.dtype``.  A ``cudf.Scalar`` of a different dtype
    is replaced by its ``.value`` and a zero-dimensional ``numpy.ndarray``
    by its ``.item()``.  The value's minimal NumPy dtype is then promoted
    against ``self.dtype`` (float16 is widened to float32; for boolean
    columns the dtype comes from ``min_signed_type``).

    Returns
    -------
    A NumPy scalar of the resolved dtype when ``np.isscalar(other)`` holds,
    otherwise a column built from ``other`` broadcast to ``len(self)``
    that reuses ``self.mask``.

    Raises
    ------
    TypeError
        If the value's minimal dtype kind is not bool, int, uint or float.
    """
    if other is None:
        return other

    if isinstance(other, cudf.Scalar):
        if self.dtype == other.dtype:
            return other
        # expensive device-host transfer just to
        # adjust the dtype
        other = other.value
    elif isinstance(other, np.ndarray) and other.ndim == 0:
        other = other.item()

    resolved = np.min_scalar_type(other)
    if resolved.kind not in {"b", "i", "u", "f"}:
        raise TypeError(f"cannot broadcast {type(other)}")

    if isinstance(other, cudf.Scalar):
        return other

    resolved = np.promote_types(self.dtype, resolved)
    if resolved == np.dtype("float16"):
        resolved = np.dtype("float32")
        other = resolved.type(other)
    if self.dtype.kind == "b":
        resolved = min_signed_type(other)

    if np.isscalar(other):
        return np.dtype(resolved).type(other)

    broadcast = utils.scalar_broadcast_to(
        other, size=len(self), dtype=resolved
    )
    return column.build_column(
        data=Buffer(broadcast), dtype=broadcast.dtype, mask=self.mask,
    )
def normalize_binop_value(self, other):
    """Prepare ``other`` as the right-hand operand of a binary operation.

    ``None`` is returned untouched.  Otherwise the minimal NumPy dtype of
    ``other`` is promoted against ``self.dtype``; a float16 result is
    widened to float32, and for boolean columns the dtype is taken from
    ``min_signed_type(other)``.

    Returns
    -------
    A NumPy scalar of the resolved dtype when ``np.isscalar(other)`` holds,
    otherwise a column built from ``other`` broadcast to ``len(self)``
    that reuses ``self.mask``.

    Raises
    ------
    TypeError
        If the value's minimal dtype kind is not bool, int, uint or float.
    """
    if other is None:
        return other

    resolved = np.min_scalar_type(other)
    if resolved.kind not in {"b", "i", "u", "f"}:
        raise TypeError("cannot broadcast {}".format(type(other)))

    resolved = np.promote_types(self.dtype, resolved)
    if resolved == np.dtype("float16"):
        # Widen the value itself so its dtype and ``resolved`` agree.
        other = np.dtype("float32").type(other)
        resolved = other.dtype
    if self.dtype.kind == "b":
        resolved = min_signed_type(other)

    if np.isscalar(other):
        return np.dtype(resolved).type(other)

    broadcast = utils.scalar_broadcast_to(
        other, size=len(self), dtype=resolved
    )
    return column.build_column(
        data=Buffer.from_array_lik(broadcast),
        dtype=broadcast.dtype,
        mask=self.mask,
    )
def to_pandas(self, index=None, nullable=False):
    """Build a pandas ``Series`` of categorical dtype from this column.

    Parameters
    ----------
    index : optional
        Passed through as the ``index`` of the resulting ``pd.Series``.
    nullable : bool, default False
        Accepted but not used by this implementation.

    Returns
    -------
    pd.Series
        Wraps a ``pd.Categorical`` built from this column's codes and
        categories, keeping ``self.ordered``.
    """
    code_dtype = min_signed_type(len(self.categories))
    # Null codes are filled with -1, the value ``Categorical.from_codes``
    # treats as missing.
    host_codes = self.cat().codes.astype(code_dtype)
    host_codes = host_codes.fillna(-1).to_array()
    pandas_categories = self.categories.to_pandas()
    categorical = pd.Categorical.from_codes(
        host_codes, categories=pandas_categories, ordered=self.ordered
    )
    return pd.Series(categorical, index=index)
def _compute_empty_doc_ids(self, count_df, n_doc):
    """
    Find the ids of empty documents.

    The ids present in ``count_df['doc_id']`` are treated as the remaining
    (non-empty) documents; every id in ``range(n_doc)`` whose row is null
    after subtracting the remaining rows from the full id frame is
    returned.
    """
    present_ids = count_df['doc_id'].unique()
    id_dtype = min_signed_type(n_doc)
    every_id = cp.arange(0, n_doc, dtype=id_dtype)
    id_frame = cudf.DataFrame(data={'all_ids': every_id}, dtype=id_dtype)
    # Rows that come out null in the difference are the ids that were not
    # among ``present_ids`` (presumably via index alignment -- confirm).
    difference = id_frame - id_frame.iloc[present_ids]
    missing_rows = difference[difference['all_ids'].isnull()]
    return missing_rows.index.values
def to_arrow(self) -> pa.Array:
    """Convert to a PyArrow dictionary array.

    The codes become the dictionary indices and the categories become the
    dictionary values; ``self.ordered`` is forwarded unchanged.
    """
    # arrow doesn't support unsigned codes
    if self.codes.size > 0:
        index_dtype = min_signed_type(self.codes.max())
    else:
        # No codes to inspect: fall back to int8.
        index_dtype = np.int8

    signed_codes = self.codes.astype(index_dtype)
    dictionary_source = self.categories
    arrow_indices = signed_codes.to_arrow()
    arrow_dictionary = dictionary_source.to_arrow()
    return pa.DictionaryArray.from_arrays(
        arrow_indices, arrow_dictionary, ordered=self.ordered,
    )
def normalize_binop_value(self, other):
    """Prepare ``other`` as the right-hand operand of a binary operation.

    The minimal NumPy dtype of ``other`` is promoted against
    ``self.dtype``; a float16 result is widened to float32, and an
    unsigned result is replaced by the dtype from
    ``min_signed_type(other)``.

    Returns
    -------
    A NumPy scalar of the resolved dtype when ``np.isscalar(other)`` holds,
    otherwise ``self.replace(...)`` with data broadcast to ``len(self)``.

    Raises
    ------
    TypeError
        If the value's minimal dtype kind is not bool, int, uint or float
        (this includes ``None``).
    """
    resolved = np.min_scalar_type(other)
    if resolved.kind not in "biuf":
        raise TypeError("cannot broadcast {}".format(type(other)))

    resolved = np.promote_types(self.dtype, resolved)
    if resolved == np.dtype("float16"):
        # Widen the value itself so its dtype and ``resolved`` agree.
        other = np.dtype("float32").type(other)
        resolved = other.dtype
    if resolved.kind in "u":
        resolved = min_signed_type(other)

    if np.isscalar(other):
        return np.dtype(resolved).type(other)

    broadcast = utils.scalar_broadcast_to(
        other, shape=len(self), dtype=resolved
    )
    return self.replace(data=Buffer(broadcast), dtype=broadcast.dtype)
def to_pandas(self, index: pd.Index = None, **kwargs) -> pd.Series:
    """Build a pandas ``Series`` of categorical dtype from this column.

    When the categories have a floating-point dtype, a fresh categorical
    column is first built with a mask derived from ``self.notnull()``;
    otherwise the column itself is used.  Categories are passed through
    ``dropna(drop_nan=True)`` before conversion.

    Parameters
    ----------
    index : pd.Index, optional
        Passed through as the ``index`` of the resulting ``pd.Series``.
    **kwargs
        Accepted but not used by this implementation.

    Returns
    -------
    pd.Series
        Wraps a ``pd.Categorical`` built from the codes and categories.
    """
    source = self
    if self.categories.dtype.kind == "f":
        notnull_mask = bools_to_mask(self.notnull())
        source = column.build_categorical_column(
            categories=self.categories,
            codes=column.as_column(self.codes, dtype=self.codes.dtype),
            mask=notnull_mask,
            ordered=self.dtype.ordered,
            size=self.codes.size,
        )

    code_dtype = min_signed_type(len(source.categories))
    # Null codes are filled with -1, the value ``Categorical.from_codes``
    # treats as missing.
    host_codes = source.cat().codes.astype(code_dtype)
    host_codes = host_codes.fillna(-1).to_array()
    pandas_categories = source.categories.dropna(drop_nan=True).to_pandas()
    categorical = pd.Categorical.from_codes(
        host_codes, categories=pandas_categories, ordered=source.ordered
    )
    return pd.Series(categorical, index=index)