def prefixsum(vals):
    """Compute the full prefix sum of *vals*.

    For an input holding N elements the returned array holds N + 1
    elements: element 0 is always 0 and the final element is the sum of
    all of *vals*.

    Parameters
    ----------
    vals : device array
        Input values; must expose ``size`` and ``dtype``.

    Returns
    -------
    device array
        Array of length ``vals.size + 1`` with the same dtype as *vals*.
    """
    import cudf._lib as libcudf
    from cudf.core.column import NumericalColumn
    from cudf.core.buffer import Buffer

    def _wrap(device_ary):
        # View a raw device array as a null-free numerical column.
        return NumericalColumn(
            data=Buffer(device_ary),
            mask=None,
            null_count=0,
            dtype=vals.dtype,
        )

    # One extra slot up front for the leading zero.
    result = rmm.device_array(shape=vals.size + 1, dtype=vals.dtype)
    gpu_fill_value[1, 1](result[:1], 0)

    # The inclusive scan writes into result[1:], i.e. everything after
    # the zero that was just stored in result[0].
    source_col = _wrap(vals)
    target_col = _wrap(result[1:])
    libcudf.reduce.scan(source_col, target_col, "sum", inclusive=True)

    return result
def test_typecast_from_decimal(data, from_dtype, to_dtype):
    """Check that ``astype(to_dtype)`` agrees with an Arrow-side cast.

    *data* is first cast to *from_dtype*. The expected result is built by
    converting that series to Arrow and casting with ``safe=False``; the
    actual result comes from casting the same series with ``astype``.
    """
    source = data.astype(from_dtype)

    # Reference result: perform the cast on the Arrow representation.
    arrow_casted = source.to_arrow().cast(to_dtype, safe=False)

    # Result under test: perform the cast through astype.
    actual = source.astype(to_dtype)

    expected_column = NumericalColumn.from_arrow(arrow_casted)
    expected = cudf.Series(expected_column)
    assert_eq(actual, expected)
def as_column(self):
    """Return the values of this object as a ``NumericalColumn``.

    A non-empty object is materialized with ``cudautils.arange`` over
    ``[self._start, self._stop)``; an empty one gets a zero-length
    device array. The resulting column carries ``self.name``.
    """
    values = (
        cudautils.arange(self._start, self._stop, dtype=self.dtype)
        if len(self) > 0
        else rmm.device_array(0, dtype=self.dtype)
    )
    return NumericalColumn(
        data=Buffer(values),
        dtype=values.dtype,
        name=self.name,
    )
def test_typecast_from_decimal(data, from_dtype, to_dtype):
    """Compare an ``astype`` cast against the equivalent Arrow cast.

    The input is cast to *from_dtype*, then cast to *to_dtype* two ways:
    through Arrow (``cast(..., safe=False)``) for the expected value and
    through ``astype`` for the value under test.
    """
    as_from_dtype = data.astype(from_dtype)

    # Expected: round-trip through Arrow and wrap the result in a Series.
    arrow_result = as_from_dtype.to_arrow().cast(to_dtype, safe=False)

    # Got: cast directly with astype.
    result = as_from_dtype.astype(to_dtype)

    reference = cudf.Series(NumericalColumn.from_arrow(arrow_result))
    assert_eq(result, reference)
def get_dt_field(self, field):
    """Extract *field* from the underlying values and return it as an index.

    The column returned by ``self._values.get_dt_field`` is re-wrapped
    as a ``NumericalColumn`` named ``self.name`` before being passed to
    ``as_index``.
    """
    extracted = self._values.get_dt_field(field)

    # NOTE (carried over from the original author): column.column_empty_like
    # always returns a Column object, but a NumericalColumn is needed for
    # GenericIndex. How this should be handled is still an open question;
    # for now the pieces are copied into a new NumericalColumn.
    column_kwargs = {
        "data": extracted.data,
        "mask": extracted.mask,
        "null_count": extracted.null_count,
        "dtype": extracted.dtype,
        "name": self.name,
    }
    numerical = NumericalColumn(**column_kwargs)
    return as_index(numerical)
def build_column(data, dtype, mask=None, offset=0, children=(), categories=None):
    """
    Build a Column of the appropriate type from the given parameters

    Parameters
    ----------
    data : Buffer
        The data buffer (can be None if constructing certain Column
        types like StringColumn or CategoricalColumn, which are built
        here without it)
    dtype
        The dtype associated with the Column to construct
    mask : Buffer, optional
        The mask buffer
    offset : int, optional
    children : tuple, optional
    categories : Column, optional
        If constructing a CategoricalColumn, a Column containing the
        categories. Not read by the body of this function.

    Returns
    -------
    CategoricalColumn, DatetimeColumn, StringColumn or NumericalColumn
        Chosen according to *dtype*.

    Raises
    ------
    ValueError
        If *dtype* is categorical and *children* does not hold exactly
        one element.
    TypeError
        If *dtype* is categorical and the single child is not a
        ColumnBase.
    """
    from cudf.core.column.numerical import NumericalColumn
    from cudf.core.column.datetime import DatetimeColumn
    from cudf.core.column.categorical import CategoricalColumn
    from cudf.core.column.string import StringColumn

    dtype = pd.api.types.pandas_dtype(dtype)

    if is_categorical_dtype(dtype):
        if len(children) != 1:
            raise ValueError(
                "Must specify exactly one child column for CategoricalColumn"
            )
        (only_child,) = children
        if not isinstance(only_child, ColumnBase):
            raise TypeError("children must be a tuple of Columns")
        return CategoricalColumn(
            dtype=dtype, mask=mask, offset=offset, children=children
        )

    scalar_type = dtype.type

    if scalar_type is np.datetime64:
        return DatetimeColumn(data=data, dtype=dtype, mask=mask, offset=offset)

    if scalar_type in (np.object_, np.str_):
        return StringColumn(mask=mask, offset=offset, children=children)

    # Anything else is treated as a plain numerical column.
    return NumericalColumn(data=data, dtype=dtype, mask=mask, offset=offset)
def _find_segments(self):
    """Run ``cudautils.find_segments`` on ``self.gpu_values``.

    Returns
    -------
    tuple
        ``(segments_column, markers)``: the first result wrapped in a
        ``NumericalColumn`` and the second result passed through as-is.
    """
    segments, markers = cudautils.find_segments(self.gpu_values)
    segments_column = NumericalColumn(
        data=Buffer(segments),
        dtype=segments.dtype,
    )
    return segments_column, markers