def prefixsum(vals):
    """Return the full prefix sum of *vals*.

    For an input holding N elements the returned array holds N + 1
    elements.  Element 0 is always 0 and the final element equals the sum
    of every value in *vals*.
    """
    import cudf.bindings.reduce as cpp_reduce
    from cudf.dataframe.numerical import NumericalColumn
    from cudf.dataframe.buffer import Buffer

    def _as_plain_column(device_ary):
        # Wrap a device array as a column without a null mask so that it
        # can be handed to the scan binding.
        return NumericalColumn(
            data=Buffer(device_ary),
            mask=None,
            null_count=0,
            dtype=vals.dtype,
        )

    # One extra slot at the front holds the leading zero.
    slots = rmm.device_array(shape=vals.size + 1, dtype=vals.dtype)
    gpu_fill_value[1, 1](slots[:1], 0)

    # The inclusive scan is written into slots[1:], so together with the
    # zero in slots[0] the whole array is the full prefix sum.
    source_col = _as_plain_column(vals)
    target_col = _as_plain_column(slots[1:])
    cpp_reduce.apply_scan(source_col, target_col, 'sum', inclusive=True)

    return slots
def _get_mask_as_column(self):
    """Return a boolean column derived from this object's mask.

    The column starts out as ``len(self)`` ones.  When ``self._mask`` is
    set, that mask is attached to the column and the resulting nulls are
    filled with ``False``; otherwise the all-ones column is returned.
    """
    from cudf.dataframe.numerical import NumericalColumn

    all_true = Buffer(cudautils.ones(len(self), dtype=np.bool_))
    result = NumericalColumn(
        data=all_true,
        mask=None,
        null_count=0,
        dtype=np.bool_,
    )
    if self._mask is None:
        # No mask attached: return the all-ones column unchanged.
        return result
    return result.set_mask(self._mask).fillna(False)
def as_column(self):
    """Materialize this object as a ``NumericalColumn``.

    A non-empty object yields the values produced by
    ``cudautils.arange(self._start, self._stop)``; an empty one yields a
    zero-length device array.  The column carries ``self.name``.
    """
    if len(self) <= 0:
        # Nothing to enumerate: allocate an empty array of our dtype.
        device_vals = rmm.device_array(0, dtype=self.dtype)
    else:
        device_vals = cudautils.arange(
            self._start, self._stop, dtype=self.dtype
        )
    return NumericalColumn(
        data=Buffer(device_vals),
        dtype=device_vals.dtype,
        name=self.name,
    )
def get_dt_field(self, field):
    """Extract datetime component *field* and return it as an index.

    The extraction itself is delegated to ``self._values.get_dt_field``;
    its result is re-wrapped as a ``NumericalColumn`` and passed to
    ``as_index``.
    """
    extracted = self._values.get_dt_field(field)
    # NOTE: columnops.column_empty_like always returns a Column object,
    # but a NumericalColumn is needed for GenericIndex, so the column is
    # rebuilt here from its parts.  How this should be handled properly
    # is still an open question.
    numeric = NumericalColumn(
        data=extracted.data,
        mask=extracted.mask,
        null_count=extracted.null_count,
        dtype=extracted.dtype,
    )
    return as_index(numeric)
def column_select_by_position(column, positions):
    """Gather the rows of *column* located at *positions*.

    *positions* is a series of dtype int64 holding the row positions.

    Returns a ``(selected_column, selected_positions)`` tuple, where
    ``selected_positions`` is a ``NumericalColumn`` wrapping the position
    array.
    """
    from cudf.dataframe.numerical import NumericalColumn

    position_ary = positions.data.to_gpu_array()
    gathered = cpp_copying.apply_gather_column(column, position_ary)

    position_buf = Buffer(position_ary)
    position_col = NumericalColumn(data=position_buf,
                                   dtype=position_buf.dtype)
    return gathered, position_col
def column_select_by_position(column, positions):
    """Gather the rows of *column* located at *positions*.

    *positions* is a series of dtype int64 holding the row positions.
    *column* must not contain nulls (enforced with an assertion).

    Returns a ``(selected_column, selected_positions)`` tuple, where
    ``selected_positions`` is a ``NumericalColumn`` wrapping the position
    array.
    """
    from cudf.dataframe.numerical import NumericalColumn

    assert column.null_count == 0

    gathered = cudautils.gather(
        column.data.to_gpu_array(),
        positions.data.to_gpu_array(),
    )
    picked_column = column.replace(data=Buffer(gathered))

    # The positions are fetched again to back the returned index column.
    position_buf = Buffer(positions.data.to_gpu_array())
    position_col = NumericalColumn(data=position_buf,
                                   dtype=position_buf.dtype)
    return picked_column, position_col
def __init__(self, values, name=None):
    """Initialize from a Series, a typed column, or a raw array.

    * A ``Series`` contributes both its column and its name (the *name*
      argument is overridden).
    * A ``columnops.TypedColumnBase`` instance is used as-is.
    * Anything else is wrapped in a ``Buffer`` and turned into a
      ``NumericalColumn`` with the input's dtype.

    The resulting column must not contain nulls (enforced with an
    assertion).
    """
    from cudf.dataframe.series import Series

    # Normalize the input down to a typed column.
    if isinstance(values, Series):
        name, values = values.name, values._column
    elif not isinstance(values, columnops.TypedColumnBase):
        values = NumericalColumn(data=Buffer(values), dtype=values.dtype)

    assert isinstance(values, columnops.TypedColumnBase), type(values)
    assert values.null_count == 0

    self._values = values
    self.name = name
def column_select_by_boolmask(column, boolmask):
    """Filter *column* with the boolean mask *boolmask*.

    *column* must not contain nulls (enforced with an assertion).

    Returns a ``(selected_column, selected_positions)`` tuple, where
    ``selected_positions`` is a ``NumericalColumn`` of the positions that
    were kept.
    """
    from cudf.dataframe.numerical import NumericalColumn

    assert column.null_count == 0

    # NOTE: nulls inside the boolmask itself are not properly handled yet.
    packed_bits = cudautils.compact_mask_bytes(boolmask.to_gpu_array())
    all_positions = cudautils.arange(len(boolmask))

    # Compact the positions and the values with the same bit mask so the
    # two outputs stay aligned.
    _, kept_positions = cudautils.copy_to_dense(all_positions,
                                                mask=packed_bits)
    _, kept_values = cudautils.copy_to_dense(column.data.to_gpu_array(),
                                             mask=packed_bits)

    picked_column = column.replace(data=Buffer(kept_values))
    position_buf = Buffer(kept_positions)
    position_col = NumericalColumn(data=position_buf,
                                   dtype=position_buf.dtype)
    return picked_column, position_col
def _find_segments(self):
    """Run ``cudautils.find_segments`` over ``self.gpu_values``.

    Returns a ``(segments_column, markers)`` tuple: the first result of
    ``find_segments`` wrapped as a ``NumericalColumn``, and the second
    result passed through unchanged.
    """
    segments, markers = cudautils.find_segments(self.gpu_values)
    segments_col = NumericalColumn(data=Buffer(segments),
                                   dtype=segments.dtype)
    return segments_col, markers