Example #1
0
def prefixsum(vals):
    """Return the running sum of *vals* with a leading zero.

    For an input of N elements the result has N + 1 elements and the same
    dtype as *vals*: element 0 is 0, and the remaining N elements receive
    the inclusive ``'sum'`` scan of *vals*, so the last element is the sum
    of all of *vals*.

    The result is allocated with ``rmm.device_array``.
    """

    import cudf.bindings.reduce as cpp_reduce
    from cudf.dataframe.numerical import NumericalColumn
    from cudf.dataframe.buffer import Buffer

    def _as_plain_column(device_ary):
        # Wrap a device array as a column without a null mask.
        return NumericalColumn(data=Buffer(device_ary),
                               mask=None,
                               null_count=0,
                               dtype=vals.dtype)

    # One extra slot up front for the leading zero.
    result = rmm.device_array(shape=vals.size + 1, dtype=vals.dtype)
    # Launch the fill over just the first slot to write the 0.
    gpu_fill_value[1, 1](result[:1], 0)

    # Scan *vals* into the tail of the result (everything after slot 0).
    source = _as_plain_column(vals)
    target = _as_plain_column(result[1:])
    cpp_reduce.apply_scan(source, target, 'sum', inclusive=True)
    return result
Example #2
0
    def _get_mask_as_column(self):
        """Return a boolean column with one entry per element of ``self``.

        The column starts out as all ones.  When ``self._mask`` is set, it
        is attached as the column's mask and the resulting nulls are filled
        with ``False``; otherwise the all-ones column is returned as is.
        """
        from cudf.dataframe.numerical import NumericalColumn

        all_true = cudautils.ones(len(self), dtype=np.bool_)
        bool_col = NumericalColumn(data=Buffer(all_true),
                                   mask=None,
                                   null_count=0,
                                   dtype=np.bool_)
        if self._mask is None:
            # No mask present: every entry stays True.
            return bool_col
        return bool_col.set_mask(self._mask).fillna(False)
Example #3
0
 def as_column(self):
     """Materialize this object as a ``NumericalColumn``.

     A non-empty object yields ``cudautils.arange(self._start, self._stop)``
     in ``self.dtype``; an empty one yields a zero-length device array of
     the same dtype.  The column carries ``self.name``.
     """
     values = (cudautils.arange(self._start, self._stop, dtype=self.dtype)
               if len(self) > 0
               else rmm.device_array(0, dtype=self.dtype))
     return NumericalColumn(data=Buffer(values),
                            dtype=values.dtype,
                            name=self.name)
Example #4
0
 def get_dt_field(self, field):
     """Return an index built from datetime component *field*.

     Delegates to ``self._values.get_dt_field(field)``, re-wraps the result
     as a ``NumericalColumn`` and passes that to ``as_index``.
     """
     extracted = self._values.get_dt_field(field)
     # columnops.column_empty_like always hands back a plain Column, while
     # GenericIndex needs a NumericalColumn, so rebuild one from the same
     # data/mask/null_count/dtype.  Open question: how should this really
     # be handled?
     column_kwargs = dict(data=extracted.data,
                          mask=extracted.mask,
                          null_count=extracted.null_count,
                          dtype=extracted.dtype)
     return as_index(NumericalColumn(**column_kwargs))
Example #5
0
def column_select_by_position(column, positions):
    """Gather the rows of *column* named by *positions*.

    *positions* is a series of dtype int64 holding the row positions to
    pick.

    Returns a ``(selected_column, selected_positions)`` tuple, where
    ``selected_positions`` is a ``NumericalColumn`` wrapping the device
    array taken from *positions*.
    """
    from cudf.dataframe.numerical import NumericalColumn

    gather_map = positions.data.to_gpu_array()
    gathered = cpp_copying.apply_gather_column(column, gather_map)

    # The same device array doubles as the data of the returned positions.
    index_buf = Buffer(gather_map)
    index_col = NumericalColumn(data=index_buf, dtype=index_buf.dtype)
    return gathered, index_col
Example #6
0
def column_select_by_position(column, positions):
    """Gather the rows of *column* named by *positions*.

    *positions* is a series of dtype int64 holding the row positions to
    pick.  *column* must not contain nulls (checked with an assertion).

    Returns a ``(selected_column, selected_positions)`` tuple, where
    ``selected_column`` is *column* with its data replaced by the gathered
    values and ``selected_positions`` is a ``NumericalColumn`` over the
    positions array.
    """
    from cudf.dataframe.numerical import NumericalColumn
    assert column.null_count == 0

    source_ary = column.data.to_gpu_array()
    gather_map = positions.data.to_gpu_array()
    gathered = cudautils.gather(source_ary, gather_map)

    picked = column.replace(data=Buffer(gathered))
    # The positions array is requested a second time for the index buffer.
    index_buf = Buffer(positions.data.to_gpu_array())
    index_col = NumericalColumn(data=index_buf, dtype=index_buf.dtype)
    return picked, index_col
Example #7
0
    def __init__(self, values, name=None):
        """Store *values* as a typed column together with *name*.

        *values* may be a ``Series`` (its column is used and its name
        overrides *name*), an existing ``columnops.TypedColumnBase`` (used
        as is), or anything else, which is wrapped in a ``Buffer`` and a
        ``NumericalColumn`` using ``values.dtype``.

        The resulting column must be a ``TypedColumnBase`` without nulls;
        both conditions are checked with assertions.
        """
        from cudf.dataframe.series import Series

        # Normalize the input down to a typed column.
        if isinstance(values, Series):
            name = values.name
            values = values._column
        elif not isinstance(values, columnops.TypedColumnBase):
            values = NumericalColumn(data=Buffer(values), dtype=values.dtype)

        assert isinstance(values, columnops.TypedColumnBase), type(values)
        assert values.null_count == 0

        self._values = values
        self.name = name
Example #8
0
def column_select_by_boolmask(column, boolmask):
    """Select the entries of *column* picked out by *boolmask*.

    *column* must not contain nulls (checked with an assertion).

    Returns a ``(selected_column, selected_positions)`` tuple, where
    ``selected_column`` is *column* with its data replaced by the selected
    values and ``selected_positions`` is a ``NumericalColumn`` holding the
    positions that were selected.
    """
    from cudf.dataframe.numerical import NumericalColumn
    # Nulls in the column are not handled together with the boolmask yet.
    assert column.null_count == 0

    packed_mask = cudautils.compact_mask_bytes(boolmask.to_gpu_array())
    all_positions = cudautils.arange(len(boolmask))

    # Apply the same packed mask to the positions and to the values.
    _, kept_positions = cudautils.copy_to_dense(all_positions,
                                                mask=packed_mask)
    _, kept_values = cudautils.copy_to_dense(column.data.to_gpu_array(),
                                             mask=packed_mask)

    picked = column.replace(data=Buffer(kept_values))
    index_buf = Buffer(kept_positions)
    index_col = NumericalColumn(data=index_buf, dtype=index_buf.dtype)
    return picked, index_col
Example #9
0
 def _find_segments(self):
     """Run ``cudautils.find_segments`` over ``self.gpu_values``.

     Returns a ``(segments_column, markers)`` tuple: the first result of
     ``find_segments`` wrapped as a ``NumericalColumn``, and the second
     result passed through unchanged.
     """
     segments, markers = cudautils.find_segments(self.gpu_values)
     segment_col = NumericalColumn(data=Buffer(segments),
                                   dtype=segments.dtype)
     return segment_col, markers