Example #1
0
def make_mask(size):
    """Allocate a device array intended to hold at least *size* mask bits.

    The requested bit count is handed to ``calc_chunk_size`` together with
    ``mask_bitsize``; the value it returns is used as the number of
    ``mask_dtype`` elements to allocate through RMM. This function does not
    write to the array after allocating it.
    """
    n_elements = calc_chunk_size(size, mask_bitsize)
    mask = rmm.device_array(shape=n_elements, dtype=mask_dtype)
    return mask
Example #2
0
        Notes
        -----
        The parameters `case`, `flags`, and `na` are not yet supported and
        will raise a NotImplementedError if anything other than the default
        value is set.
        """
        if case is not True:
            raise NotImplementedError("`case` parameter is not yet supported")
        elif flags != 0:
            raise NotImplementedError("`flags` parameter is not yet supported")
        elif na is not np.nan:
            raise NotImplementedError("`na` parameter is not yet supported")

        from cudf.core import Series

        out_dev_arr = rmm.device_array(len(self._parent), dtype="bool")
        ptr = libcudf.cudf.get_ctype_ptr(out_dev_arr)
        self._parent.nvstrings.contains(pat, regex=regex, devptr=ptr)

        mask = None
        if self._parent.has_nulls:
            mask = self._parent.mask

        col = column.build_column(
            Buffer(out_dev_arr), dtype=np.dtype("bool"), mask=mask
        )

        return Series(col, index=self._index, name=self._name)

    def replace(self, pat, repl, n=-1, case=None, flags=0, regex=True):
        """
Example #3
0
    def _agg_groups(self, functors):
        """Aggregate the groups

        Parameters
        ----------
        functors: dict
            Contains key for column names and value for list of functors.
            A column mapped to an empty list contributes no output column.

        Returns
        -------
        DataFrame
            The ``self._by`` key columns (one row per group) followed by one
            column per (column, functor) pair. When a column has exactly one
            functor the output keeps the column name; otherwise outputs are
            named ``"<column>_<functor.__name__>"``.

        Raises
        ------
        NameError
            If a key of *functors* is not a column of ``self._df``.
        """
        functors_mapping = OrderedDict()
        # The "value" columns
        for k, vs in functors.items():
            if k not in self._df.columns:
                raise NameError("column {} not found".format(k))
            if len(vs) == 1:
                [functor] = vs
                functors_mapping[k] = {k: functor}
            else:
                # Building the mapping from a generator keeps the loop
                # variable local, so no cleanup ``del`` is needed (the old
                # ``del functor`` failed when ``vs`` was empty).
                functors_mapping[k] = OrderedDict(
                    ("{}_{}".format(k, functor.__name__), functor)
                    for functor in vs
                )

        # Grouping
        grouped_df, sr_segs = self._group_dataframe(self._df, self._by)
        # Grouped values
        outdf = cudf.DataFrame()
        # Host copy of the segment offsets; used as the per-group begin
        # positions below.
        segs = sr_segs.to_array()

        for k in self._by:
            outdf[k] = grouped_df[k].take(sr_segs).reset_index(drop=True)

        size = len(outdf)

        # Append value columns
        for k, infos in functors_mapping.items():
            # Host-side results default to a float64 buffer of one slot per
            # group; device-side results replace the entry wholesale.
            values = defaultdict(lambda: np.zeros(size, dtype=np.float64))
            sr = grouped_df[k].reset_index(drop=True)
            for newk, functor in infos.items():
                fname = functor.__name__
                # Functors named mean/max/min are dispatched to a GPU kernel;
                # mean always produces float64, max/min keep the column dtype.
                if fname == "mean":
                    kernel, out_dtype = group_mean, np.float64
                elif fname == "max":
                    kernel, out_dtype = group_max, sr.dtype
                elif fname == "min":
                    kernel, out_dtype = group_min, sr.dtype
                else:
                    kernel = None

                if kernel is not None:
                    dev_begins = rmm.to_device(np.asarray(segs))
                    dev_out = rmm.device_array(size, dtype=out_dtype)
                    if size > 0:
                        kernel.forall(size)(
                            sr._column.data_array_view, dev_begins, dev_out
                        )
                    values[newk] = dev_out
                else:
                    # Generic functor: call it on each group's slice; the
                    # last group ends at the end of the grouped frame.
                    end = chain(segs[1:], [len(grouped_df)])
                    for i, (s, e) in enumerate(zip(segs, end)):
                        values[newk][i] = functor(sr[s:e])
            # Store
            for out_name, buf in values.items():
                outdf[out_name] = buf

        return outdf
Example #4
0
def cuda_array(size):
    """Return an RMM device array of *size* elements with dtype ``uint8``."""
    byte_dtype = np.uint8
    return rmm.device_array(size, dtype=byte_dtype)
Example #5
0
 def null(cls, dtype):
     """Build an instance of ``cls`` wrapping a zero-length device array.

     The backing memory is an RMM device array with zero elements of the
     given *dtype*; the instance is constructed with ``size=0`` and
     ``capacity=0``.
     """
     return cls(rmm.device_array(0, dtype=dtype), size=0, capacity=0)
Example #6
0
# Default values for UCX environment variables. ``setdefault`` only fills in
# a variable that is not already present, so values set by the user win.
for _ucx_var, _ucx_default in (
    ("UCX_RNDV_SCHEME", "put_zcopy"),
    ("UCX_MEMTYPE_CACHE", "n"),
    ("UCX_TLS", "tcp,rc,cuda_copy,cuda_ipc"),
):
    os.environ.setdefault(_ucx_var, _ucx_default)
del _ucx_var, _ucx_default  # keep the module namespace unchanged

# NOTE(review): presumably a cap used when logging messages; confirm at the
# use site.
MAX_MSG_LOG = 23
logger = logging.getLogger(__name__)

# ----------------------------------------------------------------------------
# Comm Interface
# ----------------------------------------------------------------------------

# Pick the function, `cuda_array`, used to allocate new CUDA byte arrays:
# prefer RMM, fall back to Numba, and if neither can be imported define a
# stub that fails only when it is actually called.
try:
    import rmm

    def cuda_array(n):
        """Return an RMM device array of ``n`` uint8 elements."""
        return rmm.device_array(n, dtype=np.uint8)

except ImportError:
    try:
        import numba.cuda

        def cuda_array(n):
            """Return a Numba device array of shape ``(n,)`` and dtype uint8."""
            return numba.cuda.device_array((n, ), dtype=np.uint8)

    except ImportError:

        def cuda_array(n):
            """Always raise: neither RMM nor Numba could be imported."""
            raise RuntimeError(
                "In order to send/recv CUDA arrays, Numba or RMM is required")


class UCX(Comm):
    """Comm object using UCP.
Example #7
0
def gpu_major_converter(original, nrows, ncols, dtype, to_order='C'):
    """Copy a 2-D device array into a newly allocated array of a given order.

    A new RMM device array of shape ``(nrows, ncols)`` is allocated with
    memory order ``to_order`` and filled from ``original`` by one of two
    CUDA kernels defined below; both write each output element from the
    input element at the same ``[row, col]`` index.

    Parameters
    ----------
    original : device array
        Source array, indexed as ``original[row, col]`` by the kernels.
        Assumes its shape is ``(nrows, ncols)`` - TODO confirm with callers.
    nrows, ncols : int
        Shape of the output array.
    dtype
        Element type of the output array. It is also compared against the
        string ``'float32'`` to choose the shared-memory tile element type;
        any value that does not compare equal uses ``numba.float64``.
    to_order : str, optional
        Memory order passed to the output allocation (default ``'C'``).

    Returns
    -------
    device array
        The newly allocated array (named ``row_major`` here regardless of
        ``to_order``).
    """
    row_major = rmm.device_array((nrows, ncols), dtype=dtype, order=to_order)

    tpb = driver.get_device().MAX_THREADS_PER_BLOCK

    # 2 ** (log2(tpb) / 2) is sqrt(tpb); truncating it gives the tile width,
    # and the height is chosen so width * height does not exceed tpb.
    tile_width = int(math.pow(2, math.log(tpb, 2) / 2))
    tile_height = int(tpb / tile_width)

    # One extra column in the shared tile.
    # NOTE(review): presumably padding against shared-memory bank conflicts,
    # as in the article linked below - confirm.
    tile_shape = (tile_height, tile_width + 1)

    # blocks and threads for the shared memory/tiled algorithm
    # see http://devblogs.nvidia.com/parallelforall/efficient-matrix-transpose-cuda-cc/ # noqa
    # x covers the columns (shape[1]) and y the rows (shape[0]); the "+ 1"
    # rounds up and may launch one spare block, which the bounds checks in
    # the kernel make harmless.
    blocks = int((row_major.shape[1]) / tile_height + 1), \
        int((row_major.shape[0]) / tile_width + 1)

    threads = tile_height, tile_width

    # blocks per gpu for the general kernel
    # (ceiling division: one thread per row)
    bpg = (nrows + tpb - 1) // tpb

    # Element type of the shared tile; only float32 and float64 are
    # distinguished here.
    if dtype == 'float32':
        dev_dtype = numba.float32

    else:
        dev_dtype = numba.float64

    # Fallback kernel: one thread per row, each thread walks every column of
    # its row and copies it element by element.
    @cuda.jit
    def general_kernel(input, output):
        tid = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
        if tid >= nrows:
            return
        _col_offset = 0
        while _col_offset < input.shape[1]:
            col_idx = _col_offset
            output[tid, col_idx] = input[tid, col_idx]
            _col_offset += 1

    # Tiled kernel: each thread stages input[by + ty, bx + tx] into
    # tile[ty, tx], waits at the barrier, then writes tile[tx, ty] to
    # output[by + tx, bx + ty].
    @cuda.jit
    def shared_kernel(input, output):

        tile = cuda.shared.array(shape=tile_shape, dtype=dev_dtype)

        tx = cuda.threadIdx.x
        ty = cuda.threadIdx.y
        bx = cuda.blockIdx.x * cuda.blockDim.x
        by = cuda.blockIdx.y * cuda.blockDim.y
        # Output coordinates use the thread indices swapped relative to the
        # load below.
        y = by + tx
        x = bx + ty

        if by + ty < input.shape[0] and bx + tx < input.shape[1]:
            tile[ty, tx] = input[by + ty, bx + tx]
        # The barrier sits outside the bounds checks, so every thread of the
        # block reaches it.
        cuda.syncthreads()
        if y < output.shape[0] and x < output.shape[1]:
            output[y, x] = tile[tx, ty]

    # check if we cannot call the shared memory kernel
    # block limits: 2**31-1 for x, 65535 for y dim of blocks
    if blocks[0] > 2147483647 or blocks[1] > 65535:
        general_kernel[bpg, tpb](original, row_major)

    else:
        shared_kernel[blocks, threads](original, row_major)

    return row_major
Example #8
0
def window_sizes_from_offset(arr, offset):
    """Run ``gpu_window_sizes_from_offset`` over *arr* into a new int32 array.

    An int32 RMM device array with the same shape as *arr* is allocated.
    When *arr* has at least one element the kernel is launched with
    ``(arr, output, offset)``; otherwise the array is returned as allocated.
    """
    sizes = rmm.device_array(shape=(arr.shape), dtype="int32")
    if not arr.size > 0:
        # Nothing to compute; skip the kernel launch.
        return sizes
    launch = gpu_window_sizes_from_offset.forall(arr.size)
    launch(arr, sizes, offset)
    return sizes
Example #9
0
    def __setitem__(self, key, value):
        """
        Set the value of self[key] to value.

        If value and self are of different types,
        value is coerced to self.dtype

        Parameters
        ----------
        key : slice or column-like
            A unit-stride slice, a boolean mask with the same length as this
            column, or anything ``column.as_column`` accepts as positions.
            An empty slice (start >= stop after normalisation) selects
            nothing and leaves the column unchanged.
        value : scalar, None or column-like
            A scalar (or None) is broadcast to the number of selected
            elements; anything else must already have that length.

        Raises
        ------
        NotImplementedError
            If *key* is a slice with a stride other than 1.
        ValueError
            If a boolean mask has the wrong length, or the size of *value*
            does not match the number of selected elements.
        IndexError
            If the scatter reports an out-of-bounds position.
        """
        from cudf.core import column

        if isinstance(key, slice):
            key_start, key_stop, key_stride = key.indices(len(self))
            if key_stride != 1:
                raise NotImplementedError("Stride not supported in slice")
            if key_start >= key_stop:
                # Empty selection: there is nothing to assign. Previously
                # abs() turned this into a positive element count and a
                # reversed range was handed to copy_range.
                return
            nelem = key_stop - key_start
        else:
            key = column.as_column(key)
            if pd.api.types.is_bool_dtype(key.dtype):
                if not len(key) == len(self):
                    raise ValueError(
                        "Boolean mask must be of same length as column")
                # Turn the mask into the positions it selects.
                key = column.as_column(cudautils.arange(len(self)))[key]
            nelem = len(key)

        if is_scalar(value):
            if is_categorical_dtype(self.dtype):
                from cudf.utils.cudautils import fill_value

                # Broadcast the encoded category code, then wrap it back
                # into a categorical column with this column's categories.
                data = rmm.device_array(nelem, dtype=self.codes.dtype)
                fill_value(data, self._encode(value))
                value = build_categorical_column(
                    categories=self.dtype.categories,
                    codes=as_column(data),
                    ordered=self.dtype.ordered,
                )
            elif value is None:
                value = column.column_empty(nelem, self.dtype, masked=True)
            else:
                to_dtype = pd.api.types.pandas_dtype(self.dtype)
                value = utils.scalar_broadcast_to(value, nelem, to_dtype)

        value = column.as_column(value).astype(self.dtype)

        if len(value) != nelem:
            msg = (f"Size mismatch: cannot set value "
                   f"of size {len(value)} to indexing result of size "
                   f"{nelem}")
            raise ValueError(msg)

        if is_categorical_dtype(value.dtype):
            value = value.cat().set_categories(self.categories)._column
            assert self.dtype == value.dtype

        if isinstance(key, slice):
            out = libcudf.copying.copy_range(self, value, key_start, key_stop,
                                             0)
        else:
            try:
                out = libcudf.copying.scatter(value, key, self)
            except RuntimeError as e:
                if "out of bounds" in str(e):
                    # Re-raise as IndexError, keeping the original error as
                    # the cause.
                    raise IndexError(
                        f"index out of bounds for column of size {len(self)}"
                    ) from e
                raise

        self._mimic_inplace(out, inplace=True)
Example #10
0
def modulo(arr, d):
    """Launch ``gpu_modulo`` on *arr* with operand *d* into a new array.

    The output is an RMM device array with the shape and dtype of *arr*.
    The kernel is only launched when *arr* has at least one element.
    """
    result = rmm.device_array(shape=arr.shape, dtype=arr.dtype)
    if not arr.size > 0:
        return result
    launch = gpu_modulo.forall(arr.size)
    launch(arr, result, d)
    return result
Example #11
0
def zeros(size, dtype):
    """Allocate *size* elements of *dtype* and run ``gpu_zeros`` over them.

    The kernel launch is skipped when *size* is not positive.
    NOTE(review): presumably ``gpu_zeros`` zero-fills the buffer - confirm
    against the kernel definition.
    """
    buf = rmm.device_array(size, dtype=dtype)
    if size > 0:
        launch = gpu_zeros.forall(size)
        launch(size, buf)
    return buf
Example #12
0
def arange_reversed(size, dtype=np.int32):
    """Allocate *size* elements and run ``gpu_arange_reversed`` over them.

    *dtype* defaults to ``np.int32``. The kernel launch is skipped when
    *size* is not positive.
    NOTE(review): presumably the kernel writes a descending range - confirm
    against the kernel definition.
    """
    buf = rmm.device_array(size, dtype=dtype)
    if size > 0:
        launch = gpu_arange_reversed.forall(size)
        launch(size, buf)
    return buf
Example #13
0
def compute_scale(arr, vmin, vmax):
    """Run ``gpu_scale`` on *arr* with bounds *vmin*/*vmax* into float64.

    A float64 RMM device array with ``arr.size`` elements is allocated;
    when it is non-empty the kernel is launched with
    ``(arr, vmin, vmax, output)``.
    """
    scaled = rmm.device_array(shape=arr.size, dtype=np.float64)
    if not scaled.size > 0:
        return scaled
    gpu_scale.forall(scaled.size)(arr, vmin, vmax, scaled)
    return scaled
Example #14
0
def full(size, value, dtype):
    """Allocate *size* elements of *dtype* and pass them to ``fill_value``.

    ``fill_value`` is called with the new array and *value*; the array is
    then returned.
    """
    filled = rmm.device_array(size, dtype=dtype)
    fill_value(filled, value)
    return filled
Example #15
0
 def rmm_device_array(n):
     """Return an RMM device array of ``n`` elements with dtype ``"u1"``.

     A ``weakref.finalize`` callback is registered on the array so that
     ``numba.cuda.current_context`` is called when the array is finalized.
     NOTE(review): presumably this keeps the CUDA context reachable until
     the buffer is released - confirm.
     """
     buf = rmm.device_array(n, dtype="u1")
     weakref.finalize(buf, numba.cuda.current_context)
     return buf
Example #16
0
 def as_column(self):
     """Build a column holding the values from ``_start`` to ``_stop``.

     When this object is empty a zero-length RMM device array of
     ``self.dtype`` is used instead of calling ``cudautils.arange``.
     """
     if len(self) > 0:
         data = cudautils.arange(self._start, self._stop, dtype=self.dtype)
         return column.build_column(data=Buffer(data), dtype=data.dtype)
     data = rmm.device_array(0, dtype=self.dtype)
     return column.build_column(data=Buffer(data), dtype=data.dtype)
Example #17
0
def as_contiguous(arr):
    """Copy the 1-D array *arr* into a freshly allocated RMM device array.

    The destination has the shape and dtype of *arr*; the result of
    ``copy_array(arr, out=destination)`` is returned. Only one-dimensional
    input is accepted (checked with an ``assert``).
    """
    assert arr.ndim == 1
    destination = rmm.device_array(shape=arr.shape, dtype=arr.dtype)
    copied = copy_array(arr, out=destination)
    return copied
Example #18
0
def apply_binarize(in_col, width):
    """Run the ``binarize`` kernel on *in_col* into an int8 matrix.

    The output is an RMM device array of shape ``(in_col.size, width)``.
    When it has at least one element it is first set to 0 and the kernel is
    launched with ``(in_col, output, width)``; otherwise it is returned as
    allocated.
    """
    matrix = rmm.device_array((in_col.size, width), dtype="int8")
    if not matrix.size > 0:
        return matrix
    matrix[:] = 0
    launch = binarize.forall(matrix.size)
    launch(in_col, matrix, width)
    return matrix