예제 #1
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Repeat ``scalar`` ``size`` times and return the result.

    Parameters
    ----------
    scalar
        Value to repeat. ``None`` produces a masked empty column; a
        ``pd.Categorical`` is broadcast from its first category only.
    size : int, tuple or list
        Number of elements. For a tuple or list only the first entry is
        used.
    dtype : optional
        Requested dtype. When omitted, ``None`` scalars and ``str``
        scalars fall back to ``"object"``; otherwise the dtype of the
        converted scalar is used.

    Returns
    -------
    A masked column from ``column_empty`` (``None`` scalar), a string
    column gathered from a one-element ``nvstrings`` column (object
    dtype), or an ``rmm`` device array filled with the scalar.
    """
    from cudf.utils.cudautils import fill_value
    from cudf.utils.dtypes import to_cudf_compatible_scalar, is_string_dtype
    from cudf.core.column import column_empty

    # Only the leading dimension of a shape-like argument matters.
    if isinstance(size, (tuple, list)):
        size = size[0]

    if scalar is None:
        null_dtype = "object" if dtype is None else dtype
        return column_empty(size, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        first_category = scalar.categories[0]
        return scalar_broadcast_to(first_category, size).astype(dtype)

    keep_as_string = isinstance(scalar, str) and (
        is_string_dtype(dtype) or dtype is None
    )
    if keep_as_string:
        dtype = "object"
    else:
        scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
        dtype = scalar.dtype

    if np.dtype(dtype) != np.dtype("object"):
        # Fixed-width path: allocate on device, then fill unless empty.
        filled = rmm.device_array((size, ), dtype=dtype)
        if filled.size != 0:
            fill_value(filled, scalar)
        return filled

    # String path: build a single-row string column and gather row 0
    # ``size`` times.
    import nvstrings
    from cudf.core.column import as_column
    from cudf.utils.cudautils import zeros

    row_zero_indices = zeros(size, dtype="int32")
    single_row = as_column(nvstrings.to_device([scalar]))
    return single_row[row_zero_indices]
예제 #2
0
    def __setitem__(self, key, value):
        """
        Set the value of self[key] to value.

        If value and self are of different types,
        value is coerced to self.dtype

        Parameters
        ----------
        key : slice or column-like
            A unit-stride slice, a boolean mask of the same length as
            this column, or anything ``columnops.as_column`` accepts as
            row positions.
        value : scalar or column-like
            A scalar is broadcast to the number of selected rows;
            anything else must already have exactly that many elements.

        Raises
        ------
        NotImplementedError
            If ``key`` is a slice whose step is not 1.
        ValueError
            If a boolean mask has a different length than the column, or
            if ``value`` does not match the number of selected rows.
        """
        import cudf.bindings.copying as cpp_copying
        from cudf.dataframe import columnops

        if isinstance(key, slice):
            key_start, key_stop, key_stride = key.indices(len(self))
            if key_stride != 1:
                raise NotImplementedError("Stride not supported in slice")
            # slice.indices() may return stop < start for an empty slice
            # (e.g. 5:2). Clamp so such a slice selects zero rows instead
            # of abs(stop - start) rows.
            key_stop = max(key_stop, key_start)
            nelem = key_stop - key_start
        else:
            key = columnops.as_column(key)
            if pd.api.types.is_bool_dtype(key.dtype):
                if len(key) != len(self):
                    raise ValueError(
                        "Boolean mask must be of same length as column")
                # Turn the mask into the positions it selects.
                key = columnops.as_column(cudautils.arange(len(self)))[key]
            nelem = len(key)

        if utils.is_scalar(value):
            if is_categorical_dtype(self.dtype):
                from cudf.dataframe.categorical import CategoricalColumn
                from cudf.dataframe.buffer import Buffer
                from cudf.utils.cudautils import fill_value

                # Broadcast the encoded category code, then wrap it with
                # this column's categories.
                data = rmm.device_array(nelem, dtype="int8")
                fill_value(data, self._encode(value))
                value = CategoricalColumn(
                    data=Buffer(data),
                    categories=self._categories,
                    ordered=False,
                )
            elif value is None:
                value = columnops.column_empty(nelem, self.dtype, masked=True)
            else:
                to_dtype = pd.api.types.pandas_dtype(self.dtype)
                value = utils.scalar_broadcast_to(value, nelem, to_dtype)

        value = columnops.as_column(value).astype(self.dtype)

        if len(value) != nelem:
            msg = (f"Size mismatch: cannot set value "
                   f"of size {len(value)} to indexing result of size "
                   f"{nelem}")
            raise ValueError(msg)

        if isinstance(key, slice):
            out = cpp_copying.apply_copy_range(self, value, key_start,
                                               key_stop, 0)
        else:
            out = cpp_copying.apply_scatter(value, key, self)

        # Adopt the buffers of the result so the assignment is in place.
        self._data = out.data
        self._mask = out.mask
        self._update_null_count()
예제 #3
0
파일: utils.py 프로젝트: raydouglass/cudf
def scalar_broadcast_to(scalar, shape, dtype):
    """Return an ``rmm`` device array of ``shape`` filled with ``scalar``.

    ``shape`` may be a tuple or a single value; a non-tuple is wrapped
    into a one-element tuple. The fill step is skipped for an empty
    array.
    """
    from cudf.utils.cudautils import fill_value

    dims = shape if isinstance(shape, tuple) else (shape, )
    filled = rmm.device_array(dims, dtype=dtype)
    if filled.size != 0:
        fill_value(filled, scalar)
    return filled
예제 #4
0
파일: column.py 프로젝트: harrism/cudf
 def allocate_mask(self, all_valid=True):
     """Return the result of ``set_mask`` with a freshly allocated mask.

     The mask buffer is sized with ``utils.calc_chunk_size`` for the
     length of this column. With ``all_valid=True`` every mask byte is
     set to ``0xff`` and the null count is 0; with ``all_valid=False``
     every byte is 0 and the null count equals the column length.
     """
     row_count = len(self)
     if all_valid:
         fill_byte, null_total = 0xff, 0
     else:
         fill_byte, null_total = 0, row_count
     mask_buf = cuda.device_array(
         utils.calc_chunk_size(row_count, utils.mask_bitsize),
         dtype=utils.mask_dtype,
     )
     cudautils.fill_value(mask_buf, fill_byte)
     return self.set_mask(mask=mask_buf, null_count=null_total)
예제 #5
0
파일: utils.py 프로젝트: yutiansut/cudf
def scalar_broadcast_to(scalar, shape, dtype):
    """Broadcast ``scalar`` to ``shape`` with the given ``dtype``.

    For object dtype the result is a string column obtained by
    gathering row 0 of a one-element ``StringColumn`` ``shape[0]``
    times. For every other dtype the result is an ``rmm`` device array
    of ``shape`` filled with ``scalar`` (the fill is skipped when the
    array is empty).
    """
    from cudf.utils.cudautils import fill_value

    dims = shape if isinstance(shape, tuple) else (shape, )

    if np.dtype(dtype) != np.dtype("object"):
        filled = rmm.device_array(dims, dtype=dtype)
        if filled.size != 0:
            fill_value(filled, scalar)
        return filled

    import nvstrings
    from cudf.dataframe.string import StringColumn
    from cudf.utils.cudautils import zeros

    # Every output row points at the single source row.
    row_zero_indices = zeros(dims[0], dtype='int32')
    single_row = StringColumn(nvstrings.to_device([scalar]))
    return single_row[row_zero_indices]
예제 #6
0
파일: columnops.py 프로젝트: yutiansut/cudf
def column_empty(row_count, dtype, masked, categories=None):
    """Allocate a new column with ``row_count`` rows of ``dtype``.

    Parameters
    ----------
    row_count : int
        Number of rows to allocate.
    dtype
        Anything accepted by ``pd.api.types.pandas_dtype``.
    masked : bool
        When true, a mask is created with ``cudautils.make_mask`` and
        filled with 0; otherwise the column has no mask.
    categories : optional
        When given (or when ``dtype`` is categorical) the column is
        built with dtype ``'category'``.

    Returns
    -------
    The column produced by ``build_column``.
    """
    dtype = pd.api.types.pandas_dtype(dtype)

    null_mask = None
    if masked:
        null_mask = cudautils.make_mask(row_count)
        cudautils.fill_value(null_mask, 0)

    wants_categorical = (
        categories is not None
        or pd.api.types.is_categorical_dtype(dtype)
    )

    if wants_categorical:
        data = Buffer(rmm.device_array((row_count,), dtype=dtype))
        dtype = 'category'
    elif dtype.kind in 'OU':
        # Object/unicode dtypes are backed by nvstrings data.
        if row_count == 0:
            data = nvstrings.to_device([])
        else:
            # Strings are produced by converting a float64 scratch
            # buffer, with the raw mask supplied as the nulls bitmask.
            scratch = rmm.device_array((row_count,), dtype='float64')
            data = nvstrings.dtos(scratch,
                                  len(scratch),
                                  nulls=null_mask,
                                  bdevmem=True)
    else:
        data = Buffer(rmm.device_array((row_count,), dtype=dtype))

    mask_buffer = Buffer(null_mask) if null_mask is not None else None

    from cudf.dataframe.columnops import build_column
    return build_column(data, dtype, mask_buffer, categories)
예제 #7
0
    def __setitem__(self, key, value):
        """
        Set the value of self[key] to value.

        If value and self are of different types,
        value is coerced to self.dtype

        Parameters
        ----------
        key : slice or column-like
            A unit-stride slice, a boolean mask of the same length as
            this column, or anything ``column.as_column`` accepts as row
            positions.
        value : scalar or column-like
            A scalar is broadcast to the number of selected rows;
            anything else must already have exactly that many elements.

        Raises
        ------
        NotImplementedError
            If ``key`` is a slice whose step is not 1.
        ValueError
            If a boolean mask has a different length than the column, or
            if ``value`` does not match the number of selected rows.
        IndexError
            If the scatter reports an out-of-bounds position.
        """
        from cudf.core import column

        if isinstance(key, slice):
            key_start, key_stop, key_stride = key.indices(len(self))
            if key_stride != 1:
                raise NotImplementedError("Stride not supported in slice")
            # slice.indices() may return stop < start for an empty slice
            # (e.g. 5:2). Clamp so such a slice selects zero rows instead
            # of abs(stop - start) rows.
            key_stop = max(key_stop, key_start)
            nelem = key_stop - key_start
        else:
            key = column.as_column(key)
            if pd.api.types.is_bool_dtype(key.dtype):
                if len(key) != len(self):
                    raise ValueError(
                        "Boolean mask must be of same length as column"
                    )
                # Turn the mask into the positions it selects.
                key = column.as_column(cudautils.arange(len(self)))[key]
            nelem = len(key)

        if is_scalar(value):
            if is_categorical_dtype(self.dtype):
                from cudf.utils.cudautils import fill_value

                # Broadcast the encoded category code, then wrap it with
                # this column's categories and ordering.
                data = rmm.device_array(nelem, dtype=self.codes.dtype)
                fill_value(data, self._encode(value))
                value = build_categorical_column(
                    categories=self.dtype.categories,
                    codes=as_column(data),
                    ordered=self.dtype.ordered,
                )
            elif value is None:
                value = column.column_empty(nelem, self.dtype, masked=True)
            else:
                to_dtype = pd.api.types.pandas_dtype(self.dtype)
                value = utils.scalar_broadcast_to(value, nelem, to_dtype)

        value = column.as_column(value).astype(self.dtype)

        if len(value) != nelem:
            msg = (
                f"Size mismatch: cannot set value "
                f"of size {len(value)} to indexing result of size "
                f"{nelem}"
            )
            raise ValueError(msg)

        if is_categorical_dtype(value.dtype):
            # Align the incoming categories with this column's before
            # copying values in.
            value = value.cat().set_categories(self.categories)
            assert self.dtype == value.dtype

        if isinstance(key, slice):
            out = libcudf.copying.copy_range(
                self, value, key_start, key_stop, 0
            )
        else:
            try:
                out = libcudf.copying.scatter(value, key, self)
            except RuntimeError as e:
                # Surface out-of-bounds scatter errors as IndexError,
                # keeping the original error as the explicit cause.
                if "out of bounds" in str(e):
                    raise IndexError(
                        f"index out of bounds for column of size {len(self)}"
                    ) from e
                raise

        self._mimic_inplace(out, inplace=True)