Beispiel #1
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Build a column of ``size`` rows in which every row holds ``scalar``.

    Parameters
    ----------
    scalar
        Value to repeat. ``None`` and NaT datetimes/timedeltas take the
        masked-column path; a ``pd.Categorical`` is handled separately.
    size : int, tuple or list
        Number of rows. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. When omitted it is taken from the converted
        scalar, or is "object" on the null path.

    Returns
    -------
    The column produced by whichever construction path applies.
    """
    # Shape-like sizes contribute only their leading extent.
    if isinstance(size, (tuple, list)):
        size = size[0]

    missing = scalar is None or (
        isinstance(scalar, (np.datetime64, np.timedelta64))
        and np.isnat(scalar)
    )
    if missing:
        # Null input: hand back a masked column (presumably all-null;
        # that depends on column_empty, which is not visible here).
        null_dtype = "object" if dtype is None else dtype
        return column.column_empty(size, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is not None:
            # With an explicit dtype, broadcast the first category and cast.
            first_category = scalar.categories[0]
            return scalar_broadcast_to(first_category, size).astype(dtype)
        return _categorical_scalar_broadcast_to(scalar, size)

    host_scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    result_dtype = host_scalar.dtype

    if np.dtype(result_dtype).kind not in ("O", "U"):
        # Fixed-width path: allocate, then fill in place unless empty.
        filled = column.column_empty(size, dtype=result_dtype)
        if filled.size != 0:
            filled.data_array_view[:] = host_scalar
        return filled

    # String path: gather row 0 of a one-row string column ``size`` times.
    zero_indices = column.full(size, 0, dtype="int32")
    single_row = column.as_column([host_scalar], dtype="str")
    return single_row[zero_indices]
Beispiel #2
0
 def find_last_value(self, value: ScalarLike, closest: bool = False) -> int:
     """
     Return the offset of the last entry equal to ``value``.

     When there is no exact match, the column is monotonic and
     ``closest`` is true, a fallback is used: -1 if ``value`` is below
     the column minimum, ``len(self) - 1`` if it is above the maximum,
     otherwise the offset of the last smaller value.

     Raises ``ValueError`` if ``value`` is not numeric or if no offset
     can be determined.
     """
     value = to_cudf_compatible_scalar(value)
     if not pd.api.types.is_number(value):
         raise ValueError("Expected a numeric value")

     # An empty column skips the search and keeps the initial offset 0.
     offset = (
         cudautils.find_last(self.data_array_view, value, mask=self.mask)
         if len(self)
         else 0
     )
     if offset != -1:
         return offset

     # No exact match: only monotonic columns with closest=True fall back.
     if not (self.is_monotonic and closest):
         raise ValueError("value not found")

     if value < self.min():
         return -1
     if value > self.max():
         return len(self) - 1

     offset = cudautils.find_last(
         self.data_array_view, value, mask=self.mask, compare="lt",
     )
     if offset == -1:
         raise ValueError("value not found")
     return offset
Beispiel #3
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Build a column of ``size`` rows in which every row holds ``scalar``.

    Parameters
    ----------
    scalar
        Value to repeat. Null host scalars, ``pd.Categorical`` and
        ``decimal.Decimal`` values each have a dedicated path.
    size : int, tuple or list
        Number of rows. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. When omitted it is derived from the scalar, or
        is "object" on the null path.

    Returns
    -------
    The column produced by whichever construction path applies.
    """
    # Shape-like sizes contribute only their leading extent.
    if isinstance(size, (tuple, list)):
        size = size[0]

    if cudf._lib.scalar._is_null_host_scalar(scalar):
        null_dtype = "object" if dtype is None else dtype
        return column.column_empty(size, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is not None:
            # With an explicit dtype, broadcast the first category and cast.
            first_category = scalar.categories[0]
            return scalar_broadcast_to(first_category, size).astype(dtype)
        return _categorical_scalar_broadcast_to(scalar, size)

    if isinstance(scalar, decimal.Decimal):
        decimal_dtype = dtype
        if decimal_dtype is None:
            decimal_dtype = cudf.Decimal128Dtype._from_decimal(scalar)

        decimal_col = column.column_empty(size, dtype=decimal_dtype)
        if decimal_col.size != 0:
            decimal_col[:] = scalar
        return decimal_col

    host_scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    result_dtype = host_scalar.dtype
    return cudf.core.column.full(
        size=size, fill_value=host_scalar, dtype=result_dtype
    )
Beispiel #4
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host value and settle the dtype that goes with it.

        Returns a ``(value, dtype)`` pair. A null value is replaced by
        ``NA``; ``dtype`` is always a ``np.dtype``, with unicode dtypes
        mapped to "object".

        Raises ``TypeError`` when ``dtype`` is omitted and cannot be
        inferred from a null value (a non-datetime null, or a NaT with
        the "generic" unit).
        """
        value = to_cudf_compatible_scalar(value, dtype=dtype)
        is_null = _is_null_host_scalar(value)

        if dtype is None:
            if is_null:
                # Only a NaT with a concrete unit carries enough dtype info.
                if not isinstance(value, (np.datetime64, np.timedelta64)):
                    raise TypeError(
                        "dtype required when constructing a null scalar")
                unit = np.datetime_data(value)[0]
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
            dtype = value.dtype
        dtype = np.dtype(dtype)

        # temporary: unicode dtypes are represented as object
        if dtype.char == "U":
            dtype = np.dtype("object")

        return (NA if is_null else value), dtype
Beispiel #5
0
    def __setitem__(self, key, value):
        """Assign ``value`` at ``key`` on the wrapped series' column.

        Tuple keys are converted to lists. ``value`` is coerced to a
        cudf-compatible scalar or a column. For a non-categorical target
        and a numeric value with a non-integer key, both the value and
        the target column are first cast to their common result type.
        """
        from cudf.core.column import column

        if isinstance(key, tuple):
            key = list(key)

        # Coerce the incoming value into a scalar or a column.
        value = (
            to_cudf_compatible_scalar(value)
            if is_scalar(value)
            else column.as_column(value)
        )

        needs_promotion = (
            not is_categorical_dtype(self._sr._column.dtype)
            and hasattr(value, "dtype")
            and pd.api.types.is_numeric_dtype(value.dtype)
            and not pd.api.types.is_integer(key)
        )
        if needs_promotion:
            # Normalize both sides to one dtype before writing.
            common_dtype = np.result_type(
                value.dtype, self._sr._column.dtype
            )
            value = value.astype(common_dtype)
            self._sr._column._mimic_inplace(
                self._sr._column.astype(common_dtype), inplace=True
            )

        self._sr._column[key] = value
Beispiel #6
0
    def __setitem__(self, key, value):
        """Assign ``value`` at ``key`` on the wrapped series' column.

        Tuple keys are converted to lists. Scalars are made
        cudf-compatible. A list or dict assigned into a list/struct
        column is passed through untouched; anything else becomes a
        column. For a target that is neither Decimal64 nor categorical,
        a non-decimal numeric value with a non-integer key causes both
        the value and the target column to be cast to their common
        result type.
        """
        from cudf.core.column import column

        if isinstance(key, tuple):
            key = list(key)

        # Coerce the incoming value into a scalar or a column.
        if is_scalar(value):
            value = to_cudf_compatible_scalar(value)
        else:
            nested_assignment = isinstance(
                value, (list, dict)
            ) and isinstance(
                self._sr._column.dtype, (cudf.ListDtype, cudf.StructDtype)
            )
            if not nested_assignment:
                value = column.as_column(value)

        needs_promotion = (
            not isinstance(
                self._sr._column.dtype,
                (cudf.Decimal64Dtype, cudf.CategoricalDtype),
            )
            and hasattr(value, "dtype")
            and _is_non_decimal_numeric_dtype(value.dtype)
            and not is_integer(key)
        )
        if needs_promotion:
            # Normalize both sides to one dtype before writing.
            common_dtype = np.result_type(
                value.dtype, self._sr._column.dtype
            )
            value = value.astype(common_dtype)
            self._sr._column._mimic_inplace(
                self._sr._column.astype(common_dtype), inplace=True)

        self._sr._column[key] = value
Beispiel #7
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Repeat ``scalar`` ``size`` times.

    Parameters
    ----------
    scalar
        Value to repeat. ``None`` yields a masked empty column; a
        ``pd.Categorical`` is reduced to its first category.
    size : int, tuple or list
        Number of rows. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. A Python ``str`` with a string dtype or no dtype
        is treated as "object".

    Returns
    -------
    A string column for object dtype, otherwise the filled device array.
    """
    from cudf.utils.cudautils import fill_value
    from cudf.utils.dtypes import to_cudf_compatible_scalar, is_string_dtype
    from cudf.core.column import column_empty

    # Shape-like sizes contribute only their leading extent.
    if isinstance(size, (tuple, list)):
        size = size[0]

    if scalar is None:
        null_dtype = "object" if dtype is None else dtype
        return column_empty(size, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        first_category = scalar.categories[0]
        return scalar_broadcast_to(first_category, size).astype(dtype)

    keep_python_str = isinstance(scalar, str) and (
        is_string_dtype(dtype) or dtype is None
    )
    if keep_python_str:
        dtype = "object"
    else:
        scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
        dtype = scalar.dtype

    if np.dtype(dtype) != np.dtype("object"):
        # Fixed-width path: allocate a device array, fill unless empty.
        device_buf = rmm.device_array((size, ), dtype=dtype)
        if device_buf.size != 0:
            fill_value(device_buf, scalar)
        return device_buf

    # String path: imports stay local so they are only needed here.
    import nvstrings
    from cudf.core.column import as_column
    from cudf.utils.cudautils import zeros

    # Gather row 0 of a one-row string column ``size`` times.
    zero_indices = zeros(size, dtype="int32")
    single_row = as_column(nvstrings.to_device([scalar]))
    return single_row[zero_indices]
Beispiel #8
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Build a column of ``size`` rows in which every row holds ``scalar``.

    Parameters
    ----------
    scalar
        Value to repeat. ``None`` yields a masked empty column; a
        ``pd.Categorical`` is reduced to its first category.
    size : int, tuple or list
        Number of rows. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. A Python ``str`` with a string dtype or no dtype
        is treated as "object".

    Returns
    -------
    The column produced by whichever construction path applies.
    """
    from cudf.utils.dtypes import to_cudf_compatible_scalar, is_string_dtype
    from cudf.core.column import column_empty

    # Shape-like sizes contribute only their leading extent.
    if isinstance(size, (tuple, list)):
        size = size[0]

    if scalar is None:
        null_dtype = "object" if dtype is None else dtype
        return column_empty(size, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        first_category = scalar.categories[0]
        return scalar_broadcast_to(first_category, size).astype(dtype)

    keep_python_str = isinstance(scalar, str) and (
        is_string_dtype(dtype) or dtype is None
    )
    if keep_python_str:
        dtype = "object"
    else:
        scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
        dtype = scalar.dtype

    if np.dtype(dtype) != np.dtype("object"):
        # Fixed-width path: allocate, then fill in place unless empty.
        filled = column_empty(size, dtype=dtype)
        if filled.size != 0:
            filled.data_array_view[:] = scalar
        return filled

    from cudf.core.column import as_column

    # String path: gather row 0 of a one-row string column ``size`` times.
    zero_indices = cupy.zeros(size, dtype="int32")
    single_row = as_column([scalar], dtype="str")
    return single_row[zero_indices]
Beispiel #9
0
 def _find_value(self, value: ScalarLike, closest: bool, find: Callable,
                 compare: str) -> int:
     """
     Locate ``value`` in this column using the supplied ``find`` routine.

     ``find`` is first called for an exact match. If that yields -1, the
     column is monotonic increasing and ``closest`` is true, ``find`` is
     called again with ``compare=compare``.

     Raises ``ValueError`` if ``value`` is not numeric or if no offset
     is found.
     """
     value = to_cudf_compatible_scalar(value)
     if not is_number(value):
         raise ValueError("Expected a numeric value")

     # An empty column skips the search and keeps the initial offset 0.
     position = (
         find(self.data_array_view, value, mask=self.mask)
         if len(self)
         else 0
     )
     if position != -1:
         return position

     # No exact match: only the closest-match mode may retry.
     if not (self.is_monotonic_increasing and closest):
         raise ValueError("value not found")

     position = find(
         self.data_array_view,
         value,
         mask=self.mask,
         compare=compare,
     )
     if position == -1:
         raise ValueError("value not found")
     return position
Beispiel #10
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host value and settle the dtype that goes with it.

        Returns a ``(value, dtype)`` pair. A value that was null on entry
        is replaced by ``NA``. ``dtype`` is left as-is when it is a
        ``Decimal64Dtype`` and is otherwise passed through ``np.dtype``.

        Raises ``TypeError`` when ``dtype`` is omitted and cannot be
        inferred from a null value (a non-datetime null, or a NaT with
        the "generic" unit).
        """
        # Nullness is decided on the raw input, before any conversion.
        is_null = _is_null_host_scalar(value)

        if isinstance(dtype, Decimal64Dtype):
            # Round-trip through Arrow using the dtype's precision/scale.
            arrow_type = pa.decimal128(dtype.precision, dtype.scale)
            value = pa.scalar(value, type=arrow_type).as_py()
        elif dtype is None and isinstance(value, decimal.Decimal):
            dtype = Decimal64Dtype._from_decimal(value)

        value = to_cudf_compatible_scalar(value, dtype=dtype)

        if dtype is None:
            if is_null:
                # Only a NaT with a concrete unit carries enough dtype info.
                if not isinstance(value, (np.datetime64, np.timedelta64)):
                    raise TypeError(
                        "dtype required when constructing a null scalar")
                unit = np.datetime_data(value)[0]
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
            dtype = value.dtype

        if not isinstance(dtype, Decimal64Dtype):
            dtype = np.dtype(dtype)

        return (NA if is_null else value), dtype
Beispiel #11
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host value and settle the dtype that goes with it.

        Parameters
        ----------
        value
            Host value: a list, a dict, a ``decimal.Decimal``, a null
            (``None`` / ``NA`` / NaT) or anything accepted by
            ``to_cudf_compatible_scalar``.
        dtype
            Requested dtype, or ``None`` to infer it from ``value``.

        Returns
        -------
        tuple
            ``(value, dtype)``. A value that was null on entry is
            returned as ``NA``.

        Raises
        ------
        TypeError
            If a list is given together with a dtype, or if ``dtype`` is
            omitted and cannot be inferred from a null value.
        ValueError
            If a non-null, non-list value is given with a ``ListDtype``,
            or a non-null, non-dict value with a ``StructDtype``.
        """
        # Nullness is decided on the raw input, before any conversion.
        valid = not cudf._lib.scalar._is_null_host_scalar(value)

        if isinstance(value, list):
            if dtype is not None:
                raise TypeError("Lists may not be cast to a different dtype")
            else:
                dtype = ListDtype.from_arrow(
                    pa.infer_type([value], from_pandas=True))
                return value, dtype
        elif isinstance(dtype, ListDtype):
            # Identity checks rather than ``in {None, NA}``: set membership
            # hashes ``value``, so an unhashable value (e.g. a dict) raised
            # "unhashable type" instead of the intended ValueError.
            if value is not None and value is not NA:
                raise ValueError(f"Can not coerce {value} to ListDtype")
            else:
                return NA, dtype

        if isinstance(value, dict):
            if dtype is None:
                dtype = StructDtype.from_arrow(
                    pa.infer_type([value], from_pandas=True))
            return value, dtype
        elif isinstance(dtype, StructDtype):
            # Same identity-based null test as the ListDtype branch.
            if value is not None and value is not NA:
                raise ValueError(f"Can not coerce {value} to StructDType")
            else:
                return NA, dtype

        if isinstance(dtype, Decimal64Dtype):
            # Round-trip through Arrow using the dtype's precision/scale.
            value = pa.scalar(value,
                              type=pa.decimal128(dtype.precision,
                                                 dtype.scale)).as_py()
        if isinstance(value, decimal.Decimal) and dtype is None:
            dtype = Decimal64Dtype._from_decimal(value)

        value = to_cudf_compatible_scalar(value, dtype=dtype)

        if dtype is None:
            if not valid:
                # Only a NaT with a concrete unit carries enough dtype info.
                if isinstance(value, (np.datetime64, np.timedelta64)):
                    unit, _ = np.datetime_data(value)
                    if unit == "generic":
                        raise TypeError(
                            "Cant convert generic NaT to null scalar")
                    else:
                        dtype = value.dtype
                else:
                    raise TypeError(
                        "dtype required when constructing a null scalar")
            else:
                dtype = value.dtype

        if not isinstance(dtype, Decimal64Dtype):
            dtype = cudf.dtype(dtype)

        if not valid:
            value = NA

        return value, dtype
Beispiel #12
0
def _cast_to_appropriate_cudf_type(val, index=None):
    """Convert an array-like result into a cudf scalar or ``cudf.Series``.

    A 0-d ``val`` becomes a cudf-compatible scalar. A 1-D value, or a
    2-D value with exactly one column, becomes a ``cudf.Series`` when
    ``index`` is ``None`` or has the same length as ``val``. Every other
    case returns ``NotImplemented``.
    """
    ndim = val.ndim

    # Scalar case.
    if ndim == 0:
        return to_cudf_compatible_scalar(val)

    column_like = ndim == 1 or (ndim == 2 and val.shape[1] == 1)
    if not column_like:
        return NotImplemented

    # If an index is given with a different length than the values,
    # cupy dispatching behaviour is undefined, so it is not implemented.
    if index is not None and len(index) != len(val):
        return NotImplemented

    return cudf.Series(val, index=index)
Beispiel #13
0
 def _scalar_binop(self, other, op):
     """Apply binary operation ``op`` between this scalar and ``other``.

     Returns ``NotImplemented`` for column, series, index and ndarray
     operands so that the column implementation can handle them.
     Otherwise returns a ``Scalar`` of the resolved result dtype, which
     is null when either operand is invalid.
     """
     if isinstance(other, (ColumnBase, Series, Index, np.ndarray)):
         # Dispatch to the column implementation.
         return NotImplemented

     other = to_cudf_compatible_scalar(other)
     out_dtype = self._binop_result_dtype_or_error(other, op)

     # A numpy scalar operand counts as valid; otherwise ask the operand.
     both_valid = self.is_valid and (
         isinstance(other, np.generic) or other.is_valid
     )
     if both_valid:
         computed = self._dispatch_scalar_binop(other, op)
         return Scalar(computed, dtype=out_dtype)
     return Scalar(None, dtype=out_dtype)
Beispiel #14
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host value and settle the dtype that goes with it.

        Returns a ``(value, dtype)`` pair. A null value is replaced by
        ``NA``. A ``decimal.Decimal`` gets a ``Decimal64Dtype`` derived
        from the value; any other value gets a ``np.dtype``, with
        unicode dtypes mapped to "object".

        Raises ``NotImplementedError`` when ``dtype`` is a
        ``Decimal64Dtype``, and ``TypeError`` when a ``decimal.Decimal``
        is given with a dtype or when a null value gives no way to
        infer the dtype.
        """
        if isinstance(dtype, Decimal64Dtype):
            # TODO: Support coercion from decimal.Decimal to different dtype
            # TODO: Support coercion from integer to Decimal64Dtype
            raise NotImplementedError(
                "dtype as cudf.Decimal64Dtype is not supported. Pass a "
                "decimal.Decimal to construct a DecimalScalar.")
        if dtype is not None and isinstance(value, decimal.Decimal):
            raise TypeError(f"Can not coerce decimal to {dtype}")

        value = to_cudf_compatible_scalar(value, dtype=dtype)
        is_null = _is_null_host_scalar(value)

        if isinstance(value, decimal.Decimal):
            # e.g. 0.0042 -> Decimal64Dtype(2, 4)
            dtype = Decimal64Dtype._from_decimal(value)
        else:
            if dtype is None:
                if is_null:
                    # Only a NaT with a concrete unit carries dtype info.
                    if not isinstance(value,
                                      (np.datetime64, np.timedelta64)):
                        raise TypeError(
                            "dtype required when constructing a null scalar")
                    unit = np.datetime_data(value)[0]
                    if unit == "generic":
                        raise TypeError(
                            "Cant convert generic NaT to null scalar")
                dtype = value.dtype
            dtype = np.dtype(dtype)

            # temporary: unicode dtypes are represented as object
            if dtype.char == "U":
                dtype = np.dtype("object")

        return (NA if is_null else value), dtype
Beispiel #15
0
def scalar_broadcast_to(scalar, shape, dtype):
    """Repeat ``scalar`` to fill ``shape``.

    Parameters
    ----------
    scalar
        Value to repeat; converted with ``to_cudf_compatible_scalar``.
    shape : int or tuple
        Target shape. A non-tuple is wrapped into a 1-tuple.
    dtype
        Target dtype. Object dtype selects the string path.

    Returns
    -------
    A string column with ``shape[0]`` rows for object dtype, otherwise
    the filled device array of the given shape.
    """
    from cudf.utils.cudautils import fill_value
    from cudf.utils.dtypes import to_cudf_compatible_scalar

    scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)

    if not isinstance(shape, tuple):
        shape = (shape, )

    if np.dtype(dtype) != np.dtype("object"):
        # Fixed-width path: allocate a device array, fill unless empty.
        device_buf = rmm.device_array(shape, dtype=dtype)
        if device_buf.size != 0:
            fill_value(device_buf, scalar)
        return device_buf

    # String path: imports stay local so they are only needed here.
    import nvstrings
    from cudf.core.column import StringColumn
    from cudf.utils.cudautils import zeros

    # Gather row 0 of a one-row string column shape[0] times.
    zero_indices = zeros(shape[0], dtype="int32")
    single_row = StringColumn(nvstrings.to_device([scalar]))
    return single_row[zero_indices]
Beispiel #16
0
def scalar_broadcast_to(scalar, size, dtype=None):
    """Build a column of ``size`` rows in which every row holds ``scalar``.

    Parameters
    ----------
    scalar
        Value to repeat. Null host scalars, ``pd.Categorical`` and
        ``decimal.Decimal`` values each have a dedicated path.
    size : int, tuple or list
        Number of rows. For a tuple or list only the first entry is used.
    dtype : optional
        Requested dtype. When omitted it is derived from the scalar, or
        is "object" on the null path.

    Returns
    -------
    The column produced by whichever construction path applies.
    """
    # Shape-like sizes contribute only their leading extent.
    if isinstance(size, (tuple, list)):
        size = size[0]

    if cudf._lib.scalar._is_null_host_scalar(scalar):
        null_dtype = "object" if dtype is None else dtype
        return column.column_empty(size, dtype=null_dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is not None:
            # With an explicit dtype, broadcast the first category and cast.
            first_category = scalar.categories[0]
            return scalar_broadcast_to(first_category, size).astype(dtype)
        return _categorical_scalar_broadcast_to(scalar, size)

    if isinstance(scalar, decimal.Decimal):
        decimal_dtype = dtype
        if decimal_dtype is None:
            decimal_dtype = cudf.Decimal64Dtype._from_decimal(scalar)

        decimal_col = column.column_empty(size, dtype=decimal_dtype)
        if decimal_col.size != 0:
            decimal_col[:] = scalar
        return decimal_col

    host_scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    result_dtype = host_scalar.dtype

    if cudf.dtype(result_dtype).kind not in ("O", "U"):
        # Fixed-width path: allocate, then fill in place unless empty.
        filled = column.column_empty(size, dtype=result_dtype)
        if filled.size != 0:
            filled.data_array_view[:] = host_scalar
        return filled

    # String path: gather row 0 of a one-row string column ``size`` times.
    zero_indices = column.full(size, 0, dtype="int32")
    single_row = column.as_column([host_scalar], dtype="str")
    return single_row[zero_indices]