def scalar_broadcast_to(scalar, size, dtype=None):
    """Broadcast a host ``scalar`` into a device column of ``size`` rows.

    A null scalar (``None`` or a NaT datetime64/timedelta64) yields a
    fully-masked empty column; a pandas ``Categorical`` is broadcast via
    its first category (or a categorical column when ``dtype`` is None);
    anything else is normalized with ``to_cudf_compatible_scalar`` and
    replicated.
    """
    # A (rows, ...) shape collapses to its leading row count.
    if isinstance(size, (tuple, list)):
        size = size[0]

    # NOTE: isinstance guard short-circuits so np.isnat only ever sees
    # datetime64/timedelta64 values (it raises on anything else).
    if scalar is None or (
        isinstance(scalar, (np.datetime64, np.timedelta64))
        and np.isnat(scalar)
    ):
        return column.column_empty(
            size,
            dtype="object" if dtype is None else dtype,
            masked=True,
        )

    if isinstance(scalar, pd.Categorical):
        if dtype is None:
            return _categorical_scalar_broadcast_to(scalar, size)
        return scalar_broadcast_to(scalar.categories[0], size).astype(dtype)

    scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    dtype = scalar.dtype

    if np.dtype(dtype).kind in ("O", "U"):
        # Strings: gather row 0 of a one-element string column ``size`` times.
        index_map = column.full(size, 0, dtype="int32")
        single_row = column.as_column([scalar], dtype="str")
        return single_row[index_map]

    # Fixed-width types: allocate once and fill the device view directly.
    filled = column.column_empty(size, dtype=dtype)
    if filled.size != 0:
        filled.data_array_view[:] = scalar
    return filled
def find_last_value(self, value: ScalarLike, closest: bool = False) -> int:
    """
    Returns offset of last value that matches. For monotonic
    columns, returns the offset of the last smaller value
    if closest=True.
    """
    value = to_cudf_compatible_scalar(value)
    if not pd.api.types.is_number(value):
        raise ValueError("Expected a numeric value")
    found = 0
    if len(self):
        # The device kernel returns -1 when no exact match exists.
        found = cudautils.find_last(
            self.data_array_view, value, mask=self.mask,
        )
    if found == -1 and self.is_monotonic and closest:
        # No exact match: fall back to the last smaller value.  This
        # only makes sense on a monotonic column.
        if value < self.min():
            # Every element is larger; keep the "not found" sentinel so
            # the check below raises.
            found = -1
        elif value > self.max():
            # Every element is smaller; the last row is the answer.
            found = len(self) - 1
        else:
            # Last position whose element compares "lt" the target.
            found = cudautils.find_last(
                self.data_array_view, value, mask=self.mask, compare="lt",
            )
        if found == -1:
            raise ValueError("value not found")
    elif found == -1:
        # Non-monotonic column (or closest=False) with no exact match.
        raise ValueError("value not found")
    return found
def scalar_broadcast_to(scalar, size, dtype=None):
    """Replicate a host ``scalar`` into a column with ``size`` rows.

    Handles null scalars (masked empty column), pandas Categoricals,
    and decimal.Decimal values specially; everything else goes through
    ``to_cudf_compatible_scalar`` and ``cudf.core.column.full``.
    """
    if isinstance(size, (tuple, list)):
        # Only the leading dimension matters for a column.
        size = size[0]

    if cudf._lib.scalar._is_null_host_scalar(scalar):
        if dtype is None:
            dtype = "object"
        return column.column_empty(size, dtype=dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is None:
            return _categorical_scalar_broadcast_to(scalar, size)
        return scalar_broadcast_to(scalar.categories[0], size).astype(dtype)

    if isinstance(scalar, decimal.Decimal):
        if dtype is None:
            # Derive precision/scale from the decimal value itself.
            dtype = cudf.Decimal128Dtype._from_decimal(scalar)
        dec_col = column.column_empty(size, dtype=dtype)
        if dec_col.size != 0:
            dec_col[:] = scalar
        return dec_col

    scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    return cudf.core.column.full(
        size=size, fill_value=scalar, dtype=scalar.dtype
    )
def _preprocess_host_value(self, value, dtype):
    """Normalize a host scalar and resolve its dtype.

    Returns a ``(value, dtype)`` pair where null inputs become ``NA``
    and unicode dtypes are mapped to object.  Raises TypeError when a
    null scalar arrives without enough information to pick a dtype.
    """
    value = to_cudf_compatible_scalar(value, dtype=dtype)
    valid = not _is_null_host_scalar(value)

    if dtype is None:
        if valid:
            dtype = value.dtype
        elif isinstance(value, (np.datetime64, np.timedelta64)):
            # A NaT still carries a unit we can build a dtype from,
            # unless it is the unitless "generic" NaT.
            unit, _ = np.datetime_data(value)
            if unit == "generic":
                raise TypeError(
                    "Cant convert generic NaT to null scalar")
            dtype = value.dtype
        else:
            raise TypeError(
                "dtype required when constructing a null scalar")

    dtype = np.dtype(dtype)

    # temporary: unicode scalars are carried with object dtype
    if dtype.char == "U":
        dtype = np.dtype("object")

    if not valid:
        value = NA
    return value, dtype
def __setitem__(self, key, value):
    """Assign ``value`` at ``key`` in the underlying Series column,
    widening both sides to a common numeric dtype when a non-integer
    (slice/array) key writes numeric data into a non-categorical column.
    """
    from cudf.core.column import column

    if isinstance(key, tuple):
        key = list(key)

    # Coerce the value to a device scalar or a column.
    if is_scalar(value):
        value = to_cudf_compatible_scalar(value)
    else:
        value = column.as_column(value)

    if (
        not is_categorical_dtype(self._sr._column.dtype)
        and hasattr(value, "dtype")
        and pd.api.types.is_numeric_dtype(value.dtype)
    ):
        if not pd.api.types.is_integer(key):
            # Multi-element assignment: promote value and column to
            # their common NumPy result type before writing.
            to_dtype = np.result_type(value.dtype, self._sr._column.dtype)
            value = value.astype(to_dtype)
            self._sr._column._mimic_inplace(
                self._sr._column.astype(to_dtype), inplace=True
            )

    self._sr._column[key] = value
def __setitem__(self, key, value):
    """Assign ``value`` at ``key`` in the underlying Series column.

    Host lists/dicts destined for list/struct columns are passed
    through unconverted; numeric assignments into non-decimal,
    non-categorical columns are widened to a common dtype first.
    """
    from cudf.core.column import column

    if isinstance(key, tuple):
        key = list(key)

    if is_scalar(value):
        value = to_cudf_compatible_scalar(value)
    else:
        # Leave host lists/dicts alone when the target column is a
        # list/struct column; the column setitem handles them directly.
        keep_as_host = isinstance(value, (list, dict)) and isinstance(
            self._sr._column.dtype, (cudf.ListDtype, cudf.StructDtype)
        )
        if not keep_as_host:
            value = column.as_column(value)

    if (
        not isinstance(
            self._sr._column.dtype,
            (cudf.Decimal64Dtype, cudf.CategoricalDtype),
        )
        and hasattr(value, "dtype")
        and _is_non_decimal_numeric_dtype(value.dtype)
    ):
        if not is_integer(key):
            # Multi-element write: promote both sides to the common
            # NumPy result type.
            to_dtype = np.result_type(value.dtype, self._sr._column.dtype)
            value = value.astype(to_dtype)
            self._sr._column._mimic_inplace(
                self._sr._column.astype(to_dtype), inplace=True
            )

    self._sr._column[key] = value
def scalar_broadcast_to(scalar, size, dtype=None):
    """Broadcast ``scalar`` into a device array / string column of
    ``size`` elements.

    ``None`` yields a masked empty column; a pandas Categorical is
    broadcast via its first category; strings go through nvstrings,
    everything else through an rmm device array filled in place.
    """
    from cudf.core.column import column_empty
    from cudf.utils.cudautils import fill_value
    from cudf.utils.dtypes import is_string_dtype, to_cudf_compatible_scalar

    if isinstance(size, (tuple, list)):
        size = size[0]

    if scalar is None:
        if dtype is None:
            dtype = "object"
        return column_empty(size, dtype=dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        return scalar_broadcast_to(scalar.categories[0], size).astype(dtype)

    if isinstance(scalar, str) and (is_string_dtype(dtype) or dtype is None):
        # String scalars skip normalization and are handled below.
        dtype = "object"
    else:
        scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
        dtype = scalar.dtype

    if np.dtype(dtype) == np.dtype("object"):
        import nvstrings
        from cudf.core.column import as_column
        from cudf.utils.cudautils import zeros

        # Gather row 0 of a one-element string column ``size`` times.
        repeat_map = zeros(size, dtype="int32")
        return as_column(nvstrings.to_device([scalar]))[repeat_map]

    device_buf = rmm.device_array((size,), dtype=dtype)
    if device_buf.size != 0:
        fill_value(device_buf, scalar)
    return device_buf
def scalar_broadcast_to(scalar, size, dtype=None):
    """Broadcast ``scalar`` into a column of ``size`` elements.

    ``None`` yields a masked empty column; a pandas Categorical is
    broadcast via its first category; strings are replicated with a
    cupy gather map, other dtypes by filling an empty column in place.
    """
    from cudf.core.column import column_empty
    from cudf.utils.dtypes import is_string_dtype, to_cudf_compatible_scalar

    if isinstance(size, (tuple, list)):
        size = size[0]

    if scalar is None:
        if dtype is None:
            dtype = "object"
        return column_empty(size, dtype=dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        return scalar_broadcast_to(scalar.categories[0], size).astype(dtype)

    if isinstance(scalar, str) and (is_string_dtype(dtype) or dtype is None):
        # String scalars skip normalization and are handled below.
        dtype = "object"
    else:
        scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
        dtype = scalar.dtype

    if np.dtype(dtype) == np.dtype("object"):
        from cudf.core.column import as_column

        # Gather row 0 of a one-element string column ``size`` times.
        repeat_map = cupy.zeros(size, dtype="int32")
        return as_column([scalar], dtype="str")[repeat_map]

    filled = column_empty(size, dtype=dtype)
    if filled.size != 0:
        filled.data_array_view[:] = scalar
    return filled
def _find_value(
    self, value: ScalarLike, closest: bool, find: Callable, compare: str
) -> int:
    """Locate ``value`` in this column with the supplied ``find`` kernel.

    On an exact miss, a monotonically-increasing column with
    ``closest=True`` retries with the ``compare`` mode; otherwise
    (or if the retry also misses) a ValueError is raised.
    """
    value = to_cudf_compatible_scalar(value)
    if not is_number(value):
        raise ValueError("Expected a numeric value")

    offset = 0
    if len(self):
        offset = find(
            self.data_array_view, value, mask=self.mask,
        )
    if offset == -1:
        # Exact match failed; a "closest" retry is only meaningful on a
        # monotonically increasing column.
        if not (self.is_monotonic_increasing and closest):
            raise ValueError("value not found")
        offset = find(
            self.data_array_view,
            value,
            mask=self.mask,
            compare=compare,
        )
        if offset == -1:
            raise ValueError("value not found")
    return offset
def _preprocess_host_value(self, value, dtype):
    """Normalize a host scalar (including decimals) and resolve its dtype.

    Returns a ``(value, dtype)`` pair where null inputs become ``NA``.
    Raises TypeError when a null scalar arrives without enough
    information to pick a dtype.
    """
    # Validity is judged on the raw input, before any coercion.
    valid = not _is_null_host_scalar(value)

    if isinstance(dtype, Decimal64Dtype):
        # Round-trip through a pyarrow decimal128 scalar to apply the
        # requested precision/scale.
        value = pa.scalar(
            value, type=pa.decimal128(dtype.precision, dtype.scale)
        ).as_py()
    if dtype is None and isinstance(value, decimal.Decimal):
        dtype = Decimal64Dtype._from_decimal(value)

    value = to_cudf_compatible_scalar(value, dtype=dtype)

    if dtype is None:
        if valid:
            dtype = value.dtype
        elif isinstance(value, (np.datetime64, np.timedelta64)):
            # A NaT still carries a unit we can build a dtype from,
            # unless it is the unitless "generic" NaT.
            unit, _ = np.datetime_data(value)
            if unit == "generic":
                raise TypeError(
                    "Cant convert generic NaT to null scalar")
            dtype = value.dtype
        else:
            raise TypeError(
                "dtype required when constructing a null scalar")

    if not isinstance(dtype, Decimal64Dtype):
        dtype = np.dtype(dtype)

    if not valid:
        value = NA
    return value, dtype
def _preprocess_host_value(self, value, dtype):
    # Normalize a host-side (value, dtype) pair before it is stored on a
    # cudf Scalar: list/dict values map to List/Struct dtypes, decimals
    # resolve precision/scale, and null inputs become NA.
    valid = not cudf._lib.scalar._is_null_host_scalar(value)
    if isinstance(value, list):
        if dtype is not None:
            raise TypeError("Lists may not be cast to a different dtype")
        else:
            # Infer the ListDtype from the single host list via pyarrow.
            dtype = ListDtype.from_arrow(
                pa.infer_type([value], from_pandas=True))
            return value, dtype
    elif isinstance(dtype, ListDtype):
        # A list dtype with a non-list value is only legal for nulls.
        if value not in {None, NA}:
            raise ValueError(f"Can not coerce {value} to ListDtype")
        else:
            return NA, dtype
    if isinstance(value, dict):
        if dtype is None:
            # Infer the StructDtype from the host dict via pyarrow.
            dtype = StructDtype.from_arrow(
                pa.infer_type([value], from_pandas=True))
        return value, dtype
    elif isinstance(dtype, StructDtype):
        # A struct dtype with a non-dict value is only legal for nulls.
        if value not in {None, NA}:
            raise ValueError(f"Can not coerce {value} to StructDType")
        else:
            return NA, dtype
    if isinstance(dtype, Decimal64Dtype):
        # Round-trip through a pyarrow decimal128 scalar to apply the
        # requested precision/scale.
        value = pa.scalar(value,
                          type=pa.decimal128(dtype.precision,
                                             dtype.scale)).as_py()
    if isinstance(value, decimal.Decimal) and dtype is None:
        dtype = Decimal64Dtype._from_decimal(value)
    value = to_cudf_compatible_scalar(value, dtype=dtype)
    if dtype is None:
        if not valid:
            if isinstance(value, (np.datetime64, np.timedelta64)):
                # A NaT still carries a unit we can build a dtype from,
                # unless it is the unitless "generic" NaT.
                unit, _ = np.datetime_data(value)
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
                else:
                    dtype = value.dtype
            else:
                raise TypeError(
                    "dtype required when constructing a null scalar")
        else:
            dtype = value.dtype
    if not isinstance(dtype, Decimal64Dtype):
        dtype = cudf.dtype(dtype)
    if not valid:
        value = NA
    return value, dtype
def _cast_to_appropriate_cudf_type(val, index=None):
    """Convert a 0-d or effectively 1-d array result into a cudf object.

    0-d arrays become host scalars; 1-d arrays (or 2-d column vectors)
    become ``cudf.Series``; anything else returns ``NotImplemented``.
    """
    if val.ndim == 0:
        # Scalar result.
        return to_cudf_compatible_scalar(val)

    is_column_shaped = val.ndim == 1 or (val.ndim == 2 and val.shape[1] == 1)
    if is_column_shaped:
        # Cupy dispatching behaviour is undefined when the index length
        # disagrees with the data length, so we don't implement it.
        if index is None or len(index) == len(val):
            return cudf.Series(val, index=index)

    return NotImplemented
def _scalar_binop(self, other, op):
    """Apply binary operation ``op`` between this scalar and ``other``.

    Array-like operands are deferred to the column implementation via
    ``NotImplemented``.  If either operand is null the result is a null
    Scalar of the computed result dtype.
    """
    if isinstance(other, (ColumnBase, Series, Index, np.ndarray)):
        # Dispatch to the column implementation.
        return NotImplemented

    other = to_cudf_compatible_scalar(other)
    out_dtype = self._binop_result_dtype_or_error(other, op)

    # NumPy generics are always valid; cudf scalars carry validity.
    # Keep the short-circuit so other.is_valid is only consulted when
    # self is valid and other is not a NumPy generic.
    if self.is_valid and (isinstance(other, np.generic) or other.is_valid):
        return Scalar(self._dispatch_scalar_binop(other, op), dtype=out_dtype)
    return Scalar(None, dtype=out_dtype)
def _preprocess_host_value(self, value, dtype):
    """Normalize a host scalar and resolve its dtype; nulls become NA.

    Decimal values may only be given without a dtype; their
    Decimal64Dtype is derived from the value itself.
    """
    if isinstance(dtype, Decimal64Dtype):
        # TODO: Support coercion from decimal.Decimal to different dtype
        # TODO: Support coercion from integer to Decimal64Dtype
        raise NotImplementedError(
            "dtype as cudf.Decimal64Dtype is not supported. Pass a "
            "decimal.Decimal to construct a DecimalScalar.")
    if isinstance(value, decimal.Decimal) and dtype is not None:
        raise TypeError(f"Can not coerce decimal to {dtype}")

    value = to_cudf_compatible_scalar(value, dtype=dtype)
    valid = not _is_null_host_scalar(value)

    if isinstance(value, decimal.Decimal):
        # e.g. 0.0042 -> Decimal64Dtype(2, 4)
        dtype = Decimal64Dtype._from_decimal(value)
    else:
        if dtype is None:
            if valid:
                dtype = value.dtype
            elif isinstance(value, (np.datetime64, np.timedelta64)):
                # A NaT still carries a unit we can build a dtype from,
                # unless it is the unitless "generic" NaT.
                unit, _ = np.datetime_data(value)
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
                dtype = value.dtype
            else:
                raise TypeError(
                    "dtype required when constructing a null scalar")
        dtype = np.dtype(dtype)
        # temporary: unicode scalars are carried with object dtype
        if dtype.char == "U":
            dtype = np.dtype("object")

    if not valid:
        value = NA
    return value, dtype
def scalar_broadcast_to(scalar, shape, dtype):
    """Broadcast ``scalar`` into a device array of the given ``shape``.

    Strings are replicated through an nvstrings-backed column gather;
    all other dtypes fill an rmm device array in place.
    """
    from cudf.utils.cudautils import fill_value
    from cudf.utils.dtypes import to_cudf_compatible_scalar

    scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)

    # Normalize a bare length into a 1-tuple shape.
    if not isinstance(shape, tuple):
        shape = (shape,)

    if np.dtype(dtype) == np.dtype("object"):
        import nvstrings
        from cudf.core.column import StringColumn
        from cudf.utils.cudautils import zeros

        # Gather row 0 of a one-element string column shape[0] times.
        repeat_map = zeros(shape[0], dtype="int32")
        return StringColumn(nvstrings.to_device([scalar]))[repeat_map]

    device_buf = rmm.device_array(shape, dtype=dtype)
    if device_buf.size != 0:
        fill_value(device_buf, scalar)
    return device_buf
def scalar_broadcast_to(scalar, size, dtype=None):
    """Broadcast a host ``scalar`` into a column with ``size`` rows.

    Null scalars produce a fully-masked column; pandas Categoricals and
    decimal.Decimal values get dedicated handling; strings are gathered
    from a one-row column; fixed-width values fill an empty column.
    """
    if isinstance(size, (tuple, list)):
        # Only the leading dimension matters for a column.
        size = size[0]

    if cudf._lib.scalar._is_null_host_scalar(scalar):
        if dtype is None:
            dtype = "object"
        return column.column_empty(size, dtype=dtype, masked=True)

    if isinstance(scalar, pd.Categorical):
        if dtype is None:
            return _categorical_scalar_broadcast_to(scalar, size)
        return scalar_broadcast_to(scalar.categories[0], size).astype(dtype)

    if isinstance(scalar, decimal.Decimal):
        if dtype is None:
            # Derive precision/scale from the decimal value itself.
            dtype = cudf.Decimal64Dtype._from_decimal(scalar)
        dec_col = column.column_empty(size, dtype=dtype)
        if dec_col.size != 0:
            dec_col[:] = scalar
        return dec_col

    scalar = to_cudf_compatible_scalar(scalar, dtype=dtype)
    dtype = scalar.dtype

    if cudf.dtype(dtype).kind in ("O", "U"):
        # Strings: gather row 0 of a one-element string column.
        index_map = column.full(size, 0, dtype="int32")
        return column.as_column([scalar], dtype="str")[index_map]

    filled = column.column_empty(size, dtype=dtype)
    if filled.size != 0:
        filled.data_array_view[:] = scalar
    return filled