def test_scalar_add_invalid(dtype_l, dtype_r):
    test_value = 1

    lval_gpu = cudf.Scalar(test_value, dtype=dtype_l)
    rval_gpu = cudf.Scalar(test_value, dtype=dtype_r)

    with pytest.raises(TypeError):
        lval_gpu + rval_gpu
def test_boolean_scalar_binop(op):
    psr = pd.Series(np.random.choice([True, False], 10))
    gsr = cudf.from_pandas(psr)
    utils.assert_eq(op(psr, True), op(gsr, True))
    utils.assert_eq(op(psr, False), op(gsr, False))

    # cuDF scalar
    utils.assert_eq(op(psr, True), op(gsr, cudf.Scalar(True)))
    utils.assert_eq(op(psr, False), op(gsr, cudf.Scalar(False)))
def test_scalar_dtype_and_validity_decimal(slr, dtype, expect):
    if expect is pa.lib.ArrowInvalid:
        with pytest.raises(expect):
            cudf.Scalar(slr, dtype=dtype)
        return
    else:
        result = cudf.Scalar(slr, dtype=dtype)
        assert result.dtype == dtype
        assert result.is_valid
def test_null_scalar(dtype):
    if isinstance(dtype, cudf.Decimal64Dtype):
        with pytest.raises(NotImplementedError):
            s = cudf.Scalar(None, dtype=dtype)
        return
    s = cudf.Scalar(None, dtype=dtype)
    assert s.value is cudf.NA
    assert s.dtype == np.dtype(dtype)
    assert s.is_valid() is False
def test_scalar_dtype_and_validity(dtype):
    if isinstance(dtype, cudf.Decimal64Dtype):
        with pytest.raises(NotImplementedError):
            s = cudf.Scalar(None, dtype=dtype)
        return
    s = cudf.Scalar(1, dtype=dtype)

    assert s.dtype == np.dtype(dtype)
    assert s.is_valid() is True
def __setitem__(self, key, value):
    if isinstance(value, list):
        value = cudf.Scalar(value)
    if isinstance(value, cudf.Scalar):
        if value.dtype != self.dtype:
            raise TypeError("list nesting level mismatch")
    elif value is cudf.NA:
        value = cudf.Scalar(value, dtype=self.dtype)
    else:
        raise ValueError(f"Can not set {value} into ListColumn")
    super().__setitem__(key, value)
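# A minimal usage sketch for the list-column __setitem__ above, assuming that
# Series.__setitem__ on a list-typed Series dispatches to it (that dispatch is
# not shown in this snippet):
import cudf

s = cudf.Series([[1, 2], [3, 4, 5]])
s[0] = [10, 20, 30]   # plain list is wrapped in cudf.Scalar; same nesting level, so OK
s[1] = cudf.NA        # NA is re-wrapped as a null scalar of the column's list dtype
# s[0] = [[1], [2]]   # deeper nesting would raise TypeError("list nesting level mismatch")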
def test_construct_from_scalar(value):
    value = cudf.utils.utils.to_cudf_compatible_scalar(value)
    x = cudf.Scalar(value, value.dtype)
    y = cudf.Scalar(x)
    assert x.value == y.value or (np.isnan(x.value) and np.isnan(y.value))

    # check that this works:
    y.device_value

    assert x._is_host_value_current == y._is_host_value_current
    assert x._is_device_value_current == y._is_device_value_current
def test_scalar_difference(dtype_l, dtype_r):
    test_value = 1

    lval_host = dtype_scalar(test_value, dtype=dtype_l)
    rval_host = dtype_scalar(test_value, dtype=dtype_r)

    lval_gpu = cudf.Scalar(test_value, dtype=dtype_l)
    rval_gpu = cudf.Scalar(test_value, dtype=dtype_r)

    expect = lval_host - rval_host
    got = lval_gpu - rval_gpu

    assert expect == got.value
    assert expect.dtype == got.dtype
def test_scalar_floordiv(dtype_l, dtype_r):
    test_value = 1

    lval_host = dtype_scalar(test_value, dtype=dtype_l)
    rval_host = dtype_scalar(test_value, dtype=dtype_r)

    lval_gpu = cudf.Scalar(test_value, dtype=dtype_l)
    rval_gpu = cudf.Scalar(test_value, dtype=dtype_r)

    expect = lval_host // rval_host
    got = lval_gpu // rval_gpu

    assert expect == got.value
    assert expect.dtype == got.dtype
def test_scalar_power(dtype_l, dtype_r):
    test_value = 1

    lval_host = dtype_scalar(test_value, dtype=dtype_l)
    rval_host = dtype_scalar(test_value, dtype=dtype_r)

    lval_gpu = cudf.Scalar(test_value, dtype=dtype_l)
    rval_gpu = cudf.Scalar(test_value, dtype=dtype_r)

    expect = lval_host**rval_host
    got = lval_gpu**rval_gpu

    assert expect == got.value
    assert expect.dtype == got.dtype
def _check_and_cast_columns_with_other(
    source_col: ColumnBase,
    other: Union[ScalarLike, ColumnBase],
    inplace: bool,
) -> Tuple[ColumnBase, Union[ScalarLike, ColumnBase]]:
    """
    Return `source_col` and `other`, type-cast as needed based on the
    `inplace` parameter.
    """
    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
        return source_col, other

    if cudf.utils.dtypes.is_scalar(other):
        device_obj = _normalize_scalars(source_col, other)
    else:
        device_obj = other

    if other is None:
        return source_col, device_obj
    elif cudf.utils.dtypes.is_mixed_with_object_dtype(device_obj, source_col):
        raise TypeError(
            "cudf does not support mixed types, please type-cast "
            "the column of dataframe/series and other "
            "to same dtypes."
        )
    if inplace:
        if not cudf.utils.dtypes._can_cast(device_obj.dtype, source_col.dtype):
            warnings.warn(
                f"Type-casting from {device_obj.dtype} "
                f"to {source_col.dtype}, there could be potential data loss"
            )
        return source_col, device_obj.astype(source_col.dtype)
    else:
        if (
            cudf.utils.dtypes.is_scalar(other)
            and cudf.utils.dtypes.is_numerical_dtype(source_col.dtype)
            and cudf.utils.dtypes._can_cast(other, source_col.dtype)
        ):
            common_dtype = source_col.dtype
            return (
                source_col.astype(common_dtype),
                cudf.Scalar(other, dtype=common_dtype),
            )
        else:
            common_dtype = cudf.utils.dtypes.find_common_type(
                [
                    source_col.dtype,
                    np.min_scalar_type(other)
                    if cudf.utils.dtypes.is_scalar(other)
                    else other.dtype,
                ]
            )
            if cudf.utils.dtypes.is_scalar(device_obj):
                device_obj = cudf.Scalar(other, dtype=common_dtype)
            else:
                device_obj = device_obj.astype(common_dtype)
            return source_col.astype(common_dtype), device_obj
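# Hedged illustration of the casting rules above, assuming the helper backs
# Series.where-style replacement (that call path is an assumption here):
import cudf

s = cudf.Series([1, 2, 3], dtype="int8")
s.where(s > 1, 0)      # 0 can be cast safely, so the int8 dtype is kept
s.where(s > 1, 1000)   # 1000 does not fit int8, so both sides are promoted
                       # to a wider common dtype before the replacement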
def test_scalar_add(dtype_l, dtype_r):
    test_value = 1

    lval_host = dtype_scalar(test_value, dtype=dtype_l)
    rval_host = dtype_scalar(test_value, dtype=dtype_r)

    lval_gpu = cudf.Scalar(test_value, dtype=dtype_l)
    rval_gpu = cudf.Scalar(test_value, dtype=dtype_r)

    # expect = np.add(lval_host, rval_host)
    expect = lval_host + rval_host
    got = lval_gpu + rval_gpu

    assert expect == got.value
    if not dtype_l == dtype_r == "str":
        assert expect.dtype == got.dtype
def fillna(
    self, value: Any = None, method: str = None, dtype: Dtype = None
):
    """Fill null values with ``value``.

    Returns a copy with nulls filled.
    """
    if isinstance(value, (int, Decimal)):
        value = cudf.Scalar(value, dtype=self.dtype)
    elif (
        isinstance(value, Decimal64Column)
        or isinstance(value, cudf.core.column.NumericalColumn)
        and is_integer_dtype(value.dtype)
    ):
        value = value.astype(self.dtype)
    else:
        raise TypeError(
            "Decimal columns only support using fillna with decimal and "
            "integer values"
        )

    result = libcudf.replace.replace_nulls(
        input_col=self, replacement=value, method=method, dtype=dtype
    )
    return result._with_type_metadata(self.dtype)
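# Usage sketch for the decimal fillna above. Building the column through
# pyarrow's decimal128 type is just one assumed construction route; any
# Decimal64Dtype column behaves the same way here.
from decimal import Decimal

import pyarrow as pa

import cudf

s = cudf.Series(
    pa.array([Decimal("1.25"), None, Decimal("3.50")], type=pa.decimal128(5, 2))
)
s.fillna(Decimal("0.00"))   # Decimal and int fill values are accepted
# s.fillna(1.5)             # floats hit the TypeError branch above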
def _binary_op_truediv(
    self, rhs: BinaryOperand
) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]:
    lhs = self  # type: column.ColumnBase
    if pd.api.types.is_timedelta64_dtype(rhs.dtype):
        common_dtype = determine_out_dtype(self.dtype, rhs.dtype)
        lhs = lhs.astype(common_dtype).astype("float64")

        if isinstance(rhs, cudf.Scalar):
            if rhs.is_valid():
                rhs = rhs.value.astype(common_dtype).astype("float64")
            else:
                rhs = cudf.Scalar(None, "float64")
        else:
            rhs = rhs.astype(common_dtype).astype("float64")

        out_dtype = np.dtype("float64")
    elif rhs.dtype.kind in ("f", "i", "u"):
        out_dtype = self.dtype
    else:
        raise TypeError(
            f"Division of {self.dtype} with {rhs.dtype} "
            f"cannot be performed."
        )

    return lhs, rhs, out_dtype
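# Hedged example of the true-division rules above, assuming the "/" operator
# on a timedelta Series routes through this helper:
import numpy as np

import cudf

td = cudf.Series([np.timedelta64(10, "s"), np.timedelta64(30, "s")])
td / cudf.Scalar(np.timedelta64(5, "s"))   # timedelta / timedelta -> float64 [2.0, 6.0]
td / 2                                     # timedelta / numeric keeps the timedelta dtype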
def test_list_scalar_host_construction_null(elem_type, nesting_level):
    dtype = cudf.ListDtype(elem_type)
    for level in range(nesting_level - 1):
        dtype = cudf.ListDtype(dtype)

    slr = cudf.Scalar(None, dtype=dtype)
    assert slr.value is cudf.NA
def test_scalar_host_initialization(value):
    s = cudf.Scalar(value)

    np.testing.assert_equal(s.value, value)
    assert s.is_valid() is True
    assert s._is_host_value_current
    assert not s._is_device_value_current
def test_scalar_logical():
    T = cudf.Scalar(True)
    F = cudf.Scalar(False)

    assert T
    assert not F

    assert T and T
    assert not (T and F)
    assert not (F and T)
    assert not (F and F)

    assert T or T
    assert T or F
    assert F or T
    assert not (F or F)
def fillna(self, fill_value=None, method=None):
    """
    Fill null values with *fill_value*
    """
    if method is not None:
        return super().fillna(fill_value, method)

    if (
        isinstance(fill_value, cudf.Scalar)
        and fill_value.dtype == self.dtype
    ):
        return super().fillna(fill_value, method)

    if np.isscalar(fill_value):
        # cast safely to the same dtype as self
        fill_value_casted = self.dtype.type(fill_value)
        if not np.isnan(fill_value) and (fill_value_casted != fill_value):
            raise TypeError(
                f"Cannot safely cast non-equivalent "
                f"{type(fill_value).__name__} to {self.dtype.name}"
            )
        fill_value = cudf.Scalar(fill_value_casted)
    else:
        fill_value = column.as_column(fill_value, nan_as_null=False)
        # cast safely to the same dtype as self
        if is_integer_dtype(self.dtype):
            fill_value = _safe_cast_to_int(fill_value, self.dtype)
        else:
            fill_value = fill_value.astype(self.dtype)

    return super().fillna(fill_value, method)
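# Usage sketch of the safe-cast check in the numerical fillna above:
import cudf

s = cudf.Series([1, None, 3], dtype="int64")
s.fillna(2)      # int64(2) round-trips unchanged, so the fill is allowed
# s.fillna(2.5)  # int64(2.5) == 2 != 2.5 -> TypeError ("Cannot safely cast ...")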
def contains(self, search_key: ScalarLike) -> ParentType:
    """
    Returns boolean values indicating whether the specified scalar
    is an element of each row.

    Parameters
    ----------
    search_key : scalar
        element being searched for in each row of the list column

    Returns
    -------
    Series or Index

    Examples
    --------
    >>> s = cudf.Series([[1, 2, 3], [3, 4, 5], [4, 5, 6]])
    >>> s.list.contains(4)
    Series([False, True, True])
    dtype: bool
    """
    search_key = cudf.Scalar(search_key)
    try:
        res = self._return_or_inplace(
            contains_scalar(self._column, search_key)
        )
    except RuntimeError as e:
        if (
            "Type/Scale of search key does not "
            "match list column element type" in str(e)
        ):
            raise TypeError(
                "Type/Scale of search key does not "
                "match list column element type"
            ) from e
        raise
    else:
        return res
def test_timedelta_index_ops_with_cudf_scalars(data, cpu_scalar, dtype, op):
    gtdi = cudf.Index(data=data, dtype=dtype)
    ptdi = gtdi.to_pandas()

    gpu_scalar = cudf.Scalar(cpu_scalar)

    if op == "add":
        expected = ptdi + cpu_scalar
        actual = gtdi + gpu_scalar
    elif op == "sub":
        expected = ptdi - cpu_scalar
        actual = gtdi - gpu_scalar
    elif op == "truediv":
        expected = ptdi / cpu_scalar
        actual = gtdi / gpu_scalar
    elif op == "floordiv":
        expected = ptdi // cpu_scalar
        actual = gtdi // gpu_scalar

    assert_eq(expected, actual)

    if op == "add":
        expected = cpu_scalar + ptdi
        actual = gpu_scalar + gtdi
    elif op == "sub":
        expected = cpu_scalar - ptdi
        actual = gpu_scalar - gtdi
    elif op == "truediv":
        expected = cpu_scalar / ptdi
        actual = gpu_scalar / gtdi
    elif op == "floordiv":
        expected = cpu_scalar // ptdi
        actual = gpu_scalar // gtdi

    assert_eq(expected, actual)
def test_scalar_no_negative_bools():
    x = cudf.Scalar(True)
    with pytest.raises(
        TypeError,
        match=re.escape(
            "Boolean scalars in cuDF do not "
            "support negation, use logical not"
        ),
    ):
        -x
def seconds(self) -> "cudf.core.column.NumericalColumn":
    """
    Number of seconds (>= 0 and less than 1 day).

    Returns
    -------
    NumericalColumn
    """
    # This property must return the number of seconds (>= 0 and
    # less than 1 day) for each element, hence first performing
    # a mod operation to remove the number of days and then performing
    # a division operation to extract the number of seconds.
    return (
        self
        % cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["D"], "ns")
        )
    ) // cudf.Scalar(
        np.timedelta64(_numpy_to_pandas_conversion["s"], "ns")
    )
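# Illustration of the mod/floordiv decomposition used by `seconds` above,
# assuming access through the Series.dt accessor:
import numpy as np

import cudf

td = cudf.Series([np.timedelta64(90_061, "s")])   # 1 day, 1 hour, 1 minute, 1 second
td.dt.days      # -> 1     (whole days; see the `days` property further below)
td.dt.seconds   # -> 3661  (remainder after removing whole days, in seconds)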
def __setitem__(self, key, value):
    if isinstance(value, dict):
        # filling in fields not in dict
        for field in self.dtype.fields:
            value[field] = value.get(field, cudf.NA)

        value = cudf.Scalar(value, self.dtype)
    super().__setitem__(key, value)
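# Hedged sketch of the partial-dict handling in the struct __setitem__ above,
# assuming Series.__setitem__ on a struct column routes through it:
import cudf

s = cudf.Series([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
s[0] = {"a": 10}   # missing field "b" is filled with cudf.NA before the dict
                   # is wrapped in cudf.Scalar with the column's StructDtype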
def normalize_binop_value(self, other):
    if is_scalar(other) and isinstance(other, (int, np.int, Decimal)):
        return cudf.Scalar(Decimal(other))
    elif isinstance(other, cudf.Scalar) and isinstance(
        other.dtype, cudf.core.dtypes.DecimalDtype
    ):
        return other
    else:
        raise TypeError(f"cannot normalize {type(other)}")
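# Hedged example of the normalization above for a decimal column: ints and
# Decimals are wrapped as decimal cudf.Scalars, floats are rejected. The
# pyarrow construction route is an assumption, as in the fillna example above.
from decimal import Decimal

import pyarrow as pa

import cudf

s = cudf.Series(
    pa.array([Decimal("1.10"), Decimal("2.20")], type=pa.decimal128(5, 2))
)
s + 1               # int -> cudf.Scalar(Decimal(1))
s + Decimal("0.05") # Decimal takes the same path
# s + 1.5           # float is not handled -> TypeError("cannot normalize ...")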
def test_datetime_series_ops_with_cudf_scalars(data, scalar, dtype, op):
    gsr = cudf.Series(data=data, dtype=dtype)
    psr = gsr.to_pandas()

    expect = op(psr, scalar)
    got = op(gsr, cudf.Scalar(scalar))

    assert_eq(expect, got)
def nanoseconds(self) -> "cudf.core.column.NumericalColumn":
    """
    Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

    Returns
    -------
    NumericalColumn
    """
    # This property must return the number of nanoseconds (>= 0 and
    # less than 1 microsecond) for each element, hence first performing
    # a mod operation to remove the number of microseconds and then
    # performing a division operation to extract the number
    # of nanoseconds.
    return (
        self
        % cudf.Scalar(
            np.timedelta64(_numpy_to_pandas_conversion["us"], "ns")
        )
    ) // cudf.Scalar(
        np.timedelta64(_numpy_to_pandas_conversion["ns"], "ns")
    )
def cast_to_appropriate_cudf_type(val):
    # TODO Handle scalar
    if val.ndim == 0:
        return cudf.Scalar(val).value
    # 1D array
    elif (val.ndim == 1) or (val.ndim == 2 and val.shape[1] == 1):
        return cudf.Series(val)
    else:
        return NotImplemented
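# Hedged sketch of the dimension-based dispatch above, calling the helper as
# defined here (the ufunc-handling context it serves in cuDF is an assumption
# not shown in this snippet):
import numpy as np

cast_to_appropriate_cudf_type(np.float64(3.5))   # 0-d -> host scalar via cudf.Scalar(...).value
cast_to_appropriate_cudf_type(np.arange(3))      # 1-d -> cudf.Series
cast_to_appropriate_cudf_type(np.ones((2, 2)))   # other shapes -> NotImplemented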
def test_series_iloc_defer_cudf_scalar():
    ps = pd.Series([1, 2, 3], index=pd.Index(["a", "b", "c"]))
    gs = cudf.from_pandas(ps)

    for t in index_dtypes:
        arg = cudf.Scalar(1, dtype=t)
        got = gs[arg]
        expect = 2

        assert_eq(expect, got)
def test_null_scalar(dtype):
    s = cudf.Scalar(None, dtype=dtype)
    assert s.value is cudf.NA
    assert s.dtype == (
        cudf.dtype(dtype)
        if not isinstance(dtype, cudf.core.dtypes.DecimalDtype)
        else dtype
    )
    assert s.is_valid() is False
def days(self) -> "cudf.core.column.NumericalColumn":
    """
    Number of days for each element.

    Returns
    -------
    NumericalColumn
    """
    return self // cudf.Scalar(
        np.timedelta64(_numpy_to_pandas_conversion["D"], "ns")
    )