def binary_operator(self, op, other, reflect=False):
    """Apply the binary operator ``op`` between this column and ``other``.

    Parameters
    ----------
    op : str
        Operator name. ``"add"``, ``"sub"`` and ``"mul"`` are handled as
        decimal arithmetic; ``"eq"``, ``"ne"``, ``"lt"``, ``"gt"``, ``"le"``
        and ``"ge"`` are handled as comparisons.
    other : column or scalar
        The second operand.
    reflect : bool, default False
        When True the operands are swapped before the operation is applied.

    Returns
    -------
    The column produced by ``libcudf.binaryop.binaryop``.

    Raises
    ------
    TypeError
        If ``op`` is not one of the supported operators, if a comparison is
        requested against an unsupported operand type, or if a comparison is
        requested against a non-integer numerical column.
    """
    if reflect:
        self, other = other, self

    # Binary arithmetic between decimal columns. `scale` and `precision`
    # are computed outside of libcudf.
    if op in ("add", "sub", "mul"):
        scale = _binop_scale(self.dtype, other.dtype, op)
        output_type = Decimal64Dtype(
            scale=scale, precision=Decimal64Dtype.MAX_PRECISION
        )  # precision will be ignored, libcudf has no notion of precision
        result = libcudf.binaryop.binaryop(self, other, op, output_type)
        # Patch the real precision onto the result dtype afterwards.
        result.dtype.precision = _binop_precision(self.dtype, other.dtype, op)
    elif op in ("eq", "ne", "lt", "gt", "le", "ge"):
        if not isinstance(
            other,
            (DecimalColumn, cudf.core.column.NumericalColumn, cudf.Scalar),
        ):
            # The trailing space keeps the two message fragments from
            # running together ("between<class ...>").
            raise TypeError(
                f"Operator {op} not supported between "
                f"{str(type(self))} and {str(type(other))}"
            )
        if isinstance(
            other, cudf.core.column.NumericalColumn
        ) and not is_integer_dtype(other.dtype):
            raise TypeError(
                f"Only decimal and integer column is supported for {op}."
            )
        if isinstance(other, cudf.core.column.NumericalColumn):
            # Integer columns are compared as scale-0 decimals.
            other = other.as_decimal_column(
                Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
            )
        result = libcudf.binaryop.binaryop(self, other, op, bool)
    else:
        # Without this branch an unknown operator fell through to
        # `return result` and surfaced as an UnboundLocalError.
        raise TypeError(
            f"Operator {op} not supported between "
            f"{str(type(self))} and {str(type(other))}"
        )
    return result
def _preprocess_host_value(self, value, dtype):
    """Normalize a host-side scalar and resolve the dtype it should carry.

    Returns a ``(value, dtype)`` pair. Null inputs come back as ``NA``.
    A ``TypeError`` is raised when a null is given without enough
    information to determine a dtype.
    """
    # Nullness is decided on the raw input, before any conversion below.
    is_null = _is_null_host_scalar(value)

    if isinstance(dtype, Decimal64Dtype):
        # Round-trip through pyarrow so the value is coerced to the
        # requested precision/scale.
        arrow_type = pa.decimal128(dtype.precision, dtype.scale)
        value = pa.scalar(value, type=arrow_type).as_py()
    if dtype is None and isinstance(value, decimal.Decimal):
        dtype = Decimal64Dtype._from_decimal(value)

    value = to_cudf_compatible_scalar(value, dtype=dtype)

    if dtype is None:
        if is_null:
            # Only a NaT with a concrete unit carries enough type
            # information to build a typed null.
            if not isinstance(value, (np.datetime64, np.timedelta64)):
                raise TypeError(
                    "dtype required when constructing a null scalar")
            unit = np.datetime_data(value)[0]
            if unit == "generic":
                raise TypeError(
                    "Cant convert generic NaT to null scalar")
        dtype = value.dtype

    if not isinstance(dtype, Decimal64Dtype):
        dtype = np.dtype(dtype)

    return (NA if is_null else value), dtype
def _preprocess_host_value(self, value, dtype):
    """Normalize a host-side scalar and resolve the dtype it should carry.

    Returns a ``(value, dtype)`` pair. List and dict values are returned
    unchanged with a dtype inferred through pyarrow; null inputs are
    returned as ``NA``.

    Raises ``TypeError`` when a list is given together with an explicit
    dtype, or when a null is given without enough information to determine
    a dtype. Raises ``ValueError`` when a non-null, non-list/non-dict value
    is given with a ``ListDtype``/``StructDtype``.
    """
    # Nullness is decided on the raw input, before any conversion below.
    valid = not cudf._lib.scalar._is_null_host_scalar(value)

    # List values: the dtype is always inferred, never user-supplied.
    if isinstance(value, list):
        if dtype is not None:
            raise TypeError("Lists may not be cast to a different dtype")
        else:
            dtype = ListDtype.from_arrow(
                pa.infer_type([value], from_pandas=True))
            return value, dtype
    elif isinstance(dtype, ListDtype):
        # A ListDtype with a non-list value is only valid for nulls.
        if value not in {None, NA}:
            raise ValueError(f"Can not coerce {value} to ListDtype")
        else:
            return NA, dtype

    # Dict values: infer a StructDtype when none was given.
    # NOTE(review): a dict is returned as-is even when an explicit dtype
    # was passed -- confirm this is intended.
    if isinstance(value, dict):
        if dtype is None:
            dtype = StructDtype.from_arrow(
                pa.infer_type([value], from_pandas=True))
        return value, dtype
    elif isinstance(dtype, StructDtype):
        # A StructDtype with a non-dict value is only valid for nulls.
        if value not in {None, NA}:
            raise ValueError(f"Can not coerce {value} to StructDType")
        else:
            return NA, dtype

    if isinstance(dtype, Decimal64Dtype):
        # Round-trip through pyarrow so the value is coerced to the
        # requested precision/scale.
        value = pa.scalar(value,
                          type=pa.decimal128(dtype.precision,
                                             dtype.scale)).as_py()
    if isinstance(value, decimal.Decimal) and dtype is None:
        dtype = Decimal64Dtype._from_decimal(value)

    value = to_cudf_compatible_scalar(value, dtype=dtype)

    if dtype is None:
        if not valid:
            # Only a NaT with a concrete unit carries enough type
            # information to build a typed null.
            if isinstance(value, (np.datetime64, np.timedelta64)):
                unit, _ = np.datetime_data(value)
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
                else:
                    dtype = value.dtype
            else:
                raise TypeError(
                    "dtype required when constructing a null scalar")
        else:
            dtype = value.dtype

    if not isinstance(dtype, Decimal64Dtype):
        dtype = cudf.dtype(dtype)

    if not valid:
        value = NA

    return value, dtype
def binary_operator(self, op, other, reflect=False):
    """Apply ``op`` between two decimal operands and return the result.

    With ``reflect=True`` the operands are swapped first. The result scale
    and precision come from ``_binop_scale`` and ``_binop_precision``.
    """
    lhs, rhs = (other, self) if reflect else (self, other)

    # precision will be ignored, libcudf has no notion of precision
    result_dtype = Decimal64Dtype(
        scale=_binop_scale(lhs.dtype, rhs.dtype, op),
        precision=Decimal64Dtype.MAX_PRECISION,
    )
    out = libcudf.binaryop.binaryop(lhs, rhs, op, result_dtype)

    # Patch the real precision onto the result dtype afterwards.
    out.dtype.precision = _binop_precision(lhs.dtype, rhs.dtype, op)
    return out
def test_orc_writer_decimal(tmpdir, scale):
    """Write a decimal column to ORC with cudf and read it back with pandas."""
    np.random.seed(0)
    orc_path = tmpdir / "decimal.orc"

    gdf = cudf.DataFrame({"dec_val": gen_rand_series("i", 100)})
    decimal_type = Decimal64Dtype(7, scale)
    gdf["dec_val"] = gdf["dec_val"].astype(decimal_type)

    gdf.to_orc(orc_path)
    round_tripped = pd.read_orc(orc_path)

    assert_eq(gdf.to_pandas()["dec_val"], round_tripped["dec_val"])
def from_arrow(cls, data: pa.Array):
    """Build a column of this class from a pyarrow decimal array.

    The arrow values buffer is reinterpreted as 64-bit integers and only
    every other word is kept.
    """
    arrow_buffers = data.buffers()
    validity_buf = arrow_buffers[0]
    values_buf = arrow_buffers[1]
    n_rows = len(data)

    # No validity buffer: pass None straight through as the mask.
    if validity_buf is None:
        mask = validity_buf
    else:
        mask = pa_mask_buffer_to_mask(validity_buf, n_rows)

    # View the raw bytes as int64 words and copy them to the device.
    words = cp.array(np.frombuffer(values_buf).view("int64"))
    # Keep the first 64-bit word of each pair.
    # NOTE(review): presumably safe because values fit in 64 bits -- confirm.
    narrowed = words[::2].copy()

    return cls(
        data=Buffer(narrowed.view("uint8")),
        size=n_rows,
        dtype=Decimal64Dtype.from_arrow(data.type),
        mask=mask,
    )
def binary_operator(
    self, binop: str, rhs: BinaryOperand, reflect: bool = False,
) -> ColumnBase:
    """Apply the binary operator ``binop`` between this column and ``rhs``.

    Parameters
    ----------
    binop : str
        Operator name passed on to libcudf.
    rhs : BinaryOperand
        The other operand. ``None`` is passed through unchanged with this
        column's dtype as the output dtype.
    reflect : bool, default False
        When True the operation is evaluated as ``rhs <binop> self``.

    Returns
    -------
    ColumnBase
        The result column.

    Raises
    ------
    TypeError
        If ``rhs`` is not a numerical column, a decimal column, a
        ``cudf.Scalar`` or something ``np.isscalar`` accepts.
    """
    int_dtypes = [
        cudf.dtype("int8"),
        cudf.dtype("int16"),
        cudf.dtype("int32"),
        cudf.dtype("int64"),
        cudf.dtype("uint8"),
        cudf.dtype("uint16"),
        cudf.dtype("uint32"),
        cudf.dtype("uint64"),
    ]
    if rhs is None:
        out_dtype = self.dtype
    else:
        if not (
            isinstance(
                rhs,
                (
                    NumericalColumn,
                    cudf.Scalar,
                    cudf.core.column.Decimal64Column,
                ),
            )
            or np.isscalar(rhs)
        ):
            msg = "{!r} operator not supported between {} and {}"
            raise TypeError(msg.format(binop, type(self), type(rhs)))
        if isinstance(rhs, cudf.core.column.Decimal64Column):
            # Promote this column to a scale-0 decimal and let the decimal
            # implementation handle the operation.
            lhs: Union[ScalarLike, ColumnBase] = self.as_decimal_column(
                Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
            )
            # Forward `reflect`: dropping it would evaluate a reflected,
            # non-commutative operator with the operands in the wrong order.
            return lhs.binary_operator(binop, rhs, reflect=reflect)
        out_dtype = np.result_type(self.dtype, rhs.dtype)
        if binop in ["mod", "floordiv"]:
            # `tmp` is the divisor: `self` when reflected, else `rhs`.
            tmp = self if reflect else rhs
            # An integer divisor that is (or contains) zero switches the
            # output dtype to float64.
            if (tmp.dtype in int_dtypes) and (
                (np.isscalar(tmp) and (0 == tmp))
                or ((isinstance(tmp, NumericalColumn)) and (0.0 in tmp))
            ):
                out_dtype = cudf.dtype("float64")

    if binop in {"lt", "gt", "le", "ge", "eq", "ne", "NULL_EQUALS"}:
        out_dtype = "bool"
    lhs, rhs = (self, rhs) if not reflect else (rhs, self)
    return libcudf.binaryop.binaryop(lhs, rhs, binop, out_dtype)
def binary_operator(
    self, binop: str, rhs: BinaryOperand, reflect: bool = False,
) -> ColumnBase:
    """Apply the binary operator ``binop`` between this column and ``rhs``.

    Parameters
    ----------
    binop : str
        Operator name passed on to ``_numeric_column_binop``.
    rhs : BinaryOperand
        The other operand. ``None`` is passed through unchanged with this
        column's dtype as the output dtype.
    reflect : bool, default False
        When True the operation is evaluated as ``rhs <binop> self``.

    Returns
    -------
    ColumnBase
        The result column.

    Raises
    ------
    TypeError
        If ``rhs`` is not a numerical column, a decimal column, a
        ``cudf.Scalar`` or something ``np.isscalar`` accepts.
    """
    int_dtypes = [
        np.dtype("int8"),
        np.dtype("int16"),
        np.dtype("int32"),
        np.dtype("int64"),
        np.dtype("uint8"),
        np.dtype("uint16"),
        np.dtype("uint32"),
        np.dtype("uint64"),
    ]
    if rhs is None:
        out_dtype = self.dtype
    else:
        if not (
            isinstance(
                rhs,
                (
                    NumericalColumn,
                    cudf.Scalar,
                    cudf.core.column.DecimalColumn,
                ),
            )
            or np.isscalar(rhs)
        ):
            msg = "{!r} operator not supported between {} and {}"
            raise TypeError(msg.format(binop, type(self), type(rhs)))
        if isinstance(rhs, cudf.core.column.DecimalColumn):
            # Promote this column to a scale-0 decimal and let the decimal
            # implementation handle the operation.
            lhs = self.as_decimal_column(
                Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
            )
            # Forward `reflect`: dropping it would evaluate a reflected,
            # non-commutative operator with the operands in the wrong order.
            return lhs.binary_operator(binop, rhs, reflect=reflect)
        out_dtype = np.result_type(self.dtype, rhs.dtype)
        if binop in ["mod", "floordiv"]:
            # `tmp` is the divisor: `self` when reflected, else `rhs`.
            tmp = self if reflect else rhs
            # An integer divisor that is (or contains) zero switches the
            # output dtype to float64.
            if (tmp.dtype in int_dtypes) and (
                (np.isscalar(tmp) and (0 == tmp))
                or ((isinstance(tmp, NumericalColumn)) and (0.0 in tmp))
            ):
                out_dtype = np.dtype("float64")
    return _numeric_column_binop(
        lhs=self, rhs=rhs, op=binop, out_dtype=out_dtype, reflect=reflect
    )
def _preprocess_host_value(self, value, dtype):
    """Normalize a host-side scalar and resolve the dtype it should carry.

    Returns a ``(value, dtype)`` pair; null inputs come back as ``NA``.
    Raises ``NotImplementedError`` when ``dtype`` is a ``Decimal64Dtype``,
    and ``TypeError`` when a ``decimal.Decimal`` is given with an explicit
    dtype or when a null is given without a resolvable dtype.
    """
    if isinstance(dtype, Decimal64Dtype):
        # TODO: Support coercion from decimal.Decimal to different dtype
        # TODO: Support coercion from integer to Decimal64Dtype
        raise NotImplementedError(
            "dtype as cudf.Decimal64Dtype is not supported. Pass a "
            "decimal.Decimal to construct a DecimalScalar.")
    if dtype is not None and isinstance(value, decimal.Decimal):
        raise TypeError(f"Can not coerce decimal to {dtype}")

    value = to_cudf_compatible_scalar(value, dtype=dtype)
    is_null = _is_null_host_scalar(value)

    if isinstance(value, decimal.Decimal):
        # 0.0042 -> Decimal64Dtype(2, 4)
        dtype = Decimal64Dtype._from_decimal(value)
    else:
        if dtype is None:
            if is_null:
                # Only a NaT with a concrete unit carries enough type
                # information to build a typed null.
                if not isinstance(value, (np.datetime64, np.timedelta64)):
                    raise TypeError(
                        "dtype required when constructing a null scalar")
                unit = np.datetime_data(value)[0]
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
            dtype = value.dtype
        dtype = np.dtype(dtype)

        # temporary
        if dtype.char == "U":
            dtype = np.dtype("object")

    return (NA if is_null else value), dtype
np.nan, 0.0, -8.302014, np.nan, 94.31304, -112.2314, 0.3333333, np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
    """Check that casting float data to a decimal dtype matches pyarrow.

    The expected result is built by casting the arrow form of the data to
    ``pa.decimal128`` with the target precision and scale.
    """
    got = data.astype(from_dtype)

    # Reference result: let pyarrow perform the float -> decimal cast.
    pa_arr = got.to_arrow().cast(
        pa.decimal128(to_dtype.precision, to_dtype.scale)
    )
    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

    got = got.astype(to_dtype)

    assert_eq(got, expected)


@pytest.mark.parametrize(
97938.2, np.nan, 0.0, -8.302014, np.nan, 94.31304, -112.2314, 0.3333333,
            np.nan,
        ]),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
    """Check that casting float data to a decimal dtype matches pyarrow.

    The expected result is built by casting the arrow form of the data to
    ``pa.decimal128`` with the target precision and scale. Both the values
    and the resulting dtype are compared.
    """
    got = data.astype(from_dtype)

    # Reference result: let pyarrow perform the float -> decimal cast.
    pa_arr = got.to_arrow().cast(
        pa.decimal128(to_dtype.precision, to_dtype.scale))
    expected = cudf.Series(DecimalColumn.from_arrow(pa_arr))

    got = got.astype(to_dtype)

    assert_eq(got, expected)
    # The dtype is compared explicitly, in addition to the values.
    assert_eq(got.dtype, expected.dtype)
assert_eq(
    pd.concat(dfs, join="inner"),
    gd.concat([gd.DataFrame(df) for df in dfs], join="inner"),
)


@pytest.mark.parametrize("ignore_index", [True, False])
@pytest.mark.parametrize("typ", [gd.DataFrame, gd.Series])
def test_concat_single_object(ignore_index, typ):
    """Ensure that concat on a single object does not change it.

    Runs for both DataFrame and Series, with ``ignore_index`` on and off.
    """
    obj = typ([1, 2, 3])
    # Concatenating a one-element list must return an equal object.
    assert_eq(gd.concat([obj], ignore_index=ignore_index, axis=0), obj)


@pytest.mark.parametrize("ltype", [Decimal64Dtype(3, 1), Decimal64Dtype(7, 2)])
@pytest.mark.parametrize("rtype", [Decimal64Dtype(3, 2), Decimal64Dtype(8, 4)])
def test_concat_decimal_dataframe(ltype, rtype):
    gdf1 = gd.DataFrame({
        "id": np.random.randint(0, 10, 3),
        "val": ["22.3", "59.5", "81.1"]
    })
    gdf2 = gd.DataFrame({
        "id": np.random.randint(0, 10, 3),
        "val": ["2.35", "5.59", "8.14"]
    })

    gdf1["val"] = gdf1["val"].astype(ltype)
    gdf2["val"] = gdf2["val"].astype(rtype)

    pdf1 = gdf1.to_pandas()
expected = "HellothereWorld"

assert got == expected

s = Series(["Hello", None, "World"])

got = s.sum()
expected = "HelloWorld"

assert got == expected


@pytest.mark.parametrize(
    "dtype",
    [
        Decimal64Dtype(6, 3),
        Decimal64Dtype(10, 6),
        Decimal64Dtype(16, 7),
        Decimal32Dtype(6, 3),
        Decimal128Dtype(20, 7),
    ],
)
@pytest.mark.parametrize("nelem", params_sizes)
def test_sum_decimal(dtype, nelem):
    """Check that summing a decimal Series matches summing Python Decimals.

    The same string data is summed once as ``Decimal`` objects in a pandas
    Series and once as a cudf Series cast to ``dtype``.
    """
    # Fixed seed so the generated data is reproducible.
    np.random.seed(0)
    # Dividing by 100 gives values with fractional digits; they are kept as
    # strings so both libraries parse the same text.
    data = [str(x) for x in gen_rand("int64", nelem) / 100]

    expected = pd.Series([Decimal(x) for x in data]).sum()
    got = cudf.Series(data).astype(dtype).sum()

    assert_eq(expected, got)
expected = pdata.fillna(method=method, inplace=inplace) actual = gdata.fillna(method=method, inplace=inplace) if inplace: expected = pdata actual = gdata assert_eq(expected, actual, check_dtype=False) @pytest.mark.parametrize( "gsr_data", [ cudf.Series(["2.34", "5.2", "7.47", None, "92.29", None]).astype( Decimal64Dtype(7, 2)), cudf.Series(["-74.56", None, "-23.73", "34.55", "2.89", None]).astype( Decimal64Dtype(7, 2)), cudf.Series(["85.955", np.nan, "-3.243", np.nan, "29.492", np.nan ]).astype(Decimal64Dtype(8, 3)), cudf.Series(["2.964", None, "57.432", "-989.330", None, "56.444" ]).astype(Decimal64Dtype(8, 3)), cudf.Series([np.nan, "55.2498", np.nan, "-5.2965", "-28.9423", np.nan ]).astype(Decimal64Dtype(10, 4)), ], ) @pytest.mark.parametrize( "fill_value", [ 42, -123,
for type_ in float_types:
    gs = cudf.Series(data).astype(type_)
    ps = pd.Series(data).astype(type_)
    assert_eq(gs.cumsum(), ps.cumsum())

for type_ in INTEGER_TYPES:
    gs = cudf.Series(data).astype(type_)
    got = gs.cumsum()
    expected = pd.Series([1, 3, np.nan, 7, 12], dtype="float64")
    assert_eq(got, expected)


@pytest.mark.parametrize(
    "dtype",
    [
        Decimal64Dtype(8, 4),
        Decimal64Dtype(10, 5),
        Decimal64Dtype(12, 7),
        Decimal32Dtype(8, 5),
        Decimal128Dtype(13, 6),
    ],
)
def test_cumsum_decimal(dtype):
    """Check that cumsum on a decimal Series matches the pandas float cumsum.

    The expected result is the pandas float64 cumulative sum, converted to
    cudf and cast to ``dtype``.
    """
    # The data includes a NaN entry alongside the numeric strings.
    data = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"]
    gser = cudf.Series(data).astype(dtype)
    pser = pd.Series(data, dtype="float64")

    got = gser.cumsum()
    expected = cudf.Series.from_pandas(pser.cumsum()).astype(dtype)

    assert_eq(got, expected)
def test_decimal_dtype():
    """Check the Decimal64Dtype <-> pyarrow decimal128 round trip."""
    precision, scale = 4, 2
    cudf_type = Decimal64Dtype(precision, scale)

    # cudf -> arrow
    assert cudf_type.to_arrow() == pa.decimal128(precision, scale)
    # arrow -> cudf
    assert cudf_type == Decimal64Dtype.from_arrow(
        pa.decimal128(precision, scale)
    )
def test_max_precision():
    """Check that precision 18 is accepted and precision 19 is rejected."""
    accepted_precision = 18
    rejected_precision = accepted_precision + 1

    # Must construct without raising.
    Decimal64Dtype(scale=0, precision=accepted_precision)

    with pytest.raises(ValueError):
        Decimal64Dtype(scale=0, precision=rejected_precision)