Example #1
0
    def binary_operator(self, op, other, reflect=False):
        """Apply the binary operation ``op`` between this column and ``other``.

        Parameters
        ----------
        op : str
            ``"add"``, ``"sub"`` or ``"mul"`` for arithmetic, or one of
            ``"eq"``, ``"ne"``, ``"lt"``, ``"gt"``, ``"le"``, ``"ge"`` for
            comparisons.
        other
            The second operand. For comparisons it must be a
            ``DecimalColumn``, an integer ``NumericalColumn`` or a
            ``cudf.Scalar``.
        reflect : bool, default False
            When True the operands are swapped before the operation.

        Returns
        -------
        The column produced by ``libcudf.binaryop.binaryop``.

        Raises
        ------
        TypeError
            If ``op`` is not one of the operations listed above, or if
            ``other`` has an unsupported type for a comparison.
        """
        if reflect:
            self, other = other, self

        # Binary arithmetic between decimal columns. `scale` and `precision`
        # are computed outside of libcudf.
        if op in ("add", "sub", "mul"):
            scale = _binop_scale(self.dtype, other.dtype, op)
            output_type = Decimal64Dtype(
                scale=scale, precision=Decimal64Dtype.MAX_PRECISION
            )  # precision will be ignored, libcudf has no notion of precision
            result = libcudf.binaryop.binaryop(self, other, op, output_type)
            result.dtype.precision = _binop_precision(
                self.dtype, other.dtype, op
            )
        elif op in ("eq", "ne", "lt", "gt", "le", "ge"):
            if not isinstance(
                other,
                (DecimalColumn, cudf.core.column.NumericalColumn, cudf.Scalar),
            ):
                # The trailing space keeps "between" and the type name apart.
                raise TypeError(
                    f"Operator {op} not supported between "
                    f"{str(type(self))} and {str(type(other))}"
                )
            if isinstance(other, cudf.core.column.NumericalColumn):
                if not is_integer_dtype(other.dtype):
                    raise TypeError(
                        f"Only decimal and integer column is supported for "
                        f"{op}."
                    )
                # Integer columns are compared as scale-0 decimals.
                other = other.as_decimal_column(
                    Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
                )
            result = libcudf.binaryop.binaryop(self, other, op, bool)
        else:
            # Fail explicitly instead of reaching ``return result`` with
            # ``result`` unbound.
            raise TypeError(
                f"Operator {op} not supported between "
                f"{str(type(self))} and {str(type(other))}"
            )
        return result
Example #2
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host ``value`` and resolve the dtype it is stored as.

        Returns a ``(value, dtype)`` tuple. ``value`` is replaced by ``NA``
        when the incoming value is detected as null by
        ``_is_null_host_scalar``. ``dtype`` is either a ``Decimal64Dtype`` or
        the result of ``np.dtype(...)``.

        Raises ``TypeError`` when a null value is given without a dtype and
        none can be taken from the value (including a unit-less NaT).
        """
        # Nullness is decided on the raw input, before any conversion.
        is_valid = not _is_null_host_scalar(value)

        if isinstance(dtype, Decimal64Dtype):
            # Round-trip through pyarrow with the precision/scale of ``dtype``.
            arrow_type = pa.decimal128(dtype.precision, dtype.scale)
            value = pa.scalar(value, type=arrow_type).as_py()
        if dtype is None and isinstance(value, decimal.Decimal):
            dtype = Decimal64Dtype._from_decimal(value)

        value = to_cudf_compatible_scalar(value, dtype=dtype)

        if dtype is None:
            if not is_valid:
                # Only a datetime/timedelta null carrying a unit can supply
                # its own dtype.
                if not isinstance(value, (np.datetime64, np.timedelta64)):
                    raise TypeError(
                        "dtype required when constructing a null scalar")
                unit, _ = np.datetime_data(value)
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
            dtype = value.dtype

        if not isinstance(dtype, Decimal64Dtype):
            dtype = np.dtype(dtype)

        return (value if is_valid else NA), dtype
Example #3
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host ``value`` and resolve the dtype it is stored as.

        Handling by input kind, in order:

        * ``list`` value: ``dtype`` must be None; a ``ListDtype`` is inferred
          through pyarrow and the value is returned unchanged.
        * ``ListDtype`` / ``StructDtype`` with a non-container value: only
          ``None`` or ``NA`` is accepted and ``(NA, dtype)`` is returned.
        * ``dict`` value: a ``StructDtype`` is inferred when ``dtype`` is
          None and the value is returned unchanged.
        * everything else: the value goes through
          ``to_cudf_compatible_scalar`` and a dtype is taken from the value
          when none was given.

        Returns
        -------
        tuple
            ``(value, dtype)``. ``value`` is ``NA`` for null inputs.

        Raises
        ------
        TypeError
            If a list is given together with a dtype, or a null value is
            given without a dtype and none can be taken from the value.
        ValueError
            If a non-null, non-container value is given with a
            ``ListDtype`` or ``StructDtype``.
        """
        valid = not cudf._lib.scalar._is_null_host_scalar(value)

        if isinstance(value, list):
            if dtype is not None:
                raise TypeError("Lists may not be cast to a different dtype")
            dtype = ListDtype.from_arrow(
                pa.infer_type([value], from_pandas=True))
            return value, dtype
        elif isinstance(dtype, ListDtype):
            # Identity checks rather than ``value not in {None, NA}``: set
            # membership hashes ``value`` and would fail with "unhashable
            # type" for e.g. a dict instead of the intended ValueError.
            if value is not None and value is not NA:
                raise ValueError(f"Can not coerce {value} to ListDtype")
            return NA, dtype

        if isinstance(value, dict):
            if dtype is None:
                dtype = StructDtype.from_arrow(
                    pa.infer_type([value], from_pandas=True))
            return value, dtype
        elif isinstance(dtype, StructDtype):
            # Same reasoning as the ListDtype branch above.
            if value is not None and value is not NA:
                raise ValueError(f"Can not coerce {value} to StructDType")
            return NA, dtype

        if isinstance(dtype, Decimal64Dtype):
            # Round-trip through pyarrow with the precision/scale of ``dtype``.
            value = pa.scalar(value,
                              type=pa.decimal128(dtype.precision,
                                                 dtype.scale)).as_py()
        if isinstance(value, decimal.Decimal) and dtype is None:
            dtype = Decimal64Dtype._from_decimal(value)

        value = to_cudf_compatible_scalar(value, dtype=dtype)

        if dtype is None:
            if not valid:
                # Only a datetime/timedelta null carrying a unit can supply
                # its own dtype.
                if not isinstance(value, (np.datetime64, np.timedelta64)):
                    raise TypeError(
                        "dtype required when constructing a null scalar")
                unit, _ = np.datetime_data(value)
                if unit == "generic":
                    raise TypeError(
                        "Cant convert generic NaT to null scalar")
            dtype = value.dtype

        if not isinstance(dtype, Decimal64Dtype):
            dtype = cudf.dtype(dtype)

        if not valid:
            value = NA

        return value, dtype
Example #4
0
 def binary_operator(self, op, other, reflect=False):
     """Run the libcudf binary operation ``op`` on two decimal operands.

     ``reflect=True`` swaps the operands. The result's scale comes from
     ``_binop_scale`` and its precision is set afterwards from
     ``_binop_precision``.
     """
     lhs, rhs = (other, self) if reflect else (self, other)
     # precision will be ignored, libcudf has no notion of precision
     placeholder_dtype = Decimal64Dtype(
         scale=_binop_scale(lhs.dtype, rhs.dtype, op),
         precision=Decimal64Dtype.MAX_PRECISION,
     )
     out = libcudf.binaryop.binaryop(lhs, rhs, op, placeholder_dtype)
     out.dtype.precision = _binop_precision(lhs.dtype, rhs.dtype, op)
     return out
Example #5
0
def test_orc_writer_decimal(tmpdir, scale):
    """Write a decimal column to ORC with cudf and read it back with pandas."""
    np.random.seed(0)
    orc_path = tmpdir / "decimal.orc"

    gdf = cudf.DataFrame({"dec_val": gen_rand_series("i", 100)})
    decimal_dtype = Decimal64Dtype(7, scale)
    gdf["dec_val"] = gdf["dec_val"].astype(decimal_dtype)

    gdf.to_orc(orc_path)

    # pandas is the independent reader the cudf output is checked against.
    round_tripped = pd.read_orc(orc_path)
    written_column = gdf.to_pandas()["dec_val"]
    assert_eq(written_column, round_tripped["dec_val"])
Example #6
0
 def from_arrow(cls, data: pa.Array):
     """Build a column from a pyarrow decimal array.

     The dtype is derived from ``data.type``. The arrow data buffer is read
     as int64 words and every second word is kept for the column's data.
     """
     dtype = Decimal64Dtype.from_arrow(data.type)

     validity_buf = data.buffers()[0]
     if validity_buf is None:
         mask = None
     else:
         mask = pa_mask_buffer_to_mask(validity_buf, len(data))

     # Two int64 words per value; keep the first of each pair
     # (presumably the low 64 bits of the 128-bit value -- confirm).
     words = cp.array(np.frombuffer(data.buffers()[1], dtype="int64"))
     kept_words = words[::2].copy()

     return cls(
         data=Buffer(kept_words.view("uint8")),
         size=len(data),
         dtype=dtype,
         mask=mask,
     )
Example #7
0
    def binary_operator(
        self,
        binop: str,
        rhs: BinaryOperand,
        reflect: bool = False,
    ) -> ColumnBase:
        """Apply ``binop`` between this numerical column and ``rhs``.

        ``rhs`` may be None, a ``NumericalColumn``, a ``cudf.Scalar``, a
        ``Decimal64Column`` or anything ``np.isscalar`` accepts; other types
        raise ``TypeError``. A ``Decimal64Column`` operand is handled by
        casting this column to a scale-0 decimal and delegating to it.
        ``reflect=True`` swaps the operands passed to libcudf.
        """
        int_dtypes = [
            cudf.dtype(name)
            for name in (
                "int8",
                "int16",
                "int32",
                "int64",
                "uint8",
                "uint16",
                "uint32",
                "uint64",
            )
        ]
        if rhs is None:
            out_dtype = self.dtype
        else:
            is_supported_operand = isinstance(
                rhs,
                (
                    NumericalColumn,
                    cudf.Scalar,
                    cudf.core.column.Decimal64Column,
                ),
            )
            if not is_supported_operand and not np.isscalar(rhs):
                raise TypeError(
                    "{!r} operator not supported between {} and {}".format(
                        binop, type(self), type(rhs)))
            if isinstance(rhs, cudf.core.column.Decimal64Column):
                # Let the decimal implementation handle mixed operands.
                as_decimal = self.as_decimal_column(
                    Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0))
                return as_decimal.binary_operator(binop, rhs)
            out_dtype = np.result_type(self.dtype, rhs.dtype)
            if binop in ("mod", "floordiv"):
                # The right-hand operand of the actual operation.
                divisor = self if reflect else rhs
                if divisor.dtype in int_dtypes:
                    is_zero_scalar = np.isscalar(divisor) and (0 == divisor)
                    if is_zero_scalar or (
                            isinstance(divisor, NumericalColumn)
                            and (0.0 in divisor)):
                        # Integer division by zero: the result is typed
                        # as float64.
                        out_dtype = cudf.dtype("float64")

        if binop in {"lt", "gt", "le", "ge", "eq", "ne", "NULL_EQUALS"}:
            out_dtype = "bool"
        if reflect:
            left, right = rhs, self
        else:
            left, right = self, rhs
        return libcudf.binaryop.binaryop(left, right, binop, out_dtype)
Example #8
0
 def binary_operator(
     self, binop: str, rhs: BinaryOperand, reflect: bool = False,
 ) -> ColumnBase:
     """Apply ``binop`` between this numerical column and ``rhs``.

     ``rhs`` may be None, a ``NumericalColumn``, a ``cudf.Scalar``, a
     ``DecimalColumn`` or anything ``np.isscalar`` accepts; other types
     raise ``TypeError``. A ``DecimalColumn`` operand is handled by casting
     this column to a scale-0 decimal and delegating to it. All other cases
     go to ``_numeric_column_binop`` with the computed output dtype.
     """
     int_dtypes = [
         np.dtype(name)
         for name in (
             "int8",
             "int16",
             "int32",
             "int64",
             "uint8",
             "uint16",
             "uint32",
             "uint64",
         )
     ]
     if rhs is None:
         out_dtype = self.dtype
     else:
         is_supported_operand = isinstance(
             rhs,
             (
                 NumericalColumn,
                 cudf.Scalar,
                 cudf.core.column.DecimalColumn,
             ),
         )
         if not is_supported_operand and not np.isscalar(rhs):
             raise TypeError(
                 "{!r} operator not supported between {} and {}".format(
                     binop, type(self), type(rhs)
                 )
             )
         if isinstance(rhs, cudf.core.column.DecimalColumn):
             # Let the decimal implementation handle mixed operands.
             as_decimal = self.as_decimal_column(
                 Decimal64Dtype(Decimal64Dtype.MAX_PRECISION, 0)
             )
             return as_decimal.binary_operator(binop, rhs)
         out_dtype = np.result_type(self.dtype, rhs.dtype)
         if binop in ("mod", "floordiv"):
             # The right-hand operand of the actual operation.
             divisor = self if reflect else rhs
             if divisor.dtype in int_dtypes:
                 is_zero_scalar = np.isscalar(divisor) and (0 == divisor)
                 if is_zero_scalar or (
                     isinstance(divisor, NumericalColumn)
                     and (0.0 in divisor)
                 ):
                     # Integer division by zero: the result is typed as
                     # float64.
                     out_dtype = np.dtype("float64")
     return _numeric_column_binop(
         lhs=self, rhs=rhs, op=binop, out_dtype=out_dtype, reflect=reflect
     )
Example #9
0
File: scalar.py Project: vyasr/cudf
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host ``value`` and resolve the dtype it is stored as.

        Returns a ``(value, dtype)`` tuple, with ``value`` replaced by ``NA``
        when the converted value is null. ``decimal.Decimal`` values get
        their dtype from ``Decimal64Dtype._from_decimal``.

        Raises ``NotImplementedError`` when ``dtype`` is a
        ``Decimal64Dtype``. Raises ``TypeError`` when a Decimal is given
        with an explicit dtype, or a null value is given without a dtype and
        none can be taken from the value.
        """
        if isinstance(dtype, Decimal64Dtype):
            # TODO: Support coercion from decimal.Decimal to different dtype
            # TODO: Support coercion from integer to Decimal64Dtype
            raise NotImplementedError(
                "dtype as cudf.Decimal64Dtype is not supported. Pass a "
                "decimal.Decimal to construct a DecimalScalar.")
        if dtype is not None and isinstance(value, decimal.Decimal):
            raise TypeError(f"Can not coerce decimal to {dtype}")

        value = to_cudf_compatible_scalar(value, dtype=dtype)
        # Nullness is decided on the converted value.
        is_valid = not _is_null_host_scalar(value)

        if isinstance(value, decimal.Decimal):
            # 0.0042 -> Decimal64Dtype(2, 4)
            dtype = Decimal64Dtype._from_decimal(value)
        else:
            if dtype is None:
                if not is_valid:
                    # Only a datetime/timedelta null carrying a unit can
                    # supply its own dtype.
                    if not isinstance(value,
                                      (np.datetime64, np.timedelta64)):
                        raise TypeError(
                            "dtype required when constructing a null scalar")
                    unit, _ = np.datetime_data(value)
                    if unit == "generic":
                        raise TypeError(
                            "Cant convert generic NaT to null scalar")
                dtype = value.dtype
            dtype = np.dtype(dtype)

            # temporary
            if dtype.char == "U":
                dtype = np.dtype("object")

        return (value if is_valid else NA), dtype
Example #10
0
                np.nan,
                0.0,
                -8.302014,
                np.nan,
                94.31304,
                -112.2314,
                0.3333333,
                np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
    """Casting floats to decimal in cudf should match the pyarrow cast."""
    float_series = data.astype(from_dtype)

    # Reference result: cast through pyarrow with the same precision/scale.
    arrow_floats = float_series.to_arrow()
    arrow_decimal_type = pa.decimal128(to_dtype.precision, to_dtype.scale)
    arrow_decimals = arrow_floats.cast(arrow_decimal_type)
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_decimals))

    actual = float_series.astype(to_dtype)

    assert_eq(actual, expected)


@pytest.mark.parametrize(
Example #11
0
            97938.2,
            np.nan,
            0.0,
            -8.302014,
            np.nan,
            94.31304,
            -112.2314,
            0.3333333,
            np.nan,
        ]),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2),
     Decimal64Dtype(11, 4),
     Decimal64Dtype(18, 9)],
)
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
    """Casting floats to decimal in cudf should match the pyarrow cast.

    Both the values and the resulting dtype are compared.
    """
    float_series = data.astype(from_dtype)

    # Reference result: cast through pyarrow with the same precision/scale.
    arrow_floats = float_series.to_arrow()
    arrow_decimal_type = pa.decimal128(to_dtype.precision, to_dtype.scale)
    arrow_decimals = arrow_floats.cast(arrow_decimal_type)
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_decimals))

    actual = float_series.astype(to_dtype)

    assert_eq(actual, expected)
    assert_eq(actual.dtype, expected.dtype)
Example #12
0
    assert_eq(
        pd.concat(dfs, join="inner"),
        gd.concat([gd.DataFrame(df) for df in dfs], join="inner"),
    )


@pytest.mark.parametrize("ignore_index", [True, False])
@pytest.mark.parametrize("typ", [gd.DataFrame, gd.Series])
def test_concat_single_object(ignore_index, typ):
    """Concatenating a one-element list must return an equal object."""
    original = typ([1, 2, 3])
    concatenated = gd.concat([original], ignore_index=ignore_index, axis=0)
    assert_eq(concatenated, original)


@pytest.mark.parametrize("ltype", [Decimal64Dtype(3, 1), Decimal64Dtype(7, 2)])
@pytest.mark.parametrize("rtype", [Decimal64Dtype(3, 2), Decimal64Dtype(8, 4)])
def test_concat_decimal_dataframe(ltype, rtype):
    gdf1 = gd.DataFrame({
        "id": np.random.randint(0, 10, 3),
        "val": ["22.3", "59.5", "81.1"]
    })
    gdf2 = gd.DataFrame({
        "id": np.random.randint(0, 10, 3),
        "val": ["2.35", "5.59", "8.14"]
    })

    gdf1["val"] = gdf1["val"].astype(ltype)
    gdf2["val"] = gdf2["val"].astype(rtype)

    pdf1 = gdf1.to_pandas()
Example #13
0
    expected = "HellothereWorld"

    assert got == expected

    s = Series(["Hello", None, "World"])

    got = s.sum()
    expected = "HelloWorld"

    assert got == expected


@pytest.mark.parametrize(
    "dtype",
    [
        Decimal64Dtype(6, 3),
        Decimal64Dtype(10, 6),
        Decimal64Dtype(16, 7),
        Decimal32Dtype(6, 3),
        Decimal128Dtype(20, 7),
    ],
)
@pytest.mark.parametrize("nelem", params_sizes)
def test_sum_decimal(dtype, nelem):
    """The sum of a decimal cudf Series should match a Decimal pandas sum."""
    np.random.seed(0)
    scaled_values = gen_rand("int64", nelem) / 100
    decimal_strings = list(map(str, scaled_values))

    # Reference: exact sum of ``decimal.Decimal`` objects on the host.
    host_total = pd.Series(list(map(Decimal, decimal_strings))).sum()
    device_total = cudf.Series(decimal_strings).astype(dtype).sum()

    assert_eq(host_total, device_total)
Example #14
0
    expected = pdata.fillna(method=method, inplace=inplace)
    actual = gdata.fillna(method=method, inplace=inplace)

    if inplace:
        expected = pdata
        actual = gdata

    assert_eq(expected, actual, check_dtype=False)


@pytest.mark.parametrize(
    "gsr_data",
    [
        cudf.Series(["2.34", "5.2", "7.47", None, "92.29", None]).astype(
            Decimal64Dtype(7, 2)),
        cudf.Series(["-74.56", None, "-23.73", "34.55", "2.89", None]).astype(
            Decimal64Dtype(7, 2)),
        cudf.Series(["85.955", np.nan, "-3.243", np.nan, "29.492", np.nan
                     ]).astype(Decimal64Dtype(8, 3)),
        cudf.Series(["2.964", None, "57.432", "-989.330", None, "56.444"
                     ]).astype(Decimal64Dtype(8, 3)),
        cudf.Series([np.nan, "55.2498", np.nan, "-5.2965", "-28.9423", np.nan
                     ]).astype(Decimal64Dtype(10, 4)),
    ],
)
@pytest.mark.parametrize(
    "fill_value",
    [
        42,
        -123,
Example #15
0
                np.nan,
                0.0,
                -8.302014,
                np.nan,
                94.31304,
                -112.2314,
                0.3333333,
                np.nan,
            ]
        ),
    ],
)
@pytest.mark.parametrize("from_dtype", FLOAT_TYPES)
@pytest.mark.parametrize(
    "to_dtype",
    [Decimal64Dtype(7, 2), Decimal64Dtype(11, 4), Decimal64Dtype(18, 9)],
)
def test_typecast_from_float_to_decimal(data, from_dtype, to_dtype):
    """Casting floats to decimal in cudf should match the pyarrow cast."""
    float_series = data.astype(from_dtype)

    # Reference result: cast through pyarrow with the same precision/scale.
    arrow_floats = float_series.to_arrow()
    arrow_decimal_type = pa.decimal128(to_dtype.precision, to_dtype.scale)
    arrow_decimals = arrow_floats.cast(arrow_decimal_type)
    expected = cudf.Series(DecimalColumn.from_arrow(arrow_decimals))

    actual = float_series.astype(to_dtype)

    assert_eq(actual, expected)


@pytest.mark.parametrize(
Example #16
0
    for type_ in float_types:
        gs = cudf.Series(data).astype(type_)
        ps = pd.Series(data).astype(type_)
        assert_eq(gs.cumsum(), ps.cumsum())

    for type_ in INTEGER_TYPES:
        gs = cudf.Series(data).astype(type_)
        got = gs.cumsum()
        expected = pd.Series([1, 3, np.nan, 7, 12], dtype="float64")
        assert_eq(got, expected)


@pytest.mark.parametrize(
    "dtype",
    [
        Decimal64Dtype(8, 4),
        Decimal64Dtype(10, 5),
        Decimal64Dtype(12, 7),
        Decimal32Dtype(8, 5),
        Decimal128Dtype(13, 6),
    ],
)
def test_cumsum_decimal(dtype):
    """Decimal cumsum should match a float64 pandas cumsum cast to ``dtype``."""
    raw_values = ["243.32", "48.245", "-7234.298", np.nan, "-467.2"]
    decimal_series = cudf.Series(raw_values).astype(dtype)
    float_reference = pd.Series(raw_values, dtype="float64")

    actual = decimal_series.cumsum()
    # Reference: pandas float cumsum, moved to cudf and cast to the decimal
    # dtype under test.
    reference_cumsum = float_reference.cumsum()
    expected = cudf.Series.from_pandas(reference_cumsum).astype(dtype)

    assert_eq(actual, expected)
Example #17
0
def test_decimal_dtype():
    """``Decimal64Dtype`` converts to and from ``pa.decimal128``."""
    precision, scale = 4, 2
    cudf_dtype = Decimal64Dtype(precision, scale)
    arrow_dtype = pa.decimal128(precision, scale)

    assert cudf_dtype.to_arrow() == arrow_dtype
    assert cudf_dtype == Decimal64Dtype.from_arrow(arrow_dtype)
Example #18
0
def test_max_precision():
    """Precision 18 is accepted; precision 19 raises ``ValueError``."""
    largest_accepted = 18
    Decimal64Dtype(scale=0, precision=largest_accepted)
    with pytest.raises(ValueError):
        Decimal64Dtype(scale=0, precision=largest_accepted + 1)