Exemple #1
0
 def normalize_binop_value(
         self, other: ScalarLike) -> Union[ColumnBase, ScalarLike]:
     """Coerce the right-hand operand of a binary operation.

     Parameters
     ----------
     other : ScalarLike
         ``None``, a ``cudf.Scalar``, a zero-dimensional ``np.ndarray``
         or a host scalar.

     Returns
     -------
     ``None`` and a ``cudf.Scalar`` whose dtype already equals
     ``self.dtype`` are returned unchanged.  Any other value accepted by
     ``np.isscalar`` is returned as a NumPy scalar of the promoted dtype;
     everything else is broadcast into a column of ``len(self)`` elements
     that shares this column's mask.

     Raises
     ------
     TypeError
         If ``np.min_scalar_type(other)`` is not a boolean, integer,
         unsigned integer or floating kind.
     """
     if other is None:
         return other
     if isinstance(other, cudf.Scalar):
         if self.dtype == other.dtype:
             return other
         # expensive device-host transfer just to
         # adjust the dtype
         other = other.value
     elif isinstance(other, np.ndarray) and other.ndim == 0:
         other = other.item()

     # From here on ``other`` can no longer be a ``cudf.Scalar``: every
     # Scalar was either returned above or replaced by its host value,
     # so no further Scalar check is needed.
     other_dtype = np.min_scalar_type(other)
     if other_dtype.kind not in {"b", "i", "u", "f"}:
         raise TypeError(f"cannot broadcast {type(other)}")

     other_dtype = np.promote_types(self.dtype, other_dtype)
     if other_dtype == np.dtype("float16"):
         # A float16 promotion result is widened to float32.
         other_dtype = cudf.dtype("float32")
         other = other_dtype.type(other)
     if self.dtype.kind == "b":
         # For boolean columns the operand dtype is taken from the
         # operand itself rather than from the promotion result.
         other_dtype = min_signed_type(other)
     if np.isscalar(other):
         return cudf.dtype(other_dtype).type(other)

     ary = utils.scalar_broadcast_to(other,
                                     size=len(self),
                                     dtype=other_dtype)
     return column.build_column(
         data=Buffer(ary),
         dtype=ary.dtype,
         mask=self.mask,
     )
Exemple #2
0
    def can_cast_safely(self, to_dtype: Dtype) -> bool:
        """Report whether this column can be cast to ``to_dtype``.

        For a ``datetime64`` target, the column's maximum and minimum
        (taken as int64 counts of this column's time unit) are compared
        with the largest int64 count of the target unit, expressed in
        this column's unit.  For any other target only ``int64`` and
        object dtypes are accepted.
        """
        if not np.issubdtype(to_dtype, np.datetime64):
            # can safely cast to representation, or string
            return bool(
                to_dtype == cudf.dtype("int64")
                or to_dtype == cudf.dtype("O")
            )

        to_res, _ = np.datetime_data(to_dtype)
        self_res, _ = np.datetime_data(self.dtype)

        int64 = cudf.dtype("int64")
        max_int = np.iinfo(int64).max

        max_dist = np.timedelta64(
            self.max().astype(int64, copy=False), self_res)
        min_dist = np.timedelta64(
            self.min().astype(int64, copy=False), self_res)

        self_delta_dtype = np.timedelta64(0, self_res).dtype
        upper_bound = np.timedelta64(max_int, to_res).astype(
            self_delta_dtype)

        # NOTE(review): the minimum is compared against the same upper
        # bound as the maximum, mirroring the existing logic -- confirm
        # whether a lower-bound check was intended.
        return bool(max_dist <= upper_bound and min_dist <= upper_bound)
Exemple #3
0
def determine_out_dtype(lhs_dtype: Dtype, rhs_dtype: Dtype) -> Dtype:
    """Pick whichever of the two dtypes the other can be cast to.

    ``rhs_dtype`` is preferred when ``np.can_cast`` allows casting in
    both directions.

    Raises
    ------
    TypeError
        If neither dtype can be cast to the other.
    """
    left = cudf.dtype(lhs_dtype)
    right = cudf.dtype(rhs_dtype)
    if np.can_cast(left, right):
        return rhs_dtype
    if np.can_cast(right, left):
        return lhs_dtype
    raise TypeError(f"Cannot type-cast {lhs_dtype} and {rhs_dtype}")
Exemple #4
0
    def _binop_result_dtype_or_error(self, other, op):
        """Return the result dtype of ``self <op> other``.

        Comparison operators always yield ``np.bool_``.  Otherwise the
        dtype comes from ``get_allowed_combinations_for_operator``; when
        that lookup answers ``"M"`` or ``"m"`` the concrete datetime or
        timedelta dtype is derived from the operands.
        """
        comparisons = {
            "__eq__", "__ne__", "__lt__", "__gt__", "__le__", "__ge__"
        }
        if op in comparisons:
            return np.bool_

        out_dtype = get_allowed_combinations_for_operator(
            self.dtype, other.dtype, op
        )
        if out_dtype not in {"M", "m"}:
            return cudf.dtype(out_dtype)

        # datetime handling
        temporal_chars = {"M", "m"}
        self_is_temporal = self.dtype.char in temporal_chars
        other_is_temporal = other.dtype.char in temporal_chars

        # Exactly one temporal operand: its dtype is the result.
        if self_is_temporal and not other_is_temporal:
            return self.dtype
        if other_is_temporal and not self_is_temporal:
            return other.dtype

        # datetime - datetime gives a timedelta in the unit of the
        # larger of the two dtypes.
        if op == "__sub__" and self.dtype.char == other.dtype.char == "M":
            res, _ = np.datetime_data(max(self.dtype, other.dtype))
            return cudf.dtype("m8" + f"[{res}]")
        return np.result_type(self.dtype, other.dtype)
Exemple #5
0
def test_null_series(nrows, dtype):
    """Compare the repr of a masked cudf Series with the pandas repr.

    ``display.max_rows`` is set through ``pd.option_context`` so the
    global pandas option is restored even when the assertion fails.
    The option returns to its previous value on exit.
    """
    size = 5
    mask = utils.random_bitmask(size)
    data = cudf.Series(np.random.randint(1, 9, size))
    column = data.set_mask(mask)
    sr = cudf.Series(column).astype(dtype)
    if dtype != "category" and cudf.dtype(dtype).kind in {"u", "i"}:
        ps = pd.Series(
            sr._column.data_array_view.copy_to_host(),
            dtype=cudf_dtypes_to_pandas_dtypes.get(cudf.dtype(dtype),
                                                   cudf.dtype(dtype)),
        )
        ps[sr.isnull().to_pandas()] = pd.NA
    else:
        ps = sr.to_pandas()

    # Both reprs are rendered while the option is active, matching the
    # point at which they were previously evaluated.
    with pd.option_context("display.max_rows", int(nrows)):
        psrepr = ps.__repr__()
        srrepr = sr.__repr__()

    # Map the pandas spellings of a missing value onto "<NA>".
    psrepr = psrepr.replace("NaN", "<NA>")
    psrepr = psrepr.replace("NaT", "<NA>")
    psrepr = psrepr.replace("None", "<NA>")
    if dtype.startswith(("int", "uint", "long")):
        psrepr = psrepr.replace(
            str(sr._column.default_na_value()) + "\n", "<NA>\n")
    # Lower-case the pandas nullable integer dtype names.
    if "UInt" in psrepr:
        psrepr = psrepr.replace("UInt", "uint")
    elif "Int" in psrepr:
        psrepr = psrepr.replace("Int", "int")
    assert psrepr.split() == srrepr.split()
Exemple #6
0
def test_null_series(nrows, dtype):
    """Compare the repr of a cudf Series holding nulls with pandas.

    ``display.max_rows`` is set through ``pd.option_context`` so the
    global pandas option is restored even when the assertion fails.
    The option returns to its previous value on exit.
    """
    size = 5
    sr = cudf.Series(np.random.randint(1, 9, size)).astype(dtype)
    sr[np.random.choice([False, True], size=size)] = None
    if dtype != "category" and cudf.dtype(dtype).kind in {"u", "i"}:
        ps = pd.Series(
            sr._column.data_array_view.copy_to_host(),
            dtype=np_dtypes_to_pandas_dtypes.get(
                cudf.dtype(dtype), cudf.dtype(dtype)
            ),
        )
        ps[sr.isnull().to_pandas()] = pd.NA
    else:
        ps = sr.to_pandas()

    # Both reprs are rendered while the option is active, matching the
    # point at which they were previously evaluated.
    with pd.option_context("display.max_rows", int(nrows)):
        psrepr = ps.__repr__()
        srrepr = sr.__repr__()

    # Map the pandas spellings of a missing value onto "<NA>".
    psrepr = psrepr.replace("NaN", "<NA>")
    psrepr = psrepr.replace("NaT", "<NA>")
    psrepr = psrepr.replace("None", "<NA>")
    # Lower-case the pandas nullable integer dtype names.
    if "UInt" in psrepr:
        psrepr = psrepr.replace("UInt", "uint")
    elif "Int" in psrepr:
        psrepr = psrepr.replace("Int", "int")
    assert psrepr.split() == srrepr.split()
Exemple #7
0
    def _unaop_result_type_or_error(self, op):
        """Return the result dtype of the unary operation ``op``.

        ``__ceil__`` and ``__floor__`` produce float32 when the dtype
        char is one of ``bBhHf?`` and float64 otherwise; every other
        operation keeps ``self.dtype``.

        Raises
        ------
        TypeError
            For ``__neg__`` on a boolean scalar.
        """
        if op == "__neg__" and self.dtype == "bool":
            raise TypeError("Boolean scalars in cuDF do not support"
                            " negation, use logical not")

        if op not in {"__ceil__", "__floor__"}:
            return self.dtype

        fits_float32 = self.dtype.char in "bBhHf?"
        return cudf.dtype("float32" if fits_float32 else "float64")
Exemple #8
0
def _can_cast(from_dtype, to_dtype):
    """
    Utility function to determine if we can cast
    from `from_dtype` to `to_dtype`. This function primarily calls
    `np.can_cast` but with some special handling around
    cudf specific dtypes.

    Parameters
    ----------
    from_dtype : dtype, type, None or cudf.NA
        Source dtype; ``None`` and ``cudf.NA`` are always castable.
    to_dtype : dtype or type
        Target dtype.

    Returns
    -------
    bool
        Whether the cast is considered possible.
    """
    if from_dtype in {None, cudf.NA}:
        return True
    # Plain Python/NumPy type objects are normalized to dtype instances.
    if isinstance(from_dtype, type):
        from_dtype = cudf.dtype(from_dtype)
    if isinstance(to_dtype, type):
        to_dtype = cudf.dtype(to_dtype)

    # TODO : Add precision & scale checking for
    # decimal types in future

    if isinstance(from_dtype, cudf.core.dtypes.DecimalDtype):
        if isinstance(to_dtype, cudf.core.dtypes.DecimalDtype):
            return True
        elif isinstance(to_dtype, np.dtype):
            return to_dtype.kind in {"i", "f", "u", "U", "O"}
        else:
            # Any other target used to fall off the end of the function
            # and return ``None``; answer with an explicit ``False``.
            return False
    elif isinstance(from_dtype, np.dtype):
        if isinstance(to_dtype, np.dtype):
            return np.can_cast(from_dtype, to_dtype)
        elif isinstance(to_dtype, cudf.core.dtypes.DecimalDtype):
            return from_dtype.kind in {"i", "f", "u", "U", "O"}
        # Use the same ``cudf.core.dtypes`` namespace as every other
        # dtype check in this function.
        elif isinstance(to_dtype, cudf.core.dtypes.CategoricalDtype):
            return True
        else:
            return False
    elif isinstance(from_dtype, cudf.core.dtypes.ListDtype):
        # TODO: Add level based checks too once casting of
        # list columns is supported
        if isinstance(to_dtype, cudf.core.dtypes.ListDtype):
            return np.can_cast(from_dtype.leaf_type, to_dtype.leaf_type)
        else:
            return False
    elif isinstance(from_dtype, cudf.core.dtypes.CategoricalDtype):
        if isinstance(to_dtype, cudf.core.dtypes.CategoricalDtype):
            return True
        elif isinstance(to_dtype, np.dtype):
            return np.can_cast(from_dtype._categories.dtype, to_dtype)
        else:
            return False
    else:
        return np.can_cast(from_dtype, to_dtype)
Exemple #9
0
def _convert_str_col(col, errors, _downcast=None):
    """
    Convert a string column to a numeric column.

    The result is an int64 column when every string is integer-like.
    Otherwise, when every string is float-like, it is a float column:
    float32 if ``_downcast`` requests an integer downcast (a warning is
    emitted), float64 if not.

    When some strings are not float-like and ``errors == 'coerce'``, the
    column is converted with ``stod`` and the non-float-like entries are
    set to null.

    Parameters
    ----------
    col : The string column to convert, must be string dtype
    errors : {'raise', 'ignore', 'coerce'}, same as ``to_numeric``
    _downcast : Same as ``to_numeric``, see description for use

    Returns
    -------
    Converted numeric column

    Raises
    ------
    TypeError
        If ``col`` is not of string dtype.
    ValueError
        If some strings are not numeric and ``errors`` is not 'coerce'.
    """
    if not is_string_dtype(col):
        raise TypeError("col must be string dtype.")

    if libstrings.is_integer(col).all():
        return col.as_numerical_column(dtype=cudf.dtype("i8"))

    col = _proc_inf_empty_strings(col)

    float_like = libstrings.is_float(col)
    if not float_like.all():
        if errors != "coerce":
            raise ValueError("Unable to convert some strings to numerics.")
        col = libcudf.string_casting.stod(col)
        col[float_like.unary_operator("not")] = None
        return col

    if _downcast not in {"unsigned", "signed", "integer"}:
        return col.as_numerical_column(dtype=cudf.dtype("d"))

    warnings.warn(
        UserWarning("Downcasting from float to int will be "
                    "limited by float32 precision."))
    return col.as_numerical_column(dtype=cudf.dtype("f"))
Exemple #10
0
def find_common_type(dtypes):
    """
    Wrapper over np.find_common_type to handle special cases

    Corner cases:
    1. "M8", "M8" -> "M8" | "m8", "m8" -> "m8"

    Parameters
    ----------
    dtypes : iterable, sequence of dtypes to find common types

    Returns
    -------
    dtype : np.dtype optional, the result from np.find_common_type,
    None if input is empty

    """

    if len(dtypes) == 0:
        return None

    # Aggregate same types
    unique = set(dtypes)

    if any(is_decimal_dtype(dt) for dt in unique):
        all_numeric = all(
            is_decimal_dtype(dt) or is_numerical_dtype(dt) for dt in unique
        )
        if not all_numeric:
            return cudf.dtype("O")
        return _find_common_type_decimal(
            [dt for dt in unique if is_decimal_dtype(dt)])

    # Corner case 1:
    # Resort to np.result_type to handle "M" and "m" types separately
    datetimes = {dt for dt in unique if is_datetime_dtype(dt)}
    if datetimes:
        unique = unique - datetimes
        unique.add(np.result_type(*datetimes))

    timedeltas = {
        dt for dt in unique if pd.api.types.is_timedelta64_dtype(dt)
    }
    if timedeltas:
        unique = unique - timedeltas
        unique.add(np.result_type(*timedeltas))

    return cudf.dtype(np.find_common_type(list(unique), []))
Exemple #11
0
    def _binary_op_truediv(
        self, rhs: BinaryOperand
    ) -> Tuple["column.ColumnBase", BinaryOperand, DtypeObj]:
        """Prepare the operands and output dtype for true division.

        Dividing by a timedelta casts both operands to their common
        dtype and then to float64, and the result dtype is float64.
        Dividing by a float or integer keeps the operands as they are
        and the result has ``self.dtype``.

        Raises
        ------
        TypeError
            If ``rhs`` is neither a timedelta nor of kind f/i/u.
        """
        lhs = self  # type: column.ColumnBase
        if not pd.api.types.is_timedelta64_dtype(rhs.dtype):
            if rhs.dtype.kind in ("f", "i", "u"):
                return lhs, rhs, self.dtype
            raise TypeError(
                f"Division of {self.dtype} with {rhs.dtype} "
                f"cannot be performed."
            )

        common_dtype = determine_out_dtype(self.dtype, rhs.dtype)
        lhs = lhs.astype(common_dtype).astype("float64")
        if not isinstance(rhs, cudf.Scalar):
            rhs = rhs.astype(common_dtype).astype("float64")
        elif rhs.is_valid():
            rhs = rhs.value.astype(common_dtype).astype("float64")
        else:
            # An invalid scalar becomes a null float64 scalar.
            rhs = cudf.Scalar(None, "float64")

        return lhs, rhs, cudf.dtype("float64")
Exemple #12
0
    def __init__(
        self,
        data: Buffer,
        dtype: DtypeObj,
        mask: Buffer = None,
        size: int = None,  # TODO: make this non-optional
        offset: int = 0,
        null_count: int = None,
    ):
        """Validate the buffer against ``dtype`` and initialise the base.

        When ``size`` is omitted it is taken as the number of whole
        elements in ``data`` minus ``offset``.

        Raises
        ------
        ValueError
            If ``data.size`` is not a multiple of the element size.
        """
        dtype = cudf.dtype(dtype)
        itemsize = dtype.itemsize

        if data.size % itemsize != 0:
            raise ValueError("Buffer size must be divisible by element size")
        if size is None:
            size = data.size // itemsize - offset

        super().__init__(
            data,
            size=size,
            dtype=dtype,
            mask=mask,
            offset=offset,
            null_count=null_count,
        )
Exemple #13
0
def test_generic_ptx(dtype):
    """Run a PTX-compiled ``a ** 3 + b`` UDF and compare with NumPy."""
    size = 500

    lhs_arr = np.random.random(size).astype(dtype)
    lhs_col = Series(lhs_arr)._column

    rhs_arr = np.random.random(size).astype(dtype)
    rhs_col = Series(rhs_arr)._column

    def generic_function(a, b):
        return a ** 3 + b

    # Both UDF arguments share the numba type of the input dtype.
    nb_type = numpy_support.from_dtype(cudf.dtype(dtype))
    ptx_code, output_type = compile_ptx(
        generic_function, (nb_type, nb_type), device=True
    )

    out_dtype = numpy_support.as_dtype(output_type).type
    out_col = libcudf.binaryop.binaryop_udf(
        lhs_col, rhs_col, ptx_code, out_dtype
    )

    expected = lhs_arr ** 3 + rhs_arr
    np.testing.assert_almost_equal(expected, out_col.to_array())
Exemple #14
0
def min_column_type(x, expected_type):
    """
    Return the smallest dtype which can represent all
    elements of the `NumericalColumn` `x`.

    The dtype is narrowed when `x` is a floating column or when
    `expected_type` is an integer type; otherwise, and for a column
    with no valid elements, the dtype of `x` is returned.

    Raises
    ------
    TypeError
        If `x` is not a `NumericalColumn`.
    """

    if not isinstance(x, cudf.core.column.NumericalColumn):
        raise TypeError("Argument x must be of type column.NumericalColumn")
    if x.valid_count == 0:
        return x.dtype

    can_narrow = np.issubdtype(x.dtype, np.floating) or np.issubdtype(
        expected_type, np.integer)
    if not can_narrow:
        return cudf.dtype(x.dtype)

    # The result must be able to hold both extremes of the column.
    max_bound_dtype = np.min_scalar_type(x.max())
    min_bound_dtype = np.min_scalar_type(x.min())
    return cudf.dtype(np.promote_types(max_bound_dtype, min_bound_dtype))
Exemple #15
0
    def __init__(
        self,
        data: Buffer,
        dtype: DtypeObj,
        mask: Buffer = None,
        size: int = None,  # TODO: make non-optional
        offset: int = 0,
        null_count: int = None,
    ):
        """Validate the buffer, initialise the base and record the unit.

        When ``size`` is omitted it is taken as the number of whole
        elements in ``data`` minus ``offset``.

        Raises
        ------
        ValueError
            If ``data.size`` is not a multiple of the element size.
        TypeError
            If the resulting dtype is not a ``datetime64`` dtype.
        """
        dtype = cudf.dtype(dtype)
        itemsize = dtype.itemsize

        if data.size % itemsize != 0:
            raise ValueError("Buffer size must be divisible by element size")
        if size is None:
            size = data.size // itemsize - offset
        super().__init__(
            data,
            size=size,
            dtype=dtype,
            mask=mask,
            offset=offset,
            null_count=null_count,
        )

        if self.dtype.type is not np.datetime64:
            raise TypeError(f"{self.dtype} is not a supported datetime type")

        unit, _ = np.datetime_data(self.dtype)
        self._time_unit = unit
Exemple #16
0
 def element_type(self) -> Dtype:
     """Return the dtype of the elements held by this list type.

     Nested list and struct value types are converted with their
     ``from_arrow`` constructors; any other value type yields the
     ``name`` of the equivalent cudf dtype.
     """
     value_type = self._typ.value_type
     if isinstance(value_type, pa.ListType):
         return ListDtype.from_arrow(value_type)
     if isinstance(value_type, pa.StructType):
         return StructDtype.from_arrow(value_type)
     return cudf.dtype(value_type.to_pandas_dtype()).name
Exemple #17
0
def get_values_for_nested_data(dtype, lists_max_length=None, size=None):
    """
    Generate random values of the given dtype.

    The number of values is ``size`` when provided, otherwise a random
    integer drawn from ``[0, lists_max_length)``.

    Raises
    ------
    TypeError
        If the dtype kind is not one of i, u, f, U, O, M, m or b.
    """
    cardinality = (
        np.random.randint(0, lists_max_length) if size is None else size
    )

    dtype = cudf.dtype(dtype)
    kind = dtype.kind
    if kind in ("i", "u"):
        return int_generator(dtype=dtype, size=cardinality)()
    if kind == "f":
        return float_generator(dtype=dtype, size=cardinality)()
    if kind in ("U", "O"):
        return [
            mimesis.random.random.schoice(
                string.printable,
                100,
            ) for _ in range(cardinality)
        ]
    if kind == "M":
        generated = datetime_generator(dtype=dtype, size=cardinality)()
        return generated.astype(dtype)
    if kind == "m":
        generated = timedelta_generator(dtype=dtype, size=cardinality)()
        return generated.astype(dtype)
    if kind == "b":
        return boolean_generator(cardinality)().astype(dtype)
    raise TypeError(f"Unsupported dtype: {dtype}")
Exemple #18
0
    def binary_operator(
        self,
        op: str,
        rhs: Union[ColumnBase, "cudf.Scalar"],
        reflect: bool = False,
    ) -> ColumnBase:
        """Apply the binary operation ``op`` between this column and ``rhs``.

        ``cudf.DateOffset`` operands are delegated to the offset itself.
        Comparisons produce booleans; adding or subtracting a timedelta
        and subtracting a datetime are the only other supported cases.
        With ``reflect=True`` the operands are swapped before the
        operation is dispatched.

        Raises
        ------
        TypeError
            If ``op`` is not supported for this operand combination.
        """
        if isinstance(rhs, cudf.DateOffset):
            return rhs._datetime_binop(self, op, reflect=reflect)

        lhs: Union[ScalarLike, ColumnBase] = self
        is_timedelta = pd.api.types.is_timedelta64_dtype
        timedelta_mod = cudf.core.column.timedelta

        if op in ("eq", "ne", "lt", "gt", "le", "ge", "NULL_EQUALS"):
            out_dtype = cudf.dtype(np.bool_)  # type: Dtype
        elif op == "add" and is_timedelta(rhs.dtype):
            out_dtype = timedelta_mod._timedelta_add_result_dtype(rhs, lhs)
        elif op == "sub" and is_timedelta(rhs.dtype):
            first, second = (rhs, lhs) if reflect else (lhs, rhs)
            out_dtype = timedelta_mod._timedelta_sub_result_dtype(
                first, second)
        elif op == "sub" and pd.api.types.is_datetime64_dtype(rhs.dtype):
            # The result uses whichever operand's unit comes later in
            # this list.
            units = ["s", "ms", "us", "ns"]
            get_unit = cudf.utils.dtypes.get_time_unit
            lhs_unit = units.index(get_unit(lhs))
            rhs_unit = units.index(get_unit(rhs))
            out_unit = units[max(lhs_unit, rhs_unit)]
            out_dtype = np.dtype(f"timedelta64[{out_unit}]")
        else:
            raise TypeError(f"Series of dtype {self.dtype} cannot perform "
                            f" the operation {op}")

        if reflect:
            lhs, rhs = rhs, lhs
        return libcudf.binaryop.binaryop(lhs, rhs, op, out_dtype)
def get_values_for_nested_data(dtype, lists_max_length):
    """
    Generate a random number of values of the given dtype.

    The count is drawn from ``[0, lists_max_length)``.  NumPy arrays are
    converted to Python lists before being returned.

    Raises
    ------
    TypeError
        If the dtype kind is not one of i, u, f, U, O, M, m or b.
    """
    cardinality = np.random.randint(0, lists_max_length)
    dtype = cudf.dtype(dtype)
    kind = dtype.kind

    if kind in ("i", "u"):
        values = int_generator(dtype=dtype, size=cardinality)()
    elif kind == "f":
        values = float_generator(dtype=dtype, size=cardinality)()
    elif kind in ("U", "O"):
        values = [
            mimesis.random.random.schoice(
                string.printable,
                100,
            ) for _ in range(cardinality)
        ]
    elif kind == "M":
        generated = datetime_generator(dtype=dtype, size=cardinality)()
        values = generated.astype(dtype)
    elif kind == "m":
        generated = timedelta_generator(dtype=dtype, size=cardinality)()
        values = generated.astype(dtype)
    elif kind == "b":
        values = boolean_generator(cardinality)().astype(dtype)
    else:
        raise TypeError(f"Unsupported dtype: {dtype}")

    # To ensure numpy arrays are not passed as input to
    # list constructor, returning a python list object here.
    return values.tolist() if isinstance(values, np.ndarray) else values
Exemple #20
0
 def dtype(self):
     """Return the dtype of this scalar.

     While the host value is current, a ``str`` host value reports the
     object dtype and anything else reports the stored host dtype.
     Otherwise the dtype is read from the device value.
     """
     if not self._is_host_value_current:
         return self.device_value.dtype
     if isinstance(self._host_value, str):
         return cudf.dtype("object")
     return self._host_dtype
Exemple #21
0
    def _preprocess_host_value(self, value, dtype):
        """Normalize a host ``value`` / ``dtype`` pair.

        Returns
        -------
        tuple
            ``(value, dtype)``; ``value`` is replaced by ``NA`` when the
            input is a null scalar (list and dict values are returned
            as given).

        Raises
        ------
        TypeError
            If a list is passed together with a dtype, or if the value
            is null and no dtype can be determined for it.
        ValueError
            If a non-null value is passed with a ``ListDtype`` or
            ``StructDtype`` but is not a list / dict itself.
        """
        # Validity is decided on the value as passed in, before coercion.
        valid = not cudf._lib.scalar._is_null_host_scalar(value)

        # Lists: the dtype is always inferred, never supplied.
        if isinstance(value, list):
            if dtype is not None:
                raise TypeError("Lists may not be cast to a different dtype")
            else:
                dtype = ListDtype.from_arrow(
                    pa.infer_type([value], from_pandas=True))
                return value, dtype
        elif isinstance(dtype, ListDtype):
            # A list dtype with a non-list value is only valid for nulls.
            if value not in {None, NA}:
                raise ValueError(f"Can not coerce {value} to ListDtype")
            else:
                return NA, dtype

        # Dicts: unlike lists, an explicit dtype is passed through as is.
        if isinstance(value, dict):
            if dtype is None:
                dtype = StructDtype.from_arrow(
                    pa.infer_type([value], from_pandas=True))
            return value, dtype
        elif isinstance(dtype, StructDtype):
            # A struct dtype with a non-dict value is only valid for nulls.
            if value not in {None, NA}:
                raise ValueError(f"Can not coerce {value} to StructDType")
            else:
                return NA, dtype

        # With a decimal dtype the value is round-tripped through a
        # pyarrow decimal128 scalar of the dtype's precision and scale.
        if isinstance(dtype, Decimal64Dtype):
            value = pa.scalar(value,
                              type=pa.decimal128(dtype.precision,
                                                 dtype.scale)).as_py()
        # A Decimal without a dtype gets its dtype derived from the value.
        if isinstance(value, decimal.Decimal) and dtype is None:
            dtype = Decimal64Dtype._from_decimal(value)

        value = to_cudf_compatible_scalar(value, dtype=dtype)

        if dtype is None:
            if not valid:
                # A null value only carries a usable dtype when it is a
                # datetime/timedelta with a concrete (non-generic) unit.
                if isinstance(value, (np.datetime64, np.timedelta64)):
                    unit, _ = np.datetime_data(value)
                    if unit == "generic":
                        raise TypeError(
                            "Cant convert generic NaT to null scalar")
                    else:
                        dtype = value.dtype
                else:
                    raise TypeError(
                        "dtype required when constructing a null scalar")
            else:
                dtype = value.dtype

        # Decimal dtypes are kept as they are; everything else is
        # normalized through cudf.dtype.
        if not isinstance(dtype, Decimal64Dtype):
            dtype = cudf.dtype(dtype)

        if not valid:
            value = NA

        return value, dtype
Exemple #22
0
def test_max(dtype, nelem):
    """Check ``Series.max`` against the NumPy maximum of the same data."""
    scalar_type = cudf.dtype(dtype).type
    data = gen_rand(scalar_type, nelem)

    got = Series(data).max()
    expect = scalar_type(data.max())

    assert expect == got
Exemple #23
0
def _buffer_data_from_array_interface(array_interface):
    """Return ``(pointer, size_in_bytes)`` for an array interface dict.

    A ``None`` data pointer is reported as 0 and an empty shape is
    treated as a single element.
    """
    raw_ptr = array_interface["data"][0]
    ptr = 0 if raw_ptr is None else raw_ptr
    itemsize = cudf.dtype(array_interface["typestr"]).itemsize
    shape = array_interface["shape"]
    if len(shape) == 0:
        shape = (1, )
    element_count = functools.reduce(operator.mul, shape)
    return ptr, element_count * itemsize
Exemple #24
0
def test_sum(dtype, nelem):
    """Check ``Series.sum`` against the NumPy sum of the same data."""
    dtype = cudf.dtype(dtype).type
    data = gen_rand(dtype, nelem)

    got = Series(data).sum()
    expect = data.sum()

    # float32 results are compared with fewer significant digits.
    if dtype == np.float32:
        significant = 4
    else:
        significant = 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
Exemple #25
0
def test_null_scalar(dtype):
    """A ``Scalar`` built from ``None`` is NA, invalid and keeps its dtype."""
    s = cudf.Scalar(None, dtype=dtype)
    assert s.value is cudf.NA

    # Decimal dtypes are compared directly, others via cudf.dtype.
    if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
        expected_dtype = dtype
    else:
        expected_dtype = cudf.dtype(dtype)
    assert s.dtype == expected_dtype
    assert s.is_valid() is False
Exemple #26
0
def confirm_1d_contiguous(array_interface):
    """Validate the type and contiguity of an array interface dict.

    Raises
    ------
    TypeError
        If the typestr is neither ``"|i1"`` nor ``"|u1"``.
    ValueError
        If ``get_c_contiguity`` rejects the shape/strides combination.
    """
    strides = array_interface["strides"]
    shape = array_interface["shape"]
    typestr = array_interface["typestr"]
    itemsize = cudf.dtype(typestr).itemsize
    # NOTE(review): the message mentions uint8 only, although "|i1" is
    # accepted as well.
    if typestr not in ("|i1", "|u1"):
        raise TypeError("Buffer data must be of uint8 type")
    if not get_c_contiguity(shape, strides, itemsize):
        raise ValueError("Buffer data must be 1D C-contiguous")
Exemple #27
0
def test_product(dtype, nelem):
    """Check ``Series.product`` against the pandas product."""
    np.random.seed(0)
    dtype = cudf.dtype(dtype).type
    if cudf.dtype(dtype).kind not in {"u", "i"}:
        data = gen_rand(dtype, nelem)
    else:
        data = np.ones(nelem, dtype=dtype)
        # Set at most 30 items to [0..2) to keep the value within 2^32
        for _ in range(30):
            # The value is drawn before the position so the seeded
            # random stream is consumed in the same order as an
            # in-place ``data[<index>] = <value>`` assignment.
            value = np.random.uniform() * 2
            position = np.random.randint(low=0, high=nelem, size=1)
            data[position] = value

    got = Series(data).product()
    expect = pd.Series(data).product()

    # float32 results are compared with fewer significant digits.
    if dtype == np.float32:
        significant = 4
    else:
        significant = 6
    np.testing.assert_approx_equal(expect, got, significant=significant)
Exemple #28
0
def test_series_construction_with_nulls(input_obj, dtype):
    """Categorical Series built from values with NA match pandas."""
    dtype = cudf.dtype(dtype)
    # Every non-NA entry is converted to the scalar type of ``dtype``.
    typed_values = [
        cudf.NA if v is cudf.NA else dtype.type(v) for v in input_obj
    ]

    expect = pd.Series(typed_values, dtype="category")
    got = cudf.Series(typed_values, dtype="category").to_pandas()

    assert_eq(expect, got)
Exemple #29
0
def _get_nan_for_dtype(dtype):
    """Return the missing-value scalar for ``dtype``.

    Datetime and timedelta dtypes give a NaT in the dtype's own unit,
    floating dtypes give a NaN of that dtype, and everything else gives
    ``np.float64("nan")``.
    """
    dtype = cudf.dtype(dtype)
    is_temporal = (
        pd.api.types.is_datetime64_dtype(dtype)
        or pd.api.types.is_timedelta64_dtype(dtype)
    )
    if is_temporal:
        time_unit, _ = np.datetime_data(dtype)
        return dtype.type("nat", time_unit)
    if dtype.kind == "f":
        return dtype.type("nan")
    return np.float64("nan")
Exemple #30
0
 def as_string_column(self,
                      dtype: Dtype,
                      format=None,
                      **kwargs) -> "cudf.core.column.StringColumn":
     """Convert this column to a string column.

     An empty column yields an empty object-dtype column; otherwise the
     converter registered for this column's dtype is applied.
     """
     if len(self) == 0:
         return cast("cudf.core.column.StringColumn",
                     as_column([], dtype="object"))
     to_string = string._numeric_to_str_typecast_functions[cudf.dtype(
         self.dtype)]
     return to_string(self)