예제 #1
0
def test_astype_datetime64_bad_dtype_raises(from_type, to_type):
    """
    Check that ``astype_nansafe`` raises ``TypeError`` ("cannot astype") for
    the parametrized ``from_type`` / ``to_type`` combination.
    """
    # One-element array built from the scalar constructor under test.
    values = np.array([from_type("2018")])
    target_dtype = np.dtype(to_type)

    expected_error = pytest.raises(TypeError, match="cannot astype")
    with expected_error:
        astype_nansafe(values, dtype=target_dtype)
예제 #2
0
def test_astype_nansafe(val, typ):
    """
    Check that ``astype_nansafe`` raises ``ValueError`` with the
    "Cannot convert NaT values to integer" message for ``val`` cast to ``typ``.
    """
    single = np.array([val])
    target_dtype = np.dtype(typ)

    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
        astype_nansafe(single, dtype=target_dtype)
예제 #3
0
def test_astype_nansafe(val, typ):
    """
    Check that ``astype_nansafe`` raises ``ValueError`` with the
    "Cannot convert NaT values to integer" message for ``val`` cast to
    ``typ``, inside a block that also asserts a ``FutureWarning``.
    """
    single = np.array([val])
    target_dtype = np.dtype(typ)

    raises_nat_error = pytest.raises(
        ValueError, match="Cannot convert NaT values to integer"
    )
    with raises_nat_error:
        # datetimelike astype(int64) deprecated
        with tm.assert_produces_warning(FutureWarning):
            astype_nansafe(single, dtype=target_dtype)
예제 #4
0
파일: numeric.py 프로젝트: ParfaitG/pandas
    def astype(self, dtype, copy: bool = True):
        """
        Cast this index to ``dtype``.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalized through ``pandas_dtype``.
        copy : bool, default True
            Forwarded to the constructor / parent ``astype`` on the paths
            that accept it.

        Returns
        -------
        Index
            The converted index.

        Raises
        ------
        TypeError
            If this index has a float dtype and ``needs_i8_conversion(dtype)``
            is true.
        """
        dtype = pandas_dtype(dtype)

        if is_float_dtype(self.dtype):
            if needs_i8_conversion(dtype):
                raise TypeError(
                    f"Cannot convert Float64Index to dtype {dtype}; integer "
                    "values are required for conversion")
            if is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
                # TODO(ExtensionIndex); this can change once we have an EA Index type
                # GH 13149
                converted = astype_nansafe(self._values, dtype=dtype)
                if not isinstance(self, Float64Index):
                    return NumericIndex(converted, name=self.name, dtype=dtype)
                # Float64Index: signed targets map to Int64Index, the rest
                # to UInt64Index.
                index_cls = Int64Index if dtype.kind == "i" else UInt64Index
                return index_cls(converted, name=self.name)
        elif self._is_backward_compat_public_numeric_index:
            # This block is needed so e.g. NumericIndex[int8].astype("int32")
            # returns NumericIndex[int32] and not Int64Index with dtype int64.
            # When Int64Index etc. are removed from the code base, remove this
            # also.
            if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype):
                return self._constructor(self, dtype=dtype, copy=copy)

        # Anything not handled above is delegated to the parent class.
        return super().astype(dtype, copy=copy)
예제 #5
0
def test_astype_nansafe_copy_false(any_int_numpy_dtype):
    """
    Check that ``astype_nansafe(..., copy=False)`` from an integer dtype to
    float64 yields the float values ``[1.0, 2.0, 3.0]``.
    """
    # GH#34457 use astype, not view
    ints = np.array([1, 2, 3], dtype=any_int_numpy_dtype)
    float_dtype = np.dtype("float64")

    result = astype_nansafe(ints, float_dtype, copy=False)

    tm.assert_numpy_array_equal(
        result, np.array([1.0, 2.0, 3.0], dtype=float_dtype)
    )
예제 #6
0
파일: dtype.py 프로젝트: tnir/pandas
    def update_dtype(self, dtype) -> SparseDtype:
        """
        Convert the SparseDtype to a new dtype.

        This takes care of converting the ``fill_value``.

        Parameters
        ----------
        dtype : Union[str, numpy.dtype, SparseDtype]
            The new dtype to use.

            * For a SparseDtype, it is simply returned
            * For a NumPy dtype (or str), the current fill value
              is converted to the new dtype, and a SparseDtype
              with `dtype` and the new fill value is returned.

        Returns
        -------
        SparseDtype
            A new SparseDtype with the correct `dtype` and fill value
            for that `dtype`.

        Raises
        ------
        TypeError
            When `dtype` resolves to something that is neither a
            SparseDtype nor a NumPy dtype (i.e. another extension dtype).
        ValueError
            When the current fill value cannot be converted to the
            new `dtype` (e.g. trying to convert ``np.nan`` to an
            integer dtype).

        Examples
        --------
        >>> SparseDtype(int, 0).update_dtype(float)
        Sparse[float64, 0.0]

        >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan))
        Sparse[float64, nan]
        """
        cls = type(self)
        dtype = pandas_dtype(dtype)

        # Already a SparseDtype: hand it back unchanged.
        if isinstance(dtype, cls):
            return dtype

        if not isinstance(dtype, np.dtype):
            raise TypeError(
                "sparse arrays of extension dtypes not supported")

        # np.array(<scalar>) is 0-dimensional and cannot be indexed with [0].
        # Promote to 1-d explicitly so the element lookup below does not
        # depend on astype_nansafe reshaping its input internally.
        fv_asarray = np.atleast_1d(np.array(self.fill_value))
        fvarr = astype_nansafe(fv_asarray, dtype)
        # NB: not fvarr.item(), as that casts dt64->int
        fill_value = fvarr[0]
        return cls(dtype, fill_value=fill_value)
예제 #7
0
    def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        ndarray or ExtensionArray
            NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
            'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with our dtype, equivalent of same_kind
            casting
        """
        dtype = pandas_dtype(dtype)

        # Extension dtypes are handled entirely by the parent class.
        if isinstance(dtype, ExtensionDtype):
            return super().astype(dtype, copy=copy)

        # Pick the missing-value marker handed to ``to_numpy``; by default
        # none is specified.
        na_value: float | np.datetime64 | lib.NoDefault = lib.no_default
        if is_float_dtype(dtype):
            # In astype, we consider dtype=float to also mean na_value=np.nan
            na_value = np.nan
        elif is_datetime64_dtype(dtype):
            na_value = np.datetime64("NaT")

        converted = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
        if self.dtype.kind != "f":
            return converted
        # TODO: make this consistent between IntegerArray/FloatingArray,
        #  see test_astype_str
        return astype_nansafe(converted, dtype, copy=False)
예제 #8
0
파일: masked.py 프로젝트: scholer/pandas
    def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Target dtype; normalized through ``pandas_dtype``.
        copy : bool, default True
            Whether to copy when the cast could otherwise reuse existing data.

        Returns
        -------
        ndarray or ExtensionArray
            ``self`` (or a copy of it) when the dtype is unchanged, a masked
            array for masked target dtypes, an ExtensionArray for other
            extension dtypes, otherwise a NumPy ndarray.

        Raises
        ------
        ValueError
            If the array contains missing values and the target is an integer
            or boolean dtype.
        """
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            return self.copy() if copy else self

        # if we are astyping to another nullable masked dtype, we can fastpath
        if isinstance(dtype, BaseMaskedDtype):
            # TODO deal with NaNs for FloatingArray case
            new_data = self._data.astype(dtype.numpy_dtype, copy=copy)
            # mask is copied depending on whether the data was copied, and
            # not directly depending on the `copy` keyword
            if new_data is self._data:
                new_mask = self._mask
            else:
                new_mask = self._mask.copy()
            masked_cls = dtype.construct_array_type()
            return masked_cls(new_data, new_mask, copy=False)

        if isinstance(dtype, ExtensionDtype):
            target_cls = dtype.construct_array_type()
            return target_cls._from_sequence(self, dtype=dtype, copy=copy)

        # Pick the missing-value marker handed to ``to_numpy``; by default
        # none is specified.
        na_value: float | np.datetime64 | lib.NoDefault = lib.no_default
        if is_float_dtype(dtype):
            # In astype, we consider dtype=float to also mean na_value=np.nan
            na_value = np.nan
        elif is_datetime64_dtype(dtype):
            na_value = np.datetime64("NaT")

        # to_numpy will also raise, but we get somewhat nicer exception messages here
        if is_integer_dtype(dtype) and self._hasna:
            raise ValueError("cannot convert NA to integer")
        if is_bool_dtype(dtype) and self._hasna:
            # careful: astype_nansafe converts np.nan to True
            raise ValueError("cannot convert float NaN to bool")

        converted = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
        if self.dtype.kind != "f":
            return converted
        # TODO: make this consistent between IntegerArray/FloatingArray,
        #  see test_astype_str
        return astype_nansafe(converted, dtype, copy=False)
예제 #9
0
    def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
        """
        Cast to a NumPy array or ExtensionArray with 'dtype'.

        Parameters
        ----------
        dtype : str or dtype
            Typecode or data-type to which the array is cast.
        copy : bool, default True
            Whether to copy the data, even if not necessary. If False,
            a copy is made only if the old dtype does not match the
            new dtype.

        Returns
        -------
        ndarray or ExtensionArray
            NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
            'dtype' for its dtype.

        Raises
        ------
        TypeError
            if incompatible type with a FloatingDtype, equivalent of same_kind
            casting
        """
        dtype = pandas_dtype(dtype)

        # Extension dtypes are handled entirely by the parent class.
        if isinstance(dtype, ExtensionDtype):
            return super().astype(dtype, copy=copy)

        # Build the optional ``na_value`` keyword for ``to_numpy``.
        if is_float_dtype(dtype):
            # In astype, we consider dtype=float to also mean na_value=np.nan
            na_kwargs = {"na_value": np.nan}
        elif is_datetime64_dtype(dtype):
            # error: Dict entry 0 has incompatible type "str": "datetime64"; expected
            # "str": "float"
            na_kwargs = {"na_value": np.datetime64("NaT")}  # type: ignore[dict-item]
        else:
            na_kwargs = {}

        # error: Argument 2 to "to_numpy" of "BaseMaskedArray" has incompatible
        # type "**Dict[str, float]"; expected "bool"
        converted = self.to_numpy(dtype=dtype, **na_kwargs)  # type: ignore[arg-type]
        return astype_nansafe(converted, dtype, copy=False)
예제 #10
0
def test_astype_object_preserves_datetime_na(from_type):
    """
    Check that a ``from_type("NaT", "ns")`` value is still reported as
    missing by ``isna`` after ``astype_nansafe`` casts it to object dtype.
    """
    nat_values = np.array([from_type("NaT", "ns")])
    object_dtype = np.dtype("object")

    as_object = astype_nansafe(nat_values, dtype=object_dtype)

    assert isna(as_object)[0]
예제 #11
0
    def _cast_types(self, values, cast_type, column):
        """
        Convert ``values`` to ``cast_type``.

        Parameters
        ----------
        values : ndarray
            Data to convert.
        cast_type : string or np.dtype
            dtype to cast values to
        column : string
            column name - used only for error reporting

        Returns
        -------
        converted : ndarray

        Raises
        ------
        NotImplementedError
            If the extension array type for ``cast_type`` raises
            ``NotImplementedError`` from ``_from_sequence_of_strings``.
        ValueError
            If the plain (non-categorical, non-extension) cast fails with
            ``ValueError``.
        """
        if is_categorical_dtype(cast_type):
            has_known_categories = (
                isinstance(cast_type, CategoricalDtype)
                and cast_type.categories is not None
            )

            if not (is_object_dtype(values) or has_known_categories):
                # TODO: this is for consistency with
                # c-parser which parses all categories
                # as strings
                values = astype_nansafe(values, np.dtype(str))

            unique_cats = Index(values).unique().dropna()
            codes = unique_cats.get_indexer(values)
            return Categorical._from_inferred_categories(
                unique_cats, codes, cast_type, true_values=self.true_values
            )

        # use the EA's implementation of casting
        if is_extension_array_dtype(cast_type):
            # ensure cast_type is an actual dtype and not a string
            cast_type = pandas_dtype(cast_type)
            array_type = cast_type.construct_array_type()
            try:
                if is_bool_dtype(cast_type):
                    # Boolean targets additionally receive the configured
                    # true/false value lists.
                    return array_type._from_sequence_of_strings(
                        values,
                        dtype=cast_type,
                        true_values=self.true_values,
                        false_values=self.false_values,
                    )
                return array_type._from_sequence_of_strings(values, dtype=cast_type)
            except NotImplementedError as err:
                raise NotImplementedError(
                    f"Extension Array: {array_type} must implement "
                    "_from_sequence_of_strings in order to be used in parser methods"
                ) from err

        # Remaining dtypes: cast directly, reporting the column on failure.
        try:
            return astype_nansafe(values, cast_type, copy=True, skipna=True)
        except ValueError as err:
            raise ValueError(
                f"Unable to convert column {column} to type {cast_type}"
            ) from err