def test_astype_datetime64_bad_dtype_raises(from_type, to_type):
    """
    Check that ``astype_nansafe`` rejects an unsupported target dtype.

    A one-element array is built from ``from_type("2018")`` and cast to
    ``np.dtype(to_type)``; the cast must raise ``TypeError`` with a message
    matching "cannot astype".
    """
    source = np.array([from_type("2018")])
    target_dtype = np.dtype(to_type)

    with pytest.raises(TypeError, match="cannot astype"):
        astype_nansafe(source, dtype=target_dtype)
def test_astype_nansafe(val, typ):
    """
    Check that casting ``val`` to ``typ`` via ``astype_nansafe`` raises.

    The single-element array ``[val]`` is cast to ``np.dtype(typ)`` and the
    call must raise ``ValueError`` with the NaT-to-integer message.
    """
    values = np.array([val])
    target_dtype = np.dtype(typ)

    with pytest.raises(ValueError, match="Cannot convert NaT values to integer"):
        astype_nansafe(values, dtype=target_dtype)
def test_astype_nansafe(val, typ):
    """
    Check that casting ``val`` to ``typ`` raises and emits a FutureWarning.

    The single-element array ``[val]`` is cast to ``np.dtype(typ)``; the call
    must raise ``ValueError`` with the NaT-to-integer message while a
    ``FutureWarning`` is produced.
    """
    values = np.array([val])
    target_dtype = np.dtype(typ)

    raises_ctx = pytest.raises(
        ValueError, match="Cannot convert NaT values to integer"
    )
    # datetimelike astype(int64) deprecated
    warns_ctx = tm.assert_produces_warning(FutureWarning)

    # ``raises_ctx`` is entered first (outermost), ``warns_ctx`` second.
    with raises_ctx, warns_ctx:
        astype_nansafe(values, dtype=target_dtype)
def astype(self, dtype, copy: bool = True):
    """
    Cast this index to ``dtype``.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; normalised with ``pandas_dtype``.
    copy : bool, default True
        Forwarded to the constructor / parent ``astype`` on the paths that
        accept it.

    Returns
    -------
    Index
        ``Int64Index``/``UInt64Index``/``NumericIndex`` for the float ->
        non-extension integer case, ``self._constructor(...)`` for the
        backward-compat numeric case, otherwise whatever the parent class
        ``astype`` returns.

    Raises
    ------
    TypeError
        If this index has a float dtype and ``dtype`` needs i8 conversion.
    """
    dtype = pandas_dtype(dtype)

    if is_float_dtype(self.dtype):
        if needs_i8_conversion(dtype):
            raise TypeError(
                f"Cannot convert Float64Index to dtype {dtype}; integer "
                "values are required for conversion"
            )
        if is_integer_dtype(dtype) and not is_extension_array_dtype(dtype):
            # TODO(ExtensionIndex); this can change once we have an EA Index type
            # GH 13149
            converted = astype_nansafe(self._values, dtype=dtype)
            if not isinstance(self, Float64Index):
                return NumericIndex(converted, name=self.name, dtype=dtype)
            index_cls = Int64Index if dtype.kind == "i" else UInt64Index
            return index_cls(converted, name=self.name)
    elif (
        self._is_backward_compat_public_numeric_index
        and not is_extension_array_dtype(dtype)
        and is_numeric_dtype(dtype)
    ):
        # This branch is needed so e.g. NumericIndex[int8].astype("int32")
        # returns NumericIndex[int32] and not Int64Index with dtype int64.
        # When Int64Index etc. are removed from the code base, remove this too.
        return self._constructor(self, dtype=dtype, copy=copy)

    return super().astype(dtype, copy=copy)
def test_astype_nansafe_copy_false(any_int_numpy_dtype):
    """
    Check int -> float64 casting through ``astype_nansafe`` with ``copy=False``.

    The values ``[1, 2, 3]`` stored with ``any_int_numpy_dtype`` must come
    back as ``[1.0, 2.0, 3.0]`` with dtype float64.
    """
    # GH#34457 use astype, not view
    float_dtype = np.dtype("float64")
    ints = np.array([1, 2, 3], dtype=any_int_numpy_dtype)
    expected = np.array([1.0, 2.0, 3.0], dtype=float_dtype)

    result = astype_nansafe(ints, float_dtype, copy=False)

    tm.assert_numpy_array_equal(result, expected)
def update_dtype(self, dtype) -> SparseDtype:
    """
    Convert the SparseDtype to a new dtype.

    This takes care of converting the ``fill_value``.

    Parameters
    ----------
    dtype : Union[str, numpy.dtype, SparseDtype]
        The new dtype to use.

        * For a SparseDtype, it is simply returned
        * For a NumPy dtype (or str), the current fill value
          is converted to the new dtype, and a SparseDtype
          with `dtype` and the new fill value is returned.

    Returns
    -------
    SparseDtype
        A new SparseDtype with the correct `dtype` and fill value
        for that `dtype`.

    Raises
    ------
    ValueError
        When the current fill value cannot be converted to the
        new `dtype` (e.g. trying to convert ``np.nan`` to an
        integer dtype).
    TypeError
        When ``dtype`` resolves to something that is neither an instance of
        this class nor a ``numpy.dtype``.

    Examples
    --------
    >>> SparseDtype(int, 0).update_dtype(float)
    Sparse[float64, 0.0]

    >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan))
    Sparse[float64, nan]
    """
    sparse_cls = type(self)
    dtype = pandas_dtype(dtype)

    # Already an instance of our own dtype class: hand it back untouched.
    if isinstance(dtype, sparse_cls):
        return dtype

    if not isinstance(dtype, np.dtype):
        raise TypeError("sparse arrays of extension dtypes not supported")

    converted = astype_nansafe(np.array(self.fill_value), dtype)
    # NB: take element 0 rather than calling .item(), as that casts dt64->int
    new_fill_value = converted[0]
    return sparse_cls(dtype, fill_value=new_fill_value)
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Cast to a NumPy array or ExtensionArray with 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    ndarray or ExtensionArray
        NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
        'dtype' for its dtype.

    Raises
    ------
    TypeError
        if incompatible type with our dtype, equivalent of same_kind
        casting
    """
    dtype = pandas_dtype(dtype)

    # Extension dtypes are delegated to the parent implementation.
    if isinstance(dtype, ExtensionDtype):
        return super().astype(dtype, copy=copy)

    # coerce: choose the value that stands in for missing entries
    na_value: float | np.datetime64 | lib.NoDefault = lib.no_default
    if is_float_dtype(dtype):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        na_value = np.nan
    elif is_datetime64_dtype(dtype):
        na_value = np.datetime64("NaT")

    converted = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)

    if self.dtype.kind != "f":
        return converted

    # TODO: make this consistent between IntegerArray/FloatingArray,
    # see test_astype_str
    return astype_nansafe(converted, dtype, copy=False)
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Cast this masked array to ``dtype``.

    Parameters
    ----------
    dtype : str or dtype
        Target dtype; normalised with ``pandas_dtype``.
    copy : bool, default True
        Passed through to the underlying conversions. When ``dtype`` equals
        the current dtype, ``copy=False`` returns ``self`` and ``copy=True``
        returns ``self.copy()``.

    Returns
    -------
    ndarray or ExtensionArray
        A masked array for masked target dtypes, the target's array type for
        other extension dtypes, otherwise the result of ``to_numpy``
        (additionally passed through ``astype_nansafe`` when this array's
        dtype kind is ``"f"``).

    Raises
    ------
    ValueError
        If missing values are present and the target is a NumPy integer or
        bool dtype.
    """
    dtype = pandas_dtype(dtype)

    if is_dtype_equal(dtype, self.dtype):
        return self.copy() if copy else self

    # if we are astyping to another nullable masked dtype, we can fastpath
    if isinstance(dtype, BaseMaskedDtype):
        # TODO deal with NaNs for FloatingArray case
        converted = self._data.astype(dtype.numpy_dtype, copy=copy)
        # mask is copied depending on whether the data was copied, and
        # not directly depending on the `copy` keyword
        if converted is self._data:
            new_mask = self._mask
        else:
            new_mask = self._mask.copy()
        masked_cls = dtype.construct_array_type()
        return masked_cls(converted, new_mask, copy=False)

    if isinstance(dtype, ExtensionDtype):
        target_cls = dtype.construct_array_type()
        return target_cls._from_sequence(self, dtype=dtype, copy=copy)

    # coerce: choose the value that stands in for missing entries
    na_value: float | np.datetime64 | lib.NoDefault = lib.no_default
    if is_float_dtype(dtype):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        na_value = np.nan
    elif is_datetime64_dtype(dtype):
        na_value = np.datetime64("NaT")

    # to_numpy will also raise, but we get somewhat nicer exception messages here
    if is_integer_dtype(dtype) and self._hasna:
        raise ValueError("cannot convert NA to integer")
    if is_bool_dtype(dtype) and self._hasna:
        # careful: astype_nansafe converts np.nan to True
        raise ValueError("cannot convert float NaN to bool")

    result = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)

    if self.dtype.kind != "f":
        return result

    # TODO: make this consistent between IntegerArray/FloatingArray,
    # see test_astype_str
    return astype_nansafe(result, dtype, copy=False)
def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
    """
    Cast to a NumPy array or ExtensionArray with 'dtype'.

    Parameters
    ----------
    dtype : str or dtype
        Typecode or data-type to which the array is cast.
    copy : bool, default True
        Whether to copy the data, even if not necessary. If False,
        a copy is made only if the old dtype does not match the
        new dtype.

    Returns
    -------
    ndarray or ExtensionArray
        NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with
        'dtype' for its dtype.

    Raises
    ------
    TypeError
        if incompatible type with a FloatingDtype, equivalent of same_kind
        casting
    """
    dtype = pandas_dtype(dtype)

    # Extension dtypes are delegated to the parent implementation.
    if isinstance(dtype, ExtensionDtype):
        return super().astype(dtype, copy=copy)

    # coerce
    if is_float_dtype(dtype):
        # In astype, we consider dtype=float to also mean na_value=np.nan
        kwargs = {"na_value": np.nan}
    elif is_datetime64_dtype(dtype):
        # error: Dict entry 0 has incompatible type "str": "datetime64"; expected
        # "str": "float"
        kwargs = {"na_value": np.datetime64("NaT")}  # type: ignore[dict-item]
    else:
        kwargs = {}

    # Forward ``copy`` explicitly: previously it was dropped on this path, so
    # ``astype(..., copy=True)`` did not request a copy from ``to_numpy``.
    # error: Argument 3 to "to_numpy" of "BaseMaskedArray" has incompatible
    # type "**Dict[str, float]"; expected "bool"
    data = self.to_numpy(dtype=dtype, copy=copy, **kwargs)  # type: ignore[arg-type]
    return astype_nansafe(data, dtype, copy=False)
def test_astype_object_preserves_datetime_na(from_type):
    """
    Check that a NaT survives a cast to object dtype.

    A one-element array holding ``from_type("NaT", "ns")`` is cast to
    ``object`` with ``astype_nansafe``; ``isna`` must still report the
    first element as missing.
    """
    nat_array = np.array([from_type("NaT", "ns")])
    as_object = astype_nansafe(nat_array, dtype=np.dtype("object"))

    first_is_na = isna(as_object)[0]
    assert first_is_na
def _cast_types(self, values, cast_type, column):
    """
    Cast values to specified type

    Parameters
    ----------
    values : ndarray
    cast_type : string or np.dtype
        dtype to cast values to
    column : string
        column name - used only for error reporting

    Returns
    -------
    converted : ndarray

    Raises
    ------
    NotImplementedError
        If the extension array type for ``cast_type`` raises
        ``NotImplementedError`` from ``_from_sequence_of_strings``.
    ValueError
        If ``astype_nansafe`` raises ``ValueError`` for a non-categorical,
        non-extension ``cast_type``.
    """
    if is_categorical_dtype(cast_type):
        has_known_categories = (
            isinstance(cast_type, CategoricalDtype)
            and cast_type.categories is not None
        )

        if not (is_object_dtype(values) or has_known_categories):
            # TODO: this is for consistency with
            # c-parser which parses all categories
            # as strings
            values = astype_nansafe(values, np.dtype(str))

        uniques = Index(values).unique().dropna()
        codes = uniques.get_indexer(values)
        return Categorical._from_inferred_categories(
            uniques, codes, cast_type, true_values=self.true_values
        )

    # use the EA's implementation of casting
    if is_extension_array_dtype(cast_type):
        # ensure cast_type is an actual dtype and not a string
        cast_type = pandas_dtype(cast_type)
        array_type = cast_type.construct_array_type()
        try:
            if not is_bool_dtype(cast_type):
                return array_type._from_sequence_of_strings(values, dtype=cast_type)
            return array_type._from_sequence_of_strings(
                values,
                dtype=cast_type,
                true_values=self.true_values,
                false_values=self.false_values,
            )
        except NotImplementedError as err:
            raise NotImplementedError(
                f"Extension Array: {array_type} must implement "
                "_from_sequence_of_strings in order to be used in parser methods"
            ) from err

    try:
        return astype_nansafe(values, cast_type, copy=True, skipna=True)
    except ValueError as err:
        raise ValueError(
            f"Unable to convert column {column} to type {cast_type}"
        ) from err