def astype(self, dtype, copy=True): """ Cast to a NumPy array or IntegerArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- array : ndarray or IntegerArray NumPy ndarray or IntergerArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an IntegerDtype, equivalent of same_kind casting """ # if we are astyping to an existing IntegerDtype we can fastpath if isinstance(dtype, _IntegerDtype): result = self._data.astype(dtype.numpy_dtype, copy=False) return type(self)(result, mask=self._mask, copy=False) # coerce data = self._coerce_to_ndarray() return astype_nansafe(data, dtype, copy=None)
def astype(self, dtype, copy=True): """ Cast to a NumPy array or IntegerArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- array : ndarray or IntegerArray NumPy ndarray or IntergerArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an IntegerDtype, equivalent of same_kind casting """ # if we are astyping to an existing IntegerDtype we can fastpath if isinstance(dtype, _IntegerDtype): result = self._data.astype(dtype.numpy_dtype, copy=False) return type(self)(result, mask=self._mask, copy=False) # coerce data = self._coerce_to_ndarray() return astype_nansafe(data, dtype, copy=None)
def test_astype_nansafe_copy_false(any_int_dtype): # GH#34457 use astype, not view arr = np.array([1, 2, 3], dtype=any_int_dtype) dtype = np.dtype("float64") result = astype_nansafe(arr, dtype, copy=False) expected = np.array([1.0, 2.0, 3.0], dtype=dtype) tm.assert_numpy_array_equal(result, expected)
def make_sparse(arr: np.ndarray, kind="block", fill_value=None, dtype=None, copy=False): """ Convert ndarray to sparse format Parameters ---------- arr : ndarray kind : {'block', 'integer'} fill_value : NaN or another value dtype : np.dtype, optional copy : bool, default False Returns ------- (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) """ assert isinstance(arr, np.ndarray) if arr.ndim > 1: raise TypeError("expected dimension <= 1 data") if fill_value is None: fill_value = na_value_for_dtype(arr.dtype) if isna(fill_value): mask = notna(arr) else: # cast to object comparison to be safe if is_string_dtype(arr): arr = arr.astype(object) if is_object_dtype(arr.dtype): # element-wise equality check method in numpy doesn't treat # each element type, eg. 0, 0.0, and False are treated as # same. So we have to check the both of its type and value. mask = splib.make_mask_object_ndarray(arr, fill_value) else: mask = arr != fill_value length = len(arr) if length != len(mask): # the arr is a SparseArray indices = mask.sp_index.indices else: indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] if dtype is not None: sparsified_values = astype_nansafe(sparsified_values, dtype=dtype) # TODO: copy return sparsified_values, index, fill_value
def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- ndarray or ExtensionArray NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an IntegerDtype, equivalent of same_kind casting """ from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.string_ import StringDtype dtype = pandas_dtype(dtype) # if we are astyping to an existing IntegerDtype we can fastpath if isinstance(dtype, _IntegerDtype): result = self._data.astype(dtype.numpy_dtype, copy=False) return dtype.construct_array_type()(result, mask=self._mask, copy=False) elif isinstance(dtype, BooleanDtype): result = self._data.astype("bool", copy=False) return dtype.construct_array_type()(result, mask=self._mask, copy=False) elif isinstance(dtype, StringDtype): return dtype.construct_array_type()._from_sequence(self, copy=False) # coerce if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan kwargs = dict(na_value=np.nan) elif is_datetime64_dtype(dtype): kwargs = dict(na_value=np.datetime64("NaT")) else: kwargs = {} data = self.to_numpy(dtype=dtype, **kwargs) return astype_nansafe(data, dtype, copy=False)
def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- ndarray or ExtensionArray NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an FloatingDtype, equivalent of same_kind casting """ from pandas.core.arrays.string_ import StringArray, StringDtype dtype = pandas_dtype(dtype) # if the dtype is exactly the same, we can fastpath if self.dtype == dtype: # return the same object for copy=False return self.copy() if copy else self # if we are astyping to another nullable masked dtype, we can fastpath if isinstance(dtype, BaseMaskedDtype): # TODO deal with NaNs data = self._data.astype(dtype.numpy_dtype, copy=copy) # mask is copied depending on whether the data was copied, and # not directly depending on the `copy` keyword mask = self._mask if data is self._data else self._mask.copy() return dtype.construct_array_type()(data, mask, copy=False) elif isinstance(dtype, StringDtype): return StringArray._from_sequence(self, copy=False) # coerce if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan kwargs = dict(na_value=np.nan) elif is_datetime64_dtype(dtype): kwargs = dict(na_value=np.datetime64("NaT")) else: kwargs = {} data = self.to_numpy(dtype=dtype, **kwargs) return astype_nansafe(data, dtype, copy=False)
def make_sparse(arr, kind='block', fill_value=None, dtype=None, copy=False): """ Convert ndarray to sparse format Parameters ---------- arr : ndarray kind : {'block', 'integer'} fill_value : NaN or another value dtype : np.dtype, optional copy : bool, default False Returns ------- (sparse_values, index, fill_value) : (ndarray, SparseIndex, Scalar) """ arr = _sanitize_values(arr) if arr.ndim > 1: raise TypeError("expected dimension <= 1 data") if fill_value is None: fill_value = na_value_for_dtype(arr.dtype) if isna(fill_value): mask = notna(arr) else: # For str arrays in NumPy 1.12.0, operator!= below isn't # element-wise but just returns False if fill_value is not str, # so cast to object comparison to be safe if is_string_dtype(arr): arr = arr.astype(object) if is_object_dtype(arr.dtype): # element-wise equality check method in numpy doesn't treat # each element type, eg. 0, 0.0, and False are treated as # same. So we have to check the both of its type and value. mask = splib.make_mask_object_ndarray(arr, fill_value) else: mask = arr != fill_value length = len(arr) if length != len(mask): # the arr is a SparseArray indices = mask.sp_index.indices else: indices = mask.nonzero()[0].astype(np.int32) index = _make_index(length, indices, kind) sparsified_values = arr[mask] if dtype is not None: sparsified_values = astype_nansafe(sparsified_values, dtype=dtype) # TODO: copy return sparsified_values, index, fill_value
def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- ndarray or ExtensionArray NumPy ndarray, BooleanArray or IntegerArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an BooleanDtype, equivalent of same_kind casting """ dtype = pandas_dtype(dtype) if isinstance(dtype, BooleanDtype): values, mask = coerce_to_array(self, copy=copy) return BooleanArray(values, mask, copy=False) if is_bool_dtype(dtype): # astype_nansafe converts np.nan to True if self._hasna: raise ValueError("cannot convert float NaN to bool") else: return self._data.astype(dtype, copy=copy) if is_extension_array_dtype(dtype) and is_integer_dtype(dtype): from pandas.core.arrays import IntegerArray return IntegerArray(self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False) # for integer, error if there are missing values if is_integer_dtype(dtype): if self._hasna: raise ValueError("cannot convert NA to integer") # for float dtype, ensure we use np.nan before casting (numpy cannot # deal with pd.NA) na_value = self._na_value if is_float_dtype(dtype): na_value = np.nan # coerce data = self.to_numpy(na_value=na_value) return astype_nansafe(data, dtype, copy=False)
def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- ndarray or ExtensionArray NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an FloatingDtype, equivalent of same_kind casting """ dtype = pandas_dtype(dtype) if isinstance(dtype, ExtensionDtype): # error: Incompatible return value type (got "ExtensionArray", expected # "ndarray") return super().astype(dtype, copy=copy) # type: ignore[return-value] # coerce if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan kwargs = {"na_value": np.nan} elif is_datetime64_dtype(dtype): # error: Dict entry 0 has incompatible type "str": "datetime64"; expected # "str": "float" kwargs = { "na_value": np.datetime64("NaT") } # type: ignore[dict-item] else: kwargs = {} # error: Argument 2 to "to_numpy" of "BaseMaskedArray" has incompatible # type "**Dict[str, float]"; expected "bool" data = self.to_numpy(dtype=dtype, **kwargs) # type: ignore[arg-type] # error: Incompatible return value type (got "ExtensionArray", expected # "ndarray") return astype_nansafe(data, dtype, copy=False) # type: ignore[return-value]
def update_dtype(self, dtype): """ Convert the SparseDtype to a new dtype. This takes care of converting the ``fill_value``. Parameters ---------- dtype : Union[str, numpy.dtype, SparseDtype] The new dtype to use. * For a SparseDtype, it is simply returned * For a NumPy dtype (or str), the current fill value is converted to the new dtype, and a SparseDtype with `dtype` and the new fill value is returned. Returns ------- SparseDtype A new SparseDtype with the correct `dtype` and fill value for that `dtype`. Raises ------ ValueError When the current fill value cannot be converted to the new `dtype` (e.g. trying to convert ``np.nan`` to an integer dtype). Examples -------- >>> SparseDtype(int, 0).update_dtype(float) Sparse[float64, 0.0] >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) Sparse[float64, nan] """ cls = type(self) dtype = pandas_dtype(dtype) if not isinstance(dtype, cls): if is_extension_array_dtype(dtype): raise TypeError( "sparse arrays of extension dtypes not supported") # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no # attribute "item" fill_value = astype_nansafe( # type: ignore[union-attr] np.array(self.fill_value), dtype).item() dtype = cls(dtype, fill_value=fill_value) return dtype
def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if needs_i8_conversion(dtype): raise TypeError( f"Cannot convert Float64Index to dtype {dtype}; integer " "values are required for conversion") elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): # TODO(jreback); this can change once we have an EA Index type # GH 13149 arr = astype_nansafe(self._values, dtype=dtype) return Int64Index(arr, name=self.name) return super().astype(dtype, copy=copy)
def astype(self, dtype=None, copy=True): dtype = np.dtype(dtype) sp_values = astype_nansafe(self.sp_values, dtype, copy=copy) try: if is_bool_dtype(dtype): # to avoid np.bool_ dtype fill_value = bool(self.fill_value) else: fill_value = dtype.type(self.fill_value) except ValueError: msg = 'unable to coerce current fill_value {fill} to {dtype} dtype' raise ValueError(msg.format(fill=self.fill_value, dtype=dtype)) return self._simple_new(sp_values, self.sp_index, fill_value=fill_value)
def astype(self, dtype=None, copy=True): dtype = np.dtype(dtype) sp_values = astype_nansafe(self.sp_values, dtype, copy=copy) try: if is_bool_dtype(dtype): # to avoid np.bool_ dtype fill_value = bool(self.fill_value) else: fill_value = dtype.type(self.fill_value) except ValueError: msg = 'unable to coerce current fill_value {0} to {1} dtype' raise ValueError(msg.format(self.fill_value, dtype)) return self._simple_new(sp_values, self.sp_index, fill_value=fill_value)
def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if needs_i8_conversion(dtype): raise TypeError( f"Cannot convert Float64Index to dtype {dtype}; integer " "values are required for conversion") elif is_integer_dtype(dtype) and not is_extension_array_dtype(dtype): # TODO(jreback); this can change once we have an EA Index type # GH 13149 # error: Argument 1 to "astype_nansafe" has incompatible type # "Union[ExtensionArray, ndarray]"; expected "ndarray" arr = astype_nansafe(self._values, dtype=dtype) # type: ignore[arg-type] return Int64Index(arr, name=self.name) return super().astype(dtype, copy=copy)
def update_dtype(self, dtype): """ Convert the SparseDtype to a new dtype. This takes care of converting the ``fill_value``. Parameters ---------- dtype : Union[str, numpy.dtype, SparseDtype] The new dtype to use. * For a SparseDtype, it is simply returned * For a NumPy dtype (or str), the current fill value is converted to the new dtype, and a SparseDtype with `dtype` and the new fill value is returned. Returns ------- SparseDtype A new SparseDtype with the correct `dtype` and fill value for that `dtype`. Raises ------ ValueError When the current fill value cannot be converted to the new `dtype` (e.g. trying to convert ``np.nan`` to an integer dtype). Examples -------- >>> SparseDtype(int, 0).update_dtype(float) Sparse[float64, 0.0] >>> SparseDtype(int, 1).update_dtype(SparseDtype(float, np.nan)) Sparse[float64, nan] """ cls = type(self) dtype = pandas_dtype(dtype) if not isinstance(dtype, cls): fill_value = astype_nansafe(np.array(self.fill_value), dtype).item() dtype = cls(dtype, fill_value=fill_value) return dtype
def astype(self, dtype, copy=True): """ Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- array : ndarray or ExtensionArray NumPy ndarray, BooleanArray or IntergerArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an BooleanDtype, equivalent of same_kind casting """ dtype = pandas_dtype(dtype) if isinstance(dtype, BooleanDtype): values, mask = coerce_to_array(self, copy=copy) return BooleanArray(values, mask, copy=False) if is_bool_dtype(dtype): # astype_nansafe converts np.nan to True if self.isna().any(): raise ValueError("cannot convert float NaN to bool") else: return self._data.astype(dtype, copy=copy) if is_extension_array_dtype(dtype) and is_integer_dtype(dtype): from pandas.core.arrays import IntegerArray return IntegerArray(self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False) # coerce data = self._coerce_to_ndarray() return astype_nansafe(data, dtype, copy=None)
def astype(self, dtype, copy: bool = True) -> ArrayLike: """ Cast to a NumPy array or ExtensionArray with 'dtype'. Parameters ---------- dtype : str or dtype Typecode or data-type to which the array is cast. copy : bool, default True Whether to copy the data, even if not necessary. If False, a copy is made only if the old dtype does not match the new dtype. Returns ------- ndarray or ExtensionArray NumPy ndarray, or BooleanArray, IntegerArray or FloatingArray with 'dtype' for its dtype. Raises ------ TypeError if incompatible type with an FloatingDtype, equivalent of same_kind casting """ dtype = pandas_dtype(dtype) if isinstance(dtype, ExtensionDtype): return super().astype(dtype, copy=copy) # coerce if is_float_dtype(dtype): # In astype, we consider dtype=float to also mean na_value=np.nan kwargs = {"na_value": np.nan} elif is_datetime64_dtype(dtype): kwargs = {"na_value": np.datetime64("NaT")} else: kwargs = {} data = self.to_numpy(dtype=dtype, **kwargs) return astype_nansafe(data, dtype, copy=False)
def astype(self, dtype, copy=True): dtype = pandas_dtype(dtype) if is_float_dtype(self.dtype): if needs_i8_conversion(dtype): raise TypeError( f"Cannot convert Float64Index to dtype {dtype}; integer " "values are required for conversion") elif is_integer_dtype( dtype) and not is_extension_array_dtype(dtype): # TODO(jreback); this can change once we have an EA Index type # GH 13149 arr = astype_nansafe(self._values, dtype=dtype) if isinstance(self, Float64Index): return Int64Index(arr, name=self.name) else: return NumericIndex(arr, name=self.name, dtype=dtype) elif self._is_backward_compat_public_numeric_index: # this block is needed so e.g. NumericIndex[int8].astype("int32") returns # NumericIndex[int32] and not Int64Index with dtype int64. # When Int64Index etc. are removed from the code base, removed this also. if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): return self._constructor(self, dtype=dtype, copy=copy) return super().astype(dtype, copy=copy)
def astype(self, dtype=None, copy=True): """ Change the dtype of a SparseArray. The output will always be a SparseArray. To convert to a dense ndarray with a certain dtype, use :meth:`numpy.asarray`. Parameters ---------- dtype : np.dtype or ExtensionDtype For SparseDtype, this changes the dtype of ``self.sp_values`` and the ``self.fill_value``. For other dtypes, this only changes the dtype of ``self.sp_values``. copy : bool, default True Whether to ensure a copy is made, even if not necessary. Returns ------- SparseArray Examples -------- >>> arr = SparseArray([0, 0, 1, 2]) >>> arr [0, 0, 1, 2] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) >>> arr.astype(np.dtype('int32')) [0, 0, 1, 2] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) Using a NumPy dtype with a different kind (e.g. float) will coerce just ``self.sp_values``. >>> arr.astype(np.dtype('float64')) ... # doctest: +NORMALIZE_WHITESPACE [0, 0, 1.0, 2.0] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) Use a SparseDtype if you wish to be change the fill value as well. >>> arr.astype(SparseDtype("float64", fill_value=np.nan)) ... # doctest: +NORMALIZE_WHITESPACE [nan, nan, 1.0, 2.0] Fill: nan IntIndex Indices: array([2, 3], dtype=int32) """ dtype = pandas_dtype(dtype) if not isinstance(dtype, SparseDtype): dtype = SparseDtype(dtype, fill_value=self.fill_value) sp_values = astype_nansafe(self.sp_values, dtype.subtype, copy=copy) if sp_values is self.sp_values and copy: sp_values = sp_values.copy() return self._simple_new(sp_values, self.sp_index, dtype)
def test_astype_object_preserves_datetime_na(from_type): arr = np.array([from_type("NaT", "ns")]) result = astype_nansafe(arr, dtype=np.dtype("object")) assert isna(result)[0]
def astype(self, dtype=None, copy=True): """ Change the dtype of a SparseArray. The output will always be a SparseArray. To convert to a dense ndarray with a certain dtype, use :meth:`numpy.asarray`. Parameters ---------- dtype : np.dtype or ExtensionDtype For SparseDtype, this changes the dtype of ``self.sp_values`` and the ``self.fill_value``. For other dtypes, this only changes the dtype of ``self.sp_values``. copy : bool, default True Whether to ensure a copy is made, even if not necessary. Returns ------- SparseArray Examples -------- >>> arr = SparseArray([0, 0, 1, 2]) >>> arr [0, 0, 1, 2] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) >>> arr.astype(np.dtype('int32')) [0, 0, 1, 2] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) Using a NumPy dtype with a different kind (e.g. float) will coerce just ``self.sp_values``. >>> arr.astype(np.dtype('float64')) ... # doctest: +NORMALIZE_WHITESPACE [0, 0, 1.0, 2.0] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) Use a SparseDtype if you wish to be change the fill value as well. >>> arr.astype(SparseDtype("float64", fill_value=np.nan)) ... # doctest: +NORMALIZE_WHITESPACE [nan, nan, 1.0, 2.0] Fill: nan IntIndex Indices: array([2, 3], dtype=int32) """ dtype = pandas_dtype(dtype) if not isinstance(dtype, SparseDtype): dtype = SparseDtype(dtype, fill_value=self.fill_value) sp_values = astype_nansafe(self.sp_values, dtype.subtype, copy=copy) if sp_values is self.sp_values and copy: sp_values = sp_values.copy() return self._simple_new(sp_values, self.sp_index, dtype)
def _cast_types(self, values, cast_type, column): """ Cast values to specified type Parameters ---------- values : ndarray cast_type : string or np.dtype dtype to cast values to column : string column name - used only for error reporting Returns ------- converted : ndarray """ if is_categorical_dtype(cast_type): known_cats = (isinstance(cast_type, CategoricalDtype) and cast_type.categories is not None) if not is_object_dtype(values) and not known_cats: # TODO: this is for consistency with # c-parser which parses all categories # as strings values = astype_nansafe(values, np.dtype(str)) cats = Index(values).unique().dropna() values = Categorical._from_inferred_categories( cats, cats.get_indexer(values), cast_type, true_values=self.true_values) # use the EA's implementation of casting elif is_extension_array_dtype(cast_type): # ensure cast_type is an actual dtype and not a string cast_type = pandas_dtype(cast_type) array_type = cast_type.construct_array_type() try: if is_bool_dtype(cast_type): return array_type._from_sequence_of_strings( values, dtype=cast_type, true_values=self.true_values, false_values=self.false_values, ) else: return array_type._from_sequence_of_strings( values, dtype=cast_type) except NotImplementedError as err: raise NotImplementedError( f"Extension Array: {array_type} must implement " "_from_sequence_of_strings in order to be used in parser methods" ) from err else: try: values = astype_nansafe(values, cast_type, copy=True, skipna=True) except ValueError as err: raise ValueError( f"Unable to convert column {column} to type {cast_type}" ) from err return values
def test_astype_nansafe(val, typ): arr = np.array([val]) msg = "Cannot convert NaT values to integer" with pytest.raises(ValueError, match=msg): astype_nansafe(arr, dtype=typ)
def astype(self, dtype=None, copy=True): """ Change the dtype of a SparseArray. The output will always be a SparseArray. To convert to a dense ndarray with a certain dtype, use :meth:`numpy.asarray`. Parameters ---------- dtype : np.dtype or ExtensionDtype For SparseDtype, this changes the dtype of ``self.sp_values`` and the ``self.fill_value``. For other dtypes, this only changes the dtype of ``self.sp_values``. copy : bool, default True Whether to ensure a copy is made, even if not necessary. Returns ------- SparseArray Examples -------- >>> arr = pd.arrays.SparseArray([0, 0, 1, 2]) >>> arr [0, 0, 1, 2] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) >>> arr.astype(np.dtype('int32')) [0, 0, 1, 2] Fill: 0 IntIndex Indices: array([2, 3], dtype=int32) Using a NumPy dtype with a different kind (e.g. float) will coerce just ``self.sp_values``. >>> arr.astype(np.dtype('float64')) ... # doctest: +NORMALIZE_WHITESPACE [0.0, 0.0, 1.0, 2.0] Fill: 0.0 IntIndex Indices: array([2, 3], dtype=int32) Use a SparseDtype if you wish to be change the fill value as well. >>> arr.astype(SparseDtype("float64", fill_value=np.nan)) ... # doctest: +NORMALIZE_WHITESPACE [nan, nan, 1.0, 2.0] Fill: nan IntIndex Indices: array([2, 3], dtype=int32) """ if is_dtype_equal(dtype, self._dtype): if not copy: return self else: return self.copy() dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str # TODO copy=False is broken for astype_nansafe with int -> float, so cannot # passthrough copy keyword: https://github.com/pandas-dev/pandas/issues/34456 sp_values = astype_nansafe(self.sp_values, subtype, copy=True) if sp_values is self.sp_values and copy: sp_values = sp_values.copy() return self._simple_new(sp_values, self.sp_index, dtype)
def test_astype_datetime64_bad_dtype_raises(from_type, to_type): arr = np.array([from_type("2018")]) with pytest.raises(TypeError, match="cannot astype"): astype_nansafe(arr, dtype=to_type)