def _astype_nansafe(arr, dtype, copy=True):
    """
    Cast ``arr`` to ``dtype``, special-casing conversions that need care.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype, or any spec accepted by ``_coerce_to_dtype``
    copy : bool, default True
        Only consulted on the generic fall-through at the very end. When
        False the result is ``arr.view(dtype)``, i.e. the same buffer
        reinterpreted as ``dtype``, so be careful: the result shape can
        change when the item sizes differ.

    Raises
    ------
    TypeError
        datetime64 / timedelta64 input with a target dtype that none of the
        dedicated branches handles.
    ValueError
        Floating input that is not entirely finite, cast to an integer dtype.
    """
    if not isinstance(dtype, np.dtype):
        dtype = _coerce_to_dtype(dtype)

    target = dtype.type

    # text_type is str on Py3 and unicode on Py2
    if issubclass(target, text_type):
        flat = lib.astype_unicode(arr.ravel())
        return flat.reshape(arr.shape)

    if issubclass(target, string_types):
        flat = lib.astype_str(arr.ravel())
        return flat.reshape(arr.shape)

    if is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype != _NS_DTYPE:
            raise TypeError(
                "cannot astype a datetimelike from [%s] to [%s]"
                % (arr.dtype, dtype)
            )
        return arr.astype(_NS_DTYPE)

    if is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        if PY3:
            needs_conversion = dtype not in [_INT64_DTYPE, _TD_DTYPE]
        else:
            needs_conversion = dtype != _TD_DTYPE

        if not needs_conversion:
            return arr.astype(_TD_DTYPE)

        if dtype.kind != 'm':
            raise TypeError(
                "cannot astype a timedelta from [%s] to [%s]"
                % (arr.dtype, dtype)
            )

        # allow frequency conversions; the result is float64 so that the
        # null positions can be written back as NaN
        na_mask = isnull(arr)
        converted = arr.astype(dtype).astype(np.float64)
        converted[na_mask] = np.nan
        return converted

    float_to_int = (np.issubdtype(arr.dtype, np.floating) and
                    np.issubdtype(dtype, np.integer))
    if float_to_int:
        # validation only; the cast itself happens in the fall-through below
        if not np.isfinite(arr).all():
            raise ValueError(
                'Cannot convert non-finite values (NA or inf) to integer'
            )
    elif arr.dtype == np.object_ and np.issubdtype(target, np.integer):
        # work around NumPy brokenness, #1987
        flat = lib.astype_intsafe(arr.ravel(), dtype)
        return flat.reshape(arr.shape)

    return arr.astype(dtype) if copy else arr.view(dtype)
def astype_nansafe(arr, dtype, copy=True):
    """
    Convert ``arr`` to ``dtype`` with dedicated handling for string,
    datetime64, timedelta64, float-to-integer and object-to-integer casts.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype, or any spec accepted by ``_coerce_to_dtype``
    copy : bool, default True
        Used only by the final generic cast. With ``copy=False`` the array
        is returned as ``arr.view(dtype)``; that reinterprets the existing
        buffer, so the result shape could change -- be very careful.

    Raises
    ------
    TypeError
        datetime64 / timedelta64 input and a target dtype not covered by the
        dedicated branches.
    ValueError
        Floating input containing non-finite values, cast to an integer
        dtype.
    """
    if not isinstance(dtype, np.dtype):
        dtype = _coerce_to_dtype(dtype)

    original_shape = arr.shape

    if issubclass(dtype.type, text_type):
        # str on Py3, unicode on Py2
        return lib.astype_unicode(arr.ravel()).reshape(original_shape)

    if issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(original_shape)

    if is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype != _NS_DTYPE:
            message = ("cannot astype a datetimelike from [%s] to [%s]" %
                       (arr.dtype, dtype))
            raise TypeError(message)
        return arr.astype(_NS_DTYPE)

    if is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        if PY3:
            other_target = dtype not in [_INT64_DTYPE, _TD_DTYPE]
        else:
            other_target = dtype != _TD_DTYPE

        if other_target:
            if dtype.kind == 'm':
                # frequency conversion: go through float64 so nulls can be
                # restored as NaN afterwards
                nulls = isnull(arr)
                as_float = arr.astype(dtype).astype(np.float64)
                as_float[nulls] = np.nan
                return as_float

            message = ("cannot astype a timedelta from [%s] to [%s]" %
                       (arr.dtype, dtype))
            raise TypeError(message)

        return arr.astype(_TD_DTYPE)

    if (np.issubdtype(arr.dtype, np.floating) and
            np.issubdtype(dtype, np.integer)):
        # only a sanity check; the actual cast is the generic one below
        all_finite = np.isfinite(arr).all()
        if not all_finite:
            raise ValueError('Cannot convert non-finite values (NA or inf) '
                             'to integer')
    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
        # work around NumPy brokenness, #1987
        return lib.astype_intsafe(arr.ravel(), dtype).reshape(original_shape)

    if not copy:
        return arr.view(dtype)
    return arr.astype(dtype)
def astype_nansafe(arr, dtype, copy=True, skipna=False):
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
        Extension dtypes are dispatched to ``_from_sequence`` of their array
        type; anything else that is not already an ``np.dtype`` is passed
        through ``pandas_dtype`` first.
    copy : bool, default True
        If False, the generic cast at the end is done with
        ``arr.astype(dtype, copy=False)``, which lets NumPy skip the copy
        when none is needed. Values are always converted, never
        reinterpreted in place.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit,
        or floating values that are not all finite were cast to an integer
        dtype.
    TypeError
        datetime64 / timedelta64 input and a target dtype that none of the
        dedicated branches handles.
    """
    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(
            arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, str):
        return lib.astype_str(arr.ravel(), skipna=skipna).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        if dtype not in [_INT64_DTYPE, _TD_DTYPE]:

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == "m":
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(
            dtype, np.integer):

        # validation only; the cast itself is the generic one at the bottom
        if not np.isfinite(arr).all():
            raise ValueError(
                "Cannot convert non-finite values (NA or inf) to integer")

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = "The '{dtype}' dtype has no unit. Please pass in '{dtype}[ns]' instead."
        raise ValueError(msg.format(dtype=dtype.name))

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    # Cast instead of ``arr.view(dtype)``: a view reinterprets the raw buffer,
    # which gives wrong values (e.g. float64 -> int64) or a different shape
    # when the item sizes differ.
    return arr.astype(dtype, copy=False)
def astype_nansafe(arr, dtype, copy=True):
    """
    Cast ``arr`` to ``dtype``, special-casing conversions that need care.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype, or any spec accepted by ``pandas_dtype``
    copy : bool, default True
        Only consulted on the generic fall-through at the very end. When
        False the result is ``arr.view(dtype)``, i.e. the same buffer
        reinterpreted as ``dtype``, so be careful: the result shape can
        change when the item sizes differ.

    Raises
    ------
    TypeError
        datetime64 / timedelta64 input with a target dtype that none of the
        dedicated branches handles.
    ValueError
        Floating input that is not entirely finite, cast to an integer dtype.

    Warns
    -----
    FutureWarning
        The generic cast is reached with a unit-less "datetime64" or
        "timedelta64" dtype; "[ns]" is then assumed.
    """
    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    target = dtype.type

    # text_type is str on Py3 and unicode on Py2
    if issubclass(target, text_type):
        flat = lib.astype_unicode(arr.ravel())
        return flat.reshape(arr.shape)

    if issubclass(target, string_types):
        flat = lib.astype_str(arr.ravel())
        return flat.reshape(arr.shape)

    if is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype != _NS_DTYPE:
            raise TypeError(
                "cannot astype a datetimelike from [{from_dtype}] "
                "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                         to_dtype=dtype)
            )
        return arr.astype(_NS_DTYPE)

    if is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        if PY3:
            needs_conversion = dtype not in [_INT64_DTYPE, _TD_DTYPE]
        else:
            needs_conversion = dtype != _TD_DTYPE

        if not needs_conversion:
            return arr.astype(_TD_DTYPE)

        if dtype.kind != 'm':
            raise TypeError(
                "cannot astype a timedelta from [{from_dtype}] "
                "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                         to_dtype=dtype)
            )

        # allow frequency conversions; the result is float64 so that the
        # NA positions can be written back as NaN
        na_mask = isna(arr)
        converted = arr.astype(dtype).astype(np.float64)
        converted[na_mask] = np.nan
        return converted

    float_to_int = (np.issubdtype(arr.dtype, np.floating) and
                    np.issubdtype(dtype, np.integer))
    if float_to_int:
        # validation only; the cast itself happens in the fall-through below
        if not np.isfinite(arr).all():
            raise ValueError(
                'Cannot convert non-finite values (NA or inf) to integer'
            )
    elif arr.dtype == np.object_ and np.issubdtype(target, np.integer):
        # work around NumPy brokenness, #1987
        flat = lib.astype_intsafe(arr.ravel(), dtype)
        return flat.reshape(arr.shape)

    unit_less = dtype.name in ("datetime64", "timedelta64")
    if unit_less:
        template = ("Passing in '{dtype}' dtype with no frequency is "
                    "deprecated and will raise in a future version. "
                    "Please pass in '{dtype}[ns]' instead.")
        warnings.warn(template.format(dtype=dtype.name),
                      FutureWarning, stacklevel=5)
        # fall back to nanosecond resolution
        dtype = np.dtype(dtype.name + "[ns]")

    return arr.astype(dtype) if copy else arr.view(dtype)
def astype_nansafe(arr, dtype, copy=True):
    """
    Convert ``arr`` to ``dtype`` with dedicated handling for string,
    datetime64, timedelta64, float-to-integer and object-to-integer casts.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype, or any spec accepted by ``pandas_dtype``
    copy : bool, default True
        Used only by the final generic cast. With ``copy=False`` the array
        is returned as ``arr.view(dtype)``; that reinterprets the existing
        buffer, so the result shape could change -- be very careful.

    Raises
    ------
    TypeError
        datetime64 / timedelta64 input and a target dtype not covered by the
        dedicated branches.
    ValueError
        Floating input containing non-finite values, cast to an integer
        dtype.

    Warns
    -----
    FutureWarning
        A unit-less "datetime64" / "timedelta64" dtype reaches the generic
        cast; it is replaced by the "[ns]" variant.
    """
    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    original_shape = arr.shape

    if issubclass(dtype.type, text_type):
        # str on Py3, unicode on Py2
        return lib.astype_unicode(arr.ravel()).reshape(original_shape)

    if issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(original_shape)

    if is_datetime64_dtype(arr):
        if dtype == object:
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype != _NS_DTYPE:
            message = ("cannot astype a datetimelike from [{from_dtype}] "
                       "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                to_dtype=dtype))
            raise TypeError(message)
        return arr.astype(_NS_DTYPE)

    if is_timedelta64_dtype(arr):
        if dtype == np.int64:
            return arr.view(dtype)
        if dtype == object:
            return tslib.ints_to_pytimedelta(arr.view(np.int64))

        # in py3, timedelta64[ns] are int64
        if PY3:
            other_target = dtype not in [_INT64_DTYPE, _TD_DTYPE]
        else:
            other_target = dtype != _TD_DTYPE

        if other_target:
            if dtype.kind == 'm':
                # frequency conversion: go through float64 so NA positions
                # can be restored as NaN afterwards
                nulls = isna(arr)
                as_float = arr.astype(dtype).astype(np.float64)
                as_float[nulls] = np.nan
                return as_float

            message = ("cannot astype a timedelta from [{from_dtype}] "
                       "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                to_dtype=dtype))
            raise TypeError(message)

        return arr.astype(_TD_DTYPE)

    if (np.issubdtype(arr.dtype, np.floating) and
            np.issubdtype(dtype, np.integer)):
        # only a sanity check; the actual cast is the generic one below
        all_finite = np.isfinite(arr).all()
        if not all_finite:
            raise ValueError('Cannot convert non-finite values (NA or inf) '
                             'to integer')
    elif arr.dtype == np.object_ and np.issubdtype(dtype.type, np.integer):
        # work around NumPy brokenness, #1987
        return lib.astype_intsafe(arr.ravel(), dtype).reshape(original_shape)

    if dtype.name in ("datetime64", "timedelta64"):
        text = ("Passing in '{dtype}' dtype with no frequency is "
                "deprecated and will raise in a future version. "
                "Please pass in '{dtype}[ns]' instead.").format(
                    dtype=dtype.name)
        warnings.warn(text, FutureWarning, stacklevel=5)
        # assume nanosecond resolution for the unit-less dtype
        dtype = np.dtype(dtype.name + "[ns]")

    if not copy:
        return arr.view(dtype)
    return arr.astype(dtype)
def astype_nansafe(arr, dtype, copy=True, skipna=False):
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
        Extension dtypes are dispatched to ``_from_sequence`` of their array
        type; anything else that is not already an ``np.dtype`` is passed
        through ``pandas_dtype`` first.
    copy : bool, default True
        If False, the generic cast at the end is done with
        ``arr.astype(dtype, copy=False)``, which lets NumPy skip the copy
        when none is needed. Values are always converted, never
        reinterpreted in place.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit,
        or floating values that are not all finite were cast to an integer
        dtype.
    TypeError
        datetime64 / timedelta64 input and a target dtype that none of the
        dedicated branches handles.
    """
    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(
            arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel(),
                                  skipna=skipna).reshape(arr.shape)

    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel(),
                              skipna=skipna).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == 'M':
            return arr.astype(dtype)

        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # in py3, timedelta64[ns] are int64
        if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
                (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == 'm':
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        # validation only; the cast itself is the generic one at the bottom
        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime
            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta
            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = ("The '{dtype}' dtype has no unit. "
               "Please pass in '{dtype}[ns]' instead.")
        raise ValueError(msg.format(dtype=dtype.name))

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    # Cast instead of ``arr.view(dtype)``: a view reinterprets the raw buffer,
    # which gives wrong values (e.g. float64 -> int64) or a different shape
    # when the item sizes differ.
    return arr.astype(dtype, copy=False)
def astype_nansafe(arr, dtype, copy=True):
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
        Extension dtypes are dispatched to ``_from_sequence`` of their array
        type; anything else that is not already an ``np.dtype`` is passed
        through ``pandas_dtype`` first.
    copy : bool, default True
        If False, the generic cast at the end is done with
        ``arr.astype(dtype, copy=False)``, which lets NumPy skip the copy
        when none is needed. Values are always converted, never
        reinterpreted in place.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        datetime64 / timedelta64 input and a target dtype that none of the
        dedicated branches handles.
    ValueError
        Floating values that are not all finite were cast to an integer
        dtype.

    Warns
    -----
    FutureWarning
        The generic cast is reached with a unit-less "datetime64" or
        "timedelta64" dtype; "[ns]" is then assumed.
    """
    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        return dtype.construct_array_type()._from_sequence(
            arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, text_type):
        # in Py3 that's str, in Py2 that's unicode
        return lib.astype_unicode(arr.ravel()).reshape(arr.shape)

    elif issubclass(dtype.type, string_types):
        return lib.astype_str(arr.ravel()).reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == 'M':
            return arr.astype(dtype)

        raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        elif dtype == np.int64:
            return arr.view(dtype)

        # in py3, timedelta64[ns] are int64
        if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
                (not PY3 and dtype != _TD_DTYPE)):

            # allow frequency conversions
            # we return a float here!
            if dtype.kind == 'm':
                mask = isna(arr)
                result = arr.astype(dtype).astype(np.float64)
                result[mask] = np.nan
                return result
        elif dtype == _TD_DTYPE:
            return arr.astype(_TD_DTYPE, copy=copy)

        raise TypeError("cannot astype a timedelta from [{from_dtype}] "
                        "to [{to_dtype}]".format(from_dtype=arr.dtype,
                                                 to_dtype=dtype))

    elif (np.issubdtype(arr.dtype, np.floating) and
          np.issubdtype(dtype, np.integer)):

        # validation only; the cast itself is the generic one at the bottom
        if not np.isfinite(arr).all():
            raise ValueError('Cannot convert non-finite values (NA or inf) to '
                             'integer')

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime
            return astype_nansafe(to_datetime(arr).values, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta
            return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = ("Passing in '{dtype}' dtype with no frequency is "
               "deprecated and will raise in a future version. "
               "Please pass in '{dtype}[ns]' instead.")
        warnings.warn(msg.format(dtype=dtype.name),
                      FutureWarning, stacklevel=5)
        # assume nanosecond resolution for the unit-less dtype
        dtype = np.dtype(dtype.name + "[ns]")

    if copy or is_object_dtype(arr) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    # Cast instead of ``arr.view(dtype)``: a view reinterprets the raw buffer,
    # which gives wrong values (e.g. float64 -> int64) or a different shape
    # when the item sizes differ.
    return arr.astype(dtype, copy=False)
def astype_nansafe(
    arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
    """
    Convert the elements of ``arr`` to ``dtype`` in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        Passed on to the branches that accept it. The datetime64 /
        timedelta64 to int64 branches return ``arr.view(dtype)`` without
        consulting it.
    skipna : bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        ``dtype`` is neither an np.dtype nor an ExtensionDtype; the dtype
        was a datetime64/timedelta64 dtype, but it had no unit; or
        datetime64 / timedelta64 values for which ``isna`` reports any
        missing entry were cast to int64.
    TypeError
        datetime64 / timedelta64 input and a target dtype that none of the
        dedicated branches handles.
    """
    if arr.ndim > 1:
        # Work on the flattened data, then restore the original shape.
        converted = astype_nansafe(arr.ravel(), dtype, copy=copy, skipna=skipna)
        # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
        # attribute "reshape"
        return converted.reshape(arr.shape)  # type: ignore[union-attr]

    # We get here with 0-dim from sparse
    arr = np.atleast_1d(arr)

    # dispatch on extension dtype if needed
    if isinstance(dtype, ExtensionDtype):
        array_type = dtype.construct_array_type()
        return array_type._from_sequence(arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    if arr.dtype.kind in ["m", "M"] and (
        issubclass(dtype.type, str) or dtype == _dtype_obj
    ):
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        # let the wrapped datetimelike array do the str / object conversion
        arr = ensure_wrapped_if_datetimelike(arr)
        return arr.astype(dtype, copy=copy)

    if issubclass(dtype.type, str):
        return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False)

    if is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            deprecation = (
                f"casting {arr.dtype} values to int64 with .astype(...) "
                "is deprecated and will raise in a future version. "
                "Use .view(...) instead."
            )
            warnings.warn(
                deprecation, FutureWarning, stacklevel=find_stack_level()
            )
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype.kind != "M":
            raise TypeError(
                f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]"
            )

        # allow frequency conversions
        return arr.astype(dtype)

    if is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            deprecation = (
                f"casting {arr.dtype} values to int64 with .astype(...) "
                "is deprecated and will raise in a future version. "
                "Use .view(...) instead."
            )
            warnings.warn(
                deprecation, FutureWarning, stacklevel=find_stack_level()
            )
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(
            f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]"
        )

    if np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    if is_object_dtype(arr.dtype):
        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr, dtype)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe
        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            parsed = to_datetime(arr).values
            return astype_nansafe(parsed, dtype, copy=copy)

        if is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            parsed = to_timedelta(arr)._values
            return astype_nansafe(parsed, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        raise ValueError(
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )

    must_copy = copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype)
    if not must_copy:
        return arr.astype(dtype, copy=copy)

    # Explicit copy, or required since NumPy can't view from / to object.
    return arr.astype(dtype, copy=True)