def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype
    if possible. If it's not possible, potentially because of overflow,
    convert the array to float64 instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool
        Whether to copy the original array or reuse it in place, if possible.

    Returns
    -------
    out_arr : The input array cast as int64 if possible without overflow.
        Otherwise the input array cast to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type will remain unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    try:
        # error: Unexpected keyword argument "casting" for "astype"
        return arr.astype("int64", copy=copy, casting="safe")  # type: ignore[call-arg]
    except TypeError:
        pass

    try:
        # error: Unexpected keyword argument "casting" for "astype"
        return arr.astype("uint64", copy=copy, casting="safe")  # type: ignore[call-arg]
    except TypeError:
        return arr.astype("float64", copy=copy)
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.
    """
    wants_np_int = isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer)
    if wants_np_int and is_categorical_dtype(arr.dtype):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array
        dense = cast(SparseArray, arr).to_dense()
        return dense.astype(dtype, copy=False)

    is_np_datetimelike = isinstance(arr, np.ndarray) and arr.dtype.kind in ("m", "M")
    if is_np_datetimelike and dtype is np.dtype("object"):
        # wrap datetime-likes in EA to ensure astype(object) gives
        # Timestamp/Timedelta; this can happen when concat_compat is called
        # directly on arrays (when arrays are not coming from
        # Index/Series._values), eg in BlockManager.quantile
        arr = array(arr)

    if is_extension_array_dtype(dtype) and isinstance(arr, np.ndarray):
        # numpy's astype cannot handle ExtensionDtypes
        return array(arr, dtype=dtype, copy=False)
    return arr.astype(dtype, copy=False)
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.
    """
    # Fast path: nothing to do when the dtype already matches.
    if is_dtype_equal(arr.dtype, dtype):
        return arr

    casting_to_np_int = isinstance(dtype, np.dtype) and np.issubdtype(
        dtype, np.integer
    )
    if casting_to_np_int and is_categorical_dtype(arr.dtype):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array
        # mypy cannot match the Union[dtype, ExtensionDtype] in `dtype` against
        # ndarray.astype's accepted argument types, hence the ignore below.
        sparse = cast("SparseArray", arr)
        return sparse.to_dense().astype(dtype, copy=False)  # type: ignore[arg-type]

    # astype_array includes ensure_wrapped_if_datetimelike
    return astype_array(arr, dtype=dtype, copy=False)
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.
    """
    if is_dtype_equal(arr.dtype, dtype):
        # already the requested dtype -> nothing to do
        return arr

    if (
        isinstance(dtype, np.dtype)
        and np.issubdtype(dtype, np.integer)
        and is_categorical_dtype(arr.dtype)
    ):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array
        # mypy cannot match the Union[dtype, ExtensionDtype] in `dtype` against
        # ndarray.astype's accepted argument types, hence the ignore below.
        dense = cast(SparseArray, arr).to_dense()
        return dense.astype(dtype, copy=False)  # type: ignore[arg-type]

    if (
        isinstance(arr, np.ndarray)
        and arr.dtype.kind in "mM"
        and dtype is np.dtype("object")
    ):
        # wrap datetime-likes in EA to ensure astype(object) gives
        # Timestamp/Timedelta; this can happen when concat_compat is called
        # directly on arrays (when arrays are not coming from
        # Index/Series._values), eg in BlockManager.quantile
        arr = ensure_wrapped_if_datetimelike(arr)

    if isinstance(dtype, ExtensionDtype) and isinstance(arr, np.ndarray):
        # numpy's astype cannot handle ExtensionDtypes
        return pd_array(arr, dtype=dtype, copy=False)

    return arr.astype(dtype, copy=False)
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype
    if possible. If it's not possible, potentially because of overflow,
    convert the array to float64 instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool
        Whether to copy the original array or reuse it in place, if possible.

    Returns
    -------
    out_arr : The input array cast as int64 if possible without overflow.
        Otherwise the input array cast to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type will remain unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    # First attempt safe casts to the two 64-bit integer dtypes; a TypeError
    # means either overflow under "safe" casting or an astype signature that
    # does not accept `casting` (ExtensionArray).
    for target in ("int64", "uint64"):
        try:
            # error: Unexpected keyword argument "casting" for "astype"
            return arr.astype(target, copy=copy, casting="safe")  # type: ignore[call-arg]
        except TypeError:
            continue

    if is_extension_array_dtype(arr.dtype):
        # pandas/core/dtypes/common.py:168: error: Item "ndarray" of
        # "Union[ExtensionArray, ndarray]" has no attribute "to_numpy" [union-attr]
        return arr.to_numpy(  # type: ignore[union-attr]
            dtype="float64", na_value=np.nan
        )
    return arr.astype("float64", copy=copy)
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : dtype object
    copy : bool, default False
        copy if indicated

    Returns
    -------
    ndarray or ExtensionArray
    """
    dt64_to_narrow_int = (
        values.dtype.kind in "mM"
        and dtype.kind in "iu"
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    )
    if dt64_to_narrow_int:
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        raise TypeError(
            rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        )

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        # naive datetime64 -> tz-aware goes through the dedicated helper
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    if is_dtype_equal(values.dtype, dtype):
        # no cast required; still honor the copy flag
        return values.copy() if copy else values

    if isinstance(values, np.ndarray):
        result = astype_nansafe(values, dtype, copy=copy)
    else:
        # i.e. ExtensionArray
        result = values.astype(dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(result.dtype.type, str):
        result = np.array(result, dtype=object)

    return result
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Note: this function assumes that the indexer is a valid(ated) indexer with
    no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take, subarrays corresponding to -1 value
        indices are filed with fill_value
    axis : int, default 0
        Axis to take from
    fill_value : any, default np.nan
        Fill value to replace -1 values with
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)
    elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM":
        # datetimelike ndarray: the fill value may require a wider dtype, and
        # EA.take is strict about returning a new object of the same type,
        # so cast upfront
        promoted, fill_value = maybe_promote(arr.dtype, fill_value)
        if arr.dtype != promoted:
            arr = arr.astype(promoted)

    if isinstance(arr, np.ndarray):
        return _take_nd_ndarray(np.asarray(arr), indexer, axis, fill_value, allow_fill)

    # ExtensionArray from here on (includes DatetimeArray, TimedeltaArray)
    if is_1d_only_ea_obj(arr):
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    # 2D-capable EAs (i.e. DatetimeArray, TimedeltaArray) accept an axis
    backed = cast("NDArrayBackedExtensionArray", arr)
    return backed.take(indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis)