Beispiel #1
0
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype if possible.

    If that is not possible, potentially because of overflow, fall back to
    uint64, and finally to float64.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool, default False
        Forwarded to ``arr.astype``: whether to copy the original array or
        reuse it in place, if possible.

    Returns
    -------
    np.ndarray
        The input cast to int64 if a "safe" cast allows it, otherwise to
        uint64 if a "safe" cast allows that, otherwise to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type will remain
    unchanged, because the "safe" cast to int64 is rejected while the cast
    to uint64 is accepted.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    try:
        # casting="safe" makes astype raise TypeError instead of silently
        # overflowing or truncating values.
        return arr.astype("int64", copy=copy, casting="safe")  # type: ignore
    except TypeError:
        pass
    try:
        return arr.astype("uint64", copy=copy, casting="safe")  # type: ignore
    except TypeError:
        # Neither 64-bit integer dtype is a safe target; use float64.
        return arr.astype("float64", copy=copy)
Beispiel #2
0
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Cast ``arr`` to ``dtype``, routing around the cases where a plain
    ``arr.astype(dtype)`` would not give the wanted result.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        The array to convert.
    dtype : np.dtype or ExtensionDtype
        The common dtype to convert to.

    Returns
    -------
    ndarray or ExtensionArray
        ``arr`` converted to ``dtype`` (or to object dtype for a categorical
        whose integer cast raises ``ValueError``).
    """
    if is_categorical_dtype(arr.dtype):
        if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer):
            # A categorical of ints yields an integer result dtype, yet the
            # categorical may hold NAs; retry as object when the integer
            # cast is rejected.
            try:
                converted = arr.astype(dtype, copy=False)
            except ValueError:
                converted = arr.astype(object, copy=False)
            return converted

    if is_sparse(arr) and not is_sparse(dtype):
        # SparseArray.astype(dtype) would produce Sparse[dtype] rather than
        # the requested dtype, so densify first and cast the dense result.
        dense = cast(SparseArray, arr).to_dense()
        return dense.astype(dtype, copy=False)

    np_datetimelike = isinstance(arr, np.ndarray) and arr.dtype.kind in ("m", "M")
    if np_datetimelike and dtype is np.dtype("object"):
        # Wrap raw datetime-like ndarrays in an EA so that astype(object)
        # gives Timestamp/Timedelta. This can happen when concat_compat is
        # called directly on arrays (not coming from Index/Series._values),
        # e.g. in BlockManager.quantile.
        arr = array(arr)

    if is_extension_array_dtype(dtype) and isinstance(arr, np.ndarray):
        # numpy's astype cannot handle ExtensionDtypes
        return array(arr, dtype=dtype, copy=False)
    return arr.astype(dtype, copy=False)
Beispiel #3
0
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Convert ``arr`` to the common ``dtype``, taking care of the special cases
    a bare ``arr.astype(dtype)`` gets wrong.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        The array to convert.
    dtype : np.dtype or ExtensionDtype
        The common dtype to convert to.

    Returns
    -------
    ndarray or ExtensionArray
        ``arr`` itself when its dtype already equals ``dtype``; otherwise the
        converted array.
    """
    if is_dtype_equal(arr.dtype, dtype):
        # Nothing to convert.
        return arr

    if is_categorical_dtype(arr.dtype):
        if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer):
            # A categorical of ints yields an integer result dtype, yet the
            # categorical may hold NAs; retry as object when the integer
            # cast is rejected.
            try:
                converted = arr.astype(dtype, copy=False)
            except ValueError:
                converted = arr.astype(object, copy=False)
            return converted

    if is_sparse(arr) and not is_sparse(dtype):
        # SparseArray.astype(dtype) would produce Sparse[dtype] rather than
        # the requested dtype, so densify first and cast the dense result.
        #
        # mypy reports [arg-type] on the astype call below because ``dtype``
        # is typed "Union[dtype[Any], ExtensionDtype]" while ndarray.astype
        # expects a numpy-compatible dtype specifier; hence the ignore.
        dense = cast("SparseArray", arr).to_dense()
        return dense.astype(dtype, copy=False)  # type: ignore[arg-type]

    # astype_array includes ensure_wrapped_if_datetimelike
    return astype_array(arr, dtype=dtype, copy=False)
Beispiel #4
0
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Convert ``arr`` to the common ``dtype``, taking care of the special cases
    a bare ``arr.astype(dtype)`` gets wrong.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        The array to convert.
    dtype : np.dtype or ExtensionDtype
        The common dtype to convert to.

    Returns
    -------
    ndarray or ExtensionArray
        ``arr`` itself when its dtype already equals ``dtype``; otherwise the
        converted array.
    """
    if is_dtype_equal(arr.dtype, dtype):
        # Nothing to convert.
        return arr

    if is_categorical_dtype(arr.dtype):
        if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer):
            # A categorical of ints yields an integer result dtype, yet the
            # categorical may hold NAs; retry as object when the integer
            # cast is rejected.
            try:
                converted = arr.astype(dtype, copy=False)
            except ValueError:
                converted = arr.astype(object, copy=False)
            return converted

    if is_sparse(arr) and not is_sparse(dtype):
        # SparseArray.astype(dtype) would produce Sparse[dtype] rather than
        # the requested dtype, so densify first and cast the dense result.
        #
        # mypy reports [arg-type] on the astype call below because ``dtype``
        # is typed "Union[dtype[Any], ExtensionDtype]" while ndarray.astype
        # expects a numpy-compatible dtype specifier; hence the ignore.
        dense = cast(SparseArray, arr).to_dense()
        return dense.astype(dtype, copy=False)  # type: ignore[arg-type]

    np_datetimelike = isinstance(arr, np.ndarray) and arr.dtype.kind in ("m", "M")
    if np_datetimelike and dtype is np.dtype("object"):
        # Wrap raw datetime-like ndarrays in an EA so that astype(object)
        # gives Timestamp/Timedelta. This can happen when concat_compat is
        # called directly on arrays (not coming from Index/Series._values),
        # e.g. in BlockManager.quantile.
        arr = ensure_wrapped_if_datetimelike(arr)

    if isinstance(dtype, ExtensionDtype) and isinstance(arr, np.ndarray):
        # numpy's astype cannot handle ExtensionDtypes
        return pd_array(arr, dtype=dtype, copy=False)

    # Every remaining combination goes through the array's own astype.
    return arr.astype(dtype, copy=False)
Beispiel #5
0
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Coerce an array to int64 when that is safe, else uint64, else float64.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool, default False
        Forwarded to ``arr.astype``: whether to copy the original array or
        reuse it in place, if possible.

    Returns
    -------
    np.ndarray
        The input cast to int64 if a "safe" cast allows it, otherwise to
        uint64 if a "safe" cast allows that, otherwise to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type will remain
    unchanged, because the "safe" cast to int64 is rejected while the cast
    to uint64 is accepted.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    try:
        # error: Unexpected keyword argument "casting" for "astype"
        as_signed = arr.astype(  # type: ignore[call-arg]
            "int64", copy=copy, casting="safe"
        )
    except TypeError:
        # Not safely representable as int64; try uint64 next.
        pass
    else:
        return as_signed

    try:
        # error: Unexpected keyword argument "casting" for "astype"
        as_unsigned = arr.astype(  # type: ignore[call-arg]
            "uint64", copy=copy, casting="safe"
        )
    except TypeError:
        if not is_extension_array_dtype(arr.dtype):
            return arr.astype("float64", copy=copy)
        # Extension arrays are converted through to_numpy, with np.nan as
        # the missing-value marker.
        # error: Item "ndarray" of "Union[ExtensionArray, ndarray]" has no
        # attribute "to_numpy"  [union-attr]
        return arr.to_numpy(  # type: ignore[union-attr]
            dtype="float64", na_value=np.nan
        )
    return as_unsigned
Beispiel #6
0
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Convert an ndarray or ExtensionArray to ``dtype``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The array to convert.
    dtype : dtype object
        Target dtype.
    copy : bool, default False
        Whether to copy; also honoured when ``values`` already has ``dtype``.

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``values`` is datetime64/timedelta64 and ``dtype`` is a numpy
        integer dtype whose itemsize is not 8.
    """
    source_is_datetimelike = values.dtype.kind in ("m", "M")
    if (
        source_is_datetimelike
        and dtype.kind in ("i", "u")
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        raise TypeError(
            rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        )

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        # naive datetime64 -> tz-aware goes through a dedicated helper
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    if is_dtype_equal(values.dtype, dtype):
        # Already the right dtype: only the copy request matters.
        return values.copy() if copy else values

    if isinstance(values, np.ndarray):
        result = astype_nansafe(values, dtype, copy=copy)
    else:
        # i.e. ExtensionArray
        result = values.astype(dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(result.dtype.type, str):
        result = np.array(result, dtype=object)

    return result
Beispiel #7
0
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Take elements from ``arr`` at the positions given by ``indexer``.

    Inputs that are not ndarrays are handled by calling their own ``take``
    method; ndarrays are handed to ``_take_nd_ndarray``.

    Note: this function assumes that the indexer is a valid(ated) indexer
    with no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take; subarrays corresponding to -1 value
        indices are filled with fill_value.
    axis : int, default 0
        Axis to take from.
    fill_value : any, optional
        Fill value to replace -1 values with. When not given, the value
        returned by ``na_value_for_dtype(arr.dtype, compat=False)`` is used.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)
    elif isinstance(arr.dtype, np.dtype) and arr.dtype.kind in "mM":
        promoted_dtype, fill_value = maybe_promote(arr.dtype, fill_value)
        if arr.dtype != promoted_dtype:
            # EA.take is strict about returning a new object of the same type
            # so for that case cast upfront
            arr = arr.astype(promoted_dtype)

    if isinstance(arr, np.ndarray):
        return _take_nd_ndarray(
            np.asarray(arr), indexer, axis, fill_value, allow_fill
        )

    # From here on ``arr`` is not an ndarray, i.e. an ExtensionArray.
    if is_1d_only_ea_obj(arr):
        # No ``axis`` is passed for 1D-only extension arrays.
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    # i.e. DatetimeArray, TimedeltaArray
    nd_backed = cast("NDArrayBackedExtensionArray", arr)
    return nd_backed.take(
        indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
    )