# Example 1
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Return an array indicating which values of the input array are NaN / NA.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool, default False
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if is_extension_array_dtype(dtype):
        if inf_as_na and is_categorical_dtype(dtype):
            # Categoricals are checked element-wise on their NumPy form when
            # infinities count as NA.
            result = libmissing.isnaobj_old(values.to_numpy())
        else:
            # Every other extension array supplies its own NA mask.
            result = values.isna()
    elif is_string_dtype(dtype):
        result = _isna_string_dtype(values, dtype, inf_as_na=inf_as_na)
    elif needs_i8_conversion(dtype):
        # this is the NaT pattern
        result = values.view("i8") == iNaT
    else:
        if inf_as_na:
            # Non-finite covers NaN as well as +/-inf.
            result = ~np.isfinite(values)
        else:
            result = np.isnan(values)

    return result
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype if possible.

    If it's not possible, potentially because of overflow,
    convert the array to float64 instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool, default False
        Whether to copy the original array or reuse
        it in place, if possible.

    Returns
    -------
    out_arr : np.ndarray
        The input array cast as int64 if possible without overflow.
        Otherwise the input array cast to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type
    will remain unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    try:
        # casting="safe" raises TypeError instead of silently losing values
        return arr.astype("int64", copy=copy, casting="safe")  # type: ignore
    except TypeError:
        pass
    try:
        return arr.astype("uint64", copy=copy, casting="safe")  # type: ignore
    except TypeError:
        # Neither integer dtype can hold the data safely; fall back to float64.
        return arr.astype("float64", copy=copy)
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        Array to cast.
    dtype : np.dtype or ExtensionDtype
        Target (common) dtype.

    Returns
    -------
    ndarray or ExtensionArray
        ``arr`` itself when its dtype already equals ``dtype``, otherwise the
        cast array (cast with ``copy=False``).
    """
    if is_dtype_equal(arr.dtype, dtype):
        return arr
    if (
        is_categorical_dtype(arr.dtype)
        and isinstance(dtype, np.dtype)
        and np.issubdtype(dtype, np.integer)
    ):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array

        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
        # Tuple[Any, Any]]]"  [arg-type]
        arr = cast("SparseArray", arr)
        return arr.to_dense().astype(dtype, copy=False)  # type: ignore[arg-type]

    # astype_array includes ensure_wrapped_if_datetimelike
    return astype_array(arr, dtype=dtype, copy=False)
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        Array to cast.
    dtype : np.dtype or ExtensionDtype
        Target (common) dtype.

    Returns
    -------
    ndarray or ExtensionArray
        The array cast to ``dtype`` (cast with ``copy=False``).
    """
    if (
        is_categorical_dtype(arr.dtype)
        and isinstance(dtype, np.dtype)
        and np.issubdtype(dtype, np.integer)
    ):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array
        arr = cast(SparseArray, arr)
        return arr.to_dense().astype(dtype, copy=False)

    if (
        isinstance(arr, np.ndarray)
        and arr.dtype.kind in ["m", "M"]
        and dtype is np.dtype("object")
    ):
        # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
        # this can happen when concat_compat is called directly on arrays (when arrays
        # are not coming from Index/Series._values), eg in BlockManager.quantile
        arr = array(arr)

    if is_extension_array_dtype(dtype):
        if isinstance(arr, np.ndarray):
            # numpy's astype cannot handle ExtensionDtypes
            return array(arr, dtype=dtype, copy=False)
    return arr.astype(dtype, copy=False)
def take_1d(
    arr: ArrayLike,
    indexer: npt.NDArray[np.intp],
    allow_fill: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> ArrayLike:
    Specialized version for 1D arrays. Differences compared to `take_nd`:

    - Assumes input array has already been converted to numpy array / EA
    - Assumes indexer is already guaranteed to be intp dtype ndarray
    - Only works for 1D arrays

    To ensure the lowest possible overhead.

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.

    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take (validated indices, intp dtype).
    fill_value : any, default np.nan
        Fill value to replace -1 values with
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask. Result is
        undefined if allow_fill == False and -1 is present in indexer.
    mask : np.ndarray, optional, default None
        If `allow_fill` is True, and the mask (where indexer == -1) is already
        known, it can be passed to avoid recomputation.
    if not isinstance(arr, np.ndarray):
        # ExtensionArray -> dispatch to their method
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    if not allow_fill:
        return arr.take(indexer)

    dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, True, mask)

    # at this point, it's guaranteed that dtype can hold both the arr values
    # and the fill_value
    out = np.empty(indexer.shape, dtype=dtype)

    func = _get_take_nd_function(arr.ndim,
    func(arr, indexer, out, fill_value)

    return out
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Return an array indicating which values of the input array are NaN / NA.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool, default False
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if is_extension_array_dtype(dtype):
        if inf_as_na and is_categorical_dtype(dtype):
            # error: Item "ndarray" of "Union[ExtensionArray, ndarray]" has no attribute
            # "to_numpy"
            result = libmissing.isnaobj_old(
                values.to_numpy()  # type: ignore[union-attr]
            )
        else:
            # error: Item "ndarray" of "Union[ExtensionArray, ndarray]" has no attribute
            # "isna"
            result = values.isna()  # type: ignore[union-attr]
    elif is_string_dtype(dtype):
        # error: Argument 1 to "_isna_string_dtype" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        # error: Argument 2 to "_isna_string_dtype" has incompatible type
        # "ExtensionDtype"; expected "dtype[Any]"
        result = _isna_string_dtype(
            values,  # type: ignore[arg-type]
            dtype,  # type: ignore[arg-type]
            inf_as_na=inf_as_na,
        )
    elif needs_i8_conversion(dtype):
        # this is the NaT pattern
        result = values.view("i8") == iNaT
    else:
        if inf_as_na:
            # error: Argument 1 to "__call__" of "ufunc" has incompatible type
            # "ExtensionArray"; expected "Union[Union[int, float, complex, str, bytes,
            # generic], Sequence[Union[int, float, complex, str, bytes, generic]],
            # Sequence[Sequence[Any]], _SupportsArray]"
            result = ~np.isfinite(values)  # type: ignore[arg-type]
        else:
            # error: Argument 1 to "__call__" of "ufunc" has incompatible type
            # "ExtensionArray"; expected "Union[Union[int, float, complex, str, bytes,
            # generic], Sequence[Union[int, float, complex, str, bytes, generic]],
            # Sequence[Sequence[Any]], _SupportsArray]"
            result = np.isnan(values)  # type: ignore[arg-type]

    return result
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Note: this function assumes that the indexer is a valid(ated) indexer with
    no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take, subarrays corresponding to -1 value
        indices are filled with fill_value
    axis : int, default 0
        Axis to take from
    fill_value : any, default lib.no_default
        Fill value to replace -1 values with. When left at the default, the
        NA value for ``arr.dtype`` is used.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    if not isinstance(arr, np.ndarray):
        # i.e. ExtensionArray,
        # includes for EA to catch DatetimeArray, TimedeltaArray
        if not is_1d_only_ea_obj(arr):
            # i.e. DatetimeArray, TimedeltaArray
            arr = cast("NDArrayBackedExtensionArray", arr)
            return arr.take(
                indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
            )

        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    arr = np.asarray(arr)
    return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)
def take_1d(
    arr: ArrayLike,
    indexer: np.ndarray,
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized version for 1D arrays. Differences compared to `take_nd`:

    - Assumes input array has already been converted to numpy array / EA
    - Assumes indexer is already guaranteed to be int64 dtype ndarray
    - Only works for 1D arrays

    To ensure the lowest possible overhead.

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.

    TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
    be removed again if we don't end up with ArrayManager.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : np.ndarray
        1-D array of indices to take.
    fill_value : any, default None
        Fill value to replace -1 values with.
    allow_fill : bool, default True
        If False, ``arr.take(indexer)`` is returned directly for ndarrays and
        no filling is done.

    Returns
    -------
    np.ndarray or ExtensionArray
        The taken values.
    """
    if not isinstance(arr, np.ndarray):
        # ExtensionArray -> dispatch to their method

        # error: Argument 1 to "take" of "ExtensionArray" has incompatible type
        # "ndarray"; expected "Sequence[int]"
        return arr.take(
            indexer,  # type: ignore[arg-type]
            fill_value=fill_value,
            allow_fill=allow_fill,
        )

    if not allow_fill:
        return arr.take(indexer)

    indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, None, fill_value, allow_fill
    )

    # at this point, it's guaranteed that dtype can hold both the arr values
    # and the fill_value
    out = np.empty(indexer.shape, dtype=dtype)

    func = _get_take_nd_function(
        arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info
    )
    func(arr, indexer, out, fill_value)

    return out
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        Array to cast.
    dtype : np.dtype or ExtensionDtype
        Target (common) dtype.

    Returns
    -------
    ndarray or ExtensionArray
        ``arr`` itself when its dtype already equals ``dtype``, otherwise the
        cast array (cast with ``copy=False``).
    """
    if is_dtype_equal(arr.dtype, dtype):
        return arr
    if (
        is_categorical_dtype(arr.dtype)
        and isinstance(dtype, np.dtype)
        and np.issubdtype(dtype, np.integer)
    ):
        # problem case: categorical of int -> gives int as result dtype,
        # but categorical can contain NAs -> fall back to object dtype
        try:
            return arr.astype(dtype, copy=False)
        except ValueError:
            return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array

        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
        # Tuple[Any, Any]]]"  [arg-type]
        arr = cast(SparseArray, arr)
        return arr.to_dense().astype(dtype, copy=False)  # type: ignore[arg-type]

    if (
        isinstance(arr, np.ndarray)
        and arr.dtype.kind in ["m", "M"]
        and dtype is np.dtype("object")
    ):
        # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
        # this can happen when concat_compat is called directly on arrays (when arrays
        # are not coming from Index/Series._values), eg in BlockManager.quantile
        arr = ensure_wrapped_if_datetimelike(arr)

    if isinstance(dtype, ExtensionDtype):
        if isinstance(arr, np.ndarray):
            # numpy's astype cannot handle ExtensionDtypes
            return pd_array(arr, dtype=dtype, copy=False)
        return arr.astype(dtype, copy=False)

    return arr.astype(dtype, copy=False)
def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
    """
    Check if this is a ndarray[bool] or an ndarray[object] of bool objects.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray

    Returns
    -------
    bool
        True for a bool-dtype ndarray, or an object-dtype ndarray for which
        ``lib.is_bool_array`` reports True; False for anything else,
        including every non-ndarray input.

    Notes
    -----
    This does not include the special treatment is_bool_dtype uses for
    non-ndarray inputs; those always return False here.
    """
    if not isinstance(arr, np.ndarray):
        return False

    dtype = arr.dtype
    if dtype == np.dtype(bool):
        return True
    elif dtype == np.dtype("object"):
        # Flatten in memory order ("K") so the element check sees a 1-D array.
        return lib.is_bool_array(arr.ravel("K"))
    return False
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Helper function for `arr.astype(common_dtype)` but handling all special
    cases.

    Parameters
    ----------
    arr : ndarray or ExtensionArray
        Array to cast.
    dtype : np.dtype or ExtensionDtype
        Target (common) dtype.

    Returns
    -------
    ndarray or ExtensionArray
        ``arr`` itself when its dtype already equals ``dtype``, otherwise the
        cast array (cast with ``copy=False``).
    """
    if is_dtype_equal(arr.dtype, dtype):
        return arr

    if is_sparse(arr) and not is_sparse(dtype):
        # TODO(2.0): remove special case once SparseArray.astype deprecation
        #  is enforced.
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array

        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
        # Tuple[Any, Any]]]"  [arg-type]
        arr = cast("SparseArray", arr)
        return arr.to_dense().astype(dtype, copy=False)  # type: ignore[arg-type]

    # astype_array includes ensure_wrapped_if_datetimelike
    return astype_array(arr, dtype=dtype, copy=False)
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : dtype object
    copy : bool, default False
        copy if indicated

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If a datetime64/timedelta64 array is cast to a NumPy integer dtype
        whose itemsize is not 8 bytes.
    """
    if (
        values.dtype.kind in ["m", "M"]
        and dtype.kind in ["i", "u"]
        and isinstance(dtype, np.dtype)
        and dtype.itemsize != 8
    ):
        # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
        msg = rf"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
        raise TypeError(msg)

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    if is_dtype_equal(values.dtype, dtype):
        # Nothing to convert; only honour the copy request.
        if copy:
            return values.copy()
        return values

    if not isinstance(values, np.ndarray):
        # i.e. ExtensionArray
        values = values.astype(dtype, copy=copy)

    else:
        values = astype_nansafe(values, dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):
        values = np.array(values, dtype=object)

    return values
def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
    If we have a length-1 array and an index describing how long we expect
    the result to be, repeat the array.
    if index is not None:
        if 1 == len(arr) != len(index):
            arr = arr.repeat(len(index))
    return arr
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype if possible.

    If it's not possible, potentially because of overflow,
    convert the array to float64 instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool, default False
        Whether to copy the original array or reuse
        it in place, if possible.

    Returns
    -------
    out_arr : np.ndarray
        The input array cast as int64 if possible without overflow.
        Otherwise the input array cast to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type
    will remain unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    try:
        # error: Unexpected keyword argument "casting" for "astype"
        return arr.astype(  # type: ignore[call-arg]
            "int64", copy=copy, casting="safe"
        )
    except TypeError:
        pass
    try:
        # error: Unexpected keyword argument "casting" for "astype"
        return arr.astype(  # type: ignore[call-arg]
            "uint64", copy=copy, casting="safe"
        )
    except TypeError:
        if is_extension_array_dtype(arr.dtype):
            # pandas/core/dtypes/common.py:168: error: Item "ndarray" of
            # "Union[ExtensionArray, ndarray]" has no attribute "to_numpy"  [union-attr]
            return arr.to_numpy(  # type: ignore[union-attr]
                dtype="float64", na_value=np.nan
            )
        return arr.astype("float64", copy=copy)
def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
    """
    ExtensionArray-compatible implementation of array_equivalent.

    Parameters
    ----------
    left, right : ndarray or ExtensionArray

    Returns
    -------
    bool
        False when the dtypes differ; otherwise the result of
        ``left.equals(right)`` for extension arrays or of
        ``array_equivalent`` for everything else.
    """
    if not is_dtype_equal(left.dtype, right.dtype):
        return False
    elif isinstance(left, ABCExtensionArray):
        return left.equals(right)
    else:
        return array_equivalent(left, right, dtype_equal=True)
def take_1d(
    arr: ArrayLike,
    indexer: npt.NDArray[np.intp],
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized version for 1D arrays. Differences compared to `take_nd`:

    - Assumes input array has already been converted to numpy array / EA
    - Assumes indexer is already guaranteed to be intp dtype ndarray
    - Only works for 1D arrays

    To ensure the lowest possible overhead.

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take.
    fill_value : any, default None
        Fill value to replace -1 values with.
    allow_fill : bool, default True
        If False, ``arr.take(indexer)`` is returned directly for ndarrays and
        no filling is done.

    Returns
    -------
    np.ndarray or ExtensionArray
        The taken values.
    """
    indexer = ensure_platform_int(indexer)

    if not isinstance(arr, np.ndarray):
        # ExtensionArray -> dispatch to their method
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    if not allow_fill:
        return arr.take(indexer)

    dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, True
    )

    # at this point, it's guaranteed that dtype can hold both the arr values
    # and the fill_value
    out = np.empty(indexer.shape, dtype=dtype)

    func = _get_take_nd_function(
        arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info
    )
    func(arr, indexer, out, fill_value)

    return out
def extract_bool_array(mask: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    If we have a SparseArray or BooleanArray, convert it to ndarray[bool].

    Parameters
    ----------
    mask : ndarray or ExtensionArray

    Returns
    -------
    np.ndarray[bool]
        For extension arrays, missing entries are converted to False.
    """
    if isinstance(mask, ExtensionArray):
        # We could have BooleanArray, Sparse[bool], ...
        #  Except for BooleanArray, this is equivalent to just
        #  np.asarray(mask, dtype=bool)
        mask = mask.to_numpy(dtype=bool, na_value=False)

    mask = np.asarray(mask, dtype=bool)
    return mask
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Return an array indicating which values of the input array are NaN / NA.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool, default False
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if not isinstance(values, np.ndarray):
        # i.e. ExtensionArray
        if inf_as_na and is_categorical_dtype(dtype):
            result = libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)
        else:
            # error: Incompatible types in assignment (expression has type
            # "Union[ndarray[Any, Any], ExtensionArraySupportsAnyAll]", variable has
            # type "ndarray[Any, dtype[bool_]]")
            result = values.isna()  # type: ignore[assignment]
    elif is_string_or_object_np_dtype(values.dtype):
        result = _isna_string_dtype(values, inf_as_na=inf_as_na)
    elif needs_i8_conversion(dtype):
        # this is the NaT pattern
        result = values.view("i8") == iNaT
    else:
        if inf_as_na:
            # Non-finite covers NaN as well as +/-inf.
            result = ~np.isfinite(values)
        else:
            result = np.isnan(values)

    return result
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    out: Optional[np.ndarray] = None,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take, subarrays corresponding to -1 value
        indices are filled with fill_value
    axis : int, default 0
        Axis to take from
    out : ndarray or None, default None
        Optional output array, must be appropriate type to hold input and
        fill_value together, if indexer has any -1 value entries; call
        maybe_promote to determine this type for any fill_value
    fill_value : any, default lib.no_default
        Fill value to replace -1 values with. When left at the default, the
        NA value for ``arr.dtype`` is used.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    if not isinstance(arr, np.ndarray):
        # i.e. ExtensionArray,
        # includes for EA to catch DatetimeArray, TimedeltaArray
        # NOTE: ``axis`` and ``out`` are not forwarded on this path.
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    arr = np.asarray(arr)
    return _take_nd_ndarray(arr, indexer, axis, out, fill_value, allow_fill)
def hash_array(
    vals: ArrayLike,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
) -> np.ndarray:
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Raises
    ------
    TypeError
        If ``vals`` has no ``dtype`` attribute, or is neither an ndarray nor
        an ExtensionArray.
    """
    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")
    dtype = vals.dtype

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values. (This check is above the complex check so that we don't ask
    # numpy if categorical is a subdtype of complex, as it will choke).
    if is_categorical_dtype(dtype):
        vals = cast("Categorical", vals)
        return _hash_categorical(vals, encoding, hash_key)

    elif isinstance(vals, ABCExtensionArray):
        vals, _ = vals._values_for_factorize()

    elif not isinstance(vals, np.ndarray):
        # GH#42003
        raise TypeError(
            "hash_array requires np.ndarray or ExtensionArray, not "
            f"{type(vals).__name__}. Use hash_pandas_object instead."
        )

    return _hash_ndarray(vals, encoding, hash_key, categorize)
def hash_array(
    vals: ArrayLike,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
) -> np.ndarray:
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    1d uint64 numpy array of hash values, same length as the vals

    Raises
    ------
    TypeError
        If ``vals`` has no ``dtype`` attribute.
    """
    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")
    dtype = vals.dtype

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values. (This check is above the complex check so that we don't ask
    # numpy if categorical is a subdtype of complex, as it will choke).
    if is_categorical_dtype(dtype):
        # error: Incompatible types in assignment (expression has type "Categorical",
        # variable has type "ndarray")
        vals = cast("Categorical", vals)  # type: ignore[assignment]
        # error: Argument 1 to "_hash_categorical" has incompatible type "ndarray";
        # expected "Categorical"
        return _hash_categorical(vals, encoding, hash_key)  # type: ignore[arg-type]
    elif is_extension_array_dtype(dtype):
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "ExtensionArray")
        # error: "ndarray" has no attribute "_values_for_factorize"
        vals, _ = vals._values_for_factorize()  # type: ignore[assignment,attr-defined]

    # error: Argument 1 to "_hash_ndarray" has incompatible type "ExtensionArray";
    # expected "ndarray"
    return _hash_ndarray(vals, encoding, hash_key, categorize)  # type: ignore[arg-type]
def extract_bool_array(mask: ArrayLike) -> np.ndarray:
    """
    If we have a SparseArray or BooleanArray, convert it to ndarray[bool].

    Parameters
    ----------
    mask : ndarray or ExtensionArray

    Returns
    -------
    np.ndarray[bool]
        For extension arrays, missing entries are converted to False.
    """
    if isinstance(mask, ExtensionArray):
        # We could have BooleanArray, Sparse[bool], ...
        #  Except for BooleanArray, this is equivalent to just
        #  np.asarray(mask, dtype=bool)

        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "ExtensionArray")
        mask = mask.to_numpy(dtype=bool, na_value=False)  # type: ignore[assignment]

    # error: Incompatible types in assignment (expression has type "ndarray", variable
    # has type "ExtensionArray")
    mask = np.asarray(mask, dtype=bool)  # type: ignore[assignment]
    # error: Incompatible return value type (got "ExtensionArray", expected "ndarray")
    return mask  # type: ignore[return-value]
def quantile_compat(
    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
    """
    Compute the quantiles of the given values for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : np.ndarray[float64]
    interpolation : str

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if isinstance(values, np.ndarray):
        # ndarrays carry no NA mask of their own, so build one here.
        fill_value = na_value_for_dtype(values.dtype, compat=False)
        mask = isna(values)
        return quantile_with_mask(values, mask, fill_value, qs, interpolation)
    else:
        # Extension arrays implement their own quantile logic.
        return values._quantile(qs, interpolation)
def take_1d(
    arr: ArrayLike,
    indexer: np.ndarray,
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized version for 1D arrays. Differences compared to take_nd:

    - Assumes input (arr, indexer) has already been converted to numpy array / EA
    - Only works for 1D arrays

    To ensure the lowest possible overhead.

    TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
    be removed again if we don't end up with ArrayManager.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : np.ndarray
        1-D array of indices to take.
    fill_value : any, default None
        Fill value to replace -1 values with.
    allow_fill : bool, default True
        Forwarded to the extension array's ``take`` or to the indexer
        preprocessing step.

    Returns
    -------
    np.ndarray or ExtensionArray
        The taken values.
    """
    if not isinstance(arr, np.ndarray):
        # ExtensionArray -> dispatch to their method
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
        arr, indexer, 0, None, fill_value, allow_fill
    )

    # at this point, it's guaranteed that dtype can hold both the arr values
    # and the fill_value
    out = np.empty(indexer.shape, dtype=dtype)

    func = _get_take_nd_function(
        arr.ndim, arr.dtype, out.dtype, axis=0, mask_info=mask_info
    )
    func(arr, indexer, out, fill_value)

    return out
def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
    """
    Cumulative function with skipna support.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    KeyError
        If ``accum_func`` is not one of the four supported functions.
    """
    # mask_a: neutral value substituted for NA before accumulating;
    # mask_b: value written back at the NA positions afterwards.
    mask_a, mask_b = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind in ["m", "M"]:
        # GH#30460, GH#29058
        # numpy 1.18 started sorting NaTs at the end instead of beginning,
        #  so we need to work around to maintain backwards-consistency.
        orig_dtype = values.dtype

        # We need to define mask before masking NaTs
        mask = isna(values)

        if accum_func == np.minimum.accumulate:
            # Note: the accum_func comparison fails as an "is" comparison
            # The i8 view is overwritten in place at the NaT positions and
            # restored further down once the accumulation is done.
            y = values.view("i8")
            y[mask] = np.iinfo(np.int64).max
            changed = True
        else:
            y = values
            changed = False

        result = accum_func(y.view("i8"), axis=0)
        if skipna:
            result[mask] = iNaT
        elif accum_func == np.minimum.accumulate:
            # Restore NaTs that we masked previously
            nz = (~np.asarray(mask)).nonzero()[0]
            if len(nz):
                # everything up to the first non-na entry stays NaT
                result[: nz[0]] = iNaT

        if changed:
            # restore NaT elements
            y[mask] = iNaT  # TODO: could try/finally for this?

        if isinstance(values, np.ndarray):
            result = result.view(orig_dtype)
        else:
            # DatetimeArray
            result = type(values)._from_sequence(result, dtype=orig_dtype)

    elif skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
        # Work on a copy so the caller's array is not modified.
        vals = values.copy()
        mask = isna(vals)
        vals[mask] = mask_a
        result = accum_func(vals, axis=0)
        result[mask] = mask_b
    else:
        result = accum_func(values, axis=0)

    return result
def astype_array_safe(values: ArrayLike,
                      dtype,
                      copy: bool = False,
                      errors: IgnoreRaise = "raise") -> ArrayLike:
    """
    Cast array (ndarray or ExtensionArray) to the new dtype.

    This is the implementation behind DataFrame/Series.astype and carries
    the pandas-specific casting logic (e.g. NaN-safety, converting str to
    object).

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The array to cast.
    dtype : str, dtype convertible
        Target dtype. Must be a dtype *instance* (or something
        ``pandas_dtype`` can interpret), not an ExtensionDtype class.
    copy : bool, default False
        Copy if indicated.
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is not one of 'raise' / 'ignore', or (with
        ``errors='raise'``) if the underlying cast raises ValueError.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance, or
        (with ``errors='raise'``) if the underlying cast raises TypeError.
    """
    errors_legal_values = ("raise", "ignore")

    # Validate `errors` up front so a typo is reported even when the cast
    # itself would have succeeded.
    if errors not in errors_legal_values:
        invalid_arg = (
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'")
        raise ValueError(invalid_arg)

    # Passing e.g. ``CategoricalDtype`` instead of ``CategoricalDtype()`` is
    # rejected with an actionable message.
    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        msg = (f"Expected an instance of {dtype.__name__}, "
               "but got the class instead. Try instantiating 'dtype'.")
        raise TypeError(msg)

    dtype = pandas_dtype(dtype)
    if isinstance(dtype, PandasDtype):
        # Ensure we don't end up with a PandasArray
        dtype = dtype.numpy_dtype

    if (is_datetime64_dtype(values.dtype)
            # need to do np.dtype check instead of is_datetime64_dtype
            #  otherwise pyright complains
            and isinstance(dtype, np.dtype) and dtype.kind == "M" and
            not is_unitless(dtype) and
            not is_dtype_equal(dtype, values.dtype)):
        # unit conversion, we would re-cast to nanosecond, so this is
        #  effectively just a copy (regardless of copy kwd)
        # TODO(2.0): remove special-case
        return values.copy()

    try:
        new_values = astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        #  trying to convert to float
        if errors == "ignore":
            # best-effort mode: hand back the input untouched
            new_values = values
        else:
            raise

    return new_values
def astype_dt64_to_dt64tz(
    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
) -> DatetimeArray:
    """
    Cast datetime64 data between timezone-naive and timezone-aware dtypes.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        Datetime-like data; wrapped into a DatetimeArray before casting.
    dtype : np.dtype or DatetimeTZDtype
        Target dtype. A DatetimeTZDtype means the result is tz-aware.
    copy : bool
        Whether to copy. Not consulted in the naive -> aware branch of the
        ``via_utc=False`` path.
    via_utc : bool, default False
        If True (Series.astype behavior), naive values are interpreted as
        UTC wall times and then converted to ``dtype.tz``. The caller must
        guarantee that ``values`` is naive and ``dtype`` is tz-aware.
        If False (DatetimeArray/DatetimeIndex.astype behavior), naive values
        are localized directly to ``dtype.tz``.

    Returns
    -------
    DatetimeArray

    Raises
    ------
    NotImplementedError
        With ``via_utc=False`` when both sides are tz-naive; that case is
        expected to be handled by the caller.

    Warns
    -----
    FutureWarning
        For naive -> aware and aware -> naive conversions, which are
        deprecated.
    """
    # GH#33401 we have inconsistent behaviors between
    #  Datetimeindex[naive].astype(tzaware)
    #  Series[dt64].astype(tzaware)
    # This collects them in one place to prevent further fragmentation.

    import warnings

    from pandas.core.construction import ensure_wrapped_if_datetimelike
    from pandas.util._exceptions import find_stack_level

    values = ensure_wrapped_if_datetimelike(values)
    values = cast("DatetimeArray", values)
    aware = isinstance(dtype, DatetimeTZDtype)

    if via_utc:
        # Series.astype behavior

        # caller is responsible for checking this
        assert values.tz is None and aware
        dtype = cast(DatetimeTZDtype, dtype)

        if copy:
            # this should be the only copy
            values = values.copy()

        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version.  Use ser.dt.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # GH#33401 this doesn't match DatetimeArray.astype, which
        #  goes through the `not via_utc` path
        return values.tz_localize("UTC").tz_convert(dtype.tz)

    else:
        # DatetimeArray/DatetimeIndex.astype behavior
        if values.tz is None and aware:
            dtype = cast(DatetimeTZDtype, dtype)
            warnings.warn(
                "Using .astype to convert from timezone-naive dtype to "
                "timezone-aware dtype is deprecated and will raise in a "
                "future version.  Use obj.tz_localize instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

            return values.tz_localize(dtype.tz)

        elif aware:
            # GH#18951: datetime64_tz dtype but not equal means different tz
            dtype = cast(DatetimeTZDtype, dtype)
            result = values.tz_convert(dtype.tz)
            if copy:
                result = result.copy()
            return result

        elif values.tz is not None:
            warnings.warn(
                "Using .astype to convert from timezone-aware dtype to "
                "timezone-naive dtype is deprecated and will raise in a "
                "future version.  Use obj.tz_localize(None) or "
                "obj.tz_convert('UTC').tz_localize(None) instead",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

            result = values.tz_convert("UTC").tz_localize(None)
            if copy:
                result = result.copy()
            return result

        raise NotImplementedError("dtype_equal case should be handled elsewhere")