예제 #1
0
파일: missing.py 프로젝트: yaruyi/pandas
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Build a boolean mask flagging the NaN / NA entries of ``values``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool, default False
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if is_extension_array_dtype(dtype):
        if not (inf_as_na and is_categorical_dtype(dtype)):
            # dispatch to the array's own ``isna`` implementation
            return values.isna()
        # categorical with inf_as_na: run the object-level check on the
        # values converted with ``to_numpy``
        return libmissing.isnaobj_old(values.to_numpy())

    if is_string_dtype(dtype):
        return _isna_string_dtype(values, dtype, inf_as_na=inf_as_na)

    if needs_i8_conversion(dtype):
        # this is the NaT pattern: compare the int64 view against iNaT
        return values.view("i8") == iNaT

    # remaining dtypes go through the numpy ufuncs directly
    return ~np.isfinite(values) if inf_as_na else np.isnan(values)
예제 #2
0
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype if possible.

    If it's not possible, potentially because of overflow,
    convert the array to float64 instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool, default False
        Whether to copy the original array or reuse
        it in place, if possible.

    Returns
    -------
    out_arr : The input array cast as int64 if
              possible without overflow.
              Otherwise the input array cast to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type
    will remain unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    try:
        # a "safe" cast that is not possible is signalled with TypeError
        return arr.astype("int64", copy=copy, casting="safe")  # type: ignore
    except TypeError:
        # not safely representable as int64 -> try uint64 next
        pass
    try:
        return arr.astype("uint64", copy=copy, casting="safe")  # type: ignore
    except TypeError:
        # neither integer dtype fits: fall back to float64
        return arr.astype("float64", copy=copy)
예제 #3
0
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Cast ``arr`` to ``dtype`` like ``arr.astype(common_dtype)`` would, while
    taking care of all the special cases.
    """
    if is_dtype_equal(arr.dtype, dtype):
        # already the requested dtype: hand the input back untouched
        return arr

    if is_categorical_dtype(arr.dtype):
        if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer):
            # problem case: categorical of int -> gives int as result dtype,
            # but categorical can contain NAs -> fall back to object dtype
            try:
                return arr.astype(dtype, copy=False)
            except ValueError:
                return arr.astype(object, copy=False)

    needs_densify = is_sparse(arr) and not is_sparse(dtype)
    if needs_densify:
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array

        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
        # Tuple[Any, Any]]]"  [arg-type]
        dense = cast("SparseArray", arr).to_dense()
        return dense.astype(dtype, copy=False)  # type: ignore[arg-type]

    # astype_array includes ensure_wrapped_if_datetimelike
    return astype_array(arr, dtype=dtype, copy=False)
예제 #4
0
def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Cast ``arr`` to ``dtype`` like ``arr.astype(common_dtype)`` would, while
    taking care of all the special cases.
    """
    if is_categorical_dtype(arr.dtype):
        if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer):
            # problem case: categorical of int -> gives int as result dtype,
            # but categorical can contain NAs -> fall back to object dtype
            try:
                return arr.astype(dtype, copy=False)
            except ValueError:
                return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array
        dense = cast(SparseArray, arr).to_dense()
        return dense.astype(dtype, copy=False)

    is_datetimelike_ndarray = isinstance(arr, np.ndarray) and arr.dtype.kind in [
        "m",
        "M",
    ]
    if is_datetimelike_ndarray and dtype is np.dtype("object"):
        # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
        # this can happen when concat_compat is called directly on arrays (when arrays
        # are not coming from Index/Series._values), eg in BlockManager.quantile
        arr = array(arr)

    if is_extension_array_dtype(dtype) and isinstance(arr, np.ndarray):
        # numpy's astype cannot handle ExtensionDtypes
        return array(arr, dtype=dtype, copy=False)

    return arr.astype(dtype, copy=False)
예제 #5
0
파일: take.py 프로젝트: wkerzendorf/pandas
def take_1d(
    arr: ArrayLike,
    indexer: npt.NDArray[np.intp],
    fill_value=None,
    allow_fill: bool = True,
    mask: npt.NDArray[np.bool_] | None = None,
) -> ArrayLike:
    """
    Low-overhead ``take`` for 1D arrays. Differences compared to `take_nd`:

    - Assumes input array has already been converted to numpy array / EA
    - Assumes indexer is already guaranteed to be intp dtype ndarray
    - Only works for 1D arrays

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take (validated indices, intp dtype).
    fill_value : any, default None
        Fill value to replace -1 values with
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask. Result is
        undefined if allow_fill == False and -1 is present in indexer.
    mask : np.ndarray, optional, default None
        If `allow_fill` is True, and the mask (where indexer == -1) is already
        known, it can be passed to avoid recomputation.
    """
    is_ndarray = isinstance(arr, np.ndarray)
    if not is_ndarray:
        # ExtensionArray -> dispatch to their method
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    if not allow_fill:
        # no filling requested: plain numpy take is enough
        return arr.take(indexer)

    preprocessed = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, True, mask
    )
    out_dtype, fill_value, mask_info = preprocessed

    # at this point, it's guaranteed that out_dtype can hold both the arr
    # values and the fill_value
    result = np.empty(indexer.shape, dtype=out_dtype)

    take_func = _get_take_nd_function(
        arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
    )
    take_func(arr, indexer, result, fill_value)

    return result
예제 #6
0
파일: missing.py 프로젝트: semih384/pandas
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Build a boolean mask flagging the NaN / NA entries of ``values``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool, default False
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if is_extension_array_dtype(dtype):
        if not (inf_as_na and is_categorical_dtype(dtype)):
            # error: Item "ndarray" of "Union[ExtensionArray, ndarray]" has no
            # attribute "isna"
            return values.isna()  # type: ignore[union-attr]

        # error: Item "ndarray" of "Union[ExtensionArray, ndarray]" has no attribute
        # "to_numpy"
        as_numpy = values.to_numpy()  # type: ignore[union-attr]
        return libmissing.isnaobj_old(as_numpy)

    if is_string_dtype(dtype):
        # error: Argument 1 to "_isna_string_dtype" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        # error: Argument 2 to "_isna_string_dtype" has incompatible type
        # "ExtensionDtype"; expected "dtype[Any]"
        return _isna_string_dtype(
            values, dtype, inf_as_na=inf_as_na  # type: ignore[arg-type]
        )

    if needs_i8_conversion(dtype):
        # this is the NaT pattern: compare the int64 view against iNaT
        return values.view("i8") == iNaT

    # error (both ufunc calls below): Argument 1 to "__call__" of "ufunc" has
    # incompatible type "ExtensionArray"; expected "Union[Union[int, float, complex,
    # str, bytes, generic], Sequence[Union[int, float, complex, str, bytes, generic]],
    # Sequence[Sequence[Any]], _SupportsArray]"
    if inf_as_na:
        return ~np.isfinite(values)  # type: ignore[arg-type]
    return np.isnan(values)  # type: ignore[arg-type]
예제 #7
0
파일: take.py 프로젝트: prakhar987/pandas
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Note: this function assumes that the indexer is a valid(ated) indexer with
    no out of bound indices.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take, subarrays corresponding to -1 value
        indices are filled with fill_value
    axis : int, default 0
        Axis to take from
    fill_value : any, default lib.no_default
        Fill value to replace -1 values with. When left at the default, it
        is looked up with ``na_value_for_dtype(arr.dtype, compat=False)``.
    allow_fill : bool, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    if isinstance(arr, np.ndarray):
        arr = np.asarray(arr)
        return _take_nd_ndarray(arr, indexer, axis, fill_value, allow_fill)

    # i.e. ExtensionArray,
    # includes for EA to catch DatetimeArray, TimedeltaArray
    take_kwargs = {"fill_value": fill_value, "allow_fill": allow_fill}
    if not is_1d_only_ea_obj(arr):
        # i.e. DatetimeArray, TimedeltaArray
        arr = cast("NDArrayBackedExtensionArray", arr)
        take_kwargs["axis"] = axis

    return arr.take(indexer, **take_kwargs)
예제 #8
0
파일: take.py 프로젝트: burbanom/pandas
def take_1d(
    arr: ArrayLike,
    indexer: np.ndarray,
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Low-overhead ``take`` for 1D arrays. Differences compared to `take_nd`:

    - Assumes input array has already been converted to numpy array / EA
    - Assumes indexer is already guaranteed to be int64 dtype ndarray
    - Only works for 1D arrays

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.

    TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
    be removed again if we don't end up with ArrayManager.
    """
    if isinstance(arr, np.ndarray):
        if not allow_fill:
            # no filling requested: plain numpy take is enough
            return arr.take(indexer)
    else:
        # ExtensionArray -> dispatch to their method

        # error: Argument 1 to "take" of "ExtensionArray" has incompatible type
        # "ndarray"; expected "Sequence[int]"
        return arr.take(
            indexer,  # type: ignore[arg-type]
            fill_value=fill_value,
            allow_fill=allow_fill,
        )

    preprocessed = _take_preprocess_indexer_and_fill_value(
        arr, indexer, None, fill_value, allow_fill
    )
    indexer, out_dtype, fill_value, mask_info = preprocessed

    # at this point, it's guaranteed that out_dtype can hold both the arr
    # values and the fill_value
    result = np.empty(indexer.shape, dtype=out_dtype)

    take_func = _get_take_nd_function(
        arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
    )
    take_func(arr, indexer, result, fill_value)

    return result
예제 #9
0
파일: concat.py 프로젝트: wesbarnett/pandas
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Cast ``arr`` to ``dtype`` like ``arr.astype(common_dtype)`` would, while
    taking care of all the special cases.
    """
    if is_dtype_equal(arr.dtype, dtype):
        # already the requested dtype: hand the input back untouched
        return arr

    if is_categorical_dtype(arr.dtype):
        if isinstance(dtype, np.dtype) and np.issubdtype(dtype, np.integer):
            # problem case: categorical of int -> gives int as result dtype,
            # but categorical can contain NAs -> fall back to object dtype
            try:
                return arr.astype(dtype, copy=False)
            except ValueError:
                return arr.astype(object, copy=False)

    if is_sparse(arr) and not is_sparse(dtype):
        # problem case: SparseArray.astype(dtype) doesn't follow the specified
        # dtype exactly, but converts this to Sparse[dtype] -> first manually
        # convert to dense array

        # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
        # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
        # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
        # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
        # Tuple[Any, Any]]]"  [arg-type]
        dense = cast(SparseArray, arr).to_dense()
        return dense.astype(dtype, copy=False)  # type: ignore[arg-type]

    is_datetimelike_ndarray = isinstance(arr, np.ndarray) and arr.dtype.kind in [
        "m",
        "M",
    ]
    if is_datetimelike_ndarray and dtype is np.dtype("object"):
        # wrap datetime-likes in EA to ensure astype(object) gives Timestamp/Timedelta
        # this can happen when concat_compat is called directly on arrays (when arrays
        # are not coming from Index/Series._values), eg in BlockManager.quantile
        arr = ensure_wrapped_if_datetimelike(arr)

    if isinstance(dtype, ExtensionDtype) and isinstance(arr, np.ndarray):
        # numpy's astype cannot handle ExtensionDtypes
        return pd_array(arr, dtype=dtype, copy=False)

    # every remaining combination goes through the array's own astype
    return arr.astype(dtype, copy=False)
예제 #10
0
def is_inferred_bool_dtype(arr: ArrayLike) -> bool:
    """
    Tell whether ``arr`` is an ndarray[bool] or an ndarray[object] of bool
    objects.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray

    Returns
    -------
    bool

    Notes
    -----
    This does not include the special treatment is_bool_dtype uses for
    Categorical.
    """
    if isinstance(arr, np.ndarray):
        arr_dtype = arr.dtype
        if arr_dtype == np.dtype(bool):
            return True
        if arr_dtype == np.dtype("object"):
            # object dtype: the flattened elements themselves are checked
            return lib.is_bool_array(arr.ravel("K"))
    # anything that is not an ndarray never qualifies
    return False
예제 #11
0
def cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
    """
    Cast ``arr`` to ``dtype`` like ``arr.astype(common_dtype)`` would, while
    taking care of all the special cases.
    """
    if is_dtype_equal(arr.dtype, dtype):
        # already the requested dtype: hand the input back untouched
        return arr

    needs_densify = is_sparse(arr) and not is_sparse(dtype)
    if not needs_densify:
        # astype_array includes ensure_wrapped_if_datetimelike
        return astype_array(arr, dtype=dtype, copy=False)

    # TODO(2.0): remove special case once SparseArray.astype deprecation
    #  is enforced.
    # problem case: SparseArray.astype(dtype) doesn't follow the specified
    # dtype exactly, but converts this to Sparse[dtype] -> first manually
    # convert to dense array

    # error: Argument 1 to "astype" of "_ArrayOrScalarCommon" has incompatible type
    # "Union[dtype[Any], ExtensionDtype]"; expected "Union[dtype[Any], None, type, _
    # SupportsDType[dtype[Any]], str, Union[Tuple[Any, int], Tuple[Any,
    # Union[SupportsIndex, Sequence[SupportsIndex]]], List[Any], _DTypeDict,
    # Tuple[Any, Any]]]"  [arg-type]
    dense = cast("SparseArray", arr).to_dense()
    return dense.astype(dtype, copy=False)  # type: ignore[arg-type]
예제 #12
0
def astype_array(values: ArrayLike, dtype: DtypeObj, copy: bool = False) -> ArrayLike:
    """
    Convert an array (ndarray or ExtensionArray) to the requested dtype.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : dtype object
    copy : bool, default False
        copy if indicated

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``values`` is datetime64/timedelta64 and ``dtype`` is a numpy
        integer dtype whose itemsize is not 8.
    """
    if values.dtype.kind in ["m", "M"]:
        non_8byte_int = (
            dtype.kind in ["i", "u"]
            and isinstance(dtype, np.dtype)
            and dtype.itemsize != 8
        )
        if non_8byte_int:
            # TODO(2.0) remove special case once deprecation on DTA/TDA is enforced
            raise TypeError(
                f"cannot astype a datetimelike from [{values.dtype}] to [{dtype}]"
            )

    if is_datetime64tz_dtype(dtype) and is_datetime64_dtype(values.dtype):
        return astype_dt64_to_dt64tz(values, dtype, copy, via_utc=True)

    if is_dtype_equal(values.dtype, dtype):
        # nothing to convert; only honour the copy request
        return values.copy() if copy else values

    if isinstance(values, np.ndarray):
        converted = astype_nansafe(values, dtype, copy=copy)
    else:
        # i.e. ExtensionArray
        converted = values.astype(dtype, copy=copy)

    # in pandas we don't store numpy str dtypes, so convert to object
    if isinstance(dtype, np.dtype) and issubclass(converted.dtype.type, str):
        converted = np.array(converted, dtype=object)

    return converted
예제 #13
0
def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
    """
    If we have a length-1 array and an index describing how long we expect
    the result to be, repeat the array.
    """
    if index is not None:
        if 1 == len(arr) != len(index):
            arr = arr.repeat(len(index))
    return arr
예제 #14
0
def ensure_int_or_float(arr: ArrayLike, copy: bool = False) -> np.ndarray:
    """
    Ensure that an array of some integer dtype has an int64 dtype if possible.

    If it's not possible, potentially because of overflow,
    convert the array to float64 instead.

    Parameters
    ----------
    arr : array-like
        The array whose data type we want to enforce.
    copy : bool, default False
        Whether to copy the original array or reuse
        it in place, if possible.

    Returns
    -------
    out_arr : The input array cast as int64 if
              possible without overflow.
              Otherwise the input array cast to float64.

    Notes
    -----
    If the array is explicitly of type uint64 the type
    will remain unchanged.
    """
    # TODO: GH27506 potential bug with ExtensionArrays
    for int_dtype in ("int64", "uint64"):
        try:
            # error: Unexpected keyword argument "casting" for "astype"
            return arr.astype(  # type: ignore[call-arg]
                int_dtype, copy=copy, casting="safe"
            )
        except TypeError:
            # this integer dtype is not usable, move on to the next option
            continue

    if not is_extension_array_dtype(arr.dtype):
        return arr.astype("float64", copy=copy)

    # pandas/core/dtypes/common.py:168: error: Item "ndarray" of
    # "Union[ExtensionArray, ndarray]" has no attribute "to_numpy"  [union-attr]
    return arr.to_numpy(  # type: ignore[union-attr]
        dtype="float64", na_value=np.nan
    )
예제 #15
0
파일: missing.py 프로젝트: YarShev/pandas
def array_equals(left: ArrayLike, right: ArrayLike) -> bool:
    """
    ExtensionArray-compatible implementation of array_equivalent.

    Arrays with different dtypes are never equal. ExtensionArrays are
    compared with their own ``equals`` method; everything else goes through
    ``array_equivalent``.
    """
    same_dtype = is_dtype_equal(left.dtype, right.dtype)
    if not same_dtype:
        return False

    if isinstance(left, ABCExtensionArray):
        return left.equals(right)

    return array_equivalent(left, right, dtype_equal=True)
예제 #16
0
파일: take.py 프로젝트: prakhar987/pandas
def take_1d(
    arr: ArrayLike,
    indexer: npt.NDArray[np.intp],
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Low-overhead ``take`` for 1D arrays. Differences compared to `take_nd`:

    - Assumes input array has already been converted to numpy array / EA
    - Assumes indexer is already guaranteed to be intp dtype ndarray
    - Only works for 1D arrays

    Note: similarly to `take_nd`, this function assumes that the indexer is
    a valid(ated) indexer with no out of bound indices.
    """
    indexer = ensure_platform_int(indexer)

    if isinstance(arr, np.ndarray):
        if not allow_fill:
            # no filling requested: plain numpy take is enough
            return arr.take(indexer)
    else:
        # ExtensionArray -> dispatch to their method
        return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

    preprocessed = _take_preprocess_indexer_and_fill_value(
        arr, indexer, fill_value, True
    )
    out_dtype, fill_value, mask_info = preprocessed

    # at this point, it's guaranteed that out_dtype can hold both the arr
    # values and the fill_value
    result = np.empty(indexer.shape, dtype=out_dtype)

    take_func = _get_take_nd_function(
        arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
    )
    take_func(arr, indexer, result, fill_value)

    return result
예제 #17
0
def extract_bool_array(mask: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    Convert a mask (possibly a SparseArray or BooleanArray) to ndarray[bool].
    """
    if isinstance(mask, ExtensionArray):
        # We could have BooleanArray, Sparse[bool], ...
        #  Except for BooleanArray, this is equivalent to just
        #  np.asarray(mask, dtype=bool)
        as_ndarray = mask.to_numpy(dtype=bool, na_value=False)
    else:
        as_ndarray = mask

    return np.asarray(as_ndarray, dtype=bool)
예제 #18
0
파일: missing.py 프로젝트: YarShev/pandas
def _isna_array(values: ArrayLike, inf_as_na: bool = False):
    """
    Build a boolean mask flagging the NaN / NA entries of ``values``.

    Parameters
    ----------
    values : ndarray or ExtensionArray
        The input array whose elements are to be checked.
    inf_as_na : bool, default False
        Whether or not to treat infinite values as NA.

    Returns
    -------
    array-like
        Array of boolean values denoting the NA status of each element.
    """
    dtype = values.dtype

    if isinstance(values, np.ndarray):
        if is_string_or_object_np_dtype(dtype):
            return _isna_string_dtype(values, inf_as_na=inf_as_na)
        if needs_i8_conversion(dtype):
            # this is the NaT pattern: compare the int64 view against iNaT
            return values.view("i8") == iNaT
        # remaining ndarray dtypes go through the numpy ufuncs directly
        return ~np.isfinite(values) if inf_as_na else np.isnan(values)

    # i.e. ExtensionArray
    if inf_as_na and is_categorical_dtype(dtype):
        return libmissing.isnaobj(values.to_numpy(), inf_as_na=inf_as_na)

    # dispatch to the array's own ``isna`` implementation
    return values.isna()
예제 #19
0
파일: take.py 프로젝트: rayev2208/pandas
def take_nd(
    arr: ArrayLike,
    indexer,
    axis: int = 0,
    out: Optional[np.ndarray] = None,
    fill_value=lib.no_default,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Specialized Cython take which sets NaN values in one pass

    This dispatches to ``take`` defined on ExtensionArrays. It does not
    currently dispatch to ``SparseArray.take`` for sparse ``arr``.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        Input array.
    indexer : ndarray
        1-D array of indices to take, subarrays corresponding to -1 value
        indices are filled with fill_value
    axis : int, default 0
        Axis to take from
    out : ndarray or None, default None
        Optional output array, must be appropriate type to hold input and
        fill_value together, if indexer has any -1 value entries; call
        maybe_promote to determine this type for any fill_value
    fill_value : any, default lib.no_default
        Fill value to replace -1 values with. When left at the default, it
        is looked up with ``na_value_for_dtype(arr.dtype, compat=False)``.
    allow_fill : boolean, default True
        If False, indexer is assumed to contain no -1 values so no filling
        will be done.  This short-circuits computation of a mask.  Result is
        undefined if allow_fill == False and -1 is present in indexer.

    Returns
    -------
    subarray : np.ndarray or ExtensionArray
        May be the same type as the input, or cast to an ndarray.
    """
    if fill_value is lib.no_default:
        fill_value = na_value_for_dtype(arr.dtype, compat=False)

    if isinstance(arr, np.ndarray):
        ndarr = np.asarray(arr)
        return _take_nd_ndarray(ndarr, indexer, axis, out, fill_value, allow_fill)

    # i.e. ExtensionArray,
    # includes for EA to catch DatetimeArray, TimedeltaArray
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
예제 #20
0
def hash_array(
    vals: ArrayLike,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
) -> np.ndarray:
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    ndarray[np.uint64, ndim=1]
        Hashed values, same length as the vals.

    Raises
    ------
    TypeError
        If ``vals`` has no ``dtype`` attribute, or is neither an ndarray nor
        an ExtensionArray.
    """
    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values. (This check is above the complex check so that we don't ask
    # numpy if categorical is a subdtype of complex, as it will choke).
    if is_categorical_dtype(vals.dtype):
        categorical = cast("Categorical", vals)
        return _hash_categorical(categorical, encoding, hash_key)

    if isinstance(vals, ABCExtensionArray):
        # hash the values the extension array exposes for factorization
        vals, _ = vals._values_for_factorize()
    elif not isinstance(vals, np.ndarray):
        # GH#42003
        raise TypeError(
            "hash_array requires np.ndarray or ExtensionArray, not "
            f"{type(vals).__name__}. Use hash_pandas_object instead."
        )

    return _hash_ndarray(vals, encoding, hash_key, categorize)
예제 #21
0
def hash_array(
    vals: ArrayLike,
    encoding: str = "utf8",
    hash_key: str = _default_hash_key,
    categorize: bool = True,
) -> np.ndarray:
    """
    Given a 1d array, return an array of deterministic integers.

    Parameters
    ----------
    vals : ndarray or ExtensionArray
    encoding : str, default 'utf8'
        Encoding for data & key when strings.
    hash_key : str, default _default_hash_key
        Hash_key for string key to encode.
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

    Returns
    -------
    1d uint64 numpy array of hash values, same length as the vals

    Raises
    ------
    TypeError
        If ``vals`` has no ``dtype`` attribute.
    """
    if not hasattr(vals, "dtype"):
        raise TypeError("must pass a ndarray-like")
    vals_dtype = vals.dtype

    # For categoricals, we hash the categories, then remap the codes to the
    # hash values. (This check is above the complex check so that we don't ask
    # numpy if categorical is a subdtype of complex, as it will choke).
    if is_categorical_dtype(vals_dtype):
        # error: Incompatible types in assignment (expression has type "Categorical",
        # variable has type "ndarray")
        vals = cast("Categorical", vals)  # type: ignore[assignment]
        # error: Argument 1 to "_hash_categorical" has incompatible type "ndarray";
        # expected "Categorical"
        return _hash_categorical(vals, encoding, hash_key)  # type: ignore[arg-type]

    if is_extension_array_dtype(vals_dtype):
        # hash the values the extension array exposes for factorization

        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "ExtensionArray")
        # error: "ndarray" has no attribute "_values_for_factorize"
        vals, _ = vals._values_for_factorize()  # type: ignore[assignment,attr-defined]

    # error: Argument 1 to "_hash_ndarray" has incompatible type "ExtensionArray";
    # expected "ndarray"
    return _hash_ndarray(vals, encoding, hash_key, categorize)  # type: ignore[arg-type]
예제 #22
0
파일: putmask.py 프로젝트: queantt/pandas
def extract_bool_array(mask: ArrayLike) -> np.ndarray:
    """
    Convert a mask (possibly a SparseArray or BooleanArray) to ndarray[bool].
    """
    if not isinstance(mask, ExtensionArray):
        return np.asarray(mask, dtype=bool)

    # We could have BooleanArray, Sparse[bool], ...
    #  Except for BooleanArray, this is equivalent to just
    #  np.asarray(mask, dtype=bool)
    as_ndarray = mask.to_numpy(dtype=bool, na_value=False)
    return np.asarray(as_ndarray, dtype=bool)
예제 #23
0
def quantile_compat(values: ArrayLike, qs: npt.NDArray[np.float64],
                    interpolation: str) -> ArrayLike:
    """
    Compute the quantiles of the given values for each quantile in `qs`.

    ndarrays are handled through ``quantile_with_mask``; any other input is
    dispatched to its own ``_quantile`` method.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : np.ndarray[float64]
    interpolation : str

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        return values._quantile(qs, interpolation)

    na_fill = na_value_for_dtype(values.dtype, compat=False)
    na_mask = isna(values)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation)
예제 #24
0
파일: take.py 프로젝트: zacqed/pandas
def take_1d(
    arr: ArrayLike,
    indexer: np.ndarray,
    fill_value=None,
    allow_fill: bool = True,
) -> ArrayLike:
    """
    Take from a 1D array; a specialized variant of take_nd.

    Compared to take_nd this function:

    - assumes the inputs (arr, indexer) were already converted to a numpy
      array / ExtensionArray
    - only works for 1D arrays

    The goal is to keep the overhead as low as possible.

    TODO(ArrayManager): mainly useful for ArrayManager, otherwise can potentially
    be removed again if we don't end up with ArrayManager.
    """
    if isinstance(arr, np.ndarray):
        idx, out_dtype, fill, mask_info = _take_preprocess_indexer_and_fill_value(
            arr, indexer, 0, None, fill_value, allow_fill)

        # out_dtype is guaranteed to be able to hold both the values of arr
        #  and the fill value at this point
        result = np.empty(idx.shape, dtype=out_dtype)

        take_func = _get_take_nd_function(
            arr.ndim, arr.dtype, result.dtype, axis=0, mask_info=mask_info
        )
        # take_func writes into `result` in place
        take_func(arr, idx, result, fill)
        return result

    # ExtensionArray -> dispatch to its own take method
    return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
예제 #25
0
def na_accum_func(values: ArrayLike, accum_func, skipna: bool) -> ArrayLike:
    """
    Apply a cumulative function, optionally skipping NA values.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    accum_func : {np.cumprod, np.maximum.accumulate, np.cumsum, np.minimum.accumulate}
        Any other callable raises KeyError on the lookup below.
    skipna : bool

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # Per accumulation: (value substituted for NA entries before accumulating,
    #  value written back at the NA positions afterwards)
    fill_before, fill_after = {
        np.cumprod: (1.0, np.nan),
        np.maximum.accumulate: (-np.inf, np.nan),
        np.cumsum: (0.0, np.nan),
        np.minimum.accumulate: (np.inf, np.nan),
    }[accum_func]

    # We will be applying this function to block values
    if values.dtype.kind not in ["m", "M"]:
        if skipna and not issubclass(values.dtype.type, (np.integer, np.bool_)):
            # Work on a copy so the caller's data is left untouched.
            filled = values.copy()
            na_mask = isna(filled)
            filled[na_mask] = fill_before
            result = accum_func(filled, axis=0)
            result[na_mask] = fill_after
            return result
        return accum_func(values, axis=0)

    # datetime64 / timedelta64 from here on.
    # GH#30460, GH#29058
    # numpy 1.18 started sorting NaTs at the end instead of beginning,
    #  so we need to work around to maintain backwards-consistency.
    orig_dtype = values.dtype

    # The mask has to be computed before any NaT gets overwritten.
    na_mask = isna(values)

    # Note: this has to be "==", the comparison fails as an "is" comparison
    is_cummin = accum_func == np.minimum.accumulate

    if is_cummin:
        # Temporarily overwrite NaT with the largest int64, in place on an
        #  i8 view; the NaTs are restored once the result is computed.
        i8_source = values.view("i8")
        i8_source[na_mask] = np.iinfo(np.int64).max
    else:
        i8_source = values

    result = accum_func(i8_source.view("i8"), axis=0)

    if skipna:
        result[na_mask] = iNaT
    elif is_cummin:
        # Restore NaTs that we masked previously
        valid_positions = (~np.asarray(na_mask)).nonzero()[0]
        if len(valid_positions):
            # everything up to the first non-na entry stays NaT
            result[: valid_positions[0]] = iNaT

    if is_cummin:
        # restore NaT elements
        i8_source[na_mask] = iNaT  # TODO: could try/finally for this?

    if isinstance(values, np.ndarray):
        return result.view(orig_dtype)

    # DatetimeArray
    return type(values)._from_sequence(result, dtype=orig_dtype)
예제 #26
0
def astype_array_safe(values: ArrayLike,
                      dtype,
                      copy: bool = False,
                      errors: IgnoreRaise = "raise") -> ArrayLike:
    """
    Cast an array (ndarray or ExtensionArray) to a new dtype.

    This is basically the implementation behind DataFrame/Series.astype;
    the actual casting is delegated to ``astype_array``, while this function
    validates the arguments and applies the ``errors`` policy.

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : str, dtype convertible
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `errors` is not one of the legal values.
    TypeError
        If `dtype` is an ExtensionDtype class rather than an instance.
    """
    errors_legal_values = ("raise", "ignore")
    if errors not in errors_legal_values:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'")

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(
            f"Expected an instance of {dtype.__name__}, "
            "but got the class instead. Try instantiating 'dtype'.")

    dtype = pandas_dtype(dtype)
    if isinstance(dtype, PandasDtype):
        # Ensure we don't end up with a PandasArray
        dtype = dtype.numpy_dtype

    # need to do np.dtype check instead of is_datetime64_dtype
    #  otherwise pyright complains
    if is_datetime64_dtype(values.dtype) and isinstance(dtype, np.dtype):
        if dtype.kind == "M" and not (
            is_unitless(dtype) or is_dtype_equal(dtype, values.dtype)
        ):
            # unit conversion, we would re-cast to nanosecond, so this is
            #  effectively just a copy (regardless of copy kwd)
            # TODO(2.0): remove special-case
            return values.copy()

    try:
        return astype_array(values, dtype, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        #  trying to convert to float
        if errors != "ignore":
            raise
        return values
예제 #27
0
def astype_dt64_to_dt64tz(
    values: ArrayLike, dtype: DtypeObj, copy: bool, via_utc: bool = False
) -> DatetimeArray:
    """
    Cast datetime64 values between timezone-naive and timezone-aware dtypes.

    GH#33401: Datetimeindex[naive].astype(tzaware) and
    Series[dt64].astype(tzaware) behave inconsistently; both behaviors are
    collected here to prevent further fragmentation.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Wrapped via ``ensure_wrapped_if_datetimelike`` before use.
    dtype : np.dtype or ExtensionDtype
        Target dtype.
    copy : bool
        Whether to copy; the naive -> aware path without `via_utc` does not
        consult it.
    via_utc : bool, default False
        True selects the Series.astype behavior (localize to UTC, then
        convert); False selects the DatetimeArray/DatetimeIndex.astype one.

    Returns
    -------
    DatetimeArray

    Raises
    ------
    NotImplementedError
        If `via_utc` is False and neither side is timezone-aware.
    """
    from pandas.core.construction import ensure_wrapped_if_datetimelike

    dta = cast("DatetimeArray", ensure_wrapped_if_datetimelike(values))
    target_is_aware = isinstance(dtype, DatetimeTZDtype)

    if via_utc:
        # Series.astype behavior

        # caller is responsible for checking this
        assert dta.tz is None and target_is_aware
        tz_dtype = cast(DatetimeTZDtype, dtype)

        if copy:
            # this should be the only copy
            dta = dta.copy()

        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version.  Use ser.dt.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # GH#33401 this doesn't match DatetimeArray.astype, which
        #  goes through the `not via_utc` path
        utc_localized = dta.tz_localize("UTC")
        return utc_localized.tz_convert(tz_dtype.tz)

    # DatetimeArray/DatetimeIndex.astype behavior
    source_is_naive = dta.tz is None

    if source_is_naive and target_is_aware:
        tz_dtype = cast(DatetimeTZDtype, dtype)
        warnings.warn(
            "Using .astype to convert from timezone-naive dtype to "
            "timezone-aware dtype is deprecated and will raise in a "
            "future version.  Use obj.tz_localize instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return dta.tz_localize(tz_dtype.tz)

    if target_is_aware:
        # GH#18951: datetime64_tz dtype but not equal means different tz
        tz_dtype = cast(DatetimeTZDtype, dtype)
        converted = dta.tz_convert(tz_dtype.tz)
        return converted.copy() if copy else converted

    if not source_is_naive:
        warnings.warn(
            "Using .astype to convert from timezone-aware dtype to "
            "timezone-naive dtype is deprecated and will raise in a "
            "future version.  Use obj.tz_localize(None) or "
            "obj.tz_convert('UTC').tz_localize(None) instead",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        naive = dta.tz_convert("UTC").tz_localize(None)
        return naive.copy() if copy else naive

    raise NotImplementedError("dtype_equal case should be handled elsewhere")