예제 #1
0
def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool,
              raise_cast_failure: bool):
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray, list, tuple, iterator (catchall)
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.
    """
    # perf shortcut as this is the most common case
    if isinstance(arr, np.ndarray):
        if maybe_castable(arr) and not copy and dtype is None:
            return arr

    if isinstance(dtype, ExtensionDtype) and (dtype.kind != "M"
                                              or is_sparse(dtype)):
        # create an extension array from its dtype
        # DatetimeTZ case needs to go through maybe_cast_to_datetime but
        # SparseDtype does not
        array_type = dtype.construct_array_type()._from_sequence
        subarr = array_type(arr, dtype=dtype, copy=copy)
        return subarr

    if is_object_dtype(dtype) and not isinstance(arr, np.ndarray):
        subarr = construct_1d_object_array_from_listlike(arr)
        return subarr

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # this will raise if we have e.g. floats
            maybe_cast_to_integer_array(arr, dtype)
            subarr = arr
        else:
            subarr = maybe_cast_to_datetime(arr, dtype)

        if not isinstance(subarr, (ABCExtensionArray, ABCIndex)):
            subarr = construct_1d_ndarray_preserving_na(subarr,
                                                        dtype,
                                                        copy=copy)
    except OutOfBoundsDatetime:
        # in case of out of bound datetime64 -> always raise
        raise
    except (ValueError, TypeError) as err:
        if dtype is not None and raise_cast_failure:
            raise
        elif "Cannot cast" in str(err):
            # via _disallow_mismatched_datetimelike
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
예제 #2
0
def _try_cast(
    arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool,
):
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray, list, tuple, iterator (catchall)
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.
    """
    # perf shortcut as this is the most common case
    if isinstance(arr, np.ndarray):
        if maybe_castable(arr) and not copy and dtype is None:
            return arr

    try:
        # GH#15832: Check if we are requesting a numeric dype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            subarr = maybe_cast_to_integer_array(arr, dtype)

        subarr = maybe_cast_to_datetime(arr, dtype)
        # Take care in creating object arrays (but iterators are not
        # supported):
        if is_object_dtype(dtype) and (
            is_list_like(subarr)
            and not (is_iterator(subarr) or isinstance(subarr, np.ndarray))
        ):
            subarr = construct_1d_object_array_from_listlike(subarr)
        elif not is_extension_array_dtype(subarr):
            subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy)
    except OutOfBoundsDatetime:
        # in case of out of bound datetime64 -> always raise
        raise
    except (ValueError, TypeError):
        if is_categorical_dtype(dtype):
            # We *do* allow casting to categorical, since we know
            # that Categorical is the only array type for 'category'.
            dtype = cast(CategoricalDtype, dtype)
            subarr = dtype.construct_array_type()(
                arr, dtype.categories, ordered=dtype.ordered
            )
        elif is_extension_array_dtype(dtype):
            # create an extension array from its dtype
            dtype = cast(ExtensionDtype, dtype)
            array_type = dtype.construct_array_type()._from_sequence
            subarr = array_type(arr, dtype=dtype, copy=copy)
        elif dtype is not None and raise_cast_failure:
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
예제 #3
0
def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure):

    # perf shortcut as this is the most common case
    if take_fast_path:
        if maybe_castable(arr) and not copy and dtype is None:
            return arr

    try:
        # GH#15832: Check if we are requesting a numeric dype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            subarr = maybe_cast_to_integer_array(arr, dtype)

        subarr = maybe_cast_to_datetime(arr, dtype)
        # Take care in creating object arrays (but iterators are not
        # supported):
        if is_object_dtype(dtype) and (
                is_list_like(subarr) and
                not (is_iterator(subarr) or isinstance(subarr, np.ndarray))):
            subarr = construct_1d_object_array_from_listlike(subarr)
        elif not is_extension_type(subarr):
            subarr = construct_1d_ndarray_preserving_na(subarr,
                                                        dtype,
                                                        copy=copy)
    except OutOfBoundsDatetime:
        # in case of out of bound datetime64 -> always raise
        raise
    except (ValueError, TypeError):
        if is_categorical_dtype(dtype):
            # We *do* allow casting to categorical, since we know
            # that Categorical is the only array type for 'category'.
            subarr = Categorical(arr, dtype.categories, ordered=dtype.ordered)
        elif is_extension_array_dtype(dtype):
            # create an extension array from its dtype
            array_type = dtype.construct_array_type()._from_sequence
            subarr = array_type(arr, dtype=dtype, copy=copy)
        elif dtype is not None and raise_cast_failure:
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
예제 #4
0
def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure):

    # perf shortcut as this is the most common case
    if take_fast_path:
        if maybe_castable(arr) and not copy and dtype is None:
            return arr

    try:
        # GH#15832: Check if we are requesting a numeric dype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            subarr = maybe_cast_to_integer_array(arr, dtype)

        subarr = maybe_cast_to_datetime(arr, dtype)
        # Take care in creating object arrays (but iterators are not
        # supported):
        if is_object_dtype(dtype) and (is_list_like(subarr) and
                                       not (is_iterator(subarr) or
                                       isinstance(subarr, np.ndarray))):
            subarr = construct_1d_object_array_from_listlike(subarr)
        elif not is_extension_type(subarr):
            subarr = construct_1d_ndarray_preserving_na(subarr, dtype,
                                                        copy=copy)
    except (ValueError, TypeError):
        if is_categorical_dtype(dtype):
            # We *do* allow casting to categorical, since we know
            # that Categorical is the only array type for 'category'.
            subarr = Categorical(arr, dtype.categories,
                                 ordered=dtype.ordered)
        elif is_extension_array_dtype(dtype):
            # create an extension array from its dtype
            array_type = dtype.construct_array_type()._from_sequence
            subarr = array_type(arr, dtype=dtype, copy=copy)
        elif dtype is not None and raise_cast_failure:
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
예제 #5
0
def sanitize_array(
    data,
    index: Optional["Index"],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.
    """

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                if copy:
                    subarr = data.copy()
                else:
                    subarr = np.array(data, copy=False)
        else:
            # we will try to copy be-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    elif isinstance(data, abc.Set):
        raise TypeError("Set type is unordered")
    elif lib.is_scalar(data) and index is not None and dtype is not None:
        data = maybe_cast_to_datetime(data, dtype)
        if not lib.is_scalar(data):
            data = data[0]
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, "ndim", 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception("Data must be 1-dimensional")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        # This is to prevent mixed-type Series getting all casted to
        # NumPy string type, e.g. NaN --> '-1#IND'.
        if issubclass(subarr.dtype.type, str):
            # GH#16605
            # If not empty convert the data to dtype
            # GH#19853: If data is a scalar, subarr has already the result
            if not lib.is_scalar(data):
                if not np.all(isna(data)):
                    data = np.array(data, dtype=dtype, copy=False)
                subarr = np.array(data, dtype=object, copy=copy)

        if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
예제 #6
0
def sanitize_array(data,
                   index,
                   dtype=None,
                   copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy, True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # becuase we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                pass

    return subarr
예제 #7
0
 def convert(arr):
     if dtype != object and dtype != np.object:
         arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
         arr = maybe_cast_to_datetime(arr, dtype)
     return arr
예제 #8
0
 def convert(arr):
     if dtype != np.dtype("O"):
         arr = lib.maybe_convert_objects(arr)
         arr = maybe_cast_to_datetime(arr, dtype)
     return arr
예제 #9
0
def _try_cast(
    arr: list | np.ndarray,
    dtype: DtypeObj | None,
    copy: bool,
    raise_cast_failure: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    # perf shortcut as this is the most common case
    if (isinstance(arr, np.ndarray) and maybe_castable(arr.dtype) and not copy
            and dtype is None):
        return arr

    if isinstance(dtype,
                  ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype):
        # create an extension array from its dtype
        # DatetimeTZ case needs to go through maybe_cast_to_datetime but
        # SparseDtype does not
        array_type = dtype.construct_array_type()._from_sequence
        subarr = array_type(arr, dtype=dtype, copy=copy)
        return subarr

    if is_object_dtype(dtype) and not isinstance(arr, np.ndarray):
        subarr = construct_1d_object_array_from_listlike(arr)
        return subarr

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # this will raise if we have e.g. floats

            # error: Argument 2 to "maybe_cast_to_integer_array" has incompatible type
            # "Union[dtype, ExtensionDtype, None]"; expected "Union[ExtensionDtype, str,
            # dtype, Type[str], Type[float], Type[int], Type[complex], Type[bool],
            # Type[object]]"
            maybe_cast_to_integer_array(arr, dtype)  # type: ignore[arg-type]
            subarr = arr
        else:
            subarr = maybe_cast_to_datetime(arr, dtype)
            if dtype is not None and dtype.kind == "M":
                return subarr

        if not isinstance(subarr, ABCExtensionArray):
            subarr = construct_1d_ndarray_preserving_na(subarr,
                                                        dtype,
                                                        copy=copy)
    except OutOfBoundsDatetime:
        # in case of out of bound datetime64 -> always raise
        raise
    except (ValueError, TypeError) as err:
        if dtype is not None and raise_cast_failure:
            raise
        elif "Cannot cast" in str(err):
            # via _disallow_mismatched_datetimelike
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
예제 #10
0
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True

    Returns
    -------
    np.ndarray or ExtensionArray

    Notes
    -----
    raise_cast_failure=False is only intended to be True when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data,
                    (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray, List[Any]]", variable has type
            # "ExtensionArray")
            subarr = maybe_cast_to_datetime(subarr,
                                            dtype)  # type: ignore[assignment]

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        # realize e.g. generators
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (isinstance(subarr.dtype, ExtensionDtype)
            or isinstance(dtype, ExtensionDtype)):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
예제 #11
0
def _try_cast(
    arr: list | np.ndarray,
    dtype: DtypeObj | None,
    copy: bool,
    raise_cast_failure: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if dtype is None:
        # perf shortcut as this is the most common case
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            if arr.dtype != object:
                return sanitize_to_nanoseconds(arr, copy=copy)

            out = maybe_infer_to_datetimelike(arr)
            if out is arr and copy:
                out = out.copy()
            return out

        else:
            # i.e. list
            varr = np.array(arr, copy=False)
            # filter out cases that we _dont_ want to go through
            #  maybe_infer_to_datetimelike
            if varr.dtype != object or varr.size == 0:
                return varr
            return maybe_infer_to_datetimelike(varr)

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        if isinstance(dtype, DatetimeTZDtype):
            # We can't go through _from_sequence because it handles dt64naive
            #  data differently; _from_sequence treats naive as wall times,
            #  while maybe_cast_to_datetime treats it as UTC
            #  see test_maybe_promote_any_numpy_dtype_with_datetimetz
            # TODO(2.0): with deprecations enforced, should be able to remove
            #  special case.
            return maybe_cast_to_datetime(arr, dtype)
            # TODO: copy?

        array_type = dtype.construct_array_type()._from_sequence
        subarr = array_type(arr, dtype=dtype, copy=copy)
        return subarr

    elif is_object_dtype(dtype):
        if not is_ndarray:
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in ["m", "M"]
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy)

    elif dtype.kind in ["m", "M"]:
        return maybe_cast_to_datetime(arr, dtype)

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # this will raise if we have e.g. floats

            subarr = maybe_cast_to_integer_array(arr, dtype)
        else:
            # 4 tests fail if we move this to a try/except/else; see
            #  test_constructor_compound_dtypes, test_constructor_cast_failure
            #  test_constructor_dict_cast2, test_loc_setitem_dtype
            subarr = np.array(arr, dtype=dtype, copy=copy)

    except (ValueError, TypeError):
        if raise_cast_failure:
            raise
        else:
            # we only get here with raise_cast_failure False, which means
            #  called via the DataFrame constructor
            # GH#24435
            warnings.warn(
                f"Could not cast to {dtype}, falling back to object. This "
                "behavior is deprecated. In a future version, when a dtype is "
                "passed to 'DataFrame', either all columns will be cast to that "
                "dtype, or a TypeError will be raised.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr
예제 #12
0
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.
    """

    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data,
                    (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0:
        # TODO: deque, array.array
        if isinstance(data, set):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError("Set type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
예제 #13
0
def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy,
                                       True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # becuase we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                pass

    return subarr
예제 #14
0
 def convert(arr):
     if dtype != object and dtype != np.object:
         arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
         arr = maybe_cast_to_datetime(arr, dtype)
     return arr
예제 #15
0
def _try_cast(
    arr: list | np.ndarray,
    dtype: DtypeObj | None,
    copy: bool,
    raise_cast_failure: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype, ExtensionDtype or None
    copy : bool
        If False, don't copy the data if not needed.
    raise_cast_failure : bool
        If True, and if a dtype is specified, raise errors during casting.
        Otherwise an object array is returned.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)

    # perf shortcut as this is the most common case
    # Item "List[Any]" of "Union[List[Any], ndarray]" has no attribute "dtype"
    if (is_ndarray and arr.dtype != object  # type: ignore[union-attr]
            and not copy and dtype is None):
        # Argument 1 to "sanitize_to_nanoseconds" has incompatible type
        # "Union[List[Any], ndarray]"; expected "ndarray"
        return sanitize_to_nanoseconds(arr)  # type: ignore[arg-type]

    if isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        # DatetimeTZ case needs to go through maybe_cast_to_datetime but
        # SparseDtype does not
        if isinstance(dtype, DatetimeTZDtype):
            # We can't go through _from_sequence because it handles dt64naive
            #  data differently; _from_sequence treats naive as wall times,
            #  while maybe_cast_to_datetime treats it as UTC
            #  see test_maybe_promote_any_numpy_dtype_with_datetimetz

            # error: Incompatible return value type (got "Union[ExtensionArray,
            # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]")
            return maybe_cast_to_datetime(arr,
                                          dtype)  # type: ignore[return-value]
            # TODO: copy?

        array_type = dtype.construct_array_type()._from_sequence
        subarr = array_type(arr, dtype=dtype, copy=copy)
        return subarr

    elif is_object_dtype(dtype):
        if not is_ndarray:
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype is None and not is_ndarray:
        # filter out cases that we _dont_ want to go through maybe_cast_to_datetime
        varr = np.array(arr, copy=False)
        if varr.dtype != object or varr.size == 0:
            return varr
        # error: Incompatible return value type (got "Union[ExtensionArray,
        # ndarray, List[Any]]", expected "Union[ExtensionArray, ndarray]")
        return maybe_cast_to_datetime(varr, None)  # type: ignore[return-value]

    try:
        # GH#15832: Check if we are requesting a numeric dtype and
        # that we can convert the data to the requested dtype.
        if is_integer_dtype(dtype):
            # this will raise if we have e.g. floats

            dtype = cast(np.dtype, dtype)
            maybe_cast_to_integer_array(arr, dtype)
            subarr = arr
        else:
            subarr = maybe_cast_to_datetime(arr, dtype)
            if dtype is not None and dtype.kind == "M":
                return subarr

        if not isinstance(subarr, ABCExtensionArray):
            subarr = construct_1d_ndarray_preserving_na(subarr,
                                                        dtype,
                                                        copy=copy)
    except OutOfBoundsDatetime:
        # in case of out of bound datetime64 -> always raise
        raise
    except (ValueError, TypeError) as err:
        if dtype is not None and raise_cast_failure:
            raise
        elif "Cannot cast" in str(err):
            # via _disallow_mismatched_datetimelike
            raise
        else:
            subarr = np.array(arr, dtype=object, copy=copy)
    return subarr