Ejemplo n.º 1
def maybe_convert_platform_interval(values):
    """
    Try to do platform conversion, with special casing for IntervalArray.

    Wrapper around maybe_convert_platform that alters the default return
    dtype in certain cases to be compatible with IntervalArray.  For example,
    empty lists return with integer dtype instead of object dtype, which is
    prohibited for IntervalArray.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
        An empty int64 ndarray for an empty list/tuple, otherwise the result
        of ``maybe_convert_platform``.
    """
    if isinstance(values, (list, tuple)) and len(values) == 0:
        # GH 19016
        # empty lists/tuples get object dtype by default, but this is
        # prohibited for IntervalArray, so coerce to integer instead
        return np.array([], dtype=np.int64)
    elif is_categorical_dtype(values):
        # hand a plain ndarray to the platform conversion
        values = np.asarray(values)

    return maybe_convert_platform(values)
Ejemplo n.º 2
        def convert(v):
            """Platform-convert list-like ``v``; hand anything else back as-is."""
            # Non-list-likes and DataFrames are returned untouched.
            if not is_list_like(v) or isinstance(v, ABCDataFrame):
                return v

            # Run extract_array first, then convert what it returns.
            unwrapped = extract_array(v, extract_numpy=True)
            return maybe_convert_platform(unwrapped)
Ejemplo n.º 3
def _sanitize_values(arr):
    """
    Return an ndarray for our input, in a platform independent manner.

    Parameters
    ----------
    arr : object
        Anything exposing ``.values``, a scalar, an ndarray or a list-like.

    Returns
    -------
    object
        ``arr.values`` when that attribute exists; otherwise the input
        ndarray unchanged, the platform-converted non-empty list-like, or
        ``np.asarray(arr)`` for everything else.
    """
    if hasattr(arr, 'values'):
        arr = arr.values
    else:

        # scalar
        if is_scalar(arr):
            arr = [arr]

        # ndarray
        if isinstance(arr, np.ndarray):
            # already an ndarray: nothing to convert
            pass

        elif is_list_like(arr) and len(arr) > 0:
            arr = maybe_convert_platform(arr)

        else:
            # empty list-likes and any other object
            arr = np.asarray(arr)

    return arr
Ejemplo n.º 4
def maybe_convert_platform_interval(values):
    """
    Try to do platform conversion, with special casing for IntervalArray.

    Wrapper around maybe_convert_platform that alters the default return
    dtype in certain cases to be compatible with IntervalArray.  For example,
    empty lists return with integer dtype instead of object dtype, which is
    prohibited for IntervalArray.

    Parameters
    ----------
    values : array-like

    Returns
    -------
    array
        An empty int64 ndarray for an empty list/tuple, otherwise the result
        of ``maybe_convert_platform``.
    """
    if isinstance(values, (list, tuple)) and len(values) == 0:
        # GH 19016
        # empty lists/tuples get object dtype by default, but this is
        # prohibited for IntervalArray, so coerce to integer instead
        return np.array([], dtype=np.int64)
    elif is_categorical_dtype(values):
        # hand a plain ndarray to the platform conversion
        values = np.asarray(values)

    return maybe_convert_platform(values)
Ejemplo n.º 5
    def __new__(cls, data, closed=None,
                name=None, copy=False, dtype=None,
                fastpath=False, verify_integrity=True):
        """
        Create the index from an IntervalIndex or a collection of intervals.

        Parameters
        ----------
        data : IntervalIndex or array-like
            Source of the intervals. With ``fastpath`` it must expose
            ``left`` and ``right``.
        closed : str, optional
            Closed side. Ignored when ``data`` is an IntervalIndex (its own
            ``closed`` wins); otherwise it must agree with the side inferred
            from ``data`` when both are given.
        name : object, optional
            Defaults to ``data.name`` when ``data`` has one.
        copy : bool, default False
            Forwarded to ``_simple_new``.
        dtype : object, optional
            Accepted but not used by this method.
        fastpath : bool, default False
            Skip all inference and integrity verification.
        verify_integrity : bool, default True
            Forwarded to ``_simple_new`` on the regular path.

        Raises
        ------
        TypeError
            If ``data`` is a scalar.
        ValueError
            If ``closed`` conflicts with the side inferred from ``data``.
        """
        if fastpath:
            return cls._simple_new(data.left, data.right, closed, name,
                                   copy=copy, verify_integrity=False)

        if name is None and hasattr(data, 'name'):
            name = data.name

        if isinstance(data, IntervalIndex):
            left = data.left
            right = data.right
            closed = data.closed
        else:

            # don't allow scalars
            if is_scalar(data):
                raise TypeError(
                    '{klass}(...) must be called with a collection of some '
                    'kind, {data} was passed'
                    .format(klass=cls.__name__, data=repr(data)))

            data = maybe_convert_platform(data)
            left, right, infer_closed = intervals_to_interval_bounds(data)

            if _all_not_none(closed, infer_closed) and closed != infer_closed:
                # GH 18421
                msg = ("conflicting values for closed: constructor got "
                       "'{closed}', inferred from data '{infer_closed}'"
                       .format(closed=closed, infer_closed=infer_closed))
                raise ValueError(msg)

            # an explicit argument wins, else use what the data implied
            closed = closed or infer_closed

        return cls._simple_new(left, right, closed, name,
                               copy=copy, verify_integrity=verify_integrity)
Ejemplo n.º 6
    def __new__(cls, data, closed=None,
                name=None, copy=False, dtype=None,
                fastpath=False, verify_integrity=True):
        """
        Create the index from an IntervalIndex or a collection of intervals.

        Parameters
        ----------
        data : IntervalIndex or array-like
            Source of the intervals. With ``fastpath`` it must expose
            ``left`` and ``right``.
        closed : str, optional
            Closed side. Ignored when ``data`` is an IntervalIndex (its own
            ``closed`` wins); otherwise it must agree with the side inferred
            from ``data`` when both are given.
        name : object, optional
            Defaults to ``data.name`` when ``data`` has one.
        copy : bool, default False
            Forwarded to ``_simple_new``.
        dtype : object, optional
            Accepted but not used by this method.
        fastpath : bool, default False
            Skip all inference and integrity verification.
        verify_integrity : bool, default True
            Forwarded to ``_simple_new`` on the regular path.

        Raises
        ------
        TypeError
            If ``data`` is a scalar.
        ValueError
            If ``closed`` conflicts with the side inferred from ``data``.
        """
        if fastpath:
            return cls._simple_new(data.left, data.right, closed, name,
                                   copy=copy, verify_integrity=False)

        if name is None and hasattr(data, 'name'):
            name = data.name

        if isinstance(data, IntervalIndex):
            left = data.left
            right = data.right
            closed = data.closed
        else:

            # don't allow scalars
            if is_scalar(data):
                raise TypeError(
                    '{klass}(...) must be called with a collection of some '
                    'kind, {data} was passed'
                    .format(klass=cls.__name__, data=repr(data)))

            data = maybe_convert_platform(data)
            left, right, infer_closed = intervals_to_interval_bounds(data)

            if _all_not_none(closed, infer_closed) and closed != infer_closed:
                # GH 18421
                msg = ("conflicting values for closed: constructor got "
                       "'{closed}', inferred from data '{infer_closed}'"
                       .format(closed=closed, infer_closed=infer_closed))
                raise ValueError(msg)

            # an explicit argument wins, else use what the data implied
            closed = closed or infer_closed

        return cls._simple_new(left, right, closed, name,
                               copy=copy, verify_integrity=verify_integrity)
Ejemplo n.º 7
def _sanitize_values(arr):
    """
    Return an ndarray for our input, in a platform independent manner.

    Parameters
    ----------
    arr : object
        Anything exposing ``.values``, a scalar, an ndarray or a list-like.

    Returns
    -------
    object
        ``arr.values`` when that attribute exists; otherwise the input
        ndarray unchanged, the platform-converted non-empty list-like, or
        ``np.asarray(arr)`` for everything else.
    """
    if hasattr(arr, 'values'):
        arr = arr.values
    else:

        # scalar
        if is_scalar(arr):
            arr = [arr]

        # ndarray
        if isinstance(arr, np.ndarray):
            # already an ndarray: nothing to convert
            pass

        elif is_list_like(arr) and len(arr) > 0:
            arr = maybe_convert_platform(arr)

        else:
            # empty list-likes and any other object
            arr = np.asarray(arr)

    return arr
Ejemplo n.º 8
    @classmethod
    def from_arrays(cls, left, right, closed='right', name=None, copy=False):
        """
        Construct an IntervalIndex from a left and right array

        Parameters
        ----------
        left : array-like (1-dimensional)
            Left bounds for each interval.
        right : array-like (1-dimensional)
            Right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, optional
            Whether the intervals are closed on the left-side, right-side, both
            or neither. Defaults to 'right'.
        name : object, optional
            Name to be stored in the index.
        copy : boolean, default False
            copy the data

        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]]
                      closed='right', dtype='interval[int64]')

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex
        IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                    splits
        IntervalIndex.from_intervals : Construct an IntervalIndex from an array
                                       of Interval objects
        IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                    list/array of tuples
        """
        left = maybe_convert_platform(left)
        right = maybe_convert_platform(right)

        # bounds supplied by the caller are always verified
        return cls._simple_new(left, right, closed, name=name,
                               copy=copy, verify_integrity=True)
Ejemplo n.º 9
        def convert(v):
            """Platform-convert supported list-likes; return other input as-is."""
            # Non-list-likes and DataFrames are returned untouched.
            if not is_list_like(v) or isinstance(v, ABCDataFrame):
                return v

            # Only dtype-carrying objects and list/tuple/range get converted.
            if not (hasattr(v, "dtype") or isinstance(v, (list, tuple, range))):
                # TODO: should we cast these to list?
                return v

            unwrapped = extract_array(v, extract_numpy=True)
            return maybe_convert_platform(unwrapped)
Ejemplo n.º 10
    @classmethod
    def from_arrays(cls, left, right, closed='right', name=None, copy=False):
        """
        Construct an IntervalIndex from a left and right array

        Parameters
        ----------
        left : array-like (1-dimensional)
            Left bounds for each interval.
        right : array-like (1-dimensional)
            Right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, optional
            Whether the intervals are closed on the left-side, right-side, both
            or neither. Defaults to 'right'.
        name : object, optional
            Name to be stored in the index.
        copy : boolean, default False
            copy the data

        Examples
        --------
        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]]
                      closed='right', dtype='interval[int64]')

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex
        IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                    splits
        IntervalIndex.from_intervals : Construct an IntervalIndex from an array
                                       of Interval objects
        IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                    list/array of tuples
        """
        left = maybe_convert_platform(left)
        right = maybe_convert_platform(right)

        # bounds supplied by the caller are always verified
        return cls._simple_new(left, right, closed, name=name,
                               copy=copy, verify_integrity=True)
Ejemplo n.º 11
    @classmethod
    def from_intervals(cls, data, name=None, copy=False):
        """
        Construct an IntervalIndex from a 1d array of Interval objects

        Parameters
        ----------
        data : array-like (1-dimensional)
            Array of Interval objects. All intervals must be closed on the same
            side.
        name : object, optional
            Name to be stored in the index.
        copy : boolean, default False
            by-default copy the data, this is compat only and ignored

        Examples
        --------
        >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1),
        ...                                  pd.Interval(1, 2)])
        IntervalIndex([(0, 1], (1, 2]]
                      closed='right', dtype='interval[int64]')

        The generic Index constructor works identically when it infers an array
        of all intervals:

        >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)])
        IntervalIndex([(0, 1], (1, 2]]
                      closed='right', dtype='interval[int64]')

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex
        IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
                                    right array
        IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                    splits
        IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                    list/array of tuples
        """
        if isinstance(data, IntervalIndex):
            # reuse the bounds (and the name, unless one was given)
            left, right, closed = data.left, data.right, data.closed
            name = name or data.name
        else:
            data = maybe_convert_platform(data)
            left, right, closed = intervals_to_interval_bounds(data)
        # ``copy`` is intentionally not forwarded (compat-only argument)
        return cls.from_arrays(left, right, closed, name=name, copy=False)
Ejemplo n.º 12
    @classmethod
    def from_intervals(cls, data, name=None, copy=False):
        """
        Construct an IntervalIndex from a 1d array of Interval objects

        Parameters
        ----------
        data : array-like (1-dimensional)
            Array of Interval objects. All intervals must be closed on the same
            side.
        name : object, optional
            Name to be stored in the index.
        copy : boolean, default False
            by-default copy the data, this is compat only and ignored

        Examples
        --------
        >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1),
        ...                                  pd.Interval(1, 2)])
        IntervalIndex([(0, 1], (1, 2]]
                      closed='right', dtype='interval[int64]')

        The generic Index constructor works identically when it infers an array
        of all intervals:

        >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)])
        IntervalIndex([(0, 1], (1, 2]]
                      closed='right', dtype='interval[int64]')

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex
        IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
                                    right array
        IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
                                    splits
        IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                    list/array of tuples
        """
        if isinstance(data, IntervalIndex):
            # reuse the bounds (and the name, unless one was given)
            left, right, closed = data.left, data.right, data.closed
            name = name or data.name
        else:
            data = maybe_convert_platform(data)
            left, right, closed = intervals_to_interval_bounds(data)
        # ``copy`` is intentionally not forwarded (compat-only argument)
        return cls.from_arrays(left, right, closed, name=name, copy=False)
Ejemplo n.º 13
    @classmethod
    def from_breaks(cls, breaks, closed='right', name=None, copy=False):
        """
        Construct an IntervalIndex from an array of splits

        Parameters
        ----------
        breaks : array-like (1-dimensional)
            Left and right bounds for each interval.
        closed : {'left', 'right', 'both', 'neither'}, default 'right'
            Whether the intervals are closed on the left-side, right-side, both
            or neither.
        name : object, optional
            Name to be stored in the index.
        copy : boolean, default False
            copy the data

        Examples
        --------
        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
        IntervalIndex([(0, 1], (1, 2], (2, 3]]
                      closed='right', dtype='interval[int64]')

        See Also
        --------
        interval_range : Function to create a fixed frequency IntervalIndex
        IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
                                    right array
        IntervalIndex.from_intervals : Construct an IntervalIndex from an array
                                       of Interval objects
        IntervalIndex.from_tuples : Construct an IntervalIndex from a
                                    list/array of tuples
        """
        breaks = maybe_convert_platform(breaks)

        # consecutive breaks form the (left, right) bounds of each interval
        return cls.from_arrays(breaks[:-1], breaks[1:], closed,
                               name=name, copy=copy)
Ejemplo n.º 14
def sanitize_array(
    data,
    index: Optional["Index"],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : object
        Masked array, ndarray, ExtensionArray, list/tuple, range or scalar.
    index : Index or None
        Only its length is used, to broadcast scalar or 1-element input.
    dtype : dtype, optional
        Target dtype.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``.

    Returns
    -------
    ndarray or ExtensionArray
        A 0-dim result with no ``index`` is returned as ``subarr.item()``.

    Raises
    ------
    TypeError
        If ``data`` is an ``abc.Set``.
    Exception
        If ``data`` is an ndarray whose result has more than one dimension.
    """

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # the cast failed: keep the float data
                if copy:
                    subarr = data.copy()
                else:
                    subarr = np.array(data, copy=False)
        else:
            # we will try to copy be-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    elif isinstance(data, abc.Set):
        raise TypeError("Set type is unordered")
    elif lib.is_scalar(data) and index is not None and dtype is not None:
        data = maybe_cast_to_datetime(data, dtype)
        if not lib.is_scalar(data):
            data = data[0]
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)
    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, "ndim", 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception("Data must be 1-dimensional")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    if not (is_extension_array_dtype(subarr.dtype)
            or is_extension_array_dtype(dtype)):
        # This is to prevent mixed-type Series getting all casted to
        # NumPy string type, e.g. NaN --> '-1#IND'.
        if issubclass(subarr.dtype.type, str):
            # GH#16605
            # If not empty convert the data to dtype
            # GH#19853: If data is a scalar, subarr has already the result
            if not lib.is_scalar(data):
                if not np.all(isna(data)):
                    data = np.array(data, dtype=dtype, copy=False)
                subarr = np.array(data, dtype=object, copy=copy)

        if is_object_dtype(subarr.dtype) and not is_object_dtype(dtype):
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
Ejemplo n.º 15
def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.

    Parameters
    ----------
    data : object
        Masked array, ndarray, Index, ExtensionArray, list/tuple, range or
        scalar.
    index : Index or None
        Passed to ``sanitize_index`` for Index input; otherwise only its
        length is used, to broadcast scalar or 1-element input.
    dtype : object, optional
        Normalized with ``pandas_dtype`` when given.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``; for list/tuple input it also decides
        whether a failed cast is re-raised or falls back to object dtype.

    Returns
    -------
    ndarray or ExtensionArray
        A 0-dim result with no ``index`` is returned as ``subarr.item()``.

    Raises
    ------
    Exception
        If ``data`` is an ndarray whose result has more than one dimension.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            # fallback value if the cast below fails and no copy is requested
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy, True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # because we've unwrapped PandasArray into an ndarray.

        # NOTE(review): both assignments below read ``data`` rather than
        # ``subarr``, so the unwrapped array (and, when ``copy`` is true, the
        # ``astype`` result) is overwritten -- confirm this is intended.
        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                # best effort: fall back to object dtype and re-infer
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                # mixed frequencies: keep the object-dtype array
                pass

    return subarr
Ejemplo n.º 16
 def convert(v):
     """Run ``v`` through ``maybe_convert_platform`` and return the result."""
     converted = maybe_convert_platform(v)
     return converted
Ejemplo n.º 17
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    TypeError
        If ``data`` is a ``set`` or ``frozenset``.

    Notes
    -----
    raise_cast_failure=False is only intended to be used when called from the
    DataFrame constructor, as the dtype keyword there may be interpreted as only
    applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    import warnings

                    # NOTE(review): stacklevel is meant to point at the
                    # user's DataFrame(...) call -- confirm the depth.
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=4,
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        # TODO: non-standard array-likes we can convert to ndarray more efficiently?
        data = list(data)

        if dtype is not None or len(data) == 0:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            # TODO: copy?
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if not (isinstance(subarr.dtype, ExtensionDtype)
            or isinstance(dtype, ExtensionDtype)):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(
            dtype)
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
Ejemplo n.º 18
 def convert(v):
     """Run ``v`` through ``maybe_convert_platform`` and return the result."""
     converted = maybe_convert_platform(v)
     return converted
Ejemplo n.º 19
def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.

    Parameters
    ----------
    data : object
        Masked array, ndarray, Index, ExtensionArray, list/tuple, range or
        scalar.
    index : Index or None
        Passed to ``sanitize_index`` for Index input; otherwise only its
        length is used, to broadcast scalar or 1-element input.
    dtype : object, optional
        Normalized with ``pandas_dtype`` when given.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``; for list/tuple input it also decides
        whether a failed cast is re-raised or falls back to object dtype.

    Returns
    -------
    ndarray or ExtensionArray
        A 0-dim result with no ``index`` is returned as ``subarr.item()``.

    Raises
    ------
    Exception
        If ``data`` is an ndarray whose result has more than one dimension.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            # fallback value if the cast below fails and no copy is requested
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy,
                                       True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy be-definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarray's,
        # because we've unwrapped PandasArray into an ndarray.

        # NOTE(review): both assignments below read ``data`` rather than
        # ``subarr``, so the unwrapped array (and, when ``copy`` is true, the
        # ``astype`` result) is overwritten -- confirm this is intended.
        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                # best effort: fall back to object dtype and re-infer
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                # mixed frequencies: keep the object-dtype array
                pass

    return subarr
Ejemplo n.º 20
def sanitize_array(
    data,
    index: Optional[Index],
    dtype: Optional[DtypeObj] = None,
    copy: bool = False,
    raise_cast_failure: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Masked array, ndarray, ExtensionArray, list/tuple/set-like, range,
        scalar, or any other list-like.
    index : Index or None
        Its length is used to broadcast a non-list-like ``data``; it is also
        forwarded to ``_sanitize_ndim``.
    dtype : np.dtype, ExtensionDtype, or None, default None
        Target dtype. For zero-dimensional ndarray input with ``dtype=None``
        the array's own dtype is used.
    copy : bool, default False
    raise_cast_failure : bool, default False
        Forwarded to ``_try_cast``.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a non-empty ``set``.
    ValueError
        If ``data`` is not list-like and ``index`` is None.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unwrap 0-dim arrays to a scalar, remembering their dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if (
            dtype is not None
            and is_float_dtype(data.dtype)
            and is_integer_dtype(dtype)
        ):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # the float -> int cast failed: keep the data as it is
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif (
        isinstance(data, (list, tuple, abc.Set, abc.ValuesView))
        and len(data) > 0
    ):
        # TODO: deque, array.array
        if isinstance(data, set):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError("Set type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (
        is_extension_array_dtype(subarr.dtype)
        or is_extension_array_dtype(dtype)
    ):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = (
            is_object_dtype(dtype) or is_string_dtype(dtype)
        )
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object result that was not explicitly requested: let
            # interval/period values become their dedicated array type
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)

    return subarr
Ejemplo n.º 21
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Its length is used to broadcast a non-list-like ``data``; it is also
        forwarded to ``_sanitize_ndim``.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    raise_cast_failure : bool, default True
        Forwarded to ``_try_cast``.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a non-empty ``set`` or ``frozenset``.
    ValueError
        If ``data`` is not list-like and ``index`` is None.

    Notes
    -----
    raise_cast_failure=False is only intended to be passed when called from
    the DataFrame constructor, as the dtype keyword there may be interpreted
    as only applying to a subset of columns, see GH#24435.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unwrap 0-dim arrays to a scalar, remembering their dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)

    # GH#846
    if isinstance(data, np.ndarray):

        if (
            dtype is not None
            and is_float_dtype(data.dtype)
            and is_integer_dtype(dtype)
        ):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                # the float -> int cast failed: keep the data as it is
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif (
        isinstance(data, (list, tuple, abc.Set, abc.ValuesView))
        and len(data) > 0
    ):
        # TODO: deque, array.array
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")
        data = list(data)

        if dtype is not None:
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)
        else:
            subarr = maybe_convert_platform(data)
            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray, List[Any]]", variable has type
            # "ExtensionArray")
            subarr = maybe_cast_to_datetime(  # type: ignore[assignment]
                subarr, dtype
            )

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)

    elif not is_list_like(data):
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype)

    else:
        # realize e.g. generators
        # TODO: non-standard array-likes we can convert to ndarray more
        #  efficiently?
        data = list(data)
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    subarr = _sanitize_ndim(subarr, data, dtype, index)

    if not (
        isinstance(subarr.dtype, ExtensionDtype)
        or isinstance(dtype, ExtensionDtype)
    ):
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

        is_object_or_str_dtype = (
            is_object_dtype(dtype) or is_string_dtype(dtype)
        )
        if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
            # object result that was not explicitly requested: let
            # interval/period values become their dedicated array type
            inferred = lib.infer_dtype(subarr, skipna=False)
            if inferred in {"interval", "period"}:
                subarr = array(subarr)
                subarr = extract_array(subarr, extract_numpy=True)

    return subarr
Ejemplo n.º 22
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    raise_cast_failure: bool = True,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Its length is used to broadcast a non-list-like ``data``; it is also
        forwarded to ``_sanitize_ndim``.
    dtype : np.dtype, ExtensionDtype, or None, default None
        A ``PandasDtype`` is replaced by its ``numpy_dtype``.
    copy : bool, default False
        Reset to False when ``data`` is a ``range``.
    raise_cast_failure : bool, default True
        Forwarded to ``_try_cast``.
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike. Forwarded to
        ``_sanitize_ndim``.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    TypeError
        If ``data`` is a ``set`` or ``frozenset``.
    ValueError
        If ``data`` is not list-like and ``index`` is None, or if casting
        list-like data to ``dtype`` fails and no fallback applies.

    Notes
    -----
    raise_cast_failure=False is only intended to be passed when called from
    the DataFrame constructor, as the dtype keyword there may be interpreted
    as only applying to a subset of columns, see GH#24435.
    """
    import warnings

    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # unwrap 0-dim arrays to a scalar, remembering their dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        # scalar: broadcast to the length of the index and return directly
        if index is None:
            raise ValueError(
                "index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    # GH#846
    if isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if (
            dtype is not None
            and is_float_dtype(data.dtype)
            and is_integer_dtype(dtype)
        ):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except IntCastingNaNError:
                # NOTE(review): the warning category and stacklevel were not
                #  visible in the extracted source; FutureWarning is implied
                #  by the message text -- confirm both against upstream.
                warnings.warn(
                    "In a future version, passing float-dtype values containing NaN "
                    "and an integer dtype will raise IntCastingNaNError "
                    "(subclass of ValueError) instead of silently ignoring the "
                    "passed dtype. To retain the old behavior, call Series(arr) or "
                    "DataFrame(arr) without passing a dtype.",
                    FutureWarning,
                    stacklevel=2,
                )
                subarr = np.array(data, copy=copy)
            except ValueError:
                if not raise_cast_failure:
                    # i.e. called via DataFrame constructor
                    # NOTE(review): category/stacklevel reconstructed, see
                    #  the note on the warning above.
                    warnings.warn(
                        "In a future version, passing float-dtype values and an "
                        "integer dtype to DataFrame will retain floating dtype "
                        "if they cannot be cast losslessly (matching Series behavior). "
                        "To retain the old behavior, use DataFrame(data).astype(dtype)",
                        FutureWarning,
                        stacklevel=2,
                    )
                    # GH#40110 until the deprecation is enforced, we _dont_
                    #  ignore the dtype for DataFrame, and _do_ cast even though
                    #  it is lossy.
                    dtype = cast(np.dtype, dtype)
                    return np.array(data, dtype=dtype, copy=copy)

                # We ignore the dtype arg and return floating values,
                #  e.g. test_constructor_floating_data_int_dtype
                # TODO: where is the discussion that documents the reason for this?
                subarr = np.array(data, copy=copy)
        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()

    else:
        if isinstance(data, (set, frozenset)):
            # Raise only for unordered sets, e.g., not for dict_keys
            raise TypeError(f"'{type(data).__name__}' type is unordered")

        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        if hasattr(data, "__array__"):
            # e.g. dask array GH#38645
            data = np.asarray(data)
        else:
            data = list(data)

        if dtype is not None or len(data) == 0:
            try:
                subarr = _try_cast(data, dtype, copy, raise_cast_failure)
            except ValueError:
                if is_integer_dtype(dtype):
                    # np.asarray instead of np.array(..., copy=False): the
                    # latter raises on NumPy >= 2 when a copy is unavoidable,
                    # which is always the case for list input.
                    casted = np.asarray(data)
                    if casted.dtype.kind == "f":
                        # GH#40110 match the behavior we have if we passed
                        #  a ndarray[float] to begin with
                        return sanitize_array(
                            casted,
                            index,
                            dtype,
                            copy=False,
                            raise_cast_failure=raise_cast_failure,
                            allow_2d=allow_2d,
                        )
                # no fallback applies: propagate the cast failure rather
                # than continue with ``subarr`` unbound
                raise
        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr