Code example #1
File: cast.py  Project: brianholland/pandas
def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                         coerce=False, copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # When coerce=True, the checks above ensure only one conversion flag is set
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        # GH 20380: a datetime beyond year 2262 falls outside the
        # bounds of nanosecond-resolution 64-bit integers.
        try:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=datetime)
        except OutOfBoundsDatetime:
            pass

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values, set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not alter
            values = converted if not isna(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            pass

    return values
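A minimal usage sketch of the helper above (hedged: soft_convert_objects is a private helper in pandas.core.dtypes.cast, and its signature has shifted across versions):

import numpy as np
from pandas.core.dtypes.cast import soft_convert_objects  # assumed import path

mixed = np.array([1, 2.5, None], dtype=object)
out = soft_convert_objects(mixed)
# Expected: a float64 ndarray array([1. , 2.5, nan]) -- the soft-conversion
# path infers a common numeric dtype and maps None to NaN.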
Code example #2
File: cast.py  Project: cpcloud/pandas
def maybe_convert_platform(values):
    """ try to do platform conversion, allow ndarray or list here """

    if isinstance(values, (list, tuple)):
        values = construct_1d_object_array_from_listlike(list(values))
    if getattr(values, 'dtype', None) == np.object_:
        if hasattr(values, '_values'):
            values = values._values
        values = lib.maybe_convert_objects(values)

    return values
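A usage sketch (again a private helper; treat the import path as an assumption):

from pandas.core.dtypes.cast import maybe_convert_platform  # assumed import path

out = maybe_convert_platform([1, 2, 3])
# The list is first boxed into an object ndarray, then inferred down;
# out.dtype is the platform integer (int64 on 64-bit systems).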
Code example #3
File: test_inference.py  Project: cmazzullo/pandas
    def test_maybe_convert_objects_uint64(self):
        # see gh-4471
        arr = np.array([2**63], dtype=object)
        exp = np.array([2**63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        # NumPy bug: can't compare uint64 to int64, as that
        # results in both casting to float64, so we should
        # make sure that this function is robust against it
        arr = np.array([np.uint64(2**63)], dtype=object)
        exp = np.array([2**63], dtype=np.uint64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        arr = np.array([2, -1], dtype=object)
        exp = np.array([2, -1], dtype=np.int64)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)

        arr = np.array([2**63, -1], dtype=object)
        exp = np.array([2**63, -1], dtype=object)
        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
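The inference rules this test exercises can be reproduced directly; pandas._libs.lib is a private module, so exact behavior may vary by version:

import numpy as np
from pandas._libs import lib

lib.maybe_convert_objects(np.array([2**63], dtype=object)).dtype
# uint64: the value does not fit int64 but fits uint64
lib.maybe_convert_objects(np.array([2**63, -1], dtype=object)).dtype
# object: no single 64-bit integer dtype can hold both values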
Code example #4
    def _aggregate_series_pure_python(
        self,
        obj: Series,
        func: F,
        *args,
        engine: str = "cython",
        engine_kwargs=None,
        **kwargs,
    ):

        if engine == "numba":
            nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
            check_kwargs_and_nopython(kwargs, nopython)
            validate_udf(func)
            cache_key = (func, "groupby_agg")
            numba_func = NUMBA_FUNC_CACHE.get(
                cache_key, jit_user_function(func, nopython, nogil, parallel)
            )

        group_index, _, ngroups = self.group_info

        counts = np.zeros(ngroups, dtype=int)
        result = None

        splitter = get_splitter(obj, group_index, ngroups, axis=0)

        for label, group in splitter:
            if engine == "numba":
                values, index = split_for_numba(group)
                res = numba_func(values, index, *args)
                if cache_key not in NUMBA_FUNC_CACHE:
                    NUMBA_FUNC_CACHE[cache_key] = numba_func
            else:
                res = func(group, *args, **kwargs)

            if result is None:
                if isinstance(res, (Series, Index, np.ndarray)):
                    if len(res) == 1:
                        # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                        # FIXME: are we potentially losing important res.index info?
                        res = res.item()
                    else:
                        raise ValueError("Function does not reduce")
                result = np.empty(ngroups, dtype="O")

            counts[label] = group.shape[0]
            result[label] = res

        assert result is not None
        result = lib.maybe_convert_objects(result, try_float=False)
        # TODO: maybe_cast_to_extension_array?

        return result, counts
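The final maybe_convert_objects call is what turns the per-group object array of scalars back into a typed result. A standalone sketch (private lib module, illustrative only):

import numpy as np
from pandas._libs import lib

result = np.empty(3, dtype="O")
result[:] = [1, 2, 3]          # one scalar per group
typed = lib.maybe_convert_objects(result)
# typed.dtype is int64: the object array is inferred down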
Code example #5
File: ops.py  Project: rth/pandas
    def agg_series(
        self, obj: Series, func: F, preserve_dtype: bool = False
    ) -> ArrayLike:
        """
        Parameters
        ----------
        obj : Series
        func : function taking a Series and returning a scalar-like
        preserve_dtype : bool
            Whether the aggregation is known to be dtype-preserving.

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # test_groupby_empty_with_category gets here with self.ngroups == 0
        #  and len(obj) > 0

        if len(obj) == 0:
            # SeriesGrouper would raise if we were to call _aggregate_series_fast
            result = self._aggregate_series_pure_python(obj, func)

        elif not isinstance(obj._values, np.ndarray):
            # _aggregate_series_fast would raise TypeError when
            #  calling libreduction.Slider
            # In the datetime64tz case it would incorrectly cast to tz-naive
            # TODO: can we get a performant workaround for EAs backed by ndarray?
            result = self._aggregate_series_pure_python(obj, func)

            # we can preserve a little bit more aggressively with EA dtype
            #  because maybe_cast_pointwise_result will do a try/except
            #  with _from_sequence.  NB we are assuming here that _from_sequence
            #  is sufficiently strict that it casts appropriately.
            preserve_dtype = True

        elif obj.index._has_complex_internals:
            # Preempt TypeError in _aggregate_series_fast
            result = self._aggregate_series_pure_python(obj, func)

        elif isinstance(self, BinGrouper):
            # Not yet able to remove the BaseGrouper aggregate_series_fast,
            #  as test_crosstab.test_categorical breaks without it
            result = self._aggregate_series_pure_python(obj, func)

        else:
            result = self._aggregate_series_fast(obj, func)

        npvalues = lib.maybe_convert_objects(result, try_float=False)
        if preserve_dtype:
            out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True)
        else:
            out = npvalues
        return out
Code example #6
    def agg_series(self,
                   obj: Series,
                   func: F,
                   preserve_dtype: bool = False) -> ArrayLike:
        """
        Parameters
        ----------
        obj : Series
        func : function taking a Series and returning a scalar-like
        preserve_dtype : bool
            Whether the aggregation is known to be dtype-preserving.

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # test_groupby_empty_with_category gets here with self.ngroups == 0
        #  and len(obj) > 0

        if len(obj) == 0:
            # SeriesGrouper would raise if we were to call _aggregate_series_fast
            result = self._aggregate_series_pure_python(obj, func)

        elif not isinstance(obj._values, np.ndarray):
            result = self._aggregate_series_pure_python(obj, func)

            # we can preserve a little bit more aggressively with EA dtype
            #  because maybe_cast_pointwise_result will do a try/except
            #  with _from_sequence.  NB we are assuming here that _from_sequence
            #  is sufficiently strict that it casts appropriately.
            preserve_dtype = True

        else:
            result = self._aggregate_series_pure_python(obj, func)

        npvalues = lib.maybe_convert_objects(result, try_float=False)
        if preserve_dtype:
            out = maybe_cast_pointwise_result(npvalues,
                                              obj.dtype,
                                              numeric_only=True)
        else:
            out = npvalues
        return out
Code example #7
def _index_to_interp_indices(index: Index, method: str) -> np.ndarray:
    """
    Convert Index to ndarray of indices to pass to NumPy/SciPy.
    """
    xarr = index._values
    if needs_i8_conversion(xarr.dtype):
        # GH#1646 for dt64tz
        xarr = xarr.view("i8")

    if method == "linear":
        inds = xarr
        inds = cast(np.ndarray, inds)
    else:
        inds = np.asarray(xarr)

        if method in ("values", "index"):
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)

    return inds
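For an object-dtype index the maybe_convert_objects call is what makes the values numeric enough for np.interp. A sketch (private API, illustrative):

import numpy as np
from pandas._libs import lib

inds = np.array([0, 1, 2.5], dtype=object)
inds = lib.maybe_convert_objects(inds)
# float64 ndarray, ready to hand to NumPy/SciPy interpolators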
Code example #8
File: ops.py  Project: yhaque1213/pandas
    def _aggregate_series_pure_python(self, obj, func):

        group_index, _, ngroups = self.group_info

        counts = np.zeros(ngroups, dtype=int)
        result = None

        splitter = get_splitter(obj, group_index, ngroups, axis=self.axis)

        for label, group in splitter:
            res = func(group)
            if result is None:
                if isinstance(res, (Series, Index, np.ndarray)):
                    raise ValueError('Function does not reduce')
                result = np.empty(ngroups, dtype='O')

            counts[label] = group.shape[0]
            result[label] = res

        result = lib.maybe_convert_objects(result, try_float=False)
        return result, counts
Code example #9
File: tools.py  Project: OscarDeGar/py_grama
def lookup(df, row_labels, col_labels):
    r"""2D lookup function for a dataframe
    (Old Pandas Lookup Method)

    Args:
        df (DataFrame): DataFrame for lookup
        row_labels (List): Row labels to use for lookup.
        col_labels (List): Column labels to use for lookup.

    Returns:
        numpy.ndarray: Found values
    """
    n = len(row_labels)
    if n != len(col_labels):
        raise ValueError("Row labels must have same size as column labels")
    if not (df.index.is_unique and df.columns.is_unique):
        raise ValueError("DataFrame.lookup requires unique index and columns")

    thresh = 1000
    if not df._is_mixed_type or n > thresh:
        values = df.values
        ridx = df.index.get_indexer(row_labels)
        cidx = df.columns.get_indexer(col_labels)
        if (ridx == -1).any():
            raise KeyError("One or more row labels was not found")
        if (cidx == -1).any():
            raise KeyError("One or more column labels was not found")
        flat_index = ridx * len(df.columns) + cidx
        result = values.flat[flat_index]
    else:
        result = empty(n, dtype="O")
        for i, (r, c) in enumerate(zip(row_labels, col_labels)):
            result[i] = df._get_value(r, c)

    if is_object_dtype(result):
        result = lib.maybe_convert_objects(result)

    return result
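A usage sketch, assuming the lookup helper above is in scope. With mixed column dtypes and fewer than 1000 lookups it takes the per-cell path, so the result starts as an object array and is inferred down at the end:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3.0, 4.0]}, index=["x", "y"])
lookup(df, ["x", "y"], ["a", "b"])
# array([1., 4.]): int and float cells share a float64 result dtype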
Code example #10
    def _str_map(self,
                 f,
                 na_value=None,
                 dtype: Dtype | None = None,
                 convert: bool = True):
        """
        Map a callable over valid element of the array.

        Parameters
        ----------
        f : Callable
            A function to call on each non-NA element.
        na_value : Scalar, optional
            The value to set for NA values. Might also be used for the
            fill value if the callable `f` raises an exception.
            This defaults to ``self._str_na_value`` which is ``np.nan``
            for object-dtype and Categorical and ``pd.NA`` for StringArray.
        dtype : Dtype, optional
            The dtype of the result array.
        convert : bool, default True
            Whether to call `maybe_convert_objects` on the resulting ndarray
        """
        if dtype is None:
            dtype = np.dtype("object")
        if na_value is None:
            na_value = self._str_na_value

        if not len(self):
            # error: Argument 1 to "ndarray" has incompatible type "int";
            # expected "Sequence[int]"
            return np.ndarray(0, dtype=dtype)  # type: ignore[arg-type]

        arr = np.asarray(self, dtype=object)
        mask = isna(arr)
        map_convert = convert and not np.all(mask)
        try:
            result = lib.map_infer_mask(arr, f, mask.view(np.uint8),
                                        map_convert)
        except (TypeError, AttributeError) as e:
            # Reraise the exception if callable `f` got wrong number of args.
            # The user may want to be warned by this, instead of getting NaN
            p_err = (r"((takes)|(missing)) (?(2)from \d+ to )?\d+ "
                     r"(?(3)required )positional arguments?")

            if len(e.args) >= 1 and re.search(p_err, e.args[0]):
                # FIXME: this should be totally avoidable
                raise e

            def g(x):
                # This type of fallback behavior can be removed once
                # we remove object-dtype .str accessor.
                try:
                    return f(x)
                except (TypeError, AttributeError):
                    return na_value

            return self._str_map(g, na_value=na_value, dtype=dtype)
        if not isinstance(result, np.ndarray):
            return result
        if na_value is not np.nan:
            np.putmask(result, mask, na_value)
            if convert and result.dtype == object:
                result = lib.maybe_convert_objects(result)
        return result
Code example #11
File: construction.py  Project: PaulGureghian1/Pandas
def sanitize_array(data,
                   index,
                   dtype=None,
                   copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy, True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy by definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarrays,
        # because we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all cast to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                pass

    return subarr
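A usage sketch (sanitize_array is internal to Series construction; depending on the pandas version it lives in pandas.core.construction or pandas.core.internals.construction):

from pandas.core.construction import sanitize_array  # assumed import path

sanitize_array([1, 2, 3], index=None)    # int64 ndarray via maybe_convert_platform
sanitize_array(["a", None], index=None)  # stays object dtype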
Code example #12
def soft_convert_objects(
    values: np.ndarray,
    datetime: bool = True,
    numeric: bool = True,
    timedelta: bool = True,
    coerce: bool = False,
    copy: bool = True,
):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    validate_bool_kwarg(datetime, "datetime")
    validate_bool_kwarg(numeric, "numeric")
    validate_bool_kwarg(timedelta, "timedelta")
    validate_bool_kwarg(coerce, "coerce")
    validate_bool_kwarg(copy, "copy")

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError(
            "At least one of datetime, numeric or timedelta must be True.")
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # When coerce=True, the checks above ensure only one conversion flag is set
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime

            return to_datetime(values, errors="coerce").to_numpy()
        elif timedelta:
            from pandas import to_timedelta

            return to_timedelta(values, errors="coerce").to_numpy()
        elif numeric:
            from pandas import to_numeric

            return to_numeric(values, errors="coerce")

    # Soft conversions
    if datetime:
        # GH 20380: a datetime beyond year 2262 falls outside the
        # bounds of nanosecond-resolution 64-bit integers.
        try:
            values = lib.maybe_convert_objects(values, convert_datetime=True)
        except OutOfBoundsDatetime:
            pass

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=True)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not alter
            values = converted if not isna(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            pass

    return values
Code example #13
File: construction.py  Project: ParfaitG/pandas
 def convert(arr):
     if dtype != np.dtype("O"):
         arr = lib.maybe_convert_objects(arr)
         arr = maybe_cast_to_datetime(arr, dtype)
     return arr
Code example #14
File: test_inference.py  Project: yyonggonyy/pandas
 def test_convert_int_overflow(self, value):
     # see gh-18584
     arr = np.array([value], dtype=object)
     result = lib.maybe_convert_objects(arr)
     tm.assert_numpy_array_equal(arr, result)
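The parametrized value is elided by the aggregator, but gh-18584 concerns integers beyond the 64-bit range, which must be left as object rather than silently wrapped. A sketch (private lib module):

import numpy as np
from pandas._libs import lib

arr = np.array([2**70], dtype=object)   # too large even for uint64
lib.maybe_convert_objects(arr).dtype    # object: conversion is skipped on overflow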
Code example #15
def maybe_convert_objects(values,
                          convert_dates=True,
                          convert_numeric=True,
                          convert_timedeltas=True,
                          copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == 'coerce':
            new_values = maybe_cast_to_datetime(values,
                                                'M8[ns]',
                                                errors='coerce')

            # if we are all nans then leave me alone
            if not isna(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == 'coerce':
            from pandas.core.tools.timedeltas import to_timedelta
            new_values = to_timedelta(values, errors='coerce')

            # if we are all nans then leave me alone
            if not isna(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values,
                                                       set(),
                                                       coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isna(new_values).all():
                    values = new_values

            except Exception:
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
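The 'coerce' branches above wrap the same semantics as the public conversion functions, which makes them easy to preview without touching private helpers:

import numpy as np
import pandas as pd

pd.to_datetime(np.array(["2020-01-01", "not a date"], dtype=object),
               errors="coerce")   # unparseable entries become NaT
pd.to_numeric(np.array(["1", "x"], dtype=object),
              errors="coerce")    # -> array([ 1., nan])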
Code example #16
File: construction.py  Project: tyang20/pandas
def sanitize_array(data,
                   index,
                   dtype=None,
                   copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None and is_float_dtype(
                data.dtype) and is_integer_dtype(dtype):
            # possibility of nan -> garbage
            try:
                subarr = _try_cast(data, dtype, copy, True)
            except ValueError:
                if copy:
                    subarr = data.copy()
                else:
                    subarr = np.array(data, copy=False)
        else:
            # we will try to copy by definition here
            subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        # it is already ensured above this is not a PandasArray
        subarr = data

        if dtype is not None:
            subarr = subarr.astype(dtype, copy=copy)
        elif copy:
            subarr = subarr.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, dtype, copy, raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        arr = np.arange(data.start, data.stop, data.step, dtype="int64")
        subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, "ndim", 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception("Data must be 1-dimensional")
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all cast to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, str):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if (not (is_extension_array_dtype(subarr.dtype)
             or is_extension_array_dtype(dtype))
            and is_object_dtype(subarr.dtype) and not is_object_dtype(dtype)):
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == "period":
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                pass

    return subarr
Code example #17
File: construction.py  Project: Itay4/pandas
 def convert(arr):
     if dtype != object and dtype != np.object:
         arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
         arr = maybe_cast_to_datetime(arr, dtype)
     return arr
Code example #18
File: construction.py  Project: Itay4/pandas
def sanitize_array(data, index, dtype=None, copy=False,
                   raise_cast_failure=False):
    """
    Sanitize input data to an ndarray, copy if specified, coerce to the
    dtype if specified.
    """
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ma.MaskedArray):
        mask = ma.getmaskarray(data)
        if mask.any():
            data, fill_value = maybe_upcast(data, copy=True)
            data.soften_mask()  # set hardmask False if it was True
            data[mask] = fill_value
        else:
            data = data.copy()

    data = extract_array(data, extract_numpy=True)

    # GH#846
    if isinstance(data, np.ndarray):

        if dtype is not None:
            subarr = np.array(data, copy=False)

            # possibility of nan -> garbage
            if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
                try:
                    subarr = _try_cast(data, True, dtype, copy,
                                       True)
                except ValueError:
                    if copy:
                        subarr = data.copy()
            else:
                subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
        elif isinstance(data, Index):
            # don't coerce Index types
            # e.g. indexes can have different conversions (so don't fast path
            # them)
            # GH#6140
            subarr = sanitize_index(data, index, copy=copy)
        else:

            # we will try to copy by definition here
            subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)

    elif isinstance(data, ExtensionArray):
        if isinstance(data, ABCPandasArray):
            # We don't want to let people put our PandasArray wrapper
            # (the output of Series/Index.array), into a Series. So
            # we explicitly unwrap it here.
            subarr = data.to_numpy()
        else:
            subarr = data

        # everything else in this block must also handle ndarrays,
        # because we've unwrapped PandasArray into an ndarray.

        if dtype is not None:
            subarr = data.astype(dtype)

        if copy:
            subarr = data.copy()
        return subarr

    elif isinstance(data, (list, tuple)) and len(data) > 0:
        if dtype is not None:
            try:
                subarr = _try_cast(data, False, dtype, copy,
                                   raise_cast_failure)
            except Exception:
                if raise_cast_failure:  # pragma: no cover
                    raise
                subarr = np.array(data, dtype=object, copy=copy)
                subarr = lib.maybe_convert_objects(subarr)

        else:
            subarr = maybe_convert_platform(data)

        subarr = maybe_cast_to_datetime(subarr, dtype)

    elif isinstance(data, range):
        # GH#16804
        start, stop, step = get_range_parameters(data)
        arr = np.arange(start, stop, step, dtype='int64')
        subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
    else:
        subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)

    # scalar like, GH
    if getattr(subarr, 'ndim', 0) == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # figure out the dtype from the value (upcast if necessary)
            if dtype is None:
                dtype, value = infer_dtype_from_scalar(value)
            else:
                # need to possibly convert the value here
                value = maybe_cast_to_datetime(value, dtype)

            subarr = construct_1d_arraylike_from_scalar(
                value, len(index), dtype)

        else:
            return subarr.item()

    # the result that we want
    elif subarr.ndim == 1:
        if index is not None:

            # a 1-element ndarray
            if len(subarr) != len(index) and len(subarr) == 1:
                subarr = construct_1d_arraylike_from_scalar(
                    subarr[0], len(index), subarr.dtype)

    elif subarr.ndim > 1:
        if isinstance(data, np.ndarray):
            raise Exception('Data must be 1-dimensional')
        else:
            subarr = com.asarray_tuplesafe(data, dtype=dtype)

    # This is to prevent mixed-type Series getting all cast to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, compat.string_types):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, subarr has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.array(data, dtype=dtype, copy=False)
            subarr = np.array(data, dtype=object, copy=copy)

    if is_object_dtype(subarr.dtype) and dtype != 'object':
        inferred = lib.infer_dtype(subarr, skipna=False)
        if inferred == 'period':
            try:
                subarr = period_array(subarr)
            except IncompatibleFrequency:
                pass

    return subarr
Code example #19
File: algorithms.py  Project: markmc0/pandas-tester
 def time_maybe_convert_objects(self):
     lib.maybe_convert_objects(self.data)
Code example #20
File: missing.py  Project: lichunhong2010/-python-
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        "Get idx of values that won't be filled b/c they exceed the limits."
        for x in np.where(invalid)[0]:
            if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                yield x

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # This is a list of the indexes in the series whose yvalue is currently
    # NaN, but whose interpolated yvalue will be overwritten with NaN after
    # computing the interpolation. For each index in this list, one of these
    # conditions is true of the corresponding NaN in the yvalues:
    #
    # a) It is one of a chain of NaNs at the beginning of the series, and
    #    either limit is not specified or limit_direction is 'forward'.
    # b) It is one of a chain of NaNs at the end of the series, and limit is
    #    specified and limit_direction is 'backward' or 'both'.
    # c) Limit is nonzero and it is further than limit from the nearest non-NaN
    #    value (with respect to the limit_direction setting).
    #
    # The default behavior is to fill forward with no limit, ignoring NaNs at
    # the beginning (see issues #9218 and #10420)
    violate_limit = sorted(start_nans)

    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')
        if limit_direction == 'forward':
            violate_limit = sorted(start_nans
                                   | set(_interp_limit(invalid, limit, 0)))
        if limit_direction == 'backward':
            violate_limit = sorted(end_nans
                                   | set(_interp_limit(invalid, 0, limit)))
        if limit_direction == 'both':
            violate_limit = sorted(_interp_limit(invalid, limit, limit))

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[violate_limit] = np.nan
        return result
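What this internal implements is reachable through the public API; a quick check of the default forward direction and the limit handling:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, np.nan, 4.0])
s.interpolate()         # [nan, 1.0, 2.0, 3.0, 4.0]: leading NaN is preserved
s.interpolate(limit=1)  # [nan, 1.0, 2.0, nan, 4.0]: only one NaN filled forward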
Code example #21
File: missing.py  Project: ankostis/pandas
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        "Get idx of values that won't be filled b/c they exceed the limits."
        for x in np.where(invalid)[0]:
            if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                yield x

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # This is a list of the indexes in the series whose yvalue is currently
    # NaN, but whose interpolated yvalue will be overwritten with NaN after
    # computing the interpolation. For each index in this list, one of these
    # conditions is true of the corresponding NaN in the yvalues:
    #
    # a) It is one of a chain of NaNs at the beginning of the series, and
    #    either limit is not specified or limit_direction is 'forward'.
    # b) It is one of a chain of NaNs at the end of the series, and limit is
    #    specified and limit_direction is 'backward' or 'both'.
    # c) Limit is nonzero and it is further than limit from the nearest non-NaN
    #    value (with respect to the limit_direction setting).
    #
    # The default behavior is to fill forward with no limit, ignoring NaNs at
    # the beginning (see issues #9218 and #10420)
    violate_limit = sorted(start_nans)

    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')
        if limit_direction == 'forward':
            violate_limit = sorted(start_nans | set(_interp_limit(invalid,
                                                                  limit, 0)))
        if limit_direction == 'backward':
            violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0,
                                                                limit)))
        if limit_direction == 'both':
            violate_limit = sorted(_interp_limit(invalid, limit, limit))

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
Code example #22
File: missing.py  Project: zacqed/pandas
def interpolate_1d(
    xvalues: Index,
    yvalues: np.ndarray,
    method: Optional[str] = "linear",
    limit: Optional[int] = None,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    fill_value: Optional[Any] = None,
    bounds_error: bool = False,
    order: Optional[int] = None,
    **kwargs,
):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        result = np.empty(xvalues.shape, dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == "time":
        if not needs_i8_conversion(xvalues.dtype):
            raise ValueError("time-weighted interpolation only works "
                             "on Series or DataFrames with a "
                             "DatetimeIndex")
        method = "values"

    valid_limit_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError("Invalid limit_direction: expecting one of "
                         f"{valid_limit_directions}, got '{limit_direction}'.")

    if limit_area is not None:
        valid_limit_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}.")

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(find_valid_index(yvalues, "first")))
    end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: Union[List, Set]
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to a list
    preserve_nans = sorted(preserve_nans)

    result = yvalues.copy()

    # xarr to pass to NumPy/SciPy
    xarr = xvalues._values
    if needs_i8_conversion(xarr.dtype):
        # GH#1646 for dt64tz
        xarr = xarr.view("i8")

    if method == "linear":
        inds = xarr
    else:
        inds = np.asarray(xarr)

        if method in ("values", "index"):
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037
        indexer = np.argsort(inds[valid])
        result[invalid] = np.interp(inds[invalid], inds[valid][indexer],
                                    yvalues[valid][indexer])
    else:
        result[invalid] = _interpolate_scipy_wrapper(
            inds[valid],
            yvalues[valid],
            inds[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    result[preserve_nans] = np.nan
    return result
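The limit_area handling added in this version is easiest to see through the public Series.interpolate:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])
s.interpolate(limit_direction="both", limit_area="inside")
# [nan, 1.0, 2.0, 3.0, nan]: only the interior gap is filled,
# leading and trailing NaNs are preserved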
Code example #23
File: test_inference.py  Project: ymerkli/pandas
    def test_maybe_convert_objects_nullable_integer(self, exp):
        # GH27335
        arr = np.array([2, np.NaN], dtype=object)
        result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=True)

        tm.assert_extension_array_equal(result, exp)
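The exp fixture is elided by the aggregator; for this input the nullable-integer path would presumably produce an Int64 extension array, whose public equivalent is:

import numpy as np
import pandas as pd

pd.array([2, np.nan], dtype="Int64")   # <IntegerArray> [2, <NA>]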
Code example #24
def soft_convert_objects(values,
                         datetime=True,
                         numeric=True,
                         timedelta=True,
                         coerce=False,
                         copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # When coerce=True, the checks above ensure only one conversion flag is set
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce').to_numpy()
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce').to_numpy()
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        # GH 20380: a datetime beyond year 2262 falls outside the
        # bounds of nanosecond-resolution 64-bit integers.
        try:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=datetime)
        except OutOfBoundsDatetime:
            pass

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not alter
            values = converted if not isna(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            pass

    return values
Code example #25
def interpolate_1d(
    xvalues,
    yvalues,
    method="linear",
    limit=None,
    limit_direction="forward",
    limit_area=None,
    fill_value=None,
    bounds_error=False,
    order=None,
    **kwargs,
):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == "time":
        if not getattr(xvalues, "is_all_dates", None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        method = "values"

    valid_limit_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        valid_limit_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos._validate_limit(nobs=None, limit=limit)

    # These are sets of index pointers to invalid values, e.g. {0, 1, ...}
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(find_valid_index(yvalues, "first")))
    end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    xvalues = getattr(xvalues, "values", xvalues)
    yvalues = getattr(yvalues, "values", yvalues)
    result = yvalues.copy()

    if method in ["linear", "time", "index", "values"]:
        if method in ("values", "index"):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # np.interp requires sorted X values, #21037
        indexer = np.argsort(inds[valid])
        result[invalid] = np.interp(
            inds[invalid], inds[valid][indexer], yvalues[valid][indexer]
        )
        result[preserve_nans] = np.nan
        return result

    sp_methods = [
        "nearest",
        "zero",
        "slinear",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "spline",
        "polynomial",
        "from_derivatives",
        "piecewise_polynomial",
        "pchip",
        "akima",
        "cubicspline",
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(
            inds[valid],
            yvalues[valid],
            inds[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )
        result[preserve_nans] = np.nan
        return result
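The argsort above (GH 21037) matters because np.interp assumes its x-coordinates are increasing and silently returns wrong values otherwise; a self-contained illustration:

import numpy as np

xp = np.array([3.0, 1.0, 2.0])   # unsorted x-coordinates
fp = np.array([30.0, 10.0, 20.0])
print(np.interp(1.5, xp, fp))    # unreliable: xp is not increasing

order = np.argsort(xp)           # the fix applied in the snippet above
print(np.interp(1.5, xp[order], fp[order]))  # 15.0, as expected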
Code example #30
File: test_inference.py Project: yyonggonyy/pandas
    def test_mixed_dtypes_remain_object_array(self):
        # GH14956
        array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1],
                         dtype=object)
        result = lib.maybe_convert_objects(array, convert_datetime=1)
        tm.assert_numpy_array_equal(result, array)
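A hedged contrast to the test above, showing when the soft conversion can succeed (pandas._libs.lib is internal and the keyword form is version-specific):

from datetime import datetime
import numpy as np
from pandas._libs import lib  # internal API

naive = np.array([datetime(2015, 1, 1), datetime(2015, 1, 2)], dtype=object)
out = lib.maybe_convert_objects(naive, convert_datetime=1)
# with homogeneous naive datetimes the conversion can yield datetime64[ns];
# the mixed tz-aware/int array in the test must stay object instead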
Code example #31
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   limit_area=None,
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Logic for the 1-d interpolation. The result should be 1-d; inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(
            msg.format(valid=valid_limit_directions, invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values, e.g. {0, 1, ...}
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[preserve_nans] = np.nan
        return result
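The limit_direction and limit_area bookkeeping above is reachable through the public Series.interpolate, which routes into interpolate_1d; a small demonstration:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, np.nan, 4.0, np.nan])

# forward direction, limit=1: the leading NaN has no prior valid value and
# index 3 is more than one step past the last valid value, so both stay NaN
print(s.interpolate(limit_direction="forward", limit=1))

# limit_area="inside": only NaNs surrounded by valid values are filled, so
# indices 2 and 3 become 2.0 and 3.0 while indices 0 and 5 stay NaN
print(s.interpolate(limit_area="inside"))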
Code example #32
File: missing.py Project: AllenDowney/pandas
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation. The result should be 1-d; inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit is a list of the indexes in the series whose yvalue is
    # currently NaN, and should still be NaN after the interpolation.
    # Specifically:
    #
    # If limit_direction='forward' or None then the list will contain NaNs at
    # the beginning of the series, and NaNs that are more than 'limit' away
    # from the prior non-NaN.
    #
    # If limit_direction='backward' then the list will contain NaNs at
    # the end of the series, and NaNs that are more than 'limit' away
    # from the subsequent non-NaN.
    #
    # If limit_direction='both' then the list will contain NaNs that
    # are more than 'limit' away from any non-NaN.
    #
    # If limit=None, then use default behavior of filling an unlimited number
    # of NaNs in the direction specified by limit_direction

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    # each possible limit_direction
    # TODO: do we need sorted?
    if limit_direction == 'forward' and limit is not None:
        violate_limit = sorted(start_nans |
                               set(_interp_limit(invalid, limit, 0)))
    elif limit_direction == 'forward':
        violate_limit = sorted(start_nans)
    elif limit_direction == 'backward' and limit is not None:
        violate_limit = sorted(end_nans |
                               set(_interp_limit(invalid, 0, limit)))
    elif limit_direction == 'backward':
        violate_limit = sorted(end_nans)
    elif limit_direction == 'both' and limit is not None:
        violate_limit = sorted(_interp_limit(invalid, limit, limit))
    else:
        violate_limit = []

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
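A worked example of the start_nans / end_nans index sets computed above:

import numpy as np
import pandas as pd

yvalues = np.array([np.nan, np.nan, 1.0, np.nan, 4.0, np.nan])
ys = pd.Series(yvalues)

start_nans = set(range(ys.first_valid_index()))            # {0, 1}
end_nans = set(range(1 + ys.last_valid_index(), len(ys)))  # {5}
# index 3 is the only interior NaN; whether it stays NaN after the
# interpolation depends on limit and limit_direction, per the logic above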
Code example #33
File: test_inference.py Project: cmazzullo/pandas
    def test_mixed_dtypes_remain_object_array(self):
        # GH14956
        array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1],
                         dtype=object)
        result = lib.maybe_convert_objects(array, convert_datetime=1)
        tm.assert_numpy_array_equal(result, array)
Code example #34
File: construction.py Project: PaulGureghian1/Pandas
    def convert(arr):
        if dtype != object and dtype != np.object:
            arr = lib.maybe_convert_objects(arr, try_float=coerce_float)
            arr = maybe_cast_to_datetime(arr, dtype)
        return arr
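convert appears to be a construction-time closure over dtype and coerce_float; the try_float flag it forwards corresponds to the public coerce_float argument, for example on DataFrame.from_records:

from decimal import Decimal
import pandas as pd

rows = [(1, Decimal("1.5")), (2, Decimal("2.5"))]
# coerce_float=True asks the constructor to try converting object values
# such as Decimal (common in SQL result sets) to float64
df = pd.DataFrame.from_records(rows, columns=["a", "b"], coerce_float=True)
print(df.dtypes)  # column "b" should be float64 rather than object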
Code example #35
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Logic for the 1-d interpolation. The result should be 1-d; inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit is a list of the indexes in the series whose yvalue is
    # currently NaN, and should still be NaN after the interpolation.
    # Specifically:
    #
    # If limit_direction='forward' or None then the list will contain NaNs at
    # the beginning of the series, and NaNs that are more than 'limit' away
    # from the prior non-NaN.
    #
    # If limit_direction='backward' then the list will contain NaNs at
    # the end of the series, and NaNs that are more than 'limit' away
    # from the subsequent non-NaN.
    #
    # If limit_direction='both' then the list will contain NaNs that
    # are more than 'limit' away from any non-NaN.
    #
    # If limit=None, then use default behavior of filling an unlimited number
    # of NaNs in the direction specified by limit_direction

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    # each possible limit_direction
    # TODO: do we need sorted?
    if limit_direction == 'forward' and limit is not None:
        violate_limit = sorted(start_nans
                               | set(_interp_limit(invalid, limit, 0)))
    elif limit_direction == 'forward':
        violate_limit = sorted(start_nans)
    elif limit_direction == 'backward' and limit is not None:
        violate_limit = sorted(end_nans
                               | set(_interp_limit(invalid, 0, limit)))
    elif limit_direction == 'backward':
        violate_limit = sorted(end_nans)
    elif limit_direction == 'both' and limit is not None:
        violate_limit = sorted(_interp_limit(invalid, limit, limit))
    else:
        violate_limit = []

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[violate_limit] = np.nan
        return result
Code example #36
File: cast.py Project: cpcloud/pandas
def maybe_convert_objects(values, convert_dates=True, convert_numeric=True,
                          convert_timedeltas=True, copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == 'coerce':
            new_values = maybe_cast_to_datetime(
                values, 'M8[ns]', errors='coerce')

            # if we are all nans then leave me alone
            if not isna(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == 'coerce':
            from pandas.core.tools.timedeltas import to_timedelta
            new_values = to_timedelta(values, errors='coerce')

            # if we are all nans then leave me alone
            if not isna(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values, set(),
                                                       coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isna(new_values).all():
                    values = new_values

            except Exception:
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
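A hedged sketch of the coercing date path above; this maybe_convert_objects is the internal wrapper from pandas.core.dtypes.cast in the version shown (it has since been removed), so the import is an assumption tied to this snippet's era:

import numpy as np
from pandas.core.dtypes.cast import maybe_convert_objects  # internal API

obj = np.array(['2015-01-01', '2015-01-02', 'not-a-date'], dtype=object)
# convert_dates='coerce' casts aggressively to datetime64[ns]; unparseable
# entries should become NaT, and an all-NaT result would leave the input
# unchanged per the all-NaN check above
out = maybe_convert_objects(obj, convert_dates='coerce',
                            convert_numeric=False)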
Code example #37
File: missing.py Project: brianholland/pandas
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', limit_area=None, fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation. The result should be 1-d; inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(msg.format(valid=valid_limit_directions,
                                    invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values, e.g. {0, 1, ...}
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[preserve_nans] = np.nan
        return result
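The scipy-backed branch above is reachable from the public API as well; this sketch assumes scipy is installed:

import numpy as np
import pandas as pd

s = pd.Series([0.0, 1.0, np.nan, 9.0])

# 'polynomial' requires an explicit order; fitting through the valid points
# (0, 0), (1, 1), (3, 9) should fill index 2 with roughly 4.0 (x squared)
print(s.interpolate(method="polynomial", order=2))

# 'pchip' (shape-preserving piecewise cubic) needs no order argument
print(s.interpolate(method="pchip"))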