def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, coerce=False, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ conversion_count = sum((datetime, numeric, timedelta)) if conversion_count == 0: raise ValueError('At least one of datetime, numeric or timedelta must ' 'be True.') elif conversion_count > 1 and coerce: raise ValueError("Only one of 'datetime', 'numeric' or " "'timedelta' can be True when when coerce=True.") if isinstance(values, (list, tuple)): # List or scalar values = np.array(values, dtype=np.object_) elif not hasattr(values, 'dtype'): values = np.array([values], dtype=np.object_) elif not is_object_dtype(values.dtype): # If not object, do not attempt conversion values = values.copy() if copy else values return values # If 1 flag is coerce, ensure 2 others are False if coerce: # Immediate return if coerce if datetime: from pandas import to_datetime return to_datetime(values, errors='coerce', box=False) elif timedelta: from pandas import to_timedelta return to_timedelta(values, errors='coerce', box=False) elif numeric: from pandas import to_numeric return to_numeric(values, errors='coerce') # Soft conversions if datetime: # GH 20380, when datetime is beyond year 2262, hence outside # bound of nanosecond-resolution 64-bit integers. try: values = lib.maybe_convert_objects(values, convert_datetime=datetime) except OutOfBoundsDatetime: pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert values = lib.maybe_convert_objects(values, convert_timedelta=timedelta) if numeric and is_object_dtype(values.dtype): try: converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # If all NaNs, then do not-alter values = converted if not isna(converted).all() else values values = values.copy() if copy else values except Exception: pass return values
def maybe_convert_platform(values): """ try to do platform conversion, allow ndarray or list here """ if isinstance(values, (list, tuple)): values = construct_1d_object_array_from_listlike(list(values)) if getattr(values, 'dtype', None) == np.object_: if hasattr(values, '_values'): values = values._values values = lib.maybe_convert_objects(values) return values
def test_maybe_convert_objects_uint64(self): # see gh-4471 arr = np.array([2**63], dtype=object) exp = np.array([2**63], dtype=np.uint64) tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) # NumPy bug: can't compare uint64 to int64, as that # results in both casting to float64, so we should # make sure that this function is robust against it arr = np.array([np.uint64(2**63)], dtype=object) exp = np.array([2**63], dtype=np.uint64) tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) arr = np.array([2, -1], dtype=object) exp = np.array([2, -1], dtype=np.int64) tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) arr = np.array([2**63, -1], dtype=object) exp = np.array([2**63, -1], dtype=object) tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
def _aggregate_series_pure_python( self, obj: Series, func: F, *args, engine: str = "cython", engine_kwargs=None, **kwargs, ): if engine == "numba": nopython, nogil, parallel = get_jit_arguments(engine_kwargs) check_kwargs_and_nopython(kwargs, nopython) validate_udf(func) cache_key = (func, "groupby_agg") numba_func = NUMBA_FUNC_CACHE.get( cache_key, jit_user_function(func, nopython, nogil, parallel) ) group_index, _, ngroups = self.group_info counts = np.zeros(ngroups, dtype=int) result = None splitter = get_splitter(obj, group_index, ngroups, axis=0) for label, group in splitter: if engine == "numba": values, index = split_for_numba(group) res = numba_func(values, index, *args) if cache_key not in NUMBA_FUNC_CACHE: NUMBA_FUNC_CACHE[cache_key] = numba_func else: res = func(group, *args, **kwargs) if result is None: if isinstance(res, (Series, Index, np.ndarray)): if len(res) == 1: # e.g. test_agg_lambda_with_timezone lambda e: e.head(1) # FIXME: are we potentially losing important res.index info? res = res.item() else: raise ValueError("Function does not reduce") result = np.empty(ngroups, dtype="O") counts[label] = group.shape[0] result[label] = res assert result is not None result = lib.maybe_convert_objects(result, try_float=0) # TODO: maybe_cast_to_extension_array? return result, counts
def agg_series( self, obj: Series, func: F, preserve_dtype: bool = False ) -> ArrayLike: """ Parameters ---------- obj : Series func : function taking a Series and returning a scalar-like preserve_dtype : bool Whether the aggregation is known to be dtype-preserving. Returns ------- np.ndarray or ExtensionArray """ # test_groupby_empty_with_category gets here with self.ngroups == 0 # and len(obj) > 0 if len(obj) == 0: # SeriesGrouper would raise if we were to call _aggregate_series_fast result = self._aggregate_series_pure_python(obj, func) elif not isinstance(obj._values, np.ndarray): # _aggregate_series_fast would raise TypeError when # calling libreduction.Slider # In the datetime64tz case it would incorrectly cast to tz-naive # TODO: can we get a performant workaround for EAs backed by ndarray? result = self._aggregate_series_pure_python(obj, func) # we can preserve a little bit more aggressively with EA dtype # because maybe_cast_pointwise_result will do a try/except # with _from_sequence. NB we are assuming here that _from_sequence # is sufficiently strict that it casts appropriately. preserve_dtype = True elif obj.index._has_complex_internals: # Preempt TypeError in _aggregate_series_fast result = self._aggregate_series_pure_python(obj, func) elif isinstance(self, BinGrouper): # Not yet able to remove the BaseGrouper aggregate_series_fast, # as test_crosstab.test_categorical breaks without it result = self._aggregate_series_pure_python(obj, func) else: result = self._aggregate_series_fast(obj, func) npvalues = lib.maybe_convert_objects(result, try_float=False) if preserve_dtype: out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True) else: out = npvalues return out
def agg_series(self, obj: Series, func: F, preserve_dtype: bool = False) -> ArrayLike: """ Parameters ---------- obj : Series func : function taking a Series and returning a scalar-like preserve_dtype : bool Whether the aggregation is known to be dtype-preserving. Returns ------- np.ndarray or ExtensionArray """ # test_groupby_empty_with_category gets here with self.ngroups == 0 # and len(obj) > 0 if len(obj) == 0: # SeriesGrouper would raise if we were to call _aggregate_series_fast result = self._aggregate_series_pure_python(obj, func) elif not isinstance(obj._values, np.ndarray): result = self._aggregate_series_pure_python(obj, func) # we can preserve a little bit more aggressively with EA dtype # because maybe_cast_pointwise_result will do a try/except # with _from_sequence. NB we are assuming here that _from_sequence # is sufficiently strict that it casts appropriately. preserve_dtype = True else: result = self._aggregate_series_pure_python(obj, func) npvalues = lib.maybe_convert_objects(result, try_float=False) if preserve_dtype: out = maybe_cast_pointwise_result(npvalues, obj.dtype, numeric_only=True) else: out = npvalues return out
def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: """ Convert Index to ndarray of indices to pass to NumPy/SciPy. """ xarr = index._values if needs_i8_conversion(xarr.dtype): # GH#1646 for dt64tz xarr = xarr.view("i8") if method == "linear": inds = xarr inds = cast(np.ndarray, inds) else: inds = np.asarray(xarr) if method in ("values", "index"): if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) return inds
def _aggregate_series_pure_python(self, obj, func): group_index, _, ngroups = self.group_info counts = np.zeros(ngroups, dtype=int) result = None splitter = get_splitter(obj, group_index, ngroups, axis=self.axis) for label, group in splitter: res = func(group) if result is None: if (isinstance(res, (Series, Index, np.ndarray))): raise ValueError('Function does not reduce') result = np.empty(ngroups, dtype='O') counts[label] = group.shape[0] result[label] = res result = lib.maybe_convert_objects(result, try_float=0) return result, counts
def lookup(df, row_labels, col_labels): r"""2D lookup function for a dataframe (Old Pandas Lookup Method) Args: df (DataFrame): DataFrame for lookup row_labels (List): Row labels to use for lookup. col_labels (List): Column labels to use for lookup. Returns: numpy.ndarray: Found values """ n = len(row_labels) if n != len(col_labels): raise ValueError("Row labels must have same size as column labels") if not (df.index.is_unique and df.columns.is_unique): raise ValueError("DataFrame.lookup requires unique index and columns") thresh = 1000 if not df._is_mixed_type or n > thresh: values = df.values ridx = df.index.get_indexer(row_labels) cidx = df.columns.get_indexer(col_labels) if (ridx == -1).any(): raise KeyError("One or more row labels was not found") if (cidx == -1).any(): raise KeyError("One or more column labels was not found") flat_index = ridx * len(df.columns) + cidx result = values.flat[flat_index] else: result = empty(n, dtype="O") for i, (r, c) in enumerate(zip(row_labels, col_labels)): print(r, c) result[i] = df._get_value(r, c) if is_object_dtype(result): result = lib.maybe_convert_objects(result) return result
def _str_map(self, f, na_value=None, dtype: Dtype | None = None, convert: bool = True): """ Map a callable over valid element of the array. Parameters ---------- f : Callable A function to call on each non-NA element. na_value : Scalar, optional The value to set for NA values. Might also be used for the fill value if the callable `f` raises an exception. This defaults to ``self._str_na_value`` which is ``np.nan`` for object-dtype and Categorical and ``pd.NA`` for StringArray. dtype : Dtype, optional The dtype of the result array. convert : bool, default True Whether to call `maybe_convert_objects` on the resulting ndarray """ if dtype is None: dtype = np.dtype("object") if na_value is None: na_value = self._str_na_value if not len(self): # error: Argument 1 to "ndarray" has incompatible type "int"; # expected "Sequence[int]" return np.ndarray(0, dtype=dtype) # type: ignore[arg-type] arr = np.asarray(self, dtype=object) mask = isna(arr) map_convert = convert and not np.all(mask) try: result = lib.map_infer_mask(arr, f, mask.view(np.uint8), map_convert) except (TypeError, AttributeError) as e: # Reraise the exception if callable `f` got wrong number of args. # The user may want to be warned by this, instead of getting NaN p_err = (r"((takes)|(missing)) (?(2)from \d+ to )?\d+ " r"(?(3)required )positional arguments?") if len(e.args) >= 1 and re.search(p_err, e.args[0]): # FIXME: this should be totally avoidable raise e def g(x): # This type of fallback behavior can be removed once # we remove object-dtype .str accessor. try: return f(x) except (TypeError, AttributeError): return na_value return self._str_map(g, na_value=na_value, dtype=dtype) if not isinstance(result, np.ndarray): return result if na_value is not np.nan: np.putmask(result, mask, na_value) if convert and result.dtype == object: result = lib.maybe_convert_objects(result) return result
def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): """ Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ if dtype is not None: dtype = pandas_dtype(dtype) if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): data, fill_value = maybe_upcast(data, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: data = data.copy() data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): if dtype is not None: subarr = np.array(data, copy=False) # possibility of nan -> garbage if is_float_dtype(data.dtype) and is_integer_dtype(dtype): try: subarr = _try_cast(data, True, dtype, copy, True) except ValueError: if copy: subarr = data.copy() else: subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, Index): # don't coerce Index types # e.g. indexes can have different conversions (so don't fast path # them) # GH#6140 subarr = sanitize_index(data, index, copy=copy) else: # we will try to copy be-definition here subarr = _try_cast(data, True, dtype, copy, raise_cast_failure) elif isinstance(data, ExtensionArray): if isinstance(data, ABCPandasArray): # We don't want to let people put our PandasArray wrapper # (the output of Series/Index.array), into a Series. So # we explicitly unwrap it here. subarr = data.to_numpy() else: subarr = data # everything else in this block must also handle ndarray's, # becuase we've unwrapped PandasArray into an ndarray. if dtype is not None: subarr = data.astype(dtype) if copy: subarr = data.copy() return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: try: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) except Exception: if raise_cast_failure: # pragma: no cover raise subarr = np.array(data, dtype=object, copy=copy) subarr = lib.maybe_convert_objects(subarr) else: subarr = maybe_convert_platform(data) subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH#16804 start, stop, step = get_range_parameters(data) arr = np.arange(start, stop, step, dtype='int64') subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure) else: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: if isinstance(data, list): # pragma: no cover subarr = np.array(data, dtype=object) elif index is not None: value = data # figure out the dtype from the value (upcast if necessary) if dtype is None: dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) else: return subarr.item() # the result that we want elif subarr.ndim == 1: if index is not None: # a 1-element ndarray if len(subarr) != len(index) and len(subarr) == 1: subarr = construct_1d_arraylike_from_scalar( subarr[0], len(index), subarr.dtype) elif subarr.ndim > 1: if isinstance(data, np.ndarray): raise Exception('Data must be 1-dimensional') else: subarr = com.asarray_tuplesafe(data, dtype=dtype) # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. if issubclass(subarr.dtype.type, compat.string_types): # GH#16605 # If not empty convert the data to dtype # GH#19853: If data is a scalar, subarr has already the result if not lib.is_scalar(data): if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) if is_object_dtype(subarr.dtype) and dtype != 'object': inferred = lib.infer_dtype(subarr, skipna=False) if inferred == 'period': try: subarr = period_array(subarr) except IncompatibleFrequency: pass return subarr
def soft_convert_objects( values: np.ndarray, datetime: bool = True, numeric: bool = True, timedelta: bool = True, coerce: bool = False, copy: bool = True, ): """ if we have an object dtype, try to coerce dates and/or numbers """ validate_bool_kwarg(datetime, "datetime") validate_bool_kwarg(numeric, "numeric") validate_bool_kwarg(timedelta, "timedelta") validate_bool_kwarg(coerce, "coerce") validate_bool_kwarg(copy, "copy") conversion_count = sum((datetime, numeric, timedelta)) if conversion_count == 0: raise ValueError( "At least one of datetime, numeric or timedelta must be True.") elif conversion_count > 1 and coerce: raise ValueError("Only one of 'datetime', 'numeric' or " "'timedelta' can be True when when coerce=True.") if not is_object_dtype(values.dtype): # If not object, do not attempt conversion values = values.copy() if copy else values return values # If 1 flag is coerce, ensure 2 others are False if coerce: # Immediate return if coerce if datetime: from pandas import to_datetime return to_datetime(values, errors="coerce").to_numpy() elif timedelta: from pandas import to_timedelta return to_timedelta(values, errors="coerce").to_numpy() elif numeric: from pandas import to_numeric return to_numeric(values, errors="coerce") # Soft conversions if datetime: # GH 20380, when datetime is beyond year 2262, hence outside # bound of nanosecond-resolution 64-bit integers. try: values = lib.maybe_convert_objects(values, convert_datetime=True) except OutOfBoundsDatetime: pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert values = lib.maybe_convert_objects(values, convert_timedelta=True) if numeric and is_object_dtype(values.dtype): try: converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # If all NaNs, then do not-alter values = converted if not isna(converted).all() else values values = values.copy() if copy else values except Exception: pass return values
def convert(arr): if dtype != np.dtype("O"): arr = lib.maybe_convert_objects(arr) arr = maybe_cast_to_datetime(arr, dtype) return arr
def test_convert_int_overflow(self, value): # see gh-18584 arr = np.array([value], dtype=object) result = lib.maybe_convert_objects(arr) tm.assert_numpy_array_equal(arr, result)
def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, convert_timedeltas=True, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ # if we have passed in a list or scalar if isinstance(values, (list, tuple)): values = np.array(values, dtype=np.object_) if not hasattr(values, 'dtype'): values = np.array([values], dtype=np.object_) # convert dates if convert_dates and values.dtype == np.object_: # we take an aggressive stance and convert to datetime64[ns] if convert_dates == 'coerce': new_values = maybe_cast_to_datetime(values, 'M8[ns]', errors='coerce') # if we are all nans then leave me alone if not isna(new_values).all(): values = new_values else: values = lib.maybe_convert_objects(values, convert_datetime=convert_dates) # convert timedeltas if convert_timedeltas and values.dtype == np.object_: if convert_timedeltas == 'coerce': from pandas.core.tools.timedeltas import to_timedelta new_values = to_timedelta(values, errors='coerce') # if we are all nans then leave me alone if not isna(new_values).all(): values = new_values else: values = lib.maybe_convert_objects( values, convert_timedelta=convert_timedeltas) # convert to numeric if values.dtype == np.object_: if convert_numeric: try: new_values = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # if we are all nans then leave me alone if not isna(new_values).all(): values = new_values except Exception: pass else: # soft-conversion values = lib.maybe_convert_objects(values) values = values.copy() if copy else values return values
def sanitize_array(data, index, dtype=None, copy=False, raise_cast_failure=False): """ Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ if dtype is not None: dtype = pandas_dtype(dtype) if isinstance(data, ma.MaskedArray): mask = ma.getmaskarray(data) if mask.any(): data, fill_value = maybe_upcast(data, copy=True) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value else: data = data.copy() # extract ndarray or ExtensionArray, ensure we have no PandasArray data = extract_array(data, extract_numpy=True) # GH#846 if isinstance(data, np.ndarray): if dtype is not None and is_float_dtype( data.dtype) and is_integer_dtype(dtype): # possibility of nan -> garbage try: subarr = _try_cast(data, dtype, copy, True) except ValueError: if copy: subarr = data.copy() else: subarr = np.array(data, copy=False) else: # we will try to copy be-definition here subarr = _try_cast(data, dtype, copy, raise_cast_failure) elif isinstance(data, ExtensionArray): # it is already ensured above this is not a PandasArray subarr = data if dtype is not None: subarr = subarr.astype(dtype, copy=copy) elif copy: subarr = subarr.copy() return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: if dtype is not None: try: subarr = _try_cast(data, dtype, copy, raise_cast_failure) except Exception: if raise_cast_failure: # pragma: no cover raise subarr = np.array(data, dtype=object, copy=copy) subarr = lib.maybe_convert_objects(subarr) else: subarr = maybe_convert_platform(data) subarr = maybe_cast_to_datetime(subarr, dtype) elif isinstance(data, range): # GH#16804 arr = np.arange(data.start, data.stop, data.step, dtype="int64") subarr = _try_cast(arr, dtype, copy, raise_cast_failure) else: subarr = _try_cast(data, dtype, copy, raise_cast_failure) # scalar like, GH if getattr(subarr, "ndim", 0) == 0: if isinstance(data, list): # pragma: no cover subarr = np.array(data, dtype=object) elif index is not None: value = data # figure out the dtype from the value (upcast if necessary) if dtype is None: dtype, value = infer_dtype_from_scalar(value) else: # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) else: return subarr.item() # the result that we want elif subarr.ndim == 1: if index is not None: # a 1-element ndarray if len(subarr) != len(index) and len(subarr) == 1: subarr = construct_1d_arraylike_from_scalar( subarr[0], len(index), subarr.dtype) elif subarr.ndim > 1: if isinstance(data, np.ndarray): raise Exception("Data must be 1-dimensional") else: subarr = com.asarray_tuplesafe(data, dtype=dtype) # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. if issubclass(subarr.dtype.type, str): # GH#16605 # If not empty convert the data to dtype # GH#19853: If data is a scalar, subarr has already the result if not lib.is_scalar(data): if not np.all(isna(data)): data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) if (not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)) and is_object_dtype(subarr.dtype) and not is_object_dtype(dtype)): inferred = lib.infer_dtype(subarr, skipna=False) if inferred == "period": try: subarr = period_array(subarr) except IncompatibleFrequency: pass return subarr
def convert(arr): if dtype != object and dtype != np.object: arr = lib.maybe_convert_objects(arr, try_float=coerce_float) arr = maybe_cast_to_datetime(arr, dtype) return arr
def time_maybe_convert_objects(self): lib.maybe_convert_objects(self.data)
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, limit_direction='forward', fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argumnet. """ # Treat the original, non-scipy methods first. invalid = isnull(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which cant be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == 'time': if not getattr(xvalues, 'is_all_dates', None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError('time-weighted interpolation only works ' 'on Series or DataFrames with a ' 'DatetimeIndex') method = 'values' def _interp_limit(invalid, fw_limit, bw_limit): "Get idx of values that won't be filled b/c they exceed the limits." for x in np.where(invalid)[0]: if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): yield x valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError('Invalid limit_direction: expecting one of %r, got ' '%r.' % (valid_limit_directions, limit_direction)) from pandas import Series ys = Series(yvalues) start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) # This is a list of the indexes in the series whose yvalue is currently # NaN, but whose interpolated yvalue will be overwritten with NaN after # computing the interpolation. For each index in this list, one of these # conditions is true of the corresponding NaN in the yvalues: # # a) It is one of a chain of NaNs at the beginning of the series, and # either limit is not specified or limit_direction is 'forward'. # b) It is one of a chain of NaNs at the end of the series, and limit is # specified and limit_direction is 'backward' or 'both'. # c) Limit is nonzero and it is further than limit from the nearest non-NaN # value (with respect to the limit_direction setting). # # The default behavior is to fill forward with no limit, ignoring NaNs at # the beginning (see issues #9218 and #10420) violate_limit = sorted(start_nans) if limit is not None: if not is_integer(limit): raise ValueError('Limit must be an integer') if limit < 1: raise ValueError('Limit must be greater than 0') if limit_direction == 'forward': violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) if limit_direction == 'backward': violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) if limit_direction == 'both': violate_limit = sorted(_interp_limit(invalid, limit, limit)) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) result = yvalues.copy() if method in ['linear', 'time', 'index', 'values']: if method in ('values', 'index'): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) result[violate_limit] = np.nan return result sp_methods = [ 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', 'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima' ] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper(inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) result[violate_limit] = np.nan return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, limit_direction='forward', fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argumnet. """ # Treat the original, non-scipy methods first. invalid = isnull(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which cant be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == 'time': if not getattr(xvalues, 'is_all_dates', None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError('time-weighted interpolation only works ' 'on Series or DataFrames with a ' 'DatetimeIndex') method = 'values' def _interp_limit(invalid, fw_limit, bw_limit): "Get idx of values that won't be filled b/c they exceed the limits." for x in np.where(invalid)[0]: if invalid[max(0, x - fw_limit):x + bw_limit + 1].all(): yield x valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError('Invalid limit_direction: expecting one of %r, got ' '%r.' % (valid_limit_directions, limit_direction)) from pandas import Series ys = Series(yvalues) start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) # This is a list of the indexes in the series whose yvalue is currently # NaN, but whose interpolated yvalue will be overwritten with NaN after # computing the interpolation. For each index in this list, one of these # conditions is true of the corresponding NaN in the yvalues: # # a) It is one of a chain of NaNs at the beginning of the series, and # either limit is not specified or limit_direction is 'forward'. # b) It is one of a chain of NaNs at the end of the series, and limit is # specified and limit_direction is 'backward' or 'both'. # c) Limit is nonzero and it is further than limit from the nearest non-NaN # value (with respect to the limit_direction setting). # # The default behavior is to fill forward with no limit, ignoring NaNs at # the beginning (see issues #9218 and #10420) violate_limit = sorted(start_nans) if limit is not None: if not is_integer(limit): raise ValueError('Limit must be an integer') if limit < 1: raise ValueError('Limit must be greater than 0') if limit_direction == 'forward': violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) if limit_direction == 'backward': violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) if limit_direction == 'both': violate_limit = sorted(_interp_limit(invalid, limit, limit)) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) result = yvalues.copy() if method in ['linear', 'time', 'index', 'values']: if method in ('values', 'index'): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) result[violate_limit] = np.nan return result sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', 'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima'] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper(inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) result[violate_limit] = np.nan return result
def interpolate_1d( xvalues: Index, yvalues: np.ndarray, method: Optional[str] = "linear", limit: Optional[int] = None, limit_direction: str = "forward", limit_area: Optional[str] = None, fill_value: Optional[Any] = None, bounds_error: bool = False, order: Optional[int] = None, **kwargs, ): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argument. """ invalid = isna(yvalues) valid = ~invalid if not valid.any(): result = np.empty(xvalues.shape, dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == "time": if not needs_i8_conversion(xvalues.dtype): raise ValueError("time-weighted interpolation only works " "on Series or DataFrames with a " "DatetimeIndex") method = "values" valid_limit_directions = ["forward", "backward", "both"] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError("Invalid limit_direction: expecting one of " f"{valid_limit_directions}, got '{limit_direction}'.") if limit_area is not None: valid_limit_areas = ["inside", "outside"] limit_area = limit_area.lower() if limit_area not in valid_limit_areas: raise ValueError( f"Invalid limit_area: expecting one of {valid_limit_areas}, got " f"{limit_area}.") # default limit is unlimited GH #16282 limit = algos.validate_limit(nobs=None, limit=limit) # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) start_nans = set(range(find_valid_index(yvalues, "first"))) end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) mid_nans = all_nans - start_nans - end_nans # Like the sets above, preserve_nans contains indices of invalid values, # but in this case, it is the final set of indices that need to be # preserved as NaN after the interpolation. # For example if limit_direction='forward' then preserve_nans will # contain indices of NaNs at the beginning of the series, and NaNs that # are more than'limit' away from the prior non-NaN. # set preserve_nans based on direction using _interp_limit preserve_nans: Union[List, Set] if limit_direction == "forward": preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == "backward": preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) else: # both directions... just use _interp_limit preserve_nans = set(_interp_limit(invalid, limit, limit)) # if limit_area is set, add either mid or outside indices # to preserve_nans GH #16284 if limit_area == "inside": # preserve NaNs on the outside preserve_nans |= start_nans | end_nans elif limit_area == "outside": # preserve NaNs on the inside preserve_nans |= mid_nans # sort preserve_nans and covert to list preserve_nans = sorted(preserve_nans) result = yvalues.copy() # xarr to pass to NumPy/SciPy xarr = xvalues._values if needs_i8_conversion(xarr.dtype): # GH#1646 for dt64tz xarr = xarr.view("i8") if method == "linear": inds = xarr else: inds = np.asarray(xarr) if method in ("values", "index"): if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) if method in NP_METHODS: # np.interp requires sorted X values, #21037 indexer = np.argsort(inds[valid]) result[invalid] = np.interp(inds[invalid], inds[valid][indexer], yvalues[valid][indexer]) else: result[invalid] = _interpolate_scipy_wrapper( inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs, ) result[preserve_nans] = np.nan return result
def test_maybe_convert_objects_nullable_integer(self, exp): # GH27335 arr = np.array([2, np.NaN], dtype=object) result = lib.maybe_convert_objects(arr, convert_to_nullable_integer=1) tm.assert_extension_array_equal(result, exp)
def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, coerce=False, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ conversion_count = sum((datetime, numeric, timedelta)) if conversion_count == 0: raise ValueError('At least one of datetime, numeric or timedelta must ' 'be True.') elif conversion_count > 1 and coerce: raise ValueError("Only one of 'datetime', 'numeric' or " "'timedelta' can be True when when coerce=True.") if isinstance(values, (list, tuple)): # List or scalar values = np.array(values, dtype=np.object_) elif not hasattr(values, 'dtype'): values = np.array([values], dtype=np.object_) elif not is_object_dtype(values.dtype): # If not object, do not attempt conversion values = values.copy() if copy else values return values # If 1 flag is coerce, ensure 2 others are False if coerce: # Immediate return if coerce if datetime: from pandas import to_datetime return to_datetime(values, errors='coerce').to_numpy() elif timedelta: from pandas import to_timedelta return to_timedelta(values, errors='coerce').to_numpy() elif numeric: from pandas import to_numeric return to_numeric(values, errors='coerce') # Soft conversions if datetime: # GH 20380, when datetime is beyond year 2262, hence outside # bound of nanosecond-resolution 64-bit integers. try: values = lib.maybe_convert_objects(values, convert_datetime=datetime) except OutOfBoundsDatetime: pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert values = lib.maybe_convert_objects(values, convert_timedelta=timedelta) if numeric and is_object_dtype(values.dtype): try: converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # If all NaNs, then do not-alter values = converted if not isna(converted).all() else values values = values.copy() if copy else values except Exception: pass return values
def interpolate_1d( xvalues, yvalues, method="linear", limit=None, limit_direction="forward", limit_area=None, fill_value=None, bounds_error=False, order=None, **kwargs, ): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argument. """ # Treat the original, non-scipy methods first. invalid = isna(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which can't be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == "time": if not getattr(xvalues, "is_all_dates", None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError( "time-weighted interpolation only works " "on Series or DataFrames with a " "DatetimeIndex" ) method = "values" valid_limit_directions = ["forward", "backward", "both"] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError( "Invalid limit_direction: expecting one of " f"{valid_limit_directions}, got '{limit_direction}'." ) if limit_area is not None: valid_limit_areas = ["inside", "outside"] limit_area = limit_area.lower() if limit_area not in valid_limit_areas: raise ValueError( f"Invalid limit_area: expecting one of {valid_limit_areas}, got " f"{limit_area}." ) # default limit is unlimited GH #16282 limit = algos._validate_limit(nobs=None, limit=limit) # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) start_nans = set(range(find_valid_index(yvalues, "first"))) end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) mid_nans = all_nans - start_nans - end_nans # Like the sets above, preserve_nans contains indices of invalid values, # but in this case, it is the final set of indices that need to be # preserved as NaN after the interpolation. # For example if limit_direction='forward' then preserve_nans will # contain indices of NaNs at the beginning of the series, and NaNs that # are more than'limit' away from the prior non-NaN. # set preserve_nans based on direction using _interp_limit if limit_direction == "forward": preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == "backward": preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) else: # both directions... just use _interp_limit preserve_nans = set(_interp_limit(invalid, limit, limit)) # if limit_area is set, add either mid or outside indices # to preserve_nans GH #16284 if limit_area == "inside": # preserve NaNs on the outside preserve_nans |= start_nans | end_nans elif limit_area == "outside": # preserve NaNs on the inside preserve_nans |= mid_nans # sort preserve_nans and covert to list preserve_nans = sorted(preserve_nans) xvalues = getattr(xvalues, "values", xvalues) yvalues = getattr(yvalues, "values", yvalues) result = yvalues.copy() if method in ["linear", "time", "index", "values"]: if method in ("values", "index"): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues # np.interp requires sorted X values, #21037 indexer = np.argsort(inds[valid]) result[invalid] = np.interp( inds[invalid], inds[valid][indexer], yvalues[valid][indexer] ) result[preserve_nans] = np.nan return result sp_methods = [ "nearest", "zero", "slinear", "quadratic", "cubic", "barycentric", "krogh", "spline", "polynomial", "from_derivatives", "piecewise_polynomial", "pchip", "akima", "cubicspline", ] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper( inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs, ) result[preserve_nans] = np.nan return result
def test_mixed_dtypes_remain_object_array(self): # GH14956 array = np.array([datetime(2015, 1, 1, tzinfo=pytz.utc), 1], dtype=object) result = lib.maybe_convert_objects(array, convert_datetime=1) tm.assert_numpy_array_equal(result, array)
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, limit_direction='forward', limit_area=None, fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argumnet. """ # Treat the original, non-scipy methods first. invalid = isna(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which can't be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == 'time': if not getattr(xvalues, 'is_all_dates', None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError('time-weighted interpolation only works ' 'on Series or DataFrames with a ' 'DatetimeIndex') method = 'values' valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: msg = ('Invalid limit_direction: expecting one of {valid!r}, ' 'got {invalid!r}.') raise ValueError( msg.format(valid=valid_limit_directions, invalid=limit_direction)) if limit_area is not None: valid_limit_areas = ['inside', 'outside'] limit_area = limit_area.lower() if limit_area not in valid_limit_areas: raise ValueError('Invalid limit_area: expecting one of {}, got ' '{}.'.format(valid_limit_areas, limit_area)) # default limit is unlimited GH #16282 if limit is None: # limit = len(xvalues) pass elif not is_integer(limit): raise ValueError('Limit must be an integer') elif limit < 1: raise ValueError('Limit must be greater than 0') from pandas import Series ys = Series(yvalues) # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) mid_nans = all_nans - start_nans - end_nans # Like the sets above, preserve_nans contains indices of invalid values, # but in this case, it is the final set of indices that need to be # preserved as NaN after the interpolation. # For example if limit_direction='forward' then preserve_nans will # contain indices of NaNs at the beginning of the series, and NaNs that # are more than'limit' away from the prior non-NaN. # set preserve_nans based on direction using _interp_limit if limit_direction == 'forward': preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == 'backward': preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) else: # both directions... just use _interp_limit preserve_nans = set(_interp_limit(invalid, limit, limit)) # if limit_area is set, add either mid or outside indices # to preserve_nans GH #16284 if limit_area == 'inside': # preserve NaNs on the outside preserve_nans |= start_nans | end_nans elif limit_area == 'outside': # preserve NaNs on the inside preserve_nans |= mid_nans # sort preserve_nans and covert to list preserve_nans = sorted(preserve_nans) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) result = yvalues.copy() if method in ['linear', 'time', 'index', 'values']: if method in ('values', 'index'): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) result[preserve_nans] = np.nan return result sp_methods = [ 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', 'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima' ] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper(inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) result[preserve_nans] = np.nan return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, limit_direction='forward', fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argumnet. """ # Treat the original, non-scipy methods first. invalid = isnull(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which cant be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == 'time': if not getattr(xvalues, 'is_all_dates', None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError('time-weighted interpolation only works ' 'on Series or DataFrames with a ' 'DatetimeIndex') method = 'values' valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError('Invalid limit_direction: expecting one of %r, got ' '%r.' % (valid_limit_directions, limit_direction)) from pandas import Series ys = Series(yvalues) start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) # violate_limit is a list of the indexes in the series whose yvalue is # currently NaN, and should still be NaN after the interpolation. # Specifically: # # If limit_direction='forward' or None then the list will contain NaNs at # the beginning of the series, and NaNs that are more than 'limit' away # from the prior non-NaN. # # If limit_direction='backward' then the list will contain NaNs at # the end of the series, and NaNs that are more than 'limit' away # from the subsequent non-NaN. # # If limit_direction='both' then the list will contain NaNs that # are more than 'limit' away from any non-NaN. # # If limit=None, then use default behavior of filling an unlimited number # of NaNs in the direction specified by limit_direction # default limit is unlimited GH #16282 if limit is None: # limit = len(xvalues) pass elif not is_integer(limit): raise ValueError('Limit must be an integer') elif limit < 1: raise ValueError('Limit must be greater than 0') # each possible limit_direction # TODO: do we need sorted? if limit_direction == 'forward' and limit is not None: violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) elif limit_direction == 'forward': violate_limit = sorted(start_nans) elif limit_direction == 'backward' and limit is not None: violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) elif limit_direction == 'backward': violate_limit = sorted(end_nans) elif limit_direction == 'both' and limit is not None: violate_limit = sorted(_interp_limit(invalid, limit, limit)) else: violate_limit = [] xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) result = yvalues.copy() if method in ['linear', 'time', 'index', 'values']: if method in ('values', 'index'): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) result[violate_limit] = np.nan return result sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', 'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima'] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper(inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) result[violate_limit] = np.nan return result
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, limit_direction='forward', fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argumnet. """ # Treat the original, non-scipy methods first. invalid = isnull(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which cant be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == 'time': if not getattr(xvalues, 'is_all_dates', None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError('time-weighted interpolation only works ' 'on Series or DataFrames with a ' 'DatetimeIndex') method = 'values' valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: raise ValueError('Invalid limit_direction: expecting one of %r, got ' '%r.' % (valid_limit_directions, limit_direction)) from pandas import Series ys = Series(yvalues) start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) # violate_limit is a list of the indexes in the series whose yvalue is # currently NaN, and should still be NaN after the interpolation. # Specifically: # # If limit_direction='forward' or None then the list will contain NaNs at # the beginning of the series, and NaNs that are more than 'limit' away # from the prior non-NaN. # # If limit_direction='backward' then the list will contain NaNs at # the end of the series, and NaNs that are more than 'limit' away # from the subsequent non-NaN. # # If limit_direction='both' then the list will contain NaNs that # are more than 'limit' away from any non-NaN. # # If limit=None, then use default behavior of filling an unlimited number # of NaNs in the direction specified by limit_direction # default limit is unlimited GH #16282 if limit is None: # limit = len(xvalues) pass elif not is_integer(limit): raise ValueError('Limit must be an integer') elif limit < 1: raise ValueError('Limit must be greater than 0') # each possible limit_direction # TODO: do we need sorted? if limit_direction == 'forward' and limit is not None: violate_limit = sorted(start_nans | set(_interp_limit(invalid, limit, 0))) elif limit_direction == 'forward': violate_limit = sorted(start_nans) elif limit_direction == 'backward' and limit is not None: violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0, limit))) elif limit_direction == 'backward': violate_limit = sorted(end_nans) elif limit_direction == 'both' and limit is not None: violate_limit = sorted(_interp_limit(invalid, limit, limit)) else: violate_limit = [] xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) result = yvalues.copy() if method in ['linear', 'time', 'index', 'values']: if method in ('values', 'index'): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) result[violate_limit] = np.nan return result sp_methods = [ 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', 'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima' ] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper(inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) result[violate_limit] = np.nan return result
def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, convert_timedeltas=True, copy=True): """ if we have an object dtype, try to coerce dates and/or numbers """ # if we have passed in a list or scalar if isinstance(values, (list, tuple)): values = np.array(values, dtype=np.object_) if not hasattr(values, 'dtype'): values = np.array([values], dtype=np.object_) # convert dates if convert_dates and values.dtype == np.object_: # we take an aggressive stance and convert to datetime64[ns] if convert_dates == 'coerce': new_values = maybe_cast_to_datetime( values, 'M8[ns]', errors='coerce') # if we are all nans then leave me alone if not isna(new_values).all(): values = new_values else: values = lib.maybe_convert_objects(values, convert_datetime=convert_dates) # convert timedeltas if convert_timedeltas and values.dtype == np.object_: if convert_timedeltas == 'coerce': from pandas.core.tools.timedeltas import to_timedelta new_values = to_timedelta(values, errors='coerce') # if we are all nans then leave me alone if not isna(new_values).all(): values = new_values else: values = lib.maybe_convert_objects( values, convert_timedelta=convert_timedeltas) # convert to numeric if values.dtype == np.object_: if convert_numeric: try: new_values = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # if we are all nans then leave me alone if not isna(new_values).all(): values = new_values except Exception: pass else: # soft-conversion values = lib.maybe_convert_objects(values) values = values.copy() if copy else values return values
def interpolate_1d(xvalues, yvalues, method='linear', limit=None, limit_direction='forward', limit_area=None, fill_value=None, bounds_error=False, order=None, **kwargs): """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argumnet. """ # Treat the original, non-scipy methods first. invalid = isna(yvalues) valid = ~invalid if not valid.any(): # have to call np.asarray(xvalues) since xvalues could be an Index # which can't be mutated result = np.empty_like(np.asarray(xvalues), dtype=np.float64) result.fill(np.nan) return result if valid.all(): return yvalues if method == 'time': if not getattr(xvalues, 'is_all_dates', None): # if not issubclass(xvalues.dtype.type, np.datetime64): raise ValueError('time-weighted interpolation only works ' 'on Series or DataFrames with a ' 'DatetimeIndex') method = 'values' valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: msg = ('Invalid limit_direction: expecting one of {valid!r}, ' 'got {invalid!r}.') raise ValueError(msg.format(valid=valid_limit_directions, invalid=limit_direction)) if limit_area is not None: valid_limit_areas = ['inside', 'outside'] limit_area = limit_area.lower() if limit_area not in valid_limit_areas: raise ValueError('Invalid limit_area: expecting one of {}, got ' '{}.'.format(valid_limit_areas, limit_area)) # default limit is unlimited GH #16282 if limit is None: # limit = len(xvalues) pass elif not is_integer(limit): raise ValueError('Limit must be an integer') elif limit < 1: raise ValueError('Limit must be greater than 0') from pandas import Series ys = Series(yvalues) # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) start_nans = set(range(ys.first_valid_index())) end_nans = set(range(1 + ys.last_valid_index(), len(valid))) mid_nans = all_nans - start_nans - end_nans # Like the sets above, preserve_nans contains indices of invalid values, # but in this case, it is the final set of indices that need to be # preserved as NaN after the interpolation. # For example if limit_direction='forward' then preserve_nans will # contain indices of NaNs at the beginning of the series, and NaNs that # are more than'limit' away from the prior non-NaN. # set preserve_nans based on direction using _interp_limit if limit_direction == 'forward': preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0)) elif limit_direction == 'backward': preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit)) else: # both directions... just use _interp_limit preserve_nans = set(_interp_limit(invalid, limit, limit)) # if limit_area is set, add either mid or outside indices # to preserve_nans GH #16284 if limit_area == 'inside': # preserve NaNs on the outside preserve_nans |= start_nans | end_nans elif limit_area == 'outside': # preserve NaNs on the inside preserve_nans |= mid_nans # sort preserve_nans and covert to list preserve_nans = sorted(preserve_nans) xvalues = getattr(xvalues, 'values', xvalues) yvalues = getattr(yvalues, 'values', yvalues) result = yvalues.copy() if method in ['linear', 'time', 'index', 'values']: if method in ('values', 'index'): inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if needs_i8_conversion(inds.dtype.type): inds = inds.view(np.int64) if inds.dtype == np.object_: inds = lib.maybe_convert_objects(inds) else: inds = xvalues result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid]) result[preserve_nans] = np.nan return result sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh', 'spline', 'polynomial', 'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima'] if method in sp_methods: inds = np.asarray(xvalues) # hack for DatetimeIndex, #1646 if issubclass(inds.dtype.type, np.datetime64): inds = inds.view(np.int64) result[invalid] = _interpolate_scipy_wrapper(inds[valid], yvalues[valid], inds[invalid], method=method, fill_value=fill_value, bounds_error=bounds_error, order=order, **kwargs) result[preserve_nans] = np.nan return result