def fillna(self: T, value, limit, inplace: bool, downcast) -> T:
    """
    Fill NA values across the arrays held by this manager.

    Parameters
    ----------
    value : scalar or array-like
        Replacement value forwarded to the block-level "fillna".
    limit : int or None
        Maximum number of consecutive NAs to fill; validated eagerly so
        an invalid limit raises even on otherwise no-op paths.
    inplace : bool
        Whether the underlying blocks should be modified in place.
    downcast : optional
        Downcasting behavior forwarded to the block-level "fillna".

    Returns
    -------
    Same manager type as ``self``.
    """
    # Do this validation even if we go through one of the no-op paths
    checked_limit = (
        None if limit is None else libalgos.validate_limit(None, limit=limit)
    )
    return self.apply_with_block(
        "fillna",
        value=value,
        limit=checked_limit,
        inplace=inplace,
        downcast=downcast,
    )
def array_fillna(array, value, limit, inplace):
    """
    Replace NA positions in ``array`` with ``value``.

    Parameters
    ----------
    array : array-like
        Target array; mutated when ``inplace`` is True, otherwise a copy
        is filled and returned.
    value : scalar
        Replacement written into the NA positions.
    limit : int or None
        If given, only the first ``limit`` NA positions are filled.
    inplace : bool
        Whether to write into ``array`` directly.

    Returns
    -------
    array-like
        ``array`` itself when ``inplace`` is True, else a filled copy.
    """
    na_mask = isna(array)

    if limit is not None:
        limit = libalgos.validate_limit(None, limit=limit)
        # cumsum counts NAs seen so far; positions past the first `limit`
        # NAs are dropped from the fill mask
        na_mask[na_mask.cumsum() > limit] = False

    # TODO could optimize for arrays that cannot hold NAs
    # (like _can_hold_na on Blocks)
    target = array if inplace else array.copy()

    if np.any(na_mask):
        # TODO allow invalid value if there is nothing to fill?
        target[na_mask] = value
    return target
def _interpolate_2d_with_fill(
    data: np.ndarray,  # floating dtype
    index: Index,
    axis: int,
    method: str = "linear",
    limit: int | None = None,
    limit_direction: str = "forward",
    limit_area: str | None = None,
    fill_value: Any | None = None,
    **kwargs,
) -> None:
    """
    Column-wise application of _interpolate_1d.

    Notes
    -----
    Alters 'data' in-place.

    The signature does differ from _interpolate_1d because it only
    includes what is needed for Block.interpolate.
    """
    # validate the interp method
    clean_interp_method(method, index, **kwargs)

    if is_valid_na_for_dtype(fill_value, data.dtype):
        fill_value = na_value_for_dtype(data.dtype, compat=False)

    if method == "time":
        if not needs_i8_conversion(index.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        method = "values"

    allowed_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in allowed_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{allowed_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        allowed_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in allowed_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {allowed_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    indices = _index_to_interp_indices(index, method)

    def _fill_slice(column: np.ndarray) -> None:
        # each 1-d slice taken along `axis` is interpolated in place
        _interpolate_1d(
            indices=indices,
            yvalues=column,
            method=method,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            fill_value=fill_value,
            bounds_error=False,
            **kwargs,
        )

    # interp each column independently; apply_along_axis hands us views,
    # so the in-place writes above mutate `data`
    np.apply_along_axis(_fill_slice, axis, data)
    return
def interpolate_1d(
    xvalues: Index,
    yvalues: np.ndarray,
    method: Optional[str] = "linear",
    limit: Optional[int] = None,
    limit_direction: str = "forward",
    limit_area: Optional[str] = None,
    fill_value: Optional[Any] = None,
    bounds_error: bool = False,
    order: Optional[int] = None,
    **kwargs,
):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Returns a new 1-d float array with NaN positions interpolated, except
    for those positions that the limit/limit_area rules say must stay NaN.
    ``yvalues`` is returned unchanged when it contains no NaNs; an all-NaN
    array is returned when it contains no valid values at all.
    """
    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # no valid points to interpolate from -> everything stays NaN
        result = np.empty(xvalues.shape, dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        # nothing to fill; return the input as-is
        return yvalues

    if method == "time":
        # "time" is only meaningful for datetime-like x values
        if not needs_i8_conversion(xvalues.dtype):
            raise ValueError(
                "time-weighted interpolation only works "
                "on Series or DataFrames with a "
                "DatetimeIndex"
            )
        method = "values"

    valid_limit_directions = ["forward", "backward", "both"]
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError(
            "Invalid limit_direction: expecting one of "
            f"{valid_limit_directions}, got '{limit_direction}'."
        )

    if limit_area is not None:
        valid_limit_areas = ["inside", "outside"]
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError(
                f"Invalid limit_area: expecting one of {valid_limit_areas}, got "
                f"{limit_area}."
            )

    # default limit is unlimited GH #16282
    limit = algos.validate_limit(nobs=None, limit=limit)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    # NaN runs before the first valid value and after the last valid value
    start_nans = set(range(find_valid_index(yvalues, "first")))
    end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    preserve_nans: Union[List, Set]
    if limit_direction == "forward":
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == "backward":
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == "inside":
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == "outside":
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list (used for fancy indexing below)
    preserve_nans = sorted(preserve_nans)

    result = yvalues.copy()

    # xarr to pass to NumPy/SciPy
    xarr = xvalues._values
    if needs_i8_conversion(xarr.dtype):
        # GH#1646 for dt64tz: interpolate on the i8 view of the datetimes
        xarr = xarr.view("i8")

    if method == "linear":
        inds = xarr
    else:
        inds = np.asarray(xarr)
        if method in ("values", "index"):
            if inds.dtype == np.object_:
                # object x values: try to coerce to a numeric/datetime array
                inds = lib.maybe_convert_objects(inds)

    if method in NP_METHODS:
        # np.interp requires sorted X values, #21037
        indexer = np.argsort(inds[valid])
        result[invalid] = np.interp(
            inds[invalid], inds[valid][indexer], yvalues[valid][indexer]
        )
    else:
        # presumably dispatches to the matching scipy routine — the exact
        # method set handled is defined by _interpolate_scipy_wrapper
        result[invalid] = _interpolate_scipy_wrapper(
            inds[valid],
            yvalues[valid],
            inds[invalid],
            method=method,
            fill_value=fill_value,
            bounds_error=bounds_error,
            order=order,
            **kwargs,
        )

    # re-blank the positions the limit rules told us to keep as NaN
    result[preserve_nans] = np.nan
    return result