Example #1
    def test_first_last_valid(self):
        ts = self.ts.copy()
        ts[:5] = np.NaN

        index = ts.first_valid_index()
        assert index == ts.index[5]

        ts[-5:] = np.NaN
        index = ts.last_valid_index()
        assert index == ts.index[-6]

        ts[:] = np.nan
        assert ts.last_valid_index() is None
        assert ts.first_valid_index() is None

        ser = Series([], index=[])
        assert ser.last_valid_index() is None
        assert ser.first_valid_index() is None

        # GH12800
        empty = Series()
        assert empty.last_valid_index() is None
        assert empty.first_valid_index() is None

        # GH20499: it preserves freq with holes
        ts.index = date_range("20110101", periods=len(ts), freq="B")
        ts.iloc[1] = 1
        ts.iloc[-2] = 1
        assert ts.first_valid_index() == ts.index[1]
        assert ts.last_valid_index() == ts.index[-2]
        assert ts.first_valid_index().freq == ts.index.freq
        assert ts.last_valid_index().freq == ts.index.freq
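
For readers skimming the test above, a minimal standalone sketch of the semantics it asserts (default RangeIndex, NaN padding at both ends):

import numpy as np
import pandas as pd

s = pd.Series([np.nan, np.nan, 1.0, 2.0, np.nan])
print(s.first_valid_index())  # 2 -> label of the first non-NaN entry
print(s.last_valid_index())   # 3 -> label of the last non-NaN entry

# All-NaN and empty Series both yield None
print(pd.Series([np.nan, np.nan]).first_valid_index())  # None
print(pd.Series(dtype=float).last_valid_index())        # None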
Example #3
def calculate_enhanced_meta(serie: pd.Series, periodicity: str) -> dict:
    """Create or update the enriched metadata for the given series. Its title
    MUST be the ID of the series in the database"""

    days_since_update = (datetime.now() - _get_last_day_of_period(serie, periodicity)).days
    last_index = serie.index.get_loc(serie.last_valid_index())
    last = serie.iloc[last_index]
    second_to_last = serie.iloc[last_index - 1] if serie.index.size > 1 else None
    last_pct_change = last / second_to_last - 1 if second_to_last is not None else None

    # Calculations
    meta = {
        meta_keys.INDEX_START: serie.first_valid_index().date(),
        meta_keys.INDEX_END: serie.last_valid_index().date(),
        meta_keys.PERIODICITY: periodicity,
        meta_keys.INDEX_SIZE: _get_index_size(serie),
        meta_keys.DAYS_SINCE_LAST_UPDATE: days_since_update,
        meta_keys.LAST_VALUE: last,
        meta_keys.SECOND_TO_LAST_VALUE: second_to_last,
        meta_keys.LAST_PCT_CHANGE: last_pct_change,
        meta_keys.IS_UPDATED: _is_series_updated(days_since_update, periodicity),
        meta_keys.MAX: serie.max(),
        meta_keys.MIN: serie.min(),
        meta_keys.AVERAGE: serie.mean(),
        meta_keys.SIGNIFICANT_FIGURES: significant_figures(serie.values)
    }

    return meta
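
A hypothetical helper (not part of the source) restating the last/second-to-last lookup above with purely positional access, guarding the single-observation case:

import numpy as np
import pandas as pd

def last_two_valid(serie: pd.Series):
    # Position of the last non-NaN value, then step back one slot.
    pos = serie.index.get_loc(serie.last_valid_index())
    last = serie.iloc[pos]
    second_to_last = serie.iloc[pos - 1] if pos > 0 else None
    return last, second_to_last

print(last_two_valid(pd.Series([10.0, 12.0, np.nan])))  # (12.0, 10.0)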
Example #4
def _calculate_smoothed_daily_cases(new_cases: pd.Series, smooth: int = 7):

    if new_cases.first_valid_index() is None:
        return new_cases

    new_cases = new_cases.copy()

    # Pad the leading gap with 0s. We're assuming all regions accurately
    # report the first day a new case occurs. This mostly affects the first few
    # values in a timeseries, because the average is taken over a full window
    # rather than just the first couple of days of reported data.
    new_cases[:new_cases.first_valid_index() - timedelta(days=1)] = 0
    smoothed = series_utils.smooth_with_rolling_average(new_cases,
                                                        window=smooth)

    return smoothed
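
series_utils.smooth_with_rolling_average is project-specific and not shown here; a rough stand-in, assuming it is a plain trailing rolling mean (the real helper may differ):

import pandas as pd

def smooth_with_rolling_average(series: pd.Series, window: int = 7) -> pd.Series:
    # Trailing mean over `window` observations; shorter windows at the start.
    return series.rolling(window, min_periods=1).mean()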
Example #5
def index(x: pd.Series, initial: int = 1) -> pd.Series:
    """
    Geometric series normalization

    :param x: time series
    :param initial: initial value
    :return: normalized time series

    **Usage**

    Divides every value in x by the initial value of x:

    :math:`Y_t = initial * X_t / X_0`

    where :math:`X_0` is the first value in the series

    **Examples**

    Normalize series to 1:

    >>> series = generate_series(100)
    >>> returns = index(series)

    **See also**

    :func:`returns`

    """
    i = x.first_valid_index()
    return pd.Series(dtype=float) if i is None else initial * x / x[i]
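
A quick sanity check, assuming the index() function above is in scope: the first valid value maps onto `initial`.

import pandas as pd

s = pd.Series([2.0, 4.0, 6.0])
print(index(s).tolist())  # [1.0, 2.0, 3.0]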
Example #6
def update_enhanced_meta(serie: pd.Series, catalog_id: str, distribution_id: str):
    """Crea o actualiza los metadatos enriquecidos de la serie pasada. El tรญtulo de
    la misma DEBE ser el ID de la serie en la base de datos"""

    field = Field.objects.get(distribution__dataset__catalog__identifier=catalog_id,
                              distribution__identifier=distribution_id,
                              identifier=serie.name)
    periodicity = meta_keys.get(field.distribution, meta_keys.PERIODICITY)
    days_since_update = (datetime.now() - _get_last_day_of_period(serie, periodicity)).days

    last = serie.iloc[-1]
    second_to_last = serie.iloc[-2] if serie.index.size > 1 else None
    last_pct_change = last / second_to_last - 1 if second_to_last is not None else None

    # Calculations
    meta = {
        meta_keys.INDEX_START: serie.first_valid_index().date(),
        meta_keys.INDEX_END: serie.last_valid_index().date(),
        meta_keys.PERIODICITY: periodicity,
        meta_keys.INDEX_SIZE: _get_index_size(serie),
        meta_keys.DAYS_SINCE_LAST_UPDATE: days_since_update,
        meta_keys.LAST_VALUE: last,
        meta_keys.SECOND_TO_LAST_VALUE: second_to_last,
        meta_keys.LAST_PCT_CHANGE: last_pct_change,
        meta_keys.IS_UPDATED: _is_series_updated(days_since_update, periodicity),
        meta_keys.MAX: serie.max(),
        meta_keys.MIN: serie.min(),
        meta_keys.AVERAGE: serie.mean(),
    }

    for meta_key, value in meta.items():
        field.enhanced_meta.update_or_create(key=meta_key, defaults={'value': value})
Example #7
    def check_series(s: pd.Series, input_output="") -> bool:
        """
        Check if a given Pandas Series has the properties of a RepresentationSeries.
        """

        error_string = (
            "There are non-representation cells (every cell should be a list of floats) in the given Series."
            " See help(hero.HeroSeries) for more information."
        )

        def is_numeric(x):
            try:
                float(x)
            except ValueError:
                return False
            else:
                return True

        def is_list_of_numbers(cell):
            # check isinstance first so non-iterable cells short-circuit
            return isinstance(cell, (list, tuple)) and all(is_numeric(x) for x in cell)

        try:
            first_non_nan_value = s.loc[s.first_valid_index()]
            if not is_list_of_numbers(first_non_nan_value) or s.index.nlevels != 1:
                raise TypeError(error_string)
        except KeyError:  # Only NaNs in Series -> same warning applies
            raise TypeError(error_string)
Example #8
def index(x: pd.Series, initial: int = 1) -> pd.Series:
    """
    Geometric series normalization

    :param x: time series
    :param initial: initial value
    :return: normalized time series

    **Usage**

    Divides every value in x by the initial value of x:

    :math:`Y_t = initial * X_t / X_0`

    where :math:`X_0` is the first value in the series

    **Examples**

    Normalize series to 1:

    >>> series = generate_series(100)
    >>> returns = index(series)

    **See also**

    :func:`returns`

    """
    i = x.first_valid_index()
    if i is None:
        return pd.Series(dtype=float)
    if not x[i]:
        raise MqValueError(
            'Divide by zero error. Ensure that the first value of series passed to index(...) '
            'is non-zero')
    return initial * x / x[i]
Example #9
    def check_type(s: pd.Series, input_output="") -> Tuple[bool, str]:
        """
        Check if a given Pandas Series has the properties of a VectorSeries.
        """

        error_string = (
            "should be VectorSeries: there are non-representation cells (every cell should be a list of floats) in the given Series."
            " See help(hero.HeroTypes) for more information.")

        def is_numeric(x):
            try:
                float(x)
            except ValueError:
                return False
            else:
                return True

        def is_list_of_numbers(cell):
            return isinstance(cell, (list, tuple)) and all(
                is_numeric(x) for x in cell)

        try:
            first_non_nan_value = s.loc[s.first_valid_index()]
            if not is_list_of_numbers(first_non_nan_value):
                return False, error_string
        except KeyError:  # Only NaNs in Series -> same warning applies
            return False, error_string

        return True, ""
Example #10
def drift(x: Series, h: int) -> Series:
    # x : time series data
    # h : number of future predictions
    # equation : Ŷ(t+h|t) = Y_t + h * ((Y_t - Y_1) / (t - 1))
    diffRate = (x.get(x.last_valid_index()) - x.get(x.first_valid_index())) / (len(x.values) - 1)
    result = []
    for t in range(h):
        result.append(x.get(x.last_valid_index()) + ((t + 1) * diffRate))
    return Series(np.array(result))
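
A quick numeric check, assuming the drift() function above: for y = [1, 3, 5] the per-step drift is (5 - 1) / 2 = 2, so the next two predictions are 7 and 9.

import numpy as np
from pandas import Series

print(drift(Series([1.0, 3.0, 5.0]), h=2).tolist())  # [7.0, 9.0]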
Example #11
def number_of_na_in_ts(ts: pd.Series) -> int:
    """
    Removes all the NaNs at the beginning (assume the first value is never
    missing), then counts the number of NaNs.
    See test below.
    """
    index_first_non_na = ts.first_valid_index()
    ts = ts[index_first_non_na:]
    return ts.isna().sum()
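
Example usage, assuming number_of_na_in_ts from above: the two leading NaNs are dropped and only the interior NaN is counted.

import numpy as np
import pandas as pd

ts = pd.Series([np.nan, np.nan, 1.0, np.nan, 2.0])
print(number_of_na_in_ts(ts))  # 1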
Example #12
    def parse(self, column_data: pd.Series):
        super().parse(column_data)
        idx = column_data.first_valid_index()
        val = column_data[idx]
        inferred_shape = np.array(val).shape
        if self._shape is not None:
            assert tuple(self._shape) == tuple(inferred_shape), \
                'Shape mismatch! Expected shape={}, shape in the dataset is {}'.format(
                    self._shape, inferred_shape)
        else:
            self._shape = inferred_shape
Example #13
def is_categorical_column(
        data: pd.Series,
        threshold: int = 100,
        ratio: float = 0.1,
        is_label_columns: bool = False,
        default_allow_missing: bool = True) -> Tuple[bool, bool]:
    """Check whether the column is a categorical column.

    If the number of unique elements in the column is smaller than

        min(#Total Sample * ratio, threshold),

    it will be treated as a categorical column

    Parameters
    ----------
    data
        The column data
    threshold
        The threshold for detecting categorical column
    ratio
        The ratio for detecting categorical column
    is_label_columns
        Whether the column is a label column
    default_allow_missing
        Default value returned as parsed_allow_missing for string columns

    Returns
    -------
    is_categorical
        Whether the column is a categorical column
    parsed_allow_missing
        Whether missing values should be allowed for the column
    """
    threshold = min(int(len(data) * ratio), threshold)
    sample_set = set()
    element = data[data.first_valid_index()]
    if isinstance(element, str):
        for idx, sample in data.items():
            sample_set.add(sample)
            if len(sample_set) > threshold:
                return False, False
        if is_label_columns:
            return True, False
        else:
            return True, default_allow_missing
    elif isinstance(element, INT_TYPES):
        value_counts = data.value_counts()
        if value_counts.keys().min() == 0 and \
                value_counts.keys().max() == len(value_counts) - 1:
            return True, False
        else:
            return False, False
    elif isinstance(element, BOOL_TYPES):
        return True, False
    else:
        return False, False
Example #15
    def test_first_last_valid(self):
        ts = self.ts.copy()
        ts[:5] = np.NaN

        index = ts.first_valid_index()
        assert index == ts.index[5]

        ts[-5:] = np.NaN
        index = ts.last_valid_index()
        assert index == ts.index[-6]

        ts[:] = np.nan
        assert ts.last_valid_index() is None
        assert ts.first_valid_index() is None

        ser = Series([], index=[])
        assert ser.last_valid_index() is None
        assert ser.first_valid_index() is None

        # GH12800
        empty = Series()
        assert empty.last_valid_index() is None
        assert empty.first_valid_index() is None
Example #16
    def test_first_last_valid(self):
        ts = self.ts.copy()
        ts[:5] = np.NaN

        index = ts.first_valid_index()
        self.assertEqual(index, ts.index[5])

        ts[-5:] = np.NaN
        index = ts.last_valid_index()
        self.assertEqual(index, ts.index[-6])

        ts[:] = np.nan
        self.assertIsNone(ts.last_valid_index())
        self.assertIsNone(ts.first_valid_index())

        ser = Series([], index=[])
        self.assertIsNone(ser.last_valid_index())
        self.assertIsNone(ser.first_valid_index())

        # GH12800
        empty = Series()
        self.assertIsNone(empty.last_valid_index())
        self.assertIsNone(empty.first_valid_index())
Example #17
    def write_serie(self, serie: pd.Series, periodicity: str, fields: dict,
                    writer: csv.writer):
        field_id = fields[serie.name]

        # Filter out leading/trailing NaNs
        serie = serie[serie.first_valid_index():serie.last_valid_index()]

        df = serie.reset_index().apply(self.rows,
                                       axis=1,
                                       args=(self.fields_data, field_id,
                                             periodicity))

        serie = pd.Series(df.values, index=serie.index)
        for row in serie:
            writer.writerow(row)
Example #18
    def check_series(s: pd.Series) -> bool:
        """
        Check if a given Pandas Series has the properties of a TextSeries.
        """

        error_string = (
            "The input Series should consist only of strings in every cell."
            " See help(hero.HeroSeries) for more information."
        )

        try:
            first_non_nan_value = s.loc[s.first_valid_index()]
            if not isinstance(first_non_nan_value, str) or s.index.nlevels != 1:
                raise TypeError(error_string)
        except KeyError:  # Only NaNs in Series -> same warning applies
            raise TypeError(error_string)
Example #19
    def check_type(s: pd.Series) -> Tuple[bool, str]:
        """
        Check if a given Pandas Series has the properties of a TextSeries.
        """

        error_string = (
            "should be TextSeries: the input Series should consist only of strings in every cell."
            " See help(hero.HeroTypes) for more information.")

        try:
            first_non_nan_value = s.loc[s.first_valid_index()]
            if not isinstance(first_non_nan_value, str):
                return False, error_string
        except KeyError:  # Only NaNs in Series -> same warning applies
            return False, error_string

        return True, ""
Example #20
def interpolate_stalled_and_missing_values(series: pd.Series) -> pd.Series:
    """Interpolates periods where values have stopped increasing or have gaps.

    Args:
        series: Series with a datetime index
    """
    series = series.copy()
    start, end = series.first_valid_index(), series.last_valid_index()
    series_with_values = series.loc[start:end]

    series_with_values[series_with_values.diff() == 0] = None
    # Use the index to determine breaks between data (so
    # missing data is not improperly interpolated)
    series.loc[start:end] = series_with_values.interpolate(
        method="time").apply(np.floor)

    return series
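
An illustration, assuming interpolate_stalled_and_missing_values from above: a cumulative counter that stalls at 5 and then has a gap is re-interpolated over both.

import numpy as np
import pandas as pd

idx = pd.date_range("2020-03-01", periods=5, freq="D")
s = pd.Series([1.0, 5.0, 5.0, np.nan, 9.0], index=idx)
print(interpolate_stalled_and_missing_values(s).tolist())
# [1.0, 5.0, 6.0, 7.0, 9.0] -- the stalled day and the gap are both re-filled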
Example #21
def _get_range(x: pd.Series):
    """Get a range of values so that there are no NaNs in the sequence."""

    first_idx = x.first_valid_index()
    last_idx = x.last_valid_index()
    subset = x.loc[first_idx:last_idx]

    while subset.isnull().values.any() and \
            (first_idx is not None or last_idx is not None):
        idx = subset.isna().idxmax()
        first_idx = subset.loc[idx:last_idx].first_valid_index()
        subset = x.loc[first_idx:last_idx]

    if first_idx is None or last_idx is None:
        return None, None

    return first_idx, last_idx
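
Example, assuming _get_range from above: the loop advances first_idx past interior NaNs until the [first_idx, last_idx] window is NaN-free.

import numpy as np
import pandas as pd

x = pd.Series([np.nan, 1.0, np.nan, 2.0, 3.0, np.nan])
print(_get_range(x))  # (3, 4) -- the trailing NaN-free stretch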
Example #22
    def _fit_core(self, s: pd.Series) -> None:
        if not (s.index.is_monotonic_increasing
                or s.index.is_monotonic_decreasing):
            raise ValueError("Time series must have a monotonic time index.")
        # remove starting and ending nans
        s = s.loc[s.first_valid_index():s[::-1].first_valid_index()].copy()
        if pd.isna(s).any():
            raise ValueError(
                "Found NaN in time series among valid values. "
                "NaNs starting or ending a time series are allowed, "
                "but those among valid values are not.")
        # get datum time
        self._datumTimestamp = s.index[0]
        # get series_freq
        if s.index.freq is not None:
            self._series_freq = s.index.freqstr
        else:
            self._series_freq = s.index.inferred_freq
        if self._series_freq is None:
            raise RuntimeError(
                "Series does not follow any known frequency "
                "(e.g. second, minute, hour, day, week, month, year, etc.)")
        # get average dT
        self._dT = pd.Series(s.index).diff().mean()
        # get seasonal freq
        if self.freq is None:
            identified_freq = _identify_seasonal_period(s)
            if identified_freq is None:
                raise Exception("Could not find significant seasonality.")
            else:
                self.freq_ = identified_freq
        else:
            self.freq_ = self.freq
        # get seasonal pattern
        if self.trend:
            seasonal_decompose_results = (
                seasonal_decompose(s, period=self.freq_)
                if parse(statsmodels.__version__) >= parse("0.11") else
                seasonal_decompose(s, freq=self.freq_))
            self.seasonal_ = getattr(seasonal_decompose_results,
                                     "seasonal")[:self.freq_]
        else:
            self.seasonal_ = s.iloc[:self.freq_].copy()
            for i in range(len(self.seasonal_)):
                self.seasonal_.iloc[i] = s.iloc[i::len(self.seasonal_)].mean()
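
Note the trimming line in _fit_core: s[::-1].first_valid_index() is equivalent to s.last_valid_index(), since reversing the series makes its last valid label the first one found.

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, 2.0, np.nan])
assert s[::-1].first_valid_index() == s.last_valid_index() == 2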
Example #23
    def test_first_last_valid(self):
        ts = self.ts.copy()
        ts[:5] = np.NaN

        index = ts.first_valid_index()
        self.assertEqual(index, ts.index[5])

        ts[-5:] = np.NaN
        index = ts.last_valid_index()
        self.assertEqual(index, ts.index[-6])

        ts[:] = np.nan
        self.assert_(ts.last_valid_index() is None)
        self.assert_(ts.first_valid_index() is None)

        ser = Series([], index=[])
        self.assert_(ser.last_valid_index() is None)
        self.assert_(ser.first_valid_index() is None)
Example #24
    def check_type(s: pd.Series) -> Tuple[bool, str]:
        """
        Check if a given Pandas Series has the properties of a TokenSeries.
        """

        error_string = (
            "should be TokenSeries: there are non-token cells (every cell should be a list of words/tokens) in the given Series."
            " See help(hero.HeroTypes) for more information.")

        def is_list_of_strings(cell):
            # check isinstance first so non-iterable cells short-circuit
            return isinstance(cell, (list, tuple)) and all(
                isinstance(x, str) for x in cell)

        try:
            first_non_nan_value = s.loc[s.first_valid_index()]
            if not is_list_of_strings(first_non_nan_value):
                return False, error_string
        except KeyError:  # Only NaNs in Series -> same warning applies
            return False, error_string

        return True, ""
Example #25
    def check_series(s: pd.Series) -> bool:
        """
        Check if a given Pandas Series has the properties of a TokenSeries.
        """

        error_string = (
            "There are non-token cells (every cell should be a list of words/tokens) in the given Series."
            " See help(hero.HeroSeries) for more information."
        )

        def is_list_of_strings(cell):
            # check isinstance first so non-iterable cells short-circuit
            return isinstance(cell, (list, tuple)) and all(
                isinstance(x, str) for x in cell
            )

        try:
            first_non_nan_value = s.loc[s.first_valid_index()]
            if not is_list_of_strings(first_non_nan_value) or s.index.nlevels != 1:
                raise TypeError(error_string)
        except KeyError:  # Only NaNs in Series -> same warning applies
            raise TypeError(error_string)
Example #26
        def _assert_single_contiguous_dense_sequence(
                _series: pd.Series) -> None:
            """
            Assert that the input series has no Null values after removing leading
            and trailing Nulls. A motivating example for this requirement is a
            ForecastCheck, which might have a main value series that ends with trailing
            Nulls, and a forecast series that begins with leading nulls, but the actual
            and forecast periods should have no nulls.

            This is a strong assertion, and I'm not 100% sure it's the right one, but
            I'm putting it in because I'd rather start out with more constraints. However,
            we can revisit this design choice.
            """

            assert is_numeric_dtype(_series), 'The "Single Contiguous Dense Sequence" constraint should only be ' \
                                              'applied to numeric Series'

            assert (not _series.loc[_series.first_valid_index():
                                    _series.last_valid_index()]
                    .isnull().values.any()), (
                'Numeric series may have leading or trailing null values to represent missing or non-applicable '
                'data points. However, values for the series should otherwise be non-Null.'
            )
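
A sketch of the assertion in action, assuming the nested helper above and pandas.api.types.is_numeric_dtype are both in scope:

import numpy as np
import pandas as pd

_assert_single_contiguous_dense_sequence(pd.Series([np.nan, 1.0, 2.0, np.nan]))  # passes
try:
    _assert_single_contiguous_dense_sequence(pd.Series([1.0, np.nan, 2.0]))
except AssertionError:
    print("interior NaN rejected")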
Example #27
def fill_gaps(time_series: pd.Series) -> pd.Series:
    """
    Fill gaps (i.e. NaN values) inside the time series; leading and trailing
    missing values are left untouched.

    Parameters
    ----------
    time_series: pd.Series
        Time-series of load (can be NaNs) indexed with datetime indexes.

    Returns
    -------
    time_series: pd.Series
        Corrected time series
    """

    # First remove starting and ending nans
    time_series_trim = time_series.loc[
        time_series.first_valid_index():time_series.last_valid_index()]

    # For each remaining nan, replace its value by the value of the same
    # weekday/hour on another day for which we have data
    time_series_trim_valid = time_series_trim.dropna()
    nan_indexes = time_series_trim.index[time_series_trim.apply(np.isnan)]
    for index in nan_indexes:
        # Get all valid elements that fall on the same weekday and hour
        similar_hours = time_series_trim_valid[
            time_series_trim_valid.index.map(
                lambda x: x.weekday() == index.weekday()
                and x.hour == index.hour)]
        # Find the closest valid hour
        closest_valid_hour_index = similar_hours.index[np.argmin(
            abs((similar_hours.index - index).days))]

        time_series_trim[index] = time_series_trim_valid[
            closest_valid_hour_index]

    time_series[time_series_trim.index] = time_series_trim.values

    return time_series
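
A tiny walk-through, assuming fill_gaps from above: the interior NaN on a Monday 01:00 is filled from the closest other Monday 01:00 that has data (a tie here, so the earlier one wins via argmin).

import numpy as np
import pandas as pd

idx = pd.DatetimeIndex(["2020-01-06 01:00", "2020-01-13 01:00", "2020-01-20 01:00"])
ts = pd.Series([1.0, np.nan, 3.0], index=idx)
print(fill_gaps(ts).tolist())  # [1.0, 1.0, 3.0]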
Example #28
def generate_field_summary(series: pd.Series) -> pd.Series:

    has_value = not series.isnull().all()
    min_date = None
    max_date = None
    max_value = None
    min_value = None
    latest_value = None
    num_observations = 0
    largest_delta = None
    largest_delta_date = None

    if has_value:
        min_date = series.first_valid_index()
        max_date = series.last_valid_index()
        latest_value = series[series.notnull()].iloc[-1]
        max_value = series.max()
        min_value = series.min()
        num_observations = len(series[series.notnull()])
        largest_delta = series.diff().abs().max()
        # idxmax is undefined when every diff is NaN, so guard against that
        if len(series.diff().abs().dropna()):
            largest_delta_date = series.diff().abs().idxmax()

    results = {
        "has_value": has_value,
        "min_date": min_date,
        "max_date": max_date,
        "max_value": max_value,
        "min_value": min_value,
        "latest_value": latest_value,
        "num_observations": num_observations,
        "largest_delta": largest_delta,
        "largest_delta_date": largest_delta_date,
    }
    return pd.Series(results)
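
Example output, assuming generate_field_summary from above:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, 4.0, np.nan],
              index=pd.date_range("2021-01-01", periods=4, freq="D"))
summary = generate_field_summary(s)
print(summary["min_date"].date(), summary["max_date"].date())  # 2021-01-02 2021-01-03
print(summary["latest_value"], summary["largest_delta"])       # 4.0 3.0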
Example #29
def _get_index_size(serie: pd.Series):
    # Trim the NaNs before and after the series
    return len(serie[serie.first_valid_index():serie.last_valid_index()])
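
Example, assuming _get_index_size from above and a label-sliced (e.g. datetime) index: interior NaNs still count, only the leading/trailing padding is trimmed.

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, 2.0, np.nan],
              index=pd.date_range("2021-01-01", periods=5, freq="D"))
print(_get_index_size(s))  # 3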
Example #30
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit is a list of the indexes in the series whose yvalue is
    # currently NaN, and should still be NaN after the interpolation.
    # Specifically:
    #
    # If limit_direction='forward' or None then the list will contain NaNs at
    # the beginning of the series, and NaNs that are more than 'limit' away
    # from the prior non-NaN.
    #
    # If limit_direction='backward' then the list will contain NaNs at
    # the end of the series, and NaNs that are more than 'limit' away
    # from the subsequent non-NaN.
    #
    # If limit_direction='both' then the list will contain NaNs that
    # are more than 'limit' away from any non-NaN.
    #
    # If limit=None, then use default behavior of filling an unlimited number
    # of NaNs in the direction specified by limit_direction

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    # each possible limit_direction
    # TODO: do we need sorted?
    if limit_direction == 'forward' and limit is not None:
        violate_limit = sorted(start_nans |
                               set(_interp_limit(invalid, limit, 0)))
    elif limit_direction == 'forward':
        violate_limit = sorted(start_nans)
    elif limit_direction == 'backward' and limit is not None:
        violate_limit = sorted(end_nans |
                               set(_interp_limit(invalid, 0, limit)))
    elif limit_direction == 'backward':
        violate_limit = sorted(end_nans)
    elif limit_direction == 'both' and limit is not None:
        violate_limit = sorted(_interp_limit(invalid, limit, limit))
    else:
        violate_limit = []

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
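
The internal routine above backs the public interpolation API; roughly the same limit behavior is observable through Series.interpolate (exact keyword support varies by pandas version):

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, np.nan, 4.0, np.nan])
print(s.interpolate(limit=1).tolist())
# [nan, 1.0, 2.0, nan, 4.0, 4.0] -- leading NaN kept, one fill per gap forward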
Example #31
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = com.isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        "Get idx of values that won't be filled b/c they exceed the limits."
        for x in np.where(invalid)[0]:
            if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                yield x

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # This is a list of the indexes in the series whose yvalue is currently
    # NaN, but whose interpolated yvalue will be overwritten with NaN after
    # computing the interpolation. For each index in this list, one of these
    # conditions is true of the corresponding NaN in the yvalues:
    #
    # a) It is one of a chain of NaNs at the beginning of the series, and
    #    either limit is not specified or limit_direction is 'forward'.
    # b) It is one of a chain of NaNs at the end of the series, and limit is
    #    specified and limit_direction is 'backward' or 'both'.
    # c) Limit is nonzero and it is further than limit from the nearest non-NaN
    #    value (with respect to the limit_direction setting).
    #
    # The default behavior is to fill forward with no limit, ignoring NaNs at
    # the beginning (see issues #9218 and #10420)
    violate_limit = sorted(start_nans)

    if limit:
        if limit_direction == 'forward':
            violate_limit = sorted(start_nans | set(_interp_limit(invalid,
                                                                  limit, 0)))
        if limit_direction == 'backward':
            violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0,
                                                                limit)))
        if limit_direction == 'both':
            violate_limit = sorted(_interp_limit(invalid, limit, limit))

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if issubclass(inds.dtype.type, np.datetime64):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'piecewise_polynomial', 'pchip', 'akima']
    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
Example #32
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        "Get idx of values that won't be filled b/c they exceed the limits."
        for x in np.where(invalid)[0]:
            if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                yield x

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # This is a list of the indexes in the series whose yvalue is currently
    # NaN, but whose interpolated yvalue will be overwritten with NaN after
    # computing the interpolation. For each index in this list, one of these
    # conditions is true of the corresponding NaN in the yvalues:
    #
    # a) It is one of a chain of NaNs at the beginning of the series, and
    #    either limit is not specified or limit_direction is 'forward'.
    # b) It is one of a chain of NaNs at the end of the series, and limit is
    #    specified and limit_direction is 'backward' or 'both'.
    # c) Limit is nonzero and it is further than limit from the nearest non-NaN
    #    value (with respect to the limit_direction setting).
    #
    # The default behavior is to fill forward with no limit, ignoring NaNs at
    # the beginning (see issues #9218 and #10420)
    violate_limit = sorted(start_nans)

    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')
        if limit_direction == 'forward':
            violate_limit = sorted(start_nans | set(_interp_limit(invalid,
                                                                  limit, 0)))
        if limit_direction == 'backward':
            violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0,
                                                                limit)))
        if limit_direction == 'both':
            violate_limit = sorted(_interp_limit(invalid, limit, limit))

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
Example #33
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit is a list of the indexes in the series whose yvalue is
    # currently NaN, and should still be NaN after the interpolation.
    # Specifically:
    #
    # If limit_direction='forward' or None then the list will contain NaNs at
    # the beginning of the series, and NaNs that are more than 'limit' away
    # from the prior non-NaN.
    #
    # If limit_direction='backward' then the list will contain NaNs at
    # the end of the series, and NaNs that are more than 'limit' away
    # from the subsequent non-NaN.
    #
    # If limit_direction='both' then the list will contain NaNs that
    # are more than 'limit' away from any non-NaN.
    #
    # If limit=None, then use default behavior of filling an unlimited number
    # of NaNs in the direction specified by limit_direction

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    # each possible limit_direction
    # TODO: do we need sorted?
    if limit_direction == 'forward' and limit is not None:
        violate_limit = sorted(start_nans
                               | set(_interp_limit(invalid, limit, 0)))
    elif limit_direction == 'forward':
        violate_limit = sorted(start_nans)
    elif limit_direction == 'backward' and limit is not None:
        violate_limit = sorted(end_nans
                               | set(_interp_limit(invalid, 0, limit)))
    elif limit_direction == 'backward':
        violate_limit = sorted(end_nans)
    elif limit_direction == 'both' and limit is not None:
        violate_limit = sorted(_interp_limit(invalid, limit, limit))
    else:
        violate_limit = []

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[violate_limit] = np.nan
        return result
Example #34
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   limit_area=None,
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(
            msg.format(valid=valid_limit_directions, invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[preserve_nans] = np.nan
        return result
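
The limit_area parameter added in this revision (GH #16284) is likewise visible through the public API in modern pandas:

import numpy as np
import pandas as pd

s = pd.Series([np.nan, 1.0, np.nan, 3.0, np.nan])
print(s.interpolate(limit_area="inside").tolist())
# [nan, 1.0, 2.0, 3.0, nan] -- only the interior gap is filled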
Example #35
def _first_valid_value(serie: Series) -> Any:
    first_valid_index = serie.first_valid_index()
    return serie[first_valid_index] if first_valid_index is not None else None
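
Quick check, assuming _first_valid_value from above:

import numpy as np
from pandas import Series

print(_first_valid_value(Series([np.nan, 7.0])))     # 7.0
print(_first_valid_value(Series([np.nan, np.nan])))  # None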
Example #36
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', limit_area=None, fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(msg.format(valid=valid_limit_directions,
                                    invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than 'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and convert to list
    preserve_nans = sorted(preserve_nans)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[preserve_nans] = np.nan
        return result