def calculate_enhanced_meta(serie: pd.Series, periodicity: str) -> dict:
    """Build the enhanced-metadata dictionary for the given series.

    Original note (translated): the title of the series MUST be the ID of the
    series in the database.

    Args:
        serie: Series to summarise. ``.date()`` is called on its first and last
            valid index labels, so datetime-like labels are expected.
        periodicity: Periodicity of the series; stored in the result and passed
            to ``_get_last_day_of_period`` and ``_is_series_updated``.

    Returns:
        A dict keyed by the ``meta_keys`` constants. ``SECOND_TO_LAST_VALUE``
        and ``LAST_PCT_CHANGE`` are ``None`` when no observation precedes the
        last valid one.
    """

    days_since_update = (datetime.now() - _get_last_day_of_period(serie, periodicity)).days

    # Position of the last non-null observation. ``.iloc`` keeps the lookups
    # explicitly positional instead of relying on the integer fallback of
    # ``Series.__getitem__``.
    last_index = serie.index.get_loc(serie.last_valid_index())
    last = serie.iloc[last_index]
    # Only look one position back when such a position exists; with
    # ``last_index == 0`` the index ``-1`` would wrap around to the end.
    second_to_last = serie.iloc[last_index - 1] if last_index > 0 else None
    # Without a previous value there is no percentage change to report.
    last_pct_change = last / second_to_last - 1 if second_to_last is not None else None

    # Computed values
    meta = {
        meta_keys.INDEX_START: serie.first_valid_index().date(),
        meta_keys.INDEX_END: serie.last_valid_index().date(),
        meta_keys.PERIODICITY: periodicity,
        meta_keys.INDEX_SIZE: _get_index_size(serie),
        meta_keys.DAYS_SINCE_LAST_UPDATE: days_since_update,
        meta_keys.LAST_VALUE: last,
        meta_keys.SECOND_TO_LAST_VALUE: second_to_last,
        meta_keys.LAST_PCT_CHANGE: last_pct_change,
        meta_keys.IS_UPDATED: _is_series_updated(days_since_update, periodicity),
        meta_keys.MAX: serie.max(),
        meta_keys.MIN: serie.min(),
        meta_keys.AVERAGE: serie.mean(),
        meta_keys.SIGNIFICANT_FIGURES: significant_figures(serie.values)
    }

    return meta
Example #2
0
    def test_first_last_valid(self):
        """Exercise ``first_valid_index`` / ``last_valid_index``.

        Covers a copy of ``self.ts`` with NaN blocks at either end, an all-NaN
        series, empty series (GH12800) and a business-day index with holes
        (GH20499).
        """
        # NOTE(review): the position checks assume ``self.ts`` holds no NaN
        # and has more than ten rows -- confirm against the fixture.
        ts = self.ts.copy()
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
        # removed in NumPy 2.0.
        ts[:5] = np.nan

        index = ts.first_valid_index()
        assert index == ts.index[5]

        ts[-5:] = np.nan
        index = ts.last_valid_index()
        assert index == ts.index[-6]

        # With no valid value left, both lookups return None.
        ts[:] = np.nan
        assert ts.last_valid_index() is None
        assert ts.first_valid_index() is None

        ser = Series([], index=[])
        assert ser.last_valid_index() is None
        assert ser.first_valid_index() is None

        # GH12800
        empty = Series()
        assert empty.last_valid_index() is None
        assert empty.first_valid_index() is None

        # GH20499: freq is preserved when the series has holes
        ts.index = date_range("20110101", periods=len(ts), freq="B")
        ts.iloc[1] = 1
        ts.iloc[-2] = 1
        assert ts.first_valid_index() == ts.index[1]
        assert ts.last_valid_index() == ts.index[-2]
        assert ts.first_valid_index().freq == ts.index.freq
        assert ts.last_valid_index().freq == ts.index.freq
Example #3
0
    def test_first_last_valid(self):
        """Check the first/last valid index lookups on several series.

        Scenarios: NaN runs at the head and tail of a copy of ``self.ts``, a
        fully-NaN series, empty series (GH12800), and a business-day index
        with only two valid points (GH20499).
        """
        # NOTE(review): assumes ``self.ts`` is NaN-free with more than ten
        # rows -- confirm against the fixture.
        ts = self.ts.copy()
        # Use ``np.nan``: the ``np.NaN`` alias no longer exists in NumPy 2.0.
        ts[:5] = np.nan

        index = ts.first_valid_index()
        assert index == ts.index[5]

        ts[-5:] = np.nan
        index = ts.last_valid_index()
        assert index == ts.index[-6]

        # Nothing valid remains, so both lookups must return None.
        ts[:] = np.nan
        assert ts.last_valid_index() is None
        assert ts.first_valid_index() is None

        ser = Series([], index=[])
        assert ser.last_valid_index() is None
        assert ser.first_valid_index() is None

        # GH12800
        empty = Series()
        assert empty.last_valid_index() is None
        assert empty.first_valid_index() is None

        # GH20499: freq is preserved when the series has holes
        ts.index = date_range("20110101", periods=len(ts), freq="B")
        ts.iloc[1] = 1
        ts.iloc[-2] = 1
        assert ts.first_valid_index() == ts.index[1]
        assert ts.last_valid_index() == ts.index[-2]
        assert ts.first_valid_index().freq == ts.index.freq
        assert ts.last_valid_index().freq == ts.index.freq
def drift(x: Series, h: int) -> Series:
    """Forecast ``h`` future points with the drift method.

    Equation: Y[t+h|t] = Y[t] + h * ((Y[t] - Y[1]) / (t - 1)), where Y[1] and
    Y[t] are the first and last valid values of ``x`` and ``t`` is taken as
    ``len(x.values)``.

    Args:
        x: Time series data.
        h: Number of future predictions.

    Returns:
        A ``Series`` (default integer index) holding the ``h`` forecasts, in
        order of increasing horizon.
    """
    # Both end points are loop-invariant, so look them up once.
    last_value = x.get(x.last_valid_index())
    first_value = x.get(x.first_valid_index())
    diff_rate = (last_value - first_value) / (len(x.values) - 1)
    forecasts = [last_value + step * diff_rate for step in range(1, h + 1)]
    return Series(np.array(forecasts))
Example #5
0
class Player:
    """A player, identified by ``id``, with home-run counts tracked per date."""

    def __init__(self, first_name, last_name, id):
        self.first_name, self.last_name = first_name, last_name
        self.id = id
        # One slot for each month of the game.
        self.hrs = [0] * 6
        self.hr_total = 0
        # Per-date counts, and the running total recorded at each date.
        self.hr_series = Series()
        self.hr_total_series = Series()

    def __str__(self):
        return f'{self.id} : {self.last_name}'

    def __repr__(self):
        return self.__str__()

    def add_hrs(self, count, date):
        """Record ``count`` home runs for ``date`` and update the totals."""
        self.hr_total += count
        # The running total is taken before the per-date series is touched.
        self.hr_total_series[date] = self.hr_series.sum() + count
        # Accumulate only when ``date`` is the most recent entry already stored.
        same_date = self.hr_series.last_valid_index() == date
        self.hr_series[date] = (count + self.hr_series[date]) if same_date else count

    def name(self):
        """Return the first and last name separated by a space."""
        return " ".join((self.first_name, self.last_name))

    def get_player_hr_dataframe(self):
        """Return the per-date counts as a one-column frame named after the player."""
        return self.hr_series.to_frame(self.name())

    def get_player_hr_total_dataframe(self):
        """Return the running totals as a one-column frame named after the player."""
        return self.hr_total_series.to_frame(self.name())
Example #6
0
def generate_ema_list(
    closing_prices: pd.Series, sma_list: pd.Series, duration: int = 10
) -> pd.Series:
    """Return the Exponential Moving Average list for a series of closing prices.

    Each value follows
    ``ema = (current price - previous EMA) * weight + previous EMA`` with
    ``weight = 2 / (duration + 1)``. The first EMA is seeded from the last
    valid entry of ``sma_list``; per the original comments the data is ordered
    most-current first, so that entry is the oldest available average.

    Args:
        closing_prices: Closing prices, looked up by integer key.
        sma_list: Simple moving averages for the same prices. When ``None`` it
            is computed with ``generate_sma_list(closing_prices, duration)``.
        duration: Averaging window used for the weight and the seed position.

    Returns:
        A new ``pd.Series`` (default integer index) with
        ``len(closing_prices) - duration + 1`` EMA values, in the same order
        as ``closing_prices``.
    """
    weight = 2 / (duration + 1)
    if sma_list is None:
        sma_list = generate_sma_list(closing_prices, duration)

    # The first EMA reference point is the simple average itself.
    seed_sma = sma_list[sma_list.last_valid_index()]
    seed_position = len(closing_prices) - duration

    # Build the values from the seed outwards by appending, then reverse once;
    # inserting at the front of the list on every step would be quadratic.
    emas = [(closing_prices[seed_position] - seed_sma) * weight + seed_sma]
    for offset in range(1, seed_position + 1):
        previous = emas[-1]
        emas.append(
            (closing_prices[seed_position - offset] - previous) * weight + previous
        )
    emas.reverse()
    return pd.Series(emas)
Example #7
0
class Player:
    """Holds a player's identity and the home runs recorded for them by date."""

    def __init__(self, first_name, last_name, id):
        self.first_name, self.last_name = first_name, last_name
        self.id = id
        # One entry for each month of the game.
        self.hrs = [0] * 6
        self.hr_total = 0
        # Counts keyed by date, plus the cumulative total seen at each date.
        self.hr_series = Series()
        self.hr_total_series = Series()

    def __str__(self):
        return f'{self.id} : {self.last_name}'

    def __repr__(self):
        return self.__str__()

    def add_hrs(self, count, date):
        """Add ``count`` home runs on ``date`` to the totals and both series."""
        self.hr_total += count
        # Computed before ``hr_series`` changes, so it sums the earlier entries.
        self.hr_total_series[date] = self.hr_series.sum() + count
        # Only the latest stored date is accumulated; any other date is set.
        is_latest = self.hr_series.last_valid_index() == date
        self.hr_series[date] = (count + self.hr_series[date]) if is_latest else count

    def name(self):
        """Return ``first_name`` and ``last_name`` joined by a single space."""
        return " ".join((self.first_name, self.last_name))

    def get_player_hr_dataframe(self):
        """Return ``hr_series`` as a frame whose column is the player's name."""
        return self.hr_series.to_frame(self.name())

    def get_player_hr_total_dataframe(self):
        """Return ``hr_total_series`` as a frame whose column is the player's name."""
        return self.hr_total_series.to_frame(self.name())
Example #8
0
def update_enhanced_meta(serie: pd.Series, catalog_id: str, distribution_id: str):
    """Create or update the enhanced metadata of the given series.

    The name of the series MUST be the ID of the series in the database: it is
    used, together with ``catalog_id`` and ``distribution_id``, to look up the
    ``Field`` whose ``enhanced_meta`` entries are written.

    Args:
        serie: Series to summarise. ``.date()`` is called on its first and last
            valid index labels, so datetime-like labels are expected.
        catalog_id: Identifier of the catalog that contains the field.
        distribution_id: Identifier of the distribution that contains the field.
    """

    field = Field.objects.get(distribution__dataset__catalog__identifier=catalog_id,
                              distribution__identifier=distribution_id,
                              identifier=serie.name)
    periodicity = meta_keys.get(field.distribution, meta_keys.PERIODICITY)
    days_since_update = (datetime.now() - _get_last_day_of_period(serie, periodicity)).days

    # ``.iloc`` makes the "last" / "second to last" lookups explicitly
    # positional instead of relying on the integer fallback of
    # ``Series.__getitem__`` on a non-integer index.
    last = serie.iloc[-1]
    second_to_last = serie.iloc[-2] if serie.index.size > 1 else None
    # A one-element series has no previous value, hence no percentage change.
    # NOTE(review): ``None`` is then stored for this key -- confirm the
    # ``value`` column accepts it.
    last_pct_change = last / second_to_last - 1 if second_to_last is not None else None

    # Computed values
    meta = {
        meta_keys.INDEX_START: serie.first_valid_index().date(),
        meta_keys.INDEX_END: serie.last_valid_index().date(),
        meta_keys.PERIODICITY: periodicity,
        meta_keys.INDEX_SIZE: _get_index_size(serie),
        meta_keys.DAYS_SINCE_LAST_UPDATE: days_since_update,
        meta_keys.LAST_VALUE: last,
        meta_keys.SECOND_TO_LAST_VALUE: second_to_last,
        meta_keys.LAST_PCT_CHANGE: last_pct_change,
        meta_keys.IS_UPDATED: _is_series_updated(days_since_update, periodicity),
        meta_keys.MAX: serie.max(),
        meta_keys.MIN: serie.min(),
        meta_keys.AVERAGE: serie.mean(),
    }

    # One row per metadata key, created or overwritten.
    for meta_key, value in meta.items():
        field.enhanced_meta.update_or_create(key=meta_key, defaults={'value': value})
Example #9
0
def smooth_with_rolling_average(
    series: pd.Series,
    window: int = 7,
    include_trailing_zeros: bool = True,
    exclude_negatives: bool = True,
):
    """Smoothes series with a min period of 1.

    Series must have a datetime index.

    https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.rolling.html

    Port of Projections.ts:
    https://github.com/covid-projections/covid-projections/blob/master/src/common/models/Projection.ts#L715

    Args:
        series: Series with datetime index to smooth.
        window: Sliding window to average.
        include_trailing_zeros: When True the rolling average is returned as
            is. When False, every value after the last non-zero observation is
            set to NaN.
        exclude_negatives: Exclude negative values from rolling averages.

    Returns:
        Smoothed series. If ``include_trailing_zeros`` is False and the series
        holds no non-zero value, the (trimmed, negative-filtered) series itself
        is returned instead.
    """
    # Drop trailing NAs so that we don't smooth for day we don't yet have data.
    series = series.loc[:series.last_valid_index()]

    if exclude_negatives:
        # Work on a copy so the caller's data is not modified.
        series = series.copy()
        series.loc[series < 0] = None

    def mean_with_no_trailing_nan(x):
        """Return mean of series unless last value is nan."""
        if np.isnan(x.iloc[-1]):
            return np.nan

        return x.mean()

    # Apply function to a rolling window
    # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.core.window.rolling.Rolling.apply.html
    rolling_average = series.rolling(
        window, min_periods=1).apply(mean_with_no_trailing_nan)
    if include_trailing_zeros:
        return rolling_average

    # Treat zeros as missing to find the last observation that is non-zero.
    last_valid_index = series.replace(0, np.nan).last_valid_index()

    # ``last_valid_index()`` signals "nothing found" with None, so test for
    # that explicitly rather than relying on the truthiness of an index label.
    if last_valid_index is not None:
        rolling_average[last_valid_index + timedelta(days=1):] = np.nan
        return rolling_average
    else:  # entirely empty series:
        return series
Example #10
0
    def test_first_last_valid(self):
        """Exercise ``first_valid_index`` / ``last_valid_index``.

        Covers a copy of ``self.ts`` with NaN blocks at either end, an all-NaN
        series and empty series (GH12800).
        """
        # NOTE(review): the position checks assume ``self.ts`` holds no NaN
        # and has more than ten rows -- confirm against the fixture.
        ts = self.ts.copy()
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
        # removed in NumPy 2.0.
        ts[:5] = np.nan

        index = ts.first_valid_index()
        self.assertEqual(index, ts.index[5])

        ts[-5:] = np.nan
        index = ts.last_valid_index()
        self.assertEqual(index, ts.index[-6])

        # With no valid value left, both lookups return None.
        ts[:] = np.nan
        self.assertIsNone(ts.last_valid_index())
        self.assertIsNone(ts.first_valid_index())

        ser = Series([], index=[])
        self.assertIsNone(ser.last_valid_index())
        self.assertIsNone(ser.first_valid_index())

        # GH12800
        empty = Series()
        self.assertIsNone(empty.last_valid_index())
        self.assertIsNone(empty.first_valid_index())
Example #11
0
    def test_first_last_valid(self):
        """Exercise ``first_valid_index`` / ``last_valid_index``.

        Covers a copy of ``self.ts`` with NaN blocks at either end, an all-NaN
        series and empty series (GH12800).
        """
        # NOTE(review): the position checks assume ``self.ts`` holds no NaN
        # and has more than ten rows -- confirm against the fixture.
        ts = self.ts.copy()
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
        # removed in NumPy 2.0.
        ts[:5] = np.nan

        index = ts.first_valid_index()
        assert index == ts.index[5]

        ts[-5:] = np.nan
        index = ts.last_valid_index()
        assert index == ts.index[-6]

        # With no valid value left, both lookups return None.
        ts[:] = np.nan
        assert ts.last_valid_index() is None
        assert ts.first_valid_index() is None

        ser = Series([], index=[])
        assert ser.last_valid_index() is None
        assert ser.first_valid_index() is None

        # GH12800
        empty = Series()
        assert empty.last_valid_index() is None
        assert empty.first_valid_index() is None
    def test_first_last_valid(self):
        """Check the first/last valid index lookups on several series.

        Scenarios: NaN runs at the head and tail of a copy of ``self.ts``, a
        fully-NaN series, and empty series (GH12800).
        """
        # NOTE(review): assumes ``self.ts`` is NaN-free with more than ten
        # rows -- confirm against the fixture.
        ts = self.ts.copy()
        # Use ``np.nan``: the ``np.NaN`` alias no longer exists in NumPy 2.0.
        ts[:5] = np.nan

        index = ts.first_valid_index()
        assert index == ts.index[5]

        ts[-5:] = np.nan
        index = ts.last_valid_index()
        assert index == ts.index[-6]

        # Nothing valid remains, so both lookups must return None.
        ts[:] = np.nan
        assert ts.last_valid_index() is None
        assert ts.first_valid_index() is None

        ser = Series([], index=[])
        assert ser.last_valid_index() is None
        assert ser.first_valid_index() is None

        # GH12800
        empty = Series()
        assert empty.last_valid_index() is None
        assert empty.first_valid_index() is None
    def write_serie(self, serie: pd.Series, periodicity: str, fields: dict,
                    writer: csv.writer):
        """Write one CSV row per entry of ``serie`` through ``writer``.

        The series is cut down to the span between its first and last valid
        labels, each remaining (index, value) pair is turned into a row by
        ``self.rows``, and the rows are written in index order.

        Args:
            serie: Series to dump; its ``name`` is the key looked up in ``fields``.
            periodicity: Passed through to ``self.rows``.
            fields: Mapping from series name to field id.
            writer: Object whose ``writerow`` receives each generated row.
        """
        field_id = fields[serie.name]

        # Drop the NaN padding at both ends of the series.
        first_valid = serie.first_valid_index()
        last_valid = serie.last_valid_index()
        trimmed = serie[first_valid:last_valid]

        row_args = (self.fields_data, field_id, periodicity)
        generated = trimmed.reset_index().apply(self.rows, axis=1, args=row_args)

        rows = pd.Series(generated.values, index=trimmed.index)
        for row in rows:
            writer.writerow(row)
Example #14
0
def interpolate_stalled_and_missing_values(series: pd.Series) -> pd.Series:
    """Interpolate stretches where the values stalled or are missing.

    Only the span between the first and last valid observations is touched.
    Within it, any value equal to its predecessor is treated as missing, the
    gaps are filled by time-weighted interpolation, and the result is rounded
    down.

    Args:
        series: Series with a datetime index.

    Returns:
        A modified copy of ``series``; the input is left as it was.
    """
    result = series.copy()
    first_valid = result.first_valid_index()
    last_valid = result.last_valid_index()
    span = result.loc[first_valid:last_valid]

    # A zero difference means the value did not move since the previous row.
    stalled = span.diff() == 0
    span[stalled] = None

    # Interpolating on the index (method="time") keeps breaks between
    # observations from being filled as if the rows were evenly spaced.
    filled = span.interpolate(method="time")
    result.loc[first_valid:last_valid] = np.floor(filled)

    return result
Example #15
0
def _get_range(x: pd.Series):
    """Return the bounds of a NaN-free stretch of ``x`` ending at its last valid label.

    Starting from the span between the first and last valid labels, the start
    is moved to the first valid label after the earliest NaN, repeatedly,
    until the span contains no NaN.

    Returns:
        ``(first_label, last_label)``, or ``(None, None)`` when ``x`` has no
        valid value.
    """
    start = x.first_valid_index()
    end = x.last_valid_index()
    window = x.loc[start:end]

    while True:
        # Stop once the window is dense, or when there is nothing valid at all.
        if not window.isnull().values.any() or (start is None and end is None):
            break
        # Label of the earliest NaN still inside the window.
        gap_label = window.isna().idxmax()
        start = window.loc[gap_label:end].first_valid_index()
        window = x.loc[start:end]

    if start is None or end is None:
        return None, None

    return start, end
Example #16
0
    def test_first_last_valid(self):
        """Exercise ``first_valid_index`` / ``last_valid_index``.

        Covers a copy of ``self.ts`` with NaN blocks at either end, an all-NaN
        series and an empty series.
        """
        # NOTE(review): the position checks assume ``self.ts`` holds no NaN
        # and has more than ten rows -- confirm against the fixture.
        ts = self.ts.copy()
        # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was
        # removed in NumPy 2.0.
        ts[:5] = np.nan

        index = ts.first_valid_index()
        self.assertEqual(index, ts.index[5])

        ts[-5:] = np.nan
        index = ts.last_valid_index()
        self.assertEqual(index, ts.index[-6])

        # ``assertIsNone`` replaces the ``assert_`` alias, which was removed
        # from ``unittest.TestCase`` in Python 3.12.
        ts[:] = np.nan
        self.assertIsNone(ts.last_valid_index())
        self.assertIsNone(ts.first_valid_index())

        ser = Series([], index=[])
        self.assertIsNone(ser.last_valid_index())
        self.assertIsNone(ser.first_valid_index())
Example #17
0
def fill_gaps(time_series: pd.Series) -> pd.Series:
    """
    Fill the NaN values that lie inside a time series; NaN values before the
    first valid sample and after the last valid sample are left untouched.

    Each inner NaN is replaced by the value of a valid sample that falls on
    the same weekday and hour, choosing the one closest in whole days.

    Parameters
    ----------
    time_series: pd.Series
        Time-series of load (can be NaNs) indexed with datetime indexes.

    Returns
    -------
    time_series: pd.Series
        The same object that was passed in, with the filled values written
        into it.
    """

    # Work only on the span between the first and the last valid sample.
    first_valid = time_series.first_valid_index()
    last_valid = time_series.last_valid_index()
    trimmed = time_series.loc[first_valid:last_valid]

    known = trimmed.dropna()
    missing_stamps = trimmed.index[trimmed.apply(np.isnan)]

    for stamp in missing_stamps:

        def _same_slot(candidate):
            # Same day of the week and same hour as the missing sample.
            return candidate.weekday() == stamp.weekday() and candidate.hour == stamp.hour

        candidates = known[known.index.map(_same_slot)]
        # Distance in whole days from each candidate to the missing sample.
        day_distance = abs((candidates.index - stamp).days)
        nearest = candidates.index[np.argmin(day_distance)]

        trimmed[stamp] = known[nearest]

    # Write the repaired span back into the caller's series.
    time_series[trimmed.index] = trimmed.values

    return time_series
        def _assert_single_contiguous_dense_sequence(
                _series: pd.Series) -> None:
            """
            Assert that the series is numeric and has no Null values once its
            leading and trailing Nulls are set aside.

            A motivating example is a ForecastCheck: the main value series may
            end with trailing Nulls and the forecast series may begin with
            leading Nulls, but neither the actual nor the forecast period
            should contain Nulls.

            The original author flagged this as a deliberately strong
            constraint that may be revisited.
            """

            assert is_numeric_dtype(_series), 'The "Single Contiguous Dense Sequence" constraint should only be ' \
                                              'applied to numeric Series'

            # Span from the first to the last non-Null value, both inclusive.
            first_valid = _series.first_valid_index()
            last_valid = _series.last_valid_index()
            interior = _series.loc[first_valid:last_valid]

            assert (not interior.isnull().values.any()), (
                'Numeric series may have leading or trailing null values to represent missing or non-applicable '
                'data points. However, values for the series should otherwise be non-Null.'
            )
def generate_field_summary(series: pd.Series) -> pd.Series:
    """Summarise ``series`` as a Series of named statistics.

    The result always has the entries ``has_value``, ``min_date``,
    ``max_date``, ``max_value``, ``min_value``, ``latest_value``,
    ``num_observations``, ``largest_delta`` and ``largest_delta_date``. When
    the series has no non-null value they keep their defaults (``None``, with
    ``num_observations`` at 0 and ``has_value`` False).
    """
    summary = {
        "has_value": not series.isnull().all(),
        "min_date": None,
        "max_date": None,
        "max_value": None,
        "min_value": None,
        "latest_value": None,
        "num_observations": 0,
        "largest_delta": None,
        "largest_delta_date": None,
    }

    if summary["has_value"]:
        observed = series[series.notnull()]
        # Absolute change between consecutive rows.
        abs_deltas = series.diff().abs()

        summary["min_date"] = series.first_valid_index()
        summary["max_date"] = series.last_valid_index()
        summary["latest_value"] = observed.iloc[-1]
        summary["max_value"] = series.max()
        summary["min_value"] = series.min()
        summary["num_observations"] = len(observed)
        summary["largest_delta"] = abs_deltas.max()
        # The date is only looked up when at least one delta is defined.
        if len(abs_deltas.dropna()):
            summary["largest_delta_date"] = abs_deltas.idxmax()

    return pd.Series(summary)
Example #20
0
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', limit_area=None, fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues
        Positions and values; NaN entries of ``yvalues`` are the ones filled.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are computed with
        ``np.interp``; the names listed in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``. 'time' requires
        ``xvalues.is_all_dates`` to be truthy and is then handled as 'values'.
    limit : int, optional
        Must be an integer of at least 1 when given. Passed to
        ``_interp_limit`` to pick the NaNs that stay unfilled.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Compared case-insensitively.
    limit_area : {'inside', 'outside'}, optional
        Compared case-insensitively. 'inside' keeps the leading and trailing
        NaNs; 'outside' keeps the NaNs between the first and last valid value.
    fill_value, bounds_error, order, **kwargs
        Forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no NaN; otherwise a copy of
    the values with the NaNs interpolated and the preserved positions reset to
    NaN. If ``method`` is in neither group the function falls off the end and
    returns None.

    Raises
    ------
    ValueError
        For 'time' without an all-dates ``xvalues``, or an invalid
        ``limit_direction``, ``limit_area`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isna(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        # Nothing to fill: the input is handed back as is (not copied).
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        msg = ('Invalid limit_direction: expecting one of {valid!r}, '
               'got {invalid!r}.')
        raise ValueError(msg.format(valid=valid_limit_directions,
                                    invalid=limit_direction))

    if limit_area is not None:
        valid_limit_areas = ['inside', 'outside']
        limit_area = limit_area.lower()
        if limit_area not in valid_limit_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(valid_limit_areas, limit_area))

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    from pandas import Series
    # Wrapping in a default-indexed Series makes first/last_valid_index
    # return integer positions, which the range() calls below rely on.
    ys = Series(yvalues)

    # These are sets of index pointers to invalid values... i.e. {0, 1, etc...
    all_nans = set(np.flatnonzero(invalid))
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))
    mid_nans = all_nans - start_nans - end_nans

    # Like the sets above, preserve_nans contains indices of invalid values,
    # but in this case, it is the final set of indices that need to be
    # preserved as NaN after the interpolation.

    # For example if limit_direction='forward' then preserve_nans will
    # contain indices of NaNs at the beginning of the series, and NaNs that
    # are more than'limit' away from the prior non-NaN.

    # set preserve_nans based on direction using _interp_limit
    if limit_direction == 'forward':
        preserve_nans = start_nans | set(_interp_limit(invalid, limit, 0))
    elif limit_direction == 'backward':
        preserve_nans = end_nans | set(_interp_limit(invalid, 0, limit))
    else:
        # both directions... just use _interp_limit
        preserve_nans = set(_interp_limit(invalid, limit, limit))

    # if limit_area is set, add either mid or outside indices
    # to preserve_nans GH #16284
    if limit_area == 'inside':
        # preserve NaNs on the outside
        preserve_nans |= start_nans | end_nans
    elif limit_area == 'outside':
        # preserve NaNs on the inside
        preserve_nans |= mid_nans

    # sort preserve_nans and covert to list
    preserve_nans = sorted(preserve_nans)

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every NaN first, then put back the NaNs that must be preserved.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[preserve_nans] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[preserve_nans] = np.nan
        return result
Example #21
0
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues
        Positions and values; NaN entries of ``yvalues`` are the ones filled.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are computed with
        ``np.interp``; the names listed in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``. 'time' requires
        ``xvalues.is_all_dates`` to be truthy and is then handled as 'values'.
    limit : int, optional
        Must be an integer of at least 1 when given.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no NaN; otherwise a copy of
    the values with the NaNs interpolated and the positions in
    ``violate_limit`` reset to NaN. If ``method`` is in neither group the
    function falls off the end and returns None.

    Raises
    ------
    ValueError
        For 'time' without an all-dates ``xvalues``, or an invalid
        ``limit_direction`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which cant be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        # Nothing to fill: the input is handed back as is (not copied).
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        "Get idx of values that won't be filled b/c they exceed the limits."
        # A NaN is yielded when every entry from ``fw_limit`` positions before
        # it to ``bw_limit`` positions after it is also NaN.
        for x in np.where(invalid)[0]:
            if invalid[max(0, x - fw_limit):x + bw_limit + 1].all():
                yield x

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    # Wrapping in a default-indexed Series makes first/last_valid_index
    # return integer positions, which the range() calls below rely on.
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # This is a list of the indexes in the series whose yvalue is currently
    # NaN, but whose interpolated yvalue will be overwritten with NaN after
    # computing the interpolation. For each index in this list, one of these
    # conditions is true of the corresponding NaN in the yvalues:
    #
    # a) It is one of a chain of NaNs at the beginning of the series, and
    #    either limit is not specified or limit_direction is 'forward'.
    # b) It is one of a chain of NaNs at the end of the series, and limit is
    #    specified and limit_direction is 'backward' or 'both'.
    # c) Limit is nonzero and it is further than limit from the nearest non-NaN
    #    value (with respect to the limit_direction setting).
    #
    # The default behavior is to fill forward with no limit, ignoring NaNs at
    # the beginning (see issues #9218 and #10420)
    violate_limit = sorted(start_nans)

    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')
        if limit_direction == 'forward':
            violate_limit = sorted(start_nans | set(_interp_limit(invalid,
                                                                  limit, 0)))
        if limit_direction == 'backward':
            violate_limit = sorted(end_nans | set(_interp_limit(invalid, 0,
                                                                limit)))
        if limit_direction == 'both':
            violate_limit = sorted(_interp_limit(invalid, limit, limit))

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every NaN first, then put back the NaNs that must stay.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'from_derivatives', 'piecewise_polynomial', 'pchip', 'akima']

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
Example #22
0
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Bounds_error is currently hardcoded to False since non-scipy ones don't
    take it as an argument.

    Parameters
    ----------
    xvalues, yvalues
        Positions and values; NaN entries of ``yvalues`` are the ones filled.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are computed with
        ``np.interp``; the names listed in ``sp_methods`` are delegated to
        ``_interpolate_scipy_wrapper``. 'time' requires
        ``xvalues.is_all_dates`` to be truthy and is then handled as 'values'.
    limit : int, optional
        Must be an integer of at least 1 when given. Passed to
        ``_interp_limit`` to pick the NaNs that stay unfilled.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Compared case-insensitively.
    fill_value, bounds_error, order, **kwargs
        Forwarded to ``_interpolate_scipy_wrapper``.

    Returns
    -------
    An all-NaN float64 array shaped like ``xvalues`` when ``yvalues`` has no
    valid entry; ``yvalues`` itself when it has no NaN; otherwise a copy of
    the values with the NaNs interpolated and the positions in
    ``violate_limit`` reset to NaN. If ``method`` is in neither group the
    function falls off the end and returns None.

    Raises
    ------
    ValueError
        For 'time' without an all-dates ``xvalues``, or an invalid
        ``limit_direction`` or ``limit``.
    """
    # Treat the original, non-scipy methods first.

    invalid = isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which cant be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        # Nothing to fill: the input is handed back as is (not copied).
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            # if not issubclass(xvalues.dtype.type, np.datetime64):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    # Wrapping in a default-indexed Series makes first/last_valid_index
    # return integer positions, which the range() calls below rely on.
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit is a list of the indexes in the series whose yvalue is
    # currently NaN, and should still be NaN after the interpolation.
    # Specifically:
    #
    # If limit_direction='forward' or None then the list will contain NaNs at
    # the beginning of the series, and NaNs that are more than 'limit' away
    # from the prior non-NaN.
    #
    # If limit_direction='backward' then the list will contain NaNs at
    # the end of the series, and NaNs that are more than 'limit' away
    # from the subsequent non-NaN.
    #
    # If limit_direction='both' then the list will contain NaNs that
    # are more than 'limit' away from any non-NaN.
    #
    # If limit=None, then use default behavior of filling an unlimited number
    # of NaNs in the direction specified by limit_direction

    # default limit is unlimited GH #16282
    if limit is None:
        # limit = len(xvalues)
        pass
    elif not is_integer(limit):
        raise ValueError('Limit must be an integer')
    elif limit < 1:
        raise ValueError('Limit must be greater than 0')

    # each possible limit_direction
    # TODO: do we need sorted?
    if limit_direction == 'forward' and limit is not None:
        violate_limit = sorted(start_nans
                               | set(_interp_limit(invalid, limit, 0)))
    elif limit_direction == 'forward':
        violate_limit = sorted(start_nans)
    elif limit_direction == 'backward' and limit is not None:
        violate_limit = sorted(end_nans
                               | set(_interp_limit(invalid, 0, limit)))
    elif limit_direction == 'backward':
        violate_limit = sorted(end_nans)
    elif limit_direction == 'both' and limit is not None:
        violate_limit = sorted(_interp_limit(invalid, limit, limit))
    else:
        # 'both' without a limit: no NaN is kept.
        violate_limit = []

    # Unwrap to the underlying arrays when the inputs expose ``.values``.
    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(inds.dtype.type):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        # Fill every NaN first, then put back the NaNs that must stay.
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = [
        'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'barycentric',
        'krogh', 'spline', 'polynomial', 'from_derivatives',
        'piecewise_polynomial', 'pchip', 'akima'
    ]

    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order,
                                                     **kwargs)
        result[violate_limit] = np.nan
        return result
Example #23
0
def interpolate_1d(xvalues,
                   yvalues,
                   method='linear',
                   limit=None,
                   limit_direction='forward',
                   limit_area=None,
                   fill_value=None,
                   bounds_error=False,
                   order=None,
                   **kwargs):
    """
    Fill the missing entries of a 1-d array by interpolation.

    ``xvalues`` and ``yvalues`` are expected to be 1-d and of equal length;
    the result is 1-d too.

    Parameters
    ----------
    xvalues : array-like or Index
        Coordinates of the points.
    yvalues : array-like
        Values to interpolate; entries flagged by ``isna`` are candidates
        for filling.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are evaluated with
        ``np.interp``; the scipy method names listed in the body are
        delegated to ``_interpolate_scipy_wrapper``.  Any other name falls
        through and the function returns None.
    limit : int, optional
        When given it must be an integer >= 1.  It is handed to
        ``_interp_limit`` to decide which NaNs are too far from a valid
        value to be filled.  None means no limit.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Case-insensitive.  'forward' keeps leading NaNs, 'backward' keeps
        trailing NaNs.
    limit_area : {'inside', 'outside'}, optional
        Case-insensitive.  'inside' additionally keeps leading and trailing
        NaNs; 'outside' additionally keeps NaNs located between valid
        values.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``; the ``np.interp``
        branch does not use them.

    Returns
    -------
    numpy.ndarray or the ``yvalues`` object itself
        An all-NaN float64 array when nothing is valid, ``yvalues``
        unchanged (not copied) when nothing is missing, otherwise a filled
        copy of the unwrapped values.

    Raises
    ------
    ValueError
        For method='time' when ``xvalues`` has no truthy ``is_all_dates``
        attribute, for an unknown ``limit_direction`` or ``limit_area``,
        and for a ``limit`` that is not an integer or is below 1.
    """
    missing = isna(yvalues)
    present = ~missing

    if not present.any():
        # xvalues could be an Index, which can't be mutated, so the all-NaN
        # answer is built from a plain ndarray version of it
        return np.full_like(np.asarray(xvalues), np.nan, dtype=np.float64)

    if present.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        # time-weighted interpolation is 'values' interpolation on dates
        method = 'values'

    limit_direction = limit_direction.lower()
    allowed_directions = ['forward', 'backward', 'both']
    if limit_direction not in allowed_directions:
        raise ValueError(
            'Invalid limit_direction: expecting one of {valid!r}, '
            'got {invalid!r}.'.format(valid=allowed_directions,
                                      invalid=limit_direction))

    if limit_area is not None:
        limit_area = limit_area.lower()
        allowed_areas = ['inside', 'outside']
        if limit_area not in allowed_areas:
            raise ValueError('Invalid limit_area: expecting one of {}, got '
                             '{}.'.format(allowed_areas, limit_area))

    # the default limit is "unlimited", GH #16282
    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')

    from pandas import Series
    y_series = Series(yvalues)

    # Sets of positions of invalid values: the run before the first valid
    # value and the run after the last valid value.
    leading = set(range(y_series.first_valid_index()))
    trailing = set(range(1 + y_series.last_valid_index(), len(present)))

    # preserve_nans collects the positions that must be NaN again once the
    # interpolation has run.  With limit_direction='forward', for instance,
    # that is the leading NaNs plus the NaNs more than 'limit' away from the
    # prior non-NaN.
    if limit_direction == 'forward':
        edge, fw_limit, bw_limit = leading, limit, 0
    elif limit_direction == 'backward':
        edge, fw_limit, bw_limit = trailing, 0, limit
    else:
        # both directions: only _interp_limit decides
        edge, fw_limit, bw_limit = set(), limit, limit
    preserve_nans = edge | set(_interp_limit(missing, fw_limit, bw_limit))

    # limit_area adds either the outer or the inner NaNs, GH #16284
    if limit_area == 'inside':
        preserve_nans.update(leading, trailing)
    elif limit_area == 'outside':
        interior = set(np.flatnonzero(missing)) - leading - trailing
        preserve_nans.update(interior)

    # an ordered list is used for the final indexing
    preserve_nans = sorted(preserve_nans)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ('linear', 'time', 'index', 'values'):
        if method in ('values', 'index'):
            grid = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(grid.dtype.type):
                grid = grid.view(np.int64)
            if grid.dtype == np.object_:
                grid = lib.maybe_convert_objects(grid)
        else:
            grid = xvalues
        filled = np.interp(grid[missing], grid[present], yvalues[present])
    elif method in ('nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                    'barycentric', 'krogh', 'spline', 'polynomial',
                    'from_derivatives', 'piecewise_polynomial', 'pchip',
                    'akima'):
        grid = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(grid.dtype.type, np.datetime64):
            grid = grid.view(np.int64)
        filled = _interpolate_scipy_wrapper(grid[present],
                                            yvalues[present],
                                            grid[missing],
                                            method=method,
                                            fill_value=fill_value,
                                            bounds_error=bounds_error,
                                            order=order,
                                            **kwargs)
    else:
        # unrecognised method name: nothing is computed
        return None

    result[missing] = filled
    result[preserve_nans] = np.nan
    return result
Example #24
0
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Logic for the 1-d interpolation.  The result should be 1-d, inputs
    xvalues and yvalues will each be 1-d arrays of the same length.

    Parameters
    ----------
    xvalues : array-like or Index
        Coordinates of the points.
    yvalues : array-like
        Values to interpolate; entries flagged by ``com.isnull`` are the
        candidates for filling.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are evaluated with
        ``np.interp``; the scipy method names listed in the body are
        delegated to ``_interpolate_scipy_wrapper``.  Any other name falls
        through and the function returns None.
    limit : int, optional
        Maximum run of consecutive NaNs to fill, counted from the nearest
        valid value in ``limit_direction``.  A falsy value (None or 0)
        means no limit.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Case-insensitive.  Honoured whether or not ``limit`` is given:
        'forward' leaves leading NaNs untouched, 'backward' leaves trailing
        NaNs untouched, 'both' leaves neither untouched.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``; the ``np.interp``
        branch does not take them.

    Returns
    -------
    numpy.ndarray or the ``yvalues`` object itself
        An all-NaN float64 array when nothing is valid, ``yvalues``
        unchanged (not copied) when nothing is missing, otherwise a filled
        copy of the unwrapped values.

    Raises
    ------
    ValueError
        For method='time' when ``xvalues`` has no truthy ``is_all_dates``
        attribute, and for an unknown ``limit_direction``.
    """
    # Treat the original, non-scipy methods first.

    invalid = com.isnull(yvalues)
    valid = ~invalid

    if not valid.any():
        # have to call np.asarray(xvalues) since xvalues could be an Index
        # which can't be mutated
        result = np.empty_like(np.asarray(xvalues), dtype=np.float64)
        result.fill(np.nan)
        return result

    if valid.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        method = 'values'

    def _interp_limit(invalid, fw_limit, bw_limit):
        """Yield positions of NaNs that won't be filled b/c they exceed
        the limits, i.e. whose whole window
        [pos - fw_limit, pos + bw_limit] is invalid."""
        for pos in np.where(invalid)[0]:
            if invalid[max(0, pos - fw_limit):pos + bw_limit + 1].all():
                yield pos

    valid_limit_directions = ['forward', 'backward', 'both']
    limit_direction = limit_direction.lower()
    if limit_direction not in valid_limit_directions:
        raise ValueError('Invalid limit_direction: expecting one of %r, got '
                         '%r.' % (valid_limit_directions, limit_direction))

    from pandas import Series
    ys = Series(yvalues)
    start_nans = set(range(ys.first_valid_index()))
    end_nans = set(range(1 + ys.last_valid_index(), len(valid)))

    # violate_limit lists the positions whose yvalue is currently NaN and
    # must be NaN again after the interpolation:
    #
    # a) 'forward': the chain of NaNs at the beginning of the series (there
    #    is no prior value to fill from; see issues #9218 and #10420).
    # b) 'backward': the chain of NaNs at the end of the series.
    # c) 'both': no edge chain is kept.
    # d) When limit is given, additionally every NaN that is further than
    #    limit from the nearest non-NaN (with respect to limit_direction).
    #
    # The edge chains used to be chosen as if limit_direction were
    # 'forward' whenever limit was not given; the direction is now honoured
    # in that case too (GH #16282).
    if limit_direction == 'forward':
        preserved, fw_limit, bw_limit = set(start_nans), limit, 0
    elif limit_direction == 'backward':
        preserved, fw_limit, bw_limit = set(end_nans), 0, limit
    else:
        preserved, fw_limit, bw_limit = set(), limit, limit

    if limit:
        preserved.update(_interp_limit(invalid, fw_limit, bw_limit))

    violate_limit = sorted(preserved)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ['linear', 'time', 'index', 'values']:
        if method in ('values', 'index'):
            inds = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if issubclass(inds.dtype.type, np.datetime64):
                inds = inds.view(np.int64)
            if inds.dtype == np.object_:
                inds = lib.maybe_convert_objects(inds)
        else:
            inds = xvalues
        result[invalid] = np.interp(inds[invalid], inds[valid], yvalues[valid])
        result[violate_limit] = np.nan
        return result

    sp_methods = ['nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                  'barycentric', 'krogh', 'spline', 'polynomial',
                  'piecewise_polynomial', 'pchip', 'akima']
    if method in sp_methods:
        inds = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(inds.dtype.type, np.datetime64):
            inds = inds.view(np.int64)
        result[invalid] = _interpolate_scipy_wrapper(inds[valid],
                                                     yvalues[valid],
                                                     inds[invalid],
                                                     method=method,
                                                     fill_value=fill_value,
                                                     bounds_error=bounds_error,
                                                     order=order, **kwargs)
        result[violate_limit] = np.nan
        return result
Example #25
0
def _get_index_size(serie: pd.Series):
    # Filtro los NaN antes y después de la serie
    return len(serie[serie.first_valid_index():serie.last_valid_index()])
Example #26
0
def interpolate_1d(xvalues, yvalues, method='linear', limit=None,
                   limit_direction='forward', fill_value=None,
                   bounds_error=False, order=None, **kwargs):
    """
    Fill the missing entries of a 1-d array by interpolation.

    ``xvalues`` and ``yvalues`` are expected to be 1-d and of equal length;
    the result is 1-d too.

    Parameters
    ----------
    xvalues : array-like or Index
        Coordinates of the points.
    yvalues : array-like
        Values to interpolate; entries flagged by ``isnull`` are candidates
        for filling.
    method : str, default 'linear'
        'linear', 'time', 'index' and 'values' are evaluated with
        ``np.interp``; the scipy method names listed in the body are
        delegated to ``_interpolate_scipy_wrapper``.  Any other name falls
        through and the function returns None.
    limit : int, optional
        When given it must be an integer >= 1 and is handed to
        ``_interp_limit`` to find the NaNs that are more than ``limit``
        away from a valid value.  None fills an unlimited number of NaNs
        in the requested direction.
    limit_direction : {'forward', 'backward', 'both'}, default 'forward'
        Case-insensitive.  'forward' keeps leading NaNs, 'backward' keeps
        trailing NaNs, 'both' keeps neither.
    fill_value, bounds_error, order, **kwargs
        Only forwarded to ``_interpolate_scipy_wrapper``; the ``np.interp``
        branch does not use them.

    Returns
    -------
    numpy.ndarray or the ``yvalues`` object itself
        An all-NaN float64 array when nothing is valid, ``yvalues``
        unchanged (not copied) when nothing is missing, otherwise a filled
        copy of the unwrapped values.

    Raises
    ------
    ValueError
        For method='time' when ``xvalues`` has no truthy ``is_all_dates``
        attribute, for an unknown ``limit_direction``, and for a ``limit``
        that is not an integer or is below 1.
    """
    missing = isnull(yvalues)
    present = ~missing

    if not present.any():
        # xvalues could be an Index, which can't be mutated, so the all-NaN
        # answer is built from a plain ndarray version of it
        return np.full_like(np.asarray(xvalues), np.nan, dtype=np.float64)

    if present.all():
        return yvalues

    if method == 'time':
        if not getattr(xvalues, 'is_all_dates', None):
            raise ValueError('time-weighted interpolation only works '
                             'on Series or DataFrames with a '
                             'DatetimeIndex')
        # time-weighted interpolation is 'values' interpolation on dates
        method = 'values'

    limit_direction = limit_direction.lower()
    allowed_directions = ['forward', 'backward', 'both']
    if limit_direction not in allowed_directions:
        raise ValueError(
            'Invalid limit_direction: expecting one of %r, got %r.'
            % (allowed_directions, limit_direction))

    from pandas import Series
    y_series = Series(yvalues)
    # positions of the NaN runs before the first / after the last valid value
    leading = set(range(y_series.first_valid_index()))
    trailing = set(range(1 + y_series.last_valid_index(), len(present)))

    # the default limit is "unlimited", GH #16282
    if limit is not None:
        if not is_integer(limit):
            raise ValueError('Limit must be an integer')
        if limit < 1:
            raise ValueError('Limit must be greater than 0')
    limited = limit is not None

    # violate_limit: positions that are NaN now and must be NaN again after
    # the interpolation.
    #   'forward'  -> leading NaNs, plus NaNs more than 'limit' away from
    #                 the prior non-NaN
    #   'backward' -> trailing NaNs, plus NaNs more than 'limit' away from
    #                 the subsequent non-NaN
    #   'both'     -> NaNs more than 'limit' away from any non-NaN
    # Without a limit only the edge NaNs (if any) are kept.
    if limit_direction == 'both':
        if limited:
            violate_limit = sorted(_interp_limit(missing, limit, limit))
        else:
            violate_limit = []
    else:
        if limit_direction == 'forward':
            edge, fw_limit, bw_limit = leading, limit, 0
        else:
            edge, fw_limit, bw_limit = trailing, 0, limit
        if limited:
            edge = edge | set(_interp_limit(missing, fw_limit, bw_limit))
        violate_limit = sorted(edge)

    xvalues = getattr(xvalues, 'values', xvalues)
    yvalues = getattr(yvalues, 'values', yvalues)
    result = yvalues.copy()

    if method in ('linear', 'time', 'index', 'values'):
        if method in ('values', 'index'):
            grid = np.asarray(xvalues)
            # hack for DatetimeIndex, #1646
            if needs_i8_conversion(grid.dtype.type):
                grid = grid.view(np.int64)
            if grid.dtype == np.object_:
                grid = lib.maybe_convert_objects(grid)
        else:
            grid = xvalues
        filled = np.interp(grid[missing], grid[present], yvalues[present])
    elif method in ('nearest', 'zero', 'slinear', 'quadratic', 'cubic',
                    'barycentric', 'krogh', 'spline', 'polynomial',
                    'from_derivatives', 'piecewise_polynomial', 'pchip',
                    'akima'):
        grid = np.asarray(xvalues)
        # hack for DatetimeIndex, #1646
        if issubclass(grid.dtype.type, np.datetime64):
            grid = grid.view(np.int64)
        filled = _interpolate_scipy_wrapper(grid[present],
                                            yvalues[present],
                                            grid[missing],
                                            method=method,
                                            fill_value=fill_value,
                                            bounds_error=bounds_error,
                                            order=order, **kwargs)
    else:
        # unrecognised method name: nothing is computed
        return None

    result[missing] = filled
    result[violate_limit] = np.nan
    return result