Esempio n. 1
0
    def fit(self, series: TimeSeries):
        """Fit the model on ``series``.

        The steps are: fill missing values, estimate the configured trend and
        subtract it, crop the detrended series with ``_crop_to_match_seasons``,
        apply a discrete Fourier transform, keep only the
        ``nr_freqs_to_keep`` frequencies with the largest amplitudes, and
        transform the filtered spectrum back with the inverse transform.

        Parameters
        ----------
        series
            The series to fit on. Its values are read through
            ``univariate_values()``.

        Returns
        -------
        self
            The fitted model; ``trend_function``, ``fft_values``,
            ``filtered_indices``, ``fft_values_filtered`` and
            ``predicted_values`` are set on it.
        """
        series = fill_missing_values(series)
        super().fit(series)
        series = self.training_series

        values = series.univariate_values()
        time_steps = range(len(series))

        # determine trend
        if self.trend == "poly":
            trend_coefficients = np.polyfit(time_steps, values,
                                            self.trend_poly_degree)
            self.trend_function = np.poly1d(trend_coefficients)
        elif self.trend == "exp":
            # a line fitted to log(values) yields exp(intercept) * exp(slope * x)
            trend_coefficients = np.polyfit(time_steps, np.log(values), 1)
            self.trend_function = lambda x: np.exp(trend_coefficients[
                1]) * np.exp(trend_coefficients[0] * x)
        else:
            # no trend configured: subtracting 0 leaves the values untouched
            self.trend_function = lambda x: 0

        # subtract trend
        detrended_values = values - self.trend_function(time_steps)
        detrended_series = TimeSeries.from_times_and_values(
            series.time_index, detrended_values)

        # crop training set to match the seasonality of the first prediction point
        if self.required_matches is None:
            curr_required_matches = _find_relevant_timestamp_attributes(
                detrended_series)
        else:
            curr_required_matches = self.required_matches
        cropped_series = _crop_to_match_seasons(
            detrended_series, required_matches=curr_required_matches)

        # perform dft
        self.fft_values = np.fft.fft(cropped_series.univariate_values())

        # get indices of `nr_freqs_to_keep` (if a correct value was provided) frequencies with the highest
        # amplitudes by partitioning around the element with sorted index -nr_freqs_to_keep instead of
        # sorting the whole array
        first_n = self.nr_freqs_to_keep
        if first_n is None or first_n < 1 or first_n > len(self.fft_values):
            # missing or out-of-range setting: keep every frequency
            first_n = len(self.fft_values)
        self.filtered_indices = np.argpartition(abs(self.fft_values),
                                                -first_n)[-first_n:]

        # set all other values in the frequency domain to 0
        # (np.complex128 is used because the np.complex_ alias no longer exists in NumPy 2.0)
        self.fft_values_filtered = np.zeros(len(self.fft_values),
                                            dtype=np.complex128)
        self.fft_values_filtered[self.filtered_indices] = self.fft_values[
            self.filtered_indices]

        # precompute all possible predicted values using inverse dft
        self.predicted_values = np.fft.ifft(self.fft_values_filtered).real

        return self
Esempio n. 2
0
def _check_approximate_seasonality(
    series: TimeSeries,
    seasonality_period: int,
    period_error_margin: int,
    max_seasonality_order: int,
) -> bool:
    """Checks whether the given series has a given seasonality.

    Analyzes the given TimeSeries instance for seasonality of the given period
    while taking into account potential noise of the autocorrelation function.
    This is done by averaging all AC values that are within `period_error_margin`
    steps from the index `seasonality_period` in the ACF domain.

    Parameters
    ----------
    series
        The TimeSeries instance to be analyzed.
    seasonality_period
        The (approximate) period to be checked for seasonality.
    period_error_margin
        The radius around the `seasonality_period` that is taken into consideration when computing the autocorrelation.
    max_seasonality_order
        The maximum number of lags (or inputs to the acf) that can exceed the ac value computed over the interval
        around `seasonality_period`. The lower this number, the stricter the criterion for seasonality.

    Returns
    -------
    bool
        Boolean value indicating whether the seasonality is significant given the parameters passed.
    """
    # fraction of seasonality_period that will skipped when looking at acf values due to high
    # autocorrelation for small lags
    frac = 1 / 4

    # return False if there are not enough entries in the TimeSeries instance
    if len(series) < seasonality_period * (1 + frac):
        return False

    # compute relevant autocorrelation values
    r = acf(
        series.univariate_values(),
        nlags=int(seasonality_period * (1 + frac)),
        fft=False,
    )

    # compute the approximate autocorrelation value for the given period
    left_bound = seasonality_period - period_error_margin
    right_bound = seasonality_period + period_error_margin
    approximation_interval = range(left_bound, right_bound + 1)
    approximated_period_ac = np.mean(r[approximation_interval])

    # compute the number of ac values larger than the approximated ac value for the given period
    indices = list(range(int(frac * seasonality_period), left_bound)) + list(
        range(right_bound + 1, len(r)))
    order = sum(
        map(lambda ac_value: int(ac_value > approximated_period_ac),
            r[indices]))

    return order <= max_seasonality_order
Esempio n. 3
0
 def fit(self, series: TimeSeries):
     """Fit the model by remembering the last ``K`` values of ``series``.

     After delegating to the parent ``fit``, ``raise_if_not`` is used to
     require that the series holds at least ``self.K`` points. The final
     ``K`` values are then stored in ``self.last_k_vals``.

     Returns the model itself.
     """
     super().fit(series)

     has_enough_points = len(series) >= self.K
     raise_if_not(
         has_enough_points,
         f"The time series requires at least K={self.K} points",
         logger,
     )

     # keep only the trailing K observations
     tail_start = -self.K
     self.last_k_vals = series.univariate_values()[tail_start:]
     return self
Esempio n. 4
0
    def fit(self, series: TimeSeries):
        """Fit the wrapped model using the time index as the only feature.

        The parent ``fit`` is called first; the training series it stores is
        then used to build the inputs. The last entry of the time index is
        kept in ``self.last_tick``.
        """
        super().fit(series)
        training = self.training_series  # set by the parent fit()

        timestamps = training.time_index()
        targets = training.univariate_values()
        self.last_tick = timestamps[-1]

        # the wrapped model receives the time index as a single-column 2-D array
        n_samples = timestamps.shape[0]
        features = np.array(timestamps).reshape(n_samples, 1)

        self.model.fit(features, targets)
Esempio n. 5
0
    def _fit(self,
             series: TimeSeries,
             future_covariates: Optional[TimeSeries] = None):
        """Build the underlying model and fit it on the training series.

        A frame with the columns ``ds`` (time index) and ``y`` (values) is
        assembled from the training series. A fresh model is created through
        ``self._model_builder`` and then configured with the registered
        seasonalities, one regressor per covariate column (when
        ``future_covariates`` is given) and the country holidays (when
        ``self.country_holidays`` is set) before being fitted.

        Parameters
        ----------
        series
            The target series, forwarded to the parent ``_fit``.
        future_covariates
            Optional covariates; they are left-joined onto the frame by
            timestamp and each column is registered as a regressor.

        Returns
        -------
        self
            The fitted model.
        """
        super()._fit(series, future_covariates)
        training = self.training_series

        timestamps = training.time_index
        targets = training.univariate_values()
        fit_df = pd.DataFrame({"ds": timestamps, "y": targets})

        self.model = self._model_builder(**self.prophet_kwargs)

        # Register user defined seasonalities (from model creation and/or
        # pre-fit self.add_seasonalities()); periods are scaled by the value
        # `_freq_to_days` returns for the series frequency.
        days_per_step = self._freq_to_days(training.freq_str)
        for name, spec in self._add_seasonalities.items():
            self.model.add_seasonality(
                name=name,
                period=spec["seasonal_periods"] * days_per_step,
                fourier_order=spec["fourier_order"],
            )

        # Attach the covariates to the frame and declare each as a regressor.
        if future_covariates is not None:
            covariates_df = future_covariates.pd_dataframe()
            fit_df = fit_df.merge(
                covariates_df,
                left_on="ds",
                right_index=True,
                how="left",
            )
            for column in future_covariates.columns:
                self.model.add_regressor(column)

        # Built-in country holidays, if requested.
        if self.country_holidays is not None:
            self.model.add_country_holidays(self.country_holidays)

        if not self.suppress_stdout_stderr:
            self.model.fit(fit_df)
        else:
            self._execute_and_suppress_output(self.model.fit, logger,
                                              logging.WARNING, fit_df)

        return self
Esempio n. 6
0
 def fit(self, series: TimeSeries):
     """Fit the model by storing the mean of the series values.

     The parent ``fit`` is called first; the mean of
     ``series.univariate_values()`` is then kept in ``self.mean_val``.

     Returns the model itself.
     """
     super().fit(series)
     observed = series.univariate_values()
     self.mean_val = np.mean(observed)
     return self