def ts_fit(self, suppress=False):
    """Fit a LinearRegression model to the training part of the time series.

    The regressor is the positional index (0, 1, ..., n-1) of the training
    rows and the target is their 'y' column. A failing fit is only logged;
    the forecaster is returned either way.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself.
    """
    self._prepare_fit()
    self.ts_split()

    train = self._train_dt.copy()
    n_obs = len(train)
    # Column vector of row positions serves as the single feature
    x = np.arange(0, n_obs).reshape(-1, 1)
    y = np.asarray(train['y'])

    self._lin_logger.info("Trying to fit the linear model....")
    started = time()
    try:
        if not suppress:
            self._lin_logger.info("...via using parameters")
            print_attributes(self)
        regressor = LinearRegression(fit_intercept=self._fit_intercept,
                                     normalize=self._normalize,
                                     copy_X=self._copy_X,
                                     n_jobs=self._n_jobs)
        self.model_fit = regressor.fit(x, y)
        elapsed = time() - started
        self._lin_logger.info("Time elapsed: {} sec.".format(elapsed))
    except Exception:
        self._lin_logger.exception("LinearRegression error...")
        return self

    self._lin_logger.info("Model successfully fitted to the data!")
    if not suppress:
        r_squared = self.model_fit.score(x, y)
        self._lin_logger.info("R^2: {:f}".format(r_squared))

    self.intercept = self.model_fit.intercept_
    self.slope = self.model_fit.coef_

    # In-sample predictions, aligned with the training index
    self._lin_logger.info("Computing fitted values and residuals...")
    in_sample = self.model_fit.predict(x)
    self.fittedvalues = pd.Series(in_sample, index=train.index)

    # Residuals are derived by the parent class
    super(LinearForecaster, self)._residuals()
    self._lin_logger.info("Done.")
    return self
def ts_fit(self, suppress=False):
    """Fit Prophet to the time series data.

    When hyper-parameters are set, the fit is delegated to the grid search
    and the state of the best forecaster found is copied into this object.
    Otherwise a single Prophet model is built from the configured
    seasonalities (and holidays, if requested), fitted on the training
    split, and the in-sample fitted values, residuals and confidence
    bounds are stored on the forecaster.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself, or -1 when building/fitting the model failed.

    Raises:
    ----------
    SystemExit
        If diagnosis is requested without 'step'/'horizon', or if holidays
        are requested for a country other than AT, DE or US.
    """
    if self.hyper_params is not None:
        self._gs.set_forecaster(self)
        self._gs.set_hyper_params(self.hyper_params)
        # a very important command here to avoid endless loop
        self.hyper_params = None
        self._prophet_logger.info("***** Starting grid search *****")
        self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
        self.best_model = self._gs.best_model
        self.__dict__.update(self.best_model['forecaster'].__dict__)
        self._prophet_logger.info("***** Finished grid search *****")
        return self

    self._prepare_fit()
    self._model = None
    self.ts_split()

    ts_df = self._train_dt.copy()
    # Not a copy: in-place changes below also affect self._test_dt
    ts_test_df = self._test_dt

    # sanity check: remove weekend flags left over from a previous fit
    weekend_cols = ['on_weekend', 'off_weekend']
    if 'on_weekend' in ts_df.columns:
        ts_df.drop(weekend_cols, inplace=True, axis=1)
        # The test split may be missing or may not carry the flags
        if ts_test_df is not None:
            ts_test_df.drop(weekend_cols, inplace=True, axis=1,
                            errors='ignore')

    # Fit
    self._prophet_logger.info("Trying to fit the Prophet model....")
    try:
        if not suppress:
            self._prophet_logger.info("...via using parameters\n")
            print_attributes(self)

        # Explicit check instead of assert, which is stripped under -O
        if self._diagnose and (self._step is None or self._horizon is None):
            self._prophet_logger.warning(
                "You want to diagnose the Prophet model. "
                "Please provide parameters "
                "'step' and 'horizon' within object initialization!")
            sys.exit("STOP")

        ts_df = ts_df.reset_index()
        ts_df.columns = self._ts_df_cols
        has_test = ts_test_df is not None and not ts_test_df.empty
        if has_test:
            ts_test_df = ts_test_df.reset_index()
            ts_test_df.columns = self._ts_df_cols

        # The conditional weekend seasonalities replace the weekly one
        weekly_s = False if self._weekend_seasonality \
            else self._weekly_seasonality

        prophet_kwargs = dict(
            interval_width=self._prophet_interval_width,
            yearly_seasonality=self._yearly_seasonality,
            weekly_seasonality=weekly_s,
            daily_seasonality=self._daily_seasonality,
            changepoint_range=self._changepoint_range,
            changepoint_prior_scale=self._changepoint_prior_scale)

        if self._consider_holidays:
            if self._country not in ('AT', 'DE', 'US'):
                self._prophet_logger.error(
                    "Assrtion exception occurred. Right now, Austria (AT), "
                    "Germany(DE) and USA (US) supported.")
                sys.exit("STOP")
            # holidays exposes one class per supported country code
            years = list(np.unique(np.asarray(self.ts_df.index.year)))
            holi = getattr(holidays, self._country)(state=None, years=years)
            holi_dict = dict(sorted(holi.items()))
            df_holi = pd.DataFrame.from_dict(data=holi_dict,
                                             orient='index').reset_index()
            df_holi.columns = ['ds', 'holiday']
            df_holi['lower_window'] = 0
            df_holi['upper_window'] = 0
            prophet_kwargs['holidays'] = df_holi

        self._model = Prophet(**prophet_kwargs)

        if self._monthly_seasonality:
            self._model.add_seasonality(name='monthly', period=30.5,
                                        fourier_order=20)
            if not suppress:
                self._prophet_logger.info("Added monthly seasonality.")

        if self._quarterly_seasonality:
            self._model.add_seasonality(name='quarterly', period=91.5,
                                        fourier_order=20)
            if not suppress:
                self._prophet_logger.info("Added quarterly seasonality.")

        if self._weekend_seasonality:
            train_flags = ts_df['ds'].apply(self.we_season)
            ts_df['on_weekend'] = train_flags
            ts_df['off_weekend'] = ~train_flags
            self._train_dt = ts_df.copy()
            self._train_dt.set_index('ds', inplace=True)

            if has_test:
                test_flags = ts_test_df['ds'].apply(self.we_season)
                ts_test_df['on_weekend'] = test_flags
                ts_test_df['off_weekend'] = ~test_flags
                self._test_dt = ts_test_df.copy()
                self._test_dt.set_index('ds', inplace=True)

            # and add
            self._model.add_seasonality(name='weekend_on_season', period=7,
                                        fourier_order=5,
                                        condition_name='on_weekend')
            self._model.add_seasonality(name='weekend_off_season', period=7,
                                        fourier_order=5,
                                        condition_name='off_weekend')
            if not suppress:
                self._prophet_logger.info("Added week-end seasonality.")

        # tic
        start = time()
        self.model_fit = self._model.fit(ts_df)
        # toc
        if not suppress:
            self._prophet_logger.info(
                "Time elapsed: {} sec.".format(time() - start))
    except Exception:
        # sys.exit() raises SystemExit, which is not caught here
        self._prophet_logger.exception("Prophet error...")
        return -1

    self._prophet_logger.info("Model successfully fitted to the data!")

    # Fitted values
    self._prophet_logger.info("Computing fitted values and residuals...")
    # in-sample predict
    try:
        self.fittedvalues = self._model.predict(ts_df.drop('y', axis=1))
    except Exception:
        self._prophet_logger.exception("Prophet predict error...")

    # Residuals and confidence bounds all depend on the prediction frame,
    # so they share one guard
    try:
        train_index = self._train_dt.index
        self.residuals = pd.Series(
            np.asarray(ts_df.y) - np.asarray(self.fittedvalues['yhat']),
            index=train_index)
        self.lower_conf_int = pd.Series(
            np.asarray(self.fittedvalues['yhat_lower']), index=train_index)
        self.upper_conf_int = pd.Series(
            np.asarray(self.fittedvalues['yhat_upper']), index=train_index)
    except (KeyError, AttributeError):
        self._prophet_logger.exception(
            "Model was not fitted or ts has other structure...")

    self._prophet_logger.info("Done.")
    return self
def ts_fit(self, suppress=False):
    """Fit DLM to the time series data.

    Builds a dlm on the training 'y' column, adds every configured
    component (trend, seasonality, dynamic features, auto-regression,
    long season), fits it, and stores residuals, confidence bounds, MSE
    and fitted values on the forecaster.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself, or -1 when building/fitting the model failed.
    """
    self._prepare_fit()
    self._model = None
    self.ts_split()

    ts_df = self._train_dt.copy()

    # Fit
    self._dlm_logger.info("Trying to fit the DLM model....")
    try:
        if not suppress:
            self._dlm_logger.info("...via using parameters\n")
            print_attributes(self)

        ts_df = ts_df.reset_index()
        ts_df.columns = self._ts_df_cols

        self._model = dlm(ts_df['y'])

        # trend
        if self._dlm_trend is not None:
            self._model = self._model + trend(
                degree=self._dlm_trend['degree'],
                discount=self._dlm_trend['discount'],
                name=self._dlm_trend['name'],
                w=self._dlm_trend['w'])
        # seasonality
        if self._dlm_seasonality is not None:
            self._model = self._model + seasonality(
                period=self._dlm_seasonality['period'],
                discount=self._dlm_seasonality['discount'],
                name=self._dlm_seasonality['name'],
                w=self._dlm_seasonality['w'])
        # dynamic: one component per configured feature set
        if self._train_dlm_dynamic is not None:
            for feature in self._train_dlm_dynamic['features']:
                self._model = self._model + dynamic(
                    features=feature['features'],
                    discount=feature['discount'],
                    name=feature['name'],
                    w=feature['w'])
        # auto_reg
        if self._dlm_auto_reg is not None:
            self._model = self._model + autoReg(
                degree=self._dlm_auto_reg['degree'],
                discount=self._dlm_auto_reg['discount'],
                name=self._dlm_auto_reg['name'],
                w=self._dlm_auto_reg['w'])
        # long_season
        if self._dlm_long_season is not None:
            ls = longSeason(period=self._dlm_long_season['period'],
                            stay=self._dlm_long_season['stay'],
                            data=ts_df,
                            name=self._dlm_long_season['name'],
                            w=self._dlm_long_season['w'])
            self._model = self._model + ls

        if not suppress:
            self._dlm_logger.info("The constructed DLM model components:")
            print(self._model.ls())

        # tic
        start = time()
        if self._use_rolling_window:
            self._model.fitForwardFilter(useRollingWindow=True,
                                         windowLength=self._window_size)
            self._model.fitBackwardSmoother()
        else:
            self._model.fit()
        self.model_fit = self._model
        # toc
        if not suppress:
            self._dlm_logger.info(
                "Time elapsed: {} sec.".format(time() - start))
    except Exception as e:
        self._dlm_logger.exception("DLM error...{}".format(e))
        return -1

    self._dlm_logger.info("Model successfully fitted to the data!")
    self._dlm_logger.info("Computing fitted values and residuals...")

    train_index = self._train_dt.index

    # Residuals
    self.residuals = pd.Series(self.model_fit.getResidual(),
                               index=train_index)

    try:
        # Query the interval once; element 1 feeds the lower bound and
        # element 0 the upper bound
        interval = self.model_fit.getInterval()
        self.lower_conf_int = pd.Series(interval[1], index=train_index)
        self.upper_conf_int = pd.Series(interval[0], index=train_index)
    except ValueError as e:
        self._dlm_logger.exception(
            "Something went wrong in getInterval...{}".format(e))

    self.mse = self.model_fit.getMSE()

    # Fitted values
    # this is not elegant, but found no other way
    # NOTE(review): if getResidual() is "observed minus fitted", this should
    # subtract the residuals rather than add them - confirm against pydlm
    self.fittedvalues = self._train_dt['y'] + self.residuals

    return self
def ts_fit(self, suppress=False):
    """Fit ARIMA to the time series data.

    When hyper-parameters are set, the fit is delegated to the grid search
    and the state of the best forecaster found is copied into this object.
    Otherwise a single ARIMA model is fitted on the training 'y' column;
    its AR/MA coefficients, fitted values and residuals are stored on the
    forecaster. A failing fit is logged and leaves `model_fit` set to None.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself.
    """
    if self.hyper_params is not None:
        self._gs.set_forecaster(self)
        self._gs.set_hyper_params(self.hyper_params)
        # a very important command here to avoid endless loop
        self.hyper_params = None
        self._arima_logger.info("***** Starting grid search *****")
        self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
        self.best_model = self._gs.best_model
        self.__dict__.update(self.best_model['forecaster'].__dict__)
        self._arima_logger.info("***** Finished grid search *****")
        return self

    self._prepare_fit()
    self.ts_split()
    ARIMAForecaster._init_trend(self)

    ts_df = self._train_dt.copy()

    # Fit
    self._arima_logger.info("Trying to fit the ARIMA model....")
    # tic
    start = time()
    try:
        if not suppress:
            self._arima_logger.info("...via using parameters\n")
            print_attributes(self)

        self._model = ARIMA(ts_df['y'], order=self._order, freq=self.freq)
        self.model_fit = self._model.fit(trend=self._arima_trend,
                                         method='mle', disp=1)
    except Exception:
        self._arima_logger.exception("Exception occurred in the fit...")
        self._arima_logger.error("Please try other parameters!")
        self.model_fit = None
        return self

    # toc
    self._arima_logger.info("Time elapsed: {} sec.".format(time() - start))
    self._arima_logger.info("Model successfully fitted to the data!")
    if not suppress:
        self._arima_logger.info(
            "The model summary: " + str(self.model_fit.summary()))

    # Fitted values
    self._arima_logger.info("Computing fitted values and residuals...")
    self._ar_coef = self.model_fit.arparams
    self._ma_coef = self.model_fit.maparams
    self.fittedvalues = self.model_fit.fittedvalues

    # prologue: when the model returns fewer fitted values than training
    # rows, re-index them onto the full training date range and back-fill
    # the gaps so that they line up with the training data
    if len(self.fittedvalues) != len(self._train_dt):
        full_range = pd.date_range(ts_df.index[0], ts_df.index[-1],
                                   freq=self.freq)
        padded = pd.DataFrame(index=full_range, columns=['dummy']).join(
            pd.DataFrame(self.fittedvalues)).drop(['dummy'], axis=1)
        padded = padded.reset_index()
        padded.columns = self._ts_df_cols
        padded.set_index('ds', inplace=True)
        # Series.bfill() replaces the deprecated fillna(method='bfill')
        padded.y = padded.y.bfill()
        self.fittedvalues = padded

    # Residuals
    super(ARIMAForecaster, self)._residuals()
    self._arima_logger.info("Done.")
    return self
def ts_fit(self, suppress=False):
    """Fit LinearRegression to the time series data.

    With hyper-parameters set, the work is handed to the grid search and
    the state of the best forecaster found is copied into this object.
    Without them, a single regression of the training 'y' column on the
    row position (0, 1, ..., n-1) is fitted. A failing fit is only logged.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself.
    """
    # --- grid-search path -------------------------------------------------
    if self.hyper_params is not None:
        self._gs.set_forecaster(self)
        self._gs.set_hyper_params(self.hyper_params)
        # a very important command here to avoid endless loop
        self.hyper_params = None
        self._lin_logger.info("***** Starting grid search *****")
        self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
        self.best_model = self._gs.best_model
        best_state = self.best_model['forecaster'].__dict__
        self.__dict__.update(best_state)
        self._lin_logger.info("***** Finished grid search *****")
        return self

    # --- single-fit path --------------------------------------------------
    self._prepare_fit()
    self.ts_split()

    training = self._train_dt.copy()
    x = np.arange(0, len(training)).reshape(-1, 1)
    y = np.asarray(training['y'])

    self._lin_logger.info("Trying to fit the linear model....")
    tic = time()
    try:
        if not suppress:
            self._lin_logger.info("...via using parameters")
            print_attributes(self)
        estimator = LinearRegression(
            fit_intercept=self._fit_intercept,
            normalize=self._normalize,
            copy_X=self._copy_X,
            n_jobs=self._n_jobs)
        self.model_fit = estimator.fit(x, y)
        toc = time()
        self._lin_logger.info("Time elapsed: {} sec.".format(toc - tic))
    except Exception:
        self._lin_logger.exception("LinearRegression error...")
        return self

    self._lin_logger.info("Model successfully fitted to the data!")
    if not suppress:
        score = self.model_fit.score(x, y)
        self._lin_logger.info("R^2: {:f}".format(score))

    self.intercept = self.model_fit.intercept_
    self.slope = self.model_fit.coef_

    # In-sample predictions share the training index
    self._lin_logger.info("Computing fitted values and residuals...")
    predicted = self.model_fit.predict(x)
    self.fittedvalues = pd.Series(predicted, index=training.index)

    # Residuals come from the parent class
    super(LinearForecaster, self)._residuals()
    self._lin_logger.info("Done.")
    return self
def ts_fit(self, suppress=False):
    """Fit Auto ARIMA to the time series data.

    The search is first run with the configured seasonal period and
    differencing orders. If that raises, it is retried once with m=1, d=0
    and D=0. After either successful search the in-sample fitted values
    and residuals are computed; if both searches fail, `model_fit` is set
    to None.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself.
    """
    self._prepare_fit()
    self.ts_split()
    self._init_trend()
    self._init_seasonal()

    ts_df = self._train_dt.copy()

    def _search(m, d, D):
        # Single place for the auto_arima call; only the seasonal period
        # and the differencing orders differ between the two attempts
        return pm.auto_arima(
            ts_df,
            start_p=self._start_p,
            start_q=self._start_q,
            test=self._test,
            max_p=self._max_p,
            m=m,
            d=d,
            seasonal=self._aarima_seasonal,
            D=D,
            start_P=self._start_P,
            max_P=self._max_P,
            trend=self._aarima_trend,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=self._stepwise,
            random=self._random,
            n_fits=self._n_fits,
            scoring=self._scoring,
            out_of_sample_size=self._out_of_sample_size,
            information_criterion=self._information_criterion)

    # Fit
    self._aarima_logger.info("Trying to fit the Auto ARIMA model....")
    # tic
    start = time()
    try:
        if not suppress:
            self._aarima_logger.info("...via using parameters\n")
            print_attributes(self)

        self.model_fit = _search(m=self._seasonal_periods,
                                 d=self._d, D=self._D)
    except Exception:
        self._aarima_logger.exception("Exception occurred in the fit...")
        self._aarima_logger.warning("Will try to reset some parameters...")
        try:
            self.model_fit = _search(m=1, d=0, D=0)
        except Exception:
            self._aarima_logger.exception("Exception occurred")
            self._aarima_logger.error("Please try other parameters!")
            self.model_fit = None
            return self

    # Reached after either successful search, so a model obtained from the
    # fallback attempt also gets its fitted values and residuals
    # toc
    self._aarima_logger.info("Time elapsed: {} sec.".format(time() - start))
    self._aarima_logger.info("Model successfully fitted to the data!")
    self._aarima_logger.info(
        "The chosen model AIC: " + str(self.model_fit.aic()))

    # Fitted values
    self._aarima_logger.info("Computing fitted values and residuals...")
    in_sample = self.model_fit.predict_in_sample(start=0,
                                                 end=(len(ts_df) - 1))
    self.fittedvalues = pd.Series(in_sample, index=ts_df.index)

    # Residuals
    super(AutoARIMAForecaster, self)._residuals()
    self._aarima_logger.info("Done.")
    return self
def ts_fit(self, suppress=False):
    """Fit Exponential Smoothing to the time series data.

    When hyper-parameters are set, the fit is delegated to the grid search
    and the state of the best forecaster found is copied into this object.
    Otherwise a single ExponentialSmoothing model is fitted on the training
    split and its fitted values and residuals are stored on the forecaster.
    A failing fit is only logged.

    Parameters:
    ----------
    suppress: bool
        Suppress or not some of the output messages

    Returns:
    ----------
    The forecaster itself.
    """
    if self.hyper_params is not None:
        self._gs.set_forecaster(self)
        self._gs.set_hyper_params(self.hyper_params)
        # a very important command here to avoid endless loop
        self.hyper_params = None
        self._expsm_logger.info("***** Starting grid search *****")
        self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
        self.best_model = self._gs.best_model
        self.__dict__.update(self.best_model['forecaster'].__dict__)
        self._expsm_logger.info("***** Finished grid search *****")
        return self

    self._prepare_fit()
    self.ts_split()
    self._init_trend()
    self._init_seasonal()

    ts_df = self._train_dt.copy()

    # Fit
    # Logged (not printed) so the message goes through the same logger as
    # every other progress message of this method
    self._expsm_logger.info(
        "Trying to fit the exponential smoothing model....")
    # tic
    start = time()
    try:
        if not suppress:
            self._expsm_logger.info("...via using parameters\n")
            print_attributes(self)

        # NOTE(review): 'damped', 'smoothing_slope' and 'damping_slope'
        # appear to be older statsmodels keyword names - confirm against
        # the installed statsmodels version
        model = ExponentialSmoothing(
            ts_df,
            freq=self.freq,
            trend=self._es_trend,
            seasonal=self._es_seasonal,
            seasonal_periods=self._seasonal_periods,
            damped=self._damped)
        self.model_fit = model.fit(
            smoothing_level=self._smoothing_level,
            smoothing_slope=self._smoothing_slope,
            smoothing_seasonal=self._smoothing_seasonal,
            damping_slope=self._damping_slope,
            optimized=self._optimized,
            use_boxcox=self._use_boxcox,
            remove_bias=self._remove_bias)
        # toc
        self._expsm_logger.info(
            "Time elapsed: {} sec.".format(time() - start))
    except Exception:
        self._expsm_logger.exception("Exponential Smoothing error...")
        return self

    self._expsm_logger.info("Model successfully fitted to the data!")

    # Fitted values
    self._expsm_logger.info("Computing fitted values and residuals...")
    self.fittedvalues = self.model_fit.fittedvalues

    # Residuals
    super(ExponentialSmoothingForecaster, self)._residuals()
    self._expsm_logger.info("Done.")
    return self