Example #1
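The snippets on this page are excerpts from a larger package: helpers such as Logger, get_input_args, print_attributes, the grid-search object self._gs and the forecaster base classes (UVariateTimeSeriesClass, AutoARIMAForecaster, DLMForecaster, ...) are defined elsewhere and are not shown. A minimal sketch of the third-party imports the excerpts rely on (assuming the fbprophet-era Prophet API) might look like this:

# Sketch of the likely third-party imports; the package-internal names
# (Logger, the forecaster classes, get_input_args, print_attributes, ...)
# come from the package itself and are not reproduced here.
import sys
import itertools
from time import time
from random import sample

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

import holidays
from sklearn.linear_model import LinearRegression
from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric, add_changepoints_to_plot
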
def main():
    args = get_input_args()
    main_logger = Logger("auto_arima app")

    try:
        assert args.dt_path.split('.')[-1] == 'csv'
    except AssertionError:
        main_logger.exception("Here only csv supported!")
        sys.exit("STOP")
    try:
        ts_df = pd.read_csv(args.dt_path,
                            index_col='Date',
                            delimiter=',',
                            usecols=['Date', args.value_column],
                            parse_dates=True)
        main_logger.info("Data of shape {0} read in.".format(str(ts_df.shape)))
    except IOError:
        main_logger.exception("File could not be read!")
        sys.exit("STOP")
    except (NameError, KeyError):
        main_logger.exception(
            "Incompatible file format! Expected columns 'Date' and " +
            args.value_column)
        sys.exit("STOP")

    # initiate
    tsf_obj = AutoARIMAForecaster(ts_df=ts_df,
                                  time_format=args.time_format,
                                  freq=args.freq,
                                  n_test=args.n_test,
                                  n_val=args.n_val)
    if args.transform != '':
        tsf_obj.ts_transform(args.transform)

    if input("Continue with ts_fit y/n?").strip().lower() == 'y':
        tsf_obj.ts_fit(suppress=args.suppress)
    else:
        main_logger.info("OK")

    if input("Continue with ts_diagnose y/n?").strip().lower() == 'y':
        tsf_obj.ts_diagnose()
    else:
        main_logger.info("OK")
    if input("Continue with ts_test y/n?").strip().lower() == 'y':
        tsf_obj.ts_test()
    else:
        main_logger.info("OK")
    if input("Continue with ts_forecast y/n?").strip().lower() == 'y':
        tsf_obj.ts_forecast(n_forecast=args.n_forecast)
    else:
        main_logger.info("OK")
class EnsembleForecaster(LinearForecaster, SARIMAForecaster,
                         ExponentialSmoothingForecaster, AutoARIMAForecaster,
                         ProphetForecaster, DLMForecaster):
    """Univariate time series class inheriting from all existing forecasters and choosing the best ensemble.

    Each forecaster is supposed to be equipped with a set of hyper parameters. Grid search is used to choose the
    best model for each forecaster among the respective hyper parameters.

    Those best models are then combined (an ensemble is created) to achieve the forecast of the best quality:
    all combinations of the best models are generated, and the forecasted values of each combination are either
    averaged or their median is computed. The combination with the lowest RMSE is chosen as the best ensemble.
    Note that it is necessary that test and validation data are generated.

    Attributes
    ----------
    _model_list: list
       Internal (immutable) list of possible models
    ensemble: list
       List of all forecasters to be used to create the best ensemble.
    _dict_models: dictionary
       Dictionary keeping all models
    dict_hyper_params: dictionary
       Dictionary of hyper parameters per forecaster
    show_plots: bool
       Whether to show plots when models are fitted/tested
    _best_models: dictionary
       Dictionary keeping best models for each forecaster type in the list 'ensemble'.
       This best one is chosen after applying the grid search.
    best_ensemble: dictionary
        Dictionary keeping the results of ensemble
    _ensemble_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    ts_fit()
       Grid search on all forecasters in ensemble. Respective hyper parameters are used.
    ts_test()
       Tests all forecasters on test data and computes the RMSE
    ts_validate()
       Validates all forecasters on validation data
    _build_ensemble()
       Builds the ensemble. All combinations of forecasters in the ensemble are generated.
       For each combination the mean and median RMSE over the validation data are computed.
       The combination with the lowest RMSE is chosen as the best ensemble.

    """
    def __init__(self,
                 dict_hyper_params,
                 ensemble=['dlm', 'prophet'],
                 show_plots=True,
                 **kwds):
        """Initialized the object EnsembleForecaster"""
        self._model_list = [
            'arima', 'sarima', 'exponential smoothing', 'prophet', 'dlm',
            'linear'
        ]

        self.ensemble = list(map(lambda x: x.lower(), ensemble))
        self.dict_hyper_params = dict_hyper_params
        self.show_plots = show_plots
        self._dict_models = dict()  # dict.fromkeys(self.ensemble, None)
        self._best_models = dict()
        self.best_ensemble = dict()
        self._ensemble_logger = Logger("ensemble")

        try:
            super(EnsembleForecaster, self).__init__(**kwds)
        except (TypeError, AttributeError) as e:
            self._ensemble_logger.exception("Arguments missing...{}".format(e))

        self._id = 'Ensemble'
        #
        if 'prophet' in self.ensemble:
            self._dict_models['prophet'] = self.__copy__()
            self._dict_models['prophet'].__class__ = ProphetForecaster
        if 'linear' in self.ensemble:
            self._dict_models['linear'] = self.__copy__()
            self._dict_models['linear'].__class__ = LinearForecaster
        if 'arima' in self.ensemble:
            self._dict_models['arima'] = self.__copy__()
            self._dict_models['arima'].__class__ = ARIMAForecaster
        if 'sarima' in self.ensemble:
            self._dict_models['sarima'] = self.__copy__()
            self._dict_models['sarima'].__class__ = SARIMAForecaster
        if 'exponential smoothing' in self.ensemble:
            self._dict_models['expsm'] = self.__copy__()
            self._dict_models[
                'expsm'].__class__ = ExponentialSmoothingForecaster
        if 'dlm' in self.ensemble:
            self._dict_models['dlm'] = self.__copy__()
            self._dict_models['dlm'].__class__ = DLMForecaster
        if 'auto_arima' in self.ensemble:
            self._dict_models['auto_arima'] = self.__copy__()
            self._dict_models['auto_arima'].__class__ = AutoARIMAForecaster

        if 'all' in self.ensemble:
            self._dict_models['prophet'] = self.__copy__()
            self._dict_models['prophet'].__class__ = ProphetForecaster

            self._dict_models['linear'] = self.__copy__()
            self._dict_models['linear'].__class__ = LinearForecaster

            self._dict_models['arima'] = self.__copy__()
            self._dict_models['arima'].__class__ = ARIMAForecaster

            self._dict_models['sarima'] = self.__copy__()
            self._dict_models['sarima'].__class__ = SARIMAForecaster

            self._dict_models['expsm'] = self.__copy__()
            self._dict_models[
                'expsm'].__class__ = ExponentialSmoothingForecaster

            self._dict_models['dlm'] = self.__copy__()
            self._dict_models['dlm'].__class__ = DLMForecaster

            self._dict_models['auto_arima'] = self.__copy__()
            self._dict_models['auto_arima'].__class__ = AutoARIMAForecaster

        self.assertions()

    def assertions(self):
        try:
            assert isinstance(self.dict_hyper_params, dict)
        except AssertionError:
            self._ensemble_logger.exception(
                "Assertion exception occurred, dict expected")
            sys.exit("STOP")
        #
        """
        len_keys = list(filter(lambda x: x in list(self.dict_hyper_params.keys()),
                               keys_f(keys=self.ensemble)))
        try:
            assert len(len_keys) == len(self.ensemble)
        except AssertionError:
            self._dlm_logger.warning("hyper parameters found only for " + len_keys + " our of " + len(self.ensemble))
        """

        for k, v in self._dict_models.items():
            try:
                assert self._dict_models[k].n_test > 0 and self._dict_models[
                    k].n_val > 0
            except AssertionError:
                self._ensemble_logger.exception(
                    "Assertion exception occurred,  both test and validation "
                    "have to be generated! Please specify n_test and n_val!")
                sys.exit("STOP")

    def __copy__(self):
        """Copies the object"""

        result = super(EnsembleForecaster, self).__copy__()
        #
        result.ensemble = self.ensemble
        result.dict_hyper_params = self.dict_hyper_params
        result._dict_models = self._dict_models
        result._best_models = self._best_models
        result._ensemble_logger = self._ensemble_logger
        result._model_list = self._model_list

        return result

    def ts_fit(self, suppress=False):
        """Grid search on all forecasters in ensemble to find the best model out of hyper parameters provided.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        for k, v in self._dict_models.items():
            if k in list(self.dict_hyper_params.keys()):
                self._gs.set_forecaster(self._dict_models[k])
                self._gs.set_hyper_params(self.dict_hyper_params[k])
                self._ensemble_logger.info(
                    "==========================================Starting grid search for the forecaster +++ {} +++ =================================="
                    .format(k))
                self._gs = self._gs.grid_search(suppress=suppress,
                                                show_plot=self.show_plots)
                #
                self._best_models[k] = self._gs.best_model
            else:
                self._dict_models[k].ts_fit(suppress=suppress)
                if k not in list(self._best_models.keys()):
                    self._best_models[k] = dict()
                    self._best_models[k]['forecaster'] = self._dict_models[k]

        return self

    def ts_test(self, show_plot=True):
        """Test all models on test data

         Parameters:
         ----------
         show_plot: bool
            Whether to show or not the residual plots
        """

        for k, v in self._best_models.items():
            self._ensemble_logger.info(
                "==========================================Testing model +++ {} +++ ================================== "
                .format(k))
            if 'hyper_params' in list(self._best_models[k].keys()):
                self._best_models[k]['forecaster'].set_params(
                    p_dict=self._best_models[k]['hyper_params'])
            self._best_models[k]['forecaster'].ts_test(show_plot=show_plot)

        self._build_ensemble()
        self._plot_ensemble()

    def plot_residuals(self):
        """Plot the residuals"""

        if self._best_models is None or len(self._best_models) == 0:
            self._ensemble_logger.warning(
                "No models have been fit. The forecaster will stop!")
            sys.exit("STOP")

        for k, v in self._best_models.items():
            self._best_models[k]['forecaster'].plot_residuals()

    def ts_diagnose(self):
        """Plot the residuals"""

        if self._best_models is None or len(self._best_models) == 0:
            self._ensemble_logger.warning(
                "No models have been fit. The forecaster will stop!")
            sys.exit("STOP")

        for k, v in self._best_models.items():
            self._best_models[k]['forecaster'].ts_diagnose()

    @staticmethod
    def _print_dict(d):
        e_info = ""
        for k, v in d.items():
            e_info = e_info + "....................... | ensemble | INFO : " + str(
                k) + " : " + str(v) + "\n"
        return "Best ensemble: \n" + e_info

    @staticmethod
    def lambda_forecast(x):
        if isinstance(x, ProphetForecaster):
            return x.forecast.iloc[:, -1].values
        else:
            return x.forecast.values

    def _compute_ensemble(self, compute_rmse=False):
        """Re-computes 'ensemble_forecast' for best_ensemble"""

        if self.best_ensemble['aggregation'] == 'none':
            self.best_ensemble['ensemble_forecast'] = pd.Series(
                self.lambda_forecast(self.best_ensemble['models'][0]),
                index=self.best_ensemble['models'][0].forecast.index)
        elif self.best_ensemble['aggregation'] == 'mean':
            self.best_ensemble['ensemble_forecast'] = \
                pd.Series(np.mean(list(map(lambda x: self.lambda_forecast(x), self.best_ensemble['models'])), axis=0),
                          index=self.best_ensemble['models'][0].forecast.index)
            # rmse
            if compute_rmse:
                ensemble_res_mean = np.mean(list(
                    map(lambda x: x.residuals_forecast,
                        self.best_ensemble['models'])),
                                            axis=0)
                self.best_ensemble['rmse'] = np.sqrt(
                    np.square(ensemble_res_mean)).mean()
        elif self.best_ensemble['aggregation'] == 'median':
            self.best_ensemble['ensemble_forecast'] = \
                pd.Series(np.median(list(map(lambda x: self.lambda_forecast(x), self.best_ensemble['models'])), axis=0),
                          index=self.best_ensemble['models'][0].forecast.index)
            if compute_rmse:
                ensemble_res_median = np.median(list(
                    map(lambda x: x.residuals_forecast,
                        self.best_ensemble['models'])),
                                                axis=0)
                self.best_ensemble['rmse'] = np.sqrt(
                    np.square(ensemble_res_median)).mean()

    def _build_ensemble(self):
        """
        # check that validation has been run
        for k, v in self._best_models.items():
            if self._best_models[k]['forecaster']._mode != 'test and validate':
                # do what ts_validate does
                self._best_models[k]['forecaster'].set_params(p_dict=self._best_models[k]['hyper_params'])
                self._ensemble_logger.info("Validating model {}".format(k))
                self._best_models[k]['forecaster'].ts_validate(suppress=suppress, show_plot=show_plot)
            else:
                pass
        """
        # build ensemble
        self._ensemble_logger.info(
            "==========================================Start building the best ensemble=========================================="
        )
        rmse = float('inf')
        mod_list = list(self._best_models.keys())
        for L in range(0, len(mod_list) + 1):
            for subset in itertools.combinations(mod_list, L):
                if len(subset) == 0:
                    continue
                if len(subset) > 1:
                    #
                    ensemble_candidate = [
                        self._best_models[s]['forecaster'] for s in subset
                    ]
                    # mean: note, residuals_forecast is now (each time) over the validation data
                    ensemble_res_mean = np.mean(list(
                        map(lambda x: x.residuals_forecast,
                            ensemble_candidate)),
                                                axis=0)
                    if np.sqrt(np.square(ensemble_res_mean)).mean() < rmse:
                        rmse = np.sqrt(np.square(ensemble_res_mean)).mean()
                        self.best_ensemble['rmse'] = rmse
                        self.best_ensemble['set'] = subset
                        self.best_ensemble['models'] = ensemble_candidate
                        self.best_ensemble['aggregation'] = 'mean'
                    # median
                    ensemble_res_median = np.median(list(
                        map(lambda x: x.residuals_forecast,
                            ensemble_candidate)),
                                                    axis=0)
                    if np.sqrt(np.square(ensemble_res_median)).mean() < rmse:
                        rmse = np.sqrt(np.square(ensemble_res_median)).mean()
                        self.best_ensemble['rmse'] = rmse
                        self.best_ensemble['set'] = subset
                        self.best_ensemble['models'] = ensemble_candidate
                        self.best_ensemble['aggregation'] = 'median'
                elif len(subset) == 1:
                    ensemble_candidate = self._best_models[
                        subset[0]]['forecaster']
                    if ensemble_candidate.rmse < rmse:
                        rmse = ensemble_candidate.rmse
                        self.best_ensemble['rmse'] = rmse
                        self.best_ensemble['set'] = subset
                        self.best_ensemble['models'] = [ensemble_candidate]
                        self.best_ensemble['aggregation'] = 'none'
        self._compute_ensemble()

        # self._ensemble_logger.info("The best ensemble found as:")
        print(self._print_dict(self.best_ensemble))

    def _plot_ensemble(self):
        """Plots the best ensemble"""

        if len(self.best_ensemble) == 0:
            self._ensemble_logger.error(
                "Ensemble does not exist yet! Forecaster will stop!")
            sys.exit("STOP")

        plt.figure(figsize=(20, 7))
        #
        plt.plot(self.best_ensemble['models'][0].ts_df, color='b')
        # colours
        colors = mcolors.BASE_COLORS
        by_hsv = sorted(
            (tuple(mcolors.rgb_to_hsv(mcolors.to_rgb(color))), colo_name)
            for colo_name, color in colors.items())
        colo_names = [name for hsv, name in by_hsv]
        if 'w' in colo_names:
            colo_names.remove('w')
        if 'b' in colo_names:
            colo_names.remove('b')
        if 'g' in colo_names:
            colo_names.remove('g')
        if 'darkgreen' in colo_names:
            colo_names.remove('darkgreen')
        colo_names = sample(colo_names, len(self.best_ensemble['models']))
        #
        for i in range(len(self.best_ensemble['models'])):
            plt.plot(pd.Series(
                self.lambda_forecast(self.best_ensemble['models'][i]),
                index=self.best_ensemble['models'][i].forecast.index),
                     color=colo_names[i],
                     linewidth=2.0,
                     label=str(type(self.best_ensemble['models'][i])).split(
                         "'")[1].split('.')[2])
        plt.plot(self.best_ensemble['ensemble_forecast'],
                 color='darkgreen',
                 linewidth=2.0,
                 label='Ensemble')
        plt.axvline(x=min(self.best_ensemble['ensemble_forecast'].index),
                    color='grey',
                    linestyle='dashed')
        plt.legend()
        plt.title("Real (blue) and forecasted values")

        #
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_validate(self, suppress=True, show_plot=True):
        """Validate best ensemble."""

        if self.best_ensemble is None or len(self.best_ensemble) == 0:
            self._ensemble_logger.error(
                "Ensemble has not been built! Forecaster will stop!")
            sys.exit("STOP")

        for i in range(len(self.best_ensemble['models'])):
            self.best_ensemble['models'][i]._mode = 'test and validate'
            self.best_ensemble['models'][i].ts_fit(suppress=suppress)
            self.best_ensemble['models'][i].ts_test(show_plot=show_plot)

        self._compute_ensemble(compute_rmse=True)
        print(self._print_dict(self.best_ensemble))
        self._plot_ensemble()

    def ts_forecast(self, n_forecast, features_dict=None, suppress=False):
        if self.best_ensemble is None or len(self.best_ensemble) == 0:
            self._ensemble_logger.error(
                "Ensemble has not been built! Forecaster will stop!")
            sys.exit("STOP")

        for i in range(len(self.best_ensemble['models'])):
            if str(type(self.best_ensemble['models'][i])).split("'")[1].split(
                    '.')[2] != 'DLMForecaster':
                self.best_ensemble['models'][i].ts_forecast(
                    n_forecast=n_forecast, suppress=suppress)
            else:
                self.best_ensemble['models'][i].ts_forecast(
                    n_forecast=n_forecast,
                    features_dict=features_dict,
                    suppress=suppress)

        self._compute_ensemble()
        self._plot_ensemble()
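
A minimal usage sketch of EnsembleForecaster on synthetic data. The base-class keyword arguments (ts_df, time_format, freq, n_test, n_val) are assumed to be the same ones passed to AutoARIMAForecaster in Example #1, and the hyper-parameter grids are illustrative; their keys follow set_params() of the respective forecasters:

# Illustrative data and hyper-parameter grids (assumptions, not taken from the package docs).
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=400, freq="D")
ts_df = pd.DataFrame({"Value": np.random.randn(400).cumsum()}, index=idx)

dict_hyper_params = {
    'prophet': {'changepoint_prior_scale': [0.001, 0.05],
                'changepoint_range': [0.8, 0.9]},
    'linear': {'fit_intercept': [True, False]},
}

ens = EnsembleForecaster(dict_hyper_params=dict_hyper_params,
                         ensemble=['prophet', 'linear'],
                         show_plots=False,
                         ts_df=ts_df,
                         time_format="%Y-%m-%d",
                         freq='D',
                         n_test=30,
                         n_val=30)                   # both must be > 0, see assertions()
ens.ts_fit(suppress=True)                            # grid search per forecaster
ens.ts_test(show_plot=False)                         # test the best models and build the ensemble
ens.ts_validate(suppress=True, show_plot=False)
ens.ts_forecast(n_forecast=14)
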
class LinearForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using LinearRegression for forecasting

    Attributes
    ----------
    _fit_intercept: bool
        Whether to fit the intercept yes/no
    _normalize: bool
        Whether to normalize time series data before fitting yes/no
    _copy_X: bool
      If True, X will be copied; else, it may be overwritten.
    _n_jobs: int or None
      The number of jobs to use for the computation. This will only provide speedup for n_targets > 1 and
      sufficient large problems. None means 1 unless in a joblib.parallel_backend context.
      -1 means using all processors.

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the linear regression model to the time series
    ts_diagnose()
       Diagnoses the fitted model
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates the fitted model on the test data, if it has been generated
    ts_forecast()
       Forecasts the time series and plots the results
    plot_forecast()
       Plots the forecasted time series
    """

    def __init__(self,
                 fit_intercept=True,
                 normalize=False,
                 copy_X=False,
                 n_jobs=None,
                 **kwds):
        """Initializes the object LinearForecaster"""
        self._lin_logger = Logger('linear')

        try:
            super(LinearForecaster, self).__init__(**kwds)
        except TypeError:
            self._lin_logger.exception("Arguments missing...")

        self._fit_intercept = fit_intercept
        self._normalize = normalize
        self._copy_X = copy_X
        self._n_jobs = n_jobs

        self.intercept = None
        self.slope = None

        self._id = 'Linear'

    def __copy__(self):
        """Copies the object"""
        result = super(LinearForecaster, self).__copy__()

        result._fit_intercept = self._fit_intercept
        result._normalize = self._normalize
        result._copy_X = self._copy_X
        result._n_jobs = self._n_jobs
        result.intercept = self.intercept
        result.slope = self.slope
        result._lin_logger = self._lin_logger

        return result

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters"""
        params_dict = kwargs
        if p_dict is not None: 
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k == 'timeformat':
                self.time_format = v
            elif k == 'fit_intercept':
                self._fit_intercept = v
            elif k == 'normalize':
                self._normalize = v
            elif k == 'copy_X':
                self._copy_X = v
            elif k == 'n_jobs':
                self._n_jobs = v

        return self

    def get_params_dict(self):
        """Gets parameters as dictionary"""
        return {'fit_intercept': self._fit_intercept,
                'normalize': self._normalize,
                'copy_X': self._copy_X,
                'n_jobs': self._n_jobs
                }

    def ts_fit(self, suppress=False):
        """Fit LinearRegression to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        self._prepare_fit()
        self.ts_split()

        ts_df = self._train_dt.copy()
        #
        x = np.arange(0, len(ts_df)).reshape(-1, 1)
        y = np.asarray(ts_df['y'])

        # Fit
        self._lin_logger.info("Trying to fit the linear model....")
        # tic
        start = time()
        try:
            if not suppress:
                self._lin_logger.info("...via using parameters")
                print_attributes(self)

            self.model_fit = LinearRegression(fit_intercept=self._fit_intercept,
                                              normalize=self._normalize,
                                              copy_X=self._copy_X,
                                              n_jobs=self._n_jobs).fit(x, y)
            # toc
            self._lin_logger.info("Time elapsed: {} sec.".format(time() - start))
        except (Exception, ValueError):
            self._lin_logger.exception("LinearRegression error...")
        else:
            #
            self._lin_logger.info("Model successfully fitted to the data!")
            if not suppress:
                self._lin_logger.info("R^2: {:f}".format(self.model_fit.score(x, y)))
            #
            self.intercept = self.model_fit.intercept_
            self.slope = self.model_fit.coef_

            # Fitted values
            self._lin_logger.info("Computing fitted values and residuals...")
            self.fittedvalues = pd.Series(self.model_fit.predict(x), index=ts_df.index)

            # Residuals
            super(LinearForecaster, self)._residuals()
            self._lin_logger.info("Done.")
            return self

    def ts_diagnose(self):
        """Diagnoses the model"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._lin_logger.exception("Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

    def plot_residuals(self):
        """Plot the residuals"""
        fig, axis = super(LinearForecaster, self)._plot_residuals(y=np.asarray(self._train_dt['y']),
                                                                  yhat=np.asarray(self.fittedvalues), _id="Linear")

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(LinearForecaster, self)._check_ts_test() < 0:
            return

        n_forecast = len(self._test_dt)

        self._lin_logger.info("Evaluating the fitted Linear model on the test data...")
        x_future = np.arange(len(self._train_dt), len(self._train_dt) + n_forecast).reshape(-1, 1)
        self.forecast = pd.Series(self.model_fit.predict(x_future), index=self._test_dt.index)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt['y']) - np.asarray(self.forecast),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._lin_logger.info("RMSE on test data: {}".format(self.rmse))
        # plot
        if show_plot:
            self.plot_forecast()
        
        return self            

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(LinearForecaster, self)._check_ts_forecast(n_forecast)
        #
        if not suppress:
            self._lin_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._lin_logger.info("Forecasting next " + str(n_forecast) + str(self.freq))
        #
        x_future = np.arange(len(self._train_dt), len(self._train_dt) + n_forecast).reshape(-1, 1)
        future = self.model_fit.predict(x_future)
        idx_future = self._gen_idx_future(n_forecast=n_forecast)
        self.forecast = pd.Series(future, index=idx_future)

        self.residuals_forecast = None
        self.plot_forecast()
        return self

    def plot_forecast(self):
        """Plot forecasted values"""
        fig, axis = super(LinearForecaster, self)._plot_forecast(y=np.asarray(self._train_dt['y']),
                                                                 yhat=np.asarray(self.fittedvalues),
                                                                 forecast=self.forecast, _id='Linear')
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
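
For reference, a small standalone sketch of LinearForecaster on a synthetic trend. Again, the base-class keywords are assumed to match those used in Example #1:

import numpy as np
import pandas as pd

idx = pd.date_range("2021-01-01", periods=200, freq="D")
ts_df = pd.DataFrame({"Value": 0.5 * np.arange(200) + np.random.randn(200)}, index=idx)

lin = LinearForecaster(fit_intercept=True,
                       ts_df=ts_df, time_format="%Y-%m-%d", freq='D',
                       n_test=20, n_val=20)
lin.ts_fit(suppress=True)        # fits sklearn's LinearRegression against a 0..n time index
lin.ts_test(show_plot=False)     # RMSE on the held-out test window
lin.ts_forecast(n_forecast=10)   # refits on all data and extrapolates 10 steps ahead
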
class ProphetForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using Prophet for forecasting,ref. to https://facebook.github.io/prophet

    Attributes
    ----------
    _prophet_interval_width: float
         The width of the uncertainty intervals (by default 80%), also
         ref. to https://facebook.github.io/prophet/docs/uncertainty_intervals.html
    _yearly_seasonality: bool
        Consider yearly seasonality yes/no
    _monthly_seasonality: bool
        Consider monthly seasonality yes/no
    _quarterly_seasonality: bool
       Consider quarterly seasonality yes/no
    _weekly_seasonality: bool
       Consider weekly seasonality yes/no
    _daily_seasonality: bool
       Consider daily seasonality yes/no
    _weekend_seasonality: bool
       Consider week-end seasonality yes/no.
       ref. to https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events
    _changepoint_prior_scale: float
       If the trend changes are being overfit (too much flexibility) or underfit (not enough flexibility),
       you can adjust the strength of the sparse prior using this argument.
       By default, this parameter is set to 0.05. Increasing it will make the trend more flexible.
       Decreasing it will make the trend less flexible.
       ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet

    _changepoint_range: float
        By default changepoints are only inferred for the first 80% of the time series in order to have plenty of runway
        for projecting the trend forward and to avoid overfitting fluctuations at the end of the time series.
        This default works in many situations but not all, and can be changed using the changepoint_range argument.
        For example, m = Prophet(changepoint_range=0.9) will place potential changepoints in
        the first 90% of the time series.
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet
    _add_change_points: bool
        Whether to add change points to the plots
        ref. to https://facebook.github.io/prophet/docs/trend_changepoints.html#automatic-changepoint-detection-in-prophet

    _diagnose: bool
        Whether to run cross validation yes/no
    _history: str
        Amount of historic data in days for cross validation,
        corresponds to 'initial' in https://facebook.github.io/prophet/docs/diagnostics.html
    _step: str
        Corresponds to 'period' in the link above. Defines the step in days by which the historic data is shifted
    _horizon: str
        Forecasting horizon in days for each cross validation run
    _consider_holidays: bool
        Whether to consider holidays yes/no
        ref. to https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events
    _country: str
        The country for which holidays are to be considered

    _prophet_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the Prophet model to the time series
    ts_diagnose()
       Diagnoses the fitted model. Cross validation is started
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates the fitted model on the test data, if it has been generated
    ts_forecast()
       Forecasts the time series and plots the results
    plot_forecast()
       Plots the forecasted time series
    """

    def __init__(self,
                 prophet_interval_width=0.95,
                 yearly_seasonality=False,
                 monthly_seasonality=False,
                 quarterly_seasonality=False,
                 weekly_seasonality=False,
                 daily_seasonality=False,
                 weekend_seasonality=False,
                 changepoint_prior_scale=0.001,
                 changepoint_range=0.9,
                 add_change_points=True,
                 diagnose=False,
                 history=None,
                 step=None,
                 horizon=None,
                 consider_holidays=True,
                 country='DE',
                 **kwds):
        """Initializes the object ProphetForecaster"""
        self._prophet_logger = Logger('prophet')

        try:
            super(ProphetForecaster, self).__init__(**kwds)
        except TypeError:
            self._prophet_logger.exception("TypeError occurred, Arguments missing")

        self._model = None

        self._prophet_interval_width = prophet_interval_width
        self._yearly_seasonality = yearly_seasonality
        self._monthly_seasonality = monthly_seasonality
        self._quarterly_seasonality = quarterly_seasonality
        self._weekly_seasonality = weekly_seasonality
        self._daily_seasonality = daily_seasonality
        self._weekend_seasonality = weekend_seasonality

        self._changepoint_prior_scale = changepoint_prior_scale
        self._changepoint_range = changepoint_range
        self._add_change_points = add_change_points

        self._diagnose = diagnose
        self._history = history
        self._step = step
        self._horizon = horizon
        self._prophet_cv = None
        self._prophet_p = None

        self._consider_holidays = consider_holidays
        self._country = country

        self._id = 'Prophet'

    def __copy__(self):
        """Copies the object"""
        result = super(ProphetForecaster, self).__copy__()
        #
        result._model = self._model
        result._prophet_interval_width = self._prophet_interval_width
        result._yearly_seasonality = self._yearly_seasonality
        result._monthly_seasonality = self._monthly_seasonality
        result._quarterly_seasonality = self._quarterly_seasonality
        result._weekly_seasonality = self._weekly_seasonality
        result._daily_seasonality = self._daily_seasonality
        result._weekend_seasonality = self._weekend_seasonality

        result._changepoint_prior_scale = self._changepoint_prior_scale
        result._changepoint_range = self._changepoint_range
        result._add_change_points = self._add_change_points

        result._diagnose = self._diagnose
        result._history = self._history
        result._step = self._step
        result._horizon = self._horizon
        result._prophet_cv = self._prophet_cv
        result._prophet_p = self._prophet_p

        result._consider_holidays = self._consider_holidays
        result._country = self._country

        result._prophet_logger = self._prophet_logger

        return result

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters"""
        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k == 'timeformat':
                self.time_format = v
            elif k == "prophet_interval_width":
                self._prophet_interval_width = v
            elif k == "yearly_seasonality":
                self._yearly_seasonality = v
            elif k == "monthly_seasonality":
                self._monthly_seasonality = v
            elif k == "quarterly_seasonality":
                self._quarterly_seasonality = v
            elif k == "weekly_seasonality":
                self._weekly_seasonality = v
            elif k == "daily_seasonality":
                self._daily_seasonality = v
            elif k == "weekend_seasonality":
                self._weekend_seasonality = v
            elif k == "changepoint_prior_scale":
                self._changepoint_prior_scale = v
            elif k == "changepoint_range":
                self._changepoint_range = v
            elif k == "add_change_points":
                self._add_change_points = v
            elif k == "diagnose":
                self._diagnose = v
            elif k == "history":
                self._history = v
            elif k == "step":
                self._step = v
            elif k == "horizon":
                self._horizon = v
            elif k == "consider_holidays":
                self._consider_holidays = v
            elif k == "country":
                self._country = v

        return self

    def get_params_dict(self):
        """Gets parameters as a dictionary"""
        return {'prophet_interval_width': self._prophet_interval_width,
                'yearly_seasonality': self._yearly_seasonality,
                'monthly_seasonality': self._monthly_seasonality,
                'quarterly_seasonality': self._quarterly_seasonality,
                'weekly_seasonality': self._weekly_seasonality,
                'daily_seasonality': self._daily_seasonality,
                'weekend_seasonality': self._weekend_seasonality,
                'changepoint_prior_scale': self._changepoint_prior_scale,
                'changepoint_range': self._changepoint_range,
                'add_change_points': self._add_change_points,
                'diagnose': self._diagnose,
                'history': self._history,
                'step': self._step,
                'horizon': self._horizon,
                'consider_holidays': self._consider_holidays,
                'country': self._country
                }

    @staticmethod
    def we_season(ds):
        """Lambda function to prepare weekend_seasonality for  Prophet"""
        date = pd.to_datetime(ds)
        return date.weekday() >= 5

    def ts_fit(self, suppress=False):
        """Fit Prophet to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """

        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # important: reset hyper_params here to avoid an endless grid-search recursion
            self.hyper_params = None
            self._prophet_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
            #
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._prophet_logger.info("***** Finished grid search *****")
        else:
            self._prepare_fit()
            self._model = None
            self.ts_split()

            ts_df = self._train_dt.copy()
            ts_test_df = self._test_dt
            # sanity check
            if 'on_weekend' in ts_df.columns:
                ts_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)
                # ts_test_df.drop(['on_weekend', 'off_weekend'], inplace=True, axis=1)
            # Fit
            self._prophet_logger.info("Trying to fit the Prophet model....")
            try:
                if not suppress:
                    self._prophet_logger.info("...via using parameters\n")
                    print_attributes(self)
                # diagnose on?
                if self._diagnose:
                    try:
                        assert self._step is not None and self._horizon is not None
                    except (KeyError, AssertionError):
                        self._prophet_logger.warning("You want to diagnose the Prophet model. Please provide parameters "
                                                     "'step' and 'horizon' within object initialization!")
                        sys.exit("STOP")

                ts_df = ts_df.reset_index()
                ts_df.columns = self._ts_df_cols
                if ts_test_df is not None and not ts_test_df.empty:
                    ts_test_df = ts_test_df.reset_index()
                    ts_test_df.columns = self._ts_df_cols
                #
                weekly_s = self._weekly_seasonality
                if self._weekend_seasonality:
                    # force to False
                    weekly_s = False
                #
                if not self._consider_holidays:
                    self._model = Prophet(interval_width=self._prophet_interval_width,
                                          yearly_seasonality=self._yearly_seasonality,
                                          weekly_seasonality=weekly_s,
                                          daily_seasonality=self._daily_seasonality,
                                          changepoint_range=self._changepoint_range,
                                          changepoint_prior_scale=self._changepoint_prior_scale)
                else:
                    try:
                        assert self._country in ['AT', 'DE', 'US']
                    except AssertionError:
                        self._prophet_logger.exception("Assrtion exception occurred. Right now, Austria (AT), "
                                                       "Germany(DE) and USA (US) supported.")
                        sys.exit("STOP")
                    else:
                        holi = None
                        if self._country == 'AT':
                            holi = holidays.AT(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                        elif self._country == 'DE':
                            holi = holidays.DE(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                        elif self._country == 'US':
                            holi = holidays.US(state=None, years=list(np.unique(np.asarray(self.ts_df.index.year))))
                        #
                        holi_dict = dict()
                        for date, name in sorted(holi.items()):
                            holi_dict[date] = name

                        df_holi = pd.DataFrame.from_dict(data=holi_dict, orient='index').reset_index()
                        df_holi.columns = ['ds', 'holiday']
                        df_holi['lower_window'] = 0
                        df_holi['upper_window'] = 0
                        self._model = Prophet(interval_width=self._prophet_interval_width,
                                              yearly_seasonality=self._yearly_seasonality,
                                              weekly_seasonality=weekly_s,
                                              daily_seasonality=self._daily_seasonality,
                                              changepoint_range=self._changepoint_range,
                                              changepoint_prior_scale=self._changepoint_prior_scale,
                                              holidays=df_holi)

                if self._monthly_seasonality:
                    self._model.add_seasonality(name='monthly', period=30.5, fourier_order=20)
                    if not suppress:
                        self._prophet_logger.info("Added monthly seasonality.")

                if self._quarterly_seasonality:
                    self._model.add_seasonality(name='quarterly', period=91.5, fourier_order=20)
                    if not suppress:
                        self._prophet_logger.info("Added quarterly seasonality.")

                if self._weekend_seasonality:
                    ts_df['on_weekend'] = ts_df['ds'].apply(self.we_season)
                    ts_df['off_weekend'] = ~ts_df['ds'].apply(self.we_season)
                    self._train_dt = ts_df.copy()
                    self._train_dt.set_index('ds', inplace=True)
                    #
                    if ts_test_df is not None and not ts_test_df.empty:
                        ts_test_df['on_weekend'] = ts_test_df['ds'].apply(self.we_season)
                        ts_test_df['off_weekend'] = ~ts_test_df['ds'].apply(self.we_season)
                        self._test_dt = ts_test_df.copy()
                        self._test_dt.set_index('ds', inplace=True)
                    # and add
                    self._model.add_seasonality(name='weekend_on_season', period=7,
                                                fourier_order=5, condition_name='on_weekend')
                    self._model.add_seasonality(name='weekend_off_season', period=7,
                                                fourier_order=5, condition_name='off_weekend')

                    if not suppress:
                        self._prophet_logger.info("Added week-end seasonality.")

                # tic
                start = time()
                self.model_fit = self._model.fit(ts_df)
                # toc
                if not suppress:
                    self._prophet_logger.info("Time elapsed: {} sec.".format(time() - start))
            except (Exception, ValueError):
                self._prophet_logger.exception("Prophet error...")
                return -1
            else:
                self._prophet_logger.info("Model successfully fitted to the data!")

                # Fitted values
                self._prophet_logger.info("Computing fitted values and residuals...")
                # in-sample predict
                try:
                    self.fittedvalues = self._model.predict(ts_df.drop('y', axis=1))
                except (Exception, ValueError):
                    self._prophet_logger.exception("Prophet predict error...")
                # Residuals
                try:
                    # residuals = observed values minus the in-sample predictions
                    self.residuals = pd.Series(np.asarray(ts_df.y) - np.asarray(self.fittedvalues['yhat']),
                                               index=self._train_dt.index)
                except (KeyError, AttributeError):
                    self._prophet_logger.exception("Model was not fitted or ts has other structure...")
                #
                self.lower_conf_int = pd.Series(np.asarray(self.fittedvalues['yhat_lower']), index=self._train_dt.index)
                self.upper_conf_int = pd.Series(np.asarray(self.fittedvalues['yhat_upper']), index=self._train_dt.index)

                self._prophet_logger.info("Done.")
            return self

    def ts_diagnose(self):
        """Diagnoses the fitted model"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._prophet_logger.exception("Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

        if self._diagnose:
            if input("Run cross validation y/n? Note, depending on parameters provided "
                     "this can take some time...").strip().lower() == 'y':
                start = time()
                self._prophet_logger.info("Running cross validation using parameters provided....")
                if self._history is not None:
                    try:
                        self._prophet_cv = cross_validation(self.model_fit, initial=self._history,
                                                            period=self._step,
                                                            horizon=self._horizon)
                    except Exception:
                        self._prophet_logger.exception("Prophet cross validation error: check your "
                                                       "parameters 'history', 'horizon', 'step'!")
                else:
                    try:
                        self._prophet_cv = cross_validation(self.model_fit, period=self._step,
                                                            horizon=self._horizon)
                    except Exception:
                        self._prophet_logger.exception("Prophet cross validation error: "
                                                       "check your parameters 'horizon', 'step'!")

                self._prophet_logger.info("Time elapsed: {}".format(time() - start))
                simu_intervals = self._prophet_cv.groupby('cutoff')['ds'].agg(
                    [('forecast_start', 'min'),
                     ('forecast_till', 'max')])
                self._prophet_logger.info("Following time windows and cutoffs have been set-up:\n")
                print(simu_intervals)
                #
                plot_cross_validation_metric(self._prophet_cv, metric='mape')
                #
                self._prophet_logger.info("Running performance metrics...")
                self._prophet_p = performance_metrics(self._prophet_cv)

            else:
                self._prophet_logger.info("OK")
                return

    def plot_residuals(self):
        """Plot the residuals"""
        fig, axes = super(ProphetForecaster, self)._plot_residuals(
            y=np.asarray(self._train_dt['y']), yhat=np.asarray(self.fittedvalues['yhat']), _id="Prophet")
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(ProphetForecaster, self)._check_ts_test() < 0:
            return

        self._prophet_logger.info("Evaluating the fitted Prophet model on the test data...")
        self.forecast = self._model.predict(self._test_dt.copy().reset_index().drop('y', axis=1))
        # confidence intervals
        self.lower_conf_int = pd.concat([self.lower_conf_int,
                                         pd.Series(np.asarray(self.forecast['yhat_lower']), index=self._test_dt.index)],
                                        axis=0)
        self.upper_conf_int = pd.concat([self.upper_conf_int,
                                         pd.Series(np.asarray(self.forecast['yhat_upper']), index=self._test_dt.index)],
                                        axis=0)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt['y']) - np.asarray(self.forecast['yhat']),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._prophet_logger.info("RMSE on test data: {}".format(self.rmse))
        # plot
        if show_plot:
            self.plot_forecast()

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(ProphetForecaster, self)._check_ts_forecast(n_forecast)
        #
        self._prophet_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._prophet_logger.info("Forecasting next " + str(n_forecast) + str(self.ts_df.index.freq))
        #
        future = self._model.make_future_dataframe(periods=n_forecast, freq=self.freq)
        if self._weekend_seasonality:
            future['on_weekend'] = future['ds'].apply(self.we_season)
            future['off_weekend'] = ~future['ds'].apply(self.we_season)

        self.forecast = self._model.predict(future)
        # confidence intervals
        self.lower_conf_int = pd.concat([self.lower_conf_int,
                                         pd.Series(np.asarray(self.forecast['yhat_lower']), index=future.ds)],
                                        axis=0)
        self.upper_conf_int = pd.concat([self.upper_conf_int,
                                         pd.Series(np.asarray(self.forecast['yhat_upper']), index=future.ds)],
                                        axis=0)

        self.residuals_forecast = None
        self.plot_forecast()

    def plot_forecast(self):
        """Plot forecasted values"""
        if self.residuals_forecast is not None:
            fig, axes = super(ProphetForecaster, self)._plot_forecast(y=np.asarray(self._train_dt['y']),
                                                                      yhat=np.asarray(self.fittedvalues['yhat']),
                                                                      forecast=pd.Series(
                                                                          np.asarray(self.forecast['yhat']),
                                                                          index=self.forecast['ds']), _id='Prophet')
        else:
            fig_forecast = self._model.plot(self.forecast)
            fig_components = self._model.plot_components(self.forecast)
            if self._add_change_points:
                a = add_changepoints_to_plot(fig_forecast.gca(), self._model, self.forecast)

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
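
A hedged usage sketch of ProphetForecaster; parameter values are illustrative and the base-class keywords are assumed as in Example #1:

import numpy as np
import pandas as pd

idx = pd.date_range("2019-01-01", periods=730, freq="D")
ts_df = pd.DataFrame({"Value": np.random.randn(730).cumsum()}, index=idx)

proph = ProphetForecaster(prophet_interval_width=0.95,
                          yearly_seasonality=True,
                          weekend_seasonality=True,      # adds conditional weekly terms, see ts_fit()
                          changepoint_prior_scale=0.05,
                          consider_holidays=True,
                          country='DE',                  # AT, DE and US are supported
                          ts_df=ts_df, time_format="%Y-%m-%d", freq='D',
                          n_test=30, n_val=30)
proph.ts_fit(suppress=True)
proph.ts_test(show_plot=True)
proph.ts_forecast(n_forecast=30, suppress=True)
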
Example #5
class UVariateTimeSeriesForecaster(LinearForecaster, AutoARIMAForecaster,
                                   SARIMAForecaster,
                                   ExponentialSmoothingForecaster,
                                   ProphetForecaster, DLMForecaster):
    """Univariate time series class inheriting from all existing forecasters and choosing the best forecaster.

    Attributes
    ----------
    forecasters: list
       List of all forecasters to be used. The best one will be chosen as the final model.
       The goodness-of-fit measure is the RMSE of a model on the test data.
       Note that it is necessary to generate test data.

    _dict_models: dictionary
       Dictionary keeping all models
    best_model: Object
       The best model
    _uvtsf_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    ts_fit()
       Fits all forecasters to the time series
    ts_test()
       Tests all forecasters on test data and computes the RMSE
    _select_best()
       Helper function to select the best model
    select_best()
       Fits all forecasters to the time series and selects the best one based on the RMSE of each, computed on the test data.
       If no test data was generated, no test is done and no model is selected
    plot_residuals()
       Plots residuals for the best model
    ts_forecast()
       Forecasts the time series and plots the results using the best model
    plot_forecast()
       Plots the forecasted time series

    """
    def __init__(self, forecasters=['all'], **kwds):
        """Initialized the object UVariateTimeSeriesForecaster"""
        self.forecasters = list(map(lambda x: x.lower(), forecasters))
        self._dict_models = dict()  # .fromkeys(self._model_list, None)
        self.best_model = None

        self._uvtsf_logger = Logger("uvtsf")
        #
        try:
            super(UVariateTimeSeriesForecaster, self).__init__(**kwds)
        except TypeError:
            self._uvtsf_logger.exception("Arguments missing...")

        self._id = 'ts_forecaster'
        #
        if 'prophet' in self.forecasters:
            self._dict_models['prophet'] = self.__copy__()
            self._dict_models['prophet'].__class__ = ProphetForecaster
        if 'linear' in self.forecasters:
            self._dict_models['linear'] = self.__copy__()
            self._dict_models['linear'].__class__ = LinearForecaster
        if 'arima' in self.forecasters:
            self._dict_models['arima'] = self.__copy__()
            self._dict_models['arima'].__class__ = ARIMAForecaster
        if 'sarima' in self.forecasters:
            self._dict_models['sarima'] = self.__copy__()
            self._dict_models['sarima'].__class__ = SARIMAForecaster
        if 'auto_arima' in self.forecasters:
            self._dict_models['auto_arima'] = self.__copy__()
            self._dict_models['auto_arima'].__class__ = AutoARIMAForecaster
        if 'exponential smoothing' in self.forecasters:
            self._dict_models['expsm'] = self.__copy__()
            self._dict_models[
                'expsm'].__class__ = ExponentialSmoothingForecaster
        if 'dlm' in self.forecasters:
            self._dict_models['dlm'] = self.__copy__()
            self._dict_models['dlm'].__class__ = DLMForecaster

        if 'all' in self.forecasters:
            self._dict_models['prophet'] = self.__copy__()
            self._dict_models['prophet'].__class__ = ProphetForecaster

            self._dict_models['linear'] = self.__copy__()
            self._dict_models['linear'].__class__ = LinearForecaster

            self._dict_models['arima'] = self.__copy__()
            self._dict_models['arima'].__class__ = ARIMAForecaster

            self._dict_models['sarima'] = self.__copy__()
            self._dict_models['sarima'].__class__ = SARIMAForecaster

            self._dict_models['auto_arima'] = self.__copy__()
            self._dict_models['auto_arima'].__class__ = AutoARIMAForecaster

            self._dict_models['expsm'] = self.__copy__()
            self._dict_models[
                'expsm'].__class__ = ExponentialSmoothingForecaster

            self._dict_models['dlm'] = self.__copy__()
            self._dict_models['dlm'].__class__ = DLMForecaster

        self.assertions()

    def __copy__(self):
        """Copies the object"""

        result = super(UVariateTimeSeriesForecaster, self).__copy__()
        #
        result.forecasters = self.forecasters
        result._dict_models = self._dict_models
        result.best_model = self.best_model

        return result

    def assertions(self):
        try:
            assert isinstance(self.forecasters, list)
        except AssertionError:
            self._uvtsf_logger.exception(
                "Assertion exception occurred, list expected for forecasters")
            sys.exit("STOP")

        for k, v in self._dict_models.items():
            try:
                assert self._dict_models[k].n_test > 0
            except AssertionError:
                self._uvtsf_logger.exception(
                    "Assertion exception occurred, no test data was generated! "
                    "This forecaster requires the test data")
                sys.exit("STOP")

    def ts_fit(self, suppress=False):
        """Fit all forecasters to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        for k, v in self._dict_models.items():
            if self._dict_models[k] is not None:
                self._dict_models[k].ts_fit(suppress=suppress)
        return self

    def ts_diagnose(self):
        """Diagnoses all candidate models"""
        for k, v in self._dict_models.items():
            if self._dict_models[k].model_fit is not None:
                self._dict_models[k].ts_diagnose()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        for k, v in self._dict_models.items():
            if self._dict_models[k].model_fit is not None:
                self._dict_models[k].ts_test(show_plot=show_plot)
        self._select_best()

    def _select_best(self):
        """Helper function to select the best model among fitted forecasters"""
        rmse = float('Inf')
        for k, v in self._dict_models.items():
            if self._dict_models[k].model_fit is not None:
                if self._dict_models[k].rmse < rmse:
                    rmse = self._dict_models[k].rmse
                    self.best_model = self._dict_models[k]
        if self.best_model is not None:
            self._uvtsf_logger.info("The best model selected as: {}".format(
                str(type(self.best_model)).split('\'')[1].split('.')[2]))
        else:
            self._uvtsf_logger.warning(
                "No model has been fitted! Please call ts_fit()...")

    """
    def select_best(self, suppress=False):
        Fit all forecasters and select the best model
        self.ts_fit(suppress=suppress)
        self.ts_test()

        return self
    """

    def plot_residuals(self):
        """Residual plots"""
        if self.best_model is not None and bool(self.best_model):
            self.best_model.plot_residuals()
        else:
            for k, v in self._dict_models.items():
                if self._dict_models[k].model_fit is not None:
                    self._dict_models[k].plot_residuals()

    def ts_validate(self, suppress=True, show_plot=True):
        """Validates the best model"""
        if self.best_model is not None:
            self.best_model.ts_validate(suppress=suppress, show_plot=show_plot)
        else:
            self._uvtsf_logger.warning(
                "No model has been selected yet! Run ts_test() first, or restart."
            )
            sys.exit("STOP")

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast n_forecast steps in the future using the best model"""
        if self.best_model is not None:
            self.best_model.ts_forecast(n_forecast=n_forecast,
                                        suppress=suppress)
        else:
            self._uvtsf_logger.warning(
                "No model has been selected! Please call ts_test()...")
        return self

    def plot_forecast(self):
        """Plots forecasted values"""
        if self.best_model is not None:
            self.best_model.plot_forecast()
        else:
            self._uvtsf_logger.warning(
                "No model has been selected! Please call ts_fit()...")
class DLMForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using DLM of pydlm for forecasting,
    ref. to https://pydlm.github.io/pydlm_user_guide.html

    Attributes
    ----------
    _dlm_trend: dict
         A dictionary of degree, discount, name and prior covariance ('w')
    _dlm_seasonality: dict
        A dictionary of period, discount, name and prior covariance ('w')
    _dlm_dynamic: dictionary
        A dictionary with a 'features' key holding a list of feature dictionaries
        (features, discount, name and prior covariance).
        Note, the features for _dynamic should be a list of lists.
    _dlm_auto_reg: dict
       A dictionary of degree, discount, name and prior covariance ('w')
    _dlm_long_season: dict
       A dictionary of period, stay, name and prior covariance ('w')
    _use_rolling_window: bool
       Use rolling window in forward filtering yes/no
    _window_size: int
       Size of the rolling window used in forward filtering
    _dlm_interval_width: float
       Width of the confidence interval used for the forecasts
    _dlm_logger: Logger
       The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the DLM model to the time series
    ts_diagnose()
       Diagnoses the fitted model
    plot_dlm()
        Plot pydlm native plots
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
       Forecasts time series and plots the results
    plot_forecasts()
       Plots forecasted time-series
    """
    def __init__(self,
                 dlm_trend=None,
                 dlm_seasonality=None,
                 dlm_dynamic=None,
                 dlm_auto_reg=None,
                 dlm_long_season=None,
                 use_rolling_window=False,
                 window_size=0,
                 dlm_interval_width=0.95,
                 **kwds):
        """Initializes the object DLMForecaster"""
        if dlm_trend is None:
            dlm_trend = {
                'degree': 0,
                'discount': 0.99,
                'name': 'trend1',
                'w': 1e7
            }
        self._model = None
        self.mse = None

        self._dlm_trend = dlm_trend
        self._dlm_seasonality = dlm_seasonality
        self._dlm_dynamic = dlm_dynamic
        self._dlm_auto_reg = dlm_auto_reg
        self._dlm_long_season = dlm_long_season
        self._use_rolling_window = use_rolling_window
        self._window_size = window_size
        self._dlm_interval_width = dlm_interval_width

        self._dlm_logger = Logger('dlm')

        self.assertions()

        try:
            super(DLMForecaster, self).__init__(**kwds)
        except TypeError:
            self._dlm_logger.exception("TypeError occurred, Arguments missing")

        self._id = 'DLM'
        self._train_dlm_dynamic = None  # features
        self._test_dlm_dynamic = None  # featureDict
        self._val_dlm_dynamic = None  # featureDict

    def assertions(self):
        if self._dlm_trend is not None:
            try:
                assert isinstance(self._dlm_trend, dict)
            except AssertionError:
                self._dlm_logger.exception(
                    "Assertion exception occurred, dictionary expected for dlm_trend"
                )
                sys.exit("STOP")
            else:
                len_keys = list(
                    filter(lambda x: x in list(self._dlm_trend.keys()),
                           keys_f(keys=['degree', 'discount', 'name'])))
                try:
                    assert len(len_keys) == len(['degree', 'discount', 'name'])
                except AssertionError:
                    self._dlm_logger.exception(
                        "Not all expected parameters found for trend. "
                        "['degree', 'discount', 'name'] are necessary!")
                    sys.exit("STOP")
                else:
                    if 'w' not in list(self._dlm_trend.keys()):
                        self._dlm_trend['w'] = 1e7

        if self._dlm_seasonality is not None:
            try:
                assert isinstance(self._dlm_seasonality, dict)
            except AssertionError:
                self._dlm_logger.exception(
                    "Assertion exception occurred, dictionary expected for dlm_seasonality"
                )
                sys.exit("STOP")
            else:
                len_keys = list(
                    filter(lambda x: x in list(self._dlm_seasonality.keys()),
                           keys_f(keys=['period', 'discount', 'name'])))
                try:
                    assert len(len_keys) == len(['period', 'discount', 'name'])
                except AssertionError:
                    self._dlm_logger.exception(
                        "Not all expected parameters found for seasonality. "
                        "['period', 'discount', 'name] are necessary!")
                    sys.exit("STOP")
                else:
                    if 'w' not in list(self._dlm_seasonality.keys()):
                        self._dlm_seasonality['w'] = 1e7

        if self._dlm_auto_reg is not None:
            try:
                assert isinstance(self._dlm_auto_reg, dict)
            except AssertionError:
                self._dlm_logger.exception(
                    "Assertion exception occurred, dictionary expected for dlm_auroReg"
                )
                sys.exit("STOP")
            else:
                len_keys = list(
                    filter(lambda x: x in list(self._dlm_auto_reg.keys()),
                           keys_f(keys=['degree', 'discount', 'name'])))
                try:
                    assert len(len_keys) == len(['degree', 'discount', 'name'])
                except AssertionError:
                    self._dlm_logger.exception(
                        "Not all expected parameters found for auto_reg. "
                        "['degree', 'discount', 'name'] are necessary!")
                    sys.exit("STOP")
                else:
                    if 'w' not in list(self._dlm_auto_reg.keys()):
                        self._dlm_auto_reg['w'] = 1e7

        if self._dlm_long_season is not None:
            try:
                assert isinstance(self._dlm_long_season, dict)
            except AssertionError:
                self._dlm_logger.exception(
                    "Assertion exception occurred, dictionary expected for dlm_longSeason"
                )
                sys.exit("STOP")
            else:
                len_keys = list(
                    filter(lambda x: x in list(self._dlm_long_season.keys()),
                           keys_f(keys=['period', 'stay', 'name'])))
                try:
                    assert len(len_keys) == len(['period', 'stay', 'name'])
                except AssertionError:
                    self._dlm_logger.exception(
                        "Not all expected parameters found for long season. "
                        "['period', 'stay', 'name'] are necessary!")
                    sys.exit("STOP")
                else:
                    if 'w' not in list(self._dlm_long_season.keys()):
                        self._dlm_long_season['w'] = 1e7

        if self._dlm_dynamic is not None:
            try:
                assert isinstance(self._dlm_dynamic, dict)
            except AssertionError:
                self._dlm_logger.exception(
                    "Assertion exception occurred, dictionary expected for dlm_seasonality"
                )
                sys.exit("STOP")
            else:
                try:
                    assert 'features' in list(self._dlm_dynamic.keys())
                except AssertionError:
                    self._dlm_logger.exception(
                        "Assertion exception occurred, 'features' must be provided!"
                    )
                    sys.exit("STOP")
                else:
                    try:
                        assert isinstance(self._dlm_dynamic['features'], list)
                    except AssertionError:
                        self._dlm_logger.exception(
                            "Assertion exception occurred, list expected for 'features'"
                        )
                        sys.exit("STOP")
                    else:
                        for i in range(len(self._dlm_dynamic['features'])):
                            len_keys = list(
                                filter(
                                    lambda x: x in list(self._dlm_dynamic[
                                        'features'][i].keys()),
                                    keys_f(
                                        keys=['features', 'discount', 'name'
                                              ])))
                            try:
                                assert len(len_keys) == len(
                                    ['features', 'discount', 'name'])
                            except AssertionError:
                                self._dlm_logger.exception(
                                    "Not all expected parameters found for dynamic features. "
                                    "['features', 'discount', 'name'] are necessary!"
                                )
                                sys.exit("STOP")

                    # features must have same length with the data
                    for i in range(len(self._dlm_dynamic['features'])):
                        try:
                            assert len(self._dlm_dynamic['features'][i]
                                       ['features']) == len(self.ts_df)
                        except AssertionError:
                            self._dlm_logger.exception(
                                "Assertion exception occurred. All provided features must"
                                " be of same length as your data!")
                            sys.exit("STOP")
                        else:
                            if 'w' not in list(
                                    self._dlm_dynamic['features'][i].keys()):
                                self._dlm_dynamic['features'][i]['w'] = 1e7

        if self._use_rolling_window:
            try:
                assert self._window_size > 0
            except AssertionError:
                self._dlm_logger.exception(
                    "Assertion exception occurred, zero window_size. "
                    "No rolling window will be used")
                self._use_rolling_window = False

    def __copy__(self):
        """Copies the object"""
        result = super(DLMForecaster, self).__copy__()
        #
        result._dlm_trend = self._dlm_trend
        result._dlm_seasonality = self._dlm_seasonality
        result._dlm_dynamic = self._dlm_dynamic
        result._dlm_auto_reg = self._dlm_auto_reg
        result._dlm_long_season = self._dlm_long_season
        result._use_rolling_window = self._use_rolling_window
        result._window_size = self._window_size
        result._dlm_interval_width = self._dlm_interval_width

        result._dlm_logger = self._dlm_logger

        return result

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters"""
        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k == 'time_format':
                self.time_format = v
            elif k == "dlm_trend":
                self._dlm_trend = v
            elif k == "dlm_seasonality":
                self._dlm_seasonality = v
            elif k == "dlm_dynamic":
                self._dlm_dynamic = v
            elif k == "dlm_autoReg":
                self._dlm_auto_reg = v
            elif k == "dlm_longSeason":
                self._dlm_long_season = v
            # TBD other params!!
        self.assertions()

        return self

    def get_params_dict(self):
        """Gets parameters as dictionary"""
        return {
            'dlm_trend': self._dlm_trend,
            'dlm_seasonality': self._dlm_seasonality,
            'dlm_dynamic': self._dlm_dynamic,
            'dlm_auto_reg': self._dlm_auto_reg,
            'dlm_long_season': self._dlm_long_season,
            'use_rolling_window': self._use_rolling_window,
            'window_size': self._window_size,
            'dlm_interval_width': self._dlm_interval_width
        }

    def ts_split(self):
        """DLM extension of the parent ts_split()

        DLM needs to extend the ts_split() of its parent class.
        The reason lies in the dynamic features: their list of lists must be split as well.
        """
        # call super
        super(DLMForecaster, self).ts_split()

        if self._dlm_dynamic is None or self._mode == 'forecast':
            return self

        # split dynamic features
        test_feat_dict = dict()
        val_feat_dict = dict()

        self._train_dlm_dynamic = self._dlm_dynamic

        for i in range(len(self._dlm_dynamic['features'])):
            feats = self._dlm_dynamic['features'][i]['features']
            #
            if self._mode == 'test and validate':
                if self._test_dlm_dynamic is not None:
                    self._train_dlm_dynamic['features'][i]['features'].append(
                        self._test_dlm_dynamic[self._dlm_dynamic['features'][i]
                                               ['name']])
                    self._val_dlm_dynamic = self._test_dlm_dynamic
                else:
                    self._dlm_logger.error("Something is wrong, mode!")
            else:
                if self._mode == 'test' and self.n_val == 0:
                    self._train_dlm_dynamic['features'][i][
                        'features'] = feats[:(len(feats) - 1 - self.n_test)]
                    #
                    test_feat_dict[self._dlm_dynamic['features'][i]
                                   ['name']] = feats[(len(feats) -
                                                      self.n_test):]
                elif self._mode == 'validate':
                    self._train_dlm_dynamic['features'][i][
                        'features'] = feats[:(len(feats) - 1 - self.n_val)]
                    #
                    val_feat_dict[self._dlm_dynamic['features'][i]
                                  ['name']] = feats[(len(feats) - self.n_val):]
                elif self._mode == 'test' and self.n_val > 0:
                    self._dlm_dynamic['features'][i]['features'] = feats[:(
                        len(feats) - 1 - self.n_test - self.n_val)]
                    #
                    test_feat_dict[self._dlm_dynamic['features'][i]['name']] = \
                        feats[(len(feats) - self.n_test - self.n_val):(len(feats) - self.n_val - 1)]
                    val_feat_dict[self._dlm_dynamic['features'][i]
                                  ['name']] = feats[(len(feats) - self.n_val):]

            # now set
            if len(test_feat_dict):
                self._test_dlm_dynamic = test_feat_dict
            if len(val_feat_dict):
                self._val_dlm_dynamic = val_feat_dict

        return self

    def ts_fit(self, suppress=False):
        """Fit DLM to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        self._prepare_fit()
        self._model = None
        self.ts_split()

        ts_df = self._train_dt.copy()

        # Fit
        self._dlm_logger.info("Trying to fit the DLM model....")
        try:
            if not suppress:
                self._dlm_logger.info("...via using parameters\n")
                print_attributes(self)

            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols

            self._model = dlm(ts_df['y'])

            # trend
            if self._dlm_trend is not None:
                self._model = self._model + trend(
                    degree=self._dlm_trend['degree'],
                    discount=self._dlm_trend['discount'],
                    name=self._dlm_trend['name'],
                    w=self._dlm_trend['w'])
            # seasonality
            if self._dlm_seasonality is not None:
                self._model = self._model + seasonality(
                    period=self._dlm_seasonality['period'],
                    discount=self._dlm_seasonality['discount'],
                    name=self._dlm_seasonality['name'],
                    w=self._dlm_seasonality['w'])
            # dynamic
            if self._train_dlm_dynamic is not None:
                for i in range(len(self._train_dlm_dynamic['features'])):
                    self._model = self._model + dynamic(
                        features=self._train_dlm_dynamic['features'][i]
                        ['features'],
                        discount=self._train_dlm_dynamic['features'][i]
                        ['discount'],
                        name=self._train_dlm_dynamic['features'][i]['name'],
                        w=self._train_dlm_dynamic['features'][i]['w'])
            # auto_reg
            if self._dlm_auto_reg is not None:
                self._model = self._model + autoReg(
                    degree=self._dlm_auto_reg['degree'],
                    discount=self._dlm_auto_reg['discount'],
                    name=self._dlm_auto_reg['name'],
                    w=self._dlm_auto_reg['w'])
            # long_season
            if self._dlm_long_season is not None:
                ls = longSeason(period=self._dlm_long_season['period'],
                                stay=self._dlm_long_season['stay'],
                                data=ts_df,
                                name=self._dlm_long_season['name'],
                                w=self._dlm_long_season['w'])
                self._model = self._model + ls

            if not suppress:
                self._dlm_logger.info("The constructed DLM model components:")
                print(self._model.ls())

            # tic
            start = time()
            if self._use_rolling_window:
                self._model.fitForwardFilter(useRollingWindow=True,
                                             windowLength=self._window_size)
                self._model.fitBackwardSmoother()
            else:
                self._model.fit()
            self.model_fit = self._model
            # toc
            if not suppress:
                self._dlm_logger.info("Time elapsed: {} sec.".format(time() -
                                                                     start))
        except (Exception, ValueError) as e:
            self._dlm_logger.exception("DLM error...{}".format(e))
            return -1
        else:
            self._dlm_logger.info("Model successfully fitted to the data!")
            self._dlm_logger.info("Computing fitted values and residuals...")

            # Residuals
            self.residuals = pd.Series(self.model_fit.getResidual(),
                                       index=self._train_dt.index)
            try:
                self.lower_conf_int = pd.Series(
                    self.model_fit.getInterval()[1],
                    index=self._train_dt.index)
                self.upper_conf_int = pd.Series(
                    self.model_fit.getInterval()[0],
                    index=self._train_dt.index)
            except ValueError as e:
                self._dlm_logger.exception(
                    "Something went wrong in getInterval...{}".format(e))

            self.mse = self.model_fit.getMSE()

            # Fitted values
            # this is not elegant, but found no other way
            self.fittedvalues = self._train_dt['y'] + self.residuals

            return self

    def ts_diagnose(self):
        """Diagnoses the fitted model"""
        self.plot_residuals()

    def plot_dlm(self):
        """Plot pydlm native plots"""
        self.model_fit.plot("DLM native")

    def plot_residuals(self):
        """Plot the residuals."""
        fig, axis = super(DLMForecaster, self)._plot_residuals(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues),
            _id="DLM")

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(DLMForecaster, self)._check_ts_test() < 0:
            return

        N = len(self._test_dt)

        self._dlm_logger.info(
            "Evaluating the fitted DLM model on the test data...")

        if self._test_dlm_dynamic is not None:
            (predictMean, predictVar) = self._model.predictN(
                N=N,
                date=self._model.n - 1,
                featureDict=self._test_dlm_dynamic)
        else:
            (predictMean,
             predictVar) = self._model.predictN(N=N, date=self._model.n - 1)

        self.forecast = pd.Series(np.asarray(predictMean),
                                  index=self._test_dt.index)
        # confidence intervals
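        # compute_ci() is defined in the parent class (not shown here); presumably it derives
        # the bounds roughly as predictMean +/- z * sqrt(predictVar), with z set by ci_level.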
        cl, cu = self.compute_ci(yhat=np.asarray(predictMean),
                                 yhat_var=np.asarray(predictVar),
                                 ci_level=self._dlm_interval_width)
        cl = pd.Series(cl, index=self._test_dt.index)
        cu = pd.Series(cu, index=self._test_dt.index)
        self.lower_conf_int = pd.concat([self.lower_conf_int, cl], axis=0)
        self.upper_conf_int = pd.concat([self.upper_conf_int, cu], axis=0)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt['y']) -
                                            np.asarray(self.forecast),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._dlm_logger.info("RMSE on test data: {}".format(self.rmse))
        # plot
        if show_plot:
            self.plot_forecast()

        return self

    def ts_forecast(self, n_forecast, suppress=False, features_dict=None):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(DLMForecaster, self)._check_ts_forecast(n_forecast)
        #
        self._dlm_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._dlm_logger.info("Forecasting next " + str(n_forecast) +
                              str(self.freq))
        #
        try:
            if features_dict is not None and len(features_dict) != 0:
                (predictMean,
                 predictVar) = self._model.predictN(N=n_forecast,
                                                    date=self._model.n - 1,
                                                    featureDict=features_dict)
            else:
                (predictMean,
                 predictVar) = self._model.predictN(N=n_forecast,
                                                    date=self._model.n - 1)
        except (NameError, ValueError) as e:
            self._dlm_logger.exception("DLM PredictN error...{}".format(e))
            sys.exit("STOP")

        idx_future = self._gen_idx_future(n_forecast=n_forecast)
        self.forecast = pd.Series(np.asarray(predictMean), index=idx_future)

        # confidence intervals
        cl, cu = self.compute_ci(yhat=np.asarray(predictMean),
                                 yhat_var=np.asarray(predictVar),
                                 ci_level=self._dlm_interval_width)
        cl = pd.Series(cl, index=idx_future)
        cu = pd.Series(cu, index=idx_future)
        self.lower_conf_int = pd.concat([self.lower_conf_int, cl], axis=0)
        self.upper_conf_int = pd.concat([self.upper_conf_int, cu], axis=0)

        self.residuals_forecast = None
        self.plot_forecast(n_forecast=n_forecast, features_dict=features_dict)
        return self

    def plot_forecast(self, **kwargs):
        """Plot forecasted values"""
        if self.residuals_forecast is not None:
            fig, axis = super(DLMForecaster, self)._plot_forecast(
                y=np.asarray(self._train_dt['y']),
                yhat=np.asarray(self.fittedvalues),
                forecast=self.forecast,
                _id='DLM')
            plt.gcf().autofmt_xdate()
            plt.grid(True)
            plt.show()
        else:
            n_forecast = -1
            features_dict = dict()

            for k, v in kwargs.items():
                if k == 'n_forecast':
                    n_forecast = v
                if k == 'features_dict':
                    features_dict = v
            try:
                if features_dict is not None and len(features_dict) != 0:
                    self._model.plotPredictN(N=n_forecast,
                                             date=self._model.n - 1,
                                             featureDict=features_dict)
                else:
                    self._model.plotPredictN(N=n_forecast,
                                             date=self._model.n - 1)
            except (NameError, ValueError) as e:
                self._dlm_logger.exception(
                    "DLM plotPredictN error...{}".format(e))
                sys.exit("STOP")
Example #7
class ARIMAForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class for forecasting using ARIMA

    Attributes
    ----------
    _order: tuple
       a tuple of p, d, q
    _arima_trend: str
        A parameter controlling the model of the deterministic trend, one of 'nc' or 'c'.
        'c' includes a constant trend, 'nc' no constant trend.
    _arima_logger: Logger
       the logger

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the ARIMA model to time series
    ts_diagnose()
       Diagnoses the fitted model
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
       Forecasts time series and plots the results
    plot_forecasts()
       Plots forecasted time-series
    """

    def __init__(self,
                 order=(1, 0, 1),
                 **kwds):
        """Initializes the object ARIMAForecaster"""
        self._arima_logger = Logger("ARIMA")

        self._order = order
        self._arima_trend = ''

        try:
            super(ARIMAForecaster, self).__init__(**kwds)
        except TypeError as e:
            self._arima_logger.exception("Arguments missing...{}".format(e))

        self._model = None

        ARIMAForecaster._init_trend(self)

        self._ar_coef = None
        self._ma_coef = None

        ARIMAForecaster.assertions(self)

        self._id = 'ARIMA'

    def _init_trend(self):
        if self._trend == 'constant':
            self._arima_trend = 'c'
        elif self._trend is None:
            self._arima_trend = 'nc'
        elif self._trend in ['linear', 'constant linear', 'additive', 'add', 'multiplicative', 'mul']:
            # self._arima_logger.warning("The trend " + str(self._trend) +
            #                           " is not supported by ARIMA! Assuming constant trend")
            self._arima_trend = 'c'

    def __copy__(self):
        """Copies the object"""
        result = super(ARIMAForecaster, self).__copy__()

        result._order = self._order
        result._arima_trend = self._arima_trend

        result._arima_logger = self._arima_logger
        return result

    def assertions(self):
        try:
            assert isinstance(self._order, tuple)
        except AssertionError:
            self._arima_logger.exception("Assertion exception occurred, tuple expected")
            sys.exit("STOP")
        try:
            assert (self.hyper_params is not None and len(self.hyper_params) != 0 and
                    'trend' in list(self.hyper_params.keys())) or (
                        self._arima_trend in ['c', 'nc'] or self._arima_trend is None)
        except AssertionError:
            self._arima_logger.exception("Assertion Error, trend must be in ['c', 'nc']")
            sys.exit("STOP")

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameter values"""

        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k == 'time_format':
                self.time_format = v
            elif k == 'order':
                self._order = v
            elif k == 'trend':
                self._arima_trend = v
        self.assertions()

        return self

    def get_params_dict(self):
        """Gets parameter values"""
        return {'order': self._order,
                'trend': self._arima_trend
                }

    def ts_fit(self, suppress=False):
        """Fit ARIMA to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """

        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # a very important command here to avoid endless loop
            self.hyper_params = None
            self._arima_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
            #
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._arima_logger.info("***** Finished grid search *****")
        else:
            self._prepare_fit()
            self.ts_split()
            ARIMAForecaster._init_trend(self)

            ts_df = self._train_dt.copy()

            # Fit
            self._arima_logger.info("Trying to fit the ARIMA model....")
            # tic
            start = time()
            try:
                if not suppress:
                    self._arima_logger.info("...via using parameters\n")
                    print_attributes(self)

                self._model = ARIMA(ts_df['y'], order=self._order, freq=self.freq)
                self.model_fit = self._model.fit(trend=self._arima_trend, method='mle', disp=1)
            except (Exception, ValueError):
                self._arima_logger.exception("Exception occurred in the fit...")
                self._arima_logger.error("Please try other parameters!")
                self.model_fit = None

            else:
                # toc
                self._arima_logger.info("Time elapsed: {} sec.".format(time() - start))
                self._arima_logger.info("Model successfully fitted to the data!")
                if not suppress:
                    self._arima_logger.info("The model summary: " + str(self.model_fit.summary()))

                # Fitted values
                self._arima_logger.info("Computing fitted values and residuals...")
                self._ar_coef, self._ma_coef = self.model_fit.arparams, self.model_fit.maparams

                self.fittedvalues = self.model_fit.fittedvalues
                # prologue
                if len(self.fittedvalues) != len(self._train_dt):
                    self.fittedvalues = pd.DataFrame(
                        index=pd.date_range(ts_df.index[0], ts_df.index[len(ts_df) - 1],
                                            freq=self.freq),
                        columns=['dummy']).join(pd.DataFrame(self.fittedvalues)).drop(['dummy'], axis=1)
                    self.fittedvalues = self.fittedvalues.reset_index()
                    self.fittedvalues.columns = self._ts_df_cols
                    self.fittedvalues.set_index('ds', inplace=True)
                    self.fittedvalues.y = self.fittedvalues.y.fillna(method='bfill')

                # Residuals
                super(ARIMAForecaster, self)._residuals()
                self._arima_logger.info("Done.")
        return self

    def ts_diagnose(self):
        """Diagnoses the model.

        In case of ARIMA residual plots are generated.
        Additionally, the kde plot of residuals is returned
        """
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._arima_logger.exception("Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.residuals.plot(kind='kde', title='Density')
        print("Residuals statistics")
        print(self.residuals.describe())
        self.plot_residuals()

    def plot_residuals(self):
        """Plot the residuals"""
        fig, axis = super(ARIMAForecaster, self)._plot_residuals(y=np.asarray(self._train_dt['y']),
                                                                 yhat=np.asarray(self.fittedvalues).flatten(),
                                                                 _id="ARIMA")

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(ARIMAForecaster, self)._check_ts_test() < 0:
            return

        n_forecast = len(self._test_dt)

        self._arima_logger.info("Evaluating the fitted ARIMA model on the test data...")
        future = self.model_fit.predict(start=len(self._train_dt.index),
                                        end=len(self._train_dt.index) + n_forecast - 1, dynamic=True)

        self.forecast = pd.Series(future, index=self._test_dt.index)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt['y']) - np.asarray(self.forecast),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._arima_logger.info("RMSE on the test data: {}".format(self.rmse))

        # plot
        if show_plot:
            self.plot_forecast()

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(ARIMAForecaster, self)._check_ts_forecast(n_forecast)
        #
        self._arima_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._arima_logger.info("Forecasting next " + str(n_forecast) + str(self.freq))
        #
        future = self.model_fit.predict(start=len(self._train_dt.index),
                                        end=len(self._train_dt.index) + (n_forecast-1), dynamic=True)
        idx_future = self._gen_idx_future(n_forecast=n_forecast)
        self.forecast = pd.Series(future, index=idx_future)

        self.residuals_forecast = None
        # self.plot_forecast()
        return self

    def plot_forecast(self):
        """Plot forecasted values"""
        fig, axis = super(ARIMAForecaster, self)._plot_forecast(y=np.asarray(self._train_dt['y']),
                                                                yhat=np.asarray(self.fittedvalues).flatten(),
                                                                forecast=self.forecast, _id='ARIMA')
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
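# --- Minimal ARIMAForecaster usage sketch (illustrative addition, assumptions noted) ---
# `order` is the (p, d, q) tuple handed to statsmodels' ARIMA and `trend` must be
# 'c' (constant) or 'nc' (no constant), see assertions(). `demo_df`, the base-class
# keyword names, and the base class setting the generic `trend` attribute read by
# _init_trend() are all assumed as in the sketches above.
arima_fc = ARIMAForecaster(ts_df=demo_df, freq='D', n_test=30, order=(2, 1, 1))
arima_fc.set_params(trend='c')      # 'trend' is one of the keys handled in set_params above
arima_fc.ts_fit(suppress=True)
arima_fc.ts_test(show_plot=False)   # out-of-sample RMSE on the test split
arima_fc.ts_forecast(n_forecast=14)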
Example #8
class UVariateTimeSeriesClass(object):
    """
    Uni-variate time series class
    Attributes:
        _ts_df_cols - internal column names for dataframe that will be input to model
        ts_df - time series data frame
        freq - frequency of time series, possibilities  ['S', 'min', 'H', 'D', 'W', 'M']
        p_train - float value defining which part of data is to be used as training data. Note, value of 1.0 would mean
                  all data will be used as training data,
                  hence no test data will be generated.
        timeformat - time format if time series data needs to be brought into datetime
        #
        _mode - defines the mode as 'test' or 'forecast'
        _train_dt - training data
        _test_dt - test data

        model_fit -  fitted model
        fittedvalues - computed fitted values
        residuals - residuals
        rmse - RMSE on test set (test data and the forecast on test data)

        upper_whisker_res - upper whisker for residuals
        lower_conf_int - lower confidence interval
        upper_conf_int - upper confidence interval

        forecast - computed forecasted values
        residuals_forecast - residuals between forecasted and real values. Note, this variable exists only if test
        data exists

    Methods:
         ts_transform() - transforms time series using log10 or box-cox
         ts_resample() - resamples time series at the chosen frequency freq
         _plot_residuals() - residual plots helper function
         ts_test()  - evaluates fitted model on the test data, if this one has been generated
         ts_forecast() - forecasts time series and plots the results
         _plot_forecast() - helper function for plotting forecasted time-series
         ts_decompose() - decomposes the time series into seasonal, trend and residual components and plots the results
         plot_decompose() - plots the results of ts_decompose()
    Helper methods:
         _prepare_fit() - prepares ts_fit of child class. Supposed to be called by a child class
         _residuals() - helper function for calculating residuals. Supposed to be called by a child class
         _check_ts_test() - checks for test. Supposed to be called by a child class
         _check_ts_forecast() - checks for forecast. Supposed to be called by a child class
    """

    def __init__(self, ts_df, time_format="%Y-%m-%d %H:%M:%S", freq='D', p_train=1.0, **kwds):
        """
        Initializes the object UVariateTimeSeriesClass
        """
        self._ts_df_cols = ['ds', 'y']

        self.ts_df = ts_df
        self.time_format = time_format
        self.freq = freq
        self.p_train = p_train
        self.transform = None
        self._boxcox_lmbda = None

        self._mode = ''

        self._train_dt = None
        self._test_dt = None

        self.model_fit = None
        self.fittedvalues = None
        self.residuals = None
        self.rmse = None

        self.upper_whisker_res = None
        self.lower_conf_int = None
        self.upper_conf_int = None

        self.forecast = None
        self.residuals_forecast = None

        self.seasonal = None
        self.trend = None
        self.baseline = None

        self._uvts_cls_logger = Logger('uvts_cls')
        # Assertion Tests
        try:
            assert self.freq in ['S', 'min', 'H', 'D', 'W', 'M']
        except AssertionError:
            self._uvts_cls_logger.warning("freq should be in  ['S', 'min', 'H', 'D', W', 'M']. "
                                          "Assuming daily frequency!")
            self.freq = 'D'

        try:
            self.p_train = float(self.p_train)
            assert self.p_train > 0
        except AssertionError:
            self._uvts_cls_logger.error("p_train defines part of data on which you would train your model."
                                        "This value cannot be less than or equal to zero!")
            self._uvts_cls_logger.exception("Exception occurred, p_train")
        except ValueError:
            self._uvts_cls_logger.error("p_train must be convertible to float type!")
            self._uvts_cls_logger.exception("Exception occurred, p_train")
        else:
            if int(self.p_train) < 1:
                self._mode = 'test'
            else:
                self._mode = 'forecast'

        try:
            assert pd.DataFrame(self.ts_df).shape[1] <= 2
        except AssertionError:
            self._uvts_cls_logger.error(
                "Time series must be uni-variate. "
                "Hence, at most a time columns and a column of numeric values are expected!")
            self._uvts_cls_logger.exception("Exception occurred, ts_df")
        else:
            self.ts_df = self.ts_df.reset_index()
            self.ts_df.columns = self._ts_df_cols
            self.ts_df['y'] = self.ts_df['y'].apply(np.float64, errors='coerce')
            self.ts_df.set_index('ds', inplace=True)
            self._uvts_cls_logger.info("Using time series data of range: " + str(min(self.ts_df.index)) + ' - ' + str(
                max(self.ts_df.index)) + " and shape: " + str(self.ts_df.shape))

        if not isinstance(self.ts_df.index, pd.DatetimeIndex):
            self._uvts_cls_logger.warning("Time conversion required...")
            self.ts_df = self.ts_df.reset_index()
            try:
                self.ts_df['ds'] = self.ts_df['ds'].apply(
                    lambda x: datetime.datetime.strptime(
                        str(x).translate({ord('T'): ' ', ord('Z'): None})[:-1],
                        self.time_format))
            except ValueError as e:
                self._uvts_cls_logger.warning("Zulu time conversion not successful: {}".format(e))
                self._uvts_cls_logger.warning("Will try without assuming zulu time...")
                try:
                    self.ts_df['ds'] = self.ts_df['ds'].apply(
                        lambda x: datetime.datetime.strptime(str(x), self.time_format))
                except ValueError as e:
                    self._uvts_cls_logger.info("Time conversion not successful. Check your time_format: {}".format(e))
                else:
                    self._uvts_cls_logger.info("Time conversion successful!")
            else:
                self._uvts_cls_logger.info("Time conversion successful!")
            # set index
            self.ts_df.set_index('ds', inplace=True)
        #
        self.ts_df.index = pd.to_datetime(self.ts_df.index)
        self.ts_df.sort_index(inplace=True)
        # resample
        self.ts_resample()
        # delegate
        super(UVariateTimeSeriesClass, self).__init__(**kwds)

    def __copy__(self):
        """
        Copies the object
        """
        cls = self.__class__
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        return result

    def __deepcopy__(self, memo):
        """
        Deepcopies the object
        """
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            setattr(result, k, deepcopy(v, memo))
        return result

    def ts_transform(self, transform):
        """
        Transforms the time series by applying the requested 'transform'. Currently 'log10' and 'box-cox' are supported.
        """
        try:
            assert transform.lower().strip() in ['log10', 'box-cox']
        except AssertionError:
            self._uvts_cls_logger.error(
                "transform should be in ['log10', 'box-cox'] or empty. Assuming no transform! "
                "Hence, if you get bad results, you would like maybe to choose e.g., log10 here.")
            self._uvts_cls_logger.exception("Assertion exception occurred, transform")
            self.transform = None
        else:
            self.transform = transform.lower()
            # transform
            if self.transform == 'log10':
                try:
                    self.ts_df['y'] = self.ts_df['y'].apply(np.log10)
                except ValueError:
                    self._uvts_cls_logger.exception("log10 transformation did not work! Possibly negative "
                                                    "values present?")
            elif self.transform == 'box-cox':
                if input("Do you want to provide lambda for box.cox? y/n?").strip().lower() == 'y':
                    self._boxcox_lmbda = float(input())
                else:
                    self._boxcox_lmbda = None
                try:
                    if self._boxcox_lmbda is None:
                        # let scipy estimate lambda and keep it for a possible inverse transform
                        self.ts_df['y'], self._boxcox_lmbda = stats.boxcox(self.ts_df['y'], lmbda=None)
                    else:
                        self.ts_df['y'] = stats.boxcox(self.ts_df['y'], lmbda=self._boxcox_lmbda)
                except ValueError:
                    self._uvts_cls_logger.exception("box-cox transformation did not work! "
                                                    "Possibly negative values present or bad lmbda?")
        return self

    def set_frequency(self, new_freq):
        """
        Sets new frequency and resamples time series to that new frequency
        """
        try:
            assert new_freq in ['S', 'min', 'H', 'D', 'W', 'M']
        except AssertionError:
            self._uvts_cls_logger.error("frequency should be in  ['S', 'min', 'H', 'D', W', 'M']")
        else:
            self.freq = new_freq
            self.ts_resample()

    def ts_check_frequency(self):
        """
        Checks the frequency of time series
        """
        if self.ts_df.index.freq is None:
            self._uvts_cls_logger.info("No specific frequency detected.")
            self._uvts_cls_logger.info("Frequency chosen in initialization: " + str(
                self.freq) + " enter 'n' and call ts_resample() if you are satisfied with this value.")
            if input("Should a histogram of time deltas be plotted y/n?").strip().lower() == 'y':
                ff = pd.Series(self.ts_df.index[1:(len(self.ts_df))] - self.ts_df.index[0:(len(self.ts_df) - 1)])
                ff = ff.apply(lambda x: int(x.total_seconds() / (60 * 60)))
                plt.hist(ff, bins=120)
                plt.xlabel("Rounded time delta [H]")
                plt.ylabel("Frequency of occurrence")
                self._uvts_cls_logger.info(ff.value_counts())
                self._uvts_cls_logger.info("Should hourly frequency not fit, choose a reasonable frequency and call "
                                           "set_frequency(new_freq)")
            else:
                pass
        else:
            self._uvts_cls_logger.info("Time series frequency: " + str(self.ts_df.index.freq))

    def ts_resample(self):
        """
        Brings original time series to the chosen frequency freq
        """
        ts_freq = pd.DataFrame(
            index=pd.date_range(self.ts_df.index[0], self.ts_df.index[len(self.ts_df) - 1], freq=self.freq),
            columns=['dummy'])
        self.ts_df = ts_freq.join(self.ts_df).drop(['dummy'], axis=1)
        self.ts_df.y = self.ts_df.y.fillna(method='ffill')
        # if np.isnan ( self.ts_df.y ).any ():
        #    self.ts_df.y = self.ts_df.y.fillna ( method='bfill' )
        if np.isnan(self.ts_df.y).any():
            self._uvts_cls_logger.warning("Some NaN found, something went wrong, check the data!")
            sys.exit(-1)

        self._uvts_cls_logger.info("Time series resampled at frequency: " + str(self.ts_df.index.freq) +
                                   ". New shape of the data: " + str(self.ts_df.shape))
        return self

    def _prepare_fit(self):
        """
        Prepares data for training or forecasting modes
        """

        if self.ts_df.index.freq is None:
            self._uvts_cls_logger.warning("Time series exhibit no frequency. Calling ts_resample()...")
            try:
                self.ts_resample()
            except ValueError:
                self._uvts_cls_logger.error("Resample did not work! Error:" + str(sys.exc_info()[0]))
                sys.exit("STOP")

        ts_df = self.ts_df
        ts_test_df = pd.DataFrame()

        if self._mode == 'forecast' or int(self.p_train) == 1:
            self._train_dt = ts_df
            self._test_dt = ts_test_df
        elif self._mode == 'test' and int(self.p_train) < 1:
            # split
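            # training keeps roughly the first int(p_train * n) rows; the remaining rows form the test set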
            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols
            ts_test_df = ts_df
            # training
            ts_df = pd.DataFrame(ts_df.loc[:int(self.p_train * len(ts_df) - 1), ])
            ts_df.set_index('ds', inplace=True)
            # test
            ts_test_df = pd.DataFrame(ts_test_df.loc[int(self.p_train * len(ts_test_df)):, ])
            ts_test_df.set_index('ds', inplace=True)
            # now set
            self._train_dt = ts_df
            if not ts_test_df.empty:
                self._test_dt = ts_test_df

        return self

    def _residuals(self):
        """
        Calculate residuals
        """
        if self.model_fit is None:
            self._uvts_cls_logger.error("No model has been fitted, residuals cannot be computed!")
            sys.exit("STOP")

        try:
            # residuals = observed training values minus the (flattened) fitted values
            self.residuals = pd.Series(np.asarray(self._train_dt['y']) - np.asarray(self.fittedvalues).flatten(),
                                       index=self._train_dt['y'].index)
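            # boxplot-style whisker: residual mean plus 1.5 times the interquartile range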
            self.upper_whisker_res = self.residuals.mean() + 1.5 * (
                    self.residuals.quantile(0.75) - self.residuals.quantile(0.25))
        except (KeyError, AttributeError):
            self._uvts_cls_logger.exception("Exception occurred: Model was not fitted or ts has other structure")

        return self

    def _plot_residuals(self, y, yhat, _id):
        """
        Plot the residuals
        """
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception("Model has to be fitted first! Please call ts_fit(...)")

        fig, axes = plt.subplots(2, 1, figsize=(20, 5), sharex=True)

        axes[0].plot(pd.Series(yhat, index=self._train_dt.index), color='y', linewidth=2.0)
        axes[0].plot(pd.Series(y, index=self._train_dt.index), color='b')

        axes[0].set_ylabel("Model Fit")
        axes[0].set_title("Real (blue) and estimated values, " + str(_id))
        #
        axes[1].plot(self.residuals, color="r")
        if self.forecast is not None and self.residuals_forecast is None \
                and self.lower_conf_int is not None and self.upper_conf_int is not None:
            axes[0].fill_between(self.lower_conf_int.index, self.lower_conf_int, self.upper_conf_int, color='k',
                                 alpha=.15)
        if self.upper_whisker_res is not None:
            axes[1].axhline(y=self.upper_whisker_res, xmin=0, xmax=1, color='m', label='upper_whisker', linestyle='--',
                            linewidth=1.5)
            axes[1].axhline(y=-self.upper_whisker_res, xmin=0, xmax=1, color='m', label='lower_whisker', linestyle='--',
                            linewidth=1.5)

        axes[1].set_ylabel('Residuals')
        axes[1].set_title('Difference between model output and the real data and +/- upper whisker, ' + str(_id))

        return fig, axes

    def _check_ts_test(self):
        """
        Check before ts_test in a child class is called
        """
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception("Model has to be fitted first! Please call ts_fit(...)")

        try:
            assert self._test_dt is not None
        except(KeyError, AssertionError):
            self._uvts_cls_logger.exception("Nothing to validate. "
                                            "Call ts_forecast() or specify amount of training data "
                                            "when initializing the object.")
            return -1
        else:
            self._mode = 'test'
            return 0

    def _check_ts_forecast(self, n_forecast):
        """
        Check before ts_forecast in child class is called
        """
        #
        try:
            n_forecast = int(n_forecast)
            assert 0 < n_forecast < len(self._train_dt)
        except AssertionError:
            self._uvts_cls_logger.exception("Number of periods to be forecasted is too low, too high or not numeric!")
        except ValueError:
            self._uvts_cls_logger.exception("n_forecast must be convertible to int type!")

        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception("Model has to be fitted first! Please call ts_fit(...)")

        return n_forecast

    def _gen_idx_future(self, n_forecast):
        idx_future = None
        if self.freq == 'S':
            idx_future = pd.date_range(start=max(self._train_dt.index),
                                       end=max(self._train_dt.index) + datetime.timedelta(
                                           seconds=n_forecast - 1), freq='S')
        elif self.freq == 'min':
            idx_future = pd.date_range(start=max(self._train_dt.index),
                                       end=max(self._train_dt.index) + datetime.timedelta(
                                           minutes=n_forecast - 1), freq='min')
        elif self.freq == 'H':
            idx_future = pd.date_range(start=max(self._train_dt.index),
                                       end=max(self._train_dt.index) + datetime.timedelta(
                                           hours=n_forecast - 1), freq='H')
        elif self.freq == 'D':
            idx_future = pd.date_range(start=max(self._train_dt.index),
                                       end=max(self._train_dt.index) + datetime.timedelta(
                                           days=n_forecast - 1), freq='D')
        elif self.freq == 'W':
            idx_future = pd.date_range(start=max(self._train_dt.index),
                                       end=max(self._train_dt.index) + datetime.timedelta(
                                           weeks=n_forecast - 1), freq='W')
        elif self.freq == 'M':
            idx_future = pd.date_range(start=max(self._train_dt.index),
                                       end=max(self._train_dt.index) + relativedelta(months=+(n_forecast - 1)),
                                       freq='M')
        return idx_future

    def _plot_forecast(self, y, yhat, forecast, _id):
        """
        Plot forecasted values
        """
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception("Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")
        #
        try:
            assert self.forecast is not None
        except AssertionError:
            self._uvts_cls_logger.exception("Neither ts_test(...) nor ts_forecast(...) have been called yet!")
            sys.exit("STOP")

        fig, axes = plt.subplots(2, 1, figsize=(20, 7), sharex=True)
        #
        axes[0].plot(pd.Series(yhat, index=self._train_dt.index), color='y', linewidth=2.0)
        axes[0].plot(pd.Series(y, index=self._train_dt.index), color='b')
        #
        if self.residuals_forecast is not None:
            axes[0].plot(self.ts_df, color='b')
        axes[0].plot(forecast, color='darkgreen')
        #
        if self.lower_conf_int is not None and self.upper_conf_int is not None:
            axes[0].fill_between(self.lower_conf_int.index,
                                 self.lower_conf_int,
                                 self.upper_conf_int,
                                 color='k', alpha=.15)
        axes[0].set_ylabel("Fit and Forecast/Validation")
        axes[0].set_title("Real (blue), estimated (yellow) and forecasted values, " + str(_id))
        #
        if self.residuals_forecast is not None:
            axes[1].plot(pd.concat([self.residuals, self.residuals_forecast], axis=0), color='r')
        axes[1].plot(self.residuals, color="r")

        if self.upper_whisker_res is not None:
            axes[1].axhline(y=self.upper_whisker_res,
                            xmin=0,
                            xmax=1, color='m',
                            label='upper_whisker',
                            linestyle='--', linewidth=1.5)
            axes[1].axhline(y=-self.upper_whisker_res,
                            xmin=0,
                            xmax=1, color='m',
                            label='upper_whisker',
                            linestyle='--', linewidth=1.5)
        axes[1].set_ylabel("Residuals")
        axes[1].set_title("Difference between model output and the real data both, for fitted "
                          "and forecasted and +/- upper whisker or confidence intervals, " + str(_id))

        return fig, axes

    def ts_decompose(self, params=None):
        """
        Decomposes the time series into trend, seasonal and residual components
        """
        if params is None:
            params = dict({'model': 'additive',
                           'freq': 1})
        try:
            assert isinstance(params, dict)
        except AssertionError:
            self._uvts_cls_logger.exception("Dictionary is expected for parameters!")
            sys.exit("STOP")

        try:
            assert 'model' in list(params.keys())
        except AssertionError:
            self._uvts_cls_logger.exception("Unexpected dictionary keys. At least decomposition "
                                            "model must be supplied!")
            sys.exit("STOP")

        if 'freq' not in list(params.keys()):
            params['freq'] = 1

        try:
            if self.ts_df.index.freq is not None:
                res = seasonal_decompose(self.ts_df.loc[:, 'y'], model=params['model'])
            else:
                res = seasonal_decompose(self.ts_df.loc[:, 'y'], model=params['model'], freq=params['freq'])

        except ValueError:
            self._uvts_cls_logger.exception("ValueError, seasonal_decompose error")
        else:
            self.seasonal = res.seasonal
            self.trend = res.trend
            self.baseline = self.seasonal + self.trend
            self.residuals = res.resid
            self.upper_whisker_res = self.residuals.mean() + 1.5 * (
                    self.residuals.quantile(0.75) - self.residuals.quantile(0.25))

    def plot_decompose(self):
        try:
            assert self.seasonal is not None
        except AssertionError:
            self.ts_decompose()

        fig, axes = plt.subplots(4, 1, figsize=(20, 7), sharex=True)
        axes[0].plot(self.trend)
        axes[0].set_title("Trend")
        #
        axes[1].plot(self.seasonal)
        axes[1].set_title("Seasonality")
        #
        axes[2].plot(self.baseline)
        axes[2].set_title("Baseline")
        #
        axes[3].plot(self.residuals)
        axes[3].set_title("Residuals")
        #
        if self.upper_whisker_res is not None:
            axes[3].axhline(y=self.upper_whisker_res,
                            xmin=0,
                            xmax=1, color='m',
                            label='upper_whisker',
                            linestyle='--', linewidth=1.5)
            axes[3].axhline(y=-self.upper_whisker_res,
                            xmin=0,
                            xmax=1, color='m',
                            label='upper_whisker',
                            linestyle='--', linewidth=1.5)

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_fit(self):
        # stop the delegation chain
        assert not hasattr(super(), 'ts_fit')

    # root mean squared error or rmse
    def measure_rmse(self):
        try:
            assert self.residuals_forecast is not None
        except AssertionError:
            self._uvts_cls_logger.exception("AssertionError occurred, Cannot compute RMSE! Check your object mode")

        self.rmse = np.sqrt(np.square(self.residuals_forecast).mean())
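The residual bookkeeping above reduces to three small formulas: residuals are observed minus fitted values, the whisker is mean plus 1.5 times the interquartile range of the residuals, and the test error is the root mean squared residual. A minimal standalone sketch of those computations on synthetic data (the series below are purely illustrative and not part of the class):
import numpy as np
import pandas as pd

# illustrative observed and fitted series on a daily index
idx = pd.date_range("2020-01-01", periods=100, freq="D")
y = pd.Series(np.sin(np.linspace(0, 10, 100)) + np.random.normal(0, 0.1, 100), index=idx)
yhat = pd.Series(np.sin(np.linspace(0, 10, 100)), index=idx)

# residuals: observed minus fitted values, as in _residuals()
residuals = pd.Series(np.asarray(y) - np.asarray(yhat), index=idx)

# upper whisker: mean + 1.5 * IQR of the residuals
upper_whisker = residuals.mean() + 1.5 * (residuals.quantile(0.75) - residuals.quantile(0.25))

# root mean squared error, as in measure_rmse()
rmse = np.sqrt(np.square(residuals).mean())
print(upper_whisker, rmse)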
class AutoARIMAForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using pmdarima.auto_arima for forecasting

    Attributes
    ----------
    ref. to https://pypi.org/project/pmdarima/
    https://www.alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.AutoARIMA.html#pmdarima.arima.AutoARIMA
    _start_p: int
        The starting value for p
    _start_q: int
        The starting value for q
    _test: str
       Test for determining the value of d
    _max_p: int
        The maximal value for p: all values between _start_p and this one will be tried out
    _max_q: int
        The maximal value for q: all values between _start_q and this one will be tried out
    _d: int
        The order of first-differencing (d). If None, this value will be determined automatically.
    _seasonal: bool
        Seasonal component yes/no
    _D:
        The order of the seasonal differencing. If None, the value will automatically be selected based on
        the results of the seasonal_test.
    _start_P: int
         The starting value for P
    _start_Q: int
         The starting value for Q
    _max_P: int
         The maximum value for P
    _max_Q: int
         The maximum value for Q
    _seasonal_periods (m in original package): int
        The period for seasonal differencing, m refers to the number of periods in each season.
        For example, m is 4 for quarterly data, 12 for monthly data, or 1 for annual (non-seasonal) data.
        Default is 1. Note that if m == 1 (i.e., is non-seasonal),
        seasonal will be set to False.
     _aarima_trend: str or iterable, default=’c’, ref. http://www.alkaline-ml.com/pmdarima/1.0.0/modules/generated/pmdarima.arima.auto_arima.html
        Parameter controlling the deterministic trend polynomial A(t). Can be specified as a string where ‘c’
        indicates a constant (i.e. a degree zero component of the trend polynomial),
        ‘t’ indicates a linear trend with time, and ‘ct’ is both.
        Can also be specified as an iterable defining the polynomial as
        in numpy.poly1d, where [1, 1, 0, 1] would denote a + bt + ct^3.
    _random : bool, optional (default=False)
        Auto_arima provides the capability to perform a “random search” over a hyper-parameter space.
        If random is True, rather than perform an exhaustive search or stepwise search, only n_fits
        ARIMA models will be fit (stepwise must be False for this option to do anything).
    _n_fits : int, optional (default=10)
        If random is True and a “random search” is going to be performed, n_iter is the number of ARIMA models to be fit.
    _stepwise : bool, optional (default=True)
        Whether to use the stepwise algorithm outlined in Hyndman and Khandakar (2008) to identify the
        optimal model parameters.
        The stepwise algorithm can be significantly faster than fitting all (or a random subset of)
        hyper-parameter combinations and is less likely to over-fit the model.
    _information_criterion : str, optional (default=’aic’)
        The information criterion used to select the best ARIMA model.
        One of pmdarima.arima.auto_arima.VALID_CRITERIA, (‘aic’, ‘bic’, ‘hqic’, ‘oob’).
    _scoring : str, optional (default=’mse’)
        If performing validation (i.e., if out_of_sample_size > 0), the metric to use for scoring the
        out-of-sample data. One of {‘mse’, ‘mae’}
    _out_of_sample_size : int, optional (default=0)
        The ARIMA class can fit only a portion of the data if specified, in order to retain an “out of bag” sample score.
        This is the number of examples from the tail of the time series to hold out and use as validation examples.
        The model will not be fit on these samples, but the observations will be added into the model’s endog and exog
        arrays so that future forecast values originate from the end of the endogenous vector.
    _aarima_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the auto_arima model to time series
    ts_diagnose()
       Diagnoses the fitted model
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
       Forecasts time series and plots the results
    plot_forecast()
       Plots forecasted time-series
    """
    def __init__(self,
                 start_p=1,
                 start_q=1,
                 max_p=3,
                 max_q=3,
                 d=None,
                 D=None,
                 start_P=1,
                 start_Q=1,
                 max_P=3,
                 max_Q=3,
                 random=False,
                 n_fits=10,
                 stepwise=True,
                 information_criterion='aic',
                 scoring='mse',
                 out_of_sample_size=0,
                 **kwds):
        """Initializes the object AutoARIMAForecaster"""
        self._aarima_logger = Logger("AutoARIMA")
        self._aarima_seasonal = False
        self._aarima_trend = 'c'
        self._start_p = start_p
        self._start_q = start_q
        self._max_p = max_p
        self._max_q = max_q
        self._d = d
        self._D = D
        self._start_P = start_P
        self._start_Q = start_Q
        self._max_P = max_P
        self._max_Q = max_Q
        self._random = random
        self._n_fits = n_fits
        self._stepwise = stepwise
        self._information_criterion = information_criterion
        self._scoring = scoring
        self._out_of_sample_size = out_of_sample_size

        try:
            super(AutoARIMAForecaster, self).__init__(**kwds)
        except TypeError:
            self._aarima_logger.exception("Arguments missing...")

        AutoARIMAForecaster._init_trend(self)
        AutoARIMAForecaster._init_seasonal(self)

        AutoARIMAForecaster.assertions(self)

        self._id = 'Auto_ARIMA'

    def _init_trend(self):
        if self._trend is None or self._trend == 'constant':
            self._aarima_trend = 'c'
        elif self._trend == 'linear':
            self._aarima_trend = 't'
        elif self._trend == 'constant linear':
            self._aarima_trend = 'ct'
        elif self._trend in ['additive', 'add']:
            # self._aarima_logger.warning("The trend " + str(self._trend) + " not supported by AutoARIMA! "
            #                                                              "Assuming first order trend")
            self._aarima_trend = 'ct'
        elif self._trend in ['multiplicative', 'mul']:
            # self._aarima_logger.warning("The trend " + str(self._trend) + " not supported by AutoARIMA! "
            #                                                              "Assuming first order trend")
            self._aarima_trend = 'ct'

    def _init_seasonal(self):
        if self._seasonal is None:
            self._aarima_seasonal = False
        elif isinstance(self._seasonal, bool):
            self._aarima_seasonal = self._seasonal
        else:
            self._aarima_seasonal = False

    def __copy__(self):
        """Copies the object"""
        result = super(AutoARIMAForecaster, self).__copy__()

        result._start_p = self._start_p
        result._start_q = self._start_q
        result._test = self._test
        result._max_p = self._max_p
        result._max_q = self._max_q
        result._d = self._d
        result._aarima_trend = self._aarima_trend
        result._aarima_seasonal = self._aarima_seasonal
        result._D = self._D
        result._start_P = self._start_P
        result._start_Q = self._start_Q
        result._max_P = self._max_P
        result._max_Q = self._max_Q
        result._random = self._random
        result._n_fits = self._n_fits
        result._stepwise = self._stepwise
        result._information_criterion = self._information_criterion
        result._scoring = self._scoring
        result._out_of_sample_size = self._out_of_sample_size

        result._aarima_logger = self._aarima_logger
        return result

    def assertions(self):
        try:
            assert self.hyper_params is None
        except AssertionError:
            self._aarima_logger.exception(
                "Hyper parameters does not make sence for Auto ARIMA! "
                "Please specify parameters")
            sys.exit("STOP")

        try:
            assert self._aarima_trend is not None
        except AssertionError:
            self._aarima_logger.exception(
                "Assertion Error, trend cannot be None!")
            sys.exit("STOP")
        try:
            assert isinstance(self._aarima_seasonal, bool)
        except AssertionError:
            self._aarima_logger.exception(
                "Assertion Error, seasonal must be boolean True/False")
            sys.exit("STOP")

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameter values"""
        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k == 'time_format':
                self.time_format = v
            elif k == 'start_p':
                self._start_p = v
            elif k == 'max_p':
                self._max_p = v
            elif k == 'start_q':
                self._start_q = v
            elif k == 'max_q':
                self._max_q = v
            elif k == 'd':
                self._d = v
            elif k == 'trend':
                self._aarima_trend = v
            elif k == 'seasonal':
                self._aarima_seasonal = v
            elif k == 'seasonal_periods':
                self._seasonal_periods = v
            elif k == 'start_P':
                self._start_P = v
            elif k == 'max_P':
                self._max_P = v
            elif k == 'start_Q':
                self._start_Q = v
            elif k == 'max_Q':
                self._max_Q = v
            elif k == 'D':
                self._D = v
            elif k == 'random':
                self._random = v
            elif k == 'n_fits':
                self._n_fits = v
            elif k == 'stepwise':
                self._stepwise = v
            elif k == 'information_criterion':
                self._information_criterion = v
            elif k == 'scoring':
                self._scoring = v
            elif k == 'out_of_sample_size':
                self._out_of_sample_size = v
        self.assertions()

        return self

    def get_params_dict(self):
        """Gets parameter values as dictionary"""
        return {
            'start_p': self._start_p,
            'start_q': self._start_q,
            'test': self._test,
            'max_p': self._max_p,
            'max_q': self._max_q,
            'd': self._d,
            'trend': self._aarima_trend,
            'seasonal': self._aarima_seasonal,
            'seasonal_periods': self._seasonal_periods,
            'D': self._D,
            'start_P': self._start_P,
            'start_Q': self._start_Q,
            'max_P': self._max_P,
            'max_Q': self._max_Q,
            'random': self._random,
            'n_fits': self._n_fits,
            'stepwise': self._stepwise,
            'information_criterion': self._information_criterion,
            'scoring': self._scoring,
            'out_of_sample_size': self._out_of_sample_size
        }

    def ts_fit(self, suppress=False):
        """Fit Auto ARIMA to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        self._prepare_fit()
        self.ts_split()
        self._init_trend()
        self._init_seasonal()

        ts_df = self._train_dt.copy()
        """
        Fit
        """
        self._aarima_logger.info("Trying to fit the Auto ARIMA model....")
        # tic
        start = time()
        try:
            if not suppress:
                self._aarima_logger.info("...via using parameters\n")
                print_attributes(self)

            self.model_fit = pm.auto_arima(
                ts_df,
                start_p=self._start_p,
                start_q=self._start_q,
                test=self._test,
                max_p=self._max_p,
                m=self._seasonal_periods,
                d=self._d,
                seasonal=self._aarima_seasonal,
                D=self._D,
                start_P=self._start_P,
                max_P=self._max_P,
                trend=self._aarima_trend,
                trace=True,
                error_action='ignore',
                suppress_warnings=True,
                stepwise=self._stepwise,
                random=self._random,
                n_fits=self._n_fits,
                scoring=self._scoring,
                out_of_sample_size=self._out_of_sample_size,
                information_criterion=self._information_criterion)
        except (Exception, ValueError):
            self._aarima_logger.exception("Exception occurred in the fit...")
            self._aarima_logger.warning("Will try to reset some parameters...")
            try:
                self.model_fit = pm.auto_arima(
                    ts_df,
                    start_p=self._start_p,
                    start_q=self._start_q,
                    test=self._test,
                    max_p=self._max_p,
                    m=1,
                    d=0,
                    seasonal=self._aarima_seasonal,
                    D=0,
                    start_P=self._start_P,
                    max_P=self._max_P,
                    trend=self._aarima_trend,
                    trace=True,
                    error_action='ignore',
                    suppress_warnings=True,
                    stepwise=self._stepwise,
                    random=self._random,
                    n_fits=self._n_fits,
                    scoring=self._scoring,
                    out_of_sample_size=self._out_of_sample_size,
                    information_criterion=self._information_criterion)
            except (Exception, ValueError):
                self._aarima_logger.exception("Exception occurred")
                self._aarima_logger.error("Please try other parameters!")
                self.model_fit = None

        else:
            # toc
            self._aarima_logger.info("Time elapsed: {} sec.".format(time() -
                                                                    start))
            #
            self._aarima_logger.info("Model successfully fitted to the data!")
            self._aarima_logger.info("The chosen model AIC: " +
                                     str(self.model_fit.aic()))

            # Fitted values
            self._aarima_logger.info(
                "Computing fitted values and residuals...")
            self.fittedvalues = pd.Series(self.model_fit.predict_in_sample(
                start=0, end=(len(ts_df) - 1)),
                                          index=ts_df.index)
            # Residuals
            super(AutoARIMAForecaster, self)._residuals()

            self._aarima_logger.info("Done.")
            return self

    def ts_diagnose(self):
        """Diagnose the model"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._aarima_logger.exception(
                "Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.model_fit.plot_diagnostics(figsize=(9, 3.5))
        self.plot_residuals()

    def plot_residuals(self):
        """Plot the residuals"""
        fig, axis = super(AutoARIMAForecaster, self)._plot_residuals(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues),
            _id=" Auto ARIMA")

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(AutoARIMAForecaster, self)._check_ts_test() < 0:
            return

        n_forecast = len(self._test_dt)

        self._aarima_logger.info(
            "Evaluating the fitted ARIMA model on the test data...")
        future, confint = self.model_fit.predict(n_periods=n_forecast,
                                                 return_conf_int=True)
        self.forecast = pd.Series(future, index=self._test_dt.index)
        self.lower_conf_int = pd.Series(confint[:, 0],
                                        index=self._test_dt.index)
        self.upper_conf_int = pd.Series(confint[:, 1],
                                        index=self._test_dt.index)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt['y']) -
                                            np.asarray(self.forecast),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._aarima_logger.info("RMSE on test data: {}".format(self.rmse))

        # plot
        if show_plot:
            self.plot_forecast()

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(AutoARIMAForecaster,
                           self)._check_ts_forecast(n_forecast)
        #
        self._aarima_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._aarima_logger.info("Forecasting next " + str(n_forecast) +
                                 str(self.ts_df.index.freq))
        #
        future, confint = self.model_fit.predict(n_periods=n_forecast,
                                                 return_conf_int=True)
        idx_future = self._gen_idx_future(n_forecast=n_forecast)
        self.forecast = pd.Series(future, index=idx_future)
        if self.lower_conf_int is None and self.upper_conf_int is None:
            self.lower_conf_int = pd.Series(confint[:, 0], index=idx_future)
            self.upper_conf_int = pd.Series(confint[:, 1], index=idx_future)
        else:
            self.lower_conf_int = pd.concat([
                self.lower_conf_int,
                pd.Series(confint[:, 0], index=idx_future)
            ],
                                            axis=0)
            self.upper_conf_int = pd.concat([
                self.upper_conf_int,
                pd.Series(confint[:, 1], index=idx_future)
            ],
                                            axis=0)

        self.residuals_forecast = None
        # self.plot_forecast()
        return self

    def plot_forecast(self):
        """Plot forecasted values"""
        fig, axis = super(AutoARIMAForecaster, self)._plot_forecast(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues),
            forecast=self.forecast,
            _id='Auto ARIMA')
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
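A rough programmatic usage sketch for AutoARIMAForecaster, assuming the class is importable from this module and that ts_df is a single-column frame indexed by timestamps; the data construction and the chosen parameter values are illustrative only:
import numpy as np
import pandas as pd

# illustrative daily series; in practice ts_df would come from a CSV reader
idx = pd.date_range("2020-01-01", periods=200, freq="D")
ts_df = pd.DataFrame({"y": np.random.normal(10.0, 1.0, 200)}, index=idx)

aarima = AutoARIMAForecaster(ts_df=ts_df, freq='D', n_test=20, n_val=0,
                             seasonal=False, max_p=2, max_q=2)
# inspect and adjust the search space before fitting
print(aarima.get_params_dict())
aarima.set_params(stepwise=True, information_criterion='bic')

aarima.ts_fit(suppress=True)
aarima.ts_test(show_plot=False)              # RMSE on the held-out n_test points
aarima = aarima.ts_forecast(n_forecast=14, suppress=True)
print(aarima.forecast.tail())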
class ExponentialSmoothingForecaster(UVariateTimeSeriesClass):
    """Univariate time series child class using simple, double or triple exponential smoothing for forecasting

    Attributes
    ----------
    ref. to e.g., https://machinelearningmastery.com/exponential-smoothing-for-time-series-forecasting-in-python/
    _optimized: bool
        Whether to optimize smoothing coefficients
    _smoothing_level: float
       (alpha): the smoothing coefficient for the level
    _es_trend: str
        The type of trend component, as either “add” for additive or “mul” for multiplicative.
        Modeling the trend can be disabled by setting it to None
    _damped: bool
        Whether or not the trend component should be damped, either True or False
    _es_seasonal: str
        The type of seasonal component, as either “add” for additive or “mul” for multiplicative.
        Modeling the seasonal component can be disabled by setting it to None
    _seasonal_periods: int
         The number of time steps in a seasonal period, e.g. 12 for 12 months in a yearly seasonal structure
    _smoothing_slope: float
       (beta): the smoothing coefficient for the trend
    _smoothing_seasonal: float
       (gamma): the smoothing coefficient for the seasonal component
    _damping_slope: float
       (phi): the coefficient for the damped trend
    _use_boxcox: {True, False, ‘log’, float}
       Should the Box-Cox transform be applied to the data first? If ‘log’ then apply the log.
       If float then use lambda equal to float
    _remove_bias: bool
       Remove bias from forecast values and fitted values by enforcing that the average residual is equal to zero.
    _use_brute: bool
       Search for good starting values using a brute force (grid) optimizer.
       If False, a naive set of starting values is used.
    _expsm_logger: Logger
       The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the exponential smoothing model to the time series
    ts_diagnose()
       Diagnoses the fitted model
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
       Forecasts time series and plots the results
    plot_forecast()
       Plots forecasted time-series
    """
    def __init__(self,
                 smoothing_level=None,
                 optimized=False,
                 damped=False,
                 smoothing_slope=None,
                 smoothing_seasonal=None,
                 damping_slope=None,
                 use_boxcox=False,
                 remove_bias=False,
                 use_brute=False,
                 **kwds):
        """Initializes the object ExponentialSmoothingForecaster"""

        self._expsm_logger = Logger("ExpSmoothing")
        self._es_trend = None
        self._es_seasonal = None

        try:
            super(ExponentialSmoothingForecaster, self).__init__(**kwds)
        except TypeError:
            self._expsm_logger.exception("Arguments missing...")

        self._init_trend()
        self._init_seasonal()

        self._smoothing_level = smoothing_level
        self._optimized = optimized
        self._damped = damped
        self._smoothing_slope = smoothing_slope
        self._smoothing_seasonal = smoothing_seasonal
        self._damping_slope = damping_slope
        self._use_boxcox = use_boxcox
        self._remove_bias = remove_bias
        self._use_brute = use_brute

        self.assertions()

        self._id = 'ExponentialSmoothing'

    def _init_trend(self):
        if self._trend is None or self._trend == 'constant':
            self._es_trend = None
        elif self._trend in ['linear', 'constant linear']:
            # self._expsm_logger.warning("The trend " + self(self._trend) + " not supported in Exponential Smoothing! "
            #                                                              "Assuming additive trend")
            self._es_trend = 'add'
        else:
            self._es_trend = self._trend

    def _init_seasonal(self):
        if isinstance(self._seasonal, bool):
            if self._seasonal:
                # self._expsm_logger.warning("Assuming additive seasonal component in Exponential Smoothing")
                self._es_seasonal = 'add'
            else:
                self._es_seasonal = None
        else:
            self._es_seasonal = self._seasonal

    def __copy__(self):
        """Copies the object"""
        result = super(ExponentialSmoothingForecaster, self).__copy__()

        result._smoothing_level = self._smoothing_level
        result._optimized = self._optimized
        result._es_trend = self._es_trend
        result._es_seasonal = self._es_seasonal
        result._damped = self._damped
        result._smoothing_slope = self._smoothing_slope
        result._smoothing_seasonal = self._smoothing_seasonal
        result._damping_slope = self._damping_slope
        result._use_boxcox = self._use_boxcox
        result._remove_bias = self._remove_bias
        result._use_brute = self._use_brute
        result._expsm_logger = self._expsm_logger

        return result

    def assertions(self):
        try:
            assert (self.hyper_params is not None
                    and len(self.hyper_params) != 0
                    and 'trend' in list(self.hyper_params.keys())) or (
                        self._es_trend is None or self._es_trend
                        in ['add', 'mul', 'additive', 'multiplicative'])
        except AssertionError:
            self._expsm_logger.exception(
                "Assertion Error, trend must be in ['add','mul',"
                "'additive','multiplicative']")
            sys.exit("STOP")
        try:
            assert self._es_seasonal is None or isinstance(
                self._es_seasonal, str) and self._es_seasonal in [
                    'add', 'mul', 'additive', 'multiplicative'
                ]
        except AssertionError:
            self._expsm_logger.exception(
                "Assertion Error, seasonal must be in ['add','mul',"
                "'additive','multiplicative']")
            sys.exit("STOP")

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters"""
        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'smoothing_level':
                self._smoothing_level = v
            elif k == 'optimized':
                self._optimized = v
            elif k == 'trend':
                self._es_trend = v
            elif k == 'seasonal':
                self._es_seasonal = v
            elif k == 'seasonal_periods':
                self._seasonal_periods = v
            elif k == 'damped':
                self._damped = v
            elif k == 'smoothing_slope':
                self._smoothing_slope = v
            elif k == 'smoothing_seasonal':
                self._smoothing_seasonal = v
            elif k == 'damping_slope':
                self._damping_slope = v
            elif k == 'use_boxcox':
                self._use_boxcox = v
            elif k == 'remove_bias':
                self._remove_bias = v
            elif k == 'use_brute':
                self._use_brute = v
        self.assertions()

        return self

    def get_params_dict(self):
        """Gets parameters as dictionary"""
        return {
            'smoothing_level': self._smoothing_level,
            'optimized': self._optimized,
            'trend': self._es_trend,
            'seasonal': self._es_seasonal,
            'seasonal_periods': self._seasonal_periods,
            'damped': self._damped,
            'smoothing_slope': self._smoothing_slope,
            'smoothing_seasonal': self._smoothing_seasonal,
            'damping_slope': self._damping_slope,
            'use_boxcox': self._use_boxcox,
            'remove_bias': self._remove_bias,
            'use_brute': self._use_brute
        }

    def ts_fit(self, suppress=False):
        """Fit Exponential Smoothing to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # a very important command here to avoid endless loop
            self.hyper_params = None
            self._expsm_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
            #
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._expsm_logger.info("***** Finished grid search *****")
        else:
            self._prepare_fit()
            self.ts_split()
            self._init_trend()
            self._init_seasonal()

            ts_df = self._train_dt.copy()

            # Fit
            print("Trying to fit the exponential smoothing model....")
            # tic
            start = time()
            try:
                if not suppress:
                    self._expsm_logger.info("...via using parameters\n")
                    print_attributes(self)
                #
                self.model_fit = ExponentialSmoothing(
                    ts_df,
                    freq=self.freq,
                    trend=self._es_trend,
                    seasonal=self._es_seasonal,
                    seasonal_periods=self._seasonal_periods,
                    damped=self._damped).fit(
                        smoothing_level=self._smoothing_level,
                        smoothing_slope=self._smoothing_slope,
                        smoothing_seasonal=self._smoothing_seasonal,
                        damping_slope=self._damping_slope,
                        optimized=self._optimized,
                        use_boxcox=self._use_boxcox,
                        remove_bias=self._remove_bias)
                # toc
                self._expsm_logger.info("Time elapsed: {} sec.".format(time() -
                                                                       start))
            except (Exception, ValueError):
                self._expsm_logger.exception("Exponential Smoothing error...")
            else:
                #
                self._expsm_logger.info(
                    "Model successfully fitted to the data!")

                # Fitted values
                self._expsm_logger.info(
                    "Computing fitted values and residuals...")
                self.fittedvalues = self.model_fit.fittedvalues

                # Residuals
                super(ExponentialSmoothingForecaster, self)._residuals()
                self._expsm_logger.info("Done.")

        return self

    def ts_diagnose(self):
        """Diagnose the model"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._expsm_logger.exception(
                "Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

    def plot_residuals(self):
        """Plot the residuals"""
        fig, axis = super(ExponentialSmoothingForecaster,
                          self)._plot_residuals(
                              y=np.asarray(self._train_dt['y']),
                              yhat=np.asarray(self.fittedvalues),
                              _id="Exponential Smoothing")

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""

        if super(ExponentialSmoothingForecaster, self)._check_ts_test() < 0:
            return

        n_forecast = len(self._test_dt)

        self._expsm_logger.info(
            "Evaluating the fitted model on the test data...")
        self.forecast = self.model_fit.forecast(n_forecast)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt['y']) -
                                            np.asarray(self.forecast),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._expsm_logger.info("RMSE on test data: {}".format(self.rmse))
        # plot
        if show_plot:
            self.plot_forecast()

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(ExponentialSmoothingForecaster,
                           self)._check_ts_forecast(n_forecast)
        #
        self._expsm_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._expsm_logger.info("Forecasting next " + str(n_forecast) +
                                str(self.freq))
        #
        self.forecast = self.model_fit.forecast(n_forecast)

        self.residuals_forecast = None
        # self.plot_forecast()
        return self

    def plot_forecast(self):
        """Plot forecasted values"""
        fig, axis = super(ExponentialSmoothingForecaster, self)._plot_forecast(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues),
            forecast=self.forecast,
            _id='Exponential Smoothing')
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
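A similar hedged sketch for ExponentialSmoothingForecaster without the grid-search path; the trend string follows the statsmodels conventions listed in the docstring, and the data construction is again illustrative:
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=200, freq="D")
ts_df = pd.DataFrame({"y": 10.0 + np.random.normal(0.0, 1.0, 200)}, index=idx)

expsm = ExponentialSmoothingForecaster(ts_df=ts_df, freq='D', n_test=20,
                                       trend='additive', seasonal=False,
                                       smoothing_level=0.4, optimized=False)
expsm.ts_fit(suppress=True)
expsm.ts_test(show_plot=False)               # RMSE on the held-out n_test points
expsm = expsm.ts_forecast(n_forecast=14, suppress=False)
print(expsm.get_params_dict())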
class SARIMAForecaster(ARIMAForecaster):
    """Univariate time series child class for forecasting using SARIMA

    Attributes
    ----------
    _order: tuple
       a tuple of p, d, q
    _s_order: tuple
        A tuple of seasonal components (P, D, Q, lag)
    _sarima_logger: Logger
        The logger for logging
    _sarima_trend: str
        A parameter for controlling a model of the deterministic trend as one of ‘n’,’c’,’t’,’ct’ for no trend,
        constant, linear, and constant with linear trend, respectively.

    Methods
    ----------
    assertions()
       Assertion tests, must be overridden
    set_params()
       Sets new parameter values
    get_params_dict()
        Gets parameter values as a dictionary
    ts_fit()
       Fits the SARIMA model to the time series
    ts_diagnose()
       Diagnoses the fitted model
    plot_residuals()
       Generates residual plots
    ts_test()
       Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
       Forecasts time series and plots the results
    plot_forecast()
       Plots forecasted time-series
    """
    def __init__(self, s_order=(1, 0, 1, 1), **kwds):
        """Initializes the object SARIMAForecaster"""
        self._sarima_logger = Logger("SARIMA")

        self._s_order = s_order
        self._sarima_trend = ''

        try:
            super(SARIMAForecaster, self).__init__(**kwds)
        except TypeError as e:
            self._sarima_logger.exception("Arguments missing...{}".format(e))

        self._model = None
        self._init_trend()
        self.assertions()

        self._id = 'SARIMA'

    def _init_trend(self):
        if self._trend == 'constant':
            self._sarima_trend = 'c'
        elif self._trend is None:
            self._sarima_trend = 'n'
        elif self._trend == 'linear':
            self._sarima_trend = 't'
        elif self._trend == 'constant linear':
            self._sarima_trend = 'ct'
        elif self._trend in ['additive', 'add']:
            # self._sarima_logger.warning("The trend " + str(self._trend) + " is not supported by SARIMA! "
            #                                                               "Assuming linear trend")
            self._sarima_trend = 't'
        elif self._trend in ['multiplicative', 'mul']:
            # self._sarima_logger.warning(
            #    "The trend " + str(self._trend) + " is not supported by ARIMA! Assuming linear trend")
            self._sarima_trend = 't'

    def assertions(self):
        try:
            assert isinstance(self._s_order, tuple)
        except AssertionError:
            self._sarima_logger.exception(
                "Assertion exception occurred, tuple expected")
            sys.exit("STOP")
        try:
            assert (self.hyper_params is not None
                    and len(self.hyper_params) != 0
                    and 'trend' in list(self.hyper_params.keys())) or (
                        self._sarima_trend is None
                        or self._sarima_trend in ['n', 'c', 't', 'ct'])
        except AssertionError:
            self._sarima_logger.exception(
                "Assertion Error, trend must be in ['n', 'c', 't', 'ct']")
            sys.exit("STOP")
        try:
            assert isinstance(self._seasonal, bool)
        except AssertionError:
            self._sarima_logger.exception(
                "Assertion Error, seasonal must be boolean True/False in SARIMA"
            )
            sys.exit("STOP")

    def __copy__(self):
        """Copies the object"""
        result = super(SARIMAForecaster, self).__copy__()

        result._s_order = self._s_order
        result._sarima_trend = self._sarima_trend
        result._sarima_logger = self._sarima_logger
        return result

    def set_params(self, p_dict=None, **kwargs):
        """Sets new parameters"""
        params_dict = kwargs
        if p_dict is not None:
            params_dict = p_dict
        #
        for k, v in params_dict.items():
            if k == 'ts_df':
                self.ts_df = v
            elif k == 'freq':
                self.freq = v
            elif k == 'n_test':
                self.n_test = v
            elif k == 'n_val':
                self.n_val = v
            elif k in ('time_format', 'timeformat'):
                self.time_format = v
            elif k == 's_order':
                self._s_order = v
            elif k == 'order':
                self._order = v
            elif k == 'test':
                self._test = v
            elif k == 'trend':
                self._sarima_trend = v
        self.assertions()

        return self

    def get_params_dict(self):
        """Gets parameters as a dictionary"""
        return {
            'order': self._order,
            'test': self._test,
            'trend': self._sarima_trend,
            's_order': self._s_order,
        }

    def ts_fit(self, suppress=False):
        """Fit Seasonal ARIMA to the time series data.

         Parameters:
         ----------
         suppress: bool
            Suppress or not some of the output messages
         """
        if self.hyper_params is not None:
            self._gs.set_forecaster(self)
            self._gs.set_hyper_params(self.hyper_params)
            # a very important command here to avoid endless loop
            self.hyper_params = None
            self._sarima_logger.info("***** Starting grid search *****")
            self._gs = self._gs.grid_search(suppress=suppress, show_plot=False)
            #
            self.best_model = self._gs.best_model
            self.__dict__.update(self.best_model['forecaster'].__dict__)
            self._sarima_logger.info("***** Finished grid search *****")
        else:
            self._prepare_fit()
            self.ts_split()
            self._init_trend()

            ts_df = self._train_dt.copy()

            # Fit
            self._sarima_logger.info("Trying to fit the sarima model....")
            # tic
            start = time()
            try:
                if not suppress:
                    self._sarima_logger.info("...via using parameters\n")
                    print_attributes(self)

                self._model = SARIMAX(ts_df['y'],
                                      order=self._order,
                                      seasonal_order=self._s_order,
                                      trend=self._sarima_trend,
                                      enforce_stationarity=False,
                                      enforce_invertibility=False,
                                      freq=self.freq)
                self.model_fit = self._model.fit(disp=1)
            except (Exception, ValueError):
                self._sarima_logger.exception(
                    "Exception occurred in the fit...")
                self._sarima_logger.error("Please try other parameters!")
                self.model_fit = None

            else:
                # toc
                self._sarima_logger.info(
                    "Time elapsed: {} sec.".format(time() - start))
                self._sarima_logger.info(
                    "Model successfully fitted to the data!")
                if not suppress:
                    self._sarima_logger.info("The model summary: " +
                                             str(self.model_fit.summary()))

                # Fitted values
                self._sarima_logger.info(
                    "Computing fitted values and residuals...")
                self.fittedvalues = self.model_fit.fittedvalues
                # prolong: statsmodels may return fewer fitted values than training points, so re-index and back-fill
                if len(self.fittedvalues) != len(self._train_dt):
                    self.fittedvalues = pd.DataFrame(
                        index=pd.date_range(ts_df.index[0],
                                            ts_df.index[len(ts_df) - 1],
                                            freq=self.freq),
                        columns=['dummy']).join(pd.DataFrame(
                            self.fittedvalues)).drop(['dummy'], axis=1)
                    self.fittedvalues = self.fittedvalues.reset_index()
                    self.fittedvalues.columns = self._ts_df_cols
                    self.fittedvalues.set_index('ds', inplace=True)
                    self.fittedvalues.y = self.fittedvalues.y.fillna(
                        method='bfill')

                #  Residuals
                super(SARIMAForecaster, self)._residuals()
                self._sarima_logger.info("Done.")
        return self

    def plot_residuals(self):
        """Plot the residuals"""
        fig, axis = super(SARIMAForecaster, self)._plot_residuals(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues).flatten(),
            _id="SARIMA")

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def ts_test(self, show_plot=True):
        """Test the fitted model if test data available"""
        if super(SARIMAForecaster, self)._check_ts_test() < 0:
            return

        n_forecast = len(self._test_dt)

        if self._mode == 'test':
            self._sarima_logger.info(
                "Evaluating the fitted SARIMA model on the test data...")
        elif self._mode == 'test and validate':
            self._sarima_logger.info(
                "Evaluating the fitted SARIMA model on the test and validation data..."
            )

        future = self.model_fit.predict(start=len(self._train_dt.index),
                                        end=len(self._train_dt.index) +
                                        n_forecast - 1,
                                        dynamic=True)

        self.forecast = pd.Series(future, index=self._test_dt.index)

        self.residuals_forecast = pd.Series(np.asarray(self._test_dt.y) -
                                            np.asarray(self.forecast),
                                            index=self._test_dt.index)
        self.measure_rmse()
        self._sarima_logger.info("RMSE on test data: {}".format(self.rmse))

        # plot
        if show_plot:
            self.plot_forecast()

        return self

    def ts_forecast(self, n_forecast, suppress=False):
        """Forecast time series over time frame in the future specified via n_forecast"""
        #
        n_forecast = super(SARIMAForecaster,
                           self)._check_ts_forecast(n_forecast)
        #
        self._sarima_logger.info("Fitting using all data....")
        self._mode = 'forecast'
        self.ts_fit(suppress=suppress)

        self._sarima_logger.info("Forecasting next " + str(n_forecast) +
                                 str(self.freq))
        #
        future = self.model_fit.predict(start=len(self._train_dt.index),
                                        end=len(self._train_dt.index) +
                                        (n_forecast - 1),
                                        dynamic=True)
        idx_future = self._gen_idx_future(n_forecast=n_forecast)
        self.forecast = pd.Series(future, index=idx_future)
        # self.forecast = future

        self.residuals_forecast = None
        self.plot_forecast()

        return self

    def plot_forecast(self):
        """Plot forecasted values"""
        fig, axis = super(SARIMAForecaster, self)._plot_forecast(
            y=np.asarray(self._train_dt['y']),
            yhat=np.asarray(self.fittedvalues).flatten(),
            forecast=self.forecast,
            _id='SARIMA')
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()
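And a corresponding sketch for SARIMAForecaster. The non-seasonal order tuple is assumed to be handled by the parent ARIMAForecaster (not shown in this excerpt) and is overridden here via set_params(); the weekly seasonal order and the data construction are illustrative only:
import numpy as np
import pandas as pd

idx = pd.date_range("2020-01-01", periods=200, freq="D")
ts_df = pd.DataFrame({"y": 10.0 + np.random.normal(0.0, 1.0, 200)}, index=idx)

sarima = SARIMAForecaster(ts_df=ts_df, freq='D', n_test=20,
                          trend='linear', seasonal=True,
                          s_order=(1, 0, 1, 7))
sarima.set_params(order=(1, 1, 1))           # non-seasonal (p, d, q)
sarima.ts_fit(suppress=True)
sarima = sarima.ts_test(show_plot=False)
sarima = sarima.ts_forecast(n_forecast=14, suppress=True)
print(sarima.get_params_dict())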
class UVariateTimeSeriesClass(object):
    """Univariate time series class

    Attributes
    ----------
    _ts_df_cols: list
        internal column names for dataframe that will be input to model
    ts_df: dataframe
        time series data frame
    freq: str
       frequency of the time series in pandas offset-alias notation, e.g. 'D', 'H', 'min'
    fill_method: str
       filling method for resampled data. Possible values are 'ffill' and 'interp1d'
    n_test: int
         number of units (defined by frequency, e.g. 6 days) to use as test data. 0 would mean no test data is generated.
    n_val: int
        similar to n_test, for validation
    time_format: str
        time format if time series data needs to be brought into datetime

    _mode: str
        defines the mode as 'test' or 'forecast'
    _train_dt: dataframe
        training data
    _test_dt: dataframe
        test data
    _val_dt: dataframe
        validation data

    model_fit:
      fitted model
    fittedvalues: series
       computed fitted values
    residuals: series
       residuals
    rmse: float
       RMSE on the test set (between the test data and the forecast on the test data)
    _gs: GridSearchClass
       The grid search class for model optimization in case hyper_parameters are specified
    hyper_params: dictionary
       The dictionary of hyper parameters or None if no model optimization wished
    best_model: dictionary
        The best model resulted from the grid search

    upper_whisker_res: float
       upper whisker for residuals
    lower_conf_int: series
      lower confidence interval
    upper_conf_int: series
      upper confidence interval

    _trend: str or iterable, default=’c’
        in AutoARIMA:
            ref. http://www.alkaline-ml.com/pmdarima/1.0.0/modules/generated/pmdarima.arima.auto_arima.html
            Parameter controlling the deterministic trend polynomial A(t). Can be specified as a string where ‘c’
            indicates a constant (i.e. a degree zero component of the trend polynomial),
            ‘t’ indicates a linear trend with time, and ‘ct’ is both.
            Can also be specified as an iterable defining the polynomial as
            in numpy.poly1d, where [1, 1, 0, 1] would denote a + bt + ct^3.
        in ARIMA:
            A parameter for controlling a model of the deterministic trend as one of ‘nc’ or ’c’.
            ‘c’ includes constant trend, ‘nc’ no constant for trend.
        in SARIMA:
            A parameter for controlling a model of the deterministic trend as one of ‘n’,’c’,’t’,’ct’ for no trend,
            constant, linear, and constant with linear trend, respectively.
        in ExponentialSmoothing:
            The type of trend component, as either “add” for additive or “mul” for multiplicative.
            Modeling the trend can be disabled by setting it to None

    _test: list or str
        in ARIMA:
           list of possible tests for determining d
        in AutoARIMA.
           test for determining the value of d, e.g. 'adf'

    _seasonal: bool or str
        in AutoARIMA
            Seasonal component yes/no

        in ExponentialSmoothing:
            The type of seasonal component, as either “add” for additive or “mul” for multiplicative.
            Modeling the seasonal component can be disabled by setting it to None

    _seasonal_periods: int
         The number of time steps in a seasonal period, e.g. 12 for 12 months in a yearly seasonal structure

    forecast: series
      computed forecasted values
    residuals_forecast: series
      residuals between forecasted and real values. Note, this variable exists only if test data exists

    Methods
    -------
    ts_transform()
         Transforms time series using log10 or box-cox
    ts_resample()
         Resamples time series at the chosen frequency freq
    ts_test()
         Evaluates fitted model on the test data, if this one has been generated
    ts_forecast()
         Forecasts time series and plots the results
    ts_decompose()
         Decomposes time series in _arr_seasonal, _arr_trend, residual(irregular) and _arr_baseline,
         and plots the results
    plot_decompose()
         Plots the results of ts_decompose()
    difference()
        Differences the time series given the lag (parameter 'lag')
    rolling_mean()
        Computes moving average given the window size
    rolling_variance()
        Computes moving variance given the window size
    test_adf():
         ADF test for stationarity
    test_kpss():
         KPSS test for stationarity
    ndiff()
        Determines value for diff parameter d
        All tests given in the parameter tests are applied
    acf_plots()
        Generates autocorrelation plots
    pacf_plots()
        Generates partial autocorrelation plots

    Helper methods:
    -------
    _plot_residuals()
         Residual plots helper function
    _plot_forecast()
         Helper function for plotting forecasted time-series
    _prepare_fit()
         Prepares ts_fit of child class. Supposed to be called by a child class
    _residuals()
         Helper function for calculating residuals. Supposed to be called by a child class
    _check_ts_test()
         Checks for test. Supposed to be called by a child class
    _check_ts_forecast()
         Checks for forecast. Supposed to be called by a child class
    """
    def __init__(self,
                 ts_df,
                 time_format="%Y-%m-%d %H:%M:%S",
                 freq='D',
                 fill_method='ffill',
                 n_test=0,
                 n_val=0,
                 hyper_params=None,
                 test='adf',
                 trend=None,
                 seasonal=False,
                 seasonal_periods=1,
                 **kwds):
        """Initializes the object UVariateTimeSeriesForecaster"""
        self._ts_df_cols = ['ds', 'y']

        self.ts_df = ts_df
        self.time_format = time_format
        self.freq = freq
        self.fill_method = fill_method.lower()
        self.n_test = int(n_test)
        self.n_val = int(n_val)
        self.transform = None
        self._boxcox_lmbda = None

        self._mode = ''

        self._train_dt = None
        self._test_dt = None
        self._val_dt = None

        self.model_fit = None
        self.fittedvalues = None
        self.residuals = None
        self.rmse = 0
        self._gs = tsa.GridSearchClass()
        self.hyper_params = hyper_params
        self.best_model = dict()
        """
        self.rmse_test = 0
        self.rmse_val = 0
        """

        self.upper_whisker_res = None
        self.lower_conf_int = None
        self.upper_conf_int = None

        self.forecast = None
        self.residuals_forecast = None

        self._res_decomp = None
        self._arr_seasonal = None
        self._arr_trend = None
        self._arr_baseline = None

        self._test = test
        self._trend = trend
        if self._trend is not None:
            self._trend = self._trend.lower()
        self._seasonal = seasonal
        if isinstance(self._seasonal, str):
            self._seasonal = self._seasonal.lower()
        self._seasonal_periods = seasonal_periods

        self._uvts_cls_logger = Logger('uvts_cls')

        UVariateTimeSeriesClass.assertions(self)
        # work with ts_df
        self.ts_df = self.ts_df.reset_index()
        self.ts_df.columns = self._ts_df_cols
        self.ts_df['y'] = pd.to_numeric(self.ts_df['y'], errors='coerce')
        self.ts_df.set_index('ds', inplace=True)
        self._uvts_cls_logger.info("Received time series data of range: " +
                                   str(min(self.ts_df.index)) + ' - ' +
                                   str(max(self.ts_df.index)) +
                                   " and shape: " + str(self.ts_df.shape))

        if not isinstance(self.ts_df.index, pd.DatetimeIndex):
            self._uvts_cls_logger.warning("Time conversion required...")
            self.ts_df = self.ts_df.reset_index()
            try:
                self.ts_df['ds'] = self.ts_df['ds'].apply(
                    lambda x: datetime.datetime.strptime(
                        str(x).translate({
                            ord('T'): ' ',
                            ord('Z'): None
                        })[:-1], self.time_format))
            except ValueError as e:
                self._uvts_cls_logger.warning(
                    "Zulu time conversion not successful: {}".format(e))
                self._uvts_cls_logger.warning(
                    "Will try without assuming zulu time...")
                try:
                    self.ts_df['ds'] = self.ts_df['ds'].apply(
                        lambda x: datetime.datetime.strptime(
                            str(x), self.time_format))
                except ValueError as e:
                    self._uvts_cls_logger.info(
                        "Time conversion not successful. Check your time_format: {}"
                        .format(e))
                    sys.exit("STOP")
                else:
                    self._uvts_cls_logger.info("Time conversion successful!")
            else:
                self._uvts_cls_logger.info("Time conversion successful!")
            # set index
            self.ts_df.set_index('ds', inplace=True)
        #
        self.ts_df.index = pd.to_datetime(self.ts_df.index)
        self.ts_df.sort_index(inplace=True)
        # resample
        self.ts_resample()
        UVariateTimeSeriesClass.assertions(self, post=True)
        #
        if self.n_val > len(self.ts_df) - self.n_test:
            self.n_val = len(self.ts_df) - self.n_test

        if self.n_test == 0 and self.n_val == 0:
            self._mode = 'forecast'
        elif self.n_test > 0:
            self._mode = 'test'
        elif self.n_test == 0 and self.n_val > 0:
            self._mode = 'validate'

        # delegate just for good programming style here
        super(UVariateTimeSeriesClass, self).__init__(**kwds)

    def assertions(self, post=False):

        if post:
            try:
                assert 0 <= self.n_test < len(self.ts_df)
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Assertion exception, invalid value for n_test!")
                sys.exit("STOP")
            #
            try:
                assert 0 <= self.n_val < len(self.ts_df)
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Assertion exception, invalid value for n_val!")
                sys.exit("STOP")
        else:
            try:
                assert self.fill_method in ['ffill', 'interp1d']
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Assertion exception, fill method not recognized! "
                    "'ffill' will be used. ")
                self.fill_method = 'ffill'

            try:
                assert pd.DataFrame(self.ts_df).shape[1] <= 2
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Time series must be uni-variate. "
                    "Hence, at most a time columns and a column of numeric values are expected!"
                )
                sys.exit("STOP")

            try:
                assert self._trend is None or (isinstance(
                    self._trend, str) and self._trend in [
                        'constant', 'linear', 'constant linear', 'additive',
                        'add', 'multiplicative', 'mul'
                    ])
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Assertion exception occurred, invalid value for trend! "
                    "Choose between None or "
                    "['constant', 'linear', 'constant linear', "
                    "'additive', 'add', 'multiplicative', 'mul'] ")
                sys.exit("STOP")

            try:
                assert self._seasonal is None or isinstance(
                    self._seasonal, bool) or (
                        isinstance(self._seasonal, str) and self._seasonal
                        in ['additive', 'add', 'multiplicative', 'mul'])
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Assertion exception occurred, invalid value for seasonal! "
                    "Choose between True/False, None or "
                    "['additive', 'add', 'multiplicative', 'mul'] ")
                sys.exit("STOP")

    def __copy__(self):
        """Copies the object"""

        cls = self.__class__
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        return result

    def ts_transform(self, transform):
        """Transforms time series via applying casted 'transform'. Right now 'log10' and 'box-cox' possible."""
        try:
            assert transform.lower().strip() in ['log10', 'box-cox']
        except AssertionError:
            self._uvts_cls_logger.error(
                "transform should be in ['log10', 'box-cox'] or empty. Assuming no transform! "
                "Hence, if you get bad results, you may want to choose e.g. log10 here."
            )
            self._uvts_cls_logger.exception(
                "Assertion exception occurred, transform")
            self.transform = None
        else:
            self.transform = transform.lower()
            # transform
            if sum(self.ts_df['y'] > 0) < len(self.ts_df['y']):
                self._uvts_cls_logger.warning(
                    "Zero, negative, or both values present in your data. Transformation will not be used!"
                )
                return self
            if self.transform == 'log10':
                try:
                    self.ts_df['y'] = self.ts_df['y'].apply(np.log10)
                except ValueError:
                    self._uvts_cls_logger.exception(
                        "log10 transformation did not work! Possibly negative "
                        "values present?")
            elif self.transform == 'box-cox':
                if input("Do you want to provide lambda for box.cox? y/n?"
                         ).strip().lower() == 'y':
                    self._boxcox_lmbda = float(input())
                else:
                    self._boxcox_lmbda = None
                try:
                    if self._boxcox_lmbda is None:
                        # let scipy estimate lambda and keep the transformed values
                        bc, self._boxcox_lmbda = stats.boxcox(self.ts_df['y'],
                                                              lmbda=None)
                        self.ts_df['y'] = bc
                    else:
                        self.ts_df['y'] = stats.boxcox(
                            self.ts_df['y'], lmbda=self._boxcox_lmbda)
                except ValueError:
                    self._uvts_cls_logger.exception(
                        "box-cox transformation did not work! "
                        "Possibly negative values present or bad lambda?")
        return self

    def set_frequency(self, new_freq):
        """Sets new frequency and resamples time series to that new frequency"""
        self.freq = new_freq
        self.ts_resample()

    def ts_check_frequency(self):
        """Checks the frequency of time series"""
        if self.ts_df.index.freq is None:
            self._uvts_cls_logger.info("No specific frequency detected.")
            self._uvts_cls_logger.info(
                "Frequency chosen in initialization: " + str(self.freq) +
                " enter 'n' and call ts_resample() if you are satisfied with this value."
            )
            if input("Should a histogram of time deltas be plotted y/n?"
                     ).strip().lower() == 'y':
                ff = pd.Series(self.ts_df.index[1:(len(self.ts_df))] -
                               self.ts_df.index[0:(len(self.ts_df) - 1)])
                ff = ff.apply(lambda x: int(x.total_seconds() / (60 * 60)))
                plt.hist(ff, bins=120)
                plt.xlabel("Rounded time delta [H]")
                plt.ylabel("Frequency of occurrence")
                self._uvts_cls_logger.info(ff.value_counts())
                self._uvts_cls_logger.info(
                    "Should hourly frequency not fit, choose a reasonable frequency and call "
                    "set_frequency(new_freq)")
            else:
                pass
        else:
            self._uvts_cls_logger.info("Time series frequency: " +
                                       str(self.ts_df.index.freq))

    def ts_resample(self):
        """Brings original time series to the chosen frequency freq"""
        try:
            ts_freq = pd.DataFrame(index=pd.date_range(
                self.ts_df.index[0],
                self.ts_df.index[len(self.ts_df) - 1],
                freq=self.freq),
                                   columns=['dummy'])
        except ValueError:
            self._uvts_cls_logger.exception(
                "Exception occurred, possibly incompatible frequency!")
            sys.exit("STOP")

        if self.fill_method == 'ffill':
            self.ts_df = ts_freq.join(self.ts_df).drop(['dummy'], axis=1)
            self.ts_df.y = self.ts_df.y.fillna(method='ffill')
            # if np.isnan ( self.ts_df.y ).any ():
            #    self.ts_df.y = self.ts_df.y.fillna ( method='bfill' )
        else:  # interp
            xp = np.linspace(0,
                             self.ts_df.size,
                             self.ts_df.size,
                             endpoint=False)
            fp = self.ts_df['y']
            # join
            self.ts_df = ts_freq.join(self.ts_df).drop(['dummy'], axis=1)
            # pick new points
            x = np.linspace(0, ts_freq.size, ts_freq.size, endpoint=False)
            x = x[self.ts_df['y'].isna()]
            # interpolate the missing values at the new grid points
            self.ts_df.loc[self.ts_df['y'].isna(), 'y'] = np.interp(x, xp, fp)

        if np.isnan(self.ts_df.y).any():
            self._uvts_cls_logger.warning(
                "Some NaN found, something went wrong, check the data!")
            sys.exit("STOP")

        self._uvts_cls_logger.info("Time series resampled at frequency: " +
                                   str(self.ts_df.index.freq) +
                                   ". New shape of the data: " +
                                   str(self.ts_df.shape))
        self._uvts_cls_logger.info("Using time series data of range: " +
                                   str(min(self.ts_df.index)) + ' - ' +
                                   str(max(self.ts_df.index)) +
                                   " and shape: " + str(self.ts_df.shape))

        return self

    def ts_split(self):
        """Prepares data for different modes: train, test, validate, test and validate, forecast"""

        if self.ts_df.index.freq is None:
            self._uvts_cls_logger.warning(
                "Time series exhibit no frequency. Calling ts_resample()...")
            try:
                self.ts_resample()
            except ValueError:
                self._uvts_cls_logger.error("Resample did not work! Error:" +
                                            str(sys.exc_info()[0]))

        ts_df = self.ts_df

        if self._mode == 'forecast':
            self._train_dt = ts_df
            self._test_dt, self._val_dt = None, None
        elif self._mode == 'test and validate':
            if self._test_dt is not None:
                self._train_dt = pd.concat([self._train_dt, self._test_dt],
                                           axis=0)
                self._test_dt = self._val_dt
                self._val_dt = None
            else:
                self._uvts_cls_logger.error("Something is wrong: mode!")
        else:
            # split
            ts_test_df = pd.DataFrame()
            ts_val_df = pd.DataFrame()
            #
            ts_df = ts_df.reset_index()
            ts_df.columns = self._ts_df_cols

            if self._mode == 'test' and self.n_val == 0:
                ts_test_df = ts_df.copy()
                #
                ts_df = pd.DataFrame(ts_df.loc[:(len(ts_df) - 1 -
                                                 self.n_test), ])
                ts_df.set_index('ds', inplace=True)
                # test
                ts_test_df = pd.DataFrame(ts_test_df.loc[(len(ts_test_df) -
                                                          self.n_test):, ])
                ts_test_df.set_index('ds', inplace=True)
            elif self._mode == 'validate':
                ts_val_df = ts_df.copy()
                #
                ts_df = pd.DataFrame(ts_df.loc[:(len(ts_df) - 1 -
                                                 self.n_val), ])
                ts_df.set_index('ds', inplace=True)
                # val
                ts_val_df = pd.DataFrame(ts_val_df.loc[(len(ts_val_df) -
                                                        self.n_val):, ])
                ts_val_df.set_index('ds', inplace=True)
            elif self._mode == 'test' and self.n_val > 0:
                ts_test_df = ts_df.copy()
                ts_val_df = ts_df.copy()
                #
                ts_df = pd.DataFrame(ts_df.loc[:(len(ts_df) - 1 - self.n_test -
                                                 self.n_val), ])
                ts_df.set_index('ds', inplace=True)
                # test
                ts_test_df = pd.DataFrame(
                    ts_test_df.loc[(len(ts_test_df) - self.n_test -
                                    self.n_val):(len(ts_test_df) - self.n_val -
                                                 1)])
                ts_test_df.set_index('ds', inplace=True)
                # val
                ts_val_df = pd.DataFrame(ts_val_df.loc[(len(ts_val_df) -
                                                        self.n_val):, ])
                ts_val_df.set_index('ds', inplace=True)

            # now set
            self._train_dt = ts_df
            if not ts_test_df.empty:
                self._test_dt = ts_test_df
            if not ts_val_df.empty:
                self._val_dt = ts_val_df

        return self
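
    # Worked example of the split logic above (a sketch, assuming daily data):
    # with len(ts_df) == 10, n_test == 3 and n_val == 2 in mode 'test',
    # rows 0..4 become _train_dt, rows 5..7 become _test_dt and rows 8..9
    # become _val_dt; with n_val == 0 the last n_test rows form _test_dt only.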

    @staticmethod
    def compute_ci(yhat, yhat_var, ci_level):
        """Easy compute of confidence intervals"""
        z_mapping = {0.95: 1.96, 0.99: 2.58}
        z = z_mapping[ci_level]

        ci_lower = yhat - yhat_var * z
        ci_upper = yhat + yhat_var * z

        return ci_lower, ci_upper
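
    # A small numeric illustration of compute_ci (a sketch, not from the original code):
    # compute_ci(yhat=10.0, yhat_var=2.0, ci_level=0.95) -> (6.08, 13.92),
    # i.e. yhat -/+ 1.96 * yhat_var. Note that only ci_level 0.95 and 0.99 are
    # mapped to z-scores; any other value raises a KeyError.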

    def _prepare_fit(self):
        """Helper function ro prepare ts_fit"""
        self.lower_conf_int, self.upper_conf_int, self.upper_whisker_res = None, None, None
        self.model_fit = None
        self.residuals, self.residuals_forecast, self.fittedvalues = None, None, None

    def _residuals(self):
        """Helper function to calculate residuals"""
        if self.model_fit is None:
            self._uvts_cls_logger.error(
                "No model has been fitted, residuals cannot be computed!")
            sys.exit("STOP")

        try:
            # use fittedvalues to fill in the model dictionary
            self.residuals = pd.Series(np.asarray(self._train_dt['y']) -
                                       np.asarray(self.fittedvalues).flatten(),
                                       index=self._train_dt['y'].index)
            self.upper_whisker_res = self.residuals.mean() + 1.5 * (
                self.residuals.quantile(0.75) - self.residuals.quantile(0.25))
        except (KeyError, AttributeError):
            self._uvts_cls_logger.exception(
                "Exception occurred: Model was not fitted or ts has other structure"
            )

        return self

    def _plot_residuals(self, y, yhat, _id):
        """Helper function to plot the residuals"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Model has to be fitted first! Please call ts_fit(...)")

        fig, axes = plt.subplots(2, 1, figsize=(20, 5), sharex=True)

        axes[0].plot(pd.Series(yhat, index=self._train_dt.index),
                     color='y',
                     linewidth=2.0)
        axes[0].plot(pd.Series(y, index=self._train_dt.index), color='b')

        axes[0].set_ylabel("Model Fit")
        axes[0].set_title("Real (blue) and estimated values, " + str(_id))
        #
        axes[1].plot(self.residuals, color="r")
        """
        if self.forecast is not None and self.residuals_forecast is None \
                and self.lower_conf_int is not None and self.upper_conf_int is not None:
            axes[0].fill_between(self.lower_conf_int.index, self.lower_conf_int, self.upper_conf_int, color='k',
                                 alpha=.15)
        """
        if self.lower_conf_int is not None and self.upper_conf_int is not None:
            axes[0].fill_between(self.lower_conf_int.index,
                                 self.lower_conf_int,
                                 self.upper_conf_int,
                                 color='k',
                                 alpha=.15)
        if self.upper_whisker_res is not None:
            axes[1].axhline(y=self.upper_whisker_res,
                            xmin=0,
                            xmax=1,
                            color='m',
                            label='upper_whisker',
                            linestyle='--',
                            linewidth=1.5)
            axes[1].axhline(y=-self.upper_whisker_res,
                            xmin=0,
                            xmax=1,
                            color='m',
                            label='upper_whisker',
                            linestyle='--',
                            linewidth=1.5)

        axes[1].set_ylabel('Residuals')
        axes[1].set_title(
            'Difference between model output and the real data and +/- upper whisker, '
            + str(_id))

        return fig, axes

    def _check_ts_test(self):
        """Check before ts_test in child class is called"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Model has to be fitted first! Please call ts_fit(...)")

        try:
            assert self._test_dt is not None
        except (KeyError, AssertionError):
            self._uvts_cls_logger.exception(
                "Nothing to test. "
                "Call ts_forecast() or specify amount of test data "
                "when initializing the object.")
            return -1
        else:
            # self._mode = 'test'
            return 0

    def _check_ts_forecast(self, n_forecast):
        """Check before ts_forecast in child class is called"""
        #
        try:
            n_forecast = int(n_forecast)
            assert 0 < n_forecast < len(self._train_dt)
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Number of periods to be forecasted is too low, too high or not numeric!"
            )
        except ValueError:
            self._uvts_cls_logger.exception(
                "n_forecast must be convertible to int type!")

        return n_forecast

    def _gen_idx_future(self, n_forecast):
        """Generate the time axis for future data"""
        idx_future = None
        if self.freq == 'S':
            idx_future = pd.date_range(start=max(self._train_dt.index) +
                                       datetime.timedelta(seconds=1),
                                       end=max(self._train_dt.index) +
                                       datetime.timedelta(seconds=n_forecast),
                                       freq='S')
        elif self.freq == 'min':
            idx_future = pd.date_range(start=max(self._train_dt.index) +
                                       datetime.timedelta(minutes=1),
                                       end=max(self._train_dt.index) +
                                       datetime.timedelta(minutes=n_forecast),
                                       freq='min')
        elif self.freq == 'H':
            idx_future = pd.date_range(start=max(self._train_dt.index) +
                                       datetime.timedelta(hours=1),
                                       end=max(self._train_dt.index) +
                                       datetime.timedelta(hours=n_forecast),
                                       freq='H')
        elif self.freq == 'D':
            idx_future = pd.date_range(start=max(self._train_dt.index) +
                                       datetime.timedelta(days=1),
                                       end=max(self._train_dt.index) +
                                       datetime.timedelta(days=n_forecast),
                                       freq='D')
        elif self.freq == 'W':
            idx_future = pd.date_range(start=max(self._train_dt.index) +
                                       datetime.timedelta(weeks=1),
                                       end=max(self._train_dt.index) +
                                       datetime.timedelta(weeks=n_forecast),
                                       freq='W')
        elif self.freq == 'M' or self.freq == 'MS':
            idx_future = pd.date_range(start=max(self._train_dt.index) +
                                       relativedelta(months=+1),
                                       end=max(self._train_dt.index) +
                                       relativedelta(months=+n_forecast),
                                       freq=self.freq)
        return idx_future

    def _prepare_forecast(self, yhat, forecast):
        # forecast
        forecast = forecast.reset_index()
        forecast.columns = self._ts_df_cols
        forecast.set_index('ds', inplace=True)
        #
        vals = list()
        vals.append(yhat[-1])
        for i in range(len(forecast['y'])):
            vals.append(forecast['y'][i])

        idx = list()
        idx.append(self._train_dt.index[-1])
        for i in range(len(forecast.index)):
            idx.append(forecast.index[i])
        #
        return pd.Series(vals, index=idx)

    def _plot_forecast(self, y, yhat, forecast, _id):
        """Helper function to plot forecasted values"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")
        #
        try:
            assert self.forecast is not None
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Neither ts_test(...) nor ts_forecast(...) have been called yet!"
            )
            sys.exit("STOP")

        fig, axes = plt.subplots(2, 1, figsize=(20, 7), sharex=True)
        #
        axes[0].plot(pd.Series(yhat, index=self._train_dt.index),
                     color='y',
                     linewidth=2.0)
        axes[0].plot(pd.Series(y, index=self._train_dt.index),
                     color='b',
                     linewidth=1.0)
        #
        if self.residuals_forecast is not None:
            axes[0].plot(self.ts_df, color='b')
        forecast = self._prepare_forecast(yhat=yhat, forecast=forecast)
        axes[0].plot(forecast, color='orange', linewidth=2.0)
        #
        if self.lower_conf_int is not None and self.upper_conf_int is not None:
            axes[0].fill_between(self.lower_conf_int.index,
                                 self.lower_conf_int,
                                 self.upper_conf_int,
                                 color='k',
                                 alpha=.15)
        axes[0].set_ylabel("Fit and Forecast/Validation")
        axes[0].set_title(
            "Real (blue), estimated (yellow) and forecasted values, " +
            str(_id))
        #
        if self.residuals_forecast is not None:
            axes[1].plot(pd.concat([self.residuals, self.residuals_forecast],
                                   axis=0),
                         color='r')
        axes[1].plot(self.residuals, color="r")

        if self.upper_whisker_res is not None:
            axes[1].axhline(y=self.upper_whisker_res,
                            xmin=0,
                            xmax=1,
                            color='m',
                            label='upper_whisker',
                            linestyle='--',
                            linewidth=1.5)
            axes[1].axhline(y=-self.upper_whisker_res,
                            xmin=0,
                            xmax=1,
                            color='m',
                            label='upper_whisker',
                            linestyle='--',
                            linewidth=1.5)
        axes[1].set_ylabel("Residuals")
        axes[1].set_title(
            "Difference between model output and the real data both, for fitted "
            "and forecasted and +/- upper whisker or confidence intervals, " +
            str(_id))

        return fig, axes

    def ts_decompose(self, params=None):
        """Decomposes time series"""
        self._res_decomp = None
        self._arr_seasonal = None
        self._arr_trend = None
        self._arr_baseline = None
        self.residuals = None

        if params is None:
            params = dict({'model': 'additive', 'freq': 1})
        try:
            assert isinstance(params, dict)
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Dictionary is expected for parameters!")
            sys.exit("STOP")

        try:
            assert 'model' in list(params.keys())
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Unexpected dictionary keys. At least decomposition "
                "model must be supplied!")
            sys.exit("STOP")

        try:
            assert params['model'].lower() in ['additive', 'multiplicative']
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Unexpected value for the parameter 'model'! "
                "Choose from ['additive', 'multiplicative']")
            sys.exit("STOP")
        else:
            params['model'] = params['model'].lower()

        if 'freq' not in list(params.keys()):
            params['freq'] = 1

        try:
            ts2decomp = self.ts_df
            if 'from' in list(params.keys()):
                ts2decomp = ts2decomp[
                    ts2decomp.index >= datetime.datetime.strptime(
                        params['from'], self.time_format)]
            if 'to' in list(params.keys()):
                ts2decomp = ts2decomp[ts2decomp.index <= datetime.datetime.
                                      strptime(params['to'], self.time_format)]
            try:
                assert ts2decomp.size > 0
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Empty time series resulted, please check your parameters!"
                )
                sys.exit("STOP")

            if ts2decomp.index.freq is not None:
                res = seasonal_decompose(ts2decomp.loc[:, 'y'],
                                         model=params['model'])
            else:
                res = seasonal_decompose(ts2decomp.loc[:, 'y'],
                                         model=params['model'],
                                         freq=params['freq'])

        except ValueError:
            self._uvts_cls_logger.exception(
                "ValueError, seasonal_decompose error")
        else:
            self._res_decomp = res
            self._arr_seasonal = res.seasonal
            self._arr_trend = res.trend
            self._arr_baseline = self._arr_seasonal + self._arr_trend
            self.residuals = res.resid
            self.upper_whisker_res = self.residuals.mean() + 1.5 * (
                self.residuals.quantile(0.75) - self.residuals.quantile(0.25))
            self.plot_decompose()

    def ts_stl_decompose(self, params=None):
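        """Decomposes time series using STL decomposition given 'period' in params"""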
        self._res_decomp = None
        self._arr_seasonal = None
        self._arr_trend = None
        self._arr_baseline = None
        self.residuals = None

        if params is None:
            params = dict({'period': 12})
        try:
            assert isinstance(params, dict)
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Dictionary is expected for parameters!")
            sys.exit("STOP")

        try:
            assert 'period' in list(params.keys())
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Unexpected dictionary keys. At least decomposition "
                "period must be supplied!")
            sys.exit("STOP")

        try:
            assert isinstance(params['period'], int)
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Unexpected value for the parameter 'period'! "
                "Integer expected")
            sys.exit("STOP")

        try:
            ts2decomp = self.ts_df
            if 'from' in list(params.keys()):
                ts2decomp = ts2decomp[
                    ts2decomp.index >= datetime.datetime.strptime(
                        params['from'], self.time_format)]
            if 'to' in list(params.keys()):
                ts2decomp = ts2decomp[ts2decomp.index <= datetime.datetime.
                                      strptime(params['to'], self.time_format)]
            try:
                assert ts2decomp.size > 0
            except AssertionError:
                self._uvts_cls_logger.exception(
                    "Empty time series resulted, please check your parameters!"
                )
                sys.exit("STOP")

            res = decompose(ts2decomp, period=params['period'])

        except ValueError:
            self._uvts_cls_logger.exception("ValueError, stl_decompose error")
        else:
            self._res_decomp = res
            self._arr_seasonal = res.seasonal
            self._arr_trend = res.trend
            self._arr_baseline = self._arr_seasonal + self._arr_trend
            self.residuals = res.resid
            self.upper_whisker_res = np.asarray(self.residuals.mean() + 1.5 * (
                self.residuals.quantile(0.75) - self.residuals.quantile(0.25)))
            self.plot_decompose()

    def plot_decompose(self):
        """Plots the results of time series decomposition"""
        try:
            assert self._arr_seasonal is not None
        except AssertionError:
            self.ts_decompose()

        fig, axes = plt.subplots(5, 1, figsize=(20, 9), sharex=True)
        axes[0].plot(self._res_decomp.observed)
        axes[0].set_ylabel("Original")
        #
        axes[1].plot(self._arr_trend)
        axes[1].set_ylabel("Trend")
        #
        axes[2].plot(self._arr_seasonal)
        axes[2].set_ylabel("Seasonal")
        #
        axes[3].plot(self._arr_baseline)
        axes[3].set_ylabel("Baseline")
        #
        axes[4].plot(self.residuals)
        axes[4].set_ylabel("Residuals")
        #
        if self.upper_whisker_res is not None:
            axes[4].axhline(y=self.upper_whisker_res,
                            xmin=0,
                            xmax=1,
                            color='m',
                            label='upper_whisker',
                            linestyle='--',
                            linewidth=1.5)
            axes[4].axhline(y=-self.upper_whisker_res,
                            xmin=0,
                            xmax=1,
                            color='m',
                            label='upper_whisker',
                            linestyle='--',
                            linewidth=1.5)

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def difference(self, lag=1):
        diff = list()
        for i in range(lag, len(self.ts_df)):
            value = self.ts_df['y'][i] - self.ts_df['y'][i - lag]
            diff.append(value)
        return pd.Series(diff)

    def rolling_mean(self, window=10):
        return self.ts_df.rolling(window=window).mean()

    def rolling_variance(self, window=10):
        return self.ts_df.rolling(window=window).std()

    def test_adf(self):
        """Performs Dickey-Fuller test for stationarity"""

        dftest = adfuller(self.ts_df['y'], autolag='AIC')
        dfoutput = pd.Series(dftest[0:4],
                             index=[
                                 'Test Statistic', 'p-value', '#Lags Used',
                                 'Number of Observations Used'
                             ])
        for key, value in dftest[4].items():
            dfoutput['Critical Value (%s)' % key] = value
        print(dfoutput)
        if dftest[0] > dftest[4]['5%']:
            print(
                "Test statistic greater than critical value at 5% --> series seems to be not stationary. "
                "Look at critical values at 1% and 10% too, ideally they also should be less than test statistic."
            )
        else:
            print(
                "Test statistic less than critical value at 5% --> series seems to be stationary. "
                "Look at critical values at 1% and 10% too, ideally they also should be greater than test statistic."
            )

    def test_kpss(self):
        """Performs Kwiatkowski-Phillips-Schmidt-Shin test for stationarity"""

        kpsstest = kpss(self.ts_df['y'], regression='c')
        kpss_output = pd.Series(
            kpsstest[0:3], index=['Test Statistic', 'p-value', 'Lags Used'])
        for key, value in kpsstest[3].items():
            kpss_output['Critical Value (%s)' % key] = value
        print(kpss_output)
        if kpsstest[0] > kpsstest[3]['5%']:
            print(
                "Test statistic greater than critical value at 5% --> series seems to be not stationary. "
                "Look at critical values at 1% and 10% too, ideally they also should be greater than test statistic."
            )
        else:
            print(
                "Test statistic less than critical value at 5% --> series seems to be stationary. "
                "Look at critical values at 1% and 10% too, ideally they also should be less than test statistic."
            )

    def ndiff(self, tests=['kpss', 'adf', 'pp'], alpha=0.05, max_d=2):
        """Returns p-values to decide for the value of d-differentiation

        list of tests given in tests parameter are applied.
        """
        try:
            assert sum([i in ['kpss', 'adf', 'pp'] for i in tests]) > 0
        except AssertionError:
            self._uvts_cls_logger.exception(
                "Assertion exception occurred. No valid value for tests! "
                "Choose from ['kpss', 'adf', 'pp']. You can choose more than one."
            )
            sys.exit("STOP")

        do_test = list(
            compress(['kpss', 'adf', 'pp'],
                     [i in ['kpss', 'adf', 'pp'] for i in tests]))
        return dict(
            zip(
                do_test,
                list(
                    map(
                        lambda x: ndiffs(
                            self.ts_df['y'], test=x, alpha=alpha, max_d=max_d),
                        do_test))))

    def acf_plots(self):
        """Generates autocorrelation plots"""
        fig, axes = plt.subplots(3, 2, figsize=(20, 9), sharex=False)
        #
        axes[0, 0].plot(self.ts_df['y'])
        axes[0, 0].set_title('Original Series')
        plot_acf(self.ts_df['y'], ax=axes[0, 1])

        # 1st Differencing
        axes[1, 0].plot(self.ts_df['y'].diff())
        axes[1, 0].set_title('1st Order Differencing')
        plot_acf(self.ts_df['y'].diff().dropna(), ax=axes[1, 1])

        # 2nd Differencing
        axes[2, 0].plot(self.ts_df['y'].diff().diff())
        axes[2, 0].set_title('2nd Order Differencing')
        plot_acf(self.ts_df['y'].diff().diff().dropna(), ax=axes[2, 1])
        #
        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    def pacf_plots(self):
        """Generates partial correlation plots"""
        fig, axes = plt.subplots(3, 2, figsize=(20, 9), sharex=False)
        #
        axes[0, 0].plot(self.ts_df['y'])
        axes[0, 0].set_title('Original Series')
        plot_pacf(self.ts_df['y'], ax=axes[0, 1])

        # 1st Differencing
        axes[1, 0].plot(self.ts_df['y'].diff())
        axes[1, 0].set_title('1st Order Differencing')
        # axes[0].set(ylim=(0, 5))
        plot_pacf(self.ts_df['y'].diff().dropna(), ax=axes[1, 1])

        # 2nd Differencing
        axes[2, 0].plot(self.ts_df['y'].diff().diff())
        axes[2, 0].set_title('2nd Order Differencing')
        plot_pacf(self.ts_df['y'].diff().diff().dropna(), ax=axes[2, 1])

        plt.gcf().autofmt_xdate()
        plt.grid(True)
        plt.show()

    @abstractmethod
    def ts_fit(self, suppress=True):
        self.model_fit = None
        raise NotImplementedError("You must override ts_fit!")

    @abstractmethod
    def ts_test(self, show_plot=True):
        raise NotImplementedError("You must override ts_test!")

    def measure_rmse(self):
        """Computes root mean squared error on test data
        """
        try:
            assert self.residuals_forecast is not None
        except AssertionError:
            self._uvts_cls_logger.exception(
                "AssertionError occurred, Cannot compute RMSE! Check your object mode"
            )

        self.rmse = np.sqrt(
            sum(np.square(self.residuals_forecast)) /
            len(self.residuals_forecast))
        """
        if self._mode == 'test':
            self.rmse_test = self.rmse
        elif self._mode == 'test and validate':
            self.rmse_val = self.rmse - self.rmse_test
        elif self._mode == 'validate':
            self.rmse_val = self.rmse
        """

    def ts_validate(self, suppress=True, show_plot=True):
        """Validates the model"""
        if self._mode == 'forecast':  # or self._val_dt is None:
            self._uvts_cls_logger.warning(
                "Nothing to validate! n_val not set within the initialization or you already "
                "used ts_forecast. In this case you have to restart and call ts_fit()."
            )
            sys.exit("STOP")

        self._mode = 'test and validate'
        self.ts_fit(suppress=suppress)
        self.ts_test(show_plot=show_plot)

    def reset(self):
        for attr in self.__dict__.keys():
            setattr(self, attr, None)
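

# --- A minimal usage sketch for UVariateTimeSeriesClass (not part of the original
# example). Assumptions: the class and its package-level imports (pandas as pd, etc.)
# are available in this module, and 'data.csv' with columns 'Date' and 'Value' is a
# hypothetical input file. Only non-abstract utilities are shown, since
# ts_fit()/ts_test() must be overridden by a concrete forecaster such as SARIMAForecaster.
if __name__ == '__main__':
    demo_df = pd.read_csv('data.csv',                       # hypothetical file
                          index_col='Date',
                          usecols=['Date', 'Value'],
                          parse_dates=True)
    uvts = UVariateTimeSeriesClass(ts_df=demo_df, freq='D', n_test=7, n_val=0)
    uvts.ts_transform('log10')                              # optional value transform
    uvts.test_adf()                                         # ADF stationarity test
    uvts.ts_decompose({'model': 'additive', 'freq': 7})     # decompose and plot
    print(uvts.ndiff(tests=['kpss', 'adf']))                # suggested differencing order d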
Exemple #13
0
class GridSearchClass(object):
    """Class to perform the grid search given the hyper parameters

    Attributes
    ----------
    forecaster: Object (tsa)
        Forecaster object from the tsa package
    hyper_params: dictionary
        A dictionary of hyper parameters
    results: dictionary
        A dictionary where results are saved
    best_model: dictionary
        A dictionary where the best model and respective hyper parameters are saved
    _gs_logger: Logger
        The logger for logging

    Methods
    ----------
    assertions()
       Assertion tests
    set_forecaster()
       Sets new forecaster
    set_hyper_params()
       Sets new hyper parameters
    grid_search()
       Performs grid search through all combinations of parameters.
       Parameter combinations are generated using hyper parameters
    """
    def __init__(self, **kwargs):
        """Initializes GridSearch class"""
        self._gs_logger = Logger("grid_search")

        self.forecaster = None
        self.hyper_params = None

        for k, v in kwargs.items():
            if k == 'forecaster':
                self.forecaster = v
            elif k == 'hyper_params':
                self.hyper_params = v

        self.assertions()
        self.results = list()
        self.best_model = dict()
        #
        # self._gs_logger.info("Grid Search initialized. Call grid_search()")

    def assertions(self):
        if self.forecaster is not None:
            try:
                assert isinstance(self.forecaster,
                                  (ProphetForecaster, DLMForecaster,
                                   LinearForecaster,
                                   ExponentialSmoothingForecaster,
                                   ARIMAForecaster, SARIMAForecaster)) \
                    and not isinstance(self.forecaster,
                                       UVariateTimeSeriesForecaster)
            except AssertionError:
                self._gs_logger.exception("Unexpected type for forecaster!")
                sys.exit("STOP")

        if self.hyper_params is not None:
            try:
                assert isinstance(self.hyper_params, dict)
            except AssertionError:
                self._gs_logger.exception("Unexpected type for hyper_params")
                sys.exit("STOP")
        if hasattr(self.forecaster, 'n_test'):
            try:
                assert self.forecaster.n_test > 0
            except AssertionError:
                self._gs_logger.exception("No test data specified for this forecaster. Grid search will stop!")
                sys.exit("STOP")
            else:
                self.forecaster._mode = 'test'

    def set_forecaster(self, forecaster_obj):
        """Sets the forecaster"""
        self.forecaster = forecaster_obj
        self.assertions()

        return self

    def set_hyper_params(self, hyper_params):
        """Sets hyper parameters"""
        self.hyper_params = hyper_params
        self.assertions()

        return self

    @staticmethod
    def _print_dict(d):
        d_info = ""
        for k,v in d.items():
            d_info = d_info + "....................... | grid_search | INFO : " + str(k) + " : " + str(v) + "\n"
        return "Hyper parameter set: \n" + d_info

    def grid_search(self, suppress=False, show_plot=True):
        """Performs the grid search

        All possible combinations of parameters are generated from the hyper parameters.
        This method assumes that the corresponding attributes of a forecaster start with '_'.
        The best model is chosen using the RMSE computed on the test data as
        the measure for the goodness of the forecaster.
        """
        # set-up parameter sets
        for p, v in self.hyper_params.items():
            if not isinstance(v, list):
                self.hyper_params[p] = [v]

        combinations = list(itertools.product(*list(self.hyper_params.values())))
        params = [dict(zip(list(self.hyper_params.keys()), combinations[i])) for i in range(len(combinations))]
        self._gs_logger.info("{} number of parameter combinations generated".format(len(params)))
        #if input("Run grid search y/n?").strip().lower() == 'y':
        # reset
        self.results = list()
        self.best_model = dict()
        rmse = float('inf')

        for i in range(len(params)):
            self._gs_logger.info(self._print_dict(params[i]))
            for p, val in params[i].items():
                # check
                attr = '_'+str(p)
                if attr in list(self.forecaster.__dict__.keys()):
                    _type = type(getattr(self.forecaster, attr))
                    try:
                        assert type(val) == _type
                    except AssertionError:
                        try:
                            # note: str(_type) yields e.g. "<class 'float'>"; compare types directly instead
                            if _type is float:
                                val = float(val)
                            elif _type is int:
                                val = int(val)
                            elif _type is bool:
                                val = bool(val)
                            elif _type is str:
                                val = str(val)
                            elif _type is type(None):
                                pass

                            self._gs_logger.info("Parameter type mismatch found, however, conversion successful")
                        except ValueError:
                            self._gs_logger.exception("Parameter type mismatch: Conversion did not work, "
                                                      "please check your hyper parameters!")
                            raise

                    setattr(self.forecaster, attr, val)
                else:
                    self._gs_logger.warning("Attribute {} not found. Default value will be used only.".format(attr))
                    pass

            # call ts_fit() and ts_test()
            # tic
            start = time()
            self.forecaster.ts_fit(suppress=suppress)
            self.forecaster.ts_test(show_plot=show_plot)
            # toc
            time_elapsed = time() - start
            #
            current_results = dict()
            current_results['params'] = self.forecaster.get_params_dict()
            current_results['rmse'] = self.forecaster.rmse
            current_results['time_elapsed'] = time_elapsed
            self.results.append(current_results)
            #
            if self.results[i]['rmse'] < rmse:
                rmse = self.results[i]['rmse']
                self.best_model['forecaster'] = self.forecaster.__copy__()
                self.best_model['hyper_params'] = self.results[i]['params']
                self.best_model['rmse'] = self.results[i]['rmse']
                self.best_model['time_elapsed'] = self.results[i]['time_elapsed']

        self._gs_logger.info("Best parameter combination:")
        self._gs_logger.info(self._print_dict(self.best_model['hyper_params']))
        self._gs_logger.info("RMSE {} :".format(self.best_model['rmse']))
       # else:
       #     self._gs_logger.info("OK")

        return self
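
# --- A minimal usage sketch for GridSearchClass (not part of the original example).
# Assumptions: ExponentialSmoothingForecaster comes from the same package, accepts the
# constructor arguments shown here, and exposes '_trend' and '_seasonal_periods'
# attributes that the hyper parameter names below map onto; the demo data is synthetic.
if __name__ == '__main__':
    idx = pd.date_range('2020-01-01', periods=120, freq='D')
    demo_df = pd.DataFrame({'Value': np.sin(np.arange(120) / 7.0) + 2.0}, index=idx)

    es = ExponentialSmoothingForecaster(ts_df=demo_df,
                                        freq='D',
                                        n_test=14,          # test data is required by grid_search
                                        trend='add',
                                        seasonal_periods=7)
    gs = GridSearchClass(forecaster=es,
                         hyper_params={'trend': ['add', 'mul'],
                                       'seasonal_periods': [7, 14]})
    gs.grid_search(suppress=True, show_plot=False)
    print(gs.best_model['hyper_params'], gs.best_model['rmse'])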