Esempio n. 1
0
def Best3Ensemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate mean forecast for ensemble of models.

    Args:
        ensemble_params (dict): ensemble definition; the keys of
            ``ensemble_params['models']`` are the ids of the component models
        forecasts_list (list): model ids, positionally aligned with
            ``forecasts``, ``lower_forecasts``, ``upper_forecasts`` and
            ``forecasts_runtime``
        forecasts (list): point forecast DataFrames, one per model
        lower_forecasts (list): lower bound DataFrames, one per model
        upper_forecasts (list): upper bound DataFrames, one per model
        forecasts_runtime (list): datetime.timedelta runtimes, one per model
        prediction_interval: passed through unchanged to the result

    Returns:
        PredictionObject holding the element-wise mean of the selected
        forecasts and bounds; fit_runtime is the sum of the selected runtimes.

    Raises:
        ValueError: if no id in ``forecasts_list`` is a component model.
            Without this guard the division below would silently produce an
            all-NaN ensemble.
    """
    component_models = ensemble_params['models']
    model_indexes = [
        idx for idx, x in enumerate(forecasts_list) if x in component_models
    ]
    if not model_indexes:
        raise ValueError("Best3 failed, no component models available.")
    selected = set(model_indexes)  # O(1) membership inside the loops below
    template = forecasts[0]

    def _mean_of_selected(frames):
        """Average the frames whose position is a selected model index."""
        total = pd.DataFrame(0,
                             index=template.index,
                             columns=template.columns)
        for idx, frame in enumerate(frames):
            if idx in selected:
                total = total + frame
        return total / len(model_indexes)

    ens_df = _mean_of_selected(forecasts)
    ens_df_lower = _mean_of_selected(lower_forecasts)
    ens_df_upper = _mean_of_selected(upper_forecasts)

    ens_runtime = datetime.timedelta(0)
    for idx, runtime in enumerate(forecasts_runtime):
        if idx in selected:
            ens_runtime = ens_runtime + runtime

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(ens_df.index),
        forecast_index=ens_df.index,
        forecast_columns=ens_df.columns,
        lower_forecast=ens_df_lower,
        forecast=ens_df,
        upper_forecast=ens_df_upper,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
Esempio n. 2
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast=False):
        """Forecast the periods immediately following the index given to .fit().

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor data; only read when
                self.regression_type == 'User', in which case it is appended
                column-wise to the date-part features
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        started_at = datetime.datetime.now()
        future_index = self.create_forecast_index(
            forecast_length=forecast_length)

        from autots.models.sklearn import date_part

        features = date_part(future_index, method='expanded')
        if self.regression_type == 'User':
            user_features = pd.DataFrame(future_regressor).reset_index(
                drop=True)
            features = pd.concat([features, user_features], axis=1)

        point, lower, upper = self.model.predict(
            features.values, conf_int=self.prediction_interval)

        # relabel the raw model output with the series names and future dates
        point_df = pd.DataFrame(point)
        point_df.columns = self.column_names
        point_df.index = future_index
        if just_point_forecast:
            return point_df

        lower_df = pd.DataFrame(lower,
                                index=future_index,
                                columns=self.column_names)
        upper_df = pd.DataFrame(upper,
                                index=future_index,
                                columns=self.column_names)
        elapsed = datetime.datetime.now() - started_at
        return PredictionObject(
            model_name=self.name,
            forecast_length=forecast_length,
            forecast_index=point_df.index,
            forecast_columns=point_df.columns,
            lower_forecast=lower_df,
            forecast=point_df,
            upper_forecast=upper_df,
            prediction_interval=self.prediction_interval,
            predict_runtime=elapsed,
            fit_runtime=self.fit_runtime,
            model_parameters=self.get_params(),
        )
Esempio n. 3
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast: bool = False):
        """Generate forecast data immediately following dates of .fit().

        The stored lag-1 tile values are repeated until they cover the
        forecast horizon; when self.lag_2 is a digit string/number, the
        lag-2 tile is built the same way and the two are averaged.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        started_at = datetime.datetime.now()

        def _tiled_frame(tile_values):
            # repeat the block enough times to cover the horizon, then trim
            repeats = int(np.ceil(forecast_length / len(tile_values.index)))
            stacked = np.tile(tile_values, (repeats, 1))[0:forecast_length]
            return pd.DataFrame(
                stacked,
                columns=self.column_names,
                index=self.create_forecast_index(
                    forecast_length=forecast_length),
            )

        point = _tiled_frame(self.tile_values_lag_1)
        if str(self.lag_2).isdigit():
            point = (point + _tiled_frame(self.tile_values_lag_2)) / 2
        point = point.astype(float)

        if just_point_forecast:
            return point

        upper_forecast, lower_forecast = Point_to_Probability(
            self.df_train,
            point,
            method='inferred_normal',
            prediction_interval=self.prediction_interval)
        elapsed = datetime.datetime.now() - started_at
        return PredictionObject(
            model_name=self.name,
            forecast_length=forecast_length,
            forecast_index=point.index,
            forecast_columns=point.columns,
            lower_forecast=lower_forecast,
            forecast=point,
            upper_forecast=upper_forecast,
            prediction_interval=self.prediction_interval,
            predict_runtime=elapsed,
            fit_runtime=self.fit_runtime,
            model_parameters=self.get_params())
Esempio n. 4
0
def DistEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate forecast for distance ensemble.

    The first ``ceil(forecast_length * dis_frac)`` rows come from the model
    named by ``ensemble_params['FirstModel']`` and the remaining rows come
    from the tail of ``ensemble_params['SecondModel']``.

    Args:
        ensemble_params (dict): must contain 'FirstModel', 'SecondModel',
            'dis_frac' and 'models' (whose keys are component model ids)
        forecasts_list (list): model ids; positions are used to index the
            value lists derived from the dictionaries below
        forecasts (dict): point forecast DataFrames
        lower_forecasts (dict): lower bound DataFrames
        upper_forecasts (dict): upper bound DataFrames
        forecasts_runtime (dict): datetime.timedelta runtimes
        prediction_interval: passed through unchanged to the result

    Returns:
        PredictionObject with the stitched forecasts; fit_runtime is the sum
        of the runtimes of the models listed in ``ensemble_params['models']``.

    Raises:
        ValueError: from ``list.index`` if 'FirstModel' or 'SecondModel' is
            not present in ``forecasts_list``.
    """
    # handle that the inputs are now dictionaries
    forecasts = list(forecasts.values())
    lower_forecasts = list(lower_forecasts.values())
    upper_forecasts = list(upper_forecasts.values())
    forecasts_runtime = list(forecasts_runtime.values())

    first_model_index = forecasts_list.index(ensemble_params['FirstModel'])
    second_model_index = forecasts_list.index(ensemble_params['SecondModel'])
    forecast_length = forecasts[0].shape[0]
    dis_frac = ensemble_params['dis_frac']
    first_bit = int(np.ceil(forecast_length * dis_frac))
    # Take the remainder by subtraction: floor(length * (1 - dis_frac)) can
    # lose a row to float rounding (e.g. length 10, dis_frac 0.9 gives 9 + 0).
    second_bit = forecast_length - first_bit

    def _stitch(frames):
        """Join the head of the first model with the tail of the second."""
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed
        return pd.concat([
            frames[first_model_index].head(first_bit),
            frames[second_model_index].tail(second_bit),
        ])

    ens_df = _stitch(forecasts)
    ens_df_lower = _stitch(lower_forecasts)
    ens_df_upper = _stitch(upper_forecasts)

    component_models = ensemble_params['models']
    model_indexes = {
        idx for idx, x in enumerate(forecasts_list) if x in component_models
    }

    ens_runtime = datetime.timedelta(0)
    for idx, runtime in enumerate(forecasts_runtime):
        if idx in model_indexes:
            ens_runtime = ens_runtime + runtime

    ens_result_obj = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(ens_df.index),
        forecast_index=ens_df.index,
        forecast_columns=ens_df.columns,
        lower_forecast=ens_df_lower,
        forecast=ens_df,
        upper_forecast=ens_df_upper,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result_obj
Esempio n. 5
0
    def predict(self, forecast_length: int, future_regressor = [], just_point_forecast = False):
        """Generates forecast data immediately following dates of index supplied to .fit()

        The horizon actually produced is self.ts_metadata['forecast_length'];
        a warning is printed when a longer ``forecast_length`` is requested.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        if int(forecast_length) > int(self.forecast_length):
            print("GluonTS must be refit to change forecast length!")
        predictStartTime = datetime.datetime.now()
        trained_length = self.ts_metadata['forecast_length']
        test_index = self.create_forecast_index(forecast_length=trained_length)

        gluon_results = self.GluonPredictor.predict(self.test_ds)
        # Collect one long-format frame per series and concatenate once;
        # concatenating inside the loop re-copies all earlier rows each time.
        series_frames = []
        for i, result in enumerate(gluon_results):
            series_frames.append(pd.DataFrame({
                "ForecastDate": pd.date_range(start=result.start_date,
                                              periods=trained_length,
                                              freq=self.frequency),
                "series_id": self.train_index[i],
                "LowerForecast": result.quantile(1 - self.prediction_interval),
                "MedianForecast": result.quantile(0.5),
                "UpperForecast": result.quantile(self.prediction_interval),
            }))
        if series_frames:
            all_forecast = pd.concat(series_frames, ignore_index=True)
        else:
            all_forecast = pd.DataFrame()

        def _wide(value_column):
            """Pivot one value column to dates x series, in training column order."""
            wide = all_forecast.pivot_table(values=value_column,
                                            index='ForecastDate',
                                            columns='series_id')
            return wide[self.column_names]

        forecast = _wide('MedianForecast')
        if just_point_forecast:
            return forecast

        lower_forecast = _wide('LowerForecast')
        upper_forecast = _wide('UpperForecast')
        predict_runtime = datetime.datetime.now() - predictStartTime
        prediction = PredictionObject(model_name=self.name,
                                      forecast_length=forecast_length,
                                      forecast_index=test_index,
                                      forecast_columns=forecast.columns,
                                      lower_forecast=lower_forecast,
                                      forecast=forecast,
                                      upper_forecast=upper_forecast,
                                      prediction_interval=self.prediction_interval,
                                      predict_runtime=predict_runtime,
                                      fit_runtime=self.fit_runtime,
                                      model_parameters=self.get_params())
        return prediction
Esempio n. 6
0
def BestNEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Average every supplied component forecast into one ensemble forecast.

    Args:
        ensemble_params (dict): stored as model_parameters on the result;
            not consulted to choose which models are averaged
        forecasts_list: not used by this function
        forecasts (dict): point forecast DataFrames
        lower_forecasts (dict): lower bound DataFrames
        upper_forecasts (dict): upper bound DataFrames
        forecasts_runtime (dict): datetime.timedelta runtimes
        prediction_interval: passed through unchanged to the result

    Returns:
        PredictionObject with the mean forecast and bounds; fit_runtime is
        the sum of all supplied runtimes.

    Raises:
        ValueError: if ``forecasts`` is empty.
    """
    n_models = len(forecasts)
    if n_models < 1:
        raise ValueError("BestN failed, no component models available.")
    # the first forecast supplies the shape of the zero-filled accumulator
    template = next(iter(forecasts.values()))
    col_labels = template.columns
    row_labels = template.index

    def _average(frames_by_model):
        running = pd.DataFrame(0, index=row_labels, columns=col_labels)
        for frame in frames_by_model.values():
            running = running + frame
        # every average divides by the number of point forecasts
        return running / n_models

    mean_point = _average(forecasts)
    mean_lower = _average(lower_forecasts)
    mean_upper = _average(upper_forecasts)

    total_runtime = sum(forecasts_runtime.values(), datetime.timedelta(0))

    return PredictionObject(
        model_name="Ensemble",
        forecast_length=len(mean_point.index),
        forecast_index=mean_point.index,
        forecast_columns=mean_point.columns,
        lower_forecast=mean_lower,
        forecast=mean_point,
        upper_forecast=mean_upper,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=total_runtime,
        model_parameters=ensemble_params,
    )
Esempio n. 7
0
def HorizontalEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate forecast for per_series ensembling.

    Each series named in ``ensemble_params['series']`` takes its forecast
    column from the model id mapped to it.

    Args:
        ensemble_params (dict): must contain 'models' (keys are component
            model ids) and 'series' (series name -> model id)
        forecasts_list (list): model ids, positionally aligned with the
            sequences below
        forecasts: point forecast DataFrames, indexed by position
        lower_forecasts: lower bound DataFrames, indexed by position
        upper_forecasts: upper bound DataFrames, indexed by position
        forecasts_runtime: datetime.timedelta runtimes, by position
        prediction_interval: passed through unchanged to the result

    Returns:
        PredictionObject assembled column by column; fit_runtime is the sum
        of the runtimes of the component models found in ``forecasts_list``.

    Raises:
        KeyError: if a model id in ``ensemble_params['series']`` is not among
            the component models present in ``forecasts_list``.
    """
    id_list = list(ensemble_params['models'].keys())
    mod_dic = {x: idx for idx, x in enumerate(forecasts_list) if x in id_list}

    forecast_df, u_forecast_df, l_forecast_df = (
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
    )
    for series, mod_id in ensemble_params['series'].items():
        l_idx = mod_dic[mod_id]
        try:
            c_fore = forecasts[l_idx][series]
            forecast_df = pd.concat([forecast_df, c_fore], axis=1)
        except Exception as e:
            # best-effort: report the failure and the offending forecast,
            # then carry on (a bare repr(e) here used to discard the error)
            print(repr(e))
            print(forecasts[l_idx].columns)
            print(forecasts[l_idx].head())
        # upper
        c_fore = upper_forecasts[l_idx][series]
        u_forecast_df = pd.concat([u_forecast_df, c_fore], axis=1)
        # lower
        c_fore = lower_forecasts[l_idx][series]
        l_forecast_df = pd.concat([l_forecast_df, c_fore], axis=1)

    # built once instead of rebuilding a list on every loop iteration
    used_indexes = set(mod_dic.values())
    ens_runtime = datetime.timedelta(0)
    for idx, runtime in enumerate(forecasts_runtime):
        if idx in used_indexes:
            ens_runtime = ens_runtime + runtime

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(forecast_df.index),
        forecast_index=forecast_df.index,
        forecast_columns=forecast_df.columns,
        lower_forecast=l_forecast_df,
        forecast=forecast_df,
        upper_forecast=u_forecast_df,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
Esempio n. 8
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast=False):
        """Repeat self.last_values for every period of the forecast horizon.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        started_at = datetime.datetime.now()
        repeated = np.tile(self.last_values, (forecast_length, 1))
        point = pd.DataFrame(
            repeated,
            columns=self.column_names,
            index=self.create_forecast_index(forecast_length=forecast_length),
        )
        if just_point_forecast:
            return point

        # bounds are the point forecast offset by 0.8 of self.upper / self.lower
        as_float = point.astype(float)
        upper_forecast = as_float + (self.upper * 0.8)
        lower_forecast = point.astype(float) - (self.lower * 0.8)
        elapsed = datetime.datetime.now() - started_at
        return PredictionObject(
            model_name=self.name,
            forecast_length=forecast_length,
            forecast_index=point.index,
            forecast_columns=point.columns,
            lower_forecast=lower_forecast,
            forecast=point,
            upper_forecast=upper_forecast,
            prediction_interval=self.prediction_interval,
            predict_runtime=elapsed,
            fit_runtime=self.fit_runtime,
            model_parameters=self.get_params(),
        )
Esempio n. 9
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast=False):
        """Return an all-zero forecast for every training column.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        started_at = datetime.datetime.now()
        n_series = self.train_shape[1]
        zeros = pd.DataFrame(
            np.zeros((forecast_length, n_series)),
            columns=self.column_names,
            index=self.create_forecast_index(forecast_length=forecast_length),
        )
        if just_point_forecast:
            return zeros

        elapsed = datetime.datetime.now() - started_at
        # the same zero frame serves as point forecast and both bounds
        return PredictionObject(
            model_name=self.name,
            forecast_length=forecast_length,
            forecast_index=zeros.index,
            forecast_columns=zeros.columns,
            lower_forecast=zeros,
            forecast=zeros,
            upper_forecast=zeros,
            prediction_interval=self.prediction_interval,
            predict_runtime=elapsed,
            fit_runtime=self.fit_runtime,
            model_parameters=self.get_params(),
        )
Esempio n. 10
0
    def predict(
        self,
        forecast_length: int,
        future_regressor=[],
        just_point_forecast: bool = False,
    ):
        """Generates forecast data immediately following dates of index supplied to .fit()

        tsfresh features are extracted from self.df_train, a regressor chosen
        by self.regression_model is fit on them, and the forecast is then
        produced one step at a time, feeding each prediction back in as data.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used by this method
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts

        Raises:
            ImportError: if tsfresh is not available.
        """
        if not _has_tsfresh:
            raise ImportError("Package tsfresh is required")
        predictStartTime = datetime.datetime.now()

        from tsfresh.utilities.dataframe_functions import make_forecasting_frame
        from tsfresh.utilities.dataframe_functions import impute as tsfresh_impute

        max_timeshift = self.max_timeshift
        regression_model = self.regression_model
        feature_selection = self.feature_selection

        def _build_feature_frames(frame):
            """Extract tsfresh features and targets for each column of frame.

            Feature names are prefixed with the column position so that the
            training and forecasting passes produce matching labels.
            """
            features = pd.DataFrame()
            targets = pd.DataFrame()
            for counter, column in enumerate(frame.columns):
                df_shift, current_y = make_forecasting_frame(
                    frame[column],
                    kind="time_series",
                    max_timeshift=max_timeshift,
                    rolling_direction=1,
                )
                current_X = extract_features(
                    df_shift,
                    column_id="id",
                    column_sort="time",
                    column_value="value",
                    impute_function=tsfresh_impute,
                    show_warnings=False,
                    default_fc_parameters=EfficientFCParameters(),
                    n_jobs=1,
                )
                current_X["feature_last_value"] = current_y.shift(1)
                prefix = str(counter) + '_'
                current_X.rename(columns=lambda name: prefix + name,
                                 inplace=True)

                features = pd.concat([features, current_X], axis=1)
                targets = pd.concat([targets, current_y], axis=1)
            return features, targets

        sktraindata = self.df_train.copy()
        X, y = _build_feature_frames(sktraindata)

        # drop constant features
        X = X.loc[:, X.apply(pd.Series.nunique) != 1]
        X = X.replace([np.inf, -np.inf], np.nan)
        X = X.fillna(0)
        # ffill()/bfill() replace the deprecated fillna(method=...) form
        y = y.ffill().bfill()

        # Every selector keeps X a DataFrame with its original column labels:
        # the forecasting loop below selects features by X.columns, and the
        # row drop uses X.iloc, so an ndarray or relabelled frame would fail.
        if feature_selection == 'Variance':
            from sklearn.feature_selection import VarianceThreshold

            selector = VarianceThreshold(threshold=(0.15))
            selector.fit(X)
            X = X.loc[:, selector.get_support()]
        elif feature_selection == 'Percentile':
            from sklearn.feature_selection import SelectPercentile, chi2

            selector = SelectPercentile(chi2, percentile=20)
            selector.fit(X, y[y.columns[0]])
            X = X.loc[:, selector.get_support()]
        elif feature_selection == 'DecisionTree':
            from sklearn.tree import DecisionTreeRegressor
            from sklearn.feature_selection import SelectFromModel

            clf = DecisionTreeRegressor()
            clf = clf.fit(X, y)
            selector = SelectFromModel(clf, prefit=True)
            X = X.loc[:, selector.get_support()]
        elif feature_selection == 'Lasso':
            from sklearn.linear_model import MultiTaskLasso
            from sklearn.feature_selection import SelectFromModel

            clf = MultiTaskLasso(max_iter=2000)
            clf = clf.fit(X, y)
            selector = SelectFromModel(clf, prefit=True)
            X = X.loc[:, selector.get_support()]

        # Drop first line
        X = X.iloc[1:, ]
        y = y.iloc[1:]

        index = self.create_forecast_index(forecast_length=forecast_length)

        if regression_model == 'ElasticNet':
            from sklearn.linear_model import MultiTaskElasticNet

            regr = MultiTaskElasticNet(alpha=1.0,
                                       random_state=self.random_seed)
        elif regression_model == 'DecisionTree':
            from sklearn.tree import DecisionTreeRegressor

            regr = DecisionTreeRegressor(random_state=self.random_seed)
        elif regression_model == 'MLP':
            from sklearn.neural_network import MLPRegressor

            # relu/tanh lbfgs/adam layer_sizes (100) (10)
            regr = MLPRegressor(
                hidden_layer_sizes=(10, 25, 10),
                verbose=self.verbose_bool,
                max_iter=200,
                activation='tanh',
                solver='lbfgs',
                random_state=self.random_seed,
            )
        elif regression_model == 'KNN':
            from sklearn.multioutput import MultiOutputRegressor
            from sklearn.neighbors import KNeighborsRegressor

            # KNeighborsRegressor has no random_state parameter; passing one
            # raises TypeError
            regr = MultiOutputRegressor(KNeighborsRegressor())
        elif regression_model == 'Adaboost':
            from sklearn.multioutput import MultiOutputRegressor
            from sklearn.ensemble import AdaBoostRegressor

            regr = MultiOutputRegressor(AdaBoostRegressor(
                n_estimators=200))  # , random_state=self.random_seed))
        else:
            # any other value falls back to a random forest
            from sklearn.ensemble import RandomForestRegressor

            regr = RandomForestRegressor(random_state=self.random_seed,
                                         n_estimators=1000,
                                         verbose=self.verbose)

        regr.fit(X, y)

        combined_index = self.df_train.index.append(index)
        forecast = pd.DataFrame()
        # positional column labels so predictions can be appended directly
        sktraindata.columns = list(range(len(sktraindata.columns)))

        for _ in range(forecast_length):
            x_dat, _ = _build_feature_frames(sktraindata.tail(max_timeshift))
            x_dat = x_dat[X.columns]
            rfPred = pd.DataFrame(regr.predict(x_dat.tail(1).values))

            forecast = pd.concat([forecast, rfPred], axis=0, ignore_index=True)
            sktraindata = pd.concat([sktraindata, rfPred],
                                    axis=0,
                                    ignore_index=True)
            sktraindata.index = combined_index[:len(sktraindata.index)]

        forecast.columns = self.column_names
        forecast.index = index

        if just_point_forecast:
            return forecast
        else:
            upper_forecast, lower_forecast = Point_to_Probability(
                self.df_train,
                forecast,
                prediction_interval=self.prediction_interval)

            predict_runtime = datetime.datetime.now() - predictStartTime
            prediction = PredictionObject(
                model_name=self.name,
                forecast_length=forecast_length,
                forecast_index=forecast.index,
                forecast_columns=forecast.columns,
                lower_forecast=lower_forecast,
                forecast=forecast,
                upper_forecast=upper_forecast,
                prediction_interval=self.prediction_interval,
                predict_runtime=predict_runtime,
                fit_runtime=self.fit_runtime,
                model_parameters=self.get_params(),
            )
            return prediction
Esempio n. 11
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast=False):
        """Forecast the periods immediately following the index given to .fit().

        The point forecast is the mean of the tfp.sts forecast distribution;
        the bounds are that mean plus/minus a normal quantile multiple of the
        distribution's standard deviation.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        started_at = datetime.datetime.now()
        future_index = self.create_forecast_index(
            forecast_length=forecast_length)
        forecast_dist = tfp.sts.forecast(
            model=self.demand_model,
            observed_time_series=self.demand2d,
            parameter_samples=self.q_samples,
            num_steps_forecast=forecast_length,
            include_observation_noise=True,
        )

        def _to_frame(series_by_step):
            # values arrive as (series, step); flip to dates x series
            return pd.DataFrame(series_by_step,
                                index=self.column_names,
                                columns=future_index).transpose()

        forecast = _to_frame(forecast_dist.mean().numpy()[..., 0])
        if just_point_forecast:
            return forecast

        # assume follows rules of normal because those are conventional
        from scipy.stats import norm

        upper_tail = 1 - ((1 - self.prediction_interval) / 2)
        z_value = norm.ppf(upper_tail)
        scale = forecast_dist.stddev().numpy()[..., 0]
        upper_forecast = _to_frame(
            forecast.transpose().values + (scale * z_value))
        lower_forecast = _to_frame(
            forecast.transpose().values - (scale * z_value))

        elapsed = datetime.datetime.now() - started_at
        return PredictionObject(
            model_name=self.name,
            forecast_length=forecast_length,
            forecast_index=future_index,
            forecast_columns=forecast.columns,
            lower_forecast=lower_forecast,
            forecast=forecast,
            upper_forecast=upper_forecast,
            prediction_interval=self.prediction_interval,
            predict_runtime=elapsed,
            fit_runtime=self.fit_runtime,
            model_parameters=self.get_params(),
        )
Esempio n. 12
0
def HDistEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate forecast for per_series per distance ensembling.

    The first ``ceil(dis_frac * forecast_length)`` rows of the result come
    from the models named in ``ensemble_params['series1']``; the remaining
    rows come from the models named in ``ensemble_params['series2']``.

    Args:
        ensemble_params (dict): reads 'models' (mapping keyed by model id),
            'dis_frac' (number multiplied by the forecast length) and
            'series1' / 'series2' (mappings of series name -> model id).
        forecasts_list (list): model ids. The position of an id in this list
            is used as the position of that model in the values of the
            dictionaries below - presumably both share the same ordering;
            verify against the caller.
        forecasts (dict): point forecast DataFrames (only values are used).
        lower_forecasts (dict): lower bound DataFrames (only values are used).
        upper_forecasts (dict): upper bound DataFrames (only values are used).
        forecasts_runtime (dict): timedelta per model (only values are used).
        prediction_interval: passed through to the PredictionObject.

    Returns:
        PredictionObject holding the spliced forecasts and summed runtimes.

    Raises:
        KeyError: if a model id in 'series1'/'series2' is not among the ids
            shared by ``forecasts_list`` and ``ensemble_params['models']``.
    """
    # handle that the inputs are now dictionaries
    forecasts = list(forecasts.values())
    lower_forecasts = list(lower_forecasts.values())
    upper_forecasts = list(upper_forecasts.values())
    forecasts_runtime = list(forecasts_runtime.values())

    id_set = set(ensemble_params['models'].keys())
    mod_dic = {x: idx for idx, x in enumerate(forecasts_list) if x in id_set}
    forecast_length = forecasts[0].shape[0]
    dist_n = int(np.ceil(ensemble_params['dis_frac'] * forecast_length))
    dist_last = forecast_length - dist_n

    def _gather(series_map):
        """Collect one column per series and return (point, upper, lower)."""
        point_parts, upper_parts, lower_parts = [], [], []
        for series, mod_id in series_map.items():
            l_idx = mod_dic[mod_id]
            try:
                point_parts.append(forecasts[l_idx][series])
            except Exception as e:
                # best effort: report the problem and keep going
                print(repr(e))
                print(forecasts[l_idx].columns)
                print(forecasts[l_idx].head())
            upper_parts.append(upper_forecasts[l_idx][series])
            lower_parts.append(lower_forecasts[l_idx][series])
        # concatenate once instead of growing a frame inside the loop
        return tuple(
            pd.concat(parts, axis=1) if parts else pd.DataFrame()
            for parts in (point_parts, upper_parts, lower_parts)
        )

    def _splice(near, far):
        """Stack the first dist_n rows of near on the last rows of far."""
        return pd.concat([near.head(dist_n), far.tail(dist_last)], axis=0)

    near_point, near_upper, near_lower = _gather(ensemble_params['series1'])
    far_point, far_upper, far_lower = _gather(ensemble_params['series2'])

    forecast_df = _splice(near_point, far_point)
    u_forecast_df = _splice(near_upper, far_upper)
    l_forecast_df = _splice(near_lower, far_lower)

    # only the runtimes of models that take part in the ensemble are summed
    used_positions = set(mod_dic.values())
    ens_runtime = sum(
        (runtime for idx, runtime in enumerate(forecasts_runtime)
         if idx in used_positions),
        datetime.timedelta(0),
    )

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(forecast_df.index),
        forecast_index=forecast_df.index,
        forecast_columns=forecast_df.columns,
        lower_forecast=l_forecast_df,
        forecast=forecast_df,
        upper_forecast=u_forecast_df,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
Esempio n. 13
0
def HorizontalEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
    df_train=None,
):
    """Generate forecast for per_series ensembling.

    Each series is forecast by the single model assigned to it in
    ``ensemble_params['series']``; the chosen columns are stitched together
    and ordered like the columns of ``df_train``.

    Args:
        ensemble_params (dict): reads 'series', a mapping of series name ->
            model id.
        forecasts_list: not used by this function.
        forecasts (dict): model id -> point forecast DataFrame.
        lower_forecasts (dict): model id -> lower bound DataFrame.
        upper_forecasts (dict): model id -> upper bound DataFrame.
        forecasts_runtime (dict): model id -> timedelta; all values are summed.
        prediction_interval: passed through to the PredictionObject.
        df_train (pandas.DataFrame): training data. Its ``.columns`` is read
            unconditionally, so it must be supplied despite the None default.

    Returns:
        PredictionObject holding the assembled forecasts.

    Raises:
        ValueError: if no series/model pair in the template matches both the
            columns of ``df_train`` and the available forecasts.
    """
    available_models = list(forecasts.keys())
    known_matches = ensemble_params['series']
    org_idx = df_train.columns
    org_list = org_idx.tolist()
    # remove any unavailable models or unnecessary series
    known_matches = {
        ser: mod
        for ser, mod in known_matches.items() if ser in org_list
    }
    k = {
        ser: mod
        for ser, mod in known_matches.items() if mod in available_models
    }
    # check if any series are missing from model list
    if not k:
        raise ValueError(
            "Horizontal template has no models matching this data!")
    if set(org_list) - set(k):
        # some series have no usable model: let the classifier assign one
        all_series = horizontal_classifier(df_train, k)
    else:
        all_series = known_matches

    point_parts, upper_parts, lower_parts = [], [], []
    for series, mod_id in all_series.items():
        try:
            point_parts.append(forecasts[mod_id][series])
        except Exception as e:
            print(f"Horizontal ensemble unable to add model {repr(e)}")
        upper_parts.append(upper_forecasts[mod_id][series])
        lower_parts.append(lower_forecasts[mod_id][series])

    def _assemble(parts):
        """Join the collected columns once and align them to df_train."""
        frame = pd.concat(parts, axis=1) if parts else pd.DataFrame()
        # reindex returns a new frame; the result has to be kept
        return frame.reindex(columns=org_idx)

    # make sure columns align to original
    forecast_df = _assemble(point_parts)
    u_forecast_df = _assemble(upper_parts)
    l_forecast_df = _assemble(lower_parts)

    # combine runtimes
    ens_runtime = sum(forecasts_runtime.values(), datetime.timedelta(0))

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(forecast_df.index),
        forecast_index=forecast_df.index,
        forecast_columns=forecast_df.columns,
        lower_forecast=l_forecast_df,
        forecast=forecast_df,
        upper_forecast=u_forecast_df,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
Esempio n. 14
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast: bool = False):
        """Generate forecast data immediately following dates of index supplied to .fit().

        One Prophet model is fit and predicted for every column of
        ``self.df_train``.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: regressor values for the forecast period. Only
                read when ``self.regression_type == 'User'``; it must then
                expose ``.ndim`` (and ``.values`` when one-dimensional).
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts

        Raises:
            ImportError: if the module-level ``_has_prophet`` flag is falsy.
        """
        if not _has_prophet:
            raise ImportError("Package fbprophet is required")
        predictStartTime = datetime.datetime.now()
        # NOTE: the length of future_regressor is not validated against
        # forecast_length.
        test_index = self.create_forecast_index(
            forecast_length=forecast_length)
        if self.verbose <= 0:
            logging.getLogger('fbprophet').setLevel(logging.WARNING)

        use_regressor = self.regression_type == 'User'
        point_parts, lower_parts, upper_parts = [], [], []
        for series in self.df_train.columns:
            # copy only the column being modelled rather than the whole
            # training frame on every iteration
            current_series = self.df_train[[series]].copy()
            current_series['y'] = current_series[series]
            current_series['ds'] = current_series.index
            if self.verbose > 0:
                print("FBProphet Initial Set")
            if use_regressor:
                current_series[self.regressor_name] = self.regressor_train

            m = Prophet(interval_width=self.prediction_interval)
            if self.holiday:
                m.add_country_holidays(country_name=self.holiday_country)
            if use_regressor:
                m.add_regressor(self.regressor_name)
            m = m.fit(current_series)
            # NOTE(review): no freq is passed here, so the library default
            # is used - confirm it matches the training data frequency.
            future = m.make_future_dataframe(periods=forecast_length)
            if use_regressor:
                if future_regressor.ndim > 1:
                    a = self.dimensionality_reducer.transform(future_regressor)
                    a = np.append(self.regressor_train, a)
                else:
                    a = np.append(self.regressor_train,
                                  future_regressor.values)
                future[self.regressor_name] = a
            fcst = m.predict(future)
            fcst = fcst.tail(forecast_length)  # remove the backcast
            point_parts.append(fcst['yhat'])
            lower_parts.append(fcst['yhat_lower'])
            upper_parts.append(fcst['yhat_upper'])

        def _combine(parts):
            """Join the per-series columns once and apply final labels."""
            frame = pd.concat(parts, axis=1) if parts else pd.DataFrame()
            frame.columns = self.column_names
            frame.index = test_index
            return frame

        forecast = _combine(point_parts)
        lower_forecast = _combine(lower_parts)
        upper_forecast = _combine(upper_parts)

        if just_point_forecast:
            return forecast
        else:
            predict_runtime = datetime.datetime.now() - predictStartTime
            prediction = PredictionObject(
                model_name=self.name,
                forecast_length=forecast_length,
                forecast_index=forecast.index,
                forecast_columns=forecast.columns,
                lower_forecast=lower_forecast,
                forecast=forecast,
                upper_forecast=upper_forecast,
                prediction_interval=self.prediction_interval,
                predict_runtime=predict_runtime,
                fit_runtime=self.fit_runtime,
                model_parameters=self.get_params())

            return prediction
Esempio n. 15
0
    def predict(self,
                forecast_length: int,
                future_regressor=[],
                just_point_forecast=False):
        """Generate forecast data immediately following dates of index supplied to .fit().

        The forecasts are taken from the frames already stored on the
        instance (``self.forecasts``, ``self.lower_forecasts``,
        ``self.upper_forecasts``). When a stored frame has fewer rows than
        ``forecast_length`` it is extended by forward-filling.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: additional regressor, not used
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts
        """
        predictStartTime = datetime.datetime.now()

        def _fit_to_horizon(stored):
            """Trim or forward-fill stored to forecast_length rows, relabel."""
            frame = stored.head(forecast_length)
            shortfall = forecast_length - frame.shape[0]
            if shortfall > 0:
                padding = pd.DataFrame(index=np.arange(shortfall),
                                       columns=frame.columns)
                # .ffill() replaces the deprecated fillna(method='ffill')
                frame = pd.concat([frame, padding], axis=0,
                                  sort=False).ffill()
            frame.columns = self.column_names
            frame.index = self.create_forecast_index(
                forecast_length=forecast_length)
            return frame

        forecasts = _fit_to_horizon(self.forecasts)

        if just_point_forecast:
            return forecasts
        else:
            lower_forecasts = _fit_to_horizon(self.lower_forecasts)
            upper_forecasts = _fit_to_horizon(self.upper_forecasts)

            predict_runtime = datetime.datetime.now() - predictStartTime
            prediction = PredictionObject(
                model_name=self.name,
                forecast_length=forecast_length,
                forecast_index=forecasts.index,
                forecast_columns=forecasts.columns,
                lower_forecast=lower_forecasts,
                forecast=forecasts,
                upper_forecast=upper_forecasts,
                prediction_interval=self.prediction_interval,
                predict_runtime=predict_runtime,
                fit_runtime=self.fit_runtime,
                model_parameters=self.get_params(),
            )

            return prediction
Esempio n. 16
0
    def predict(
        self,
        forecast_length: int,
        future_regressor=[],
        just_point_forecast: bool = False,
    ):
        """Generate forecast data immediately following dates of index supplied to .fit().

        One Prophet model is fit and predicted for every column of
        ``self.df_train``. The per-series work runs through joblib when
        ``self.n_jobs`` is not 0 or 1, there are at least four series and
        joblib can be imported; otherwise it runs in a plain loop.

        Args:
            forecast_length (int): Number of periods of data to forecast ahead
            future_regressor: regressor values for the forecast period. Only
                read when ``self.regression_type == 'User'``; it must then
                expose ``.ndim`` (and ``.values`` when one-dimensional).
            just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

        Returns:
            Either a PredictionObject of forecasts and metadata, or
            if just_point_forecast == True, a dataframe of point forecasts

        Raises:
            ImportError: if the module-level ``_has_prophet`` flag is falsy.
        """
        if not _has_prophet:
            raise ImportError("Package fbprophet is required")
        predictStartTime = datetime.datetime.now()
        # NOTE: the length of future_regressor is not validated against
        # forecast_length.
        test_index = self.create_forecast_index(
            forecast_length=forecast_length)
        if self.verbose <= 0:
            logging.getLogger('fbprophet').setLevel(logging.WARNING)

        def seek_the_oracle(df, args, series):
            """Fit Prophet on one column of df; return (point, lower, upper)."""
            # Work on a private single-column copy so the shared training
            # frame never gains the 'y', 'ds' or regressor columns.
            current_series = df[[series]].copy()
            current_series['y'] = current_series[series]
            current_series['ds'] = current_series.index
            m = Prophet(interval_width=args['prediction_interval'])
            if args['holiday']:
                m.add_country_holidays(country_name=args['holiday_country'])
            if args['regression_type'] == 'User':
                current_series[args['regressor_name']] = args[
                    'regressor_train']
                m.add_regressor(args['regressor_name'])
            m = m.fit(current_series)
            # NOTE(review): no freq is passed here, so the library default
            # is used - confirm it matches the training data frequency.
            future = m.make_future_dataframe(periods=forecast_length)
            if args['regression_type'] == 'User':
                if future_regressor.ndim > 1:
                    a = args['dimensionality_reducer'].transform(
                        future_regressor)
                    a = np.append(args['regressor_train'], a)
                else:
                    a = np.append(args['regressor_train'],
                                  future_regressor.values)
                future[args['regressor_name']] = a
            fcst = m.predict(future)
            fcst = fcst.tail(forecast_length)  # remove the backcast
            forecast = fcst['yhat']
            forecast.name = series
            lower_forecast = fcst['yhat_lower']
            lower_forecast.name = series
            upper_forecast = fcst['yhat_upper']
            upper_forecast.name = series
            return (forecast, lower_forecast, upper_forecast)

        args = {
            'holiday': self.holiday,
            'holiday_country': self.holiday_country,
            'regression_type': self.regression_type,
            'regressor_name': self.regressor_name,
            'regressor_train': self.regressor_train,
            'dimensionality_reducer': self.dimensionality_reducer,
            'prediction_interval': self.prediction_interval,
        }
        parallel = True
        cols = self.df_train.columns.tolist()
        if self.n_jobs in [0, 1] or len(cols) < 4:
            parallel = False
        else:
            try:
                from joblib import Parallel, delayed
            except Exception:
                parallel = False
        # joblib multiprocessing to loop through series
        if parallel:
            verbs = 0 if self.verbose < 1 else self.verbose - 1
            df_list = Parallel(n_jobs=self.n_jobs, verbose=(verbs))(
                delayed(seek_the_oracle)(
                    df=self.df_train[[col]], args=args, series=col)
                for col in cols)
        else:
            df_list = [
                seek_the_oracle(self.df_train[[col]], args, col)
                for col in cols
            ]
        # transpose [(point, lower, upper), ...] into three lists of series
        complete = list(map(list, zip(*df_list)))
        forecast = pd.concat(complete[0], axis=1)
        lower_forecast = pd.concat(complete[1], axis=1)
        upper_forecast = pd.concat(complete[2], axis=1)
        # Prophet returns integer row labels; replace them with the
        # forecast index built from the training data
        forecast.index = test_index
        lower_forecast.index = test_index
        upper_forecast.index = test_index

        if just_point_forecast:
            return forecast
        else:
            predict_runtime = datetime.datetime.now() - predictStartTime
            prediction = PredictionObject(
                model_name=self.name,
                forecast_length=forecast_length,
                forecast_index=forecast.index,
                forecast_columns=forecast.columns,
                lower_forecast=lower_forecast,
                forecast=forecast,
                upper_forecast=upper_forecast,
                prediction_interval=self.prediction_interval,
                predict_runtime=predict_runtime,
                fit_runtime=self.fit_runtime,
                model_parameters=self.get_params(),
            )

            return prediction