def Best3Ensemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Build an equal-weight mean forecast from the selected component models.

    Args:
        ensemble_params (dict): ensemble definition; the keys of
            ensemble_params['models'] are the model ids to average
        forecasts_list (list): model ids, positionally aligned with the
            forecast sequences below
        forecasts (list): point forecast DataFrames, one per model
        lower_forecasts (list): lower bound DataFrames, one per model
        upper_forecasts (list): upper bound DataFrames, one per model
        forecasts_runtime (list): datetime.timedelta runtimes, one per model
        prediction_interval: passed through unchanged to the PredictionObject

    Returns:
        PredictionObject holding the averaged point, lower and upper forecasts.
        fit_runtime is the sum of the selected models' runtimes.
    """
    wanted_ids = list(ensemble_params['models'].keys())
    chosen = [
        pos for pos, model_id in enumerate(forecasts_list) if model_id in wanted_ids
    ]
    # every accumulator takes its shape from the first point forecast
    template = forecasts[0]

    def _mean_of_chosen(frames):
        # sum the frames at the chosen positions, then divide by the model count
        total = pd.DataFrame(0, index=template.index, columns=template.columns)
        for pos, frame in enumerate(frames):
            if pos in chosen:
                total = total + frame
        return total / len(chosen)

    ens_df = _mean_of_chosen(forecasts)
    ens_df_lower = _mean_of_chosen(lower_forecasts)
    ens_df_upper = _mean_of_chosen(upper_forecasts)

    ens_runtime = sum(
        (rt for pos, rt in enumerate(forecasts_runtime) if pos in chosen),
        datetime.timedelta(0),
    )

    return PredictionObject(
        model_name="Ensemble",
        forecast_length=len(ens_df.index),
        forecast_index=ens_df.index,
        forecast_columns=ens_df.columns,
        lower_forecast=ens_df_lower,
        forecast=ens_df,
        upper_forecast=ens_df_upper,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast=False):
    """Forecast the periods immediately following the index supplied to .fit().

    Date-part features are generated for the forecast index and passed to
    self.model.predict together with the configured prediction interval.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor values; only used (appended as
            extra feature columns) when self.regression_type == 'User'
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts
    """
    started_at = datetime.datetime.now()
    test_index = self.create_forecast_index(forecast_length=forecast_length)

    from autots.models.sklearn import date_part

    features = date_part(test_index, method='expanded')
    if self.regression_type == 'User':
        regressor_frame = pd.DataFrame(future_regressor).reset_index(drop=True)
        features = pd.concat([features, regressor_frame], axis=1)

    point_values, lower_values, upper_values = self.model.predict(
        features.values, conf_int=self.prediction_interval
    )

    df_forecast = pd.DataFrame(point_values)
    df_forecast.columns = self.column_names
    df_forecast.index = test_index
    if just_point_forecast:
        return df_forecast

    lower_frame = pd.DataFrame(
        lower_values, index=test_index, columns=self.column_names
    )
    upper_frame = pd.DataFrame(
        upper_values, index=test_index, columns=self.column_names
    )
    elapsed = datetime.datetime.now() - started_at
    return PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=df_forecast.index,
        forecast_columns=df_forecast.columns,
        lower_forecast=lower_frame,
        forecast=df_forecast,
        upper_forecast=upper_frame,
        prediction_interval=self.prediction_interval,
        predict_runtime=elapsed,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast: bool = False):
    """Generate forecast data immediately following dates of .fit().

    The stored rows of self.tile_values_lag_1 are repeated until they cover
    forecast_length rows. When self.lag_2 is a digit string/number, the same
    is done with self.tile_values_lag_2 and the two results are averaged.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor, not used
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts
    """
    started_at = datetime.datetime.now()

    def _tiled(pattern):
        # repeat the pattern enough times to cover the horizon, then trim
        repeats = int(np.ceil(forecast_length / len(pattern.index)))
        return pd.DataFrame(
            np.tile(pattern, (repeats, 1))[0:forecast_length],
            columns=self.column_names,
            index=self.create_forecast_index(forecast_length=forecast_length),
        )

    point = _tiled(self.tile_values_lag_1)
    if str(self.lag_2).isdigit():
        point = (point + _tiled(self.tile_values_lag_2)) / 2
    point = point.astype(float)

    if just_point_forecast:
        return point

    upper_forecast, lower_forecast = Point_to_Probability(
        self.df_train,
        point,
        method='inferred_normal',
        prediction_interval=self.prediction_interval,
    )
    elapsed = datetime.datetime.now() - started_at
    return PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=point.index,
        forecast_columns=point.columns,
        lower_forecast=lower_forecast,
        forecast=point,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=elapsed,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
def DistEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate forecast for distance ensemble.

    The first part of the horizon is taken from ensemble_params['FirstModel']
    and the remainder from ensemble_params['SecondModel'].

    Args:
        ensemble_params (dict): must provide 'FirstModel', 'SecondModel',
            'dis_frac' (fraction of the horizon given to the first model) and
            'models' (its keys select which runtimes are summed)
        forecasts_list (list): model ids, positionally aligned with the
            values of the dictionaries below
        forecasts (dict): point forecast DataFrames
        lower_forecasts (dict): lower bound DataFrames
        upper_forecasts (dict): upper bound DataFrames
        forecasts_runtime (dict): datetime.timedelta runtimes
        prediction_interval: passed through unchanged to the PredictionObject

    Returns:
        PredictionObject with the stitched point, lower and upper forecasts.

    Raises:
        ValueError: if FirstModel or SecondModel is not in forecasts_list
            (raised by list.index).
    """
    # handle that the inputs are now dictionaries
    forecasts = list(forecasts.values())
    lower_forecasts = list(lower_forecasts.values())
    upper_forecasts = list(upper_forecasts.values())
    forecasts_runtime = list(forecasts_runtime.values())

    first_model_index = forecasts_list.index(ensemble_params['FirstModel'])
    second_model_index = forecasts_list.index(ensemble_params['SecondModel'])
    forecast_length = forecasts[0].shape[0]
    dis_frac = ensemble_params['dis_frac']
    first_bit = int(np.ceil(forecast_length * dis_frac))
    # keep the split inside the horizon even for an out-of-range dis_frac
    first_bit = min(forecast_length, max(0, first_bit))
    # Derive the remainder by subtraction: floor(n * (1 - frac)) can lose a row
    # to float rounding (e.g. n=10, frac=0.9 gives 9 + 0 rows).
    second_bit = forecast_length - first_bit

    def _stitch(frames):
        # head of the first model followed by the tail of the second model
        return pd.concat(
            [
                frames[first_model_index].head(first_bit),
                frames[second_model_index].tail(second_bit),
            ]
        )

    ens_df = _stitch(forecasts)
    ens_df_lower = _stitch(lower_forecasts)
    ens_df_upper = _stitch(upper_forecasts)

    id_list = list(ensemble_params['models'].keys())
    model_indexes = {idx for idx, x in enumerate(forecasts_list) if x in id_list}

    ens_runtime = datetime.timedelta(0)
    for idx, runtime in enumerate(forecasts_runtime):
        if idx in model_indexes:
            ens_runtime = ens_runtime + runtime

    ens_result_obj = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(ens_df.index),
        forecast_index=ens_df.index,
        forecast_columns=ens_df.columns,
        lower_forecast=ens_df_lower,
        forecast=ens_df,
        upper_forecast=ens_df_upper,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result_obj
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast=False):
    """Generates forecast data immediately following dates of index supplied to .fit()

    The forecast frames are built for self.ts_metadata['forecast_length']
    periods. If forecast_length exceeds self.forecast_length only a notice is
    printed; the predictor is not refit here.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor, not used
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts.
        The point forecast is the 0.5 quantile; the bounds are the
        (1 - prediction_interval) and prediction_interval quantiles.
    """
    if int(forecast_length) > int(self.forecast_length):
        print("GluonTS must be refit to change forecast length!")
    predictStartTime = datetime.datetime.now()
    fitted_length = self.ts_metadata['forecast_length']
    test_index = self.create_forecast_index(forecast_length=fitted_length)
    gluon_results = self.GluonPredictor.predict(self.test_ds)

    # Collect one long-format frame per series and concatenate once at the end,
    # rather than growing a DataFrame inside the loop.
    frames = []
    for i, result in enumerate(gluon_results):
        frames.append(
            pd.DataFrame(
                {
                    "ForecastDate": pd.date_range(
                        start=result.start_date,
                        periods=fitted_length,
                        freq=self.frequency,
                    ),
                    "series_id": self.train_index[i],
                    "LowerForecast": result.quantile(1 - self.prediction_interval),
                    "MedianForecast": result.quantile(0.5),
                    "UpperForecast": result.quantile(self.prediction_interval),
                }
            )
        )
    all_forecast = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

    def _wide(value_column):
        # pivot long results to dates x series, in the trained column order
        wide = all_forecast.pivot_table(
            values=value_column, index='ForecastDate', columns='series_id'
        )
        return wide[self.column_names]

    forecast = _wide('MedianForecast')
    if just_point_forecast:
        return forecast

    lower_forecast = _wide('LowerForecast')
    upper_forecast = _wide('UpperForecast')
    predict_runtime = datetime.datetime.now() - predictStartTime
    prediction = PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=test_index,
        forecast_columns=forecast.columns,
        lower_forecast=lower_forecast,
        forecast=forecast,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=predict_runtime,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
    return prediction
def BestNEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Build an equal-weight mean forecast from every supplied model.

    Args:
        ensemble_params (dict): stored as model_parameters on the result
        forecasts_list: not used by this function
        forecasts (dict): point forecast DataFrames keyed by model id
        lower_forecasts (dict): lower bound DataFrames keyed by model id
        upper_forecasts (dict): upper bound DataFrames keyed by model id
        forecasts_runtime (dict): datetime.timedelta runtimes keyed by model id
        prediction_interval: passed through unchanged to the PredictionObject

    Returns:
        PredictionObject holding the averaged point, lower and upper forecasts.

    Raises:
        ValueError: if forecasts is empty.
    """
    model_count = len(forecasts.keys())
    if model_count < 1:
        raise ValueError("BestN failed, no component models available.")

    # the first available forecast supplies index and columns for the sums
    template = next(iter(forecasts.values()))
    row_labels = template.index
    col_labels = template.columns

    def _average(frame_map):
        # every bound is divided by the number of point forecasts
        running = pd.DataFrame(0, index=row_labels, columns=col_labels)
        for frame in frame_map.values():
            running = running + frame
        return running / model_count

    mean_point = _average(forecasts)
    mean_lower = _average(lower_forecasts)
    mean_upper = _average(upper_forecasts)

    total_runtime = sum(forecasts_runtime.values(), datetime.timedelta(0))

    return PredictionObject(
        model_name="Ensemble",
        forecast_length=len(mean_point.index),
        forecast_index=mean_point.index,
        forecast_columns=mean_point.columns,
        lower_forecast=mean_lower,
        forecast=mean_point,
        upper_forecast=mean_upper,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=total_runtime,
        model_parameters=ensemble_params,
    )
def HorizontalEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate forecast for per_series ensembling.

    Each series takes its forecast column from the model assigned to it in
    ensemble_params['series'].

    Args:
        ensemble_params (dict): 'models' (keys are the model ids in use) and
            'series' (mapping of series name -> model id)
        forecasts_list (list): model ids, positionally aligned with the
            sequences below
        forecasts (list): point forecast DataFrames, one per model
        lower_forecasts (list): lower bound DataFrames, one per model
        upper_forecasts (list): upper bound DataFrames, one per model
        forecasts_runtime (list): datetime.timedelta runtimes, one per model
        prediction_interval: passed through unchanged to the PredictionObject

    Returns:
        PredictionObject with one column per series in each forecast frame.

    Raises:
        KeyError: if a series is assigned a model id that is not present in
            both ensemble_params['models'] and forecasts_list.
    """
    id_list = list(ensemble_params['models'].keys())
    mod_dic = {x: idx for idx, x in enumerate(forecasts_list) if x in id_list}

    forecast_df, u_forecast_df, l_forecast_df = (
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
    )
    for series, mod_id in ensemble_params['series'].items():
        l_idx = mod_dic[mod_id]
        try:
            c_fore = forecasts[l_idx][series]
            forecast_df = pd.concat([forecast_df, c_fore], axis=1)
        except Exception as e:
            # report the reason as well as the offending frame
            print(f"Horizontal ensemble unable to add model {repr(e)}")
            print(forecasts[l_idx].columns)
            print(forecasts[l_idx].head())
        # upper
        c_fore = upper_forecasts[l_idx][series]
        u_forecast_df = pd.concat([u_forecast_df, c_fore], axis=1)
        # lower
        c_fore = lower_forecasts[l_idx][series]
        l_forecast_df = pd.concat([l_forecast_df, c_fore], axis=1)

    # only the runtimes of models that take part in the ensemble are summed
    used_positions = set(mod_dic.values())
    ens_runtime = datetime.timedelta(0)
    for idx, runtime in enumerate(forecasts_runtime):
        if idx in used_positions:
            ens_runtime = ens_runtime + runtime

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(forecast_df.index),
        forecast_index=forecast_df.index,
        forecast_columns=forecast_df.columns,
        lower_forecast=l_forecast_df,
        forecast=forecast_df,
        upper_forecast=u_forecast_df,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast=False):
    """Forecast by repeating self.last_values for every requested period.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor, not used
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts.
        The bounds are the point forecast plus 0.8 * self.upper and
        minus 0.8 * self.lower.
    """
    started_at = datetime.datetime.now()
    repeated = np.tile(self.last_values, (forecast_length, 1))
    point = pd.DataFrame(
        repeated,
        columns=self.column_names,
        index=self.create_forecast_index(forecast_length=forecast_length),
    )
    if just_point_forecast:
        return point

    as_float = point.astype(float)
    upper_forecast = as_float + (self.upper * 0.8)
    lower_forecast = point.astype(float) - (self.lower * 0.8)
    elapsed = datetime.datetime.now() - started_at
    return PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=point.index,
        forecast_columns=point.columns,
        lower_forecast=lower_forecast,
        forecast=point,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=elapsed,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast=False):
    """Forecast zero for every series and every requested period.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor, not used
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts.
        In the PredictionObject the lower, point and upper forecasts are all
        the same DataFrame object.
    """
    started_at = datetime.datetime.now()
    n_series = self.train_shape[1]
    zeros = pd.DataFrame(
        np.zeros((forecast_length, n_series)),
        columns=self.column_names,
        index=self.create_forecast_index(forecast_length=forecast_length),
    )
    if just_point_forecast:
        return zeros

    elapsed = datetime.datetime.now() - started_at
    return PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=zeros.index,
        forecast_columns=zeros.columns,
        lower_forecast=zeros,
        forecast=zeros,
        upper_forecast=zeros,
        prediction_interval=self.prediction_interval,
        predict_runtime=elapsed,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
def predict(
    self,
    forecast_length: int,
    future_regressor=[],
    just_point_forecast: bool = False,
):
    """Generates forecast data immediately following dates of index supplied to .fit()

    tsfresh features are extracted per series from self.df_train, a regressor
    chosen by self.regression_model is fit on them, and the forecast is then
    produced one step at a time, appending each prediction to the data used
    for the next step's features.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor, not used by this method
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts

    Raises:
        ImportError: if tsfresh is not available.
    """
    if not _has_tsfresh:
        raise ImportError("Package tsfresh is required")
    predictStartTime = datetime.datetime.now()
    from tsfresh.utilities.dataframe_functions import make_forecasting_frame
    from tsfresh.utilities.dataframe_functions import impute as tsfresh_impute

    max_timeshift = self.max_timeshift
    regression_model = self.regression_model
    feature_selection = self.feature_selection

    def _series_features(series, counter):
        # rolling-window tsfresh features (and targets) for a single series;
        # feature names get a positional prefix so series do not collide
        df_shift, current_y = make_forecasting_frame(
            series,
            kind="time_series",
            max_timeshift=max_timeshift,
            rolling_direction=1,
        )
        current_X = extract_features(
            df_shift,
            column_id="id",
            column_sort="time",
            column_value="value",
            impute_function=tsfresh_impute,
            show_warnings=False,
            default_fc_parameters=EfficientFCParameters(),
            n_jobs=1,
        )
        current_X.rename(columns=lambda x: str(counter) + '_' + x, inplace=True)
        return current_X, current_y

    sktraindata = self.df_train.copy()

    X = pd.DataFrame()
    y = pd.DataFrame()
    for counter, column in enumerate(sktraindata.columns):
        current_X, current_y = _series_features(sktraindata[column], counter)
        X = pd.concat([X, current_X], axis=1)
        y = pd.concat([y, current_y], axis=1)

    # drop constant features
    X = X.loc[:, X.apply(pd.Series.nunique) != 1]
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(0)
    y = y.ffill().bfill()

    # Feature selection keeps X as a labelled DataFrame (boolean column mask):
    # the code below relies on X.iloc and on X.columns to pick the same
    # features out of the freshly extracted forecast-time features.
    if feature_selection == 'Variance':
        from sklearn.feature_selection import VarianceThreshold

        sel = VarianceThreshold(threshold=(0.15))
        sel.fit(X)
        X = X.loc[:, sel.get_support()]
    elif feature_selection == 'Percentile':
        from sklearn.feature_selection import SelectPercentile, chi2

        sel = SelectPercentile(chi2, percentile=20)
        sel.fit(X, y[y.columns[0]])
        X = X.loc[:, sel.get_support()]
    elif feature_selection == 'DecisionTree':
        from sklearn.tree import DecisionTreeRegressor
        from sklearn.feature_selection import SelectFromModel

        clf = DecisionTreeRegressor()
        clf = clf.fit(X, y)
        model = SelectFromModel(clf, prefit=True)
        X = X.loc[:, model.get_support()]
    elif feature_selection == 'Lasso':
        from sklearn.linear_model import MultiTaskLasso
        from sklearn.feature_selection import SelectFromModel

        clf = MultiTaskLasso(max_iter=2000)
        clf = clf.fit(X, y)
        model = SelectFromModel(clf, prefit=True)
        X = X.loc[:, model.get_support()]

    # Drop first line
    X = X.iloc[1:, ]
    y = y.iloc[1:]

    y = y.ffill().bfill()

    index = self.create_forecast_index(forecast_length=forecast_length)

    if regression_model == 'ElasticNet':
        from sklearn.linear_model import MultiTaskElasticNet

        regr = MultiTaskElasticNet(alpha=1.0, random_state=self.random_seed)
    elif regression_model == 'DecisionTree':
        from sklearn.tree import DecisionTreeRegressor

        regr = DecisionTreeRegressor(random_state=self.random_seed)
    elif regression_model == 'MLP':
        from sklearn.neural_network import MLPRegressor

        # relu/tanh lbfgs/adam layer_sizes (100) (10)
        regr = MLPRegressor(
            hidden_layer_sizes=(10, 25, 10),
            verbose=self.verbose_bool,
            max_iter=200,
            activation='tanh',
            solver='lbfgs',
            random_state=self.random_seed,
        )
    elif regression_model == 'KNN':
        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.neighbors import KNeighborsRegressor

        # KNeighborsRegressor has no random_state parameter
        regr = MultiOutputRegressor(KNeighborsRegressor())
    elif regression_model == 'Adaboost':
        from sklearn.multioutput import MultiOutputRegressor
        from sklearn.ensemble import AdaBoostRegressor

        regr = MultiOutputRegressor(AdaBoostRegressor(n_estimators=200))
    else:
        # any other value falls back to a random forest
        from sklearn.ensemble import RandomForestRegressor

        regr = RandomForestRegressor(
            random_state=self.random_seed,
            n_estimators=1000,
            verbose=self.verbose,
        )

    regr.fit(X, y)

    combined_index = self.df_train.index.append(index)
    forecast = pd.DataFrame()
    # positional column names so appended prediction rows line up by column
    sktraindata.columns = list(range(len(sktraindata.columns)))

    for _ in range(forecast_length):
        x_dat = pd.DataFrame()
        for counter, column in enumerate(sktraindata.columns):
            current_X, _unused_y = _series_features(
                sktraindata.tail(max_timeshift)[column], counter
            )
            x_dat = pd.concat([x_dat, current_X], axis=1)
        # keep only the features the regressor was trained on
        x_dat = x_dat[X.columns]
        rfPred = pd.DataFrame(regr.predict(x_dat.tail(1).values))

        forecast = pd.concat([forecast, rfPred], axis=0, ignore_index=True)
        # feed the prediction back in as the newest observation
        sktraindata = pd.concat([sktraindata, rfPred], axis=0, ignore_index=True)
        sktraindata.index = combined_index[:len(sktraindata.index)]

    forecast.columns = self.column_names
    forecast.index = index

    if just_point_forecast:
        return forecast

    upper_forecast, lower_forecast = Point_to_Probability(
        self.df_train, forecast, prediction_interval=self.prediction_interval
    )

    predict_runtime = datetime.datetime.now() - predictStartTime
    prediction = PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=forecast.index,
        forecast_columns=forecast.columns,
        lower_forecast=lower_forecast,
        forecast=forecast,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=predict_runtime,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
    return prediction
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast=False):
    """Forecast with the fitted structural time series model via tfp.sts.forecast.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: additional regressor, not used
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts.
        The point forecast is the mean of the forecast distribution; the
        bounds are the mean plus/minus its standard deviation scaled by the
        normal quantile for the configured prediction interval.
    """
    started_at = datetime.datetime.now()
    test_index = self.create_forecast_index(forecast_length=forecast_length)
    forecast_dist = tfp.sts.forecast(
        model=self.demand_model,
        observed_time_series=self.demand2d,
        parameter_samples=self.q_samples,
        num_steps_forecast=forecast_length,
        include_observation_noise=True,
    )

    def _to_frame(values):
        # values are laid out series x time; flip to time x series
        return pd.DataFrame(
            values, index=self.column_names, columns=test_index
        ).transpose()

    forecast = _to_frame(forecast_dist.mean().numpy()[..., 0])
    if just_point_forecast:
        return forecast

    # assume follows rules of normal because those are conventional
    from scipy.stats import norm

    two_sided_quantile = 1 - ((1 - self.prediction_interval) / 2)
    z_score = norm.ppf(two_sided_quantile)
    spread = forecast_dist.stddev().numpy()[..., 0] * z_score
    center = forecast.transpose().values
    upper_forecast = _to_frame(center + spread)
    lower_forecast = _to_frame(center - spread)

    elapsed = datetime.datetime.now() - started_at
    return PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=test_index,
        forecast_columns=forecast.columns,
        lower_forecast=lower_forecast,
        forecast=forecast,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=elapsed,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
def HDistEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
):
    """Generate forecast for per_series per distance ensembling.

    Two per-series assemblies are built, one from ensemble_params['series1']
    and one from ensemble_params['series2']. The first ceil(dis_frac * length)
    rows come from the first assembly and the remaining rows from the second.

    Args:
        ensemble_params (dict): 'models' (keys are the model ids in use),
            'dis_frac', and 'series1' / 'series2' (series name -> model id)
        forecasts_list (list): model ids, positionally aligned with the
            values of the dictionaries below
        forecasts (dict): point forecast DataFrames
        lower_forecasts (dict): lower bound DataFrames
        upper_forecasts (dict): upper bound DataFrames
        forecasts_runtime (dict): datetime.timedelta runtimes
        prediction_interval: passed through unchanged to the PredictionObject

    Returns:
        PredictionObject with the stitched point, lower and upper forecasts.

    Raises:
        KeyError: if a series is assigned a model id that is not present in
            both ensemble_params['models'] and forecasts_list.
    """
    # handle that the inputs are now dictionaries
    forecasts = list(forecasts.values())
    lower_forecasts = list(lower_forecasts.values())
    upper_forecasts = list(upper_forecasts.values())
    forecasts_runtime = list(forecasts_runtime.values())

    id_list = list(ensemble_params['models'].keys())
    mod_dic = {x: idx for idx, x in enumerate(forecasts_list) if x in id_list}

    forecast_length = forecasts[0].shape[0]
    dist_n = int(np.ceil(ensemble_params['dis_frac'] * forecast_length))
    dist_last = forecast_length - dist_n

    def _assemble(series_to_model):
        # build point / upper / lower frames, one column per series,
        # each column taken from the model assigned to that series
        point_df, upper_df, lower_df = (
            pd.DataFrame(),
            pd.DataFrame(),
            pd.DataFrame(),
        )
        for series, mod_id in series_to_model.items():
            l_idx = mod_dic[mod_id]
            try:
                c_fore = forecasts[l_idx][series]
                point_df = pd.concat([point_df, c_fore], axis=1)
            except Exception as e:
                # report the reason as well as the offending frame
                print(f"HDist ensemble unable to add model {repr(e)}")
                print(forecasts[l_idx].columns)
                print(forecasts[l_idx].head())
            # upper
            c_fore = upper_forecasts[l_idx][series]
            upper_df = pd.concat([upper_df, c_fore], axis=1)
            # lower
            c_fore = lower_forecasts[l_idx][series]
            lower_df = pd.concat([lower_df, c_fore], axis=1)
        return point_df, upper_df, lower_df

    forecast_df, u_forecast_df, l_forecast_df = _assemble(ensemble_params['series1'])
    forecast_df2, u_forecast_df2, l_forecast_df2 = _assemble(
        ensemble_params['series2']
    )

    forecast_df = pd.concat(
        [forecast_df.head(dist_n), forecast_df2.tail(dist_last)], axis=0
    )
    u_forecast_df = pd.concat(
        [u_forecast_df.head(dist_n), u_forecast_df2.tail(dist_last)], axis=0
    )
    l_forecast_df = pd.concat(
        [l_forecast_df.head(dist_n), l_forecast_df2.tail(dist_last)], axis=0
    )

    # only the runtimes of models that take part in the ensemble are summed
    used_positions = set(mod_dic.values())
    ens_runtime = datetime.timedelta(0)
    for idx, runtime in enumerate(forecasts_runtime):
        if idx in used_positions:
            ens_runtime = ens_runtime + runtime

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(forecast_df.index),
        forecast_index=forecast_df.index,
        forecast_columns=forecast_df.columns,
        lower_forecast=l_forecast_df,
        forecast=forecast_df,
        upper_forecast=u_forecast_df,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
def HorizontalEnsemble(
    ensemble_params,
    forecasts_list,
    forecasts,
    lower_forecasts,
    upper_forecasts,
    forecasts_runtime,
    prediction_interval,
    df_train=None,
):
    """Generate forecast for per_series ensembling.

    Each series takes its forecast column from the model assigned to it in
    ensemble_params['series']. When some columns of df_train have no usable
    assignment, horizontal_classifier(df_train, matches) supplies the mapping
    for all series instead.

    Args:
        ensemble_params (dict): 'series' maps series name -> model id
        forecasts_list: not used by this function
        forecasts (dict): point forecast DataFrames keyed by model id
        lower_forecasts (dict): lower bound DataFrames keyed by model id
        upper_forecasts (dict): upper bound DataFrames keyed by model id
        forecasts_runtime (dict): datetime.timedelta runtimes keyed by model id
        prediction_interval: passed through unchanged to the PredictionObject
        df_train (pandas.DataFrame): training data; its columns define the
            series to forecast and the output column order. Although the
            default is None, the function reads df_train.columns immediately.

    Returns:
        PredictionObject whose forecast frames have columns ordered like
        df_train.columns.

    Raises:
        ValueError: if no assigned model is available for any series of df_train.
    """
    available_models = list(forecasts.keys())
    known_matches = ensemble_params['series']
    org_idx = df_train.columns
    org_list = org_idx.tolist()
    # remove any unavailable models or unnecessary series
    known_matches = {
        ser: mod for ser, mod in known_matches.items() if ser in org_list
    }
    k = {ser: mod for ser, mod in known_matches.items() if mod in available_models}
    # check if any series are missing from model list
    if not k:
        raise ValueError("Horizontal template has no models matching this data!")
    if len(set(org_list) - set(list(k.keys()))) > 0:
        all_series = horizontal_classifier(df_train, k)
    else:
        all_series = known_matches

    forecast_df, u_forecast_df, l_forecast_df = (
        pd.DataFrame(),
        pd.DataFrame(),
        pd.DataFrame(),
    )
    for series, mod_id in all_series.items():
        try:
            c_fore = forecasts[mod_id][series]
            forecast_df = pd.concat([forecast_df, c_fore], axis=1)
        except Exception as e:
            print(f"Horizontal ensemble unable to add model {repr(e)}")
        # upper
        c_fore = upper_forecasts[mod_id][series]
        u_forecast_df = pd.concat([u_forecast_df, c_fore], axis=1)
        # lower
        c_fore = lower_forecasts[mod_id][series]
        l_forecast_df = pd.concat([l_forecast_df, c_fore], axis=1)

    # make sure columns align to original
    # (reindex returns a new frame, so the result has to be assigned)
    forecast_df = forecast_df.reindex(columns=org_idx)
    u_forecast_df = u_forecast_df.reindex(columns=org_idx)
    l_forecast_df = l_forecast_df.reindex(columns=org_idx)

    # combine runtimes
    ens_runtime = sum(forecasts_runtime.values(), datetime.timedelta(0))

    ens_result = PredictionObject(
        model_name="Ensemble",
        forecast_length=len(forecast_df.index),
        forecast_index=forecast_df.index,
        forecast_columns=forecast_df.columns,
        lower_forecast=l_forecast_df,
        forecast=forecast_df,
        upper_forecast=u_forecast_df,
        prediction_interval=prediction_interval,
        predict_runtime=datetime.timedelta(0),
        fit_runtime=ens_runtime,
        model_parameters=ensemble_params,
    )
    return ens_result
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast: bool = False):
    """Fit one Prophet model per training series and forecast ahead.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: future regressor values; only read when
            self.regression_type == 'User' (its .ndim and .values are used)
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts

    Raises:
        ImportError: if fbprophet is not available.
    """
    if not _has_prophet:
        raise ImportError("Package fbprophet is required")
    started_at = datetime.datetime.now()
    test_index = self.create_forecast_index(forecast_length=forecast_length)

    forecast = pd.DataFrame()
    lower_forecast = pd.DataFrame()
    upper_forecast = pd.DataFrame()
    if self.verbose <= 0:
        logging.getLogger('fbprophet').setLevel(logging.WARNING)

    for series in self.df_train.columns:
        # Prophet reads its target from 'y' and its timestamps from 'ds'
        history = self.df_train.copy()
        history['y'] = history[series]
        history['ds'] = history.index
        print("FBProphet Initial Set")
        if self.regression_type == 'User':
            history[self.regressor_name] = self.regressor_train

        model = Prophet(interval_width=self.prediction_interval)
        if self.holiday:
            model.add_country_holidays(country_name=self.holiday_country)
        if self.regression_type == 'User':
            model.add_regressor(self.regressor_name)
        model = model.fit(history)

        future = model.make_future_dataframe(periods=forecast_length)
        if self.regression_type == 'User':
            # the future frame covers history + horizon, so the regressor is
            # the training values followed by the future values
            if future_regressor.ndim > 1:
                reduced = self.dimensionality_reducer.transform(future_regressor)
                full_regressor = np.append(self.regressor_train, reduced)
            else:
                full_regressor = np.append(
                    self.regressor_train, future_regressor.values
                )
            future[self.regressor_name] = full_regressor

        # remove the backcast
        horizon = model.predict(future).tail(forecast_length)
        forecast = pd.concat([forecast, horizon['yhat']], axis=1)
        lower_forecast = pd.concat([lower_forecast, horizon['yhat_lower']], axis=1)
        upper_forecast = pd.concat([upper_forecast, horizon['yhat_upper']], axis=1)

    forecast.columns = self.column_names
    forecast.index = test_index
    lower_forecast.columns = self.column_names
    lower_forecast.index = test_index
    upper_forecast.columns = self.column_names
    upper_forecast.index = test_index

    if just_point_forecast:
        return forecast

    elapsed = datetime.datetime.now() - started_at
    return PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=forecast.index,
        forecast_columns=forecast.columns,
        lower_forecast=lower_forecast,
        forecast=forecast,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=elapsed,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
def predict(self, forecast_length: int, future_regressor=[], just_point_forecast=False):
    """Return the forecasts stored on the model, trimmed or padded to length.

    ``self.forecasts`` (and, for the full result, ``self.lower_forecasts``
    and ``self.upper_forecasts``) are cut to the first ``forecast_length``
    rows. If fewer rows are stored than requested, the last stored row is
    repeated (forward fill) to make up the difference. The stored frames
    themselves are left untouched.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: accepted for interface compatibility, not used
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts
    """
    predictStartTime = datetime.datetime.now()
    forecast_index = self.create_forecast_index(forecast_length=forecast_length)

    def _fit_to_length(frame):
        """Trim ``frame`` to ``forecast_length`` rows, forward-filling any shortfall."""
        frame = frame.head(forecast_length)
        if frame.shape[0] < forecast_length:
            # Reindex onto a positional range to append the missing rows as
            # NaN while keeping the numeric dtype, then repeat the last
            # stored values. The positional index is replaced just below.
            frame = (
                frame.reset_index(drop=True)
                .reindex(np.arange(forecast_length))
                .ffill()
            )
        frame.columns = self.column_names
        frame.index = forecast_index
        return frame

    forecasts = _fit_to_length(self.forecasts)
    if just_point_forecast:
        return forecasts

    lower_forecasts = _fit_to_length(self.lower_forecasts)
    upper_forecasts = _fit_to_length(self.upper_forecasts)
    predict_runtime = datetime.datetime.now() - predictStartTime
    prediction = PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=forecasts.index,
        forecast_columns=forecasts.columns,
        lower_forecast=lower_forecasts,
        forecast=forecasts,
        upper_forecast=upper_forecasts,
        prediction_interval=self.prediction_interval,
        predict_runtime=predict_runtime,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
    return prediction
def predict(
    self,
    forecast_length: int,
    future_regressor=[],
    just_point_forecast: bool = False,
):
    """Fit one Prophet model per training series and forecast ahead.

    Each column of ``self.df_train`` is fit and predicted independently.
    When ``self.n_jobs`` is not 0 or 1, there are at least four series and
    joblib can be imported, the series are processed through
    ``joblib.Parallel``; otherwise they are processed in a plain loop.
    ``self.df_train`` is only read, never modified.

    Args:
        forecast_length (int): Number of periods of data to forecast ahead
        future_regressor: regressor values covering the forecast period.
            Only read when ``self.regression_type == 'User'``, in which case
            it must expose ``.ndim`` (and ``.values`` when it is
            one-dimensional); the default empty list is therefore only
            valid when no user regressor is configured.
        just_point_forecast (bool): If True, return a pandas.DataFrame of just point forecasts

    Returns:
        Either a PredictionObject of forecasts and metadata, or
        if just_point_forecast == True, a dataframe of point forecasts.
        Forecast frames are indexed by ``self.create_forecast_index(...)``
        and have one column per training series.

    Raises:
        ImportError: if the module-level ``_has_prophet`` flag is false.
    """
    if not _has_prophet:
        raise ImportError("Package fbprophet is required")
    predictStartTime = datetime.datetime.now()
    test_index = self.create_forecast_index(forecast_length=forecast_length)
    if self.verbose <= 0:
        logging.getLogger('fbprophet').setLevel(logging.WARNING)

    def seek_the_oracle(df, args, series):
        """Fit Prophet on one column of ``df``; return (point, lower, upper) Series."""
        # Work on a private frame holding only what Prophet needs, so the
        # shared training frame never gains 'y', 'ds' or regressor columns.
        current_series = df[series].to_frame(name='y')
        current_series['ds'] = current_series.index
        if args['regression_type'] == 'User':
            current_series[args['regressor_name']] = args['regressor_train']

        m = Prophet(interval_width=args['prediction_interval'])
        if args['holiday']:
            m.add_country_holidays(country_name=args['holiday_country'])
        if args['regression_type'] == 'User':
            m.add_regressor(args['regressor_name'])
        m = m.fit(current_series)

        future = m.make_future_dataframe(periods=forecast_length)
        if args['regression_type'] == 'User':
            # The future frame spans history + horizon, so the regressor
            # column is the training values followed by the supplied ones.
            if future_regressor.ndim > 1:
                a = args['dimensionality_reducer'].transform(future_regressor)
                a = np.append(args['regressor_train'], a)
            else:
                a = np.append(args['regressor_train'], future_regressor.values)
            future[args['regressor_name']] = a

        fcst = m.predict(future)
        fcst = fcst.tail(forecast_length)  # remove the backcast
        forecast = fcst['yhat']
        forecast.name = series
        lower_forecast = fcst['yhat_lower']
        lower_forecast.name = series
        upper_forecast = fcst['yhat_upper']
        upper_forecast.name = series
        return (forecast, lower_forecast, upper_forecast)

    args = {
        'holiday': self.holiday,
        'holiday_country': self.holiday_country,
        'regression_type': self.regression_type,
        'regressor_name': self.regressor_name,
        'regressor_train': self.regressor_train,
        'dimensionality_reducer': self.dimensionality_reducer,
        'prediction_interval': self.prediction_interval,
    }

    # Only the training series are forecast; the regressor is attached
    # inside seek_the_oracle and so never shows up in this list.
    cols = self.df_train.columns.tolist()
    parallel = True
    if self.n_jobs in [0, 1] or len(cols) < 4:
        parallel = False
    else:
        try:
            from joblib import Parallel, delayed
        except Exception:
            # joblib is optional; fall back to the sequential loop.
            parallel = False

    if parallel:
        # joblib multiprocessing to loop through series
        verbs = 0 if self.verbose < 1 else self.verbose - 1
        df_list = Parallel(n_jobs=self.n_jobs, verbose=(verbs))(
            delayed(seek_the_oracle)(df=self.df_train, args=args, series=col)
            for col in cols
        )
    else:
        df_list = [seek_the_oracle(self.df_train, args, col) for col in cols]

    # Transpose [(point, lower, upper), ...] into three per-kind lists.
    complete = list(map(list, zip(*df_list)))
    forecast = pd.concat(complete[0], axis=1)
    lower_forecast = pd.concat(complete[1], axis=1)
    upper_forecast = pd.concat(complete[2], axis=1)
    # Prophet's output carries a positional index; replace it with the
    # forecast dates so the result lines up with the requested horizon.
    forecast.index = test_index
    lower_forecast.index = test_index
    upper_forecast.index = test_index

    if just_point_forecast:
        return forecast

    predict_runtime = datetime.datetime.now() - predictStartTime
    prediction = PredictionObject(
        model_name=self.name,
        forecast_length=forecast_length,
        forecast_index=forecast.index,
        forecast_columns=forecast.columns,
        lower_forecast=lower_forecast,
        forecast=forecast,
        upper_forecast=upper_forecast,
        prediction_interval=self.prediction_interval,
        predict_runtime=predict_runtime,
        fit_runtime=self.fit_runtime,
        model_parameters=self.get_params(),
    )
    return prediction