def _mean_symmetric_error(scored):
    """Return mean(|y - yhat| / (|y| + |yhat|) / 2) * 100 over the rows of *scored*."""
    gap = np.abs(scored.y - scored.yhat)
    magnitude = np.abs(scored.y) + np.abs(scored.yhat)
    return float(np.mean(gap / magnitude / 2) * 100)


def model_evaluation(params, indices):
    """Fit Prophet on a training slice and score its hold-out forecast.

    Reads names that are not defined in this block and must exist in the
    enclosing scope: ``holiday_table``, ``bc_df``, ``has_add_regressors``,
    ``pred_size``, ``cap``, ``floor``, ``gen_regressors``, ``metric_name``,
    ``perf_df`` and ``eval_level``.

    Args:
        params: Object exposing ``asDict()``; the resulting mapping is
            forwarded to ``Prophet`` as keyword arguments.
        indices: Positional row selector applied to ``bc_df.value`` to build
            the training frame.

    Returns:
        float: Error score in percent. With ``eval_level == "daily"`` the
        score is computed row by row; with ``"weekly"`` actuals and forecasts
        are first summed per week of year; any other value compares the
        totals over the whole prediction window.
    """
    params = params.asDict()
    model = Prophet(daily_seasonality=False, interval_width=0.95,
                    holidays=holiday_table, **params)
    train = bc_df.value.iloc[indices]
    if has_add_regressors:
        # The first column that is not a reserved Prophet column is the regressor.
        extra_cols = train.columns[~train.columns.isin(['ds', 'y', 'cap', 'floor'])]
        model.add_regressor(extra_cols.values[0])
    model.fit(train)

    future = model.make_future_dataframe(periods=pred_size)
    future['cap'] = cap
    future['floor'] = floor
    if has_add_regressors:
        add_reg_df = gen_regressors(perf=future, metric_name=metric_name)
        if add_reg_df is not None:
            future = future.merge(add_reg_df, on="ds")

    forecast = model.predict(future)
    pred_df = forecast[["ds", "yhat"]].set_index("ds").tail(pred_size)
    # Actuals are aligned on the date index of the forecast rows.
    pred_df["y"] = perf_df.set_index("ds")['y']

    # NOTE(review): the score divides by (|y| + |yhat|) and then by 2 again,
    # whereas the textbook sMAPE divides by the *mean* of the two magnitudes.
    # The original arithmetic is kept so existing scores stay comparable.
    if eval_level == "daily":
        return _mean_symmetric_error(pred_df)
    if eval_level == "weekly":
        # NOTE(review): DatetimeIndex.weekofyear is gone in pandas >= 2.0;
        # confirm the pinned pandas version before upgrading.
        pred_df["week"] = pred_df.index.weekofyear
        # Column names must be strings; the bare names y / yhat were undefined.
        pred_df = pred_df.groupby("week").agg({"y": "sum", "yhat": "sum"})
        return _mean_symmetric_error(pred_df)

    total_actual = np.abs(np.sum(pred_df.y))
    total_predicted = np.abs(np.sum(pred_df.yhat))
    return float(
        np.abs(total_actual - total_predicted)
        / (total_actual + total_predicted) / 2 * 100
    )
def fit_model(dataframe, local=True):
    """Build and fit a Prophet model with sentiment regressors.

    Args:
        dataframe: Training frame passed to ``Prophet.fit``; it must contain
            a column for every regressor that gets registered.
        local: When truthy, the regressors ``BTC_score``, ``twitter_score``,
            ``reddit_score`` and ``Fear&Greed`` are registered; otherwise
            only ``Fear&Greed`` is.

    Returns:
        The value returned by ``Prophet.fit``.
    """
    if local:
        regressors = ['BTC_score', 'twitter_score', 'reddit_score', "Fear&Greed"]
    else:
        regressors = ["Fear&Greed"]

    # Both variants share the same base configuration.
    model = Prophet(interval_width=0.95, weekly_seasonality=True,
                    changepoint_prior_scale=2)
    for name in regressors:
        model.add_regressor(name)
    return model.fit(dataframe)
def fit(self, X, y=None):
    """Fit a flat-growth Prophet model on *X* and store it on ``self.model``.

    Args:
        X: Frame holding the date column ``self.ds_col``, the target column
            ``self.target_col`` and every column listed in ``self.exog_cols``.
        y: Unused by this method.

    Returns:
        ``self``.
    """
    model = Prophet(
        growth="flat",
        n_changepoints=self.n_changepoints,
        changepoint_prior_scale=self.changepoint_prior_scale,
        weekly_seasonality=False,
        daily_seasonality=False,
    )

    # One custom seasonality per (fourier order, period) pair.
    for fourier_order, period in zip(self.yearly_seasonalities, self.yearly_periods):
        model.add_seasonality(f"yearly_{period}", period=period, fourier_order=fourier_order)

    # Every exogenous column becomes an extra regressor.
    if self.exog_cols != []:
        for column in self.exog_cols:
            model.add_regressor(column)

    renamed = X.rename(columns={self.ds_col: "ds", self.target_col: "y"})
    model.fit(renamed)
    self.model = model
    return self
def forecast_day_per_day_with_external_features(self):
    """Cross-validate a one-day-ahead Prophet forecast with extra regressors.

    The regressors are the three weather columns when ``self.feature`` equals
    ``"weather"``, otherwise ``guest_ticket_count``. ``self.revenue`` has its
    index reset in place as a side effect.

    Returns:
        The frame returned by ``cross_validation``.
    """
    # In place on purpose: the object stored on self is the one being reset.
    self.revenue.reset_index(inplace=True)

    if self.feature == "weather":
        regressor_names = ['temperature', 'wind_speed', "precipitation_intensity"]
    else:
        regressor_names = ['guest_ticket_count']

    history = self.revenue[["date", "paid_no_tax"] + regressor_names].rename(
        index=str, columns={"date": "ds", "paid_no_tax": "y"})

    # Initial training window: everything except the last (days + 2) rows.
    initial_window = str(len(history) - (self.days + 2)) + 'days'

    model = Prophet()
    for name in regressor_names:
        model.add_regressor(name)
    model.fit(history)
    return cross_validation(model, horizon="1 days", initial=initial_window,
                            period="1 days")
def seek_the_oracle(df, args, series):
    """Fit Prophet on one column of *df* and return its forecast bands.

    ``forecast_length`` and ``future_regressor`` are read from the enclosing
    scope; they are not parameters of this function.

    Args:
        df: Frame whose index holds the timestamps and whose column *series*
            holds the values to forecast. It is not modified.
        args: Mapping with the keys ``prediction_interval``, ``holiday``,
            ``holiday_country``, ``regression_type`` and, when the regression
            type is ``'User'``, ``regressor_name``, ``regressor_train`` and
            ``dimensionality_reducer``.
        series: Name of the column to forecast; also used to name the
            returned series.

    Returns:
        tuple: ``(forecast, lower_forecast, upper_forecast)`` holding the
        last ``forecast_length`` rows of ``yhat``, ``yhat_lower`` and
        ``yhat_upper``.
    """
    # Copy first so the caller's frame does not gain 'y' / 'ds' columns.
    current_series = df.copy()
    current_series['y'] = current_series[series]
    current_series['ds'] = current_series.index

    user_regression = args['regression_type'] == 'User'

    m = Prophet(interval_width=args['prediction_interval'])
    if args['holiday']:
        m.add_country_holidays(country_name=args['holiday_country'])
    if user_regression:
        m.add_regressor(args['regressor_name'])
    m = m.fit(current_series)

    future = m.make_future_dataframe(periods=forecast_length)
    if user_regression:
        if future_regressor.ndim > 1:
            # Multi-column future regressors go through the supplied reducer.
            a = args['dimensionality_reducer'].transform(future_regressor)
            a = np.append(args['regressor_train'], a)
        else:
            a = np.append(args['regressor_train'], future_regressor.values)
        future[args['regressor_name']] = a

    fcst = m.predict(future)
    fcst = fcst.tail(forecast_length)  # remove the backcast

    forecast = fcst['yhat']
    forecast.name = series
    lower_forecast = fcst['yhat_lower']
    lower_forecast.name = series
    upper_forecast = fcst['yhat_upper']
    upper_forecast.name = series
    return (forecast, lower_forecast, upper_forecast)
def prophet(df_train, df_test, exogenous_features, scale_list=None, cp_scale=0.05):
    """Fit Prophet with extra regressors and return a single rounded forecast.

    Args:
        df_train: Training frame with ``ds``, ``y`` and every column named in
            *exogenous_features*.
        df_test: Frame to predict for; ``.item()`` is called on the
            prediction, so it must yield exactly one forecast row.
        exogenous_features: List of regressor column names.
        scale_list: Columns that are clipped at zero and transformed with
            ``log(x + 1)`` before fitting. ``None`` means no scaling.
        cp_scale: Value passed as ``changepoint_prior_scale``.

    Returns:
        The forecast rounded to zero decimals.
    """

    class suppress_stdout_stderr(object):
        '''
        A context manager for doing a "deep suppression" of stdout and stderr in
        Python, i.e. will suppress all print, even if the print originates in a
        compiled C/Fortran sub-function.
           This will not suppress raised exceptions, since exceptions are printed
        to stderr just before a script exits, and after the context manager has
        exited (at least, I think that is why it lets exceptions through).
        '''

        def __init__(self):
            # Open a pair of null files
            self.null_fds = [os.open(os.devnull, os.O_RDWR) for x in range(2)]
            # Save the actual stdout (1) and stderr (2) file descriptors.
            self.save_fds = [os.dup(1), os.dup(2)]

        def __enter__(self):
            # Assign the null pointers to stdout and stderr.
            os.dup2(self.null_fds[0], 1)
            os.dup2(self.null_fds[1], 2)

        def __exit__(self, *_):
            # Re-assign the real stdout/stderr back to (1) and (2)
            os.dup2(self.save_fds[0], 1)
            os.dup2(self.save_fds[1], 2)
            # Close the null files
            for fd in self.null_fds + self.save_fds:
                os.close(fd)

    if scale_list is None:
        scale_list = []

    if len(scale_list) > 0:
        # Transform private copies: mutating the caller's frames would apply
        # the log transform again on every repeated call with the same data.
        df_train = df_train.copy()
        df_test = df_test.copy()
        for col in scale_list:
            df_train.loc[df_train[col] < 0, col] = 0
            df_test.loc[df_test[col] < 0, col] = 0
            df_train[col] = np.log(df_train[col] + 1)
            df_test[col] = np.log(df_test[col] + 1)

    model = Prophet(daily_seasonality=False,
                    weekly_seasonality=False,
                    yearly_seasonality=False,
                    changepoint_prior_scale=cp_scale,
                    seasonality_mode='multiplicative')
    for feature in exogenous_features:
        model.add_regressor(feature)

    with suppress_stdout_stderr():
        model.fit(df_train[["ds", "y"] + exogenous_features])

    forecast = model.predict(df_test[["ds"] + exogenous_features])
    forecast.loc[forecast.yhat < 0, "yhat"] = 0

    if len(scale_list) > 0:
        # NOTE(review): the forward transform is log(x + 1) but the inverse
        # here is exp(x), not exp(x) - 1, and it is applied even when "y" is
        # not in scale_list. Kept as-is; confirm the intended inverse.
        forecast = np.exp(forecast["yhat"].item())
    else:
        forecast = forecast["yhat"].item()
    return np.round(forecast, 0)
def fit_predict_model(meter, timeseries):
    """Fit Prophet on a meter's series and flag readings outside the interval.

    Args:
        meter: Identifier written to the ``meter_id`` column of the result.
        timeseries: Frame passed to both ``fit`` and ``predict``; it must
            contain ``y`` and the ``weekend`` regressor column.

    Returns:
        The Prophet forecast frame with four added columns: ``consumption``
        (the observed ``y``), ``meter_id``, ``anomaly`` (1 above
        ``yhat_upper``, -1 below ``yhat_lower``, 0 otherwise) and
        ``importance`` (distance outside the interval relative to the
        observed consumption, 0.0 for non-anomalies).
    """
    m = Prophet(daily_seasonality=False, yearly_seasonality=True,
                weekly_seasonality=True, seasonality_mode='multiplicative',
                interval_width=.98, changepoint_range=.8)
    m.add_country_holidays(country_name='UK')
    m.add_regressor('weekend')
    m = m.fit(timeseries)

    forecast = m.predict(timeseries)
    # Drop the original index so the observations line up positionally.
    forecast['consumption'] = timeseries['y'].reset_index(drop=True)
    forecast['meter_id'] = meter

    forecast['anomaly'] = 0
    forecast.loc[forecast['consumption'] > forecast['yhat_upper'], 'anomaly'] = 1
    forecast.loc[forecast['consumption'] < forecast['yhat_lower'], 'anomaly'] = -1

    # anomaly importance
    # Start as float: the values written below are ratios, and writing floats
    # into an int column triggers an incompatible-dtype warning in recent pandas.
    forecast['importance'] = 0.0
    is_high = forecast['anomaly'] == 1
    is_low = forecast['anomaly'] == -1
    forecast.loc[is_high, 'importance'] = \
        (forecast['consumption'] - forecast['yhat_upper']) / forecast['consumption']
    forecast.loc[is_low, 'importance'] = \
        (forecast['yhat_lower'] - forecast['consumption']) / forecast['consumption']
    return forecast
def test_seasonality_modes(self):
    """Check how components are split between additive and multiplicative.

    The model mixes a holiday, auto and custom seasonalities, and two extra
    regressors, with the mode overridden on some of them.
    """
    xmas = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    model = Prophet(seasonality_mode='multiplicative', holidays=xmas)
    model.add_seasonality('monthly', period=30, mode='additive', fourier_order=3)
    model.add_regressor('binary_feature', mode='additive')
    model.add_regressor('numeric_feature')

    # Build the seasonal feature matrix without running a full fit.
    frame = DATA.copy()
    frame['binary_feature'] = [0] * 255 + [1] * 255
    frame['numeric_feature'] = range(510)
    frame = model.setup_dataframe(frame, initialize_scales=True)
    model.history = frame.copy()
    model.set_auto_seasonalities()
    features = model.make_all_seasonality_features(frame)
    component_cols, modes = features[2], features[3]

    self.assertEqual(sum(component_cols['additive_terms']), 7)
    self.assertEqual(sum(component_cols['multiplicative_terms']), 29)

    expected_additive = {
        'monthly', 'binary_feature', 'additive_terms',
        'extra_regressors_additive',
    }
    expected_multiplicative = {
        'weekly', 'yearly', 'xmas', 'numeric_feature',
        'multiplicative_terms', 'extra_regressors_multiplicative',
        'holidays',
    }
    self.assertEqual(set(modes['additive']), expected_additive)
    self.assertEqual(set(modes['multiplicative']), expected_multiplicative)
def prophet_4(city_data_train, city_list, period):
    """Fit one Prophet model per city and collect the forecasts.

    Each training frame gains an ``nfl_weekend`` column (written in place),
    which is also used as an extra regressor. The forecast and its components
    are plotted for every city.

    Args:
        city_data_train: Indexable collection of training frames, one per
            position in *city_list*.
        city_list: Only its length is used, to decide how many frames to
            process.
        period: Number of periods to forecast beyond the training data.

    Returns:
        list: One forecast frame per processed city.
    """
    forecast_data = []
    for position in range(len(city_list)):
        city_frame = city_data_train[position]
        model = Prophet(yearly_seasonality=False,
                        weekly_seasonality=True,
                        daily_seasonality=False,
                        seasonality_mode='multiplicative')
        city_frame['nfl_weekend'] = city_frame['ds'].apply(nfl_weekend)
        model.add_regressor('nfl_weekend')
        model.fit(city_frame)

        horizon = model.make_future_dataframe(periods=period)
        horizon['nfl_weekend'] = horizon['ds'].apply(nfl_weekend)
        city_forecast = model.predict(horizon)

        # Plots are produced for their side effect only.
        model.plot(city_forecast)
        model.plot_components(city_forecast)
        forecast_data.append(city_forecast)
    return forecast_data
def create_model(self, capacity_max=None, capacity_min=None, **kwargs):
    """Build an unfitted Prophet model for this node.

    Growth is ``'linear'`` when neither capacity is given (or both are
    falsy) and ``'logistic'`` otherwise. Truthy capacities are also written
    to the ``cap`` / ``floor`` entries of ``self.node.item``.

    Args:
        capacity_max: Upper capacity, stored on ``self.cap``.
        capacity_min: Lower capacity, stored on ``self.floor``.
        **kwargs: Forwarded to the ``Prophet`` constructor.

    Returns:
        The configured ``Prophet`` instance, or ``None`` when ``fbprophet``
        cannot be imported (an error is logged in that case).
    """
    self.cap = capacity_max
    self.floor = capacity_min

    if not capacity_max and not capacity_min:
        growth = 'linear'
    else:
        growth = 'logistic'
        if self.cap:
            self.node.item['cap'] = capacity_max
        if self.floor:
            self.node.item['floor'] = capacity_min

    try:
        from fbprophet import Prophet
    except ImportError:  # pragma: no cover
        # Trailing space added: the two literals used to run together as
        # "Exiting.Install".
        logger.error('prophet model requires fbprophet to work. Exiting. '
                     'Install it with: pip install scikit-hts[prophet]')
        return

    model = Prophet(growth=growth, **kwargs)
    if self.node.exogenous:
        for ex in self.node.exogenous:
            model.add_regressor(ex)
    return model
def test_cross_validation_extra_regressors(self):
    """Cross-validate a model with an extra regressor and a conditional seasonality."""
    frame = self.__df.copy()
    n_rows = frame.shape[0]
    frame['extra'] = range(n_rows)
    # Alternates between 0 and 1 every seven rows.
    frame['is_conditional_week'] = np.arange(n_rows) // 7 % 2

    model = Prophet()
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_seasonality(name='conditional_weekly', period=7,
                          fourier_order=3, prior_scale=2.,
                          condition_name='is_conditional_week')
    model.add_regressor('extra')
    model.fit(frame)

    df_cv = diagnostics.cross_validation(
        model, horizon='4 days', period='4 days', initial='135 days')
    self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)

    # Consecutive distinct cutoffs must be at least one period apart.
    period = pd.Timedelta('4 days')
    cutoff_gaps = df_cv['cutoff'].diff()
    smallest_gap = cutoff_gaps[cutoff_gaps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_gap >= period)
    self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())

    # The y carried by the CV frame must equal the y of the source data.
    merged = pd.merge(df_cv, self.__df, 'left', on='ds')
    squared_error = np.sum((merged['y_x'] - merged['y_y'])**2)
    self.assertAlmostEqual(squared_error, 0.0)
def test_seasonality_modes(self):
    """Check how components are split between additive and multiplicative.

    The model mixes a holiday, auto and custom seasonalities, and two extra
    regressors, with the mode overridden on some of them.
    """
    xmas = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    model = Prophet(seasonality_mode='multiplicative', holidays=xmas)
    model.add_seasonality('monthly', period=30, mode='additive', fourier_order=3)
    model.add_regressor('binary_feature', mode='additive')
    model.add_regressor('numeric_feature')

    # Build the seasonal feature matrix without running a full fit.
    frame = DATA.copy()
    frame['binary_feature'] = [0] * 255 + [1] * 255
    frame['numeric_feature'] = range(510)
    frame = model.setup_dataframe(frame, initialize_scales=True)
    model.history = frame.copy()
    model.set_auto_seasonalities()
    features = model.make_all_seasonality_features(frame)
    component_cols, modes = features[2], features[3]

    self.assertEqual(sum(component_cols['additive_terms']), 7)
    self.assertEqual(sum(component_cols['multiplicative_terms']), 29)

    expected_additive = {
        'monthly', 'binary_feature', 'additive_terms',
        'extra_regressors_additive',
    }
    expected_multiplicative = {
        'weekly', 'yearly', 'xmas', 'numeric_feature',
        'multiplicative_terms', 'extra_regressors_multiplicative',
        'holidays',
    }
    self.assertEqual(set(modes['additive']), expected_additive)
    self.assertEqual(set(modes['multiplicative']), expected_multiplicative)
def train_prophet(self):
    """Train Prophet on the rows before ``self.split_date`` and pickle it.

    Rows of ``self.data`` with ``datetime`` before the split date are used
    for fitting; the remaining rows are predicted and stored, with an added
    ``Forecast_Prophet`` column, on ``self.df_valid``. The fitted model is
    pickled into the current working directory and the file name is stored
    on ``self.filename_temp``.

    The regressor list ``exogenous_features`` is read from the enclosing
    scope.
    """
    df = self.data
    date = self.split_date
    target = self.target

    df_train = df[df.datetime < date]
    # Explicit copy: a column is added below, and writing to a filtered
    # slice of self.data is a chained-assignment hazard.
    df_valid = df[df.datetime >= date].copy()

    model_fbp = Prophet(mcmc_samples=300)
    for feature in exogenous_features:
        model_fbp.add_regressor(feature)

    model_columns = ["datetime", target] + exogenous_features
    model_fbp.fit(df_train[model_columns].rename(columns={
        "datetime": "ds",
        target: "y"
    }))
    forecast = model_fbp.predict(
        df_valid[model_columns].rename(columns={"datetime": "ds"}))

    df_valid["Forecast_Prophet"] = forecast.yhat.values
    self.df_valid = df_valid

    # Day_month_year stamp; the format contains no spaces to replace.
    date_time = datetime.datetime.now().strftime("%d_%m_%Y")

    # save the model to disk
    filename_temp = 'temp_prophet_model_' + self.target + '_' + date_time + '.pickle'
    self.filename_temp = filename_temp
    with open(filename_temp, 'wb') as model_file:
        pickle.dump(model_fbp, model_file)
def prophet_train():
    """Train and pickle two Prophet models: plain, and with a weather regressor.

    Training data is loaded from the pickle at ``files.REGION_DF_DICT``,
    restricted to the ``md.IDF`` entry and prepared by
    ``format_training_data``. Both models are written under
    ``PROPHET_MODELS_PATH``.
    """
    logging.info("Preparing data for Prophet training.")
    # Context manager so the input file handle is closed after loading.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # safe if files.REGION_DF_DICT is a trusted, locally produced artefact.
    with open(files.REGION_DF_DICT, "rb") as source:
        region_df_dict = pickle.load(source)
    df_dict = region_df_dict[md.IDF]
    df_prophet_train = format_training_data(df_dict, md.START_TRAIN_DATE,
                                            md.END_TRAIN_DATE)

    logging.info("Training Prophet model on 2 years.")
    start_time = time()
    model_energy = Prophet(yearly_seasonality=True)
    model_energy.fit(df_prophet_train)
    with open(os.path.join(PROPHET_MODELS_PATH, files.PROPHET_2_YEARS_MODEL),
              "wb") as file:
        pickle.dump(model_energy, file)
    logging.info("Training Prophet model on 2 years took %.2f seconds.",
                 time() - start_time)

    logging.info("Training Prophet model on 2 years with weather covariate.")
    start_time = time()
    model_energy_with_weather = Prophet(yearly_seasonality=True)
    model_energy_with_weather.add_regressor(c.Meteo.MAX_TEMP_PARIS)
    model_energy_with_weather.fit(df_prophet_train)
    with open(
            os.path.join(PROPHET_MODELS_PATH,
                         files.PROPHET_2_YEARS_WEATHER_MODEL), "wb") as file:
        pickle.dump(model_energy_with_weather, file)
    logging.info("Training Prophet model on 2 years took %.2f seconds.",
                 time() - start_time)
def fit_prophet(dtf_train, dtf_test, lst_exog=None, model=None, freq="D", conf=0.95, figsize=(15,10)):
    """Fit Prophet on the train set, forecast the test set and evaluate.

    Args:
        dtf_train: Training frame with ``ds`` and ``y`` (plus ``cap`` for
            logistic growth and every column in *lst_exog*).
        dtf_test: Test frame with the same columns.
        lst_exog: Optional list of regressor column names.
        model: Optional pre-configured, unfitted ``Prophet`` instance; a
            default multiplicative model is created when ``None``.
        freq: Frequency passed to ``make_future_dataframe``.
        conf: Interval width for the default model; also forwarded to the
            evaluation helper.
        figsize: Figure size forwarded to the evaluation helper.

    Returns:
        tuple: ``(dtf, model)``, the frame returned by
        ``utils_evaluate_ts_model`` and the fitted model.
    """
    # Local import: this block needs pd.concat and did not reference pandas before.
    import pandas as pd

    ## setup prophet
    if model is None:
        model = Prophet(growth="linear", changepoints=None, n_changepoints=25,
                        seasonality_mode="multiplicative",
                        yearly_seasonality="auto", weekly_seasonality="auto",
                        daily_seasonality="auto", holidays=None,
                        interval_width=conf)
    if lst_exog is not None:
        for regressor in lst_exog:
            model.add_regressor(regressor)

    ## train
    model.fit(dtf_train)

    ## test
    dtf_prophet = model.make_future_dataframe(periods=len(dtf_test), freq=freq,
                                              include_history=True)
    if model.growth == "logistic":
        dtf_prophet["cap"] = dtf_train["cap"].unique()[0]

    if lst_exog is not None:
        dtf_prophet = dtf_prophet.merge(dtf_train[["ds"]+lst_exog], how="left")
        # One .loc call writes into dtf_prophet itself. The previous
        # `.iloc[...][cols] = ...` assigned to a temporary and could leave the
        # test rows as NaN. The explicit start also handles an empty test set.
        test_rows = dtf_prophet.index[len(dtf_prophet) - len(dtf_test):]
        dtf_prophet.loc[test_rows, lst_exog] = dtf_test[lst_exog].values

    dtf_prophet = model.predict(dtf_prophet)
    dtf_train = dtf_train.merge(dtf_prophet[["ds","yhat"]], how="left").rename(
        columns={'yhat':'model', 'y':'ts'}).set_index("ds")
    dtf_test = dtf_test.merge(dtf_prophet[["ds","yhat","yhat_lower","yhat_upper"]], how="left").rename(
        columns={'yhat':'forecast', 'y':'ts', 'yhat_lower':'lower', 'yhat_upper':'upper'}).set_index("ds")

    ## evaluate
    # DataFrame.append was removed in pandas 2.0; concat stacks rows the same way.
    dtf = pd.concat([dtf_train, dtf_test])
    dtf = utils_evaluate_ts_model(dtf, conf=conf, figsize=figsize, title="Prophet")
    return dtf, model
def test5():
    """Additional regressors.

    Additional regressors can be added to the linear part of the model with
    the add_regressor method. The column holding the regressor values must be
    present in both the fitting and the prediction dataframes.
    """

    def nfl_sunday(ds):
        """Return 1 when *ds* is a Sunday during the NFL season, else 0."""
        date = pd.to_datetime(ds)
        in_season = date.month > 8 or date.month < 2
        return 1 if date.weekday() == 6 and in_season else 0

    # `df` comes from the enclosing scope and gains the regressor column.
    df['nfl_sunday'] = df['ds'].apply(nfl_sunday)

    model = Prophet()
    model.add_regressor('nfl_sunday')
    model.fit(df)

    horizon = model.make_future_dataframe(periods=365)
    horizon['nfl_sunday'] = horizon['ds'].apply(nfl_sunday)
    prediction = model.predict(horizon)
    fig = model.plot_components(prediction)
def run_prophet(id1, data):
    """Forecast 28 periods for one id, with a constant fallback on failure.

    Args:
        id1: Identifier of the series; passed to ``get_holidays`` and
            ``make_validation_file``.
        data: Training frame for Prophet; must contain ``log_sell_price``.

    Returns:
        The frame built by ``make_validation_file`` on success. If fitting
        or prediction raises, a one-row frame holding *id1* followed by 28
        ones, using the columns of the module-level ``submission0``.
    """
    horizon = 28
    holidays = get_holidays(id1)
    model = Prophet(uncertainty_samples=False,
                    holidays=holidays,
                    weekly_seasonality=True,
                    yearly_seasonality=True,
                    changepoint_prior_scale=0.5)
    model.add_seasonality(name='monthly', period=30.5, fourier_order=2)
    model.add_regressor('log_sell_price')

    try:
        model.fit(data)
        future = model.make_future_dataframe(periods=horizon, include_history=False)
        # The last known price is carried forward over the whole horizon.
        future['log_sell_price'] = np.repeat(data['log_sell_price'].iloc[-1], horizon)
        forecast2 = model.predict(future)
        submission = make_validation_file(id1, forecast2)
        return submission
    except Exception:
        # Best-effort fallback for a failed series. `Exception` (not a bare
        # except) so Ctrl-C and SystemExit still stop a long batch run.
        print('Failed-**************', id1)
        COLS = submission0.columns[0:]
        dd = np.hstack([np.array(id1), np.ones(horizon)]).reshape(1, horizon + 1)
        submission = pd.DataFrame(dd, columns=COLS)
        return submission
def generate_model(training_data, holidays_df, periods=100, add_regressor=True):
    """Fit a multiplicative Prophet model and forecast *periods* steps ahead.

    Args:
        training_data: Frame with ``ds`` and ``y``. When *add_regressor* is
            true a ``covid_drop`` column is written into it in place.
        holidays_df: Holidays frame handed to ``Prophet``.
        periods: Number of periods to forecast beyond the training data.
        add_regressor: Whether to add the ``covid_drop`` regressor, derived
            from ``ds`` via ``covid_drop_mark``.

    Returns:
        tuple: ``(model, forecast)``.
    """
    model = Prophet(
        yearly_seasonality=True,
        weekly_seasonality=True,
        seasonality_mode='multiplicative',
        holidays=holidays_df,
        changepoint_range=1,
    )

    if add_regressor:
        training_data['covid_drop'] = training_data.ds.apply(covid_drop_mark)
        model.add_regressor('covid_drop')
    model.fit(training_data)

    # History plus `periods` additional rows to predict.
    horizon = model.make_future_dataframe(periods=periods)
    if add_regressor:
        horizon['covid_drop'] = horizon.ds.apply(covid_drop_mark)

    prediction = model.predict(horizon)
    return model, prediction
def seek_the_oracle(current_series, args, series, forecast_length, future_regressor):
    """Fit Prophet on a single series and return its point and interval forecasts.

    Model hyper-parameters are read from ``self`` in the enclosing scope.

    Returns:
        tuple: ``(forecast, lower_forecast, upper_forecast)``, the last
        ``forecast_length`` rows of ``yhat``, ``yhat_lower`` and
        ``yhat_upper``, each named after *series*.
    """
    uses_user_regressor = args['regression_type'] == 'User'

    history = current_series.rename(columns={series: 'y'})
    history['ds'] = history.index

    model = Prophet(
        interval_width=args['prediction_interval'],
        yearly_seasonality=self.yearly_seasonality,
        weekly_seasonality=self.weekly_seasonality,
        daily_seasonality=self.daily_seasonality,
        growth=self.growth,
        n_changepoints=self.n_changepoints,
        changepoint_prior_scale=self.changepoint_prior_scale,
        seasonality_mode=self.seasonality_mode,
        changepoint_range=self.changepoint_range,
        seasonality_prior_scale=self.seasonality_prior_scale,
        holidays_prior_scale=self.holidays_prior_scale,
    )
    if args['holiday']:
        model.add_country_holidays(country_name=args['holiday_country'])
    if uses_user_regressor:
        history = pd.concat([history, args['regressor_train']], axis=1)
        for regressor_name in args['regressor_name']:
            model.add_regressor(regressor_name)
    model = model.fit(history)

    future = model.make_future_dataframe(periods=forecast_length)
    if uses_user_regressor:
        if future_regressor.ndim > 1:
            if future_regressor.shape[1] > 1:
                # Several columns are collapsed to their row-wise mean and std.
                row_mean = future_regressor.mean(axis=1).to_frame()
                row_std = future_regressor.std(axis=1).to_frame()
                ft_regr = row_mean.merge(
                    row_std, left_index=True, right_index=True)
            else:
                ft_regr = future_regressor.copy()
            ft_regr.columns = args['regressor_train'].columns
            # Training and future regressor values are stacked and joined on date.
            regr = pd.concat([args['regressor_train'], ft_regr])
            regr.index.name = 'ds'
            regr.reset_index(drop=False, inplace=True)
            future = future.merge(regr, on="ds", how='left')
        else:
            stacked = np.append(args['regressor_train'], future_regressor.values)
            future[args['regressor_name']] = stacked

    # Keep only the forecast horizon, dropping the backcast.
    fcst = model.predict(future).tail(forecast_length)

    bands = []
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        band = fcst[column]
        band.name = series
        bands.append(band)
    forecast, lower_forecast, upper_forecast = bands
    return (forecast, lower_forecast, upper_forecast)
def make_Prophet_model(data, regressors, price_normalizer, plot_comp, plot_pred, axes=None, xlim=None):
    """Fit Prophet on AAPL prices, optionally with regressors, and predict.

    Args:
        data: Frame indexed by ``datetime`` whose first column is
            ``AAPL_Adj_close``; all further columns are treated as regressor
            inputs for prediction.
        regressors: ``None``, a single column name, or a sequence of column
            names to register as multiplicative regressors.
        price_normalizer: Object with ``inverse_transform`` used to map the
            predictions and actuals back to price scale.
        plot_comp: Whether to plot the forecast components.
        plot_pred: Whether to plot the predictions.
        axes: Axes handed to ``plot_predictions``.
        xlim: X-axis limits for the prediction plot; defaults to
            ``[first_day, '2020-04-30']``.

    Returns:
        ``(yhat, y)`` as numpy arrays, followed by ``axes`` when *plot_pred*
        is true.
    """
    data_advance = data.copy()
    data_advance.reset_index(inplace=True)
    data_advance.rename(columns={'datetime':'ds', 'AAPL_Adj_close':'y'},inplace=True)
    first_day = '2017-04-03'
    split_date = ["2020-3-31", "2020-04-01"]

    holiday_calendar = form_holiday_calendar()
    model_advance = Prophet(holidays=holiday_calendar,
                            seasonality_mode='multiplicative',
                            yearly_seasonality=True,
                            weekly_seasonality=True,
                            daily_seasonality=False)

    # Normalise to a list so a single name and a sequence share one code path.
    if regressors is None:
        regressor_names = []
    elif isinstance(regressors, str):
        regressor_names = [regressors]
    else:
        regressor_names = list(regressors)

    if regressors is not None:
        # Every column after ds / y is supplied to predict().
        advance_regressors = list(data_advance.columns[2:])
        for name in regressor_names:
            model_advance.add_regressor(name, mode='multiplicative')

    train_ad, test_ad = train_test_split(data_advance, split_date)
    model_advance.fit(train_ad)

    future = pd.DataFrame(data_advance['ds'])
    if regressors is None:
        forecast_ad = model_advance.predict(future)
    else:
        futures = pd.concat([future, data_advance.loc[:, advance_regressors]], axis=1)
        forecast_ad = model_advance.predict(futures)

    if plot_comp:
        model_advance.plot_components(forecast_ad, figsize=(8, 6))

    result = make_predictions_df(forecast_ad, train_ad, test_ad)
    # Predictions are floored at zero before being mapped back to price scale.
    for column in ('yhat', 'yhat_lower', 'yhat_upper'):
        result.loc[:, column] = price_normalizer.inverse_transform(
            np.array(result[column].clip(lower=0)).reshape(-1, 1))
    result.loc[:,'y'] = price_normalizer.inverse_transform(
        np.array(result.loc[:,'y']).reshape(-1,1))

    if plot_pred:
        if xlim is None:
            xlim = [first_day,'2020-04-30']
        axes = plot_predictions(result, first_day, split_date, axes, xlim)
        if regressors is None:
            axes.set_title('Predictions based on Historical Prices')
        else:
            # Joining the names avoids the TypeError that `str + list` raised.
            axes.set_title('Predictions by Adding Feature ' + ', '.join(regressor_names))
        return np.array(result.loc[:,'yhat']), np.array(result.loc[:,'y']), axes
    return np.array(result.loc[:,'yhat']), np.array(result.loc[:,'y'])
def Bayseian2(txs_raw, forecastDay, y, x_col, unit):
    """Fit Prophet with extra regressors and predict train plus test rows.

    Args:
        txs_raw: Input frame; column *y* is renamed to ``'y'`` on a copy.
        forecastDay: Size of the test window in days; the split point is
            ``varr.LAST_DATE - (forecastDay - 1)`` days.
        y: Name of the target column in *txs_raw*.
        x_col: Feature columns, including ``'ds'``; every other entry is
            added as a regressor.
        unit: Sampling unit, one of ``'day'``, ``'week'`` or ``'month'``.

    Returns:
        dict: Keys ``'model'``, ``'future'`` and ``'forecastProphetTable'``.

    Raises:
        ValueError: If *unit* is not one of the supported values.
    """
    # unit -> (rows needed before yearly seasonality is enabled,
    #          add weekly seasonality, add monthly seasonality)
    unit_rules = {
        'day': (366, True, True),
        'week': (53, False, True),
        'month': (12, False, False),
    }
    # Equality lookup: `unit is 'day'` compared object identity, which is
    # not guaranteed to hold for equal strings.
    if unit not in unit_rules:
        raise ValueError(
            "unit must be one of 'day', 'week' or 'month', got %r" % (unit,))
    min_rows_for_yearly, use_weekly, use_monthly = unit_rules[unit]

    txs_raw = copy.deepcopy(txs_raw.rename(index=str, columns={y: 'y'}))
    txs_train, txs_test = ft_c.cut_df(
        txs_raw, varr.START_DATE,
        (varr.LAST_DATE - timedelta(days=forecastDay - 1)))
    txs_trainX = txs_train[x_col]
    txs_testX = txs_test[x_col]

    # Yearly seasonality only once at least one full cycle has been observed.
    use_yearly = len(txs_train) >= min_rows_for_yearly

    model = Prophet(daily_seasonality=False,
                    yearly_seasonality=use_yearly,
                    holidays=holidaybeta)
    if use_monthly:
        model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    if use_weekly:
        model.add_seasonality(name='weekly', period=7, fourier_order=5,
                              prior_scale=0.1)

    for feature in x_col:
        if not feature == 'ds':
            model.add_regressor(feature)
    model.fit(txs_train)

    # Predict over the feature rows of both splits.
    future = pd.concat([txs_trainX, txs_testX], axis=0)
    forecastProphetTable = model.predict(future)
    return {'model': model, 'future': future,
            'forecastProphetTable': forecastProphetTable}
def create_prophet_with_exo(feats):
    """Create an unfitted Prophet model with one regressor per feature.

    :param feats: (list): names of the columns to register as regressors
    :return: Prophet model with a 95% interval width
    """
    prophet_model = Prophet(interval_width=.95)
    for column_name in feats:
        prophet_model.add_regressor(column_name)
    return prophet_model
def initalize_model(yearly_seasonality=True, seasonality_mode='additive', weather_prior=0.8):
    """Create an unfitted Prophet model with the two PCA weather regressors.

    Args:
        yearly_seasonality: Forwarded to ``Prophet``.
        seasonality_mode: Forwarded to ``Prophet``.
        weather_prior: Prior scale applied to both ``pca1`` and ``pca2``.

    Returns:
        The configured ``Prophet`` instance.
    """
    base_model = Prophet(
        yearly_seasonality=yearly_seasonality,
        seasonality_mode=seasonality_mode,
    )
    # Both PCA weather components enter additively with the same prior.
    for component in ('pca1', 'pca2'):
        base_model.add_regressor(component, prior_scale=weather_prior,
                                 mode='additive')
    return base_model
def create_model(names, holidays):
    """Create a linear-growth Prophet model with a subset of *names* as regressors.

    Only the entries at positions 3 through ``len(names) - 2`` are added as
    regressors; the first three entries and the last one are skipped.

    Args:
        names: Indexable collection of column names.
        holidays: Holidays frame handed to ``Prophet``.

    Returns:
        The configured, unfitted ``Prophet`` instance.
    """
    # Linear growth: logistic was found to be slower and less accurate.
    prophet_model = Prophet(growth='linear', changepoint_prior_scale=0.01,
                            holidays=holidays)
    for position in range(3, len(names) - 1):
        prophet_model.add_regressor(names[position])
    return prophet_model
def prophet_walk_forward_val(data, start_size, val_window, regressors, steps):
    '''
    Function: prophet_walk_forward_val

    Walk-forward validation of a Prophet model with weekly and yearly
    seasonality. At each step the training window grows by one validation
    window and the next window is forecast.

    Arguments:
        data - pandas dataframe containing data of interest
        start_size - initial number of rows for training
        val_window - size of the validation window for each trial
        regressors - list of regressors to add to the model
        steps - number of walk forward steps to run

    Return:
        mape_list - list of tuples with the last index of the training
                    window and the MAPE (in percent) for the window
    '''
    mape_list = []
    for step in range(steps):
        window_start = start_size + step * val_window
        print('Step %d starting at index %d' % (step + 1, window_start))

        # Training block always starts at row 0; validation block follows it.
        train_dat = data_block(data, 0, window_start)
        val_dat = data_block(data, window_start, val_window)

        m = Prophet(weekly_seasonality=True, yearly_seasonality=True)
        for reg in regressors:
            m.add_regressor(reg)
        m.fit(train_dat)

        # Forecast for the validation step
        forecast = m.predict(val_dat)
        forecast.index = val_dat.index
        print(forecast['ds'].values[0], val_dat['ds'].values[0])

        # Absolute percentage error per row. The denominator uses |y| so
        # negative actuals cannot produce a negative error.
        actual = val_dat['y'].values
        predicted = forecast['yhat'].values
        ape = np.abs(actual - predicted) / np.abs(actual) * 100

        mape_list.append((train_dat.index[-1], np.mean(ape)))
    return mape_list
def fit_Prophet(self, ds, y, **params):
    """Fit a Prophet model on ``self.df``.

    ``self.df`` gains (or has overwritten) the columns ``y`` and ``ds``,
    copied from the columns named by the *y* and *ds* arguments.

    Args:
        ds: Name of the column in ``self.df`` holding the timestamps.
        y: Name of the column in ``self.df`` holding the target.
        **params: Keyword arguments for the ``Prophet`` constructor. Two
            optional keys are consumed here and not forwarded:
            ``add_country_holidays`` (a country name) and ``regressors``
            (an iterable of column names).

    Returns:
        The fitted ``Prophet`` model.
    """
    # Both extras are optional; a missing key simply disables the feature.
    country_hols = params.pop('add_country_holidays', None)
    regressors = params.pop('regressors', None)

    model = Prophet(**params)
    self.df['y'] = self.df[y]
    self.df['ds'] = self.df[ds]

    if country_hols:
        model.add_country_holidays(country_name=country_hols)
    if regressors:
        for r in regressors:
            model.add_regressor(r)

    model.fit(self.df)
    return model
def build_model(df, holidays):
    """Create a Prophet model with one weekend-hour regressor per hour of day.

    For every hour 0-23 a column ``weekend_hour<h>`` is written into *df*
    (derived from ``ds`` via ``weekend_hour``) and registered as a regressor.

    Returns:
        tuple: ``(model, df)``, the unfitted model and the augmented frame.
    """
    model = Prophet(holidays=holidays, yearly_seasonality=False)
    for hour in range(24):
        column = f"weekend_hour{hour}"
        df[column] = df["ds"].apply(
            lambda stamp, hour=hour: weekend_hour(stamp, hour))
        model.add_regressor(column)
    return model, df
def fit(self, y, period, start_date, x=None, metric="smape", val_size=None, verbose=False):
    """
    Build the model using the best-tuned hyperparameter values.

    :param y: pd.Series or 1-D np.array, time series to predict.
    :param period: Int or Str, the number of observations per cycle: 1 or "annual" for yearly data, 4 or "quarterly"
    for quarterly data, 7 or "daily" for daily data, 12 or "monthly" for monthly data, 24 or "hourly" for hourly
    data, 52 or "weekly" for weekly data. First-letter abbreviations of strings work as well ("a", "q", "d", "m",
    "h" and "w", respectively). Additional reference: https://robjhyndman.com/hyndsight/seasonal-periods/.
    :param start_date: pd.datetime object, date of the first observation in training data
    :param x: pd.DataFrame or 2-D np.array, exogenous predictors, optional. Each column becomes a regressor
    named by its integer position.
    :param metric: Str, the metric used for model selection. One of: "mse", "mae", "mape", "smape", "rmse".
    :param val_size: Int, the number of most recent observations to use as validation set for tuning.
    :param verbose: Boolean, True for printing additional info while tuning.
    :return: None
    """
    self.y = y
    self.name = "Prophet"
    self.key = "prophet"
    self._tune(y=y, period=period, start_date=start_date, metric=metric, val_size=val_size, verbose=verbose)

    dates = data_utils.create_dates(start_date, period=self.period, length=len(y))
    input_df = pd.DataFrame({"ds": dates, "y": y})
    model = Prophet(
        seasonality_mode=self.params["seasonality"],
        growth=self.params["growth"],
        changepoint_prior_scale=self.params["changepoint_prior_scale"],
    )

    if x is not None:
        # Iterating a DataFrame yields its labels, not its data, so a frame
        # is reduced to its underlying array before the transpose.
        x_values = getattr(x, "values", x)
        # NOTE(review): regressor names are integers here; confirm the
        # installed Prophet accepts non-string regressor names.
        for variable_id, x_variable in enumerate(x_values.T):
            input_df[variable_id] = x_variable
            model.add_regressor(variable_id)

    with SuppressStdoutStderr():
        model = model.fit(input_df)

    self.model = model
    self.last_fitting_date = dates[-1]
    self.time_delta = dates[1] - dates[0]
def prophet_f(daily_prior, yearly_prior, hum_prior, dp_prior, metric = 'rmse', period = '1000 hours'):
    """
    Implements the prophet model to be optimised and performs cross-validation

    The training frame ``df`` is read from the enclosing scope. All four
    priors are exponents: the prior scale used is ``10 ** value``.

    Args:
        daily_prior: exponent of the daily seasonality prior scale
        yearly_prior: exponent of the yearly seasonality prior scale
        hum_prior: exponent of the humidity regressor prior scale
        dp_prior: exponent of the dew.point regressor prior scale
        metric: 'rmse' to return a single score, or 'all' to return the
            fitted model with a table of metrics
        period: cross-validation period

    Returns:
        For ``metric == 'rmse'``: the negative root mean square error of the
        cross-validated forecasts.
        For ``metric == 'all'``: ``(model, table)`` where the table holds the
        'horizon', 'rmse', 'mae' and 'mape' columns of ``performance_metrics``.

    Raises:
        ValueError: if *metric* is neither 'rmse' nor 'all'.
    """
    # Fail before the expensive fit instead of silently returning None after it.
    if metric not in ('rmse', 'all'):
        raise ValueError("metric must be 'rmse' or 'all', got %r" % (metric,))

    m = Prophet(growth = 'flat', weekly_seasonality = False)
    m.add_seasonality(name = 'daily', period = 1, mode = 'multiplicative',
                      prior_scale = 10 ** daily_prior, fourier_order = 2)
    m.add_seasonality(name = 'yearly', period = 365.25, mode = 'additive',
                      prior_scale = 10 ** yearly_prior, fourier_order = 2)
    m.add_regressor('humidity', mode = 'multiplicative', prior_scale = 10 ** hum_prior)
    m.add_regressor('dew.point', mode = 'multiplicative', prior_scale = 10 ** dp_prior)
    m.fit(df)

    df_cv = cross_validation(m, initial = '90000 hours', period = period, horizon = '1 hours')

    if metric == 'rmse':
        df_cv_rmse = ((df_cv.y - df_cv.yhat) ** 2).mean() ** .5
        # Negated so that a maximiser minimises the error.
        return - df_cv_rmse

    df_p = performance_metrics(df_cv)
    return m, df_p[['horizon', 'rmse', 'mae', 'mape']]
def fbProphet_init(self, regressors, features):
    """Create an unfitted Prophet model with custom seasonalities and regressors.

    The built-in seasonalities are switched off and replaced by explicit
    ones. The summer / winter seasonalities are conditional and only added
    when their ``season_*`` flag is present in *features*.

    Args:
        regressors: Iterable of column names to add as additive regressors.
        features: Container checked for ``'season_summer'`` and
            ``'season_winter'``.

    Returns:
        The configured ``Prophet`` instance.
    """
    prophet = Prophet(
        growth='linear',
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
        changepoint_prior_scale=0.001,
        seasonality_mode='additive',
    )

    # Conditional seasonalities, active only where the flag column is set.
    for season in ('summer', 'winter'):
        flag = 'season_' + season
        if flag in features:
            prophet.add_seasonality(
                name=season,
                period=6,
                fourier_order=2,
                condition_name=flag)

    # Unconditional seasonalities: (name, period, fourier order).
    always_on = (('daily', 1, 2), ('weekly', 7, 10), ('yearly', 366, 20))
    for name, period, order in always_on:
        prophet.add_seasonality(name=name, period=period, fourier_order=order)

    # External regressors
    for column in regressors:
        prophet.add_regressor(column,
                              prior_scale=20,
                              mode='additive',
                              standardize='auto')
    return prophet
def run_prophet_log(train, test):
    """Fit a logistic-growth Prophet model with weather regressors and predict.

    Args:
        train: Training frame passed to ``fit``.
        test: Frame passed to ``predict``.

    Returns:
        tuple: ``(fcst, m0)``, the forecast frame and the fitted model.
    """
    m0 = Prophet(
        growth='logistic',
        n_changepoints=1,
        changepoint_prior_scale=0.2,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
    )
    # Additive regressors as (column, prior scale). 'tmin' and 'tmax' were
    # previously tried as multiplicative regressors (prior_scale=0.02) and
    # are currently disabled.
    for column, scale in (('tavg', 0.1), ('pcpn', 0.02)):
        m0.add_regressor(column, prior_scale=scale, mode='additive')

    m0.fit(train)
    fcst = m0.predict(test)
    return fcst, m0
def test_cross_validation_extra_regressors(self):
    """Cross-validate a model that uses a custom seasonality and an extra regressor."""
    frame = self.__df.copy()
    frame['extra'] = range(frame.shape[0])

    model = Prophet()
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_regressor('extra')
    model.fit(frame)

    df_cv = diagnostics.cross_validation(
        model, horizon='4 days', period='4 days', initial='135 days')
    self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)

    # Consecutive distinct cutoffs must be at least one period apart.
    period = pd.Timedelta('4 days')
    cutoff_gaps = df_cv['cutoff'].diff()
    smallest_gap = cutoff_gaps[cutoff_gaps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_gap >= period)
    self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())

    # The y carried by the CV frame must equal the y of the source data.
    merged = pd.merge(df_cv, self.__df, 'left', on='ds')
    squared_error = np.sum((merged['y_x'] - merged['y_y']) ** 2)
    self.assertAlmostEqual(squared_error, 0.0)
def test_copy(self):
    """Check that ``diagnostics.prophet_copy`` reproduces a model's settings.

    Part one builds a model for every combination of constructor options
    and compares each copied attribute with the source model. Part two
    checks that a copy taken with a ``cutoff`` drops changepoints after the
    cutoff while keeping custom seasonalities and extra regressors.
    """
    df = DATA_all.copy()
    df['cap'] = 200.
    df['binary_feature'] = [0] * 255 + [1] * 255
    # Single-valued options below use values other than the defaults.
    holiday = pd.DataFrame(
        {'ds': pd.to_datetime(['2016-12-25']), 'holiday': ['x']})
    # Order matches Prophet's positional constructor arguments.
    products = itertools.product(
        ['linear', 'logistic'],  # growth
        [None, pd.to_datetime(['2016-12-25'])],  # changepoints
        [3],  # n_changepoints
        [0.9],  # changepoint_range
        [True, False],  # yearly_seasonality
        [True, False],  # weekly_seasonality
        [True, False],  # daily_seasonality
        [None, holiday],  # holidays
        ['additive', 'multiplicative'],  # seasonality_mode
        [1.1],  # seasonality_prior_scale
        [1.1],  # holidays_prior_scale
        [0.1],  # changepoint_prior_scale
        [100],  # mcmc_samples
        [0.9],  # interval_width
        [200],  # uncertainty_samples
    )
    # Values should be copied correctly
    for product in products:
        m1 = Prophet(*product)
        m1.history = m1.setup_dataframe(
            df.copy(), initialize_scales=True)
        m1.set_auto_seasonalities()
        m2 = diagnostics.prophet_copy(m1)
        self.assertEqual(m1.growth, m2.growth)
        self.assertEqual(m1.n_changepoints, m2.n_changepoints)
        self.assertEqual(m1.changepoint_range, m2.changepoint_range)
        # Changepoints are either None or array-like. Comparing array-likes
        # with assertEqual takes the truth value of an element-wise result,
        # which is ambiguous for pandas objects, so reduce with .all().
        if m1.changepoints is None:
            self.assertIsNone(m2.changepoints)
        else:
            self.assertTrue(
                (m1.changepoints == m2.changepoints).all())
        # The copy has the automatic seasonality flags turned off; the
        # seasonalities themselves show up in m2.seasonalities instead.
        self.assertEqual(False, m2.yearly_seasonality)
        self.assertEqual(False, m2.weekly_seasonality)
        self.assertEqual(False, m2.daily_seasonality)
        self.assertEqual(
            m1.yearly_seasonality, 'yearly' in m2.seasonalities)
        self.assertEqual(
            m1.weekly_seasonality, 'weekly' in m2.seasonalities)
        self.assertEqual(
            m1.daily_seasonality, 'daily' in m2.seasonalities)
        if m1.holidays is None:
            self.assertIsNone(m2.holidays)
        else:
            self.assertTrue((m1.holidays == m2.holidays).values.all())
        self.assertEqual(m1.seasonality_mode, m2.seasonality_mode)
        self.assertEqual(m1.seasonality_prior_scale,
                         m2.seasonality_prior_scale)
        self.assertEqual(m1.changepoint_prior_scale,
                         m2.changepoint_prior_scale)
        self.assertEqual(m1.holidays_prior_scale, m2.holidays_prior_scale)
        self.assertEqual(m1.mcmc_samples, m2.mcmc_samples)
        self.assertEqual(m1.interval_width, m2.interval_width)
        self.assertEqual(m1.uncertainty_samples, m2.uncertainty_samples)

    # Check for cutoff and custom seasonality and extra regressors
    changepoints = pd.date_range('2012-06-15', '2012-09-15')
    cutoff = pd.Timestamp('2012-07-25')
    m1 = Prophet(changepoints=changepoints)
    m1.add_seasonality('custom', 10, 5)
    m1.add_regressor('binary_feature')
    m1.fit(df)
    m2 = diagnostics.prophet_copy(m1, cutoff=cutoff)
    # Only changepoints up to the cutoff should survive the copy.
    changepoints = changepoints[changepoints <= cutoff]
    self.assertTrue((changepoints == m2.changepoints).all())
    self.assertIn('custom', m2.seasonalities)
    self.assertIn('binary_feature', m2.extra_regressors)
def test_added_regressors(self):
    """Exercise extra regressors from registration through forecasting.

    Covers: fit/predict rejecting frames that lack a registered regressor,
    the standardization parameters stored per regressor, the standardized
    values produced by ``setup_dataframe``, the layout of the seasonality
    feature matrix and prior scales, the additive/multiplicative forecast
    components, and a constant (all-zero) regressor.
    """
    model = Prophet()
    model.add_regressor('binary_feature', prior_scale=0.2)
    model.add_regressor('numeric_feature', prior_scale=0.5)
    model.add_regressor(
        'numeric_feature2', prior_scale=0.5, mode='multiplicative'
    )
    model.add_regressor('binary_feature2', standardize=True)

    history = DATA.copy()
    history['binary_feature'] = [0] * 255 + [1] * 255
    history['numeric_feature'] = range(510)
    history['numeric_feature2'] = range(510)
    # 'binary_feature2' is still missing, so fitting has to fail.
    with self.assertRaises(ValueError):
        model.fit(history)
    history['binary_feature2'] = [1] * 100 + [0] * 410
    model.fit(history)

    # Standardization parameters recorded for each regressor.
    regressors = model.extra_regressors
    expected_binary = {
        'mode': 'additive',
        'standardize': 'auto',
        'prior_scale': 0.2,
        'mu': 0,
        'std': 1,
    }
    self.assertEqual(regressors['binary_feature'], expected_binary)
    numeric_info = regressors['numeric_feature']
    self.assertEqual(numeric_info['prior_scale'], 0.5)
    self.assertEqual(numeric_info['mu'], 254.5)
    self.assertAlmostEqual(numeric_info['std'], 147.368585, places=5)
    self.assertEqual(
        regressors['numeric_feature2']['mode'], 'multiplicative')
    binary2_info = regressors['binary_feature2']
    self.assertEqual(binary2_info['prior_scale'], 10.)
    self.assertAlmostEqual(binary2_info['mu'], 0.1960784, places=5)
    self.assertAlmostEqual(binary2_info['std'], 0.3974183, places=5)

    # Standardized values in the prepared frame.
    scaled = model.setup_dataframe(history.copy())
    self.assertEqual(scaled['binary_feature'][0], 0)
    self.assertAlmostEqual(
        scaled['numeric_feature'][0], -1.726962, places=4)
    self.assertAlmostEqual(
        scaled['binary_feature2'][0], 2.022859, places=4)

    # Feature matrix and prior scales.
    seasonal_features, prior_scales, component_cols, _modes = (
        model.make_all_seasonality_features(scaled)
    )
    self.assertEqual(seasonal_features.shape[1], 30)
    checked_names = ('binary_feature', 'numeric_feature', 'binary_feature2')
    checked_priors = (0.2, 0.5, 10.)
    prior_vector = np.array(prior_scales)
    for name, expected_prior in zip(checked_names, checked_priors):
        self.assertIn(name, seasonal_features)
        # Each regressor occupies exactly one feature column.
        self.assertEqual(sum(component_cols[name]), 1)
        self.assertEqual(
            sum(prior_vector * component_cols[name]),
            expected_prior,
        )

    # Forecast components.
    future = pd.DataFrame({
        'ds': ['2014-06-01'],
        'binary_feature': [0],
        'numeric_feature': [10],
        'numeric_feature2': [10],
    })
    # Prediction also requires every registered regressor.
    with self.assertRaises(ValueError):
        model.predict(future)
    future['binary_feature2'] = 0
    forecast = model.predict(future)

    def first(column):
        # Value of ``column`` in the forecast row labelled 0.
        return forecast[column][0]

    self.assertEqual(forecast.shape[1], 37)
    self.assertEqual(first('binary_feature'), 0)
    self.assertAlmostEqual(
        first('extra_regressors_additive'),
        first('numeric_feature') + first('binary_feature2'),
    )
    self.assertAlmostEqual(
        first('extra_regressors_multiplicative'),
        first('numeric_feature2'),
    )
    self.assertAlmostEqual(
        first('additive_terms'),
        first('yearly') + first('weekly')
        + first('extra_regressors_additive'),
    )
    self.assertAlmostEqual(
        first('multiplicative_terms'),
        first('extra_regressors_multiplicative'),
    )
    self.assertAlmostEqual(
        first('yhat'),
        first('trend') * (1 + first('multiplicative_terms'))
        + first('additive_terms'),
    )

    # A regressor that is constant at 0 must still fit, with std set to 1.
    history['constant_feature'] = 0
    constant_model = Prophet()
    constant_model.add_regressor('constant_feature')
    constant_model.fit(history)
    self.assertEqual(
        constant_model.extra_regressors['constant_feature']['std'], 1)