def test_cross_validation_extra_regressors(self):
    """Cross-validate a model with an extra regressor and a conditional seasonality.

    Checks that two distinct cutoffs are produced, that consecutive cutoffs
    are at least one ``period`` apart, that every cutoff precedes the dates
    it forecasts, and that the ``y`` column returned by cross-validation
    matches the ``y`` of the original fixture frame.
    """
    history = self.__df.copy()
    n_rows = history.shape[0]
    history['extra'] = range(n_rows)
    # Alternates 0/1 in blocks of seven consecutive rows.
    history['is_conditional_week'] = np.arange(n_rows) // 7 % 2

    model = Prophet()
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_seasonality(
        name='conditional_weekly',
        period=7,
        fourier_order=3,
        prior_scale=2.,
        condition_name='is_conditional_week',
    )
    model.add_regressor('extra')
    model.fit(history)

    df_cv = diagnostics.cross_validation(
        model, horizon='4 days', period='4 days', initial='135 days')
    cutoffs = df_cv['cutoff']
    self.assertEqual(len(np.unique(cutoffs)), 2)

    # Smallest strictly-positive step between consecutive cutoff values.
    steps = cutoffs.diff()
    smallest_step = steps[steps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_step >= pd.Timedelta('4 days'))
    self.assertTrue((cutoffs < df_cv['ds']).all())

    # After the merge, 'y_x' comes from df_cv and 'y_y' from the fixture.
    merged = pd.merge(df_cv, self.__df, 'left', on='ds')
    squared_error = np.sum((merged['y_x'] - merged['y_y'])**2)
    self.assertAlmostEqual(squared_error, 0.0)
def run_prophet_train(df):
    """Fit a Prophet model with CN holidays and a custom weekly term on ``df``.

    The fitted model is not returned; the function always returns ``None``.

    Args:
        df: Training frame handed straight to ``Prophet.fit``.
    """
    # NOTE(review): this custom term shares the name 'weekly' with the
    # built-in weekly seasonality enabled below -- confirm that replacing
    # the built-in one is what is intended.
    weekly_spec = {
        'name': 'weekly',
        'period': 7,
        'fourier_order': 3,
        'prior_scale': 0.1,
    }
    model = Prophet(daily_seasonality=True, weekly_seasonality=True)
    model.add_country_holidays(country_name='CN')
    model.add_seasonality(**weekly_spec)
    model.fit(df)
def prophet_forecast(self, train_start, train_end, test_start, test_end):
    """Fit Prophet on the tail of ``self.data`` and save three diagnostic plots.

    Writes ``img/prophet_forecast.png``, ``img/components.png`` and
    ``img/actual_v_predicted.png`` below ``img_dirp``. Returns ``None``.

    Args:
        train_start, train_end, test_start, test_end: Currently unused; the
            train/test windows are hard-coded (see the FIXME below).

    Raises:
        ImportError: If the running platform is not one of the platforms
            for which a Prophet package is selected below.
    """
    from sys import platform
    if platform in ("linux", "linux2"):
        from prophet import Prophet  # linux
    elif platform in ("darwin", "win32"):
        from fbprophet import Prophet  # OS X / Windows
    else:
        # Previously this fell through and failed later with an obscure
        # unbound-name error when Prophet() was called.
        raise ImportError(
            f"No Prophet package is configured for platform {platform!r}")

    data = self.data.reset_index()
    data.rename(columns={'Date': 'ds', self.column: 'y'}, inplace=True)

    # FIXME and take user input
    # Train on the last 10% of rows and evaluate on the last 20%, so the
    # evaluation window contains the whole training window.
    size = len(data)
    df_train = data.iloc[int(-size * .10):, :]
    df_test = data.iloc[int(-size * .20):, :]

    model_prophet = Prophet(seasonality_mode='additive')
    model_prophet.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model_prophet.fit(df_train)

    df_future = model_prophet.make_future_dataframe(periods=365)
    df_pred = model_prophet.predict(df_future)

    model_prophet.plot(df_pred)
    plt.tight_layout()
    plt.title('Prophet Forecast')
    plt.savefig(os.path.join(img_dirp, 'img/prophet_forecast.png'))

    model_prophet.plot_components(df_pred)
    plt.tight_layout()
    plt.savefig(os.path.join(img_dirp, 'img/components.png'))

    # merge test set with predicted data and plot accuracy of model's predictions
    selected_columns = ['ds', 'yhat_lower', 'yhat_upper', 'yhat']
    df_pred = df_pred.loc[:, selected_columns].reset_index(drop=True)
    # Left merge: test rows without a matching prediction get NaN.
    df_test = df_test.merge(df_pred, on=['ds'], how='left')
    df_test['ds'] = pd.to_datetime(df_test['ds'])
    df_test.set_index('ds', inplace=True)

    fig, ax = plt.subplots(1, 1)
    ax = sns.lineplot(
        data=df_test[['y', 'yhat_lower', 'yhat_upper', 'yhat']])
    ax.fill_between(df_test.index, df_test.yhat_lower, df_test.yhat_upper,
                    alpha=0.3)
    # The y label used to be the literal text '{self.column}' because the
    # f prefix was missing.
    ax.set(title=f'{self.column} - actual vs. predicted',
           xlabel='Date',
           ylabel=f'{self.column}')
    plt.tight_layout()
    plt.savefig(os.path.join(img_dirp, 'img/actual_v_predicted.png'))
def test_seasonality_modes(self):
    """Check how components are split between additive and multiplicative modes.

    The model defaults to multiplicative seasonality; the 'monthly'
    seasonality and the 'binary_feature' regressor are explicitly additive.
    """
    # Model with holidays, seasonalities, and extra regressors
    xmas = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    model = Prophet(seasonality_mode='multiplicative', holidays=xmas)
    model.add_seasonality(
        'monthly', period=30, mode='additive', fourier_order=3)
    model.add_regressor('binary_feature', mode='additive')
    model.add_regressor('numeric_feature')

    # Construct seasonal features
    frame = DATA.copy()
    frame['binary_feature'] = [0] * 255 + [1] * 255
    frame['numeric_feature'] = range(510)
    frame = model.setup_dataframe(frame, initialize_scales=True)
    model.history = frame.copy()
    model.set_auto_seasonalities()
    _features, _prior_scales, component_cols, modes = (
        model.make_all_seasonality_features(frame))

    self.assertEqual(sum(component_cols['additive_terms']), 7)
    self.assertEqual(sum(component_cols['multiplicative_terms']), 29)

    expected_additive = {
        'monthly',
        'binary_feature',
        'additive_terms',
        'extra_regressors_additive',
    }
    self.assertEqual(set(modes['additive']), expected_additive)

    expected_multiplicative = {
        'weekly',
        'yearly',
        'xmas',
        'numeric_feature',
        'multiplicative_terms',
        'extra_regressors_multiplicative',
        'holidays',
    }
    self.assertEqual(set(modes['multiplicative']), expected_multiplicative)
def test_conditional_custom_seasonality(self):
    """Exercise a seasonality gated by the 'is_conditional_week' column.

    Fitting must fail while the condition column is missing or holds
    non-boolean values; once valid, the conditional seasonality features
    must be non-zero exactly on the rows where the condition is set.
    """
    model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
    model.add_seasonality(
        name='conditional_weekly',
        period=7,
        fourier_order=3,
        prior_scale=2.,
        condition_name='is_conditional_week',
    )
    model.add_seasonality(
        name='normal_monthly', period=30.5, fourier_order=5, prior_scale=2.)

    frame = DATA.copy()
    # Require all conditions names in df
    with self.assertRaises(ValueError):
        model.fit(frame)

    # Require boolean compatible values
    frame['is_conditional_week'] = [0] * 255 + [2] * 255
    with self.assertRaises(ValueError):
        model.fit(frame)

    frame['is_conditional_week'] = [0] * 255 + [1] * 255
    model.fit(frame)

    expected_spec = {
        'period': 7,
        'fourier_order': 3,
        'prior_scale': 2.,
        'mode': 'additive',
        'condition_name': 'is_conditional_week',
    }
    self.assertEqual(model.seasonalities['conditional_weekly'], expected_spec)
    self.assertIsNone(model.seasonalities['normal_monthly']['condition_name'])

    features, _prior_scales, _component_cols, _modes = (
        model.make_all_seasonality_features(model.history))

    # Confirm that only rows with is_conditional_week set have non-zero
    # entries in the conditional_weekly feature columns.
    is_weekly_col = features.columns.str.startswith('conditional_weekly')
    weekly_cols = features.columns[is_weekly_col]
    row_is_active = (features[weekly_cols] != 0).any(axis=1).values
    self.assertTrue(
        np.array_equal(row_is_active, frame['is_conditional_week'].values))
def prophet_model(df, fh, seasonality):
    """Fit Prophet on all but the last ``fh`` rows and predict those rows.

    Args:
        df: Input whose index becomes the ``ds`` column and whose single
            value column becomes ``y`` (the reset frame must have exactly
            two columns).
        fh: Number of trailing rows held out and forecast.
        seasonality: Period of the custom seasonality term.

    Returns:
        The ``yhat`` values predicted for the held-out rows.
    """
    frame = df.reset_index(drop=False)
    frame.columns = ["ds", "y"]

    model = Prophet(
        daily_seasonality=False,
        yearly_seasonality=True,
        weekly_seasonality=False,
        interval_width=0.95,
    ).add_seasonality(
        name="custom", period=seasonality, fourier_order=5, prior_scale=0.02
    )

    training_rows = frame.iloc[:-fh]
    model.fit(training_rows)

    holdout_dates = frame[-fh:][["ds"]]
    forecast = model.predict(holdout_dates)
    return forecast["yhat"].values
def get_model_forecast(info):
    """Fit a logistic-growth Prophet model from ``info`` and forecast ahead.

    Args:
        info: Mapping with string-encoded Python literals under
            'train_ds', 'train_y', 'train_avg_prc' and 'test_avg_prc', and
            holiday data readable by ``pd.read_json`` under 'holidays'.

    Returns:
        Tuple ``(model, forecast)``: the fitted model and its prediction
        over the training dates plus ``PRED_DAYS`` further periods.
    """
    ds = ast.literal_eval(info['train_ds'])
    y = ast.literal_eval(info['train_y'])
    avg_prc = ast.literal_eval(info['train_avg_prc'])
    test_avg_prc = ast.literal_eval(info['test_avg_prc'])

    data = pd.DataFrame({'ds': ds, 'y': y, 'avg_prc': avg_prc})
    holidays = pd.read_json(info['holidays'])

    ## feature engineering
    # Rescale the price so that the training maximum maps to 100; when the
    # maximum is not positive, shift the denominator by one as before.
    price_max = data['avg_prc'].max()
    price_denominator = price_max if price_max > 0 else price_max + 1
    data['avg_prc'] = data['avg_prc'] / price_denominator * 100
    data['cap'] = 100.0
    data['floor'] = 0.0

    # The price is only used as a regressor when the training prices have
    # no missing values.
    use_price = data['avg_prc'].isna().sum() == 0

    ## run prophet
    model = Prophet(growth='logistic', holidays=holidays)
    model.add_country_holidays(country_name='KR')
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    if use_price:
        model.add_regressor('avg_prc')
    model.fit(data)

    ## get estimation
    future = model.make_future_dataframe(periods=PRED_DAYS)
    if use_price:
        raw_prices = pd.concat(
            [pd.Series(avg_prc), pd.Series(test_avg_prc)], ignore_index=True)
        # Apply the same scaling as the training column; previously the raw
        # prices were used here, so the regressor seen at prediction time
        # was on a different scale than the one the model was fitted on.
        future['avg_prc'] = raw_prices / price_denominator * 100
    future['cap'] = 100
    future['floor'] = 0.0
    forecast = model.predict(future)
    return model, forecast
def get_model_forecast_pred(self):
    """Fit on all but the last ``PRED_DAYS`` rows and forecast those days.

    Returns:
        Tuple ``(model, forecast, pred)``: the fitted model, the full
        prediction frame, and the last ``PRED_DAYS`` rows of ``ds``/``yhat``
        with negative ``yhat`` values replaced by 0.
    """
    train = self.data[:-self.PRED_DAYS]

    model = Prophet(
        growth='logistic',
        holidays=self.holidays,
        holidays_prior_scale=self.holiday_weight,
        seasonality_prior_scale=self.seasonality_weight,
        changepoint_prior_scale=self.changepoint_weight,
        changepoint_range=self.changepoint_range,
        # NOTE(review): this truthiness test assumes a list-like such as
        # [] or None; it would raise for a pandas/numpy array -- confirm.
        changepoints=self.changepoints if self.changepoints else None,
    )
    if self.ADD_COUNTRY_HOLIDAY:
        model.add_country_holidays(country_name='KR')
    if self.ADD_MONTHLY_SEASONALITY:
        # NOTE(review): 'montly_seasonality' looks like a typo of
        # 'monthly', but it is kept as-is because the name is runtime text.
        model.add_seasonality(name='montly_seasonality', period=30.5,
                              fourier_order=5)

    use_price = self.PRC
    if use_price:
        model.add_regressor('avg_prc', prior_scale=self.price_weight,
                            standardize=False)

    model.fit(train)
    future = model.make_future_dataframe(periods=self.PRED_DAYS)

    if use_price:
        future = pd.merge(future, train, left_on='ds', right_on='ds',
                          how='left')
        # Explicit copy so the assignments below write to an independent
        # frame rather than to a slice of the merged one.
        future = future[['ds', 'floor', 'cap', 'avg_prc']].copy()
        # Prices for every row, training and forecast alike, come from the
        # full data set.
        future['avg_prc'] = self.data.avg_prc.values
        # Rows beyond the training range have no cap/floor after the left
        # merge; give them the fixed bounds.
        future = future.fillna({'cap': 100, 'floor': 0.0})
    else:
        future['cap'] = 100
        future['floor'] = 0.0

    forecast = model.predict(future)

    # Copy before assigning to avoid chained assignment on a slice.
    pred = forecast[['ds', 'yhat']][-self.PRED_DAYS:].copy()
    pred['yhat'] = np.where(pred['yhat'] < 0, 0, pred['yhat'])
    return model, forecast, pred
def predict():
    """Fit Prophet on the first element returned by ``data()`` and forecast 7 days.

    Returns:
        Frame with ``ds``, ``yhat``, ``yhat_lower`` and ``yhat_upper``,
        where the three value columns have been exponentiated.
    """
    # Build model
    m = Prophet(interval_width=0.95,
                yearly_seasonality=True,
                weekly_seasonality=True,
                daily_seasonality=True,
                changepoint_prior_scale=2)
    m.add_seasonality(name='monthly', period=30.5, fourier_order=5,
                      prior_scale=0.02)
    df_new = data()[0]
    m.fit(df_new)
    future = m.make_future_dataframe(periods=7, freq='D')

    # Predict
    forecast = m.predict(future)
    value_cols = ['yhat', 'yhat_lower', 'yhat_upper']
    # Explicit copy so the assignment below does not write to a slice of
    # the full prediction frame.
    forecast = forecast[['ds'] + value_cols].copy()
    # NOTE(review): presumably the model is trained on log-transformed y
    # and this undoes the transform -- confirm against data().
    forecast[value_cols] = np.exp(forecast[value_cols])
    return forecast
def test_custom_seasonality(self):
    """Check registration, validation and prior scales of custom seasonalities."""
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
        'prior_scale': [4.],
    })
    model = Prophet(holidays=holidays)
    model.add_seasonality(
        name='monthly', period=30, fourier_order=5, prior_scale=2.)
    expected_monthly = {
        'period': 30,
        'fourier_order': 5,
        'prior_scale': 2.,
        'mode': 'additive',
        'condition_name': None,
    }
    self.assertEqual(model.seasonalities['monthly'], expected_monthly)

    # Names expected to be rejected: the holiday defined above, and 'trend'.
    for rejected_name in ('special_day', 'trend'):
        with self.assertRaises(ValueError):
            model.add_seasonality(
                name=rejected_name, period=30, fourier_order=5)
    model.add_seasonality(name='weekly', period=30, fourier_order=5)

    # Test fourier order <= 0
    model = Prophet()
    for bad_order in (0, -1):
        with self.assertRaises(ValueError):
            model.add_seasonality(
                name='weekly', period=7, fourier_order=bad_order)

    # Test priors
    model = Prophet(
        holidays=holidays,
        yearly_seasonality=False,
        seasonality_mode='multiplicative',
    )
    model.add_seasonality(
        name='monthly',
        period=30,
        fourier_order=5,
        prior_scale=2.,
        mode='additive',
    )
    model.fit(DATA.copy())
    self.assertEqual(model.seasonalities['monthly']['mode'], 'additive')
    self.assertEqual(model.seasonalities['weekly']['mode'], 'multiplicative')

    seasonal_features, prior_scales, component_cols, _modes = (
        model.make_all_seasonality_features(model.history))
    expected_counts = (
        ('monthly', 10),
        ('special_day', 1),
        ('weekly', 6),
        ('additive_terms', 10),
        ('multiplicative_terms', 7),
    )
    for component, count in expected_counts:
        self.assertEqual(sum(component_cols[component]), count)

    # The feature column order is not fixed, so accept either layout.
    monthly_first = seasonal_features.columns[0] == 'monthly_delim_1'
    if monthly_first:
        expected_priors = [2.] * 10 + [10.] * 6 + [4.]
        self.assertEqual(sum(component_cols['monthly'][:10]), 10)
        self.assertEqual(sum(component_cols['weekly'][10:16]), 6)
    else:
        expected_priors = [10.] * 6 + [2.] * 10 + [4.]
        self.assertEqual(sum(component_cols['weekly'][:6]), 6)
        self.assertEqual(sum(component_cols['monthly'][6:16]), 10)
    self.assertEqual(prior_scales, expected_priors)
def test_copy(self):
    """Check that ``diagnostics.prophet_copy`` reproduces a model's settings.

    The first part sweeps a grid of constructor arguments and compares the
    copy with the original attribute by attribute. The second part checks
    that a copy taken with a cutoff keeps only changepoints before the
    cutoff and retains the custom seasonality and the extra regressor.
    """
    df = DATA_all.copy()
    df['cap'] = 200.
    df['binary_feature'] = [0] * 255 + [1] * 255

    # These values are created except for its default values
    holiday = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['x'],
    })
    on_off = [True, False]
    # One list of candidate values per positional constructor argument.
    argument_grid = itertools.product(
        ['linear', 'logistic'],  # growth
        [None, pd.to_datetime(['2016-12-25'])],  # changepoints
        [3],  # n_changepoints
        [0.9],  # changepoint_range
        on_off,  # yearly_seasonality
        on_off,  # weekly_seasonality
        on_off,  # daily_seasonality
        [None, holiday],  # holidays
        ['additive', 'multiplicative'],  # seasonality_mode
        [1.1],  # seasonality_prior_scale
        [1.1],  # holidays_prior_scale
        [0.1],  # changepoint_prior_scale
        [100],  # mcmc_samples
        [0.9],  # interval_width
        [200],  # uncertainty_samples
    )
    seasons = ('yearly', 'weekly', 'daily')

    # Values should be copied correctly
    for arguments in argument_grid:
        original = Prophet(*arguments)
        original.country_holidays = 'US'
        original.history = original.setup_dataframe(
            df.copy(), initialize_scales=True)
        original.set_auto_seasonalities()
        clone = diagnostics.prophet_copy(original)

        for attr in ('growth', 'n_changepoints', 'changepoint_range'):
            self.assertEqual(getattr(original, attr), getattr(clone, attr))

        if original.changepoints is None:
            self.assertEqual(original.changepoints, clone.changepoints)
        else:
            self.assertTrue(
                original.changepoints.equals(clone.changepoints))

        # The copy's auto-seasonality flags are expected to be False, with
        # the enabled seasonalities present in its seasonalities mapping.
        for season in seasons:
            self.assertEqual(False, getattr(clone, season + '_seasonality'))
        for season in seasons:
            self.assertEqual(
                getattr(original, season + '_seasonality'),
                season in clone.seasonalities)

        if original.holidays is None:
            self.assertEqual(original.holidays, clone.holidays)
        else:
            self.assertTrue(
                (original.holidays == clone.holidays).values.all())

        for attr in (
                'country_holidays',
                'seasonality_mode',
                'seasonality_prior_scale',
                'changepoint_prior_scale',
                'holidays_prior_scale',
                'mcmc_samples',
                'interval_width',
                'uncertainty_samples',
        ):
            self.assertEqual(getattr(original, attr), getattr(clone, attr))

    # Check for cutoff and custom seasonality and extra regressors
    all_changepoints = pd.date_range('2012-06-15', '2012-09-15')
    cutoff = pd.Timestamp('2012-07-25')
    fitted = Prophet(changepoints=all_changepoints)
    fitted.add_seasonality('custom', 10, 5)
    fitted.add_regressor('binary_feature')
    fitted.fit(df)
    truncated = diagnostics.prophet_copy(fitted, cutoff=cutoff)

    kept_changepoints = all_changepoints[all_changepoints < cutoff]
    self.assertTrue((kept_changepoints == truncated.changepoints).all())
    self.assertIn('custom', truncated.seasonalities)
    self.assertIn('binary_feature', truncated.extra_regressors)
# Stack the individual holiday/event frames (defined elsewhere) into the
# single holidays table passed to Prophet.
holidays = pd.concat((YQ, YDQ, YD, QR, FN, QM, LD, ET, GQ, PAY, SD, LB, CX, XN, CJ, CW, YX, DW, QX, ZY, ZQ, CY, DZ))

# --- Series A: build the ds/y training frame ---
train = train_day_df_A.loc[:, ['date', 'amount']]
train = train.rename(columns={'date': 'ds', 'amount': 'y'})
train['ds'] = pd.to_datetime(train['ds'])
# Blank out observations that should not be fitted. Order matters: each
# mask is evaluated on the frame as already modified by the lines above it.
train.loc[train['y'] == 0, 'y'] = None
# Values above the 99.8th percentile of the remaining y values.
train.loc[train['y'] > train['y'].quantile(q=0.998), "y"] = None
# train.loc[(train['ds'] > '2020-01-29') & (train['ds'] < '2020-04-01'), "y"] = None
# Specific date range and single days excluded from the fit.
train.loc[(train['ds'] >= '2019-12-25') & (train['ds'] <= '2020-01-05'), "y"] = None
train.loc[train['ds'] == '2019-11-25', "y"] = None
train.loc[train['ds'] == '2019-12-09', "y"] = None

# Fit with the combined holidays plus a 30.5-day custom seasonality, then
# forecast 90 periods past the end of the training data.
m = Prophet(holidays=holidays)
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
m.fit(train)
future = m.make_future_dataframe(periods=90)
forecast = m.predict(future)
# fig1 = m.plot(forecast)
fig2 = plot_plotly(m, forecast)
fig2.show()
# m.plot_components(forecast)
forecast.to_csv('./features/feature_A.csv')

# --- Series B: same preparation, with an additional lower cut-off ---
train = train_day_df_B.loc[:, ['date', 'amount']]
train = train.rename(columns={'date': 'ds', 'amount': 'y'})
train['ds'] = pd.to_datetime(train['ds'])
train.loc[train['y'] == 0, 'y'] = None
# Values below 130 are also blanked for this series.
train.loc[train['y'] < 130, 'y'] = None
train.loc[train['y'] > train['y'].quantile(q=0.998), "y"] = None