def fit_prophet(dtf_train, dtf_test, lst_exog=None, model=None, freq="D", conf=0.95, figsize=(15,10)):
    """
    Fit a Prophet model on the train set, forecast over the test set and evaluate.

    :parameter
        :param dtf_train: pandas dataframe with column 'ds' (dates), 'y' (values), the columns
            listed in lst_exog and, when the model uses logistic growth, 'cap'
        :param dtf_test: pandas dataframe with the same columns, for the periods to forecast
        :param lst_exog: list of str - names of the extra regressor columns (None = no regressors)
        :param model: Prophet object to fit - if None a default model with multiplicative
            seasonality and interval_width=conf is created
        :param freq: str - frequency passed to make_future_dataframe
        :param conf: num - interval width of the default model, also passed to the evaluation helper
        :param figsize: tuple - passed to utils_evaluate_ts_model
    :return
        the output of utils_evaluate_ts_model (computed on train + test) and the fitted model
    """
    # Local import: the original block did not use `pd`, so do not assume it is a module global.
    import pandas as pd

    lst_exog = list(lst_exog) if lst_exog is not None else []

    ## setup prophet
    if model is None:
        model = Prophet(growth="linear", changepoints=None, n_changepoints=25, seasonality_mode="multiplicative",
                        yearly_seasonality="auto", weekly_seasonality="auto", daily_seasonality="auto",
                        holidays=None, interval_width=conf)
    for regressor in lst_exog:
        model.add_regressor(regressor)

    ## train
    model.fit(dtf_train)

    ## test
    dtf_prophet = model.make_future_dataframe(periods=len(dtf_test)+10, freq=freq, include_history=True)

    if model.growth == "logistic":
        dtf_prophet["cap"] = dtf_train["cap"].unique()[0]

    if lst_exog:
        # Align regressors by date instead of by position: the future frame has 10 rows more
        # than the test set, so "the last len(dtf_test) rows" are not the test rows.
        dtf_exog = pd.concat([dtf_train[["ds"]+lst_exog], dtf_test[["ds"]+lst_exog]], ignore_index=True)
        dtf_exog = dtf_exog.drop_duplicates(subset="ds", keep="last")
        dtf_prophet = dtf_prophet.merge(dtf_exog, how="left", on="ds")
        # Rows after the test set have no observed regressors: carry the last known value
        # forward, because Prophet refuses to predict with NaN regressors.
        dtf_prophet[lst_exog] = dtf_prophet[lst_exog].ffill()

    dtf_prophet = model.predict(dtf_prophet)
    dtf_train = dtf_train.merge(dtf_prophet[["ds","yhat"]], how="left").rename(
        columns={'yhat':'model', 'y':'ts'}).set_index("ds")
    dtf_test = dtf_test.merge(dtf_prophet[["ds","yhat","yhat_lower","yhat_upper"]], how="left").rename(
        columns={'yhat':'forecast', 'y':'ts', 'yhat_lower':'lower', 'yhat_upper':'upper'}).set_index("ds")

    ## evaluate
    # pd.concat replaces DataFrame.append, which no longer exists in pandas >= 2.0.
    dtf = pd.concat([dtf_train, dtf_test])
    dtf = utils_evaluate_ts_model(dtf, conf=conf, figsize=figsize, title="Prophet")
    return dtf, model
def test_cross_validation_extra_regressors(self):
    """Cross-validate a model that has a conditional seasonality and an extra regressor."""
    history = self.__df.copy()
    n_rows = history.shape[0]
    history['extra'] = range(n_rows)
    history['is_conditional_week'] = np.arange(n_rows) // 7 % 2

    model = Prophet()
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_seasonality(
        name='conditional_weekly',
        period=7,
        fourier_order=3,
        prior_scale=2.,
        condition_name='is_conditional_week',
    )
    model.add_regressor('extra')
    model.fit(history)

    df_cv = diagnostics.cross_validation(
        model, horizon='4 days', period='4 days', initial='135 days')
    cutoffs = df_cv['cutoff']
    # This configuration is expected to produce exactly two distinct cutoffs.
    self.assertEqual(len(np.unique(cutoffs)), 2)

    # The smallest positive step between cutoffs must be at least one period.
    period = pd.Timedelta('4 days')
    steps = cutoffs.diff()
    smallest_step = steps[steps > pd.Timedelta(0)].min()
    self.assertTrue(smallest_step >= period)
    # Every forecast date lies strictly after its cutoff.
    self.assertTrue((cutoffs < df_cv['ds']).all())

    # The y values carried in the CV frame must match the original history.
    df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
    squared_error = np.sum((df_merged['y_x'] - df_merged['y_y'])**2)
    self.assertAlmostEqual(squared_error, 0.0)
def test_seasonality_modes(self):
    """Check how components are split between additive and multiplicative modes."""
    # Model with holidays, seasonalities, and extra regressors
    xmas = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['xmas'],
        'lower_window': [-1],
        'upper_window': [0],
    })
    model = Prophet(seasonality_mode='multiplicative', holidays=xmas)
    model.add_seasonality('monthly', period=30, mode='additive', fourier_order=3)
    model.add_regressor('binary_feature', mode='additive')
    model.add_regressor('numeric_feature')

    # Construct seasonal features
    frame = DATA.copy()
    frame['binary_feature'] = [0] * 255 + [1] * 255
    frame['numeric_feature'] = range(510)
    frame = model.setup_dataframe(frame, initialize_scales=True)
    model.history = frame.copy()
    model.set_auto_seasonalities()
    features = model.make_all_seasonality_features(frame)
    seasonal_features, prior_scales, component_cols, modes = features

    # Number of feature columns assigned to each aggregate term.
    self.assertEqual(sum(component_cols['additive_terms']), 7)
    self.assertEqual(sum(component_cols['multiplicative_terms']), 29)

    expected_additive = {
        'monthly',
        'binary_feature',
        'additive_terms',
        'extra_regressors_additive',
    }
    expected_multiplicative = {
        'weekly',
        'yearly',
        'xmas',
        'numeric_feature',
        'multiplicative_terms',
        'extra_regressors_multiplicative',
        'holidays',
    }
    self.assertEqual(set(modes['additive']), expected_additive)
    self.assertEqual(set(modes['multiplicative']), expected_multiplicative)
def prophet_fit_and_predict_with_exog_and_advance_vars(
        y: [[float]], k: int, t: [float], a: [[float]],
        model_params: dict = None) -> Tuple[List, List, Any, Any]:
    """ Simpler wrapper for testing - univariate w/ advance vars w/ supplied times and future times

    :param y: List of observation vectors; y[i][0] is the target, y[i][1:] are exogenous
    :param k: Number of steps ahead to predict
    :param t: Epoch times, len(y) + k of them (the last k are the future times)
    :param a: Known-in-advance vectors, len(y) + k of them
    :param model_params: Optional overrides applied on top of PROPHET_MODEL
    :returns: x         the last k values of yhat
              x_std     the last k values of yhat_upper - yhat_lower
              forecast  full forecast dataframe
              m         the fitted Prophet model
    """
    assert len(t) == len(y) + k
    assert len(a) == len(y) + k
    assert isinstance(y[0], List)

    n_obs = len(y)
    a_cols = ['a' + str(i) for i in range(len(a[0]))]
    df = pd.DataFrame(columns=a_cols, data=a[:-k])
    # Materialise each column as a list so the list concatenation below is
    # well defined whatever sequence type transpose() hands back.
    Y = [list(column) for column in transpose(y)]
    df['y'] = Y[0]
    n_exog = len(y[0]) - 1
    exog_cols = ['y' + str(i) for i in range(1, n_exog + 1)]
    for i, y_col in enumerate(exog_cols, start=1):
        df[y_col] = Y[i][:n_obs]
    dt = epoch_to_naive_datetime(t)
    df['ds'] = dt[:n_obs]

    kwargs_used = dict(PROPHET_MODEL)
    if model_params:
        kwargs_used.update(model_params)
    m = Prophet(**kwargs_used)
    # Same registration order as before: advance variables first, then exogenous.
    for regressor in a_cols + exog_cols:
        m.add_regressor(name=regressor)
    with no_stdout_stderr():
        m.fit(df)

    freq = infer_freq_from_epoch(t)
    future = m.make_future_dataframe(periods=k, freq=freq)
    future['ds'] = dt
    for a_col, a_vals in zip(a_cols, transpose(a)):
        future[a_col] = a_vals
    for i, y_col in enumerate(exog_cols, start=1):
        # Just bring forward
        future[y_col] = Y[i] + [Y[i][-1]] * k

    forecast = m.predict(future)
    x = forecast['yhat'].values[-k:]  # Use m.plot(forecast) to take a peek
    # Restrict the interval widths to the same k future rows as x; previously
    # this list spanned the whole history and did not line up with x.
    x_std = [
        u - l for u, l in zip(forecast['yhat_upper'].values[-k:],
                              forecast['yhat_lower'].values[-k:])
    ]
    return x, x_std, forecast, m
def estimate(file_path):
    """Fit a logistic-growth Prophet model on an hourly sales CSV and score it.

    The CSV is read from `file_path` and must provide the columns 'dt',
    'dhour', 'sales' and 'avg_prc'. The series is reindexed onto the hourly
    range START_DATE..END_DATE, missing values are filled, 'y' is capped at
    1.5 * Q3, and 'avg_prc' / 'y' are min-max scaled. The last PRED_DAYS rows
    are held out as the test set.

    :param file_path: path (or buffer) accepted by pandas.read_csv
    :return: tuple (rmse, r2score) of the forecast against the held-out rows
    :raises FileNotFoundError: if the CSV cannot be loaded for any reason; the
        underlying error is chained as the cause
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as err:
        # Same exception type as before, but the real cause is preserved and
        # KeyboardInterrupt/SystemExit are no longer intercepted.
        raise FileNotFoundError('파일을 찾을 수 없습니다') from err

    date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='1H')[:-1]
    df['ds'] = df['dt'].map(str) + " " + df['dhour'].map(str) + ":00:00"
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.set_index('ds')
    # Left-join onto the complete hourly range so missing hours become rows.
    df = pd.merge(date_range.to_frame(), df, left_index=True, right_index=True, how='left')
    df['ds'] = df.index
    df = df.rename(columns={'sales': 'y'})
    df = df.fillna({'avg_prc': df.avg_prc.median(), 'y': 0})

    # Cap outliers at 1.5 * third quartile.
    cap = df['y'].quantile(q=0.75) * 1.5
    df['y'] = df['y'].clip(upper=cap)

    # .copy() so the column assignments below act on an independent frame.
    df = df[['ds', 'y', 'avg_prc']].copy()
    # NOTE(review): the scaler is fitted on train and test rows together.
    scaler = MinMaxScaler()
    df[['avg_prc', 'y']] = scaler.fit_transform(df[['avg_prc', 'y']].values)
    df['floor'] = 0
    df['cap'] = 1.2

    train = df[:-PRED_DAYS]
    test = df[-PRED_DAYS:]

    m = Prophet(growth='logistic', holidays=holidays, holidays_prior_scale=1)
    m.add_regressor('avg_prc')
    m.fit(train)

    future = m.make_future_dataframe(periods=PRED_DAYS, freq='H')
    future = pd.merge(future, train, left_on='ds', right_on='ds', how='left')
    future = future[['ds', 'floor', 'cap', 'avg_prc']]
    # Future rows have no regressor yet: use the last avg_prc of the full frame.
    future = future.fillna(
        {'avg_prc': df['avg_prc'].iloc[-1], 'cap': 1.2, 'floor': 0})

    forecast = m.predict(future)
    pred = forecast[['ds', 'yhat']][-PRED_DAYS:].copy()
    pred['yhat'] = pred['yhat'].clip(lower=0)  # negative forecasts are floored at 0

    rmse = math.sqrt(mean_squared_error(test['y'], pred['yhat']))
    r2score = r2_score(test['y'], pred['yhat'])
    return rmse, r2score
def test_regressor_coefficients(self):
    """Coefficient table has one row per regressor and degenerate bounds."""
    model = Prophet()
    n_rows = DATA.shape[0]
    df = DATA.copy()
    np.random.seed(123)
    for column in ('regr1', 'regr2'):
        df[column] = np.random.normal(size=n_rows)
    model.add_regressor('regr1', mode='additive')
    model.add_regressor('regr2', mode='multiplicative')
    model.fit(df)

    coefs = regressor_coefficients(model)
    self.assertTrue(coefs.shape == (2, 6))
    # No MCMC sampling, so lower and upper should be the same as mean
    point_estimates = coefs['coef'].values
    lower_bounds = coefs['coef_lower'].values
    upper_bounds = coefs['coef_upper'].values
    self.assertTrue(np.array_equal(lower_bounds, point_estimates))
    self.assertTrue(np.array_equal(upper_bounds, point_estimates))
def get_model_forecast(info):
    """Fit a logistic-growth Prophet model from serialized inputs and forecast.

    :param info: mapping with string-encoded Python literals under
        'train_ds', 'train_y', 'train_avg_prc' and 'test_avg_prc', plus
        'holidays', which is handed to pandas.read_json
    :return: tuple (model, forecast) - the fitted Prophet model and the
        dataframe returned by model.predict over history + PRED_DAYS periods

    The price regressor is only used when the training prices contain no
    missing value. It is rescaled by the maximum training price, and the same
    divisor is applied to the prices placed in the future frame so the
    regressor has one scale at fit time and at predict time.
    """
    ds = ast.literal_eval(info['train_ds'])
    y = ast.literal_eval(info['train_y'])
    avg_prc = ast.literal_eval(info['train_avg_prc'])
    test_avg_prc = ast.literal_eval(info['test_avg_prc'])

    data = pd.DataFrame({'ds': ds, 'y': y, 'avg_prc': avg_prc})
    holidays = pd.read_json(info['holidays'])

    ## feature engineering
    max_prc = data['avg_prc'].max()
    # Same rule as before: divide by the max, or by max + 1 when max is not positive.
    prc_divisor = max_prc if max_prc > 0 else max_prc + 1
    data['avg_prc'] = data['avg_prc'] / prc_divisor * 100
    data['cap'] = 100.0
    data['floor'] = 0.0
    use_price = data['avg_prc'].isna().sum() == 0

    ## run prophet
    model = Prophet(growth='logistic', holidays=holidays)
    model.add_country_holidays(country_name='KR')
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    if use_price:
        model.add_regressor('avg_prc')
    model.fit(data)

    ## get estimation
    future = model.make_future_dataframe(periods=PRED_DAYS)
    if use_price:
        all_prc = pd.Series(list(avg_prc) + list(test_avg_prc), dtype='float64')
        # Apply the training scaling; raw prices here would not match what the model was fit on.
        future['avg_prc'] = all_prc / prc_divisor * 100
    future['cap'] = 100
    future['floor'] = 0.0
    forecast = model.predict(future)
    return model, forecast
def get_model_forecast_pred(self):
    """Fit Prophet on all but the last PRED_DAYS rows of self.data and forecast them.

    Configuration is read from instance attributes: the prior-scale weights,
    changepoint settings, `holidays`, and the switches ADD_COUNTRY_HOLIDAY,
    ADD_MONTHLY_SEASONALITY and PRC (whether 'avg_prc' is used as regressor).

    :return: tuple (model, forecast, pred) - the fitted model, the full
        forecast dataframe, and the last PRED_DAYS rows of ['ds', 'yhat']
        with negative yhat replaced by 0
    """
    train = self.data[:-self.PRED_DAYS]
    use_price = self.PRC

    model = Prophet(
        growth='logistic',
        holidays=self.holidays,
        holidays_prior_scale=self.holiday_weight,
        seasonality_prior_scale=self.seasonality_weight,
        changepoint_prior_scale=self.changepoint_weight,
        changepoint_range=self.changepoint_range,
        changepoints=self.changepoints if self.changepoints else None,
    )
    if self.ADD_COUNTRY_HOLIDAY:
        model.add_country_holidays(country_name='KR')
    if self.ADD_MONTHLY_SEASONALITY:
        model.add_seasonality(name='montly_seasonality', period=30.5, fourier_order=5)
    if use_price:
        model.add_regressor('avg_prc', prior_scale=self.price_weight, standardize=False)

    # Fitting and building the future frame are the same with or without the regressor.
    model.fit(train)
    future = model.make_future_dataframe(periods=self.PRED_DAYS)

    if use_price:
        future = pd.merge(future, train, left_on='ds', right_on='ds', how='left')
        # .copy() so the assignment below acts on an independent frame.
        future = future[['ds', 'floor', 'cap', 'avg_prc']].copy()
        # Regressor values for history and forecast horizon come from the full data.
        future['avg_prc'] = self.data.avg_prc.values
        future = future.fillna({'cap': 100, 'floor': 0.0})
    else:
        future['cap'] = 100
        future['floor'] = 0.0

    forecast = model.predict(future)
    pred = forecast[['ds', 'yhat']][-self.PRED_DAYS:].copy()
    pred['yhat'] = np.where(pred['yhat'] < 0, 0, pred['yhat'])
    return model, forecast, pred
def test_added_regressors(self):
    """Exercise extra regressors through setup, fitting and prediction."""
    model = Prophet()
    model.add_regressor('binary_feature', prior_scale=0.2)
    model.add_regressor('numeric_feature', prior_scale=0.5)
    model.add_regressor(
        'numeric_feature2', prior_scale=0.5, mode='multiplicative')
    model.add_regressor('binary_feature2', standardize=True)

    df = DATA.copy()
    df['binary_feature'] = ['0'] * 255 + ['1'] * 255
    df['numeric_feature'] = range(510)
    df['numeric_feature2'] = range(510)
    # Require all regressors in df
    with self.assertRaises(ValueError):
        model.fit(df)
    df['binary_feature2'] = [1] * 100 + [0] * 410
    model.fit(df)

    # Check that standardizations are correctly set
    regressors = model.extra_regressors
    expected_binary = {
        'prior_scale': 0.2,
        'mu': 0,
        'std': 1,
        'standardize': 'auto',
        'mode': 'additive',
    }
    self.assertEqual(regressors['binary_feature'], expected_binary)

    numeric = regressors['numeric_feature']
    self.assertEqual(numeric['prior_scale'], 0.5)
    self.assertEqual(numeric['mu'], 254.5)
    self.assertAlmostEqual(numeric['std'], 147.368585, places=5)

    self.assertEqual(regressors['numeric_feature2']['mode'], 'multiplicative')

    binary2 = regressors['binary_feature2']
    self.assertEqual(binary2['prior_scale'], 10.)
    self.assertAlmostEqual(binary2['mu'], 0.1960784, places=5)
    self.assertAlmostEqual(binary2['std'], 0.3974183, places=5)

    # Check that standardization is done correctly
    df2 = model.setup_dataframe(df.copy())
    self.assertEqual(df2['binary_feature'][0], 0)
    self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
    self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)

    # Check that feature matrix and prior scales are correctly constructed
    features = model.make_all_seasonality_features(df2)
    seasonal_features, prior_scales, component_cols, modes = features
    self.assertEqual(seasonal_features.shape[1], 30)
    names = ['binary_feature', 'numeric_feature', 'binary_feature2']
    true_priors = [0.2, 0.5, 10.]
    for name, expected_prior in zip(names, true_priors):
        self.assertIn(name, seasonal_features)
        self.assertEqual(sum(component_cols[name]), 1)
        selected_prior = sum(np.array(prior_scales) * component_cols[name])
        self.assertEqual(selected_prior, expected_prior)

    # Check that forecast components are reasonable
    future = pd.DataFrame({
        'ds': ['2014-06-01'],
        'binary_feature': [0],
        'numeric_feature': [10],
        'numeric_feature2': [10],
    })
    # Predicting without one of the registered regressors must fail.
    with self.assertRaises(ValueError):
        model.predict(future)
    future['binary_feature2'] = 0
    fcst = model.predict(future)
    self.assertEqual(fcst.shape[1], 37)
    self.assertEqual(fcst['binary_feature'][0], 0)

    additive_regressors = fcst['extra_regressors_additive'][0]
    multiplicative_regressors = fcst['extra_regressors_multiplicative'][0]
    self.assertAlmostEqual(
        additive_regressors,
        fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
    )
    self.assertAlmostEqual(
        multiplicative_regressors,
        fcst['numeric_feature2'][0],
    )
    self.assertAlmostEqual(
        fcst['additive_terms'][0],
        fcst['yearly'][0] + fcst['weekly'][0]
        + fcst['extra_regressors_additive'][0],
    )
    self.assertAlmostEqual(
        fcst['multiplicative_terms'][0],
        multiplicative_regressors,
    )
    self.assertAlmostEqual(
        fcst['yhat'][0],
        fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0])
        + fcst['additive_terms'][0],
    )

    # Check works if constant extra regressor at 0
    df['constant_feature'] = 0
    constant_model = Prophet()
    constant_model.add_regressor('constant_feature')
    constant_model.fit(df)
    self.assertEqual(
        constant_model.extra_regressors['constant_feature']['std'], 1)
def prophet_iskater_factory(y: [[float]], k: int, a: List = None, t: List = None, e=None, freq: str = None,
                            n_max=1000, recursive: bool = False, model_params: dict = None,
                            return_forecast=True):
    """
    :param y:           A list of observations, each a vector (a list of plain floats is wrapped).
    :param k:           Number of steps ahead to predict
    :param a:           Known in advance observations - should be k more of these than y's
    :param t:           Epoch times of observations y. If len(t)=len(y)+k the last k are interpreted as future times.
    :param e:           Not referenced in the body of this function.
    :param freq:        'D', '5T' etc, see https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/frequencies.py
                        Only used when t is None; otherwise the frequency is inferred from t.
    :param n_max:       Maximum number of observations to use, should you wish to prevent prophet from slowing down
    :param recursive:   If True, exogenous variables y[1], y[2],... will be predicted forward in time
                        (obviously this adds to computation time)
    :param model_params: Optional overrides applied on top of PROPHET_MODEL
    :param return_forecast: If True return (x, x_std, forecast, m), otherwise (x, x_std)
    :returns: x         k-vector of predictions
              x_std     k-vector of yhat_upper - yhat_lower (used as a scale, see TODO below)
              forecast  full forecast dataframe, familiar to users of fbprophet
              m         the fitted Prophet model
    """
    if a:
        assert len(a) == len(y) + k
    if isinstance(y[0], float):
        y = [wrap(yj) for yj in y]

    # Conversion of epoch times to UTC datetime
    # User must supply times, len(y) or len(y)+k, or a valid frequency str
    # `ta` holds user supplied history+future times. It is initialised here so
    # that it is bound on every path, including t is None.
    ta = None
    if t is None:
        if not freq:
            freq = PROPHET_META['freq']  # Just assume away ...
        else:
            assert is_valid_freq(freq), 'Freq ' + str(freq) + ' is not a valid frequency'
        dt = pd.date_range(start=EPOCH, periods=len(y), freq=freq)  # UTC
    else:
        freq = infer_freq_from_epoch(t)
        dt = epoch_to_naive_datetime(t)
        if len(dt) == len(y) + k:
            ta = dt
            dt = dt[:len(y)]
        else:
            assert len(dt) == len(y), 'Time vector t should be len(y) or len(y)+k'

    # Truncate history so that prophet doesn't take forever to fit
    y_shorter = y[-n_max:]
    a_shorter = a[-(n_max + k):] if a is not None else []  # may be empty
    dt_shorter = dt[-n_max:]
    # Truncate the user supplied times the same way, so they still match the
    # future dataframe (history + k rows) when len(y) > n_max.
    ta_shorter = ta[-(n_max + k):] if ta is not None else None

    # Massage data into Prophet friendly dataframe with columns y, y1, ..., yk, a0,...aj
    y_cols = ['y' + str(i) if i > 0 else 'y' for i in range(len(y_shorter[-1]))]
    if a:
        a_cols = ['a' + str(i) for i in range(len(a_shorter[-1]))]
        data = [list(yi) + list(ai) for yi, ai in zip(y_shorter, a_shorter[:-k])]
        df = pd.DataFrame(columns=y_cols + a_cols, data=data)
    else:
        a_cols = []
        data = [list(yi) for yi in y_shorter]
        df = pd.DataFrame(columns=y_cols, data=data)
    df['ds'] = dt_shorter

    # Instantiate Prophet model, ensure defaults are what we think they are
    kwargs_used = dict(PROPHET_MODEL)
    if model_params:
        kwargs_used.update(model_params)
    m = Prophet(**kwargs_used)

    # Add regressors
    for regressor in y_cols[1:] + a_cols:
        m.add_regressor(name=regressor)

    # Fit the model every invocation ... there isn't any other way
    with no_stdout_stderr():
        m.fit(df)

    # Make future dataframe, adding known-in-advance variables
    future = m.make_future_dataframe(periods=k, freq=freq)
    for j, a_col in enumerate(a_cols):
        future[a_col] = [ai[j] for ai in a_shorter]  # Known in advance
    if ta_shorter is not None:
        future['ds'] = list(ta_shorter)  # override with user supplied future times

    # Next, we wish to add contemporaneously observed variables
    #
    # This is somewhat problematic, for how should we bring exogenously observed variables forward?
    # The simplest answer is, don't use them - only supply 1-vector y observations
    # prophet implicitly assumes all exogenous are known, which is a pretty big shortcoming.
    #
    # However, if we are trying to support y[1:], ...
    #    - It seems consistent to use prophet to predict these forward,
    #    - It also seems likely that this will lead to over-fitting.
    # I'm open to ideas here. Perhaps some hackery could effect attenuation of the coefficients
    # assigned to y[1],... such as jiggling past observations. For now we use prophet on each
    # one individually, feeding them the known in advance 'a' variables.
    n_exog = len(y[0]) - 1
    if n_exog > 0:
        for j, y_col in enumerate(y_cols[1:], start=1):
            yj = [yi[j] for yi in y_shorter]
            if recursive:
                # NOTE(review): the nested fit receives neither t nor model_params,
                # so it runs on EPOCH-based dates with default model settings.
                yj_hat, _, _, _ = prophet_iskater_factory(
                    y=yj, k=k, a=a_shorter, freq=freq, n_max=n_max, recursive=False)
            else:
                yj_hat = [yj[-1]] * k  # Just bring the last value forward
            future[y_col] = yj + list(yj_hat)

    # Call the prediction function
    forecast = m.predict(future)
    x = list(forecast['yhat'].values[-k:])  # Use m.plot(forecast) to take a peek

    # Interpret confidence level difference as scale to be returned. TODO: set alpha properly so this really is 1-std
    x_std = [
        u - l for u, l in zip(forecast['yhat_upper'].values[-k:],
                              forecast['yhat_lower'].values[-k:])
    ]
    if return_forecast:
        return x, x_std, forecast, m
    return x, x_std
def test_copy(self):
    """prophet_copy must carry over constructor settings and added components."""
    df = DATA_all.copy()
    df['cap'] = 200.
    df['binary_feature'] = [0] * 255 + [1] * 255
    # These values are created except for its default values
    holiday = pd.DataFrame({
        'ds': pd.to_datetime(['2016-12-25']),
        'holiday': ['x'],
    })
    # One entry per positional Prophet argument, in constructor order.
    option_grid = [
        ['linear', 'logistic'],  # growth
        [None, pd.to_datetime(['2016-12-25'])],  # changepoints
        [3],  # n_changepoints
        [0.9],  # changepoint_range
        [True, False],  # yearly_seasonality
        [True, False],  # weekly_seasonality
        [True, False],  # daily_seasonality
        [None, holiday],  # holidays
        ['additive', 'multiplicative'],  # seasonality_mode
        [1.1],  # seasonality_prior_scale
        [1.1],  # holidays_prior_scale
        [0.1],  # changepoint_prior_scale
        [100],  # mcmc_samples
        [0.9],  # interval_width
        [200],  # uncertainty_samples
    ]
    seasonality_names = ('yearly', 'weekly', 'daily')

    # Values should be copied correctly
    for args in itertools.product(*option_grid):
        m1 = Prophet(*args)
        m1.country_holidays = 'US'
        m1.history = m1.setup_dataframe(df.copy(), initialize_scales=True)
        m1.set_auto_seasonalities()
        m2 = diagnostics.prophet_copy(m1)

        for attribute in ('growth', 'n_changepoints', 'changepoint_range'):
            self.assertEqual(getattr(m1, attribute), getattr(m2, attribute))

        if m1.changepoints is None:
            self.assertEqual(m1.changepoints, m2.changepoints)
        else:
            self.assertTrue(m1.changepoints.equals(m2.changepoints))

        # The copy has the auto flags off and carries the seasonalities explicitly.
        for name in seasonality_names:
            self.assertEqual(False, getattr(m2, name + '_seasonality'))
        for name in seasonality_names:
            self.assertEqual(
                getattr(m1, name + '_seasonality'), name in m2.seasonalities)

        if m1.holidays is None:
            self.assertEqual(m1.holidays, m2.holidays)
        else:
            self.assertTrue((m1.holidays == m2.holidays).values.all())

        for attribute in (
                'country_holidays',
                'seasonality_mode',
                'seasonality_prior_scale',
                'changepoint_prior_scale',
                'holidays_prior_scale',
                'mcmc_samples',
                'interval_width',
                'uncertainty_samples',
        ):
            self.assertEqual(getattr(m1, attribute), getattr(m2, attribute))

    # Check for cutoff and custom seasonality and extra regressors
    changepoints = pd.date_range('2012-06-15', '2012-09-15')
    cutoff = pd.Timestamp('2012-07-25')
    fitted = Prophet(changepoints=changepoints)
    fitted.add_seasonality('custom', 10, 5)
    fitted.add_regressor('binary_feature')
    fitted.fit(df)
    copied = diagnostics.prophet_copy(fitted, cutoff=cutoff)
    # Only changepoints strictly before the cutoff survive the copy.
    expected_changepoints = changepoints[changepoints < cutoff]
    self.assertTrue((expected_changepoints == copied.changepoints).all())
    self.assertTrue('custom' in copied.seasonalities)
    self.assertTrue('binary_feature' in copied.extra_regressors)
class FBProphetModel(PredictionModel):
    """Facebook's Prophet prediction model."""

    def __init__(self, params: dict, transformation: str = "none"):
        """Store the optional Prophet settings found in `params`.

        :param params: configuration dict; the optional entry
            params["model_parameters"]["fbprophet_parameters"] is kept as
            `self.fbprophet_parameters` (None when absent)
        :param transformation: forwarded to PredictionModel.__init__
        """
        super().__init__(params, name="FBProphet", transformation=transformation)

        # Stuff needed to make Prophet shut up during training.
        self.suppress_stdout_stderr = suppress_stdout_stderr
        self.fbmodel = Prophet()
        try:
            self.fbprophet_parameters = params["model_parameters"]["fbprophet_parameters"]
        except KeyError:
            self.fbprophet_parameters = None

    def train(self, input_data: DataFrame, extra_regressors: DataFrame = None):
        """Overrides PredictionModel.train()

        Builds a fresh Prophet model (with holidays when configured), registers
        every column of `extra_regressors` as a regressor and fits the model.

        :param input_data: dataframe indexed by date whose first column is the
            series to fit; it is not modified
        :param extra_regressors: optional dataframe joined to `input_data` on
            the index
        """
        if self.fbprophet_parameters is not None:
            try:
                timeseries_name = input_data.columns[0]
                holidays_config = self.fbprophet_parameters["holidays_dataframes"]
                date_format = holidays_config["date_format"]
                holidays = pd.read_csv(holidays_config[timeseries_name])
                # Store the parsed dates back: the conversion result used to be discarded.
                holidays["ds"] = pd.to_datetime(holidays["ds"], format=date_format)
                self.fbmodel = Prophet(holidays=holidays)
                log.debug("Using a dataframe for holidays...")
            except KeyError:
                # No holidays file configured for this series: plain model.
                self.fbmodel = Prophet()

            try:
                holiday_country = self.fbprophet_parameters["holiday_country"]
                self.fbmodel.add_country_holidays(country_name=holiday_country)
                log.debug(f"Set {holiday_country} as country for holiday calendar...")
            except KeyError:
                pass
        else:
            self.fbmodel = Prophet()

        # Work on a new frame so the caller's `input_data` keeps its index and
        # column names; mutating it in place made a second train() call fail.
        if extra_regressors is not None:
            # We could apply self.transformation also on the extra regressors.
            # From tests, it looks like it doesn't change much/it worsens the forecasts.
            training_data = input_data.join(extra_regressors).reset_index()
            old_names = training_data.columns[[0, 1]]
            training_data = training_data.rename(columns=dict(zip(old_names, ['ds', 'y'])))
            for col in extra_regressors.columns:
                self.fbmodel.add_regressor(col)
        else:
            training_data = input_data.reset_index()
            training_data.columns = ['ds', 'y']

        with self.suppress_stdout_stderr():
            self.fbmodel.fit(training_data)

        # NOTE: a disabled experiment used to follow here as commented-out code.
        # It grid-searched `changepoint_prior_scale` and `seasonality_prior_scale`,
        # scored each combination by RMSE through Prophet's `cross_validation` and
        # `performance_metrics`, and refitted `self.fbmodel` with the best one.

    def predict(self, future_dataframe: DataFrame, extra_regressors: DataFrame = None) -> DataFrame:
        """Overrides PredictionModel.predict()

        :param future_dataframe: dataframe whose index holds the dates to predict
        :param extra_regressors: optional dataframe joined on the dates
        :return: Prophet's forecast dataframe, indexed by 'ds'
        """
        future = future_dataframe.reset_index().rename(columns={'index': 'ds'})

        if extra_regressors is not None:
            future = future.set_index('ds').join(extra_regressors).reset_index()

        forecast = self.fbmodel.predict(future)
        return forecast.set_index('ds')
import pandas as pd
from prophet import Prophet

# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv(
    'examples/example_wp_log_peyton_manning.csv',
    float_precision='high',
)


def nfl_sunday(ds):
    """Return 1 if `ds` is a Sunday between September and January, else 0."""
    date = pd.to_datetime(ds)
    is_sunday = date.weekday() == 6
    in_season = date.month > 8 or date.month < 2
    return 1 if is_sunday and in_season else 0


# Indicator column used as an extra regressor.
df['nfl_sunday'] = df['ds'].apply(nfl_sunday)

m = Prophet()
m.add_regressor('nfl_sunday')
m.fit(df)

# The regressor must also be present for the dates being forecast.
future = m.make_future_dataframe(periods=365)
future['nfl_sunday'] = future['ds'].apply(nfl_sunday)

forecast = m.predict(future)

forecast_figure = m.plot(forecast)
forecast_figure.savefig('/tmp/py_regressors.png')
components_figure = m.plot_components(forecast)
components_figure.savefig('/tmp/py_regressors2.png')