def test_check_single_cutoff_forecast_func_calls(self):
    m = Prophet()
    m.fit(self.__df)
    mock_predict = pd.DataFrame({
        'ds': pd.date_range(start='2012-09-17', periods=3),
        'yhat': np.arange(16, 19),
        'yhat_lower': np.arange(15, 18),
        'yhat_upper': np.arange(17, 20),
        'y': np.arange(16.5, 19.5),
        'cutoff': [datetime.date(2012, 9, 15)] * 3,
    })
    # cross validation with 3 and 7 forecasts
    for args, forecasts in ((['4 days', '10 days', '115 days'], 3),
                            (['4 days', '4 days', '115 days'], 7)):
        with patch('prophet.diagnostics.single_cutoff_forecast') as mock_func:
            mock_func.return_value = mock_predict
            df_cv = diagnostics.cross_validation(m, *args)
            # check single forecast function called expected number of times
            self.assertEqual(
                diagnostics.single_cutoff_forecast.call_count, forecasts)
def __init__(self, demand: float = 20, var_per_day: float = 0.1,
             var_per_season: float = 0.1) -> None:
    self.__var_per_day = var_per_day
    self.__var_per_season = var_per_season
    self.__mean_demand = demand
    self.__pt = Prophet(seasonality_mode="multiplicative")
    self.__periods = 12 * 20
    self.data_demand = None
def test_cross_validation_extra_regressors(self):
    df = self.__df.copy()
    df['extra'] = range(df.shape[0])
    df['is_conditional_week'] = np.arange(df.shape[0]) // 7 % 2
    m = Prophet()
    m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    m.add_seasonality(name='conditional_weekly', period=7, fourier_order=3,
                      prior_scale=2., condition_name='is_conditional_week')
    m.add_regressor('extra')
    m.fit(df)
    df_cv = diagnostics.cross_validation(
        m, horizon='4 days', period='4 days', initial='135 days')
    self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
    period = pd.Timedelta('4 days')
    dc = df_cv['cutoff'].diff()
    dc = dc[dc > pd.Timedelta(0)].min()
    self.assertTrue(dc >= period)
    self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
    df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
    self.assertAlmostEqual(
        np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)
def predict(ticker, start_date):
    today = datetime.date.today()
    end_date = today.strftime("%Y-%m-%d")
    data = yf.download(ticker, start_date, end_date)

    df_forecast = data.copy()
    df_forecast.reset_index(inplace=True)
    df_forecast["ds"] = df_forecast["Date"]
    df_forecast["y"] = df_forecast["Adj Close"]
    df_forecast = df_forecast[["ds", "y"]]

    model = Prophet()
    model.fit(df_forecast)

    future = pd.to_datetime(end_date) + pd.DateOffset(days=7)
    future_date = future.strftime("%Y-%m-%d")
    dates = pd.date_range(start=end_date, end=future_date)
    df_pred = pd.DataFrame({"ds": dates})

    forecast = model.predict(df_pred)
    prediction_list = forecast.tail(7).to_dict("records")

    output = {}
    for data in prediction_list:
        date = data["ds"].strftime("%Y-%m-%d")
        output[date] = data["trend"]
    return output
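# Hedged usage sketch for the predict() helper above; "MSFT" and the start date
# are hypothetical example inputs, and the call needs network access via yfinance.
predictions = predict("MSFT", "2020-01-01")
for day, value in predictions.items():
    print(day, value)  # trend-component forecast for each of the next 7 days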
def test_set_seasonality_mode(self):
    # Setting attribute
    m = Prophet()
    self.assertEqual(m.seasonality_mode, 'additive')
    m = Prophet(seasonality_mode='multiplicative')
    self.assertEqual(m.seasonality_mode, 'multiplicative')
    with self.assertRaises(ValueError):
        Prophet(seasonality_mode='batman')
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    # Train the Prophet model on each time tick.
    df = dataframe[['time', 'close']] \
        .rename(columns={'time': 'ds', 'close': 'y'})
    self.model = Prophet(interval_width=0.95, daily_seasonality=True)
    self.model.fit(df)
    return dataframe
def test_subdaily_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
    })
    m = Prophet(holidays=holidays)
    m.fit(DATA2)
    fcst = m.predict()
    self.assertEqual(sum(fcst['special_day'] == 0), 575)
def __init__(self, params: dict, transformation: str = "none"):
    super().__init__(params, name="FBProphet", transformation=transformation)
    # Used to silence Prophet's output during training.
    self.suppress_stdout_stderr = suppress_stdout_stderr
    self.fbmodel = Prophet()
    try:
        self.fbprophet_parameters = params["model_parameters"]["fbprophet_parameters"]
    except KeyError:
        self.fbprophet_parameters = None
def test_fit_with_holidays(self):
    holidays = pd.DataFrame({
        'ds': pd.to_datetime(['2012-06-06', '2013-06-06']),
        'holiday': ['seans-bday'] * 2,
        'lower_window': [0] * 2,
        'upper_window': [1] * 2,
    })
    model = Prophet(holidays=holidays, uncertainty_samples=0)
    model.fit(DATA).predict()
def test_logistic_floor(self):
    m = Prophet(growth='logistic')
    N = DATA.shape[0]
    history = DATA.head(N // 2).copy()
    history['floor'] = 10.
    history['cap'] = 80.
    future = DATA.tail(N // 2).copy()
    future['cap'] = 80.
    future['floor'] = 10.
    m.fit(history, algorithm='Newton')
    self.assertTrue(m.logistic_floor)
    self.assertTrue('floor' in m.history)
    self.assertAlmostEqual(m.history['y_scaled'][0], 1.)
    self.assertEqual(m.fit_kwargs, {'algorithm': 'Newton'})
    fcst1 = m.predict(future)

    m2 = Prophet(growth='logistic')
    history2 = history.copy()
    history2['y'] += 10.
    history2['floor'] += 10.
    history2['cap'] += 10.
    future['cap'] += 10.
    future['floor'] += 10.
    m2.fit(history2, algorithm='Newton')
    self.assertAlmostEqual(m2.history['y_scaled'][0], 1.)
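# A minimal sketch of the cap/floor usage the test above exercises: logistic
# growth requires a 'cap' column (and optionally 'floor') in both the history
# and the future frame. The data and saturation bounds here are illustrative,
# not taken from the test fixtures.
import numpy as np
import pandas as pd
from prophet import Prophet

history = pd.DataFrame({
    'ds': pd.date_range('2020-01-01', periods=180, freq='D'),
    'y': np.linspace(20, 60, 180) + np.random.normal(0, 2, 180),
})
history['cap'] = 80.0
history['floor'] = 10.0

m = Prophet(growth='logistic')
m.fit(history)
future = m.make_future_dataframe(periods=30)
future['cap'] = 80.0
future['floor'] = 10.0
fcst = m.predict(future)  # forecast saturates between floor and cap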
def test_cross_validation(self):
    m = Prophet()
    m.fit(self.__df)
    # Calculate the number of cutoff points (k)
    horizon = pd.Timedelta('4 days')
    period = pd.Timedelta('10 days')
    initial = pd.Timedelta('115 days')
    methods = [None, 'processes', 'threads', CustomParallelBackend()]

    try:
        from dask.distributed import Client
        client = Client(processes=False)  # noqa
        methods.append("dask")
    except ImportError:
        pass

    for parallel in methods:
        df_cv = diagnostics.cross_validation(
            m, horizon='4 days', period='10 days', initial='115 days',
            parallel=parallel)
        self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
        self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
        self.assertTrue(
            min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
        dc = df_cv['cutoff'].diff()
        dc = dc[dc > pd.Timedelta(0)].min()
        self.assertTrue(dc >= period)
        self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
        # Each y in df_cv and self.__df with same ds should be equal
        df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
        self.assertAlmostEqual(
            np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)

    df_cv = diagnostics.cross_validation(
        m, horizon='4 days', period='10 days', initial='135 days')
    self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
    with self.assertRaises(ValueError):
        diagnostics.cross_validation(
            m, horizon='10 days', period='10 days', initial='140 days')
    # invalid alias
    with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
        diagnostics.cross_validation(m, horizon="4 days", parallel="bad")
    # no map method
    with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
        diagnostics.cross_validation(m, horizon="4 days", parallel=object())
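# A self-contained sketch of the diagnostics API that the test above exercises,
# assuming nothing beyond prophet, pandas, and numpy. The synthetic series and
# the horizon/period/initial values are illustrative only.
import numpy as np
import pandas as pd
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

ds = pd.date_range('2020-01-01', periods=400, freq='D')
y = 10 + 2 * np.sin(2 * np.pi * ds.dayofweek / 7) + np.random.normal(0, 0.5, len(ds))
df = pd.DataFrame({'ds': ds, 'y': y})

m = Prophet()
m.fit(df)
# Rolling-origin evaluation: one forecast per cutoff, each over a 30-day horizon.
df_cv = cross_validation(m, horizon='30 days', period='15 days', initial='180 days')
df_p = performance_metrics(df_cv)  # rmse, mae, mape, coverage, ... by horizon
print(df_p.head())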
def test_flat_trend(self):
    model = Prophet()
    t = np.arange(11)
    m = 0.5
    y = model.flat_trend(t, m)
    y_true = np.array([0.5] * 11)
    self.assertEqual((y - y_true).sum(), 0)
    t = t[8:]
    y_true = y_true[8:]
    y = model.flat_trend(t, m)
    self.assertEqual((y - y_true).sum(), 0)
def test_cross_validation_custom_cutoffs(self):
    m = Prophet()
    m.fit(self.__df)
    # When a list of cutoffs is specified,
    # the cutoff dates in df_cv are those specified
    df_cv1 = diagnostics.cross_validation(
        m, horizon='32 days', period='10 days',
        cutoffs=[pd.Timestamp('2012-07-31'), pd.Timestamp('2012-08-31')])
    self.assertEqual(len(df_cv1['cutoff'].unique()), 2)
def fit_predict_model(dataframe, interval_width=0.99, changepoint_range=0.8):
    m = Prophet(daily_seasonality=False, yearly_seasonality=False,
                weekly_seasonality=False, seasonality_mode='multiplicative',
                interval_width=interval_width,
                changepoint_range=changepoint_range)
    m = m.fit(dataframe)
    forecast = m.predict(dataframe)
    forecast['fact'] = dataframe['y'].reset_index(drop=True)
    return forecast
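# Hedged usage sketch for fit_predict_model() above. The dataframe is a
# hypothetical stand-in with the 'ds'/'y' columns Prophet expects; the
# interval-based anomaly flag is one possible way to use the returned frame.
import numpy as np
import pandas as pd

example_df = pd.DataFrame({
    'ds': pd.date_range('2021-01-01', periods=90, freq='D'),
    'y': np.random.gamma(shape=2.0, scale=10.0, size=90),  # synthetic series
})
forecast = fit_predict_model(example_df, interval_width=0.99)
# 'fact' carries the observed values next to yhat_lower/yhat_upper, so points
# outside the predictive interval can be inspected as potential anomalies.
outside = (forecast['fact'] > forecast['yhat_upper']) | (forecast['fact'] < forecast['yhat_lower'])
print(forecast.loc[outside, ['ds', 'fact', 'yhat_lower', 'yhat_upper']])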
def _build(self, **config):
    """
    Build the model and initialize.

    :param config: hyperparameters for the model
    """
    self.set_params(**config)
    self.model = Prophet(changepoint_prior_scale=self.changepoint_prior_scale,
                         seasonality_prior_scale=self.seasonality_prior_scale,
                         holidays_prior_scale=self.holidays_prior_scale,
                         changepoint_range=self.changepoint_range,
                         seasonality_mode=self.seasonality_mode)
    self.model_init = True
def test_setup_dataframe(self):
    m = Prophet()
    N = DATA.shape[0]
    history = DATA.head(N // 2).copy()

    history = m.setup_dataframe(history, initialize_scales=True)

    self.assertTrue('t' in history)
    self.assertEqual(history['t'].min(), 0.0)
    self.assertEqual(history['t'].max(), 1.0)

    self.assertTrue('y_scaled' in history)
    self.assertEqual(history['y_scaled'].max(), 1.0)
class MetricPredictor:
    """Metric predictor that forecasts a time series with Prophet."""

    model_name = "prophet"
    model_description = "Forecasted value from Prophet model"
    model = None
    predicted_df = None
    metric = None

    def __init__(self, metric, rolling_data_window_size="10d"):
        """Initialize the Metric object."""
        self.metric = Metric(metric, rolling_data_window_size)

    def train(self, metric_data=None, prediction_duration=15):
        """Train the Prophet model and store the predictions in predicted_df."""
        prediction_freq = "1MIN"

        # convert incoming metric to Metric Object
        if metric_data:
            # because the rolling_data_window_size is set, this df should not bloat
            self.metric += Metric(metric_data)

        # Don't really need to store the model, as prophet models are not retrainable
        # But storing it as an example for other models that can be retrained
        self.model = Prophet(daily_seasonality=True, weekly_seasonality=True,
                             yearly_seasonality=True)

        _LOGGER.info("training data range: %s - %s", self.metric.start_time,
                     self.metric.end_time)
        # _LOGGER.info("training data end time: %s", self.metric.end_time)
        _LOGGER.debug("begin training")

        self.model.fit(self.metric.metric_values)
        future = self.model.make_future_dataframe(
            periods=int(prediction_duration),
            freq=prediction_freq,
            include_history=False,
        )
        forecast = self.model.predict(future)
        forecast["timestamp"] = forecast["ds"]
        forecast = forecast[["timestamp", "yhat", "yhat_lower", "yhat_upper"]]
        forecast = forecast.set_index("timestamp")
        self.predicted_df = forecast
        _LOGGER.debug(forecast)

    def predict_value(self, prediction_datetime):
        """Return the predicted value of the metric for the prediction_datetime."""
        nearest_index = self.predicted_df.index.get_loc(
            prediction_datetime, method="nearest")
        return self.predicted_df.iloc[[nearest_index]]
def fit_prophet(dtf_train, dtf_test, lst_exog=None, model=None, freq="D",
                conf=0.95, figsize=(15, 10)):
    ## setup prophet
    if model is None:
        model = Prophet(growth="linear", changepoints=None, n_changepoints=25,
                        seasonality_mode="multiplicative",
                        yearly_seasonality="auto", weekly_seasonality="auto",
                        daily_seasonality="auto", holidays=None,
                        interval_width=conf)
    if lst_exog is not None:
        for regressor in lst_exog:
            model.add_regressor(regressor)

    ## train
    model.fit(dtf_train)

    ## test
    dtf_prophet = model.make_future_dataframe(periods=len(dtf_test)+10,
                                              freq=freq, include_history=True)
    if model.growth == "logistic":
        dtf_prophet["cap"] = dtf_train["cap"].unique()[0]
    if lst_exog is not None:
        dtf_prophet = dtf_prophet.merge(dtf_train[["ds"]+lst_exog], how="left")
        dtf_prophet.iloc[-len(dtf_test):][lst_exog] = dtf_test[lst_exog].values
    dtf_prophet = model.predict(dtf_prophet)
    dtf_train = dtf_train.merge(dtf_prophet[["ds", "yhat"]], how="left").rename(
        columns={'yhat': 'model', 'y': 'ts'}).set_index("ds")
    dtf_test = dtf_test.merge(
        dtf_prophet[["ds", "yhat", "yhat_lower", "yhat_upper"]],
        how="left").rename(columns={'yhat': 'forecast', 'y': 'ts',
                                    'yhat_lower': 'lower',
                                    'yhat_upper': 'upper'}).set_index("ds")

    ## evaluate
    dtf = dtf_train.append(dtf_test)
    dtf = utils_evaluate_ts_model(dtf, conf=conf, figsize=figsize, title="Prophet")
    return dtf, model
def test_cross_validation_uncertainty_disabled(self):
    df = self.__df.copy()
    for uncertainty in [0, False]:
        m = Prophet(uncertainty_samples=uncertainty)
        m.fit(df, algorithm='Newton')
        df_cv = diagnostics.cross_validation(
            m, horizon='4 days', period='4 days', initial='115 days')
        expected_cols = ['ds', 'yhat', 'y', 'cutoff']
        self.assertTrue(
            all(col in expected_cols for col in df_cv.columns.tolist()))
        df_p = diagnostics.performance_metrics(df_cv)
        self.assertTrue('coverage' not in df_p.columns)
def test_cross_validation_default_value_check(self):
    m = Prophet()
    m.fit(self.__df)
    # Default value of initial should be equal to 3 * horizon
    df_cv1 = diagnostics.cross_validation(
        m, horizon='32 days', period='10 days')
    df_cv2 = diagnostics.cross_validation(
        m, horizon='32 days', period='10 days', initial='96 days')
    self.assertAlmostEqual(((df_cv1['y'] - df_cv2['y'])**2).sum(), 0.0)
    self.assertAlmostEqual(((df_cv1['yhat'] - df_cv2['yhat'])**2).sum(), 0.0)
def main():
    path_example = "../examples"

    """example 1"""
    df = read_dataframe(path_examples=path_example, data_idx=4)
    model, future, forecasted = forecast(
        df, periods=1096, showflag=True)  # uncertainty intervals seem way too wide

    df.loc[(df['ds'] > '2010-01-01') & (df['ds'] < '2011-01-01'), 'y'] = None
    model_removed = Prophet().fit(df)
    fig = model_removed.plot(
        model_removed.predict(future)
    )  # model with missing data; prediction over the whole range, including future
    fig.set_figheight(18)
    fig.set_figwidth(9)
    plt.title('prediction (model with missing data)')
    plt.show()

    """example 2"""
    df2 = read_dataframe(path_examples=path_example, data_idx=5)
    model2, future2, forecasted2 = forecast(
        df2, periods=1096, showflag=True)  # extreme outliers in June 2015 mess up the estimate

    df2.loc[(df2['ds'] > '2015-06-01') & (df2['ds'] < '2015-06-30'), 'y'] = None
    model2_removed = Prophet().fit(df2)  # same approach as the previous example
    fig = model2_removed.plot(model2_removed.predict(future2))
    fig.set_figheight(18)
    fig.set_figwidth(9)
    plt.title('prediction2 (model with missing data)')
    plt.show()
def test_fit_predict_uncertainty_disabled(self):
    N = DATA.shape[0]
    train = DATA.head(N // 2)
    future = DATA.tail(N // 2)
    for uncertainty in [0, False]:
        m = Prophet(uncertainty_samples=uncertainty)
        m.fit(train)
        fcst = m.predict(future)
        expected_cols = [
            'ds', 'trend', 'additive_terms', 'multiplicative_terms',
            'weekly', 'yhat'
        ]
        self.assertTrue(
            all(col in expected_cols for col in fcst.columns.tolist()))
def test_fit_predict_constant_history(self):
    N = DATA.shape[0]
    train = DATA.head(N // 2).copy()
    train['y'] = 20
    future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
    m = Prophet()
    m.fit(train)
    fcst = m.predict(future)
    self.assertEqual(fcst['yhat'].values[-1], 20)

    train['y'] = 0
    future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
    m = Prophet()
    m.fit(train)
    fcst = m.predict(future)
    self.assertEqual(fcst['yhat'].values[-1], 0)
def test_fourier_series_yearly(self):
    mat = Prophet.fourier_series(DATA['ds'], 365.25, 3)
    # These are from the R forecast package directly.
    true_values = np.array([
        0.7006152, -0.7135393, -0.9998330, 0.01827656, 0.7262249, 0.6874572
    ])
    self.assertAlmostEqual(np.sum((mat[0] - true_values)**2), 0.0)
def test_fourier_series_weekly(self):
    mat = Prophet.fourier_series(DATA['ds'], 7, 3)
    # These are from the R forecast package directly.
    true_values = np.array([
        0.7818315, 0.6234898, 0.9749279, -0.2225209, 0.4338837, -0.9009689
    ])
    self.assertAlmostEqual(np.sum((mat[0] - true_values)**2), 0.0)
def _run_prophet(self, data: ProphetDataEntry, params: dict) -> np.ndarray:
    """
    Construct and run a :class:`Prophet` model on the given
    :class:`ProphetDataEntry` and return the resulting array of samples.
    """
    prophet = self.init_model(Prophet(**params))

    # Register dynamic features as regressors to the model
    for i in range(len(data.feat_dynamic_real)):
        prophet.add_regressor(feat_name(i))

    prophet.fit(data.prophet_training_data)

    future_df = prophet.make_future_dataframe(
        periods=self.prediction_length,
        freq=self.freq,
        include_history=False,
    )

    # Add dynamic features in the prediction range
    for i, feature in enumerate(data.feat_dynamic_real):
        future_df[feat_name(i)] = feature[data.train_length:]

    prophet_result = prophet.predictive_samples(future_df)

    return prophet_result["yhat"].T
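# A small standalone sketch of Prophet.predictive_samples(), which _run_prophet
# relies on: it returns a dict of arrays with one column per posterior sample.
# The training data below is synthetic and purely illustrative.
import numpy as np
import pandas as pd
from prophet import Prophet

train = pd.DataFrame({
    'ds': pd.date_range('2019-01-01', periods=200, freq='D'),
    'y': np.random.rand(200),
})
m = Prophet(uncertainty_samples=100)
m.fit(train)
future = m.make_future_dataframe(periods=30, freq='D', include_history=False)
samples = m.predictive_samples(future)
print(samples['yhat'].shape)  # (forecast rows, uncertainty samples): (30, 100) here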
def test_fit_changepoint_not_in_history(self):
    train = DATA[(DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')]
    future = pd.DataFrame({'ds': DATA['ds']})
    prophet = Prophet(changepoints=['2013-06-06'])
    forecaster = prophet
    forecaster.fit(train)
    forecaster.predict(future)
def _build(self, **config):
    """
    Build the model and initialize.

    :param config: hyperparameters for the model
    """
    changepoint_prior_scale = config.get('changepoint_prior_scale', 0.05)
    seasonality_prior_scale = config.get('seasonality_prior_scale', 10.0)
    holidays_prior_scale = config.get('holidays_prior_scale', 10.0)
    seasonality_mode = config.get('seasonality_mode', 'additive')
    changepoint_range = config.get('changepoint_range', 0.8)
    self.metric = config.get('metric', self.metric)
    self.model = Prophet(changepoint_prior_scale=changepoint_prior_scale,
                         seasonality_prior_scale=seasonality_prior_scale,
                         holidays_prior_scale=holidays_prior_scale,
                         changepoint_range=changepoint_range,
                         seasonality_mode=seasonality_mode)
def result_gam(pred_period=24):
    # Use monthly data
    df = pd.read_csv('../Data/sfopax_month.csv')

    # Reformat data frame
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
    df['year'] = df['date'].dt.year
    df['pax_count'] = df['pax_count'] / 1000000
    X_train = df[df['year'] <= 2015][['date', 'pax_count']]
    X_test = df[df['year'] > 2015][['date', 'pax_count']]

    # Save the dataframes for the return object
    X_train_org = X_train
    X_test_org = X_test

    # Prepare date list for data frame if pred_period != 24
    if pred_period > 24:
        X_test_startdate = X_train['date'].max() + pd.tseries.offsets.MonthEnd(1)
        X_test_enddate = X_train['date'].max() + pd.tseries.offsets.MonthEnd(pred_period)
        X_test_date = pd.date_range(X_test_startdate, X_test_enddate, freq='m')
        X_test['test'] = X_test_date

    # Rename the column names for Prophet
    X_train = X_train.rename(columns={'date': 'ds', 'pax_count': 'y'})
    X_test = X_test.rename(columns={'date': 'ds', 'pax_count': 'y'})

    # Train the model and predict
    model = Prophet()
    model.fit(X_train)
    yhat = model.predict(X_test)

    # Get RMSE
    fb_rmse = mse(X_test['y'], yhat['yhat'].tolist()[:24])**0.5

    # Reformat the data frame for the result
    yhat = yhat[['ds', 'yhat']]
    yhat = yhat.rename(columns={'ds': 'date', 'yhat': 'pred'})

    # Save result and return
    result = {}
    result['X_train'] = X_train_org
    result['X_test'] = X_test_org
    result['pred'] = yhat
    result['rmse'] = fb_rmse
    return result
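# Hedged usage sketch for result_gam() above; it assumes the
# '../Data/sfopax_month.csv' file referenced in the function is available.
if __name__ == '__main__':
    res = result_gam(pred_period=24)
    print('Prophet RMSE (millions of passengers): %.4f' % res['rmse'])
    print(res['pred'].head())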
def test_quickstart():
    prophet = Prophet()
    prophet.set_universe(['AAPL', 'XOM'])

    prophet.register_data_generators(YahooCloseData(cache_path=CACHE_PATH))
    prophet.set_order_generator(OrderGenerator())

    backtest = prophet.run_backtest(start=datetime(2010, 1, 1),
                                    end=datetime(2014, 11, 21))

    prophet.register_portfolio_analyzers(default_analyzers)
    analysis = prophet.analyze_backtest(backtest)
    assert round(analysis['sharpe'], 10) == 1.0970973495
    assert round(analysis['average_return'], 10) == 0.0010547843
    assert round(analysis['cumulative_return'], 10) == 2.1688171559
    assert round(analysis['volatility'], 10) == 0.0152622562

    today = datetime(2014, 11, 10)
    expected_orders = Orders(Order(symbol='AAPL', shares=100))
    assert prophet.generate_orders(today) == expected_orders
def test_quickstart():
    prophet = Prophet()
    prophet.set_universe(['AAPL', 'XOM'])

    price_generator = YahooData('Adj Close', 'prices', cache_path=CACHE_PATH)
    prophet.register_data_generators(price_generator)
    prophet.set_order_generator(OrderGenerator())

    backtest = prophet.run_backtest(start=datetime(2010, 1, 1),
                                    end=datetime(2014, 11, 21))

    prophet.register_portfolio_analyzers(default_analyzers)
    analysis = prophet.analyze_backtest(backtest)
    assert round(analysis['sharpe'], 10) == 1.1083876014
    assert round(analysis['average_return'], 10) == 0.0010655311
    assert round(analysis['cumulative_return'], 10) == 2.2140809296
    assert round(analysis['volatility'], 10) == 0.0152607097

    today = datetime(2014, 11, 10)
    expected_orders = Orders(Order(symbol='AAPL', shares=100))
    assert prophet.generate_orders(today) == expected_orders
from prophet.analyze import default_analyzers
from bollinger import BollingerData
from eventstudy import BollingerEventStudy
from eventstudy import OrderGenerator

# Based on Homework #7 for Computational Investing
# http://wiki.quantsoftware.org/index.php?title=CompInvesti_Homework_7
# Here we use 2 symbols and a benchmark to reduce the data pulled,
# but you can use the full sp5002012.txt file from QSTK.
# You will have to adjust the portfolio analyzers:
# the homework solution's analyzers start the analysis
# when the first trade is conducted instead of covering the entire
# duration of the backtest.
prophet = Prophet()
symbols = ["AAPL", "XOM", "SPX"]
prophet.set_universe(symbols)

prophet.register_data_generators(YahooCloseData(),
                                 BollingerData(),
                                 BollingerEventStudy())
prophet.set_order_generator(OrderGenerator())
backtest = prophet.run_backtest(start=dt.datetime(2008, 1, 1),
                                end=dt.datetime(2009, 12, 31),
                                lookback=20)

prophet.register_portfolio_analyzers(default_analyzers)
analysis = prophet.analyze_backtest(backtest)
print(analysis)
# +----------------------------------------+
# | sharpe            | -0.851247401074   |
class OrderGenerator(object):

    def __init__(self):
        super(OrderGenerator, self).__init__()
        self._data = dict()

    def run(self, prices, timestamp, cash, **kwargs):
        symbol = "AAPL"
        orders = Orders()
        if (prices.loc[timestamp, symbol] * 100) < cash:
            orders.add_order(symbol, 100)
        return orders


prophet = Prophet()
prophet.set_universe(['AAPL', 'XOM'])

prophet.register_data_generators(YahooCloseData())
prophet.set_order_generator(OrderGenerator())
backtest = prophet.run_backtest(start=datetime(2010, 1, 1))

prophet.register_portfolio_analyzers(default_analyzers)
analysis = prophet.analyze_backtest(backtest)
print(analysis)
# +--------------------------------------+
# | sharpe            | 1.09754359611    |
# | average_return    | 0.00105478425027 |
# | cumulative_return | 2.168833         |
# | volatility        | 0.0152560508189  |
# +--------------------------------------+