def test_logistic_floor(self):
    """Logistic growth with a floor should be (approximately) shift invariant.

    Fits one model on the first half of DATA with floor=10 / cap=80, then a
    second model on the same data shifted up by 10, and checks that the two
    forecasts agree to within 1 after undoing the shift.
    """
    shift = 10.
    half = DATA.shape[0] // 2

    base_model = Prophet(growth='logistic')
    history = DATA.head(half).copy()
    history['floor'] = 10.
    history['cap'] = 80.
    future = DATA.tail(half).copy()
    future['cap'] = 80.
    future['floor'] = 10.
    base_model.fit(history, algorithm='Newton')
    self.assertTrue(base_model.logistic_floor)
    self.assertTrue('floor' in base_model.history)
    self.assertAlmostEqual(base_model.history['y_scaled'][0], 1.)
    base_fcst = base_model.predict(future)

    # Second model: identical data, everything moved up by `shift`.
    shifted_model = Prophet(growth='logistic')
    shifted_history = history.copy()
    for column in ('y', 'floor', 'cap'):
        shifted_history[column] += shift
    for column in ('cap', 'floor'):
        future[column] += shift
    shifted_model.fit(shifted_history, algorithm='Newton')
    self.assertAlmostEqual(shifted_model.history['y_scaled'][0], 1.)
    shifted_fcst = shifted_model.predict(future)
    shifted_fcst['yhat'] -= shift

    # Check for approximate shift invariance
    gap = np.abs(base_fcst['yhat'] - shifted_fcst['yhat'])
    self.assertTrue((gap < 1).all())
def test_fit_changepoint_not_in_history(self):
    """Fit/predict with a user changepoint that falls in a gap of the history.

    The training data omits 2013, the changepoint is placed at 2013-06-06,
    and the observations after the gap are shifted up by 20 so there is an
    actual level change around the changepoint.
    """
    outside_gap = (DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')
    # Copy so the shared DATA fixture is never modified through a slice.
    train = DATA[outside_gap].copy()
    # Shift only the observations; the original `train[mask] += 20` applied
    # the addition to every column of the selected rows, including `ds`.
    train.loc[train['ds'] > '2014-01-01', 'y'] += 20
    future = pd.DataFrame({'ds': DATA['ds']})
    forecaster = Prophet(changepoints=['2013-06-06'])
    forecaster.fit(train)
    forecaster.predict(future)
def test_fit_predict(self):
    """Smoke test: fit on the first half of DATA, predict on the second half."""
    half = DATA.shape[0] // 2
    history, horizon = DATA.head(half), DATA.tail(half)
    model = Prophet()
    model.fit(history)
    model.predict(horizon)
def test_fit_predict_no_seasons(self):
    """Smoke test: fit/predict with weekly and yearly seasonality disabled."""
    half = DATA.shape[0] // 2
    history, horizon = DATA.head(half), DATA.tail(half)
    model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
    model.fit(history)
    model.predict(horizon)
def test_fit_predict_no_changepoints(self):
    """Smoke test: fit/predict with ``n_changepoints=0``."""
    half = DATA.shape[0] // 2
    history, horizon = DATA.head(half), DATA.tail(half)
    model = Prophet(n_changepoints=0)
    model.fit(history)
    model.predict(horizon)
def test_fit_predict_duplicates(self):
    """Smoke test: fitting must cope with several rows sharing the same date.

    The history contains every date of the first half of DATA twice, the
    second copy with ``y`` shifted by 10.
    """
    N = DATA.shape[0]
    train1 = DATA.head(N // 2).copy()
    train2 = DATA.head(N // 2).copy()
    train2['y'] += 10
    # DataFrame.append was removed in pandas 2.0; concat gives the same frame.
    train = pd.concat([train1, train2])
    future = pd.DataFrame({'ds': DATA['ds'].tail(N // 2)})
    forecaster = Prophet()
    forecaster.fit(train)
    forecaster.predict(future)
def build_forecast(
        data,
        forecast_range,
        truncate_range=0
):
    """build a forecast for publishing

    Note:
        ``data['date']`` is converted to datetime in place, so the caller's
        frame is modified.

    Args:
        data (:obj:`pandas.data_frame`): data to build prediction; must have
            ``date`` and ``avgPrice`` columns
        forecast_range (int): how many periods into the future to forecast
        truncate_range (int, optional): when > 0, keep only rows newer than
            ``truncate_range`` days before the last observed date

    Returns:
        pandas.DataFrame: collection of data + forecast info
            ['date', 'avgPrice', 'yhat', 'yhat_low', 'yhat_high', 'prediction']

    """
    data['date'] = pd.to_datetime(data['date'])
    filter_date = data['date'].max()

    ## Build DataFrame ##
    predict_df = pd.DataFrame()
    predict_df['ds'] = data['date']
    predict_df['y'] = data['avgPrice']

    ## Run prediction ##
    # https://facebookincubator.github.io/prophet/docs/quick_start.html#python-api
    model = Prophet()
    model.fit(predict_df)
    future = model.make_future_dataframe(periods=forecast_range)
    # Predict once: the original ran predict() twice and discarded the first
    # result, doubling the most expensive step.
    forecast = model.predict(future)
    # Right join keeps every forecast row; future rows get NaN for `y`.
    predict_df = pd.merge(predict_df, forecast, on='ds', how='right')

    ## Build report for endpoint ##
    report = pd.DataFrame()
    report['date'] = pd.to_datetime(predict_df['ds'], format='%Y-%m-%d')
    report['avgPrice'] = predict_df['y']
    report['yhat'] = predict_df['yhat']
    report['yhat_low'] = predict_df['yhat_lower']
    report['yhat_high'] = predict_df['yhat_upper']
    # Rows after the last observed date are pure predictions.
    report['prediction'] = False
    report.loc[report.date > filter_date, 'prediction'] = True

    if truncate_range > 0:
        cut_date = filter_date - timedelta(days=truncate_range)
        report = report.loc[report.date > cut_date]

    return report
def test_fit_predict_constant_history(self):
    """A constant history must forecast that same constant (checked for 20 and 0)."""
    half = DATA.shape[0] // 2
    train = DATA.head(half).copy()
    for level in (20, 0):
        train['y'] = level
        future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
        model = Prophet()
        model.fit(train)
        fcst = model.predict(future)
        last_yhat = fcst['yhat'].values[-1]
        self.assertEqual(last_yhat, level)
def add_prophet_features(df_shop):
    """Attach rolling Prophet forecasts to a shop frame as ``prophet_*`` columns.

    For every biweek id ``m`` from ``max - 1`` down to 1, a model is fitted on
    the rows with ``biweek_id >= m`` and used to predict the 14 periods after
    that window. All predictions are prefixed with ``prophet_`` and left-joined
    onto ``df_shop`` by day; when several windows predict the same day, the one
    appended last is kept.

    Args:
        df_shop: frame with at least ``day``, ``pays_count``, ``biweek_id`` and
            ``days_from_beginning`` columns.

    Returns:
        ``df_shop`` with the ``prophet_*`` columns added (same number of rows).

    Raises:
        ValueError: if no window has at least 7 non-null observations.
        Exception: if the merged result does not have one row per input row.
    """
    df = df_shop[['day', 'pays_count']].rename(columns={'day': 'ds', 'pays_count': 'y'})

    results = []
    biweek_max = df_shop.biweek_id.max()
    for m in range(biweek_max - 1, 0, -1):
        df_train = df[df_shop.biweek_id >= m]
        # Too little data to fit anything meaningful.
        if df_train.y.notnull().sum() < 7:
            continue
        p = Prophet().fit(df_train)
        future = p.make_future_dataframe(14, include_history=False)
        results.append(p.predict(future))

    if not results:
        # pd.concat([]) would raise a ValueError with an unhelpful message.
        raise ValueError("no biweek window has at least 7 non-null observations")

    # Prefix from the concatenated frame itself rather than from whichever
    # prediction happened to be produced last in the loop.
    df_res = pd.concat(results).add_prefix('prophet_')
    df_res = df_shop.merge(df_res, how='left', left_on='day', right_on='prophet_ds')
    # `t` is only present in the output of some Prophet versions.
    df_res = df_res.drop(columns=['prophet_t', 'prophet_ds'], errors='ignore')
    # pandas validates `inplace` as a real bool (the original passed 1);
    # reassigning avoids the in-place form altogether.
    df_res = df_res.drop_duplicates(subset='days_from_beginning', keep='last')

    if len(df_res) != len(df_shop):
        raise Exception("size doesn't match")
    return df_res
def get_predictions(validate, train):
    """Forecast unit sales with Prophet for a fixed sample of items in store 47.

    For each sampled item present in ``validate`` (store 47 only), a model is
    fitted on the last 365 days of that item's gap-filled training history and
    used to predict 16 days ahead. Predictions are joined to the validation
    rows by date and clipped to ``[0, 999999]``.

    Args:
        validate: frame with ``id``, ``date``, ``store_nbr``, ``item_nbr``.
        train: frame with ``date``, ``store_nbr``, ``item_nbr``, ``unit_sales``.

    Returns:
        tuple: ``(result, problem_pairs)`` where ``result`` has the columns
        ``id`` and ``unit_sales`` and ``problem_pairs`` lists the
        ``(item_nbr, store_nbr)`` pairs Prophet could not fit.
    """
    CV_SIZE = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    TRAIN_SIZE = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327,
                     1239746, 765520, 1463867, 1010755, 1473396]

    # Loop-invariant: computed once instead of once per item.
    total_dates = train['date'].unique()
    problem_pairs = []
    frames = []

    store47examples = validate.loc[(validate.store_nbr == 47)
                                   & (validate.item_nbr.isin(example_items))]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))

    # Group by a scalar key (not a one-element list) so `name` is the item
    # number itself; list keys yield 1-tuples on recent pandas.
    # To predict everything: validate.groupby(['item_nbr', 'store_nbr'])
    for name, y in store47examples.groupby('item_nbr'):
        item_nbr = int(name)
        df = train[(train.item_nbr == item_nbr) & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])

        X = df[-TRAIN_SIZE:][['date', 'unit_sales']]
        X.columns = ['ds', 'y']

        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue

        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        # Inner join keeps only the dates present in the validation rows.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    # DataFrame.append was removed in pandas 2.0; collect and concat once.
    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])
    return (result, problem_pairs)
def run():
    """Print per-date "£" totals from the ledger, then fit and plot a forecast.

    The first part walks the postings of ``./secret/ledger.dat`` and prints
    ``<date> , <amount>`` whenever the posting date changes. The second part
    fits a Prophet model on ``./testing.csv`` (``y`` scaled by 100) and plots
    the forecast and its components.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    last_post = None
    amount = 0

    for post in journal.query(""):
        if last_post is None or post.date == last_post.date:
            if str(post.amount.commodity) != "£":
                continue
            amount = amount + post.amount
        else:
            # NOTE(review): the posting that triggers this branch is not added
            # to the next total, and the final date's total is never printed —
            # confirm whether that is intended.
            # Single-argument print() produces the same text on Python 2 and 3
            # as the original `print post.date, ",", amount` statement.
            print("%s , %s" % (post.date, amount))
            amount = 0
        last_post = post

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])

    m = Prophet()
    m.fit(df)

    # NOTE(review): `future` is not defined inside this function; presumably a
    # module-level frame built with m.make_future_dataframe(...) — confirm,
    # otherwise this line raises NameError.
    forecast = m.predict(future)

    m.plot(forecast)
    m.plot_components(forecast)
def test_subdaily_holidays(self):
    """A one-day holiday on sub-daily data leaves 575 rows with zero holiday effect."""
    special_days = pd.DataFrame({
        'ds': pd.to_datetime(['2017-01-02']),
        'holiday': ['special_day'],
    })
    model = Prophet(holidays=special_days)
    model.fit(DATA2)
    # predict() without arguments predicts over the fitted history.
    fcst = model.predict()
    unaffected = fcst['special_day'] == 0
    self.assertEqual(sum(unaffected), 575)
def hello():
    """Fit Prophet on log-transformed data from ``url`` and return a 365-period forecast.

    Returns:
        str: the forecast frame serialised with ``to_json(orient='table')``.
            Forecast values are on the log scale, like the fitted ``y``.
    """
    print('Hello, world!')
    # NOTE(review): `url` is not defined in this function; presumably a
    # module-level constant — confirm.
    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])

    m = Prophet()
    m.fit(df)

    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    # The notebook-style `.head()` / `.tail()` expressions were dropped: their
    # results were discarded, so they had no effect inside a function.
    return forecast.to_json(orient='table')
def train_prophet(df, modelDir, confidence=0.99):
    """Fit Prophet on ``df``, predict over the history and cache the result as CSV.

    Args:
        df: frame with Prophet's ``ds`` / ``y`` columns.
        modelDir: directory the forecast CSV is written to.
        confidence: passed to Prophet as ``interval_width``; also part of the
            output file name (``forecasted_<confidence>.csv``).

    Returns:
        The forecast frame with the observed values added as column ``y``.
    """
    m = Prophet(
        yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
    )
    with suppress_stdout_stderr():
        m.fit(df)

    # periods=0: predict over the historical dates only.
    # print() with one argument behaves the same on Python 2 and 3; the
    # original print statement was a syntax error on Python 3.
    print("PREDICTING!")
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)

    # Merge in the historical data.
    # NOTE(review): this assignment aligns on the index, so it presumably
    # expects `df` to be date-sorted with a default 0..n-1 index — confirm.
    forecast["y"] = df.y.astype(float)

    # Cache the forecast (not the model object itself) in modelDir.
    forecast.to_csv(
        pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
    )
    return forecast
def get_prophet_forecasting(group_name, data, logs=True):
    """Fit a logistic-growth Prophet model for one group and forecast 12 months.

    Args:
        group_name: indexable; elements 0 and 1 are copied into the result.
        data: frame with Prophet's ``ds`` / ``y`` columns. It is modified in
            place (clipping, optional log transform, added ``cap`` column).
        logs: when true, positive ``y`` values are log-transformed before
            fitting.

    Returns:
        ``([group_name[0], group_name[1], values], [model, forecast])``. When
        the data is empty or fitting fails, ``values`` is ``np.zeros(12)`` and
        the second element is ``[None, None]``.
    """
    # data = group.reset_index()
    # data.columns = ['ds', 'y']

    # Remove first null rows
    # NOTE(review): `data.y[i]` is a label lookup, so this presumably expects
    # a default 0..n-1 index — confirm against the caller.
    first_row = 0
    for i in range(len(data)):
        if data.y[i] == 0:
            first_row = i + 1
        else:
            break
    data = data.loc[first_row:, :]
    # min_value = min(data.y)
    # non_zero_inds = data.y > 0
    # data.y[non_zero_inds] = data.y[non_zero_inds] + min_value
    non_zero_inds = data.y > 0
    # NOTE(review): chained assignment on a `.loc` slice; pandas may warn or
    # write to a copy here — verify the values are actually updated.
    data['y'][data.y <= 0] = 0.0
    if logs:
        data['y'][non_zero_inds] = np.log(data['y'][non_zero_inds].tolist())
    if len(data) == 0:
        # frcst = forecastings[-1][1]
        # frcst['yhat'] = 0
        return [group_name[0], group_name[1], np.zeros(12)], [None, None]
    with suppress_stdout_stderr():
        try:
            m = Prophet(growth='logistic', weekly_seasonality=False, daily_seasonality=False)
            m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
            # cap = max(data.y)*1.2
            # Capacity is the largest observed (possibly log-scaled) value.
            cap = max(data.y)
            data['cap'] = cap
            m.fit(data)
            future = m.make_future_dataframe(periods=1 * 12, freq='M')
            future['cap'] = cap
            frcst = m.predict(future)
        # Any failure while fitting or predicting yields the all-zero fallback.
        except:
            return [group_name[0], group_name[1], np.zeros(12)], [None, None]
    # NOTE(review): the forecast columns are exponentiated here regardless of
    # `logs` — confirm whether this should only happen when logs is true.
    for field in ['yhat', 'yhat_lower', 'yhat_upper']:
        # print('---- ',list(frcst[frcst[field] > 0][field]))
        # print(sum(frcst[field] > 0))
        frcst[field] = np.exp(list(frcst[field].values))
        # print('*****', list(frcst[frcst[field] > 0][field]))
        # print(np.exp([2.02]))
    #
    # NOTE(review): `res` covers every forecast row (history + 12 months), and
    # with logs=True it is exponentiated a second time below — confirm intent.
    res = frcst['yhat'].values
    if logs:
        res = np.exp(res)
    # res = data['y'] - min_value
    return [group_name[0], group_name[1], res], [m, frcst]
def test_added_regressors(self):
    """Extra regressors: validation, standardisation, feature matrix, components."""
    m = Prophet()
    m.add_regressor('binary_feature', prior_scale=0.2)
    m.add_regressor('numeric_feature', prior_scale=0.5)
    m.add_regressor('binary_feature2', standardize=True)

    frame = DATA.copy()
    frame['binary_feature'] = [0] * 255 + [1] * 255
    frame['numeric_feature'] = range(510)
    # Require all regressors in df
    with self.assertRaises(ValueError):
        m.fit(frame)
    frame['binary_feature2'] = [1] * 100 + [0] * 410
    m.fit(frame)

    # Check that standardizations are correctly set
    regressors = m.extra_regressors
    expected_binary = {
        'prior_scale': 0.2,
        'mu': 0,
        'std': 1,
        'standardize': 'auto'
    }
    self.assertEqual(regressors['binary_feature'], expected_binary)

    numeric = regressors['numeric_feature']
    self.assertEqual(numeric['prior_scale'], 0.5)
    self.assertEqual(numeric['mu'], 254.5)
    self.assertAlmostEqual(numeric['std'], 147.368585, places=5)

    binary2 = regressors['binary_feature2']
    self.assertEqual(binary2['prior_scale'], 10.)
    self.assertAlmostEqual(binary2['mu'], 0.1960784, places=5)
    self.assertAlmostEqual(binary2['std'], 0.3974183, places=5)

    # Check that standardization is done correctly
    scaled = m.setup_dataframe(frame.copy())
    self.assertEqual(scaled['binary_feature'][0], 0)
    self.assertAlmostEqual(scaled['numeric_feature'][0], -1.726962, places=4)
    self.assertAlmostEqual(scaled['binary_feature2'][0], 2.022859, places=4)

    # Check that feature matrix and prior scales are correctly constructed
    seasonal_features, prior_scales = m.make_all_seasonality_features(scaled)
    for regressor in ('binary_feature', 'numeric_feature', 'binary_feature2'):
        self.assertIn(regressor, seasonal_features)
    self.assertEqual(seasonal_features.shape[1], 29)
    self.assertEqual(set(prior_scales[26:]), {0.2, 0.5, 10.})

    # Check that forecast components are reasonable
    future = pd.DataFrame({
        'ds': ['2014-06-01'],
        'binary_feature': [0],
        'numeric_feature': [10],
    })
    # Predicting must also fail while a regressor column is missing.
    with self.assertRaises(ValueError):
        m.predict(future)
    future['binary_feature2'] = 0
    fcst = m.predict(future)
    self.assertEqual(fcst.shape[1], 31)
    self.assertEqual(fcst['binary_feature'][0], 0)

    def first(column):
        return fcst[column][0]

    # Each aggregate component must equal the sum of its parts.
    self.assertAlmostEqual(
        first('extra_regressors'),
        first('numeric_feature') + first('binary_feature2'),
    )
    self.assertAlmostEqual(
        first('seasonalities'),
        first('yearly') + first('weekly'),
    )
    self.assertAlmostEqual(
        first('seasonal'),
        first('seasonalities') + first('extra_regressors'),
    )
    self.assertAlmostEqual(
        first('yhat'),
        first('trend') + first('seasonal'),
    )
def _null_out_spikes(frame):
    """Set spike values of ``frame['y']`` to missing, in place.

    When the maximum exceeds the median by at least twice the median, every
    value >= 2 * median is replaced with None so Prophet ignores it.
    """
    q50 = frame.y.median()
    q100 = frame.y.quantile(1)
    if (q100 - q50) >= (2 * q50):
        frame.loc[frame.y >= (2 * q50), 'y'] = None


def create_prophet_m(app_name, z1, delay=24):
    """Fit Prophet on an app's response times: one live forecast, one back-test.

    Two models are fitted on ``z1.app_rsp_time`` (index = timestamps):

    * a "realtime" model on the full series, forecasting ``delay`` hours ahead;
    * an evaluation model on everything except the last ``delay`` points, whose
      forecast for those held-out points is scored with MSE / RMSE / MAPE.

    Args:
        app_name: label used in the printed report and as index of the metrics.
        z1: frame with an ``app_rsp_time`` column.
        delay: forecast horizon in hours and size of the hold-out window.

    Returns:
        tuple: ``(df, model, forecast, pred_df, pred_r)`` — metrics frame
        (empty when no hold-out row could be scored), evaluation model, its
        full forecast, the scored hold-out rows, and the realtime prediction.
    """
    n_obs = len(z1)

    ### --- For realtime pred ---###
    full_df = z1.app_rsp_time.iloc[0:n_obs].reset_index()
    full_df.columns = ['ds', 'y']
    _null_out_spikes(full_df)

    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Rows past the history are the actual forecast; .iloc makes the
    # positional slice on the date index explicit.
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_obs:(n_obs + delay)])
    pred_r = pred_r.reset_index()
    #--- completes realtime prediction ---#

    train_end_index = len(z1.app_rsp_time) - delay
    train_df = z1.app_rsp_time.iloc[0:train_end_index].reset_index()
    test_df = z1.app_rsp_time.iloc[train_end_index:n_obs].reset_index()
    train_df.columns = ['ds', 'y']
    test_df.columns = ['ds', 'y']
    # Outliers are removed from the training set only.
    _null_out_spikes(train_df)

    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']

    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_obs])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        print("App name:", app_name)
        print("MSE  :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE over the rows below the 98th percentile of absolute % error.
        mape_q98 = pred_df['APE'][pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

        df = pd.DataFrame({#'length':len(z1),
                           'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'test_mape_98': mape_q98},
                          index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
## Prophet1 # set the uncertainty interval to 95% (the Prophet default is 80%) m = Prophet() m.add_seasonality(name='hourly', period=24, fourier_order=2) m.fit(view_hour); #%% ## Create a dataframe for the future dates ## The tail will only display the time periods without the forecasted values future = m.make_future_dataframe(periods=24,freq='H') future.tail() #%% ## This is the data that is exponentiated below forecast = m.predict(future) forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail() #%% ## This is the data that retains the log transform ## Note that the predict function will create a df that contains ## many period features(e.g., trend, daily, hourly, weekly, seasonal ## along with _upper and _lower ci's). Execute a .info() against ## the dataframe to see all the elements. ## This creates a dataframe with just the 4 elements below forecast1 = m.predict(future) forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail() #%% ## This works !
def main():
    """Handle the stock form: forecast 10 days of closing prices and render the plot page.

    On POST, the historical prices for the submitted company are fetched, a
    Prophet model is fitted on the log of ``Close``, and the dates, actual and
    forecast prices are written to ``static/numbers.csv`` for the template.

    Returns:
        The rendered ``plot.html`` on POST; ``None`` for any other method.
    """
    if request.method != 'POST':
        # NOTE(review): nothing is rendered for non-POST requests (same as the
        # original, which fell off the end of the function) — confirm the
        # route only accepts POST.
        return None

    stock = request.form['companyname']
    df_whole = get_historical_stock_price(stock)

    df = df_whole.filter(['Close'])
    df['ds'] = df.index
    # Model the log of the closing price; the forecast is mapped back below.
    df['y'] = np.log(df['Close'])
    # .iloc: positional access; integer keys through [] on a non-integer
    # index are deprecated in pandas.
    original_end = df['Close'].iloc[-1]

    model = Prophet()
    model.fit(df)

    num_days = 10
    future = model.make_future_dataframe(periods=num_days)
    forecast = model.predict(future)
    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

    # Align history and forecast on the date so they can be joined.
    df.set_index('ds', inplace=True)
    forecast.set_index('ds', inplace=True)
    viz_df = df.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']], how='outer')
    # Undo the log transform.
    viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])

    close_data = viz_df.Close
    forecasted_data = viz_df.yhat_scaled
    date = future['ds']
    # First value of the forecast horizon.
    forecast_start = forecasted_data.iloc[-num_days]

    export_data = zip_longest(date, close_data, forecasted_data, fillvalue='')
    # The with-block closes the file; the extra close() call was redundant.
    with open('static/numbers.csv', 'w', encoding="ISO-8859-1", newline='') as myfile:
        wr = csv.writer(myfile)
        wr.writerow(("Date", "Actual", "Forecasted"))
        wr.writerows(export_data)

    return render_template("plot.html",
                           original=round(original_end, 2),
                           forecast=round(forecast_start, 2),
                           stock_tinker=stock.upper())
# This trend can be confirmed through the use of the Prophet library, which has some robustness to outliers. # We can split the time series into its various time components - years, months and weeks. # This is similar to running a Fourier analysis. # The prophet library includes considerations for holidays dates. # %% # Confirm trend with prophet (facebook) from fbprophet import Prophet time_model = Prophet() prophet_data = temp.loc[:, ["datetime", "count"]] prophet_data.columns = ["ds", "y"] time_model.fit(prophet_data) # Show components forecast = time_model.predict(prophet_data) fig_components = time_model.plot_components(forecast, weekly_start=1) # Make future predictions future = time_model.make_future_dataframe(periods=365, include_history=True) fig_pred = time_model.plot(time_model.predict(future), xlabel="Date", ylabel="Number of trips/day") fig_components.savefig("images/prophet_comp.png") fig_pred.savefig("images/prophet_pred.png") # %% [markdown] # ![](images/prophet_comp.png) # # This matches our conclusions that weekends are less popular overall, and there is a summer month boom.
def PlotSeries():
    """Render the forecast page, plotting the crime time series when the form is complete.

    Reads the year, police station and crime type from the submitted form,
    fits a Prophet model on the matching series and saves the forecast plot
    under ``static/``. When any of the three selections is missing, the page
    is rendered without an image.

    Returns:
        The rendered ``previsao.html`` template.
    """
    # Read the values of the page's select elements.
    select_ano = request.form.get("Anos", None)
    # select_mun = request.form.get("Municipios", None)
    select_mun = 'Brotas'
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    # The year is needed for both the file name and the horizon, so it is
    # checked together with the other selections.
    if not (select_ano and select_dp and select_crime):
        return render_template("previsao.html")

    # File name of the plot.
    # NOTE(review): built from raw form values — confirm they are restricted
    # to known options before being used in a path.
    img = 'static/plot' + select_ano + 'Brotas' + select_dp + select_crime + '.png'
    print(select_dp)

    # Fetch the data frame.
    df = getDataAtDB(select_mun, select_dp, select_crime)
    print(df.head())
    df['datas'] = pd.to_datetime(df['datas'])

    # Rename to the column names Prophet expects. (The original also called
    # df.set_index('datas') and discarded the result, which had no effect.)
    df.columns = ["ds", "y"]

    # Build the model.
    m = Prophet(changepoint_prior_scale=0.05, changepoint_range=0.8)
    m.add_country_holidays(country_name='BR')
    m.fit(df)

    # Forecast monthly up to the selected year.
    future = m.make_future_dataframe(
        periods=12 * (int(select_ano) - date.today().year), freq='MS')
    forecast = m.predict(future)

    # Create the plot image.
    fig = m.plot(forecast, figsize=(8, 4))
    ax = fig.gca()
    ax.set_xlabel('Data')
    ax.set_ylabel('Ocorrencias')
    ax.set_ylim(bottom=0)
    if select_dp != 'Todos':
        ax.set_title("Série temporal das ocorrências de " + select_crime +
                     " registradas no " + select_dp)
    else:
        ax.set_title("Série temporal das ocorrências de " + select_crime +
                     " registradas na cidade de " + select_mun)
    fig.savefig(img, bbox_inches='tight')
    # Close (not just clear) the figure so it is released between requests.
    plt.close(fig)

    # df_cv = cross_validation(m, initial='3600 days', horizon = '1200 days', parallel="processes")
    # df_p = performance_metrics(df_cv)
    # print(df_p.head())

    # Hyperparameter optimisation
    # params_df = create_param_combinations(**param_grid)
    # print(len(params_df.values))
    # for param in params_df.values:
    #     param_dict = dict(zip(params_df.keys(), param))
    #     cv_df = single_cv_run(df, metrics, param_dict, parallel="processes")
    #     results.append(cv_df)
    # results_df = pd.concat(results).reset_index(drop=True)
    # best_param = results_df.loc[results_df['rmse'] == min(results_df['rmse']), ['params']]
    # print(f'\n The best param combination is {best_param.values[0][0]}')
    # print(results_df)

    return render_template("previsao.html", image=img)
# Fit one model with store and category ids as extra regressors, then build a
# prediction grid of 56 future days for every (category, store) combination.
fb.add_regressor('store_id')
fb.add_regressor('cat_id')
fb.fit(df_cat_pred)
future = fb.make_future_dataframe(freq='D', periods=56, include_history=False)

s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
c = pd.Series([1, 2, 3])

# Each of the 10 store ids repeated for 56 rows, the whole sequence three times.
stores1 = pd.DataFrame({'store_id': s.repeat(56)}).reset_index()
stores1 = pd.concat([stores1] * 3).reset_index().drop('index', axis=1)
# NOTE(review): this repeats `s` (ids 1..10), not `c`; `c` is unused here.
# After the index-aligned assignment below only the first rows are used, which
# carry ids 1, 2, 3 when `future` has 56 rows — confirm this is intended.
cat1 = pd.DataFrame({'cat_id': s.repeat(560)}).reset_index()

# 30 stacked copies of the future dates.
final_df = pd.concat([future] * 30).reset_index().drop('index', axis=1)
final_df['store_id'] = stores1['store_id']
final_df['cat_id'] = cat1['cat_id']

predict = fb.predict(final_df)
y_pred_df = predict[['yhat']]
# NOTE(review): this concatenation pairs rows by position/index; if predict()
# returns rows in a different order than final_df (e.g. sorted by `ds`), yhat
# would be attached to the wrong store/category — verify.
final = pd.concat([final_df, y_pred_df], axis=1)

# Numeric store id -> store code.
stores2 = {
    1: 'CA_1',
    2: 'CA_2',
    3: 'CA_3',
    4: 'CA_4',
    5: 'TX_1',
    6: 'TX_2',
    7: 'TX_3',
    8: 'WI_1',
    9: 'WI_2',
    10: 'WI_3'
}
def FBprophet_memory(dataset, host_name):
    """Forecast a host's memory usage, plot it, upload the plot and return a recommendation.

    A Prophet model is fitted on the ``used`` column of ``dataset`` and
    forecast 7 days ahead. The recommendation curve is the forecast's upper
    bound scaled by 1.2 (weekly maximum for plotting). The figure is saved
    under ``./images`` and uploaded to the recommendation bucket.

    Note:
        ``dataset`` is modified in place: its index is replaced by a datetime
        ``date`` index.

    Args:
        dataset: frame with a ``used`` column and a date-like index.
        host_name: host the data belongs to; used for zone/instance lookup and
            in the image name.

    Returns:
        The maximum of the scaled upper-bound forecast, or ``None`` when a
        ``ValueError`` is raised while fitting, predicting or plotting.
    """
    dataset['date'] = pd.to_datetime(dataset.index)
    dataset.set_index('date', inplace=True)
    pjme = dataset

    pred = test_dummy(dataset)
    pred['date'] = pd.to_datetime(pred.index)
    pred.set_index('date', inplace=True)

    # The last 7 days are only used for the scatter overlay and its forecast;
    # the model itself is fitted on the full series.
    split_date = (date.today() - timedelta(days=7)).strftime('%d-%b-%Y')
    pjme_test = pjme.loc[pjme.index > split_date].copy()

    model = Prophet(changepoint_prior_scale=0.95)
    try:
        model.fit(pjme.reset_index().rename(columns={'date': 'ds', 'used': 'y'}))
        future = model.make_future_dataframe(periods=7)
        forecast = model.predict(future)

        # Recommendation = upper forecast bound with 20% headroom
        # (changed based on feedback from manju: history is not included).
        df_past = pd.DataFrame({
            'ds': forecast['ds'],
            'y': forecast['yhat_upper'] * 1.2
        })

        pjme_test_fcst = model.predict(
            df=pjme_test.reset_index().rename(columns={'date': 'ds'}))
        pjme_fut_fcst = model.predict(
            df=pred.reset_index().rename(columns={'date': 'ds'}))

        df_recomm = df_past.set_index('ds')[['y']].resample('W').max()

        f, ax = plt.subplots(1)
        try:
            ax.plot(model.history['ds'].dt.to_pydatetime(),
                    model.history['y'],
                    color='teal', marker='o', linestyle='solid', linewidth=1,
                    label='Actual Utilization')
            ax.plot(df_recomm.index, df_recomm['y'],
                    color='orange', marker='o', linestyle='solid', linewidth=1,
                    label='Recommendation')
            f.set_figheight(5)
            f.set_figwidth(15)
            ax.scatter(pjme_test.index, pjme_test['used'], color='yellow')
            ax.legend(loc='upper left', frameon=False)

            # DataFrame.append was removed in pandas 2.0.
            pjme_test_fcst = pd.concat([pjme_test_fcst, pjme_fut_fcst])
            model.plot(pjme_test_fcst, ax=ax)

            df = hostname_zone_df()
            zone = " ".join(df[df['Hostname'] == host_name]['Zone'].values)
            instance_id = instance_id_func(host_name)

            image_name = "Memory_{}_{}_{}_{}.png".format(
                project, zone, host_name, instance_id)
            image_path = os.path.join(os.getcwd() + '/images', image_name)
            f.savefig(image_path)
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(f)

        upload_blob("gcp_cost_recommendation_bucket", image_path, image_name)
        return df_past['y'].max()
    except ValueError:
        return None
def fitForecast(y, h, sumMat, nodes, method, freq, include_history, cap, capF, changepoints, n_changepoints,
                yearly_seasonality, weekly_seasonality, daily_seasonality, holidays, seasonality_prior_scale,
                holidays_prior_scale, changepoint_prior_scale, mcmc_samples, interval_width, uncertainty_samples,
                boxcoxT, skipFitting):
    """Produce base forecasts for every node of a hierarchy and reconcile them.

    Args:
        y: frame whose first column is the time stamp and whose remaining
            columns are one series per node (total first, bottom level last).
            With ``skipFitting`` the node columns already hold forecasts.
        h: number of periods to forecast.
        sumMat: summing matrix, shape (number of nodes, number of bottom series).
        nodes: hierarchy description; used by the 'FP' method.
        method: reconciliation method — 'BU', 'AHP', 'PHA', 'FP', 'OLS',
            'WLSS' or 'WLSV'.
        freq, include_history: passed to ``make_future_dataframe``.
        cap, capF: capacity for history / future (scalar or one column per
            node); ``capF is None`` selects linear growth, otherwise logistic.
        changepoints, n_changepoints: scalar settings, or per-node values
            (DataFrame columns / list entries).
        yearly_seasonality ... uncertainty_samples: passed through to Prophet.
        boxcoxT: per-node Box-Cox lambdas to invert, or None.
        skipFitting: when true, skip Prophet and reconcile the values in ``y``.

    Returns:
        dict: node index -> forecast frame whose ``yhat`` holds the
        reconciled forecast.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    forecastsDict = {}
    mse = {}
    resids = {}
    nForecasts = sumMat.shape[0]

    if skipFitting:
        # Forecasts are supplied by the caller: wrap each node column as a
        # one-column frame named 'yhat'.
        for key in range(len(y.columns.tolist()) - 1):
            forecastsDict[key] = pd.DataFrame(y.iloc[:, key + 1])
            forecastsDict[key] = forecastsDict[key].rename(
                columns={forecastsDict[key].columns[0]: 'yhat'})
    else:
        if method == 'FP':
            nForecasts = sum(list(map(sum, nodes))) + 1

        for node in range(nForecasts):
            nodeToForecast = pd.concat([y.iloc[:, [0]], y.iloc[:, node + 1]], axis=1)

            # Per-node settings where the caller supplied one value per node.
            cap1 = cap.iloc[:, node] if isinstance(cap, pd.DataFrame) else cap
            cap2 = capF.iloc[:, node] if isinstance(capF, pd.DataFrame) else capF
            if isinstance(changepoints, pd.DataFrame):
                # .iloc is required for positional column selection;
                # `changepoints[:, node]` is not valid on a DataFrame.
                changepoints1 = changepoints.iloc[:, node]
            else:
                changepoints1 = changepoints
            if isinstance(n_changepoints, list):
                n_changepoints1 = n_changepoints[node]
            else:
                n_changepoints1 = n_changepoints

            # Silence Prophet's stdout chatter; the devnull handle is closed
            # when the block exits.
            with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
                nodeToForecast = nodeToForecast.rename(
                    columns={nodeToForecast.columns[0]: 'ds'})
                nodeToForecast = nodeToForecast.rename(
                    columns={nodeToForecast.columns[1]: 'y'})

                growth = 'linear' if capF is None else 'logistic'
                # Both growth modes use the per-node changepoint settings (the
                # logistic branch previously passed the un-indexed arguments).
                m = Prophet(
                    growth=growth,
                    changepoints=changepoints1,
                    n_changepoints=n_changepoints1,
                    yearly_seasonality=yearly_seasonality,
                    weekly_seasonality=weekly_seasonality,
                    daily_seasonality=daily_seasonality,
                    holidays=holidays,
                    seasonality_prior_scale=seasonality_prior_scale,
                    holidays_prior_scale=holidays_prior_scale,
                    changepoint_prior_scale=changepoint_prior_scale,
                    mcmc_samples=mcmc_samples,
                    interval_width=interval_width,
                    uncertainty_samples=uncertainty_samples)
                if capF is not None:
                    nodeToForecast['cap'] = cap1

                m.fit(nodeToForecast)
                future = m.make_future_dataframe(
                    periods=h, freq=freq, include_history=include_history)
                if capF is not None:
                    future['cap'] = cap2

                ##
                # Base Forecasts, Residuals, and MSE
                ##
                fcstFrame = m.predict(future)
                forecastsDict[node] = fcstFrame
                # NOTE(review): assumes h > 0 and include_history=True so the
                # first rows line up with `y` — confirm for other settings.
                resids[node] = y.iloc[:, node + 1] - fcstFrame.yhat[:-h].values
                mse[node] = np.mean(np.array(resids[node])**2)

                ##
                # If logistic use exponential function, so that values can be added correctly
                ##
                if capF is not None:
                    fcstFrame['yhat'] = np.exp(fcstFrame['yhat'])

                if boxcoxT is not None:
                    lam = boxcoxT[node]
                    for col in ('yhat', 'trend'):
                        fcstFrame[col] = inv_boxcox(fcstFrame[col], lam)
                    # Optional components; 'holidays' now transforms the
                    # holidays column (it used to re-transform 'yearly').
                    for col in ('seasonal', 'daily', 'weekly', 'yearly', 'holidays'):
                        if col in fcstFrame.columns:
                            fcstFrame[col] = inv_boxcox(fcstFrame[col], lam)

    ##
    # Now, Revise them
    ##
    if method in ('BU', 'AHP', 'PHA'):
        y1 = y.copy()
        nCols = len(forecastsDict) + 1
        numBTS = sumMat.shape[1]

        if method == 'BU':
            # Pros: No information lost due to aggregation
            # Cons: Bottom level data can be noisy and more challenging to model and forecast
            # Stack the bottom-level forecasts column by column. (The previous
            # "is hatMat still all zeros" test dropped a bottom series whose
            # forecast was entirely zero.)
            bottomKeys = range(nCols - numBTS - 1, nCols - 1)
            hatMat = np.column_stack(
                [np.asarray(forecastsDict[key].yhat) for key in bottomKeys])
        else:
            # AHP / PHA
            # Pros: Creates reliable aggregate forecasts, and good for low count data
            # Cons: Unable to capture individual series dynamics
            if boxcoxT is not None:
                for column in range(len(y.columns.tolist()) - 1):
                    y1.iloc[:, column + 1] = inv_boxcox(y1.iloc[:, column + 1], boxcoxT[column])

            ##
            # Find Proportions
            ##
            # Convert to ndarray before adding an axis; Series do not support
            # multi-dimensional indexing.
            fcst = np.asarray(forecastsDict[0].yhat)[:, np.newaxis]
            btsDat = y1.iloc[:, nCols - numBTS:nCols]
            if method == 'AHP':
                # Average of the historical proportions.
                divs = np.divide(np.transpose(np.array(btsDat)), np.array(y1.iloc[:, 1]))
                props = divs.mean(1)
            else:
                # Proportion of the historical averages.
                props = np.asarray(btsDat.sum(0)) / sum(y1.iloc[:, 1])
            hatMat = np.dot(fcst, props[np.newaxis, :])

        # Aggregate the bottom-level values up the hierarchy, row by row.
        newMat = np.empty([hatMat.shape[0], sumMat.shape[0]])
        for i in range(hatMat.shape[0]):
            newMat[i, :] = np.dot(sumMat, np.transpose(hatMat[i, :]))
    elif method == 'FP':
        newMat = forecastProp(forecastsDict, nodes)
    elif method in ('OLS', 'WLSS', 'WLSV'):
        if capF is not None:
            print(
                "An error might occur because of how these methods are defined (They can produce negative values). If it does, then please use another method"
            )
        newMat = optimalComb(forecastsDict, sumMat, method, mse)
    else:
        # Previously surfaced as a NameError on `newMat` further down.
        raise ValueError("Unknown reconciliation method: {!r}".format(method))

    for key in forecastsDict.keys():
        forecastsDict[key]['yhat'] = newMat[:, key]
        ##
        # If Logistic fit values with natural log function to revert back to format of input
        ##
        if capF is not None:
            forecastsDict[key]['yhat'] = np.log(forecastsDict[key]['yhat'])

    return forecastsDict
# Rename to the column names Prophet expects.
test = test.rename(columns={'Date' : 'ds', 'MeanHomeValue' : 'y'})
train.head()

# setting uncertainty interval to 95%
z_model = Prophet(interval_width=.95)
z_model.fit(train)

# making new df with future values (40 further periods at month-end frequency)
future_home_values = z_model.make_future_dataframe(periods = 40, freq = 'M')
future_home_values.tail()

# base model prediction
forecast = z_model.predict(future_home_values)

# examining data types for forecast df
forecast.info()

# observing the last forecasted rows
forecast.tail()

"""prophet drops the index (zip code) so it is impossible to make projects for the whole country. Instead, I will make projections for each of the top 5 zip codes in the country based on percent change in home value over the relevant time period (2010-present)"""

# taking zip code plus the 2010-01-31 and 2020-03-31 value columns
top_zip = zillow[['RegionName','2010-01-31','2020-03-31']]
top_zip.head()
class ProphetRegressor(BaseEstimator, RegressorMixin):
    """
    Scikit-learn wrapper for the Prophet forecaster.

    Parameters
    ----------
    growth: String 'linear' or 'logistic' to specify a linear or logistic
        trend.
    changepoints: List of dates at which to include potential changepoints. If
        not specified, potential changepoints are selected automatically.
    n_changepoints: Number of potential changepoints to include. Not used
        if input `changepoints` is supplied. If `changepoints` is not supplied,
        then n_changepoints potential changepoints are selected uniformly from
        the first 80 percent of the history.
    yearly_seasonality: Fit yearly seasonality.
        Can be 'auto', True, False, or a number of Fourier terms to generate.
    weekly_seasonality: Fit weekly seasonality.
        Can be 'auto', True, False, or a number of Fourier terms to generate.
    daily_seasonality: Fit daily seasonality.
        Can be 'auto', True, False, or a number of Fourier terms to generate.
    holidays: pd.DataFrame with columns holiday (string) and ds (date type)
        and optionally columns lower_window and upper_window which specify a
        range of days around the date to be included as holidays.
        lower_window=-2 will include 2 days prior to the date as holidays. Also
        optionally can have a column prior_scale specifying the prior scale for
        that holiday.
    seasonality_prior_scale: Parameter modulating the strength of the
        seasonality model. Larger values allow the model to fit larger seasonal
        fluctuations, smaller values dampen the seasonality. Can be specified
        for individual seasonalities using add_seasonality.
    holidays_prior_scale: Parameter modulating the strength of the holiday
        components model, unless overridden in the holidays input.
    changepoint_prior_scale: Parameter modulating the flexibility of the
        automatic changepoint selection. Large values will allow many
        changepoints, small values will allow few changepoints.
    mcmc_samples: Integer, if greater than 0, will do full Bayesian inference
        with the specified number of MCMC samples. If 0, will do MAP
        estimation.
    interval_width: Float, width of the uncertainty intervals provided
        for the forecast. If mcmc_samples=0, this will be only the uncertainty
        in the trend using the MAP estimate of the extrapolated generative
        model. If mcmc_samples>0, this will be integrated over all model
        parameters, which will include uncertainty in seasonality.
    uncertainty_samples: Number of simulated draws used to estimate
        uncertainty intervals.

    Attributes
    ----------
    model_ : object
        Underlying Prophet model. ``None`` until `fit` has been called.
    preds_ : object
        Prediction DataFrame returned by the Prophet model after forecasting.
        Contains various extra columns that may be useful. ``None`` until
        `predict` has been called.
    """

    def __init__(self, growth='linear', changepoints=None, n_changepoints=25,
                 yearly_seasonality='auto', weekly_seasonality='auto',
                 daily_seasonality='auto', holidays=None,
                 seasonality_prior_scale=10.0, holidays_prior_scale=10.0,
                 changepoint_prior_scale=0.05, mcmc_samples=0,
                 interval_width=0.80, uncertainty_samples=1000):
        # Every constructor argument is stored under its own name so that
        # `get_params()` can hand them straight to `Prophet(...)` in `fit`.
        self.growth = growth
        self.changepoints = changepoints
        self.n_changepoints = n_changepoints
        self.yearly_seasonality = yearly_seasonality
        self.weekly_seasonality = weekly_seasonality
        self.daily_seasonality = daily_seasonality
        self.holidays = holidays
        self.seasonality_prior_scale = seasonality_prior_scale
        self.holidays_prior_scale = holidays_prior_scale
        self.changepoint_prior_scale = changepoint_prior_scale
        self.mcmc_samples = mcmc_samples
        self.interval_width = interval_width
        self.uncertainty_samples = uncertainty_samples
        self.model_ = None
        self.preds_ = None

    @staticmethod
    def _as_column(values):
        """Strip pandas containers so *values* can be used as one plain column.

        A ``pd.Series`` handed to ``pd.DataFrame(..., columns=['ds'])`` is
        selected by its *name*, which silently produces an all-NaN column when
        the name is not ``'ds'``; a Series assigned as a column is aligned on
        its index. Unwrapping to the underlying array avoids both. A 2-D array
        with a single column is flattened; other shapes are left untouched so
        the DataFrame constructor still reports them.
        """
        if isinstance(values, (pd.DataFrame, pd.Series)):
            values = values.values
        if getattr(values, 'ndim', 1) == 2 and values.shape[1] == 1:
            values = values.ravel()
        return values

    def fit(self, X, y):
        """
        Fit the Prophet forecast model.

        Parameters
        ----------
        X : array, Series or single-column DataFrame, shape = (n_samples,)
            Dates.
        y : array, Series or single-column DataFrame, shape = (n_samples,)
            Time series values.

        Returns
        -------
        self : Returns an instance of self.
        """
        data = pd.DataFrame(self._as_column(X), columns=['ds'])
        data['y'] = self._as_column(y)
        self.model_ = Prophet(**self.get_params())
        self.model_.fit(data)
        return self

    def predict(self, X):
        """
        Predict using the Prophet forecast model.

        Parameters
        ----------
        X : array, Series or single-column DataFrame, shape = (n_samples,)
            Dates to generate predictions.

        Returns
        -------
        C : array, shape = (n_samples,)
            Returns predicted values (the ``yhat`` column of `preds_`).
        """
        data = pd.DataFrame(self._as_column(X), columns=['ds'])
        self.preds_ = self.model_.predict(data)
        return self.preds_['yhat'].values
class tsModel(multiprocessing.Process):
    """
    Fundamental class definition for estimating a model.

    Required initialization:
        pg: dictionary of postgres values for username, password, host,
            database, and port
        ds_key: DarkSky API key

    Optional initialization:
        bin_window: The size of the time window to model. Specify any valid
            Pandas offset string. All data will be resampled accordingly.
            Default is '15T', i.e. 15 minute windows.
        include_weather: Boolean to indicate if the model should use weather
            covariates for building and predicting. Defaults to True.
    """

    def __init__(self, pg, ds_key, bin_window='15T', include_weather=True):
        multiprocessing.Process.__init__(self)
        self.ds_key = ds_key
        self.include_weather = include_weather
        if include_weather:
            self.init_ds_obj()
        self.bin_window = bin_window
        self.pg_username = pg['username']
        self.pg_password = pg['password']
        self.pg_host = pg['host']
        self.pg_db = pg['database']
        self.pg_port = pg['port']
        self.engine = create_engine(
            f'postgresql://{self.pg_username}:{self.pg_password}@{self.pg_host}:{self.pg_port}/{self.pg_db}'
        )

    def init_ds_obj(self):
        """Thin wrapper to initialize DarkSky object"""
        self.ds = DarkSky(self.ds_key)

    def get_area_series(self,
                        idx,
                        series='scooter',
                        log_transform=False,
                        window_start=None,
                        window_end=None):
        """
        Function to query postgres for time series data.

        Parameters:
            idx: area identifier
            series: which series to query - options are 'scooter' or 'bicycle'
            log_transform: Boolean for whether the usage numbers should be
                logged (log(x + 1), applied in place). Defaults to False.
            window_start: Lower bound (inclusive) on `time`. Default is None
                (no lower bound).
            window_end: Upper bound (inclusive) on `time`. Default is None
                (no upper bound). Each bound is applied independently.
        """
        self.idx = idx
        self.series = series
        # NOTE(review): the query is assembled by string interpolation; if
        # `idx` or the window bounds can ever come from untrusted input this
        # should be switched to bound parameters.
        if self.series == 'scooter':
            q = f"SELECT n, in_use, area, district, tract, time FROM ts WHERE area = '{idx}'"
        else:
            q = f"SELECT bike_n, bike_in_use, area, district, tract, time FROM ts WHERE area = '{idx}'"
        # Add each bound only when supplied; previously a missing window_end
        # produced the literal condition  time <= 'None'.
        if window_start is not None:
            q = q + f" AND time >= '{window_start}'"
        if window_end is not None:
            q = q + f" AND time <= '{window_end}'"
        with psycopg2.connect(database=self.pg_db,
                              user=self.pg_username,
                              password=self.pg_password,
                              port=self.pg_port,
                              host=self.pg_host) as conn:
            self.area_series = pd.read_sql_query(q, conn)
        if self.bin_window != "15T":
            self.area_series = self.area_series.set_index('time').resample(
                self.bin_window).sum()
            self.area_series.reset_index(inplace=True)
        if log_transform:
            # Overwrite the usage columns themselves so that the later
            # select/rename steps feed the logged values to the model.
            if series == 'scooter':
                self.area_series['n'] = np.log(self.area_series['n'] + 1)
                self.area_series['in_use'] = np.log(
                    self.area_series['in_use'] + 1)
            else:
                self.area_series['bike_n'] = np.log(
                    self.area_series['bike_n'] + 1)
                self.area_series['bike_in_use'] = np.log(
                    self.area_series['bike_in_use'] + 1)

    def transform_area_series(self, select_var='n'):
        """
        Keep only the variable to be modelled in `area_series`.

        For the scooter series `select_var` is 'n', 'in_use' or 'diff'; for
        any other series it is 'bike_n', 'bike_in_use' or 'diff'. 'diff'
        creates an `available` column equal to max(0, total - in use) and
        drops both source columns. Unrecognised values leave the data as is.
        """
        if self.series == 'scooter':
            if select_var == 'n':
                self.area_series.drop(columns=['in_use'], inplace=True)
            elif select_var == 'in_use':
                self.area_series.drop(columns=['n'], inplace=True)
            elif select_var == 'diff':
                self.area_series['available'] = self.area_series.apply(
                    lambda x: max([0, x['n'] - x['in_use']]), axis=1)
                self.area_series.drop(columns=['n', 'in_use'], inplace=True)
        else:
            if select_var == 'bike_n':
                self.area_series.drop(columns=['bike_in_use'], inplace=True)
            elif select_var == 'bike_in_use':
                self.area_series.drop(columns=['bike_n'], inplace=True)
            elif select_var == 'diff':
                self.area_series['available'] = self.area_series.apply(
                    lambda x: max([0, x['bike_n'] - x['bike_in_use']]),
                    axis=1)
                self.area_series.drop(columns=['bike_n', 'bike_in_use'],
                                      inplace=True)

    def get_weather_data(self):
        """Query weather rows covering the time span of `area_series`."""
        start_time = self.area_series['time'].min()
        end_time = self.area_series['time'].max()
        q = f"SELECT * FROM weather WHERE time >= '{start_time}' AND time <= '{end_time}'"
        with psycopg2.connect(database=self.pg_db,
                              user=self.pg_username,
                              password=self.pg_password,
                              port=self.pg_port,
                              host=self.pg_host) as conn:
            self.weather = pd.read_sql_query(q, conn)
        if self.bin_window == '15T':
            # Forward-fill to 15-minute rows. `ffill` is the supported
            # spelling of the removed `Resampler.pad`.
            self.weather = self.weather.set_index('time').resample(
                '15T').ffill()
        elif self.bin_window == '1H':
            # Left untouched for hourly windows.
            pass
        else:
            self.weather = self.weather.set_index('time').resample(
                self.bin_window).mean()

    def prep_model_data(self):
        """
        Build `self.dat`, the frame handed to Prophet.

        Optionally right-joins the weather data on `time`, fills missing
        values with 0, drops the location columns and renames `time` to `ds`
        and whichever usage column is present to `y`.
        """
        if self.include_weather:
            self.dat = pd.merge(self.area_series,
                                self.weather,
                                how='right',
                                on='time')
        else:
            # NOTE: this aliases `area_series`, so the in-place edits below
            # also modify it.
            self.dat = self.area_series
        self.dat.fillna(0, inplace=True)
        if self.bin_window != '15T':
            self.dat.drop(columns=['district', 'tract'], inplace=True)
        else:
            self.dat.drop(columns=['area', 'district', 'tract'], inplace=True)
        self.dat.rename(columns={
            'time': 'ds',
            'n': 'y',
            'in_use': 'y',
            'bike_n': 'y',
            'bike_in_use': 'y',
            'available': 'y'
        },
                        inplace=True)

    def make_special_events(self):
        """Build `self.holidays` with one holiday label per festival."""
        sxsw = pd.DataFrame({
            'holiday':
            'sxsw',
            'ds':
            pd.to_datetime([
                '2018-03-09', '2018-03-10', '2018-03-11', '2018-03-12',
                '2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16',
                '2018-03-17', '2018-03-18', '2018-03-19', '2019-03-08',
                '2019-03-09', '2019-03-10', '2019-03-11', '2019-03-12',
                '2019-03-13', '2019-03-14', '2019-03-15', '2019-03-16',
                '2019-03-17', '2020-03-13', '2020-03-14', '2020-03-15',
                '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19',
                '2020-03-20', '2020-03-21', '2020-03-22'
            ])
        })
        # Labelled 'acl' (was a copy-pasted 'sxsw'), so the two events get
        # separate holiday effects instead of being pooled into one.
        acl = pd.DataFrame({
            'holiday':
            'acl',
            'ds':
            pd.to_datetime([
                '2018-10-05', '2018-10-06', '2018-10-07', '2018-10-12',
                '2018-10-13', '2018-10-14', '2019-10-04', '2019-10-05',
                '2019-10-06', '2019-10-11', '2019-10-12', '2019-10-13',
                '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-09',
                '2020-10-10', '2020-10-11'
            ])
        })
        self.holidays = pd.concat((sxsw, acl))

    def build_model(self, scale=0.05, hourly=False, holidays_scale=10.0):
        """
        Build the Prophet model.

        Parameters:
            scale: changepoint prior scale
            hourly: add an extra seasonality named 'hourly' (period 0.04167
                days, Fourier order 1)
            holidays_scale: holidays prior scale
        """
        self.make_special_events()
        self.model = Prophet(changepoint_prior_scale=scale,
                             holidays=self.holidays,
                             holidays_prior_scale=holidays_scale)
        if self.include_weather:
            for v in ['temp', 'wind', 'cloud_cover', 'humidity']:
                self.model.add_regressor(v)
        if hourly:
            self.model.add_seasonality(name='hourly',
                                       period=0.04167,
                                       fourier_order=1)

    def train_model(self):
        """Thin wrapper to train model"""
        self.model.fit(self.dat)

    def calculate_periods(self):
        """
        Determine number of 15-minute prediction periods required to reach
        a 4 week forecast from now.
        """
        # The timestamp column is called 'time' as queried, and only becomes
        # 'ds' when prep_model_data() renames it through the no-weather
        # alias. Accept either so this works in both configurations.
        series = self.area_series
        time_col = 'ds' if 'ds' in series.columns else 'time'
        max_d = series[time_col].max()
        horizon_end = datetime.datetime.now() + datetime.timedelta(weeks=4)
        t_diff = horizon_end - max_d
        # hours * 4 == number of 15-minute steps
        return int(t_diff.total_seconds() / 3600 * 4)

    def build_prediction_df(self, lat=30.267151, lon=-97.743057, periods=192):
        """
        Simple function to build the prediction dataframe.
        Lat and lon only required if using weather data. Defaults to center
        of Austin.
        """
        self.future = self.model.make_future_dataframe(periods=periods,
                                                       freq='15T')
        if self.include_weather:
            self.get_weather_pred(lat, lon)
            self.future = pd.merge(self.future,
                                   self.weather,
                                   how='left',
                                   left_on='ds',
                                   right_on='time')
            # NOTE(review): DataFrame.update aligns on index labels; confirm
            # that `future` and `future_weather` share an index here,
            # otherwise the forecast weather is not copied in.
            self.future.update(self.future_weather)

    def get_weather_pred(self, lat, lon):
        """Fetch the hourly forecast from DarkSky into `future_weather`."""
        w_pred = self.ds.get_forecast(
            lat,
            lon,
            extend=False,
            lang=languages.ENGLISH,
            units=units.AUTO,
            exclude=[weather.MINUTELY, weather.ALERTS],
            timezone='UTC')
        times = [x.time for x in w_pred.hourly.data]
        temps = [x.temperature for x in w_pred.hourly.data]
        precips = [x.precip_intensity for x in w_pred.hourly.data]
        rain_prob = [x.precip_probability for x in w_pred.hourly.data]
        humidities = [x.humidity for x in w_pred.hourly.data]
        wind = [x.wind_speed for x in w_pred.hourly.data]
        clouds = [x.cloud_cover for x in w_pred.hourly.data]
        uv = [x.uv_index for x in w_pred.hourly.data]
        self.future_weather = pd.DataFrame({
            'time': times,
            'temp': temps,
            'current_rain': precips,
            'rain_prob': rain_prob,
            'humidity': humidities,
            'wind': wind,
            'cloud_cover': clouds,
            'uv': uv
        })
        if self.bin_window == '15T':
            self.future_weather = self.future_weather.set_index(
                'time').resample('15T').ffill()
        elif self.bin_window == '1H':
            self.future_weather.set_index('time', inplace=True)
        else:
            self.future_weather = self.future_weather.set_index(
                'time').resample(self.bin_window).mean()
        # Drop the timezone from the index.
        self.future_weather = self.future_weather.tz_convert(None)

    def predict(self):
        """Thin wrapper to produce predictions"""
        self.fcst = self.model.predict(self.future)

    def preds_to_sql(self, var):
        """
        Append predictions from the last day onwards to the `predictions`
        postgres table, tagged with the area, `var` and today's date.
        """
        fcst_out = self.fcst[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
        fcst_out.columns = [col.lower() for col in fcst_out.columns]
        fcst_out['area'] = self.idx
        fcst_out['var'] = var
        fcst_out['modified_date'] = pd.to_datetime(
            datetime.datetime.today().strftime("%Y-%m-%d"))
        time_cutoff = pd.to_datetime(datetime.datetime.today() -
                                     datetime.timedelta(days=1))
        fcst_out = fcst_out[fcst_out['ds'] >= time_cutoff]
        fcst_out.to_sql('predictions',
                        self.engine,
                        if_exists='append',
                        index=False)

    def query_preds(self, time_stamp):
        """Load earlier predictions for this area from `time_stamp` onwards."""
        q = f"SELECT * FROM predictions WHERE area = '{self.idx}' AND ds >= '{time_stamp}'"
        with psycopg2.connect(database=self.pg_db,
                              user=self.pg_username,
                              password=self.pg_password,
                              port=self.pg_port,
                              host=self.pg_host) as conn:
            self.old_preds = pd.read_sql(q, conn)

    def plot_results(self):
        """
        Thin wrapper to plot results. Usually it is preferable to use object
        model and fcst dataframe to plot separately.
        """
        self.fig = self.model.plot(self.fcst)

    def cv(self, initial, period, horizon, log=False):
        """
        Simple function to do walk forward validation.

        Parameters:
            initial: length of time to train original model
            period: frequency with which to test beyond the original training
                period
            horizon: length of predictions
            log: Was the model trained on logged data? If so, every column
                except `ds` and `cutoff` is exponentiated before metrics are
                computed. Defaults to False.
        """
        self.df_cv = cross_validation(self.model,
                                      initial=initial,
                                      period=period,
                                      horizon=horizon)
        if log:
            self.df_cv = self.df_cv.apply(
                lambda x: np.exp(x) if x.name not in ['ds', 'cutoff'] else x)
        self.df_p = performance_metrics(self.df_cv)

    def save_results(self, save_path):
        """Thin wrapper to save forecast dataframe to pickle object"""
        self.fcst.to_pickle(save_path)
weekly_seasonality=True, daily_seasonality=True) m.fit(train_df[['ds', 'y']]) # Using the helper method ```Prophet.make_future_dataframe```, we create a dataframe which will contain all dates from the history and also extend into the future for those 92 days that we left out before. # In[ ]: future = m.make_future_dataframe(periods=prediction_size) future.tail(n=3) # We predict values with Prophet by passing in the dates for which we want to create a forecast. If we also supply the historical dates (as in our case), then in addition to the prediction we will get an in-sample fit for the history. Let's call the model's predict method with our future dataframe as an input: # In[ ]: forecast = m.predict(future) forecast.tail(n=3) # In the resulting dataframe you can see many columns characterizing the prediction, including trend and seasonality components as well as their confidence intervals. The forecast itself is stored in the yhat column. # # The Prophet library has its own built-in tools for visualization that enable us to quickly evaluate the result. # # - First, there is a method called Prophet.plot that plots all the points from the forecast: # - The Second function Prophet.plot_components might be much more useful in our case. It allows us to observe different components of the model separately: trend, yearly and weekly seasonality. In addition, if you supply information about holidays and events to your model, they will also be shown in this plot. # # Let's try it out: # In[ ]: m.plot(forecast) m.plot_components(forecast)
] model_fbp = Prophet(seasonality_mode='multiplicative', holidays=holidays) model_fbp.add_country_holidays(country_name='US') for feature in exogenous_features: model_fbp.add_regressor(feature, prior_scale=0.1) with suppress_stdout_stderr(): model_fbp.fit( df_train[["DATE", 'SHIPPED_QTY', 'cap'] + exogenous_features].rename(columns={ "DATE": "ds", 'SHIPPED_QTY': "y" })) forecast = model_fbp.predict( df_valid[["DATE", 'SHIPPED_QTY', 'cap'] + exogenous_features].rename(columns={"DATE": "ds"})) # model_fbp.plot_components(forecast) df_valid.loc[:, "Forecast_Prophet"] = np.exp(forecast.yhat).values # model_fbp.plot_components(forecast) actual, forecast = sum(np.exp(df_valid['SHIPPED_QTY']).values), sum( df_valid["Forecast_Prophet"].values) sub_error = mape(actual, forecast) print(datetime.strftime(test_start[i], '%Y-%B'), np.round(sub_error, 3)) error.append([ datetime.strftime(test_start[i], '%Y-%B'), actual, forecast, sub_error ])
# Index the price data by date in chronological order, then split out the two
# series used below: traded volume and closing price.
df = df.set_index('Date').sort_index()
vol = df['Volume']
df = df['Close']

# The bare string below is disabled exploratory plotting code, kept as a no-op
# expression. Its inner notes are Portuguese: "plotar anual" = "plot yearly",
# "plotar gráfico semanal" = "plot weekly chart".
"""
#plotar anual
fig,ax = plt.subplots(figsize = (10,5))
df.plot(ax=ax)
plt.show()

# plotar gráfico semanal
fig, ax = plt.subplots(figsize=(10,5))
vol.plot(ax=ax)
plt.show()
"""

# Back to a two-column frame with the column names Prophet requires.
df = df.reset_index().rename(columns={'Date': 'ds', 'Close': 'y'})

#Forecasting
model = Prophet()
# Register Brazil's country holidays with the model before fitting.
model.add_country_holidays(country_name='BR')
model.fit(df)

# Extend the history by 365 periods and predict over history + future.
future = model.make_future_dataframe(periods=365)
forecast = model.predict(future)

model.plot(forecast, xlabel='Date', ylabel='Close')
model.plot_components(forecast)
def fbprophet(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Run FB Prophet model on the adjusted close price and plot the forecast.

    With `-e/--end` the data is truncated at that date and the prediction is
    compared against the real prices that followed (backtesting).

    Parameters
    ----------
    other_args: List[str]
        List of argparse arguments (`-d/--days`, `-e/--end`)
    s_ticker: str
        Stock ticker, used in plot titles
    df_stock: pd.DataFrame
        Dataframe of prices indexed by date, with a "5. adjusted close" column

    Returns
    -------
    None. Results are plotted and printed; any exception is printed rather
    than propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="fbprophet",
        description="""
            Facebook Prophet is a forecasting procedure that is fast and provides
            completely automated forecasts that can be tuned by hand by data
            scientists and analysts. It was developed by Facebook's data science
            team and is open source.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=5 + ns_parser.n_days)[-1]):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Hold out the real prices for the prediction window, then train
            # only on data up to the chosen end date.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        df_stock = df_stock.sort_index(ascending=True)
        df_stock.reset_index(level=0, inplace=True)
        df_stock = df_stock[["date", "5. adjusted close"]]
        df_stock = df_stock.rename(columns={
            "date": "ds",
            "5. adjusted close": "y"
        })
        df_stock["ds"] = pd.to_datetime(df_stock["ds"])

        model = Prophet(yearly_seasonality=False, daily_seasonality=False)
        model.fit(df_stock)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=pd.to_datetime(df_stock["ds"].values[-1]),
            n_next_days=ns_parser.n_days,
        )
        # Predict on the history plus the actual upcoming market days.
        # make_future_dataframe() would append consecutive calendar days, so
        # weekend predictions would end up relabelled as trading days below.
        close_prices = pd.concat(
            [
                df_stock[["ds"]],
                pd.DataFrame({"ds": pd.to_datetime(l_pred_days)}),
            ],
            ignore_index=True,
        )
        forecast = model.predict(close_prices)
        df_pred = forecast["yhat"][
            -ns_parser.n_days:]  # .apply(lambda x: f"{x:.2f}  $")
        df_pred.index = l_pred_days

        _, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
        model.plot(
            forecast[:-ns_parser.n_days],
            ax=ax,
            xlabel="Time",
            ylabel="Share Price ($)",
        )
        _, _, ymin, ymax = ax.axis()
        # Mark the last training day and shade the prediction window.
        ax.vlines(
            df_stock["ds"].values[-1],
            ymin,
            ymax,
            linewidth=2,
            linestyle="--",
            color="k",
        )
        plt.axvspan(
            df_stock["ds"].values[-1],
            l_pred_days[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        plt.ylim(ymin, ymax)
        plt.xlim(df_stock["ds"].values[0],
                 get_next_stock_market_days(l_pred_days[-1], 1)[-1])

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction"
            )

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock["ds"].values[-1], df_future.index[0]],
                [
                    df_stock["y"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        plt.plot(df_pred.index, df_pred.values, lw=2, c="green")

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)

            # Upper panel: real price versus prediction.
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(
                df_future.index,
                df_future["5. adjusted close"],
                c="tab:blue",
                lw=3,
            )
            plt.plot(
                [df_stock["ds"].values[-1], df_future.index[0]],
                [
                    df_stock["y"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock["ds"].values[-1], df_pred.index[0]],
                [df_stock["y"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(
                df_stock["ds"].values[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            plt.ylabel("Share Price ($)")
            # The visibility flag is passed positionally: its keyword was
            # renamed from `b` to `visible`, and `b=` is rejected by current
            # Matplotlib releases.
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            # Lower panel: percentage error of the prediction.
            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)
            plt.plot(
                df_future.index,
                100 *
                (df_pred.values - df_future["5. adjusted close"].values) /
                df_future["5. adjusted close"].values,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                100 *
                (df_pred.values - df_future["5. adjusted close"].values) /
                df_future["5. adjusted close"].values,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock["ds"].values[-1], df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] -
                           df_future["5. adjusted close"].values[0]) /
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(
                df_stock["ds"].values[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)

        else:
            print("")
            print("Predicted share price:")
            print(df_pred.to_string())
        print("")

    except Exception as e:
        # Command boundary: report the problem and return to the caller's
        # prompt instead of propagating.
        print(e)
        print("")
# Cache the download so Streamlit reruns with the same ticker reuse the result.
@st.cache
def load_data(ticker):
    """Download price history for *ticker* between START and TODAY.

    The date index is moved into a regular column so it can be selected by
    name below.
    """
    data = yf.download(ticker, START, TODAY)
    data.reset_index(inplace = True)
    return data


data_load_state = st.text("Loading Data...")
data = load_data(selected_stock)
# Update the placeholder once loading finishes; previously the same
# "Loading Data..." text was written again, so the status never changed.
data_load_state.text("Loading Data... done!")

#Prediction of prices using Prophet
# Prophet requires the columns to be named `ds` (date) and `y` (value).
df_train = data[['Date','Close']]
df_train = df_train.rename(columns = {"Date":"ds", "Close":'y'})

m = Prophet()
m.fit(df_train)
future = m.make_future_dataframe(periods = period)
forecast = m.predict(future)

st.write(f'Forecast plot for {n_years} years')
fig1 = plot_plotly(m,forecast)
st.plotly_chart(fig1)

#Extra Components graphs
st.write("COMPONENTS")
fig2 = m.plot_components(forecast)
st.write(fig2)
def create_prophet_m(app_name,z1,cpu_perc_list,delay=24):
    """Fit two Prophet models on ``z1.bw`` and score the hold-out forecast.

    A "realtime" model is fitted on the whole series and forecasts `delay`
    daily periods ahead. A second model is fitted on everything except the
    last `delay` rows and evaluated against them.

    Parameters
    ----------
    app_name : label used in the printed report and as the metrics index.
    z1 : frame with a ``bw`` column whose index holds the timestamps.
    cpu_perc_list : list that receives a CPU-usage sample after the first fit.
    delay : number of trailing rows held out / periods forecast ahead.

    Returns
    -------
    (metrics_df, model, forecast, pred_df, pred_r); ``metrics_df`` is empty
    when no hold-out row could be matched with a prediction.
    """

    def _blank_outliers(frame):
        # When the maximum lies at least 2x the median above the median,
        # null out every value that is >= 2x the median.
        median = frame.y.median()
        top = frame.y.quantile(1)
        if (top - median) >= (2 * median):
            frame.loc[frame.y >= (2 * median), 'y'] = None

    n_rows = len(z1)

    # ---- realtime prediction on the full history ----
    full_df = z1.bw.iloc[0:n_rows].reset_index()
    full_df.columns = ['ds', 'y']
    _blank_outliers(full_df)

    model_r = Prophet(yearly_seasonality=False,
                      changepoint_prior_scale=.1,
                      seasonality_prior_scale=0.05)
    model_r.fit(full_df)
    cpu_perc_list.append(py.cpu_percent())
    # NOTE(review): this rebinds the local name only. The caller's list keeps
    # the sample appended above but is not collapsed to its maximum, and the
    # second sample further down never reaches the caller.
    cpu_perc_list = [max(cpu_perc_list)]

    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='D'))
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(forecast_r['yhat'][n_rows:(n_rows + delay)])
    pred_r = pred_r.reset_index()

    # ---- train / test split for evaluation ----
    train_end_index = len(z1.bw) - delay
    train_df = z1.bw.iloc[0:train_end_index].reset_index()
    test_df = z1.bw.iloc[train_end_index:n_rows].reset_index()

    train_df.columns = ['ds', 'y']
    _blank_outliers(train_df)

    test_df.columns = ['ds', 'y']
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    model = Prophet(yearly_seasonality=False,
                    changepoint_prior_scale=.1,
                    seasonality_prior_scale=0.05)
    model.fit(train_df)
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    forecast = model.predict(
        model.make_future_dataframe(periods=len(test_df), freq='D'))
    forecast.index = forecast['ds']

    pred = pd.DataFrame(forecast['yhat'][train_end_index:n_rows])
    print('length forecasted non realtime=',len(pred))
    pred = pred.reset_index()

    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    if not len(pred_df) > 0:
        # Nothing to score: hand back an empty metrics frame.
        return (pd.DataFrame(), model, forecast, pred_df, pred_r)

    pred_df['error_test'] = pred_df.y - pred_df.yhat
    MSE = mse(pred_df.y, pred_df.yhat)
    RMSE = math.sqrt(MSE)
    pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
    ape = pred_df['APE']
    MAPE = pred_df.APE.mean()

    lowest_rate = ape.quantile(0) / 100
    highest_rate = ape.quantile(1) / 100
    middle_rate = ape.quantile(.50) / 100

    print("App name:",app_name)
    print("RMSE :",RMSE)
    print("MAPE :",MAPE)

    # Mean APE over rows below the 98th-percentile APE.
    mape_q98 = ape[pred_df.APE < ape.quantile(0.98)].mean()
    std_MAPE = math.sqrt(((pred_df.APE - MAPE) ** 2).mean())

    metrics = pd.DataFrame({'length':len(z1),
                            'test_rmse':RMSE,
                            'test_mape':MAPE,
                            'std_mape':std_MAPE,  # standard deviation of APE
                            'min_error_rate':lowest_rate,
                            'max_error_rate':highest_rate,
                            'median_error_rate':middle_rate,
                            'test_mape_98':mape_q98},
                           index=[app_name])

    return (metrics, model, forecast, pred_df, pred_r)
def predict_series(df, date, special_dates, n=100):
    """Fit Prophet on the tail of ``df.total_cases`` and return a Plotly figure.

    Parameters
    ----------
    df : frame with ``date``, ``total_cases`` and ``total_tests`` columns.
    date : string whose first 10 characters are a ``YYYY-MM-DD`` date; it
        decides how many trailing rows are used for training.
    special_dates : frame with ``date``, ``value`` and ``event`` columns,
        drawn as event markers.
    n : accepted for interface compatibility; its value is not used.

    Returns
    -------
    The figure with the original series, the forecast and the event markers.
    """

    def _as_pydatetimes(values):
        # Plain Python datetimes for the x axes.
        return [pd.Timestamp(v, tz=None).to_pydatetime() for v in values]

    testdf = df.copy()
    date = date[:10]
    cutoff = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    total_rows = df.shape[0]
    later_positions = [pos for pos, day in enumerate(df.date) if day > cutoff]
    if later_positions:
        train_rows = total_rows - later_positions[-1]
    else:
        train_rows = total_rows

    series = pd.DataFrame({'ds': df.date.values, 'y': df.total_cases.values})
    series_train = series.tail(train_rows).copy()

    m = Prophet()
    m.fit(series_train)
    future = m.make_future_dataframe(periods=total_rows - train_rows + 20)
    forecast = m.predict(future)
    forecast = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper'
                         ]].tail(total_rows - train_rows + 6 + 20)

    series_x = _as_pydatetimes(series['ds'].values)
    event_x = _as_pydatetimes(special_dates['date'].values)
    forecast_x = _as_pydatetimes(forecast['ds'].values)
    tests_x = _as_pydatetimes(testdf['date'].values)

    events_trace = go.Scatter(
        x=event_x,
        y=special_dates['value'].values,
        text=[f'{event}' for event in special_dates['event'].values],
        hovertemplate='<br>date: %{x}<br>' + '<b>%{text}</b>',
        mode='markers',
        name='events',
        marker=dict(color='navy',
                    size=15,
                    line=dict(color='MediumPurple', width=1)))

    original_trace = go.Scatter(x=series_x,
                                y=series['y'].values,
                                mode='lines',
                                name='original',
                                line=dict(color='dodgerblue', width=4),
                                opacity=0.5)

    predicted_trace = go.Scatter(x=forecast_x,
                                 y=forecast['yhat'].values,
                                 mode='lines',
                                 name='predicted',
                                 line=dict(color='firebrick',
                                           width=3,
                                           dash='dash'),
                                 opacity=0.7)

    # Built but deliberately not added to the figure.
    tests_trace = go.Scatter(x=tests_x,
                             y=testdf['total_tests'].values,
                             mode='lines',
                             name='original',
                             line=dict(color='green', width=3))

    fig = go.Figure()
    for trace in (original_trace, predicted_trace, events_trace):
        fig.add_trace(trace)

    fig.update_layout(title="Prediction from " + date)
    fig.update_layout(
        xaxis_range=[
            datetime.datetime(2020, 1, 1),
            datetime.datetime(2020, 6, 1)
        ],
        xaxis={'type': 'date'},
    )
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Total cases")
    fig.update_layout(hovermode='x unified')
    fig.update_layout(height=600,
                      margin=dict(l=10, r=10, t=50, b=30),
                      paper_bgcolor='rgba(0,0,0,0)',
                      plot_bgcolor='rgba(0,0,0,0)',
                      font=dict(
                          family="Courier New, monospace",
                          size=10,
                          color="#7f7f7f",
                      ))
    fig.update_xaxes(showgrid=False, )
    return fig
def test_added_regressors(self):
    """Check extra regressors end to end: registration, standardization,
    feature-matrix construction and forecast components."""
    m = Prophet()
    # Four regressors: two additive with explicit priors, one multiplicative,
    # and one binary feature with standardization forced on.
    m.add_regressor('binary_feature', prior_scale=0.2)
    m.add_regressor('numeric_feature', prior_scale=0.5)
    m.add_regressor(
        'numeric_feature2', prior_scale=0.5, mode='multiplicative'
    )
    m.add_regressor('binary_feature2', standardize=True)
    df = DATA.copy()
    df['binary_feature'] = [0] * 255 + [1] * 255
    df['numeric_feature'] = range(510)
    df['numeric_feature2'] = range(510)
    with self.assertRaises(ValueError):
        # Require all regressors in df
        m.fit(df)
    df['binary_feature2'] = [1] * 100 + [0] * 410
    m.fit(df)
    # Check that standardizations are correctly set
    # (the binary feature is expected to be left unscaled: mu 0, std 1)
    self.assertEqual(
        m.extra_regressors['binary_feature'],
        {
            'prior_scale': 0.2,
            'mu': 0,
            'std': 1,
            'standardize': 'auto',
            'mode': 'additive',
        },
    )
    self.assertEqual(
        m.extra_regressors['numeric_feature']['prior_scale'], 0.5)
    self.assertEqual(
        m.extra_regressors['numeric_feature']['mu'], 254.5)
    self.assertAlmostEqual(
        m.extra_regressors['numeric_feature']['std'], 147.368585, places=5)
    self.assertEqual(
        m.extra_regressors['numeric_feature2']['mode'], 'multiplicative')
    # No prior_scale was given for binary_feature2; 10. is the expected value.
    self.assertEqual(
        m.extra_regressors['binary_feature2']['prior_scale'], 10.)
    self.assertAlmostEqual(
        m.extra_regressors['binary_feature2']['mu'], 0.1960784, places=5)
    self.assertAlmostEqual(
        m.extra_regressors['binary_feature2']['std'], 0.3974183, places=5)
    # Check that standardization is done correctly
    df2 = m.setup_dataframe(df.copy())
    self.assertEqual(df2['binary_feature'][0], 0)
    self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
    self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
    # Check that feature matrix and prior scales are correctly constructed
    seasonal_features, prior_scales, component_cols, modes = (
        m.make_all_seasonality_features(df2)
    )
    self.assertEqual(seasonal_features.shape[1], 30)
    names = ['binary_feature', 'numeric_feature', 'binary_feature2']
    true_priors = [0.2, 0.5, 10.]
    for i, name in enumerate(names):
        # Each regressor owns exactly one feature column, and that column
        # carries the regressor's prior scale.
        self.assertIn(name, seasonal_features)
        self.assertEqual(sum(component_cols[name]), 1)
        self.assertEqual(
            sum(np.array(prior_scales) * component_cols[name]),
            true_priors[i],
        )
    # Check that forecast components are reasonable
    future = pd.DataFrame({
        'ds': ['2014-06-01'],
        'binary_feature': [0],
        'numeric_feature': [10],
        'numeric_feature2': [10],
    })
    # Predicting without binary_feature2 must fail, mirroring fit().
    with self.assertRaises(ValueError):
        m.predict(future)
    future['binary_feature2'] = 0
    fcst = m.predict(future)
    self.assertEqual(fcst.shape[1], 37)
    self.assertEqual(fcst['binary_feature'][0], 0)
    # Additive and multiplicative regressor totals equal the sum of their
    # member regressors (binary_feature contributes 0 here).
    self.assertAlmostEqual(
        fcst['extra_regressors_additive'][0],
        fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
    )
    self.assertAlmostEqual(
        fcst['extra_regressors_multiplicative'][0],
        fcst['numeric_feature2'][0],
    )
    self.assertAlmostEqual(
        fcst['additive_terms'][0],
        fcst['yearly'][0] + fcst['weekly'][0]
        + fcst['extra_regressors_additive'][0],
    )
    self.assertAlmostEqual(
        fcst['multiplicative_terms'][0],
        fcst['extra_regressors_multiplicative'][0],
    )
    # yhat = trend * (1 + multiplicative terms) + additive terms
    self.assertAlmostEqual(
        fcst['yhat'][0],
        fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0])
        + fcst['additive_terms'][0],
    )
    # Check works if constant extra regressor at 0
    df['constant_feature'] = 0
    m = Prophet()
    m.add_regressor('constant_feature')
    m.fit(df)
    self.assertEqual(m.extra_regressors['constant_feature']['std'], 1)
def create_prophet_m(source_name, z1, delay=24):
    """Fit two Prophet models on ``z1.app_count`` and score the held-out tail.

    A "realtime" model is fitted on every row and forecasts ``delay`` steps
    (``freq='H'``) past the end of the data.  A second model is fitted on all
    but the last ``delay`` rows and evaluated against those rows.

    Args:
        source_name: Label printed in the report and used as the index of the
            returned metrics frame.
        z1: Object exposing an ``app_count`` series; the series index becomes
            the ``ds`` column and its values the ``y`` column.
        delay: Number of trailing rows held out for testing, and number of
            steps forecast by the realtime model.

    Returns:
        ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics frame
        (empty when no test row could be matched), the evaluation model, its
        full forecast, the merged actual/predicted test frame, and the
        realtime predictions.
    """
    total_rows = len(z1)
    train_end_index = len(z1.app_count) - delay

    def as_ds_y(series):
        # Move the index into a column and apply Prophet's column names.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    train_df = as_ds_y(z1.app_count.iloc[0:train_end_index])
    full_df = as_ds_y(z1.app_count.iloc[0:total_rows])
    test_df = as_ds_y(z1.app_count.iloc[train_end_index:total_rows])

    # -- Realtime prediction: fit on everything, look `delay` steps ahead --
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='H'))
    forecast_r.index = forecast_r['ds']
    realtime_yhat = forecast_r['yhat'][total_rows:(total_rows + delay)]
    pred_r = pd.DataFrame(realtime_yhat).reset_index()

    # -- Evaluation: fit on the training rows, predict the held-out rows --
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    forecast = model.predict(
        model.make_future_dataframe(periods=len(test_df), freq='H'))
    forecast.index = forecast['ds']
    test_yhat = forecast['yhat'][train_end_index:total_rows]
    pred = pd.DataFrame(test_yhat).reset_index()

    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        report = (
            ("App name:", source_name),
            ("MSE :", MSE),
            ("RMSE :", RMSE),
            ("MAPE :", MAPE),
        )
        for label, value in report:
            print(label, value)
        # MAPE restricted to errors strictly below the 98th percentile.
        q98 = pred_df['APE'].quantile(0.98)
        mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()
        df = pd.DataFrame(
            {
                'test_rmse': RMSE,
                'test_mape': MAPE,
                'test_mape_98': mape_q98,
            },
            index=[source_name],
        )
    return (df, model, forecast, pred_df, pred_r)
# In[29]: model = Prophet(holidays=promotions, weekly_seasonality=True, daily_seasonality=True) model.fit(df) # In[30]: future = model.make_future_dataframe(periods=24, freq='m') future.tail() # In[31]: forecast = model.predict(future) # In[32]: forecast.tail() # In[33]: forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail() # In[34]: model.plot(forecast) # In[35]:
# Fit a default Prophet model on total_performa.
model_prophet = Prophet()
model_prophet.fit( total_performa ) # The total_performa DataFrame was produced in the step that renamed the columns to "ds" & "y"

# Author's note (translated): having imported fbprophet earlier, the next step
# forecasts one year ahead using fbprophet's make_future_dataframe with a
# monthly ("M") frequency, so 1 year = 12 months.
"""Setelah sebelumnya mengimport fbprophet pada pengujian ini, saya akan melanjutkan kembali yaitu melakukan tahapan prediksi untuk 1 tahun ke depan dengan memanfaatkan metode dari fbprophet yaitu .make_future_dataFrame dengan memakai perhitungan "M" atau bukan. Jadi 1 tahun= 12 bulan."""

prediksi_karyawan_masa_depan = model_prophet.make_future_dataframe( periods=12, freq="M" ) # M = month, while periods is the number of months in 1 year
prediksi_karyawan_masa_depan.tail()
prediksi_karyawan_masa_depan["ds"].describe()

# Author's note (translated): after building the future frame, run the
# prediction to obtain values over the following 12 months (the periods of
# prediksi_karyawan_masa_depan).
"""Setelah melakukan pemodelan dengan prophet pada DataFrame prediksi_karyawan_masa_depan, saya melakukan langkah prediksi kembali agar mendapatkan rentang waktu selama 12 bulan ke depan (mengikuti periode pada DataFrame prediksi_karyawan_masa_depan)."""

nilai_prediksi_karyawan = model_prophet.predict(prediksi_karyawan_masa_depan)
nilai_prediksi_karyawan[[ "ds", "trend", "yearly", "yhat", "yhat_lower", "yhat_upper" ]].tail()

# Author's note (translated): meaning of the output columns —
#   ds                    : the time column (taken from year_graduated)
#   trend                 : rise and fall of the prediction over time
#   yearly                : yearly seasonality
#   yhat                  : predicted value
#   yhat_lower/yhat_upper : error range of the prediction interval
# The result is visualised below.
"""Output yang dihasilkan pada DataFrame nilai_prediksi_karyawan terdapat ds, yearly, yhat, yhat_lower & yhat_upper yang bisa diartikan sebagai berikut: 1. Ds= kolom yang menyatakan waktu. Yang dimana kolom tersebut adalah year_graduated. 2. Trend = merupakan naik dan turunnya suatu prediksi yang diperoleh dari perubahan dari waktu ke waktu. 3. Yearly = Musim tahunan yang menyatakan tingkat kinerja dari para karyawan. 4. yhat = Nilai prediksi 5. Yhat_lower & yhat_upper = Merupakan range error pada interval data prediksi. Untuk lebih detailnya saya akan membuatkan sebuah visualisasi dari variabel DataFrame nilai_prediksi_karyawan menggunakan salah satu library yaitu matplotlib. """

fig = model_prophet.plot(nilai_prediksi_karyawan)
def fitting_model(data,
                  weather_included=False,
                  holiday_included=False,
                  CNY_season=False,
                  last_7_days_validation=False,
                  four_season=False):
    """Fit a Prophet model on the ``Reserve`` column and forecast 8 steps ahead.

    Args:
        data: DataFrame with a ``Date`` column holding ``YYYYMMDD`` values and
            a ``Reserve`` column holding the target.
        weather_included: Accepted for compatibility; the weather regressor
            code is disabled, so this flag currently has no effect.
        holiday_included: Load holidays from ``datas/All_Holiday.csv`` via
            ``getting_holiday_info``.
        CNY_season: Replace the built-in weekly/yearly seasonality with two
            conditional weekly seasonalities driven by ``on_CNY_season``.
        last_7_days_validation: Hold out the last 7 rows from fitting and
            print the RMSE of the forecast against them.
        four_season: Replace the built-in weekly/yearly seasonality with four
            conditional seasonalities driven by
            ``summer_wintter_spring_auttum``.

    Returns:
        The DataFrame returned by ``Prophet.predict`` for the history plus
        8 future periods.

    Raises:
        ValueError: If a ``Date`` value does not match ``%Y%m%d``.
    """
    data_copy = data.copy()
    # Parse the whole column at once.  Going through ``iloc[i]`` row by row is
    # slow and can upcast the date to a float (e.g. "20200101.0"), which then
    # fails to parse.
    data_copy['Date'] = pd.to_datetime(
        data_copy['Date'].astype(str), format='%Y%m%d')
    data_copy = remove_outlier(data_copy, 'Reserve')

    # Configure the model.
    m = Prophet(daily_seasonality=False)
    if holiday_included:
        m.holidays = getting_holiday_info("datas/All_Holiday.csv")
    if CNY_season:
        m.weekly_seasonality = False
        m.yearly_seasonality = False
        m.add_seasonality(name="Weekly on CNY Season", period=7,
                          fourier_order=3, condition_name="CNY season")
        m.add_seasonality(name="Weekly on other dates", period=7,
                          fourier_order=3, condition_name="Other season")
    if four_season:
        m.weekly_seasonality = False
        m.yearly_seasonality = False
        for label, condition in (("Spring Season", 'Spring'),
                                 ("Summer Season", 'Summer'),
                                 ("Autumn Season", 'Autumn'),
                                 ("Winter Season", 'Winter')):
            m.add_seasonality(name=label, period=91.5, fourier_order=5,
                              prior_scale=1, condition_name=condition)

    data_history = pd.DataFrame({
        'ds': data_copy["Date"],
        "y": data_copy['Reserve']
    })
    if last_7_days_validation:
        data_history = data_history[:-7]
    # NOTE: weather_included is a no-op; the temperature regressor is disabled.
    if CNY_season:
        data_history["CNY season"], data_history["Yearly season"] = (
            on_CNY_season(data_history))
        data_history["Other season"] = ~data_history["CNY season"]
    if four_season:
        (data_history["Spring"], data_history["Summer"],
         data_history["Autumn"], data_history["Winter"]) = (
            summer_wintter_spring_auttum(data_history))

    # Fit, then extend the history by 8 periods.
    m.fit(data_history)
    future_date = m.make_future_dataframe(periods=8)
    # The conditional seasonalities need their condition columns on the
    # prediction frame as well.
    if CNY_season:
        future_date["CNY season"], future_date["Yearly season"] = (
            on_CNY_season(future_date))
        future_date["Other season"] = ~future_date["CNY season"]
    if four_season:
        (future_date["Spring"], future_date["Summer"],
         future_date["Autumn"], future_date["Winter"]) = (
            summer_wintter_spring_auttum(future_date))

    future = m.predict(future_date)

    if last_7_days_validation:
        # Pair each held-out row with the forecast for the same date.  Taking
        # ``tail(7)`` of an 8-step forecast is shifted one step relative to
        # the 7 held-out rows.
        holdout = data_copy[-7:][['Date', 'Reserve']]
        scored = holdout.merge(future[['ds', 'yhat']],
                               left_on='Date', right_on='ds', how='inner')
        print(f"RMSE:{rmse(scored['yhat'], scored['Reserve'])}")
    return future
def main():
    """Streamlit entry point: market overview page and Prophet forecast page.

    The "Home" page shows the most active, top gaining and top losing stocks
    fetched through ``si``.  The "Stock Prediction using ML" page downloads
    the close prices of the typed ticker since ``START``, fits Prophet on
    them and renders the forecast and its components.
    """
    st.title("Welcome to Predict Future of Stocks.")
    menu = ["Home", "Stock Prediction using ML"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        # Each table is fetched once; the earlier duplicate calls whose
        # results were discarded only doubled the network traffic.
        st.subheader("Recommendations")
        st.subheader("Today's Most Active Users")
        st.write(si.get_day_most_active())
        st.subheader("Today's Top Gainers")
        st.write(si.get_day_gainers())
        st.subheader("Today's Top Losers")
        st.write(si.get_day_losers())

    elif choice == "Stock Prediction using ML":
        st.subheader("Stock Prediction using ML")
        START = "2015-01-01"
        TODAY = date.today().strftime("%Y-%m-%d")
        selected_stock = st.text_input("Type Stocks's name...")
        submit = st.button('Search')
        if submit:
            if not selected_stock.strip():
                # Nothing to look up; avoid calling the data sources with "".
                st.warning("Please enter a stock ticker.")
                return

            st.write("Live Price : ", si.get_live_price(selected_stock))
            st.write("Market state : ", si.get_market_status())

            # NOTE(review): this slider only exists while `submit` is True —
            # confirm it behaves as intended when the script reruns.
            n_years = st.slider("Years of prediction:", 1, 10)
            period = n_years * 365

            def load_data(ticker):
                # Download prices and expose the date index as a column.
                data = yf.download(ticker, START, TODAY)
                data.reset_index(inplace=True)
                return data

            data_load_state = st.text('Loading data...')
            data = load_data(selected_stock)
            data_load_state.text('Loading data... done!')

            st.subheader('Raw data')
            st.write(data.tail())

            def plot_raw_data():
                # Open and close prices on one chart with a range slider.
                fig = go.Figure()
                fig.add_trace(
                    go.Scatter(x=data['Date'], y=data['Open'],
                               name="stock_open"))
                fig.add_trace(
                    go.Scatter(x=data['Date'], y=data['Close'],
                               name="stock_close"))
                fig.layout.update(
                    title_text='Time Series data with Rangeslider',
                    xaxis_rangeslider_visible=True)
                st.plotly_chart(fig)

            plot_raw_data()

            # Predict forecast with Prophet: it expects columns ds and y.
            df_train = data[['Date', 'Close']]
            df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})
            m = Prophet()
            m.fit(df_train)
            future = m.make_future_dataframe(periods=period)
            forecast = m.predict(future)

            # Show and plot forecast.
            st.subheader('Forecast data')
            st.write(forecast.tail())
            st.write(f'Forecast plot for {n_years} years')
            fig1 = plot_plotly(m, forecast)
            st.plotly_chart(fig1)
            st.write("Forecast components")
            fig2 = m.plot_components(forecast)
            st.write(fig2)
# Finish and display the current figure, labelling the y axis with `sensor`.
matplotlib.pyplot.legend()
matplotlib.pyplot.ylabel(sensor)
matplotlib.pyplot.show()

# Prophet .fit(df) requires columns ds (dates) and y
df['y'] = df['raw']
df['ds'] = df.index

# All built-in seasonalities are disabled; a custom "daily" seasonality
# (period=1, fourier_order=5) is added instead.
m = Prophet(
    changepoint_range=1.0,
    n_changepoints=150,
    changepoint_prior_scale=30,
    seasonality_prior_scale=35,
    growth='linear',
    holidays=None,
    daily_seasonality=False,
    weekly_seasonality=False,
    yearly_seasonality=False,
).add_seasonality(name="daily", period=1, fourier_order=5, prior_scale=10)
m.fit(df)
# No dataframe is passed — presumably Prophet then predicts over the fitted
# history; confirm against the Prophet version in use.
forecast = m.predict()

# data, model, and changepoint plot
fig1 = m.plot(forecast)
a = add_changepoints_to_plot(fig1.gca(), m, forecast)
fig1.show()

# component plot
fig2 = m.plot_components(forecast)
fig2.show()
between 1997 and 2018""".format(curr_ticker) X_train = pd.io.sql.read_sql(query, conn) X_train.columns = ['ds', 'y'] try: model = Prophet() model.fit(X_train) except: print('####################') print('Modeling Error on ', curr_ticker) error_log['Ticker'].append(curr_ticker) error_log['TransactionDate'].append('') error_log['Issue'].append('Modeling Error') continue pred = model.predict(ds_test.copy()) for index, row in pred.iterrows(): try: insert_pred(conn, 'pred_price_sp500base', curr_ticker, row) except: print('####################') print('Prediction Insertion Error on ', curr_ticker) error_log['Ticker'].append(curr_ticker) error_log['TransactionDate'].append(row['ds']) error_log['Issue'].append('Prediction Insertion') # To end the query conn.commit() curr_endtime = datetime.now() curr_time = str(curr_endtime - curr_starttime) time_log['Ticker'].append(curr_ticker)
# Plot the raw daily closing price series.
jfc.set_index('CHART_DATE').CLOSE.plot(figsize=(15, 10))
plt.title('Jollibee Daily Closing Price', fontsize=25)

from fbprophet import Prophet

#Forecasting closing prices
# Prophet expects the columns to be named ds and y.
ts = jfc[['CHART_DATE', 'CLOSE']]
ts.columns = ['ds', 'y']
ts.head()

# Rows on or after this date are kept out of fitting and used for scoring.
HOLDOUT_START = '2019-03-01'
m = Prophet(daily_seasonality=True, yearly_seasonality=True).fit(ts[ts.ds < HOLDOUT_START])

# Forecast 7*4*12 = 336 daily steps past the end of the fitted data.
future = m.make_future_dataframe(periods=7*4*12, freq='D')
pred = m.predict(future)

from matplotlib import pyplot as plt

fig1 = m.plot(pred)
plt.title('Jollibee: Forecasted Daily Closing Price', fontsize=25)

# Predictions inside the hold-out window (bounded by the last date in ts).
pred_holdout = pred[(pred.ds >= HOLDOUT_START)&(pred.ds <= ts.ds.max())].set_index('ds').yhat
# Hold-out rows from ts, indexed by date.
target_holdout = ts[ts.ds >= HOLDOUT_START].set_index('ds')
# Align on date; dropna() removes dates present on only one side.
comb = pd.concat([pred_holdout, target_holdout], axis=1).dropna()
comb

import numpy as np

# Root mean squared error over the aligned hold-out rows.
rmse_holdout = np.sqrt(comb.yhat.subtract(comb.y).pow(2).mean())
rmse_holdout
# Mean absolute error over the same rows.
mae_holdout = np.mean(np.abs(comb.yhat.subtract(comb.y)))
'Microsoft':'MSFT', 'Tesla':'TSLA' } st.markdown("# Welcome to Stock4Cast!") st.write('Stock4cast lets you forecast the stock prices of top companies for the next 2 years.') image = Image.open('stock.jpg') st.image(image, use_column_width=True) if action: if len(company[choice]) > 1: end = dt.datetime.now() start = end.year-1 df = reader.get_data_yahoo(company[choice],start,end) st.markdown(f'## {choice} statistics') st.write(df.sort_index(ascending=False)) df.reset_index(inplace=True) df = df[['Date','Close']] df = df.rename(columns={'Date':'ds','Close':'y'}) # create prophet model model = Prophet(yearly_seasonality=True, daily_seasonality=True) model.fit(df) future_dates = model.make_future_dataframe(periods=input_periods, freq='MS') forecast = model.predict(future_dates) st.markdown(f'## {choice} close price forecast') model.plot(forecast, uncertainty=True) st.pyplot() model.plot_components(forecast) st.pyplot()
def predict(values, timestamp, n_pred):
    """Forecast the next ``n_pred`` points of a series with Prophet.

    The series is resampled (mean) to its inferred frequency, falling back to
    ``'1H'`` when no frequency can be inferred.  A second model is fitted on
    the first 70% of the rows and scored on the remaining 30% to report a
    MAPE.

    Args:
        values: Observations; converted to ``float``.
        timestamp: Timestamps aligned with ``values``.
        n_pred: Number of future periods to forecast.

    Returns:
        ``(predictions, mape, forecasting_engine)``.  ``predictions`` is a
        list with one dict per future period holding ``Value``,
        ``Value_Upper``, ``Value_Lower`` (rounded to 3 decimals) and
        ``Timestamp`` (``'%Y-%m-%d %H:%M:%S'``).  ``(None, None, None)`` is
        returned when forecasting fails for any reason; the failure is logged.
    """
    import logging

    df = pd.DataFrame(columns=['value', 'api_timestamp'])
    df['value'] = values
    df['api_timestamp'] = timestamp
    df['value'] = df.value.astype(float)
    df = df.set_index('api_timestamp')

    # Try to infer the frequency.  infer_freq raises on fewer than 3
    # timestamps; treat that the same as "nothing inferred".
    try:
        pred_freq = pd.infer_freq(index=df.index)
    except (TypeError, ValueError):
        pred_freq = None
    if not pred_freq:
        pred_freq = '1H'

    try:
        forecasting_engine = 'Prophet'

        # Resample to make predictions consistent.
        df = df.resample(pred_freq).mean()

        # Prophet requires specific column names.
        df = df.reset_index()
        df = df.rename(index=str, columns={'api_timestamp': 'ds', 'value': 'y'})

        # Prophet fails with inf values, so replace them with NaN.  The
        # result must be assigned back: replace() is not in-place.
        df = df.replace([np.inf, -np.inf], np.nan)

        # Fit on the full series and forecast n_pred periods ahead.
        m = Prophet()
        m.fit(df)
        future = m.make_future_dataframe(periods=n_pred, freq=pred_freq)
        forecast = m.predict(future)

        # MAPE: refit on the first 70% and predict the remaining 30%.
        n_holdout = round(len(df) / 100 * 30)
        df_mape = df.iloc[0:len(df) - n_holdout]
        m = Prophet()
        m.fit(df_mape)
        future_mape = m.make_future_dataframe(periods=n_holdout, freq=pred_freq)
        forecast_mape = m.predict(future_mape)

        actual = df.y.values[len(df_mape):len(df)]
        fitted = forecast_mape.yhat.values[len(df_mape):len(df)]
        # Score only rows that have a finite observation (gaps introduced by
        # resampling and the inf values replaced above are NaN).
        observed = np.isfinite(actual)
        mape = np.round(
            mean_absolute_percentage_error(actual[observed], fitted[observed]),
            3)

        # The forecast frame holds the history followed by the n_pred future
        # rows, so the predictions are its last n_pred rows.
        temp = [
            {
                'Value': np.round(row.yhat, 3),
                'Value_Upper': np.round(row.yhat_upper, 3),
                'Value_Lower': np.round(row.yhat_lower, 3),
                'Timestamp': pd.to_datetime(row.ds).strftime('%Y-%m-%d %H:%M:%S'),
            }
            for row in forecast.tail(n_pred).itertuples(index=False)
        ]
        return temp, mape, forecasting_engine
    except Exception:
        # Best effort by design: report the failure and signal "no forecast".
        logging.getLogger(__name__).exception("Prophet forecasting failed")
        return None, None, None
# prepare train and test sets train_size = int(prophet_df.shape[0] * 0.9) train_df = prophet_df.iloc[:train_size] test_df = prophet_df.iloc[train_size + 1:] # build a prophet model pro_model = Prophet() # fit the model pro_model.fit(train_df) # prepare a future dataframe test_dates = pro_model.make_future_dataframe(periods=test_df.shape[0]) # forecast values forecast_df = pro_model.predict(test_dates) # plot the forecast pro_model.plot(forecast_df) plt.show() # plot against true data plt.plot(forecast_df.yhat, c='r', label='Forecast') plt.plot(forecast_df.yhat_lower.iloc[train_size + 1:], linestyle='--', c='b', alpha=0.3, label='Confidence Interval') plt.plot(forecast_df.yhat_upper.iloc[train_size + 1:], linestyle='--', c='b',
0, }) superbowls = pd.DataFrame({ 'holiday': 'chunjie', 'ds': pd.to_datetime([ '2016-02-07', '2016-02-08', '2016-02-09', '2016-02-10', '2016-02-11', '2016-02-12', '2016-02-13', '2017-01-27', '2017-01-28', '2017-01-29', '2017-01-30', '2017-01-31', '2017-02-01', '2017-02-02' ]), 'lower_window': 0, 'upper_window': 0, }) holidays = pd.concat((playoffs, superbowls)) prophet = Prophet( holidays=holidays, changepoint_prior_scale=0.01, seasonality_prior_scale=0.009, holidays_prior_scale=25.0, ) #yearly_seasonality=True prophet.fit(df) future = prophet.make_future_dataframe(periods=90, include_history=False) forecast = prophet.predict(future) print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]) print('ISIR =', np.mean(forecast['yhat'])) prophet.plot_components(forecast) plt.show()
def create_prophet_m(source_name, z1, delay):
    """Fit two Prophet models on ``z1.bw`` and score the held-out tail.

    A "realtime" model is fitted on every row and forecasts ``delay`` steps
    (``freq='H'``) past the end of the data.  A second model is fitted on all
    but the last ``delay`` rows, after masking large training values, and is
    evaluated against the held-out rows.

    Args:
        source_name: Label printed in the report and used as the index of the
            returned metrics frame.
        z1: Object exposing a ``bw`` series; the series index becomes the
            ``ds`` column and its values the ``y`` column.
        delay: Number of trailing rows held out for testing, and number of
            steps forecast by the realtime model.

    Returns:
        ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics frame
        (empty when no test row could be matched), the evaluation model, its
        full forecast, the merged actual/predicted test frame, and the
        realtime predictions.
    """
    import math

    total_rows = len(z1)
    train_end_index = len(z1.bw) - delay

    def as_ds_y(series):
        # Move the index into a column and apply Prophet's column names.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    train_df = as_ds_y(z1.bw.iloc[0:train_end_index])
    full_df = as_ds_y(z1.bw.iloc[0:total_rows])
    test_df = as_ds_y(z1.bw.iloc[train_end_index:total_rows])

    # --- removing outliers in trainset ---
    # When the maximum sits at least 2x the median above the median, blank
    # every training value >= 2x the median.
    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)
    print(max(train_df.y))
    if (q100 - q50) >= (2 * q50):
        print('ind')
        train_df.loc[train_df.y >= (2 * q50), 'y'] = None

    # -- Realtime prediction: fit on everything, look `delay` steps ahead --
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(
        forecast_r['yhat'].iloc[total_rows:(total_rows + delay)])
    pred_r = pred_r.reset_index()

    # -- Evaluation: fit on the training rows, predict the held-out rows --
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:total_rows])
    pred = pred.reset_index()

    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        ape = pred_df['APE']
        MAPE = ape.mean()
        # Error rates are quantiles of the APE column.  Calling quantile() on
        # the whole frame yields one value per column, not a scalar, which
        # cannot populate a single metrics cell.
        min_error_rate = ape.quantile(0) / 100
        max_error_rate = ape.quantile(1) / 100
        median_error_rate = ape.quantile(.50) / 100
        std_MAPE = math.sqrt(((ape - MAPE) ** 2).mean())
        print("App name:", source_name)
        print("MSE :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)
        # MAPE restricted to errors strictly below the 98th percentile.
        q98 = ape.quantile(0.98)
        mape_q98 = ape[ape < q98].mean()
        df = pd.DataFrame(
            {
                'length': len(z1),
                'test_rmse': RMSE,
                'test_mape': MAPE,
                'std_mape': std_MAPE,  # standard deviation of the APE values
                'min_error_rate': min_error_rate,
                'max_error_rate': max_error_rate,
                'median_error_rate': median_error_rate,
                'test_mape_98': mape_q98,
            },
            index=[source_name],
        )
    return (df, model, forecast, pred_df, pred_r)
end=pd.to_datetime(max(test_arima.index)), dynamic=True) pred_test_ci = pred_test.conf_int() # ceating test and train emsembled result # test result result_test = test result_test['y_ARIMA'] = np.array(pred_test.predicted_mean)[1:] # prophet m = Prophet(weekly_seasonality=False, yearly_seasonality=False, changepoint_prior_scale=5) m.fit(train); # creating pred train and test data frame past = m.make_future_dataframe(periods=0, freq='W') future = pd.DataFrame(test['ds']) pf_train_pred = m.predict(past) pf_test_pred = m.predict(future) pf_train_pred = pf_train_pred[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].set_index([past.index]) pf_test_pred = pf_test_pred[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].set_index([future.index]) # ceating test and train emsembled result # test result result_test['y_Prophet'] = np.array(pf_test_pred.yhat) # Ansemble result_test['y_Ensembled'] = result_test[["y_ARIMA", "y_Prophet"]].mean(axis=1) train = prod[:(max(train.index) + 1 + test_points)] test = prod[(max(train.index) + 1):(max(train.index) + 1 + test_points)] rem_data = prod[(max(train.index) + test_points):]
def create_prophet_m(self, app_name, z1, delay=24):
    """Fit two Prophet models on ``z1.bw`` and score the held-out tail.

    A "realtime" model is fitted on every row and forecasts ``delay`` steps
    (``freq='H'``) past the end of the data.  A second model is fitted on all
    but the last ``delay`` rows and evaluated against those rows.  Both
    models are fitted after large values have been masked.

    Args:
        app_name: Label used as the index of the returned metrics frame.
        z1: Object exposing a ``bw`` series; the series index becomes the
            ``ds`` column and its values the ``y`` column.
        delay: Number of trailing rows held out for testing, and number of
            steps forecast by the realtime model.

    Returns:
        ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics frame
        (empty when no test row could be matched), the evaluation model, its
        full forecast, the merged actual/predicted test frame, and the
        realtime predictions.
    """
    # Only the imports this function uses are kept; the unused ones
    # (pymysql, tqdm, logging, datetime) made the call fail wherever those
    # packages were not installed.
    import math
    import warnings

    import pandas as pd
    from fbprophet import Prophet
    from sklearn.metrics import mean_squared_error as mse

    # NOTE(review): this silences warnings process-wide, not just here.
    warnings.filterwarnings("ignore")

    def as_ds_y(series):
        # Move the index into a column and apply Prophet's column names.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    def mask_outliers(frame):
        # When the maximum sits at least 2x the 75th percentile above the
        # median, blank every value >= 2x the 75th percentile.
        q50 = frame.y.median()
        q100 = frame.y.quantile(1)
        q75 = frame.y.quantile(.75)
        if (q100 - q50) >= (2 * q75):
            frame.loc[frame.y >= (2 * q75), 'y'] = None

    total_rows = len(z1)

    # --- Realtime prediction: fit on everything, look `delay` steps ahead ---
    full_df = as_ds_y(z1.bw.iloc[0:total_rows])
    mask_outliers(full_df)
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(
        forecast_r['yhat'].iloc[total_rows:(total_rows + delay)])
    pred_r = pred_r.reset_index()

    # --- Evaluation: fit on the training rows, predict the held-out rows ---
    train_end_index = len(z1.bw) - delay
    train_df = as_ds_y(z1.bw.iloc[0:train_end_index])
    test_df = as_ds_y(z1.bw.iloc[train_end_index:total_rows])
    mask_outliers(train_df)

    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:total_rows])
    pred = pred.reset_index()

    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        # MAPE restricted to errors strictly below the 98th percentile.
        q98 = pred_df['APE'].quantile(0.98)
        mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()
        df = pd.DataFrame(
            {
                'length': len(z1),
                'test_rmse': RMSE,
                'test_mape': MAPE,
                'test_mape_98': mape_q98,
            },
            index=[app_name],
        )
    return (df, model, forecast, pred_df, pred_r)
# ----- set DATES index as a column ----- # swe_for_prophet = swe_for_prophet.reset_index().rename( columns={'datetime': 'ds'}) # --------- # # Fit Model # # --------- # swe_model.fit(swe_for_prophet) # ---------------------------------------- # # Forecasting to the Future # # ---------------------------------------- # test = swe_model.make_future_dataframe(periods=100) f = swe_model.predict(test) fig = swe_model.plot(f) plt.show() swe_forecast = swe_model \ .make_future_dataframe(periods=24, freq='W') swe_forecast = swe_model.predict(swe_forecast) swe_names = ['SWE_%s' % column for column in swe_forecast.columns] merge_swe_forecast = swe_forecast.copy() merge_swe_forecast.columns = swe_names forecast = swe_forecast \ .rename(columns={'ds': 'Date'})