def add_prophet_features(df_shop):
    """Append rolling Prophet forecast columns to one shop's daily frame.

    For every cutoff ``m`` from ``biweek_id.max() - 1`` down to 1, a Prophet
    model is fitted on the rows whose ``biweek_id >= m`` and asked for the 14
    periods following that window. All forecasts are stacked, their columns
    prefixed with ``prophet_``, and left-joined onto ``df_shop`` by date.

    Args:
        df_shop: DataFrame with at least the columns ``day``, ``pays_count``,
            ``biweek_id`` and ``days_from_beginning``.

    Returns:
        A new DataFrame with the same number of rows as ``df_shop`` plus the
        ``prophet_*`` columns (``prophet_t`` and ``prophet_ds`` removed).

    Raises:
        ValueError: if no training window had at least 7 non-null targets,
            so there is nothing to merge.
        Exception: if the merged frame does not have as many rows as
            ``df_shop``.
    """
    df = df_shop[['day', 'pays_count']].rename(
        columns={'day': 'ds', 'pays_count': 'y'})

    results = []
    biweek_max = df_shop.biweek_id.max()
    for m in range(biweek_max - 1, 0, -1):
        df_train = df[df_shop.biweek_id >= m]

        # Skip windows with too few observed targets to fit a model.
        if df_train.y.notnull().sum() < 7:
            continue

        p = Prophet().fit(df_train)
        future = p.make_future_dataframe(14, include_history=False)
        results.append(p.predict(future))

    if not results:
        # pd.concat([]) would raise a far less helpful ValueError here.
        raise ValueError(
            "no biweek window had at least 7 non-null observations")

    df_res = pd.concat(results)
    # Prefix from the stacked frame itself rather than the last loop variable.
    df_res.columns = ['prophet_%s' % c for c in df_res.columns]

    df_res = df_shop.merge(df_res, how='left',
                           left_on='day', right_on='prophet_ds')
    del df_res['prophet_t'], df_res['prophet_ds']

    # Several windows can forecast the same day; keep the last one stacked.
    # (inplace must be a real bool: pandas rejects inplace=1.)
    df_res = df_res.drop_duplicates(subset='days_from_beginning', keep='last')

    if len(df_res) != len(df_shop):
        raise Exception("size doesn't match")

    return df_res
def get_predictions(validate, train):
    """Forecast unit sales with Prophet for a fixed sample of store-47 items.

    Only the hard-coded ``example_items`` sold in store 47 are predicted. For
    each item the last ``TRAIN_SIZE`` days of (date-filled) training data are
    fitted and the forecast is joined onto the validation rows by date.

    Args:
        validate: DataFrame with columns ``id``, ``date``, ``store_nbr`` and
            ``item_nbr`` (the rows to predict).
        train: DataFrame with columns ``date``, ``store_nbr``, ``item_nbr``
            and ``unit_sales``.

    Returns:
        Tuple ``(result, problem_pairs)``: ``result`` is a DataFrame with
        columns ``id`` and ``unit_sales`` (predictions clipped to
        [0, 999999]); ``problem_pairs`` lists the ``(item_nbr, store_nbr)``
        pairs for which fitting raised ``ValueError``.
    """
    CV_SIZE = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    TRAIN_SIZE = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327,
                     1239746, 765520, 1463867, 1010755, 1473396]

    # Loop-invariant: computed once instead of once per item.
    total_dates = train['date'].unique()

    store47examples = validate.loc[
        (validate.store_nbr == store_nbr)
        & (validate.item_nbr.isin(example_items))]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))

    frames = []
    problem_pairs = []
    # Group by a scalar key: a one-element list key yields tuple group names
    # on current pandas, which int() cannot convert.
    for name, y in store47examples.groupby('item_nbr'):
        item_nbr = int(name)

        df = train[(train.item_nbr == item_nbr)
                   & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])

        X = df[-TRAIN_SIZE:][['date', 'unit_sales']]
        X.columns = ['ds', 'y']

        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue

        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)

        # Inner join keeps only the dates present in the validation rows.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    # DataFrame.append no longer exists in pandas 2.x; concatenate once.
    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])

    return (result, problem_pairs)
def test_make_future_dataframe(self):
    """Check the dates produced by ``make_future_dataframe``.

    A model fitted on the first half of ``DATA`` is asked for three future
    periods with history excluded, first at daily and then at month-end
    frequency; each returned ``ds`` value is compared with a fixed date.
    """
    half = 468 // 2
    forecaster = Prophet()
    forecaster.fit(DATA.head(half))

    expectations = (
        ('D', ['2013-04-26', '2013-04-27', '2013-04-28']),
        ('M', ['2013-04-30', '2013-05-31', '2013-06-30']),
    )
    for freq, dates in expectations:
        future = forecaster.make_future_dataframe(
            periods=3, freq=freq, include_history=False)
        expected = pd.DatetimeIndex(dates)
        self.assertEqual(len(future), 3)
        for position, wanted in enumerate(expected):
            self.assertEqual(future.iloc[position]['ds'], wanted)
def hello():
    """Fit Prophet on the CSV at ``url`` and return a 365-period forecast.

    Prints a greeting, reads the CSV referenced by the module-level ``url``,
    log-transforms its ``y`` column, fits a default Prophet model and predicts
    over the history plus 365 future periods.

    Returns:
        str: the full forecast frame serialised with
        ``DataFrame.to_json(orient='table')``. Values are on the log scale,
        because the model was fitted on ``log(y)``.
    """
    print('Hello, world!')

    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])

    m = Prophet()
    m.fit(df)

    # Notebook-style .head()/.tail() previews were dropped: their results
    # were discarded, so they had no effect inside a function.
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)

    return forecast.to_json(orient='table')
def build_forecast(
        data,
        forecast_range,
        truncate_range=0
):
    """build a forecast for publishing

    Note:
        ``data['date']`` is converted to datetime in place, so the caller's
        frame is modified.

    Args:
        data (:obj:`pandas.DataFrame`): data to build prediction; must have
            ``date`` and ``avgPrice`` columns
        forecast_range (int): how many periods into the future to forecast
        truncate_range (int, optional): if > 0, keep only rows newer than
            ``truncate_range`` days before the last observed date

    Returns:
        pandas.DataFrame: collection of data + forecast info
            ['date', 'avgPrice', 'yhat', 'yhat_low', 'yhat_high', 'prediction']

    """
    data['date'] = pd.to_datetime(data['date'])
    filter_date = data['date'].max()

    ## Build DataFrame ##
    predict_df = pd.DataFrame()
    predict_df['ds'] = data['date']
    predict_df['y'] = data['avgPrice']

    ## Run prediction ##
    # https://facebookincubator.github.io/prophet/docs/quick_start.html#python-api
    model = Prophet()
    model.fit(predict_df)
    future = model.make_future_dataframe(periods=forecast_range)
    # Predict once; the original ran predict() twice and discarded one result.
    forecast = model.predict(future)

    # Right join keeps every forecast row; 'y' is NaN for future dates.
    predict_df = pd.merge(
        predict_df, forecast,
        on='ds',
        how='right'
    )

    ## Build report for endpoint ##
    report = pd.DataFrame()
    report['date'] = pd.to_datetime(predict_df['ds'], format='%Y-%m-%d')
    report['avgPrice'] = predict_df['y']
    report['yhat'] = predict_df['yhat']
    report['yhat_low'] = predict_df['yhat_lower']
    report['yhat_high'] = predict_df['yhat_upper']
    # Rows after the last observed date are pure predictions.
    report['prediction'] = False
    report.loc[report.date > filter_date, 'prediction'] = True

    if truncate_range > 0:
        cut_date = filter_date - timedelta(days=truncate_range)
        report = report.loc[report.date > cut_date]

    return report
def train_prophet(df, modelDir, confidence=0.99):
    """Fit Prophet on ``df`` and write the in-sample forecast to ``modelDir``.

    Args:
        df: training frame with Prophet's ``ds`` and ``y`` columns.
        modelDir: directory that receives ``forecasted_<confidence>.csv``.
        confidence: passed to Prophet as ``interval_width``.

    Returns:
        The forecast DataFrame over the training dates (``periods=0``), with
        an extra ``y`` column holding the observed values.
    """
    m = Prophet(
        yearly_seasonality=True,
        daily_seasonality=True,
        interval_width=confidence,
    )
    # Fitting is wrapped so the backend's console output is silenced.
    with suppress_stdout_stderr():
        m.fit(df)

    # Predict over the history only. print() with one argument behaves the
    # same on Python 2 and 3; the old print statement is a SyntaxError on 3.
    print("PREDICTING!")
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)

    # Merge in the historical data.
    # NOTE(review): this aligns on index labels, so it presumably expects df
    # to carry a default 0..n-1 index in date order -- confirm with callers.
    forecast["y"] = df.y.astype(float)

    # Persist the forecast (not the model object) as CSV.
    forecast.to_csv(
        pJoin(modelDir, "forecasted_{}.csv".format(confidence)),
        index=False,
    )
    return forecast
# Script: fit Prophet on closing prices from 'UNVR.JK2.csv', plot the
# forecast with the model's changepoints, then plot the mean trend-rate
# change ("delta") for each potential changepoint.
plt.style.use('ggplot')

# Load the price history, indexed by the parsed 'Date' column.
market_df = pd.read_csv('UNVR.JK2.csv', index_col='Date', parse_dates=True)
market_df.head()

# Prophet expects the columns 'ds' (timestamp) and 'y' (value).
df = market_df.reset_index().rename(columns={'Date': 'ds', 'Close': 'y'})
# Optional log transform of the target, currently disabled:
#df['y'] = np.log(df['y'])
df.head()
df.set_index('ds').y.plot()

# Fit on the full history and forecast 366 periods past its end.
model = Prophet()
model.fit(df)
future = model.make_future_dataframe(periods=366)
forecast = model.predict(future)

# Bare expression: only displays a value when run interactively.
model.changepoints

# Forecast plot with a dashed vertical line at every changepoint.
figure = model.plot(forecast)
for changepoint in model.changepoints:
    plt.axvline(changepoint, ls='--', lw=1)

# Average the fitted 'delta' parameter over its first axis and show one bar
# per potential changepoint.
deltas = model.params['delta'].mean(0)
fig = plt.figure(facecolor='w')
ax = fig.add_subplot(111)
ax.bar(range(len(deltas)), deltas)
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
ax.set_ylabel('Rate change')
ax.set_xlabel('Potential changepoint')
def _null_out_spikes(frame):
    """Set ``y`` to None where it is >= 2x the median, if the max is that far above the median."""
    q50 = frame.y.median()
    q100 = frame.y.quantile(1)
    if (q100 - q50) >= (2 * q50):
        frame.loc[frame.y >= (2 * q50), 'y'] = None


def create_prophet_m(app_name, z1, delay=24):
    """Fit hourly Prophet models for one application's response time.

    Two models are built from ``z1.app_rsp_time``:

    * a "realtime" model on the full series, used to forecast the next
      ``delay`` hours;
    * an evaluation model on everything except the last ``delay`` points,
      scored against those held-out points.

    Before each fit, spikes are blanked out by ``_null_out_spikes``.

    Args:
        app_name: label printed with the metrics and used as the index of
            the metrics frame.
        z1: DataFrame with an ``app_rsp_time`` column; its index becomes
            Prophet's ``ds`` column.
        delay: number of hourly periods to hold out and to forecast.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics
        frame (empty if no test rows survived ``dropna``), the evaluation
        model, its full forecast, the test rows joined with predictions, and
        the realtime ``yhat`` values for the next ``delay`` hours.
    """
    n_obs = len(z1)

    ### --- For realtime pred ---###
    full_df = z1.app_rsp_time.reset_index()
    full_df.columns = ['ds', 'y']
    _null_out_spikes(full_df)

    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']

    # Rows past the history are the realtime forecast (positional slice).
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_obs:n_obs + delay])
    pred_r = pred_r.reset_index()
    #--- completes realtime prediction ---#

    # Hold out the last `delay` observations for evaluation.
    train_end_index = len(z1.app_rsp_time) - delay
    train_df = z1.app_rsp_time.iloc[0:train_end_index].reset_index()
    test_df = z1.app_rsp_time.iloc[train_end_index:n_obs].reset_index()
    train_df.columns = ['ds', 'y']
    test_df.columns = ['ds', 'y']

    # Only the training part is cleaned; the test part stays raw.
    _null_out_spikes(train_df)

    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']

    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_obs])
    pred = pred.reset_index()

    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        print("App name:", app_name)
        print("MSE  :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE over errors below their 98th percentile (largest excluded).
        mape_q98 = pred_df['APE'][
            pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

        df = pd.DataFrame({'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'test_mape_98': mape_q98},
                          index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
#graph the data to have idea for parameter settings for model store_1_df.plot(x='ds', y='y', color='red', kind='scatter') store_2_df.plot(x='ds', y='y', color='blue', kind='scatter') plt.show() #fit the prophet model and set the parameters based on observations in graph #model for store_1 m = Prophet() m.fit(store_1_df) #model for store_2 n = Prophet() n.fit(store_2_df) #future dataframes to hold predictions for both stores future_1 = m.make_future_dataframe(periods=30) future_2 = n.make_future_dataframe(periods=30) #make predictions to be held in future dataframes forecast_1 = m.predict(future_1) forecast_2 = n.predict(future_2) #plot the results of the predicition fig1 = m.plot(forecast_1) fig2 = n.plot(forecast_2) plt.show() #show the decomposition of the model fig3 = m.plot_components(forecast_1)
period=91.5, fourier_order=7, prior_scale=0.02) m_TS.fit(train_df_TunnelStudy) m_TM = Prophet(interval_width=0.95, yearly_seasonality=True, weekly_seasonality=True, changepoint_prior_scale=2) m_TM.add_seasonality(name='quarterly', period=91.5, fourier_order=7, prior_scale=0.02) m_TM.fit(train_df_TunnelMiscellaneous) # Predict a future dataframe by quarter future_AirHoliday = m_AH.make_future_dataframe(periods=len(test_df_AirHoliday), freq='Q') print(future_AirHoliday.tail()) future_AirBusiness = m_AB.make_future_dataframe( periods=len(test_df_AirBusiness), freq='Q') print(future_AirBusiness.tail()) future_AirVFR = m_AV.make_future_dataframe(periods=len(test_df_AirVFR), freq='Q') print(future_AirVFR.tail()) future_AirStudy = m_AS.make_future_dataframe(periods=len(test_df_AirStudy), freq='Q') print(future_AirStudy.tail()) future_AirMiscellaneous = m_AM.make_future_dataframe( periods=len(test_df_AirMiscellaneous), freq='Q') print(future_AirMiscellaneous.tail()) future_SeaHoliday = m_SH.make_future_dataframe(periods=len(test_df_SeaHoliday),
def prophetForecast(self, x, y, z):
    """Forecast per-minute counts from ``temp.csv`` with holiday effects.

    Reads ``temp.csv`` (columns ``time_interval`` and ``count``) and
    ``weather_dates.csv`` (extra inclement-weather rows), fits a Prophet
    model with the holiday table built below, and writes the prediction to
    ``<y>MinPredictionOutput.csv``.

    Args:
        x: number of future 1-minute periods to predict.
        y: string used in the prediction column name
            (``<y>_min_prediction``) and as the output file prefix.
        z: Prophet ``changepoint_prior_scale`` (Prophet's default is 0.05).

    Returns:
        None. The result is only written to disk.
    """

    def _holiday(name, dates, lower_window=0, upper_window=1):
        # Build one Prophet holiday frame. lower_window is the number of days
        # BEFORE the date (<= 0), upper_window the number of days AFTER it.
        return pd.DataFrame({
            'holiday': name,
            'ds': pd.to_datetime(dates),
            'lower_window': lower_window,
            'upper_window': upper_window,
        })

    df = pd.read_csv('temp.csv')
    # Prophet requires the column names 'ds' and 'y'.
    df = df.rename(columns={'time_interval': 'ds', 'count': 'y'})

    w_df = pd.read_csv('weather_dates.csv')

    # Holiday/event frames whose effect the model should learn.
    Thanksgiving = _holiday('Thanksgiving', [
        '2017-11-23', '2018-11-22', '2019-11-28', '2020-11-26',
        '2021-11-25', '2022-11-24'
    ])
    Christmas = _holiday('Christmas', [
        '2017-12-25', '2018-12-25', '2019-12-25', '2020-12-25',
        '2021-12-25', '2022-12-25'
    ])
    # Fixed: Super Bowl LVI was played on 2022-02-13, not 2022-02-06.
    Superbowl = _holiday('Superbowl', [
        '2018-02-04', '2019-02-03', '2020-02-02', '2021-02-07', '2022-02-13'
    ])
    NewYears = _holiday('NewYears', [
        '2018-01-01', '2019-01-01', '2020-01-01', '2021-01-01', '2022-01-01'
    ])
    # Fixed: the two dates used to be one comma-joined string, which is not
    # a parseable date. Dates subject to change.
    SpringBreak = _holiday('SpringBreak', ['2018-03-11', '2019-03-12'],
                           lower_window=-6, upper_window=0)
    # Fixed: St. Patrick's Day is March 17; the dates said February 17.
    StPatricks = _holiday('StPatricks', [
        '2018-03-17', '2019-03-17', '2020-03-17', '2021-03-17', '2022-03-17'
    ])
    Valentines = _holiday('Valentines', [
        '2018-02-14', '2019-02-14', '2020-02-14', '2021-02-14', '2022-02-14'
    ])
    ForthOfJuly = _holiday('ForthOfJuly', [
        '2018-07-04', '2019-07-04', '2020-07-04', '2021-07-04', '2022-07-04'
    ])

    # Freezing rain day.
    # NOTE(review): holiday windows are day counts; a fractional -.125 is
    # presumably truncated by Prophet -- confirm before relying on it.
    InclementWeather = _holiday('InclementWeather', [
        '2018-02-06 12:00:00', '2018-02-06 15:00:00',
        '2018-02-06 18:00:00', '2018-02-06 21:00:00'
    ], lower_window=-.125, upper_window=0)
    # Add the rows from weather_dates.csv; on duplicate timestamps the
    # file's row wins (keep='last').
    InclementWeather = pd.concat([InclementWeather, w_df],
                                 ignore_index=True)
    InclementWeather.drop_duplicates(subset=['ds'], inplace=True,
                                     keep='last')

    holidays = pd.concat([
        Thanksgiving, Christmas, Superbowl, NewYears, SpringBreak,
        StPatricks, Valentines, ForthOfJuly, InclementWeather
    ])

    m = Prophet(yearly_seasonality=False,
                holidays=holidays,
                changepoint_prior_scale=z)

    df['floor'] = 0
    print("before")
    m.fit(df)  # the original author noted that this call "freezes"
    print("after")

    # x one-minute steps past the end of the history, history excluded.
    future = m.make_future_dataframe(periods=x,
                                     freq='1min',
                                     include_history=False)
    forecast = m.predict(future)
    forecast['floor'] = 0

    prediction_column = y + '_min_prediction'
    forecast = forecast.rename(columns={
        'ds': 'date_and_time',
        'yhat': prediction_column
    })
    forecast[['date_and_time', prediction_column]].to_csv(
        y + 'MinPredictionOutput.csv')
    return
def stock():
    """Flask view: show the stock form (GET) or a Prophet forecast plot (POST).

    On POST the form supplies ``market`` plus a market-specific code field,
    ``learn`` (years of history) and ``pred`` (periods to forecast). Daily
    prices are downloaded, a Prophet model is fitted on the closing price,
    and the plot is saved to ``static/img/stock.png``.

    Returns:
        The rendered ``stock/stock.html`` (GET) or ``stock/stock_res.html``
        (POST) template.
    """
    menu = {
        'ho': 0, 'da': 1, 'ml': 0, 'se': 0, 'co': 0,
        'cg': 0, 'cr': 0, 'st': 1, 'wc': 0
    }

    if request.method == 'GET':
        return render_template('stock/stock.html',
                               menu=menu,
                               weather=get_weather(),
                               kospi=kospi_dict,
                               kosdaq=kosdaq_dict,
                               nyse=nyse_dict,
                               nasdaq=nasdaq_dict)

    # Resolve ticker code and company name; the Korean markets get a
    # '.KS'/'.KQ' ticker suffix. Any other market value is treated as NASDAQ.
    market = request.form['market']
    if market == 'KS':
        code = request.form['kospi_code']
        company = kospi_dict[code]
        code += '.KS'
    elif market == 'KQ':
        code = request.form['kosdaq_code']
        company = kosdaq_dict[code]
        code += '.KQ'
    elif market == 'NY':
        code = request.form['nyse_code']
        company = nyse_dict[code]
    else:
        code = request.form['nasdaq_code']
        company = nasdaq_dict[code]

    learn_period = int(request.form['learn'])
    pred_period = int(request.form['pred'])
    current_app.logger.debug(
        f'{market}, {code}, {learn_period}, {pred_period}')

    # Training window: `learn_period` years back, up to yesterday.
    today = datetime.now()
    start_learn = today - timedelta(days=learn_period * 365)
    end_learn = today - timedelta(days=1)

    stock_data = pdr.DataReader(code,
                                data_source='yahoo',
                                start=start_learn,
                                end=end_learn)
    current_app.logger.info(f"get stock data: {company}({code})")

    df = pd.DataFrame({'ds': stock_data.index, 'y': stock_data.Close})
    df.reset_index(inplace=True)
    try:
        del df['Date']
    except KeyError:
        # Only a missing column is expected here; a bare except would also
        # hide unrelated failures.
        current_app.logger.error('Date error')

    model = Prophet(daily_seasonality=True)
    model.fit(df)
    future = model.make_future_dataframe(periods=pred_period)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    img_file = os.path.join(current_app.root_path, 'static/img/stock.png')
    fig.savefig(img_file)
    # The file's mtime is handed to the template.
    mtime = int(os.stat(img_file).st_mtime)

    return render_template('stock/stock_res.html',
                           menu=menu,
                           weather=get_weather_main(),
                           mtime=mtime,
                           company=company,
                           code=code)
def fbprophet(ts_obj,
              gaussian_window_size,
              step_size,
              changepoint_prior_scale=.05,
              growth='linear',
              yearly_seasonality='auto',
              weekly_seasonality='auto',
              daily_seasonality='auto',
              holidays=None,
              seasonality_mode='additive',
              seasonality_prior_scale=10,
              holidays_prior_scale=10,
              plot_anomaly_score=False,
              plot_forecast=False,
              grid_search_mode=False):
    """Fit Prophet on a whole time series and score it.

    The model is trained on the entire series and then predicts over the
    same timestamps (``periods=0``).

    Args:
        ts_obj: time-series object exposing ``dataframe`` (columns
            ``timestamp`` and ``value``), ``miss``, ``dateformat``,
            ``timestep``, ``get_length()``, ``set_miss()`` and
            ``get_probationary_index()``.
        gaussian_window_size, step_size: forwarded to
            ``ah.determine_anomaly_scores_error``.
        changepoint_prior_scale ... holidays_prior_scale: forwarded to the
            ``Prophet`` constructor unchanged.
        plot_anomaly_score: plot anomaly scores above the raw series.
        plot_forecast: plot the forecast band over the observed values.
        grid_search_mode: if true, return only the RMSE of the fit.

    Returns:
        In grid-search mode, the RMSE between observed and predicted values.
        Otherwise a dict with keys ``"Anomaly Scores"``, ``"Time"`` (seconds
        from start to the end of scoring) and ``"Predictions"``.
    """
    start = time.time()

    fb_prophet_model = Prophet(
        changepoint_prior_scale=changepoint_prior_scale,
        growth=growth,
        yearly_seasonality=yearly_seasonality,
        weekly_seasonality=weekly_seasonality,
        daily_seasonality=daily_seasonality,
        holidays=holidays,
        seasonality_mode=seasonality_mode,
        seasonality_prior_scale=seasonality_prior_scale,
        holidays_prior_scale=holidays_prior_scale)

    if ts_obj.miss:
        # https://facebook.github.io/prophet/docs/outliers.html
        # Prophet has no problem with missing data:
        # set the missing values to NaN in the training data,
        # but LEAVE the dates in the prediction.
        ref_date_range = ch.get_ref_date_range(ts_obj.dataframe,
                                               ts_obj.dateformat,
                                               ts_obj.timestep)
        data_copy = copy.deepcopy(ts_obj.dataframe)
        data_copy["timestamp"] = pd.to_datetime(data_copy["timestamp"],
                                                format=ts_obj.dateformat)
        data_copy.set_index('timestamp', inplace=True)
        data_copy = data_copy.reindex(ref_date_range, fill_value=np.nan)

        # "count" records each row's position so the missing steps can be
        # located again after prediction.
        fb_df_train = pd.DataFrame({
            "count": list(range(len(data_copy))),
            "ds": ref_date_range,
            "y": data_copy["value"]
        })
    else:
        fb_df_train = pd.DataFrame({
            "ds": ts_obj.dataframe["timestamp"],
            "y": ts_obj.dataframe["value"]
        })

    fb_prophet_model.fit(fb_df_train, verbose=False)

    # periods = how far to extend past the training data (not a seasonality
    # period); 0 means "predict on the training timestamps only".
    future = fb_prophet_model.make_future_dataframe(periods=0,
                                                    freq=ts_obj.timestep)
    fcst = fb_prophet_model.predict(future)
    predictions = fcst["yhat"].values

    # get RMSE
    if grid_search_mode:
        observed = ts_obj.dataframe["value"].values
        if ts_obj.miss:
            # Drop the predictions made for missing time steps.
            inds = fb_df_train.loc[pd.isna(fb_df_train["y"]),
                                   "count"].values
            print(inds)
            keep = ~np.isin(np.arange(len(predictions)), inds)
            scored = predictions[keep]
        else:
            scored = predictions

        # sqrt(MSE) instead of squared=False, which newer scikit-learn
        # releases deprecate and then drop.
        rmse = np.sqrt(mean_squared_error(observed, scored))
        print("RMSE: ", rmse)
        return rmse

    # get anomaly scores
    if ts_obj.miss:
        # you HAVE to interpolate to get a gaussian window
        new_ts_obj = copy.deepcopy(ts_obj)
        new_ts_obj.set_miss(fill=True)
        actual = list(new_ts_obj.dataframe["value"])
    else:
        actual = ts_obj.dataframe["value"]

    anomaly_scores = ah.determine_anomaly_scores_error(
        actual, predictions, ts_obj.get_length(), gaussian_window_size,
        step_size)

    end = time.time()

    if plot_forecast:
        steps = np.arange(len(fcst))
        plt.plot(steps, fcst["yhat"])
        plt.fill_between(steps,
                         fcst["yhat_lower"],
                         fcst["yhat_upper"],
                         facecolor='blue',
                         alpha=.3)
        observed_series = (data_copy["value"] if ts_obj.miss
                           else ts_obj.dataframe["value"])
        plt.plot(np.arange(len(predictions)), observed_series, alpha=.5)
        plt.xticks(rotation=90)
        plt.show()

    if plot_anomaly_score:
        plt.subplot(211)
        plt.title("Anomaly Scores")
        plt.plot(anomaly_scores)
        plt.ylim([.99, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(),
                    color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    return {
        "Anomaly Scores": anomaly_scores,
        "Time": end - start,
        "Predictions": predictions
    }
df.head() #设置跟随性: changepoint_prior_scale=0.05 值越大,拟合的跟随性越好,可能会过拟合 #设置置信区间:interval_width=0.8(默认值),值越小,上下线的带宽越小。 #指定预测类型: growth='linear'或growth = "logistic" ,默认应该是linear。 #马尔科夫蒙特卡洛取样(MCMC): mcmc_samples=0,会计算很慢。距离意义不清楚 #设置寻找突变点的比例:changepoint_range=0.9 默认从数据的前90%中寻找异常数据。预测这个正弦曲线,如果不设置changepoint_range=1,预测的结果是不对的,不知道为什么。 m = Prophet(changepoint_prior_scale=0.9,interval_width=0.9,growth='linear',changepoint_range=1) m.fit(df); #periods 周期,一般是根据实际意义确定,重点:后续预测的长度是一个周期的长度。 #freq 我见的有‘MS‘、H、M ,预测sin,要设置H ,个人理解数据如果变化很快,要用H future = m.make_future_dataframe(periods=120, freq='H') #freq=‘MS‘或者H 来设置 future['cap'] = 1 #log预测才用?linear也可以加上。 future['floor'] = -1#log预测才用?linear也可以加上。 #画图 future.tail() forecast = m.predict(future) forecast.tail() fig=m.plot(forecast) plt.savefig('./out/'+filename+'_1.jpg',dpi=500) m.plot_components(forecast) plt.savefig('./out/'+filename+'_2.jpg',dpi=500) #print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]) #打印到console
def forecastr(data, forecast_settings, column_headers, freq_val,
              build_settings):
    """
    Background:
    This function will take the data from the csv and forecast out x number
    of periods with Facebook Prophet.

    Input:
    data: pandas dataframe containing time series data (2 columns: date and
        metric). It is modified in place: the two columns are renamed to
        "ds"/"y", the index is converted to strings, and for logistic growth
        "cap"/"floor" columns are added.
    forecast_settings: list read by position -
        [0] model type ('linear' or 'logistic'), [1] forecast period length,
        [2] carrying capacity, [3] saturated minimum (both only read for
        'logistic'), [4] seasonality mode, [5] seasonality prior scale,
        [6] [daily, weekly, yearly] seasonality flags, [7] n_changepoints,
        [8] changepoint prior scale.
    column_headers: list containing the name of the date and metric columns
    freq_val: string containing "D","M","Y"
    build_settings: string determining whether this is an initial or updated
        forecast (currently not used by this function).

    Output:
    [y_hat, dates, m, csv_ready_for_export, forecasted_vals,
     forecasted_vals_mean]: forecast values, their dates as strings, the
    fitted model, export rows as a list of dicts, and two
    [forecast, actual, relative difference] summaries (sum and mean over the
    last ``fs_period`` rows).
    """

    ##### Variables, Model Settings & Facebook Prophet Hyper Parameters #####

    dimension = column_headers[0]  # date
    metric = column_headers[1]  # metric name

    # Rename the columns so we can use FB Prophet
    data.rename(index=str,
                columns={
                    dimension: "ds",
                    metric: "y"
                },
                inplace=True)

    # Hyper-parameters
    fs_model_type = forecast_settings[0]  # linear or logistic
    fs_period = int(forecast_settings[1])  # int
    fs_seasonality_mode = forecast_settings[4]  # additive or multiplicative
    fs_daily_seasonality = forecast_settings[6][0]  # True or False
    fs_weekly_seasonality = forecast_settings[6][1]  # True or False
    fs_yearly_seasonality = forecast_settings[6][2]  # True or False

    # Carrying capacity and saturated minimum only exist for logistic growth.
    is_logistic = fs_model_type == 'logistic'
    if is_logistic:
        fs_carrying_capacity = int(forecast_settings[2])  # int
        fs_saturated_minimum = int(forecast_settings[3])  # int
        data['cap'] = fs_carrying_capacity
        data['floor'] = fs_saturated_minimum
    else:
        print('no cap or floor needed as it is a linear model.')

    # Additional hyper-parameters, normalised by the UI helper.
    fs_seasonality_prior_scale = check_val_of_forecast_settings(
        forecast_settings[5])
    fs_n_changepoints = check_val_of_forecast_settings(forecast_settings[7])
    fs_changepoints_prior_scale = check_val_of_forecast_settings(
        forecast_settings[8])

    #### End of Hyper Parameters Settings ####

    model_arg_vals = {
        'growth': fs_model_type,
        'seasonality_mode': fs_seasonality_mode,
        'seasonality_prior_scale': fs_seasonality_prior_scale,
        'daily_seasonality': fs_daily_seasonality,
        'weekly_seasonality': fs_weekly_seasonality,
        'yearly_seasonality': fs_yearly_seasonality,
        'n_changepoints': fs_n_changepoints,
        'changepoint_prior_scale': fs_changepoints_prior_scale,
    }

    ###### CHECK TO SEE WHAT VALUES WERE SET FROM WITHIN THE UI ######

    # Values of "", False, 0 or 'auto' are not passed on, so Prophet uses
    # its own default for that argument.
    prophet_arg_vals = {}
    for key, value in model_arg_vals.items():
        if (value == "") or (value == False) or (value == 0) or (value == 'auto'):
            print('skipping this key value pair')
        else:
            prophet_arg_vals[key] = value

    ##### TIME TO INSTANTIATE, FIT AND PREDICT WITH FACEBOOK PROPHET ######

    m = Prophet(**prophet_arg_vals)

    # Fit the model and report how long it took.
    start = time.time()
    m.fit(data)
    end = time.time()
    print(end - start)

    # Status update
    emit('processing', {'data': 'model has been fit'})

    # History plus the requested number of periods at the requested frequency.
    future = m.make_future_dataframe(periods=fs_period, freq=freq_val)

    # Logistic growth needs cap/floor on the prediction frame as well.
    if is_logistic:
        future['cap'] = fs_carrying_capacity
        future['floor'] = fs_saturated_minimum
    else:
        print('no cap or floor needed as it is a linear model.')

    # Let's predict the future :)
    forecast = m.predict(future)

    ##### Send y_hat and dates to a list, so that they can be graphed easily when set in ChartJS
    y_hat = forecast['yhat'].tolist()
    dates = forecast['ds'].apply(lambda x: str(x).split(' ')[0]).tolist()

    ##### Lets see how the forecast compares to historical performance #####

    # Forecast over the predicted periods vs. actuals over the last
    # fs_period historical rows.
    forecast_sum = forecast['yhat'][-fs_period:].sum()
    forecast_mean = forecast['yhat'][-fs_period:].mean()
    actual_sum = float(data['y'][-fs_period:].sum())
    actual_mean = float(data['y'][-fs_period:].mean())

    difference = '{0:.1%}'.format(
        ((forecast_sum - actual_sum) / forecast_sum))
    difference_mean = '{0:.1%}'.format(
        ((forecast_mean - actual_mean) / forecast_mean))

    forecasted_vals = [
        '{0:.1f}'.format(forecast_sum), '{0:.1f}'.format(actual_sum),
        difference
    ]
    forecasted_vals_mean = [
        '{0:.1f}'.format(forecast_mean), '{0:.1f}'.format(actual_mean),
        difference_mean
    ]

    ####### Formatting data for CSV Export Functionality ##########

    # Merge the original and forecast dataframes
    data_for_csv_export = pd.merge(forecast, data, on='ds', how='left')

    # Select the export columns and rename them to the actual metric names.
    # rename() returns a new frame, so no in-place edit of a column slice.
    export_formatted = data_for_csv_export[[
        'ds', 'y', 'yhat', 'yhat_upper', 'yhat_lower'
    ]].rename(index=str,
              columns={
                  'ds': 'date',
                  'y': metric,
                  'yhat': metric + '_forecast',
                  'yhat_upper': metric + '_upper_forecast',
                  'yhat_lower': metric + '_lower_forecast'
              })

    # replace NaN with an empty val
    export_formatted = export_formatted.replace(np.nan, '', regex=True)

    # Keep only the date part of each timestamp
    export_formatted['date'] = export_formatted['date'].apply(
        lambda x: str(x).split(' ')[0])

    # Create dictionary format for sending to csv
    csv_ready_for_export = export_formatted.to_dict('records')

    print(forecasted_vals)
    print(forecasted_vals_mean)

    return [
        y_hat, dates, m, csv_ready_for_export, forecasted_vals,
        forecasted_vals_mean
    ]
def PlotSeries():
    """Flask view: forecast monthly crime counts and render the plot.

    Reads the ``Anos`` (target year), ``Delegacias`` (police station) and
    ``Crimes`` selections from the submitted form; the municipality is fixed
    to 'Santos'. When all three are present, a Prophet model with Brazilian
    holidays is fitted on the series from ``getDataAtDB`` and the forecast up
    to the selected year is saved as a PNG under ``static/``.

    Returns:
        The rendered ``previsao.html`` template, with ``image`` set to the
        plot path when a forecast was produced.
    """
    # Values of the page's select elements.
    select_ano = request.form.get("Anos", None)
    select_mun = 'Santos'
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    # Nothing to plot without a complete selection. A missing year used to
    # crash on the string concatenation below.
    if not (select_ano and select_dp and select_crime):
        return render_template("previsao.html")

    # File name for the plot.
    # NOTE(review): built from raw form values -- consider sanitising them
    # before using them in a file-system path.
    img = 'static/plot' + select_ano + 'Santos' + select_dp + select_crime + '.png'
    print(select_dp)

    # Load the series and rename its two columns to what Prophet expects.
    df = getDataAtDB(select_mun, select_dp, select_crime)
    df['datas'] = pd.to_datetime(df['datas'])
    df.columns = ["ds", "y"]

    m = Prophet(changepoint_prior_scale=0.05,
                changepoint_range=0.8,
                seasonality_prior_scale=0.05,
                seasonality_mode='additive')
    m.add_country_holidays(country_name='BR')
    m.fit(df)

    # Monthly steps from the current year to the selected year; a past year
    # yields no future periods instead of a negative count.
    months_ahead = max(0, 12 * (int(select_ano) - date.today().year))
    future = m.make_future_dataframe(periods=months_ahead, freq='MS')
    forecast = m.predict(future)

    # Build the plot image.
    fig = m.plot(forecast, figsize=(8, 4))
    plt.xlabel('Data')
    plt.ylabel('Ocorrencias')
    plt.gca().set_ylim(bottom=0)
    if select_dp != 'Todos':
        plt.title("Série temporal das ocorrências de " + select_crime +
                  " registradas no " + select_dp)
    else:
        plt.title("Série temporal das ocorrências de " + select_crime +
                  " registradas na cidade de " + select_mun)
    plt.savefig(img, bbox_inches='tight')
    # Close the figure: clf() only clears it, so figures would otherwise
    # accumulate across requests.
    plt.close(fig)

    return render_template("previsao.html", image=img)
# Script: fit Prophet with Indonesian holidays on `daily_kas_kantor`,
# forecast 31 days ahead and report the in-sample root mean squared log error.
model_holiday_indo_kantor = Prophet(
    weekly_seasonality=3,
    yearly_seasonality=10,
    # holidays=holiday_kantor,
    changepoint_range=0.8,  # default = 0.8 Recommended range: [0.8, 0.95]
    changepoint_prior_scale=0.095  # default = 0.05 Recommended range: [0.001, 0.5]
)
model_holiday_indo_kantor.add_seasonality(name='monthly',
                                          period=30.5,
                                          fourier_order=5)
model_holiday_indo_kantor.add_country_holidays(country_name='ID')
model_holiday_indo_kantor.fit(daily_kas_kantor)

# forecasting
future_kantor = model_holiday_indo_kantor.make_future_dataframe(periods=31,
                                                                freq='D')
forecast_kantor = model_holiday_indo_kantor.predict(future_kantor)

# visualize
# plot_plotly(model_holiday_indo_kantor, forecast_kantor)
# plot_components_plotly(model_holiday_indo_kantor, forecast_kantor)

from sklearn.metrics import mean_squared_log_error

# Score only the in-sample part of the forecast. Its length comes from the
# training frame instead of a hard-coded 425, so the script keeps working
# when the history grows. Arguments are in the documented (y_true, y_pred)
# order; the metric is symmetric, so the value is unchanged.
n_history = len(daily_kas_kantor)
err = np.sqrt(
    mean_squared_log_error(daily_kas_kantor.loc[:, 'y'],
                           forecast_kantor['yhat'].head(n_history)))
print('log mse:', err)
stream=byte_stream) byte_stream.seek(0) ser = pd.read_excel(byte_stream, index_col=0) byte_stream.close() ser.head() #ser = pd.read_excel('Copy of DB-O.xlsx',sheet_name='SSEDB1-O',index_col=0) for i in range(0, 4): ser.iloc[:21, i] = ser.iloc[:21, i].apply(lambda x: x * 1000) #print(i) ########################FBPROPHET#################### revdf = ser revdf['ds'] = revdf.index revdf = revdf.rename(columns={"Total Sum of Revenue": 'y'}) my_model = Prophet(interval_width=0.95, changepoint_prior_scale=4) my_model.fit(revdf[['ds', 'y']]) future_dates = my_model.make_future_dataframe(periods=6, freq='MS') forecast = my_model.predict(future_dates) forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']] from sklearn.metrics import mean_squared_error rms = np.sqrt( mean_squared_error(revdf['y'], forecast['yhat'][:len(revdf['y'])])) #print(rms) adrf = forecast['yhat'].tail(6) my_model.plot(forecast, uncertainty=True) ########################### predicting FTE ################################## A = ser['Total Sum of BFTE'] #from plotly.plotly import plot_mpl from statsmodels.tsa.seasonal import seasonal_decompose result = seasonal_decompose(A, model='additive', freq=12) fig = result.plot() from statsmodels.tsa.stattools import adfuller
def estimate_and_predict_prophet_PR(calendar,
                                    punched_df,
                                    end_train_date,
                                    start_test_date,
                                    daily_view=False,
                                    target_column='cost',
                                    pred_days=120,
                                    horizon=8,
                                    missing_val=201735):
    '''
    Forecast ``target_column`` per club with Prophet, without any regressor.

    This serves as the first layer of the mixed model.

    calendar: frame with ``calendar_date`` and ``wm_yr_wk_nbr`` columns, merged
        onto the per-club totals.
    punched_df: input data; needs ``club_nbr`` (or ``club``), ``posting_date``
        (or ``wm_yr_wk_nbr``) and ``target_column``.  It is no longer modified
        in place.
    end_train_date: last ``wm_yr_wk_nbr`` (inclusive) used for training.
    start_test_date: accepted for interface compatibility; not used.
    daily_view: whether to split the bi-weekly SAP data into daily data
        (not recommended).
    pred_days: how many days ahead to predict.
    horizon: number of 14-day steps after the last training date to keep.
    missing_val: ``wm_yr_wk_nbr`` that is missing from the SAP data and is
        interpolated from its neighbours two weeks before and after.

    return type: prediction result as a DataFrame, columns=['ds','yhat','club']
    ds is the posting_date and yhat is the prediction value.
    '''
    # Normalise the input on new frames so the caller's object stays untouched.
    if 'club_nbr' not in punched_df.columns:
        punched_df = punched_df.rename(columns={'club': 'club_nbr'})
    if 'posting_date' not in punched_df.columns:
        punched_df = punched_df.assign(
            posting_date=getDatesFromWMWks(punched_df['wm_yr_wk_nbr']))

    # Total per club and posting date, enriched with the calendar columns.
    punched_pro = punched_df.groupby(['club_nbr',
                                      'posting_date'])[target_column].sum()
    punched_pro = punched_pro.reset_index()
    punched_pro = pd.merge(left=punched_pro,
                           right=calendar,
                           how='left',
                           left_on='posting_date',
                           right_on='calendar_date').drop('calendar_date',
                                                          axis=1)
    punched_pro = removehurricane(target_column,
                                  punched_pro,
                                  201733,
                                  201739,
                                  sales=False)

    frames = []
    for club in punched_pro.club_nbr.unique():
        # Explicit copy: rows are added to this per-club frame below.
        punched_pro_club = punched_pro[punched_pro.club_nbr.isin([club
                                                                  ])].copy()

        # 201735 is missing in the SAP data; recover it as the mean of the
        # values two weeks before and two weeks after.
        if missing_val not in punched_pro_club.wm_yr_wk_nbr.values.tolist():
            before = punched_pro_club.loc[punched_pro_club.wm_yr_wk_nbr ==
                                          wm_nbr_add(missing_val, -2)]
            after = punched_pro_club.loc[punched_pro_club.wm_yr_wk_nbr ==
                                         wm_nbr_add(missing_val, 2)]
            punched_pro_club.loc[-1] = [
                club,
                before.iloc[0, 1] + timedelta(days=14),
                0.5 * before.iloc[0, 2] + 0.5 * after.iloc[0, 2],
                missing_val,
            ]  # adding a row
            punched_pro_club.index = punched_pro_club.index + 1  # shifting index

        punched_pro_club = punched_pro_club.sort_values(by='wm_yr_wk_nbr')
        punched_pro_club = punched_pro_club.drop(
            'club_nbr', axis=1).reset_index().drop('index', axis=1)
        if daily_view:
            # `day_sep` is not defined in this function; it must exist at
            # module level for this branch to work.
            punched_pro_club = gen_daily_data(punched_pro_club, day_sep)

        trainset = punched_pro_club.loc[
            punched_pro_club.wm_yr_wk_nbr <= end_train_date].drop(
                ['wm_yr_wk_nbr'], axis=1)
        trainset = trainset.reindex(columns=["posting_date", target_column])
        trainset.columns = ["ds", "y"]

        m = Prophet(yearly_seasonality=True)
        m.fit(trainset)
        future = m.make_future_dataframe(periods=pred_days)
        forecast = m.predict(future)
        result = forecast[['ds', 'yhat']].tail(pred_days)

        # Keep only the dates that fall on the bi-weekly grid after the last
        # training date.
        last_train_date = trainset['ds'].iloc[-1]
        weeklist = [
            last_train_date + timedelta(days=14 * (step + 1))
            for step in range(horizon)
        ]
        result = result[result.ds.isin(weeklist)].copy()
        result['club'] = club
        frames.append(result)

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, axis=0)
# For every code in `corporation`, fit one Prophet model on the data returned
# by make_am_data and start preparing a second frame from make_pm_data.
# NOTE(review): the excerpt ends before the second model is fitted.
for n, index_code in enumerate(corporation):
    # temp, what_day = make_am_data(index_code, 20200917)
    temp, what_day = make_am_data(index_code, int(date))
    amf = pd.DataFrame(columns=['ds', 'y'])
    amf['ds'] = temp['DateTime']  # build the training dataframe
    amf['y'] = temp['체결가']  # the y value used from here on
    amf['y'].plot()
    # NOTE(review): the same file name is written on every iteration.
    plt.savefig('weekend_to_am.png', dpi=400)
    am_model = Prophet(changepoint_range=0.8).fit(amf)
    future = am_model.make_future_dataframe(periods=1260,
                                            freq='min')  # exclude 12:00-16:00
    # Keep only rows on day-of-month `what_day`, then hours 9, 10 and 11.
    future2 = future[(future['ds'].dt.day == what_day)]
    am = future2[(future2['ds'].dt.hour >= 9) & (future2['ds'].dt.hour < 12)]
    # temp, what_day = make_pm_data(index_code, 20200917)
    temp, what_day = make_pm_data(index_code, int(date))
    pmf = pd.DataFrame(columns=['ds', 'y'])
    pmf['ds'] = temp['DateTime']  # build the training dataframe
    pmf['y'] = temp['체결가']  # the y value used from here on
    pmf['y'].plot()
## Train
print("Start training...")
model_imporvement2 = Prophet(holidays=holidays,
                             weekly_seasonality=False,
                             yearly_seasonality=20)
model_imporvement2.add_seasonality(name='monthly',
                                   period=30.5,
                                   fourier_order=5)

# Extra regressors; each must be a column of `data_with_regressors`.
_regressor_names = ('monetary_base_diff', 'cpi_diff', 'fed_fund',
                    'saudi_production')
for _name in _regressor_names:
    model_imporvement2.add_regressor(_name)
model_imporvement2.fit(data_with_regressors)

future_imporvement2 = model_imporvement2.make_future_dataframe(periods=365)
# Copy the known regressor values (aligned on the index) and carry the last
# known value forward over the 365 future rows.  `ffill()` replaces the
# deprecated `fillna(method='pad')` and gives the same result.
# NOTE(review): the index alignment assumes both frames share the same
# positional index -- confirm.
for _name in _regressor_names:
    future_imporvement2[_name] = data_with_regressors[_name]
    future_imporvement2[_name] = future_imporvement2[_name].ffill()
from fbprophet import Prophet

# Training frame: dates from `stock_return`, target is the log of the close.
df = pd.DataFrame()
df['ds'] = stock_return.index
#df['y_orig']=daily_df.Pageviews.values
df['y'] = graph['Close'].apply(lambda x: np.log(x)).values
df.tail()

m0 = Prophet(yearly_seasonality=True)
m0.fit(df)

#n_add = 365 - len()
n_add = 100
print("adding {n} days to reach the end of 2017.".format(n=n_add))
future = m0.make_future_dataframe(
    periods=n_add
)  # generate frame going to end of 2017; 112 added on 9/11/2017
future.tail()

forecast = m0.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# Raw string: "\l" is not a valid escape sequence, and a plain literal emits a
# warning on current Python versions.  The label text itself is unchanged.
forcast = m0.plot(forecast, ylabel=r'$\ln($stock_return$)$')
forcast.savefig('/home/ubuntu/Desktop/TelegramBot/charts/OILforcast.jpeg',
                dpi=400,
                bbox_inches='tight')
trend = m0.plot_components(forecast)
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Load the prices and keep only the date and the BOVA column, renamed to the
# `ds` / `y` names Prophet expects.
dataset = pd.read_csv('acoes.csv')
# dataset.set_index(keys=['Date'], inplace=True)
dataset = dataset[['Date', 'BOVA']].rename(columns={'Date': 'ds', 'BOVA': 'y'})
# Model
modelo = Prophet()
modelo.fit(dataset)
# History plus 90 future periods.
futuro = modelo.make_future_dataframe(periods=90)
previsoes = modelo.predict(futuro)
# Forecast plots
modelo.plot(previsoes, xlabel='Data', ylabel='Preço')
modelo.plot_components(previsoes)
plot_plotly(modelo, previsoes)
plot_components_plotly(modelo, previsoes)
# Model evaluation: predict on the training dates only (periods=0) and compare
# the last 365 fitted values with `teste`.
# NOTE(review): `teste` is not defined in this excerpt, and the result of
# mean_absolute_error is not stored or printed.
pred = modelo.make_future_dataframe(periods=0)
previsoes = modelo.predict(pred)
previsoes = previsoes['yhat'].tail(365)
mean_absolute_error(teste, previsoes)
def prophet(df, var_season, item=np.nan, title='error'):
    """Fit Prophet on ``df``, post-adjust the forecast, plot it and return it.

    Parameters
    ----------
    df : DataFrame with ``ds`` and ``y`` columns.  It is modified in place:
        boolean ``on_season`` / ``off_season`` columns are added.
    var_season : passed to ``item_season`` when ``item`` is given.
    item : value stored in the ``prdt_cd`` column of the result.  When it is
        NaN the season flags come from ``is_season`` and the months present in
        ``df``; otherwise from ``item_season``.
    title : plot title; the figure is saved as ``title + '.png'``.

    Returns
    -------
    DataFrame with columns ``ds``, ``yhat``, ``yhat_upper``, ``yhat_lower``
    and ``prdt_cd``.
    """
    print(df)

    # Prophet settings
    period = 32  # number of forecast periods (weekly frequency, see below)
    # Trend flexibility; default = 0.05.  Larger is more flexible (helps
    # underfitting), smaller is more rigid (helps overfitting).
    changepoint_prior_scale = 0.07
    # 'additive' for constant seasonality, 'multiplicative' for growing one.
    seasonality_mode = 'additive'

    # Months that occur in the history.
    df_temp = df.copy()
    df_temp['month'] = df_temp['ds'].apply(
        lambda ds: dt.datetime.strptime(str(ds).split(' ')[0], '%Y-%m-%d')
    ).dt.month
    lst_season = sorted(list(df_temp['month'].unique()))

    # One season predicate used for both the history and the future frame, so
    # the two can no longer disagree.
    if pd.isna(item):
        def in_season(ds):
            return is_season(ds, lst_season)
    else:
        def in_season(ds):
            return item_season(ds, var_season)

    df['on_season'] = df['ds'].apply(in_season)
    df['off_season'] = ~df['on_season']

    # NOTE(review): the 'on_season' / 'off_season' seasonalities are added
    # without `condition_name`, so Prophet does not read the flag columns and
    # both behave as ordinary period-7 seasonalities -- confirm the intent.
    m = Prophet(
        growth='linear',
        seasonality_mode=seasonality_mode,
        changepoint_prior_scale=changepoint_prior_scale,
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    ).add_seasonality(
        name='monthly', period=30.5, fourier_order=12
    ).add_seasonality(
        name='yearly', period=365.25, fourier_order=10
    ).add_seasonality(
        name='quarterly', period=365.25 / 4, fourier_order=5, prior_scale=15
    ).add_seasonality(
        name='on_season', period=7, fourier_order=20
    ).add_seasonality(
        name='off_season', period=7, fourier_order=20
    )

    # Fit the model
    m.fit(df)

    future = m.make_future_dataframe(periods=period, freq='W')
    future['on_season'] = future['ds'].apply(in_season)
    # Fixed: this column used to be a copy of `on_season`; it is now its
    # negation, as in the training frame.
    future['off_season'] = ~future['on_season']

    forecast = m.predict(future)

    # Post-adjustment of the forecast (current year onwards):
    #  * off-season months: cap yhat at the median of past off-season sales
    #  * yhat below the minimum fitted value of the history: set to 0
    #  * months that never occur in the history: set to 0
    forecast['upper_gap'] = abs(forecast['yhat_upper'] - forecast['yhat'])
    forecast['lower_gap'] = abs(forecast['yhat'] - forecast['yhat_lower'])
    forecast['min'] = forecast[forecast['ds'] < df.tail(1).iloc[0]['ds']]['yhat'].min()
    forecast['off_season_max'] = df[df['off_season'] == True]['y'].median()
    off_season_months = set(
        df[df['off_season'] == True]['ds'].apply(
            lambda ds: pd.to_datetime(ds).month))
    current_year = dt.datetime.today().year

    # `row` is a snapshot taken before any assignment, so each rule below
    # tests the unadjusted forecast values.
    for i, row in forecast.iterrows():
        if row['ds'].year < current_year:
            continue
        if row['ds'].month in off_season_months:
            if row['yhat'] > row['off_season_max']:
                forecast.loc[i, 'yhat'] = row['off_season_max']
                forecast.loc[i, 'yhat_upper'] = row['off_season_max'] + abs(row['upper_gap'])
                forecast.loc[i, 'yhat_lower'] = row['off_season_max'] - abs(row['lower_gap'])
        if row['yhat'] < row['min']:
            forecast.loc[i, 'yhat'] = 0
            forecast.loc[i, 'yhat_upper'] = abs(row['upper_gap'])
            forecast.loc[i, 'yhat_lower'] = -abs(row['lower_gap'])
        if row['ds'].month not in lst_season:
            forecast.loc[i, 'yhat'] = 0
            forecast.loc[i, 'yhat_upper'] = abs(row['upper_gap'])
            forecast.loc[i, 'yhat_lower'] = -abs(row['lower_gap'])

    forecast_temp = forecast[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]
    forecast_temp = forecast_temp.assign(prdt_cd=item)

    # Draw the plot
    fig1 = m.plot(forecast, uncertainty=True)
    plt.title(title)
    add_changepoints_to_plot(fig1.gca(), m, forecast)
    title = title + '.png'
    plt.savefig(title)
    plt.show()

    return forecast_temp
# Attach the per-timestamp tweet counts; timestamps without tweets get 0.
df = df.merge(tweets_group, how='left', on='DATETIME_CONVERTED')
# Plain assignment instead of `df[col].fillna(..., inplace=True)`: the chained
# in-place form may act on a temporary copy and leave `df` unchanged.
df['TWEET_COUNT'] = df['TWEET_COUNT'].fillna(0)
# Positional rename; the third and fourth columns become Prophet's y and ds.
df.columns = ['DATETIME_CONVERTED', 'Volume_(BTC)', 'y', 'ds', 'TWEET_COUNT']

### split the data 80% train / 20% test
### number of rows
rows = int(df.shape[0] * 0.8)
train_df = df[:rows]
test_df = df[rows:]

from datetime import datetime
from fbprophet import Prophet

print(datetime.now())
df_prophet = Prophet(changepoint_prior_scale=0.15, daily_seasonality=True)
df_prophet.fit(train_df)

fcast_time = 144  # 144 steps of 10 minutes = 1 day
print(datetime.now())
df_forecast = df_prophet.make_future_dataframe(periods=fcast_time,
                                               freq='10min')
df_forecast.tail(10)
df_forecast = df_prophet.predict(df_forecast)

import pickle

# Context manager so the file handle is flushed and closed deterministically.
with open("prophet_model.pickle", "wb") as model_file:
    pickle.dump(df_prophet, model_file)
# df_forecast.to_parquet('../Data/df_forcast.parquet')

forecast = df_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
fig1 = df_prophet.plot(forecast)
fig1.show()
# First column becomes Prophet's `ds`, second column becomes `y`.
dat = dat.rename(columns={dat.columns[0]: 'ds'})
dat = dat.rename(columns={dat.columns[1]: 'y'})

# Walk-forward evaluation: train on the first j rows, then record the absolute
# error 1, 5, 20 and 65 rows into the test span.
for j in range(550, 750,
               20):  # Change 20 to 5 if you want to run on just DJIA total
    print(j)
    train_index = list(range(j))
    test_index = list(range(j, len(dJIA)))
    testLen = len(test_index)
    if testLen < 260 or len(train_index) < 522:
        continue

    ##
    # Run through Prophet and calculate MAE
    ##
    m = Prophet()
    m.fit(dat.iloc[train_index, :])
    future = m.make_future_dataframe(periods=testLen + extraPred, freq="B")
    pred = m.predict(future)
    pred = pred[~pred.ds.isin(leftOvers)].reset_index(drop=True)

    for _errors, _offset in ((maeD1, 0), (maeD5, 4), (maeD20, 19),
                             (maeD65, 64)):
        _errors.append(
            abs(pred.yhat[j + _offset] - dat.iloc[test_index[_offset], 1]))
# Training frame: rows of `df` after the cut-off date, with the target renamed
# to `y` and the `Date` index turned into the `ds` column.
trainings_zeitraum_von = '2020-8-01'
df_test = df
df_test = (
    df_test.loc[df_test.index > trainings_zeitraum_von]
    .copy()
    .rename(columns={"7TIW": "y"})
    .reset_index()
    .rename(columns={"Date": "ds"})
)

m = Prophet(changepoint_prior_scale=2)
m.add_country_holidays(country_name='DE')
for _regressor in ('Temperatur', 'ResNbg'):
    m.add_regressor(_regressor)
m.fit(df_test)

# Ten future rows only; regressor values are attached on the `ds` index.
future = m.make_future_dataframe(periods=10,
                                 include_history=False).set_index('ds')
future['Temperatur'] = future.join(WetterVorhersage)
# The last observed `ResNbg` value is reused for every future row.
future['ResNbg'] = df_test.ResNbg.iloc[-1]
future = future.reset_index()
forecast = m.predict(future)
#fig = m.plot(forecast)
#ax = fig.gca()
#ax.set_title(today, size=34)
#plt.tight_layout()
#fig.savefig('Vorhersagen/'+str(today)+'.png')
def runmodels():
    """Train the regression and Prophet models, save both, render the result.

    Reads ``train.csv``, ``test.csv`` and ``store.csv`` from the working
    directory, fits a random-forest pipeline on the merged store data and a
    Prophet model on the output of ``prophetData``, pickles both models to
    time-stamped files and returns ``render_template('model.html', ...)`` with
    the test-set predictions and the Prophet forecast.
    """
    # load the data
    dfTrain = pd.read_csv('train.csv', low_memory=False)
    dfTest = pd.read_csv('test.csv', low_memory=False)
    dfStore = pd.read_csv("store.csv", low_memory=False)

    # dropping the zero sales and closed stores
    dfTrain = dfTrain[(dfTrain.Open != 0) & (dfTrain.Sales != 0)]
    sales, holidays = prophetData(dfTrain)

    # Fill missing CompetitionDistance with the median.  Assigned back instead
    # of a chained `inplace=True`, which may act on a temporary copy.
    dfStore['CompetitionDistance'] = dfStore['CompetitionDistance'].fillna(
        dfStore['CompetitionDistance'].median())
    # replace all the other NaN values with zeros
    dfStore.fillna(0, inplace=True)
    # fill the missing values
    dfTest.fillna(1, inplace=True)

    # merge train and test dataset with store data
    dfTrainStore = merge(dfTrain, dfStore)
    dfTestStore = merge(dfTest, dfStore)

    # Set the target column
    Y = dfTrainStore['Sales']
    Id = dfTestStore['Id']

    # remove dataset specific columns
    dfTrainStore = dfTrainStore.drop(['Customers', 'Sales'], axis=1)
    dfTestStore = dfTestStore.drop(['Id'], axis=1)

    # Split into a training set and a validation set.  The validation part is
    # held out but not scored in this function.
    xTrain, xTrainTest, yTrain, yTrainTest = train_test_split(dfTrainStore,
                                                              Y,
                                                              test_size=0.20,
                                                              random_state=42)

    pipe = Pipeline(steps=[('multipleTrans', multipleTransformer()),
                           ('randomForest',
                            RandomForestRegressor(n_estimators=128,
                                                  criterion='mse',
                                                  max_depth=20,
                                                  min_samples_split=10,
                                                  min_samples_leaf=1,
                                                  min_weight_fraction_leaf=0.0,
                                                  max_features='auto',
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  bootstrap=True,
                                                  oob_score=False,
                                                  n_jobs=4,
                                                  random_state=35,
                                                  verbose=0,
                                                  warm_start=False))])
    regModel = TransformedTargetRegressor(regressor=pipe,
                                          func=targetTransform,
                                          inverse_func=reverseTargetTransform)

    # training the Regression Model
    regModel.fit(xTrain, yTrain)

    # predict on the testStore set and turn the predictions into a dataframe
    predictions = regModel.predict(dfTestStore)
    dfPreds = pd.DataFrame({'Id': Id, 'Sales': predictions})

    # training the prophet Model
    pModel = Prophet(interval_width=0.5, holidays=holidays)
    pModel.fit(sales)

    # dataframe that extends 6 weeks into the future
    future_dates = pModel.make_future_dataframe(periods=6 * 7)

    # prophet model predictions
    forecast = pModel.predict(future_dates)

    # Isolate the predictions.  The former `rename` mapped non-existent
    # 'Date'/'Forecast' columns and was a no-op; the template is fed from the
    # `ds` / `yhat` columns, so the names are kept.
    fc = forecast[['ds', 'yhat']]

    # get the current time and turn it into a string
    now = datetime.datetime.now().strftime('%d-%m-%Y-%H-%M-%S-%f')[:-3]

    # Save the models; context managers close the files deterministically.
    filenameReg = 'regModel-' + now + '.pkl'
    filenamePro = 'pModel-' + now + '.pkl'
    with open(filenameReg, 'wb') as reg_file:
        pickle.dump(regModel, reg_file)
    with open(filenamePro, 'wb') as pro_file:
        pickle.dump(pModel, pro_file)

    return render_template('model.html',
                           labels=dfPreds['Id'],
                           values=dfPreds['Sales'],
                           linelabels=fc['ds'],
                           linevalues=fc['yhat'])
import pandas as pd
import numpy as np
from fbprophet import Prophet

# Prep the dataset
data = pd.read_csv("/home/dusty/Econ8310/DataSets/chicagoBusRiders.csv")
# Explicit copy so the assignments below act on an independent frame and not
# on a slice of `data`.
route3 = data[data.route == '3'][['date', 'rides']].copy()
# `infer_datetime_format` was only a parsing-speed hint and is deprecated.
route3['date'] = pd.to_datetime(route3['date'])
# Flat list: the former nested list [['ds', 'y']] created MultiIndex columns,
# so Prophet could not find plain `ds` / `y` columns.
route3.columns = ['ds', 'y']

# Initialize Prophet instance and fit to data
m = Prophet()
m.fit(route3)

# Create timeline for 1 year in future, then generate predictions based on that timeline
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Create plots of forecast and truth, as well as component breakdowns of the trends
# NOTE(review): `plt` is bound to the figure returned by m.plot here, not to
# matplotlib.pyplot; the name is kept because later code may rely on it.
plt = m.plot(forecast)
plt.show()
comp = m.plot_components(forecast)
comp.show()
# NOTE(review): the next two statements are presumably the tail of
# plot_raw_data(), whose header is outside this excerpt -- confirm.
fig.layout.update(title_text='Time Series Data',
                  xaxis_rangeslider_visible=True)
st.plotly_chart(fig)
plot_raw_data()
### Prediction forecast with fbProphet
## https://facebook.github.io/prophet/docs/quick_start.html#python-api
# Select the two columns with a list of names and rename them to the `ds` /
# `y` names Prophet expects.
df_train = data[['Date', 'Close']]
df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})
# create prophet prediction model
m = Prophet()
m.fit(df_train)
# `period` (number of future rows) is defined outside this excerpt.
future = m.make_future_dataframe(periods=period)
forecast = m.predict(future)
# Show raw forecast data
st.subheader('Raw Forecast Data')
st.write(forecast.tail())
# Plot forecast data
st.write(f'Forecast plot for {n_years} years')
fig1 = plot_plotly(m, forecast)
st.plotly_chart(fig1)
st.write("Forecast components")
fig2 = m.plot_components(forecast)
st.write(fig2)
# Per-county forecast of the summed cases of the adjacent counties.
# Frames are collected in a list and concatenated once: `DataFrame.append` no
# longer exists in current pandas, and a set is not an ordered column spec.
_adj_frames = []
for fips in df.fips.unique():
    try:
        # Generates adjacency sums and appends to a new sub dataframe with
        # cases and normalized flu data
        adjacent = adj_df.loc[adj_df.county == fips, ].values.flatten().tolist()
        adjacent = [x for x in adjacent if str(x) != 'nan']
        sub_df = df[df.fips.isin(adjacent)]
        sub_df = sub_df.groupby('date')['cases'].sum().to_frame()
        sub_df.reset_index(inplace=True)
        sub_df = sub_df.rename(columns={'cases': 'y', 'date': 'ds'})
        sub_df = sub_df.dropna()
        print(sub_df)

        # Modeling
        model_p = Prophet(yearly_seasonality=True)
        model_p.fit(sub_df)
        future = model_p.make_future_dataframe(periods=115)
        forecast = model_p.predict(future)

        # Explicit copy so the column edits do not act on a slice.
        test = forecast[['ds', 'yhat']].copy()
        test.insert(0, 'fips', fips)
        test = test.rename(columns={'ds': 'date', 'yhat': 'adj_cases'})
        # Truncate to whole cases and floor negative predictions at zero.
        test['adj_cases'] = test['adj_cases'].astype(int)
        test.loc[test['adj_cases'] < 0, 'adj_cases'] = 0
        _adj_frames.append(test)
    except Exception:
        # Best effort per county; unlike a bare `except:` this no longer
        # swallows KeyboardInterrupt / SystemExit.
        print(str(fips) + ": Failed")

if _adj_frames:
    ret_df = pd.concat(_adj_frames)
else:
    ret_df = pd.DataFrame(columns=['fips', 'date', 'adj_cases'])
ret_df.to_csv('../data/processed/adj_sum.csv', index=False)
st.write("Sharpe Ratio: {:.2f}".format(w[2]))

# Log returns, their row-wise sum and the running total of that sum.
returns = risk_models.returns_from_prices(data_, log_returns=True)
returns["sum"] = returns.sum(axis=1)
returns["cum"] = returns["sum"].cumsum()
returns = returns.reset_index()
plt.figure(figsize=(12, 6))
plt.plot(returns.cum)
st.pyplot()

# Prophet input: `ds` / `y` are appended to `returns` itself (later code reads
# them from there); `Prop` is the view of those last two columns.
returns['ds'] = returns['t']
returns['y'] = returns['cum']
Prop = returns.iloc[:, -2:]
m = Prophet(n_changepoints=n_changepoints)
m.fit(Prop)
future = m.make_future_dataframe(periods=shift_d)
forecast = m.predict(future)
_forecast_axes = m.plot(forecast).gca()
fig = add_changepoints_to_plot(_forecast_axes, m, forecast)
st.pyplot()

# Relative distance of the cumulative series from its running maximum.
prices = returns.set_index('ds').y
peeks = prices.cummax()
drowdown = (prices - peeks) / peeks
plt.plot(drowdown)
st.pyplot()
st.write(drowdown.min())
def create_prophet_m(app_name,z1,cpu_perc_list,delay=24):
    """Fit a realtime and a back-test Prophet model on ``z1.bw``.

    The realtime model is trained on the whole series and predicts ``delay``
    further periods.  The back-test model is trained on everything except the
    last ``delay`` rows and is scored against them.

    Returns ``(df, model, forecast, pred_df, pred_r)``: ``df`` is a one-row
    metrics frame indexed by ``app_name`` (empty when nothing could be
    scored), ``model`` / ``forecast`` belong to the back-test, ``pred_df`` is
    the scored test frame and ``pred_r`` the realtime prediction.
    """
    def _as_ds_y(series):
        # Index becomes `ds`, values become `y`.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    def _blank_outliers(frame):
        # When the maximum lies at least 2x the median above the median,
        # every value >= 2x the median is replaced by a missing value.
        median = frame.y.median()
        highest = frame.y.quantile(1)
        if (highest - median) >= (2 * median):
            frame.loc[frame.y >= (2 * median), 'y'] = None

    n_rows = len(z1)

    # ---- realtime prediction on the full history ----
    full_df = _as_ds_y(z1.bw.iloc[0:n_rows])
    _blank_outliers(full_df)
    model_r = Prophet(yearly_seasonality=False,
                      changepoint_prior_scale=.1,
                      seasonality_prior_scale=0.05)
    model_r.fit(full_df)
    cpu_perc_list.append(py.cpu_percent())
    # Rebinds the local name only; the caller's list keeps the value appended
    # just above.
    cpu_perc_list = [max(cpu_perc_list)]
    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='D'))
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(forecast_r['yhat'][n_rows:(n_rows + delay)])
    pred_r = pred_r.reset_index()

    # ---- back-test: hold out the last `delay` rows ----
    train_end_index = len(z1.bw) - delay
    train_df = _as_ds_y(z1.bw.iloc[0:train_end_index])
    test_df = _as_ds_y(z1.bw.iloc[train_end_index:n_rows])
    _blank_outliers(train_df)
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    model = Prophet(yearly_seasonality=False,
                    changepoint_prior_scale=.1,
                    seasonality_prior_scale=0.05)
    model.fit(train_df)
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]
    forecast = model.predict(
        model.make_future_dataframe(periods=len(test_df), freq='D'))
    forecast.index = forecast['ds']

    pred = pd.DataFrame(forecast['yhat'][train_end_index:n_rows])
    print('length forecasted non realtime=',len(pred))
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) == 0:
        return (df, model, forecast, pred_df, pred_r)

    # ---- error metrics on the held-out rows ----
    pred_df['error_test'] = pred_df.y - pred_df.yhat
    MSE = mse(pred_df.y, pred_df.yhat)
    RMSE = math.sqrt(MSE)
    pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
    MAPE = pred_df.APE.mean()
    ape = pred_df['APE']
    min_error_rate = ape.quantile(0) / 100
    max_error_rate = ape.quantile(1) / 100
    median_error_rate = ape.quantile(.50) / 100
    print("App name:",app_name)
    #print("MSE  :",MSE)
    print("RMSE :",RMSE)
    print("MAPE :",MAPE)

    # MAPE over the rows strictly below the 98th APE percentile.
    mape_q98 = ape[pred_df.APE < ape.quantile(0.98)].mean()
    std_MAPE = math.sqrt(((pred_df.APE - MAPE) ** 2).mean())

    df = pd.DataFrame({'length': n_rows,
                       'test_rmse': RMSE,
                       'test_mape': MAPE,
                       'std_mape': std_MAPE,  # standard deviation of mape
                       'min_error_rate': min_error_rate,
                       'max_error_rate': max_error_rate,
                       'median_error_rate': median_error_rate,
                       'test_mape_98': mape_q98},
                      index=[app_name])
    return (df, model, forecast, pred_df, pred_r)
adf_US_organic = adf_US_organic.sort_values(by='Date') # Valid = adf[(adf['year'] == 2017) | (adf['year'] == 2018)] # Train = adf[(adf['year'] != 2017) & (adf['year'] != 2018)] Train = adf_US_organic.sort_values(by='Date') # In[19]: from fbprophet import Prophet #works best with time series & robust to missing data import matplotlib.pyplot as plt # In[20]: m = Prophet() date_volume = Train.rename(columns={'Date': 'ds', 'Total Volume': 'y'}) m.fit(date_volume) future = m.make_future_dataframe(periods=365) forecast = m.predict(future) # In[21]: fig1 = m.plot(forecast) # In[22]: fig2 = m.plot_components(forecast) # In[23]: n = Prophet() date_bags = Train.rename(columns={'Date': 'ds', 'Total Bags': 'y'}) n.fit(date_bags)
def create_prophet_m(source_name,z1,delay=24):
    """Fit a realtime and a back-test Prophet model on ``z1.app_count``.

    The realtime model uses the whole series and predicts ``delay`` further
    hourly periods.  The back-test model is trained without the last ``delay``
    rows and scored against them.

    Returns ``(df, model, forecast, pred_df, pred_r)``: ``df`` is a one-row
    metrics frame indexed by ``source_name`` (empty when nothing could be
    scored).
    """
    def _as_ds_y(series):
        # Index becomes `ds`, values become `y`.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    n_rows = len(z1)
    train_end_index = len(z1.app_count) - delay
    train_df = _as_ds_y(z1.app_count.iloc[0:train_end_index])
    test_df = _as_ds_y(z1.app_count.iloc[train_end_index:n_rows])
    full_df = _as_ds_y(z1.app_count.iloc[0:n_rows])

    ##-- Realtime prediction --##
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='H'))
    forecast_r.index = forecast_r['ds']
    pred_r = pd.DataFrame(forecast_r['yhat'][n_rows:(n_rows + delay)])
    pred_r = pred_r.reset_index()

    ##-- Back-test on the held-out rows --##
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    forecast = model.predict(
        model.make_future_dataframe(periods=len(test_df), freq='H'))
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'][train_end_index:n_rows])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) == 0:
        return (df, model, forecast, pred_df, pred_r)

    pred_df['error_test'] = pred_df.y - pred_df.yhat
    MSE = mse(pred_df.y, pred_df.yhat)
    RMSE = math.sqrt(MSE)
    pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
    MAPE = pred_df.APE.mean()
    print("App name:",source_name)
    print("MSE  :",MSE)
    print("RMSE :",RMSE)
    print("MAPE :",MAPE)

    # MAPE over the rows strictly below the 98th APE percentile.
    ape = pred_df['APE']
    mape_q98 = ape[pred_df.APE < ape.quantile(0.98)].mean()

    df = pd.DataFrame({'test_rmse': RMSE,
                       'test_mape': MAPE,
                       'test_mape_98': mape_q98},
                      index=[source_name])
    return (df, model, forecast, pred_df, pred_r)
# Positional window boundaries: the test window starts where training ends.
EndTimeTrain = StartTimeTrain + train_periods
StartTimeTest = EndTimeTrain
EndTimeTest = StartTimeTest + test_periods
# NOTE(review): the excerpt ends before the window variables are advanced;
# the remainder of the loop body is not visible here.
while EndTimeTest < TimeTotal:
    # prepare train & test sets
    df_to_fit = df_adm.iloc[StartTimeTrain:EndTimeTrain]
    df_to_test = df_adm.iloc[StartTimeTest:EndTimeTest]
    # initialize model, train & predict
    # uncertainty_samples=0 is passed, so no interval sampling is requested.
    m = Prophet(weekly_seasonality=False,
                daily_seasonality=False,
                uncertainty_samples=0)
    m.fit(df_to_fit)
    future = m.make_future_dataframe(periods=test_periods, freq='M')
    forecast = m.predict(future)
    # prepare dataframe with test data & predictions
    # Inner merge on `ds` against the last `test_periods` forecast rows.
    df_result = pd.merge(df_to_test, forecast.iloc[-test_periods:], on='ds')
    # compute results (evaluation metrics)
    results = evaluate(df_result['y'], df_result['yhat'], metrics=metrics)
    # add metadata to results
    results['adm'] = df_result['ADM1_EN'].values[0]
    results['train_periods'] = train_periods
    results['test_periods'] = test_periods
    results['test_dates'] = df_to_test.ds.tolist()
def get_outliers(my_dict, all_intents_text_df):
    """Flag turns whose CES value exceeds Prophet's upper bound.

    For every key of ``my_dict`` (iterated in sorted order) a Prophet model
    without seasonalities is fitted on the key's ``"CES"`` values, placed on
    made-up, evenly spaced 2018 dates.  Positions where the value lies above
    ``yhat_upper`` (interval width 0.999) are treated as outliers and their
    conversations are collected.

    ``my_dict[key]`` is read for ``"CES"``, ``"Texts"`` and
    ``"WebSessionIDs"``; ``all_intents_text_df`` is read for ``WebSessionID``,
    ``Asked_Date_Time`` and ``Input``.

    Returns a DataFrame with the columns ``WebSessionID``,
    ``Full Conversation`` and
    ``Preprocessed Turn Predicted to be Anomalous``.

    NOTE(review): the nesting below was reconstructed from a flattened source;
    confirm it against the original file.
    """
    websessionids = []
    convs = []
    turn_predicted = []
    # NOTE(review): all_ces / all_texts are filled but never read below.
    all_ces = []
    all_texts = []
    for i in sorted(my_dict.keys()):
        print(i)
        all_ces += my_dict[i]["CES"][:-1]
        all_texts += my_dict[i]["Texts"][:-1]
        # plt.plot(my_dict[i]["CES"])
        fb_prophet_model = Prophet(growth='linear',
                                   yearly_seasonality=False,
                                   weekly_seasonality=False,
                                   daily_seasonality=False,
                                   interval_width=.999)
        # make up some dates
        dates = [
            str(item) for item in list(
                pd.date_range(start='2018-01-01',
                              end='2018-12-31',
                              periods=len(my_dict[i]["CES"])))
        ]
        fb_df_train = pd.DataFrame({"ds": dates, "y": my_dict[i]["CES"]})
        # periods=0: predict on the training dates only.
        periods = 0
        fb_prophet_model.fit(fb_df_train, verbose=False)
        future = fb_prophet_model.make_future_dataframe(periods=periods)
        fcst = fb_prophet_model.predict(future)
        # Positions whose observed value lies above the upper bound.
        indices = []
        for k in range(len(fcst)):
            if fcst["yhat_upper"][k] < my_dict[i]["CES"][k]:
                indices.append(k)
        # print(indices)
        ys = [my_dict[i]["CES"][j] for j in indices]
        # print(ys)
        # plt.scatter(indices, ys,color="r", marker='x')
        if indices:
            for q in indices:
                print(my_dict[i]["WebSessionIDs"][q])
                print(my_dict[i]["Texts"][q])
                if my_dict[i]["WebSessionIDs"][q] == "DIFFERENT INTENT":
                    if "xx" not in my_dict[i]["Texts"][q]:
                        # Empty separator row, then the flagged text itself.
                        # NOTE(review): nothing is appended to
                        # `turn_predicted` in this branch, so the three lists
                        # can end up with different lengths and the DataFrame
                        # construction at the end would then fail -- confirm.
                        websessionids.append("")
                        convs.append("")
                        websessionids.append("DIFFERENT INTENT")
                        convs.append(my_dict[i]["Texts"][q])
                # no trickery
                else:
                    # All inputs of this web session, in asking order.
                    mask = (all_intents_text_df['WebSessionID'] == my_dict[i]
                            ["WebSessionIDs"][q])
                    test = all_intents_text_df.loc[mask]
                    test = test.sort_values(by=['Asked_Date_Time'])
                    print(test["Input"].values)
                    # Each session is emitted at most once.
                    if my_dict[i]["WebSessionIDs"][q] not in websessionids:
                        if "xx" not in my_dict[i]["Texts"][q]:
                            # Empty separator row in all three columns.
                            websessionids.append("")
                            convs.append("")
                            turn_predicted.append("")
                            convs += list(test["Input"].values)
                            websessionids += len(test["Input"].values) * [
                                my_dict[i]["WebSessionIDs"][q]
                            ]
                            # The flagged text goes on the first row of the
                            # conversation; this indexing needs at least one
                            # matching row in `test`.
                            turn_predicted_value = [""] * len(
                                test["Input"].values)
                            turn_predicted_value[0] = my_dict[i]["Texts"][q]
                            turn_predicted += turn_predicted_value
        # plt.show()
        print("\n\n---------------------------------\n\n")
    tagging_df = pd.DataFrame({
        "WebSessionID": websessionids,
        "Full Conversation": convs,
        "Preprocessed Turn Predicted to be Anomalous": turn_predicted
    })
    return tagging_df
# Prophet input columns: y is the natural log of `distinct_freq_sum`, ds is
# the `date_hour` column.
view_hour['y'] = np.log(view_hour['distinct_freq_sum'])
view_hour['ds'] = view_hour['date_hour']
view_hour.head(5)
#%%
## Prophet1
# NOTE(review): an earlier comment here said the uncertainty interval is set
# to 95%, but no `interval_width` is passed, so Prophet's default applies.
m = Prophet()
# NOTE(review): Prophet documents `period` in days, so period=24 describes a
# 24-day cycle rather than a 24-hour one -- confirm the intent.
m.add_seasonality(name='hourly', period=24, fourier_order=2)
m.fit(view_hour);
#%%
## Create a dataframe for the future dates (24 further hourly rows).
## The tail will only display the time periods without the forecasted values
future = m.make_future_dataframe(periods=24,freq='H')
future.tail()
#%%
## This is the data that is exponentiated below
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
#%%
## This is the data that retains the log transform
## Note that the predict function will create a df that contains
## many period features(e.g., trend, daily, hourly, weekly, seasonal
## along with _upper and _lower ci's). Execute a .info() against
## the dataframe to see all the elements.
## This creates a dataframe with just the 4 elements below
forecast1 = m.predict(future)
def _prophet_log_forecast(raw_data, column, num_days, seasonality):
    """Fit Prophet on log(``column``) and return the forecast (log scale)."""
    history = raw_data.filter(['Date', column], axis=1)
    history.columns = ['ds', 'y']
    history['y'] = np.log(history['y'])
    model = Prophet()
    model.add_seasonality(name='monthly',
                          period=seasonality,
                          fourier_order=10)
    model.fit(history)
    future = model.make_future_dataframe(periods=num_days)
    return pd.DataFrame(model.predict(future))


def _smoothed_exp_yhat(forecast, name):
    """Back-transform ``yhat`` and Gaussian-smooth it into a column ``name``."""
    values = np.exp(forecast['yhat'])
    # The smoothing width is the standard deviation of the values themselves.
    sigma = values.std()
    return pd.DataFrame(filters.gaussian_filter1d(values, sigma),
                        columns=[name])


def prophet_forecast(raw_data_with_filter: pd.DataFrame,
                     num_days,
                     seasonality=30):
    """Forecast Close, High, Low and Volume ``num_days`` ahead with Prophet.

    Each series is modelled separately on its natural log with an extra
    'monthly' seasonality of ``seasonality`` days.  Close, High and Low are
    exponentiated and Gaussian-smoothed; Volume is only exponentiated.

    Parameters
    ----------
    raw_data_with_filter : frame with ``Date``, ``Close``, ``High``, ``Low``
        and ``Volume`` columns.
    num_days : number of future periods to forecast and to return.
    seasonality : period (in days) of the added 'monthly' seasonality.

    Returns
    -------
    DataFrame with columns ``Date``, ``Close``, ``High``, ``Low``, ``Volume``
    holding the last ``num_days`` rows of the forecast.
    """
    forecast_close = _prophet_log_forecast(raw_data_with_filter, 'Close',
                                           num_days, seasonality)
    forecast_output_df_close = pd.DataFrame(forecast_close.ds).join(
        _smoothed_exp_yhat(forecast_close, 'yhat'))
    forecast_output_df_close.columns = ['Date', 'Close']

    forecast_output_df_high = _smoothed_exp_yhat(
        _prophet_log_forecast(raw_data_with_filter, 'High', num_days,
                              seasonality), 'High')

    forecast_output_df_low = _smoothed_exp_yhat(
        _prophet_log_forecast(raw_data_with_filter, 'Low', num_days,
                              seasonality), 'Low')

    # Volume is back-transformed but not smoothed.
    forecast_volume = _prophet_log_forecast(raw_data_with_filter, 'Volume',
                                            num_days, seasonality)
    forecast_output_df_volume = pd.DataFrame(np.exp(forecast_volume.yhat))
    forecast_output_df_volume.rename(columns={'yhat': 'Volume'}, inplace=True)

    # All frames share the same positional index, so the joins line up row by
    # row; only the forecast horizon is returned.
    forecast_output_df = forecast_output_df_close.join(
        forecast_output_df_high.join(
            forecast_output_df_low.join(forecast_output_df_volume))).tail(
                num_days)
    return forecast_output_df
def create_prophet_m(source_name,z1,delay):
    """Fit Prophet on ``z1.bw`` and score it on the last ``delay`` rows.

    Two models are fitted with identical hyper-parameters:

    * a "realtime" model on the whole series, used to forecast ``delay``
      hourly steps past the end of the data;
    * an evaluation model on everything except the last ``delay`` rows,
      whose forecast is compared against those held-out rows.

    :param source_name: Label used as the index of the metrics frame and in
        the printed report.
    :param z1: Object exposing a ``bw`` pandas Series. After
        ``reset_index()`` its index becomes Prophet's ``ds`` column and its
        values become ``y`` (presumably an hourly datetime index, given
        ``freq='H'`` -- confirm against the caller).
    :param delay: Number of trailing rows held out for testing, and number of
        future hourly steps forecast by the realtime model.
    :returns: ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame indexed by ``source_name`` (empty when no test
        row could be scored), ``model``/``forecast`` are the evaluation model
        and its full forecast, ``pred_df`` holds the scored test rows and
        ``pred_r`` the realtime ``ds``/``yhat`` predictions.
    """
    import math

    n_obs = len(z1)
    train_end_index = len(z1.bw) - delay

    # reset_index() returns fresh frames, so edits below never touch z1.
    train_df = z1.bw.iloc[0:train_end_index].reset_index()
    train_df.columns = ['ds', 'y']
    test_df = z1.bw.iloc[train_end_index:n_obs].reset_index()
    test_df.columns = ['ds', 'y']
    full_df = z1.bw.iloc[0:n_obs].reset_index()
    full_df.columns = ['ds', 'y']

    # --- removing outliers in trainset ---
    # When the maximum sits far above the median, blank out every value at
    # or above twice the median.
    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)
    print(max(train_df.y))
    if (q100 - q50) >= (2 * q50):
        print('ind')
        train_df.loc[train_df.y >= (2 * q50), 'y'] = None

    # -- Realtime prediction: fit on everything, forecast `delay` steps --
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Positional slice: the rows after the history are the future steps.
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_obs:n_obs + delay])
    pred_r = pred_r.reset_index()

    # -- Evaluation model: fit on the training part, forecast the test span --
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_obs])
    pred = pred.reset_index()

    # Keep only test rows that have both an observation and a prediction.
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        ape = pred_df['APE']
        MAPE = ape.mean()
        # Error rates are quantiles of the APE column expressed as fractions.
        # They must be scalars: a whole-frame quantile would include the
        # datetime `ds` column and could not align with index=[source_name].
        min_error_rate = ape.quantile(0) / 100
        max_error_rate = ape.quantile(1) / 100
        median_error_rate = ape.quantile(.50) / 100
        std_MAPE = math.sqrt(((ape - MAPE) ** 2).mean())
        print("App name:", source_name)
        print("MSE :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)
        # MAPE over the rows strictly below the 98th percentile of APE.
        q98 = ape.quantile(0.98)
        mape_q98 = ape[ape < q98].mean()
        df = pd.DataFrame({'length': n_obs,
                           'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'std_mape': std_MAPE,  # standard deviation of APE around MAPE
                           'min_error_rate': min_error_rate,
                           'max_error_rate': max_error_rate,
                           'median_error_rate': median_error_rate,
                           'test_mape_98': mape_q98},
                          index=[source_name])
    return (df, model, forecast, pred_df, pred_r)
def plot_raw_data():
    """Draw the raw series and, on request, a Prophet forecast.

    Plots ``data['y']`` against ``data['date']`` with a range slider. When
    the "predict" button is pressed, fits a Prophet model on ``df``,
    forecasts ``no_of_days`` further periods and renders the forecast table,
    the forecast plot and the component plot. ``data``, ``df`` and
    ``no_of_days`` are read from the enclosing module scope.
    """
    raw_fig = go.Figure()
    close_trace = go.Scatter(x=data['date'], y=data['y'], name="Close")
    raw_fig.add_trace(close_trace)
    raw_fig.layout.update(
        title_text='raw data with Rangeslider',
        xaxis_rangeslider_visible=True,
    )
    st.plotly_chart(raw_fig)

    # Nothing more to render until the user asks for a prediction.
    if not st.button("predict"):
        return

    prophet_options = {
        'changepoint_range': 0.8,
        'yearly_seasonality': 'auto',
        'weekly_seasonality': 'auto',
        'daily_seasonality': True,
        'seasonality_mode': 'multiplicative',
        'changepoint_prior_scale': 0.05,
    }
    model = Prophet(**prophet_options)
    model.fit(df)

    horizon_frame = model.make_future_dataframe(periods=no_of_days)
    forecast = model.predict(horizon_frame)

    st.subheader("Prediction Data")
    st.write(forecast.head(30))

    st.subheader(f'Forecast plot for {no_of_days} days')
    forecast_fig = plot_plotly(model, forecast)
    st.write(forecast_fig)

    st.subheader("Forecast components")
    components_fig = model.plot_components(forecast)
    st.write(components_fig)
def create_prophet_m(self,app_name,z1,delay=24):
    """Fit Prophet on ``z1.bw`` and score it on the last ``delay`` rows.

    Two models are fitted with identical hyper-parameters, each on data whose
    outliers were blanked out first:

    * a "realtime" model on the whole series, used to forecast ``delay``
      hourly steps past the end of the data;
    * an evaluation model on everything except the last ``delay`` rows,
      whose forecast is compared against those held-out rows.

    :param app_name: Label used as the index of the metrics frame.
    :param z1: Object exposing a ``bw`` pandas Series. After
        ``reset_index()`` its index becomes Prophet's ``ds`` column and its
        values become ``y`` (presumably an hourly datetime index, given
        ``freq='H'`` -- confirm against the caller).
    :param delay: Number of trailing rows held out for testing, and number of
        future hourly steps forecast by the realtime model. Defaults to 24.
    :returns: ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame indexed by ``app_name`` (empty when no test row
        could be scored), ``model``/``forecast`` are the evaluation model and
        its full forecast, ``pred_df`` holds the scored test rows and
        ``pred_r`` the realtime ``ds``/``yhat`` predictions.
    """
    import math
    import warnings

    # Installed before the remaining imports so their import-time warnings
    # are silenced too. NOTE: this changes the process-wide warning filter.
    warnings.filterwarnings("ignore")

    import pandas as pd
    from fbprophet import Prophet
    from sklearn.metrics import mean_squared_error as mse

    def _as_prophet_frame(series):
        # reset_index() returns a fresh frame, so z1 itself is never edited.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    def _blank_outliers(frame):
        # When the maximum sits far above the median, blank out every value
        # at or above twice the 75th percentile.
        q50 = frame.y.median()
        q75 = frame.y.quantile(.75)
        q100 = frame.y.quantile(1)
        if (q100 - q50) >= (2 * q75):
            frame.loc[frame.y >= (2 * q75), 'y'] = None
        return frame

    def _new_model():
        return Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)

    n_obs = len(z1)

    # --- Realtime prediction: fit on everything, forecast `delay` steps ---
    full_df = _blank_outliers(_as_prophet_frame(z1.bw.iloc[0:n_obs]))
    model_r = _new_model()
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Positional slice: the rows after the history are the future steps.
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_obs:n_obs + delay])
    pred_r = pred_r.reset_index()

    # --- Evaluation: fit on the training part, forecast the test span ---
    train_end_index = len(z1.bw) - delay
    train_df = _blank_outliers(_as_prophet_frame(z1.bw.iloc[0:train_end_index]))
    test_df = _as_prophet_frame(z1.bw.iloc[train_end_index:n_obs])

    model = _new_model()
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_obs])
    pred = pred.reset_index()

    # Keep only test rows that have both an observation and a prediction.
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()
    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat
        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        ape = pred_df['APE']
        MAPE = ape.mean()
        # MAPE over the rows strictly below the 98th percentile of APE.
        mape_q98 = ape[ape < ape.quantile(0.98)].mean()
        df = pd.DataFrame({'length': n_obs,
                           'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'test_mape_98': mape_q98},
                          index=[app_name])
    return (df, model, forecast, pred_df, pred_r)
def run(
    self,
    database="timeseries",
    source_table="retail_sales",
    target_table="predicted_sales",
    start_date="1993-01-01",
    end_date="2016-05-31",
    period=365,
    with_aws=False,
):
    """Fit a Prophet model on stored sales and write its forecast back.

    Reads the ``ds``/``y`` rows of ``source_table`` that fall between
    ``start_date`` and ``end_date``, fits a multiplicative-seasonality
    Prophet model, forecasts ``period`` further periods and overwrites
    ``target_table`` with the full forecast frame.

    :param database: Database the client connects to, defaults to
        "timeseries"
    :type database: str
    :param source_table: Table holding the historical ``ds``/``y`` rows,
        defaults to "retail_sales"
    :type source_table: str
    :param target_table: Table overwritten with the forecast, defaults to
        "predicted_sales"
    :type target_table: str
    :param start_date: Lower bound (inclusive, SQL ``between``) on ``ds``,
        defaults to "1993-01-01"
    :type start_date: str
    :param end_date: Upper bound (inclusive, SQL ``between``) on ``ds``,
        defaults to "2016-05-31"
    :type end_date: str
    :param period: Number of future periods to forecast; coerced with
        ``int()``, defaults to 365
    :type period: int
    :param with_aws: If True, ``self._upload_graph`` is called with the
        model and forecast, defaults to False
    :type with_aws: bool
    """
    import pytd
    import pandas as pd
    from fbprophet import Prophet

    # The value may arrive as a string; Prophet needs an integer count.
    horizon = int(period)

    # Credentials and endpoint come from the environment.
    td_client = pytd.Client(
        apikey=os.getenv("TD_API_KEY"),
        endpoint=os.getenv("TD_API_SERVER"),
        database=database,
    )

    # Prophet expects a `ds` date column and a `y` target column.
    # NOTE(review): table name and dates are interpolated straight into the
    # SQL text -- callers must pass trusted values.
    sales_query = (
        f" select ds, y from {source_table}"
        f" where ds between '{start_date}' and '{end_date}' "
    )
    history = pd.DataFrame(**td_client.query(sales_query))

    prophet_model = Prophet(seasonality_mode="multiplicative")
    prophet_model.fit(history)

    future_dates = prophet_model.make_future_dataframe(periods=horizon)
    forecast = prophet_model.predict(future_dates)

    if with_aws:
        self._upload_graph(prophet_model, forecast)

    # Timestamps cannot be serialized on upload ("TypeError: can't serialize
    # Timestamp"), so store `ds` as plain strings.
    forecast["ds"] = forecast["ds"].apply(str)

    td_client.load_table_from_dataframe(
        forecast, target_table, if_exists="overwrite"
    )