Beispiel #1
0
    def test_logistic_floor(self):
        """Check that a logistic fit with a floor is approximately shift invariant.

        A model is fit with floor=10 / cap=80, then a second model is fit on
        the same history with y, floor and cap all raised by 10.  After
        removing that offset from the second forecast, the two ``yhat``
        series must agree to within 1.
        """
        half = DATA.shape[0] // 2
        shift = 10.

        train = DATA.head(half).copy()
        train['floor'] = 10.
        train['cap'] = 80.
        horizon = DATA.tail(half).copy()
        horizon['cap'] = 80.
        horizon['floor'] = 10.

        baseline = Prophet(growth='logistic')
        baseline.fit(train, algorithm='Newton')
        self.assertTrue(baseline.logistic_floor)
        self.assertTrue('floor' in baseline.history)
        self.assertAlmostEqual(baseline.history['y_scaled'][0], 1.)
        baseline_fcst = baseline.predict(horizon)

        # Same problem, translated upwards by `shift`.
        shifted_train = train.copy()
        for column in ('y', 'floor', 'cap'):
            shifted_train[column] += shift
        for column in ('cap', 'floor'):
            horizon[column] += shift

        shifted = Prophet(growth='logistic')
        shifted.fit(shifted_train, algorithm='Newton')
        self.assertAlmostEqual(shifted.history['y_scaled'][0], 1.)
        shifted_fcst = shifted.predict(horizon)
        shifted_fcst['yhat'] -= shift

        # Check for approximate shift invariance
        gap = np.abs(baseline_fcst['yhat'] - shifted_fcst['yhat'])
        self.assertTrue((gap < 1).all())
Beispiel #2
0
 def test_fit_changepoint_not_in_history(self):
     """Fit and predict with a user changepoint that lies in a gap of the history.

     The training set drops every row between 2013-01-01 and 2014-01-01, so
     the explicit changepoint '2013-06-06' has no matching observation.  The
     test only requires that fit and predict complete without raising.
     """
     outside_gap = (DATA['ds'] < '2013-01-01') | (DATA['ds'] > '2014-01-01')
     # Copy so the level shift below never writes into the shared DATA fixture.
     train = DATA[outside_gap].copy()
     # Shift only the target column: adding 20 to the whole row selection
     # would also be applied to the 'ds' column.
     train.loc[train['ds'] > '2014-01-01', 'y'] += 20
     future = pd.DataFrame({'ds': DATA['ds']})
     forecaster = Prophet(changepoints=['2013-06-06'])
     forecaster.fit(train)
     forecaster.predict(future)
Beispiel #3
0
    def test_fit_predict(self):
        """Smoke test: fit on the first half of DATA, predict on the second half."""
        half = DATA.shape[0] // 2
        model = Prophet()
        model.fit(DATA.head(half))
        model.predict(DATA.tail(half))
Beispiel #4
0
    def test_fit_predict_no_seasons(self):
        """Smoke test: fit/predict with weekly and yearly seasonality disabled."""
        half = DATA.shape[0] // 2
        model = Prophet(weekly_seasonality=False, yearly_seasonality=False)
        model.fit(DATA.head(half))
        model.predict(DATA.tail(half))
Beispiel #5
0
    def test_fit_predict_no_changepoints(self):
        """Smoke test: fit/predict with the number of changepoints set to zero."""
        half = DATA.shape[0] // 2
        model = Prophet(n_changepoints=0)
        model.fit(DATA.head(half))
        model.predict(DATA.tail(half))
Beispiel #6
0
 def test_fit_predict_duplicates(self):
     """Smoke test: fit on a history in which every timestamp occurs twice.

     The first half of DATA is stacked with a copy of itself whose ``y`` is
     raised by 10, then the model predicts on the second half's dates.
     """
     half = DATA.shape[0] // 2
     original = DATA.head(half).copy()
     shifted = DATA.head(half).copy()
     shifted['y'] += 10
     # DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows
     # the same way (original index labels are kept).
     train = pd.concat([original, shifted])
     future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
     forecaster = Prophet()
     forecaster.fit(train)
     forecaster.predict(future)
def build_forecast(
        data,
        forecast_range,
        truncate_range=0
):
    """Build a forecast report for publishing.

    Note:
        ``data['date']`` is converted to datetime in place, so the caller's
        frame is modified.

    Args:
        data (:obj:`pandas.DataFrame`): history with ``date`` and ``avgPrice``
            columns, used to fit the model
        forecast_range (int): number of periods past the last observed date
            to forecast
        truncate_range (int, optional): when > 0, keep only rows dated later
            than ``last observed date - truncate_range`` days

    Returns:
        pandas.DataFrame: collection of data + forecast info
            ['date', 'avgPrice', 'yhat', 'yhat_low', 'yhat_high', 'prediction']
            ``prediction`` is True for rows after the last observed date.

    """
    data['date'] = pd.to_datetime(data['date'])
    filter_date = data['date'].max()

    ## Build DataFrame ##
    predict_df = pd.DataFrame()
    predict_df['ds'] = data['date']
    predict_df['y'] = data['avgPrice']

    ## Run prediction ##
    # https://facebookincubator.github.io/prophet/docs/quick_start.html#python-api
    model = Prophet()
    model.fit(predict_df)
    future = model.make_future_dataframe(periods=forecast_range)
    # Predict exactly once; the forecast is the expensive step.
    forecast = model.predict(future)

    # Right join keeps every forecast row; future rows have no observed 'y'.
    predict_df = pd.merge(
        predict_df, forecast,
        on='ds',
        how='right'
    )

    ## Build report for endpoint ##
    report = pd.DataFrame()
    report['date'] = pd.to_datetime(predict_df['ds'], format='%Y-%m-%d')
    report['avgPrice'] = predict_df['y']
    report['yhat'] = predict_df['yhat']
    report['yhat_low'] = predict_df['yhat_lower']
    report['yhat_high'] = predict_df['yhat_upper']
    report['prediction'] = False
    report.loc[report.date > filter_date, 'prediction'] = True

    if truncate_range > 0:
        cut_date = filter_date - timedelta(days=truncate_range)
        report = report.loc[report.date > cut_date]

    return report
Beispiel #8
0
 def test_fit_predict_constant_history(self):
     """A constant history (20, then 0) must give that constant as the last yhat."""
     half = DATA.shape[0] // 2
     train = DATA.head(half).copy()
     for level in (20, 0):
         train['y'] = level
         future = pd.DataFrame({'ds': DATA['ds'].tail(half)})
         model = Prophet()
         model.fit(train)
         forecast = model.predict(future)
         self.assertEqual(forecast['yhat'].values[-1], level)
def add_prophet_features(df_shop):
    """Attach Prophet forecasts to a shop frame as ``prophet_*`` columns.

    For each ``m`` from ``biweek_id.max() - 1`` down to 1, a model is fit on
    the rows with ``biweek_id >= m`` (windows with fewer than 7 non-null
    ``pays_count`` values are skipped) and asked for 14 periods beyond its
    history.  All forecasts are stacked, prefixed with ``prophet_`` and
    left-joined onto ``df_shop`` by day.  When several forecasts cover the
    same ``days_from_beginning``, the last one is kept.

    Args:
        df_shop: frame with ``day``, ``pays_count``, ``biweek_id`` and
            ``days_from_beginning`` columns.

    Returns:
        A frame with one row per row of ``df_shop`` plus the forecast columns.

    Raises:
        ValueError: if no window had enough observations to fit a model.
        Exception: if the result does not have as many rows as ``df_shop``.
    """
    history = df_shop[['day', 'pays_count']].rename(
        columns={'day': 'ds', 'pays_count': 'y'})

    forecasts = []
    for m in range(df_shop.biweek_id.max() - 1, 0, -1):
        df_train = history[df_shop.biweek_id >= m]
        if df_train.y.notnull().sum() < 7:
            continue

        model = Prophet().fit(df_train)
        future = model.make_future_dataframe(14, include_history=False)
        forecasts.append(model.predict(future))

    if not forecasts:
        # Fail with a clear message instead of pd.concat's generic error.
        raise ValueError("no biweek window had at least 7 observations to fit")

    df_res = pd.concat(forecasts)
    # Name columns from the stacked frame itself, not from a loop variable.
    df_res.columns = ['prophet_%s' % c for c in df_res.columns]

    df_res = df_shop.merge(df_res, how='left', left_on='day', right_on='prophet_ds')
    # 'prophet_t' is only present if the forecast frame has a 't' column.
    df_res = df_res.drop(columns=['prophet_t', 'prophet_ds'], errors='ignore')

    df_res = df_res.drop_duplicates('days_from_beginning', keep='last')

    if len(df_res) != len(df_shop):
        raise Exception("size doesn't match")

    return df_res
def get_predictions(validate, train):
    """Forecast unit sales for a fixed list of example items in store 47.

    For each example item present in ``validate`` for store 47, a Prophet
    model with yearly seasonality is fit on the last 365 rows of that
    item's date-filled training history, and its forecast is joined to the
    validation rows by date.  Forecasts are clipped to [0, 999999] and NaN
    is replaced by 0.

    Args:
        validate: frame with ``id``, ``date``, ``store_nbr``, ``item_nbr``.
        train: frame with ``date``, ``store_nbr``, ``item_nbr``, ``unit_sales``.

    Returns:
        tuple: ``(result, problem_pairs)`` where ``result`` is a frame with
        columns ``['id', 'unit_sales']`` and ``problem_pairs`` lists the
        ``(item_nbr, store_nbr)`` pairs whose fit raised ``ValueError``.
    """
    CV_SIZE = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    TRAIN_SIZE = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327,
                     1239746, 765520, 1463867, 1010755, 1473396]

    total_dates = train['date'].unique()
    problem_pairs = []
    frames = []

    store47examples = validate.loc[(validate.store_nbr == store_nbr)
                                   & (validate.item_nbr.isin(example_items))]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))
    # Group by a scalar key so the group name is the item number itself;
    # a one-element list key yields 1-tuples on newer pandas.
    for name, y in store47examples.groupby('item_nbr'):
        item_nbr = int(name)
        df = train[(train.item_nbr == item_nbr) & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates).sort_values(by=['date'])
        X = df[-TRAIN_SIZE:][['date', 'unit_sales']].rename(
            columns={'date': 'ds', 'unit_sales': 'y'})

        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue

        future = m.make_future_dataframe(periods=CV_SIZE)
        pred = m.predict(future)
        # The inner join on date keeps only the rows that exist in validate.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    # Concatenate once: DataFrame.append was removed in pandas 2.0 and
    # re-copies the accumulated frame on every call.
    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])
    return (result, problem_pairs)
Beispiel #11
0
def run(forecast_days=365):
    """Print per-date ledger totals, then fit and plot a Prophet forecast.

    Reads ``./secret/ledger.dat`` and prints a running "£" total whenever the
    posting date changes.  Independently of that, ``./testing.csv`` is
    loaded, its ``y`` column is multiplied by 100, and a Prophet model is
    fit, forecast and plotted.

    Args:
        forecast_days: number of periods to forecast past the end of the
            CSV history.  The default of 365 is the horizon used in the
            Prophet quick-start and is an assumption, since the forecast
            frame was previously never built.
    """
    journal = ledger.read_journal("./secret/ledger.dat")
    last_post = None
    amount = 0

    for post in journal.query(""):
        if last_post is None or post.date == last_post.date:
            if str(post.amount.commodity) != "£":
                continue
            amount = amount + post.amount
        else:
            # NOTE(review): this prints the NEW posting's date next to the
            # total accumulated for the previous date, drops the amount of
            # the posting that triggers the change, and never prints the
            # final date's total -- confirm the intended aggregation.
            print("{} , {}".format(post.date, amount))
            amount = 0
        last_post = post

    df = pd.read_csv('./testing.csv')
    df['y'] = np.multiply(100, df['y'])

    m = Prophet()
    m.fit(df)

    # Build the frame to predict on; without it `future` is undefined.
    future = m.make_future_dataframe(periods=forecast_days)
    forecast = m.predict(future)

    m.plot(forecast)
    m.plot_components(forecast)
Beispiel #12
0
 def test_subdaily_holidays(self):
     """The 'special_day' holiday component must be zero on exactly 575 rows.

     The model is fit on DATA2 with a single holiday on 2017-01-02 and
     ``predict()`` is called without a frame.
     """
     special = pd.DataFrame({
         'ds': pd.to_datetime(['2017-01-02']),
         'holiday': ['special_day'],
     })
     model = Prophet(holidays=special)
     model.fit(DATA2)
     forecast = model.predict()
     unaffected_rows = sum(forecast['special_day'] == 0)
     self.assertEqual(unaffected_rows, 575)
def hello():
    """Fit Prophet on the log of the series at ``url`` and return the forecast as JSON.

    The CSV read from the module-level ``url`` must provide the ``ds`` and
    ``y`` columns Prophet expects.  ``y`` is log-transformed before fitting
    and the forecast is not transformed back, so the returned values are in
    log space.

    Returns:
        str: the forecast (history plus 365 future periods) serialized with
        ``DataFrame.to_json(orient='table')``.
    """
    print('Hello, world!')
    df = pd.read_csv(url)
    df['y'] = np.log(df['y'])
    m = Prophet()
    m.fit(df)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    return forecast.to_json(orient='table')
Beispiel #14
0
def train_prophet(df, modelDir, confidence=0.99):
	"""Fit Prophet on ``df``, predict over its history and save the result as CSV.

	Args:
		df: frame with the ``ds`` / ``y`` columns Prophet expects.
		modelDir: directory that receives ``forecasted_<confidence>.csv``.
		confidence: passed to Prophet as ``interval_width``.

	Returns:
		The forecast frame with an added ``y`` column holding the history.
	"""
	m = Prophet(
		yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
	)
	# Fit inside the project's output-suppression context manager.
	with suppress_stdout_stderr():
		m.fit(df)

	# Parenthesized single-argument print behaves the same on Python 2 and 3.
	print("PREDICTING!")
	# periods=0 adds no future rows: the prediction covers the history only.
	future = m.make_future_dataframe(periods=0)
	forecast = m.predict(future)
	# Merge in the historical data.
	# NOTE(review): this assignment aligns on index labels, not on 'ds';
	# it assumes df has a default 0..n-1 index in the same row order as
	# the forecast -- confirm against callers.
	forecast["y"] = df.y.astype(float)
	# Persist the forecast (not the fitted model) for later reuse.
	forecast.to_csv(
		pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
	)
	return forecast
def get_prophet_forecasting(group_name, data, logs=True):
    """Fit a logistic-growth Prophet model for one group and forecast 12 months.

    Leading rows whose ``y`` is exactly 0 are dropped, non-positive values
    are set to 0, and (when ``logs`` is true) positive values are replaced
    by their natural log before fitting.  The cap is the maximum of the
    transformed ``y``.  With ``logs`` true, ``yhat``, ``yhat_lower`` and
    ``yhat_upper`` are mapped back with ``exp`` exactly once.

    The caller's ``data`` frame is not modified; work is done on a copy.

    Args:
        group_name: indexable whose first two items identify the group.
        data: frame with ``ds`` and ``y`` columns.
        logs: fit on log-transformed values when true.

    Returns:
        tuple: ``([group_name[0], group_name[1], yhat_values], [model, forecast])``.
        If nothing is left after trimming, or fitting/predicting fails, the
        result is ``([group_name[0], group_name[1], np.zeros(12)], [None, None])``.
    """
    # Remove first null rows (positional scan, so any index works).
    nonzero_pos = np.flatnonzero(data['y'].values != 0)
    first_row = nonzero_pos[0] if nonzero_pos.size else len(data)
    # Explicit copy: the assignments below must not leak into the caller's frame.
    data = data.iloc[first_row:].copy()

    if len(data) == 0:
        return [group_name[0], group_name[1], np.zeros(12)], [None, None]

    data['y'] = data['y'].astype(float)
    non_zero_inds = data['y'] > 0
    data.loc[data['y'] <= 0, 'y'] = 0.0

    if logs:
        data.loc[non_zero_inds, 'y'] = np.log(data.loc[non_zero_inds, 'y'])

    with suppress_stdout_stderr():
        try:
            m = Prophet(growth='logistic',
                        weekly_seasonality=False,
                        daily_seasonality=False)
            m.add_seasonality(name='monthly', period=30.5, fourier_order=5)

            cap = max(data.y)
            data['cap'] = cap
            m.fit(data)

            future = m.make_future_dataframe(periods=1 * 12, freq='M')
            future['cap'] = cap
            frcst = m.predict(future)
        except Exception:
            # Best effort: a group that cannot be modelled yields a zero forecast.
            return [group_name[0], group_name[1], np.zeros(12)], [None, None]

    # Undo the log transform once, and only if it was applied.
    if logs:
        for field in ('yhat', 'yhat_lower', 'yhat_upper'):
            frcst[field] = np.exp(frcst[field].values)

    res = frcst['yhat'].values

    return [group_name[0], group_name[1], res], [m, frcst]
Beispiel #16
0
 def test_added_regressors(self):
     """Exercise extra regressors end to end.

     Covers: fit/predict rejecting frames that lack a registered regressor,
     the stored standardization parameters, the standardized values, the
     seasonal feature matrix and the additivity of forecast components.
     """
     model = Prophet()
     model.add_regressor('binary_feature', prior_scale=0.2)
     model.add_regressor('numeric_feature', prior_scale=0.5)
     model.add_regressor('binary_feature2', standardize=True)

     frame = DATA.copy()
     frame['binary_feature'] = [0] * 255 + [1] * 255
     frame['numeric_feature'] = range(510)
     # Require all regressors in df
     with self.assertRaises(ValueError):
         model.fit(frame)
     frame['binary_feature2'] = [1] * 100 + [0] * 410
     model.fit(frame)

     # Check that standardizations are correctly set
     regressors = model.extra_regressors
     self.assertEqual(
         regressors['binary_feature'],
         {'prior_scale': 0.2, 'mu': 0, 'std': 1, 'standardize': 'auto'},
     )
     numeric = regressors['numeric_feature']
     self.assertEqual(numeric['prior_scale'], 0.5)
     self.assertEqual(numeric['mu'], 254.5)
     self.assertAlmostEqual(numeric['std'], 147.368585, places=5)
     binary2 = regressors['binary_feature2']
     self.assertEqual(binary2['prior_scale'], 10.)
     self.assertAlmostEqual(binary2['mu'], 0.1960784, places=5)
     self.assertAlmostEqual(binary2['std'], 0.3974183, places=5)

     # Check that standardization is done correctly
     scaled = model.setup_dataframe(frame.copy())
     self.assertEqual(scaled['binary_feature'][0], 0)
     self.assertAlmostEqual(scaled['numeric_feature'][0], -1.726962, places=4)
     self.assertAlmostEqual(scaled['binary_feature2'][0], 2.022859, places=4)

     # Check that feature matrix and prior scales are correctly constructed
     seasonal_features, prior_scales = model.make_all_seasonality_features(scaled)
     for name in ('binary_feature', 'numeric_feature', 'binary_feature2'):
         self.assertIn(name, seasonal_features)
     self.assertEqual(seasonal_features.shape[1], 29)
     self.assertEqual(set(prior_scales[26:]), {0.2, 0.5, 10.})

     # Check that forecast components are reasonable
     future = pd.DataFrame({
         'ds': ['2014-06-01'],
         'binary_feature': [0],
         'numeric_feature': [10],
     })
     with self.assertRaises(ValueError):
         model.predict(future)
     future['binary_feature2'] = 0
     forecast = model.predict(future)
     self.assertEqual(forecast.shape[1], 31)
     self.assertEqual(forecast['binary_feature'][0], 0)

     def first(column):
         return forecast[column][0]

     self.assertAlmostEqual(
         first('extra_regressors'),
         first('numeric_feature') + first('binary_feature2'),
     )
     self.assertAlmostEqual(
         first('seasonalities'),
         first('yearly') + first('weekly'),
     )
     self.assertAlmostEqual(
         first('seasonal'),
         first('seasonalities') + first('extra_regressors'),
     )
     self.assertAlmostEqual(
         first('yhat'),
         first('trend') + first('seasonal'),
     )
Beispiel #17
0
def _mask_response_outliers(frame):
    """Set ``y`` to NaN in place where ``y >= 2 * median``, if max - median >= 2 * median."""
    q50 = frame.y.median()
    q100 = frame.y.quantile(1)
    if (q100 - q50) >= (2 * q50):
        frame.loc[frame.y >= (2 * q50), 'y'] = None


def _fit_hourly_model(history, periods):
    """Fit Prophet on ``history`` and forecast ``periods`` extra hourly steps, indexed by ``ds``."""
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(history)
    future = model.make_future_dataframe(periods=periods, freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    return model, forecast


def create_prophet_m(app_name, z1, delay=24):
    """Build a real-time forecast and a hold-out evaluation for one application.

    Two models are fit on ``z1.app_rsp_time`` (indexed by timestamp):

    * a "real-time" model on the full series, forecasting ``delay`` hourly
      steps past its end;
    * an evaluation model on everything except the last ``delay`` rows,
      scored against those held-out rows.

    Outliers are masked before each fit (see ``_mask_response_outliers``).

    Args:
        app_name: label used in the printed report and as the metrics index.
        z1: frame whose ``app_rsp_time`` column holds the series.
        delay: number of trailing rows held out / hourly steps forecast.

    Returns:
        tuple: ``(df, model, forecast, pred_df, pred_r)`` where ``df`` holds
        test RMSE / MAPE / 98%-trimmed MAPE (empty if no test row could be
        scored), ``model`` and ``forecast`` belong to the evaluation fit,
        ``pred_df`` joins held-out values with their predictions, and
        ``pred_r`` is the real-time forecast (``ds``, ``yhat``).
    """
    n_obs = len(z1)

    # --- Real-time prediction: fit on the whole series --- #
    full_df = z1.app_rsp_time.iloc[0:n_obs].reset_index()
    full_df.columns = ['ds', 'y']
    _mask_response_outliers(full_df)
    _, forecast_r = _fit_hourly_model(full_df, delay)
    # Rows past the history are the forecast proper (positional slice).
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[n_obs:(n_obs + delay)])
    pred_r = pred_r.reset_index()

    # --- Evaluation: hold out the last `delay` rows --- #
    train_end_index = len(z1.app_rsp_time) - delay
    train_df = z1.app_rsp_time.iloc[0:train_end_index].reset_index()
    test_df = z1.app_rsp_time.iloc[train_end_index:n_obs].reset_index()
    train_df.columns = ['ds', 'y']
    test_df.columns = ['ds', 'y']
    _mask_response_outliers(train_df)

    model, forecast = _fit_hourly_model(train_df, len(test_df))
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:n_obs])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        print("App name:", app_name)
        print("MSE  :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE over the rows below the 98th percentile of absolute error.
        mape_q98 = pred_df['APE'][pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

        df = pd.DataFrame({'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'test_mape_98': mape_q98},
                          index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
## Prophet1
# Fit a Prophet model on `view_hour` with one custom seasonality.
# NOTE(review): Prophet() is constructed without interval_width, so this code
# does not set a 95% uncertainty interval; the library default applies.
m = Prophet()
# NOTE(review): Prophet documents `period` in days, so period=24 describes a
# 24-day cycle; a 24-hour cycle would be period=1 -- confirm the intent.
m.add_seasonality(name='hourly', period=24, fourier_order=2)
m.fit(view_hour);


#%%
## Create a dataframe for the future dates: the history plus 24 hourly steps.
## The tail shows only the dates; it holds no forecasted values yet.
future = m.make_future_dataframe(periods=24,freq='H')
future.tail()

#%%
## Forecast over `future` and show the last predictions with their bounds.
## NOTE(review): no exp/log transform is applied anywhere in this section.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

#%%
## Second prediction over the same `future` frame, kept under a separate name.
## Note that the predict function will create a df that contains
##   many period features (e.g., trend, daily, hourly, weekly, seasonal
##   along with _upper and _lower ci's). Execute a .info() against
##   the dataframe to see all the elements.
## The expression below displays just the 4 columns listed.
forecast1 = m.predict(future)
forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail() 


#%%
## This works !
Beispiel #19
0
def main():
    """Handle the stock form: forecast 10 days of close prices and render the plot page.

    On POST, the submitted company's price history is fetched, Prophet is
    fit on the log of the ``Close`` column, and a 10-period forecast is
    made.  Dates, actual closes and back-transformed forecasts are written
    to ``static/numbers.csv`` and ``plot.html`` is rendered with the last
    actual close and the first forecast value.

    NOTE(review): for any other request method nothing is returned (the
    function yields ``None``) -- confirm the route only accepts POST.
    """
    if request.method != 'POST':
        return None

    stock = request.form['companyname']
    df_whole = get_historical_stock_price(stock)

    df = df_whole.filter(['Close'])
    df['ds'] = df.index
    # Model the log of the close price; forecasts are mapped back with exp below.
    df['y'] = np.log(df['Close'])
    # Positional access: a bare [-1] on a date-indexed Series is treated as
    # a label lookup by newer pandas.
    original_end = df['Close'].iloc[-1]

    model = Prophet()
    model.fit(df)

    num_days = 10
    future = model.make_future_dataframe(periods=num_days)
    forecast = model.predict(future)

    print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

    # Align history and forecast on the date index for export.
    df.set_index('ds', inplace=True)
    forecast.set_index('ds', inplace=True)

    viz_df = df.join(forecast[['yhat', 'yhat_lower', 'yhat_upper']],
                     how='outer')
    viz_df['yhat_scaled'] = np.exp(viz_df['yhat'])

    close_data = viz_df.Close
    forecasted_data = viz_df.yhat_scaled
    date = future['ds']
    # First forecast value past the history.
    forecast_start = forecasted_data.iloc[-num_days]

    export_data = zip_longest(date, close_data, forecasted_data, fillvalue='')
    # The with-block closes the file; no explicit close() is needed.
    with open('static/numbers.csv', 'w', encoding="ISO-8859-1",
              newline='') as myfile:
        wr = csv.writer(myfile)
        wr.writerow(("Date", "Actual", "Forecasted"))
        wr.writerows(export_data)

    return render_template("plot.html",
                           original=round(original_end, 2),
                           forecast=round(forecast_start, 2),
                           stock_tinker=stock.upper())
# This trend can be confirmed with the Prophet library, which has some robustness to outliers.
# It splits the time series into its time components - yearly, monthly and weekly.
# This is similar to running a Fourier analysis.
# The Prophet library can also account for holiday dates (not used below).

# %%
# Confirm trend with prophet (facebook)
from fbprophet import Prophet

# Prophet expects the timestamp column to be named "ds" and the target "y".
time_model = Prophet()
prophet_data = temp.loc[:, ["datetime", "count"]]
prophet_data.columns = ["ds", "y"]
time_model.fit(prophet_data)

# Show components (predicted over the training dates themselves)
forecast = time_model.predict(prophet_data)
fig_components = time_model.plot_components(forecast, weekly_start=1)

# Make future predictions: the history plus 365 further periods
future = time_model.make_future_dataframe(periods=365, include_history=True)
fig_pred = time_model.plot(time_model.predict(future),
                           xlabel="Date",
                           ylabel="Number of trips/day")

# Save both figures so the markdown cell below can embed them.
fig_components.savefig("images/prophet_comp.png")
fig_pred.savefig("images/prophet_pred.png")

# %% [markdown]
# ![](images/prophet_comp.png)
#
# This matches our conclusions that weekends are less popular overall, and there is a summer month boom.
Beispiel #21
0
def PlotSeries():
    """Render the forecast page, plotting a crime time series when the form is complete.

    Reads the target year ("Anos"), police station ("Delegacias") and crime
    type ("Crimes") from the submitted form.  When all three are present, a
    Prophet model with Brazilian holidays is fit on the matching series,
    forecast month by month up to the selected year, and the plot is saved
    under ``static/`` and passed to the template.  Otherwise the template is
    rendered without an image.
    """
    # Read the values selected on the page.
    select_ano = request.form.get("Anos", None)
    # select_mun = request.form.get("Municipios", None)
    select_mun = 'Brotas'
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    # All three selections are required; a missing year would otherwise
    # fail when building the file name below.
    if not (select_ano and select_dp and select_crime):
        return render_template("previsao.html")

    # Name of the plot file.
    # NOTE(review): the path is built from raw form values -- confirm they
    # are restricted to the page's own select options.
    img = 'static/plot' + select_ano + 'Brotas' + select_dp + select_crime + '.png'
    print(select_dp)

    # Fetch the series and rename its columns to what Prophet expects.
    df = getDataAtDB(select_mun, select_dp, select_crime)
    print(df.head())
    df['datas'] = pd.to_datetime(df['datas'])
    df.columns = ["ds", "y"]

    # Build and fit the model.
    m = Prophet(changepoint_prior_scale=0.05, changepoint_range=0.8)
    m.add_country_holidays(country_name='BR')
    m.fit(df)

    # Forecast: 12 month-start periods per year ahead; a past or current
    # year adds no future rows instead of a negative horizon.
    years_ahead = max(0, int(select_ano) - date.today().year)
    future = m.make_future_dataframe(periods=12 * years_ahead, freq='MS')
    forecast = m.predict(future)

    # Draw and save the plot, then close the figure so figures do not
    # accumulate across requests.
    fig = m.plot(forecast, figsize=(8, 4))
    try:
        plt.xlabel('Data')
        plt.ylabel('Ocorrencias')
        plt.gca().set_ylim(bottom=0)
        if select_dp != 'Todos':
            plt.title("Série temporal das ocorrências de " + select_crime +
                      " registradas no " + select_dp)
        else:
            plt.title("Série temporal das ocorrências de " + select_crime +
                      " registradas na cidade de " + select_mun)
        plt.savefig(img, bbox_inches='tight')
    finally:
        plt.close(fig)

    # Cross-validation and hyper-parameter search were prototyped here and
    # are intentionally left out of the request path.

    return render_template("previsao.html", image=img)
Beispiel #22
0
# Register the store and category ids as extra regressors, then fit.
# NOTE(review): `fb` is created before this excerpt; the ids are passed to
# the model as numeric regressor values -- confirm that is intended.
fb.add_regressor('store_id')
fb.add_regressor('cat_id')
fb.fit(df_cat_pred)
# 56 daily future dates, without the training history.
future = fb.make_future_dataframe(freq='D', periods=56, include_history=False)

# Build the regressor columns for the prediction frame.
s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
# NOTE(review): `c` is never used below; `cat1` is built from `s` instead.
c = pd.Series([1, 2, 3])
# Each store id repeated 56 times, and that block stacked 3 times.
stores1 = pd.DataFrame({'store_id': s.repeat(56)}).reset_index()
stores1 = pd.concat([stores1] * 3).reset_index().drop('index', axis=1)
# Each id repeated 560 times; the assignment to final_df below aligns on the
# row index, so only the leading rows of this frame are used.
cat1 = pd.DataFrame({'cat_id': s.repeat(560)}).reset_index()

# The future dates stacked 30 times, with the regressor columns attached by index.
final_df = pd.concat([future] * 30).reset_index().drop('index', axis=1)
final_df['store_id'] = stores1['store_id']
final_df['cat_id'] = cat1['cat_id']

# Predict and keep only the point forecast.
predict = fb.predict(final_df)
y_pred_df = predict[['yhat']]

# Prediction inputs and forecasts side by side.
final = pd.concat([final_df, y_pred_df], axis=1)
# Numeric store id -> store code.
stores2 = {
    1: 'CA_1',
    2: 'CA_2',
    3: 'CA_3',
    4: 'CA_4',
    5: 'TX_1',
    6: 'TX_2',
    7: 'TX_3',
    8: 'WI_1',
    9: 'WI_2',
    10: 'WI_3'
}
Beispiel #23
0
def FBprophet_memory(dataset, host_name):
    """Forecast memory use for a host, plot it, upload the plot and return a recommendation.

    A Prophet model is fit on the whole ``used`` series and forecast 7
    periods ahead.  The recommendation series is the forecast's upper bound
    scaled by 1.2; its weekly maximum is plotted next to the actual
    utilization, the forecast for the last 7 days and the forecast for the
    frame returned by ``test_dummy``.  The image is saved under
    ``<cwd>/images`` and uploaded to the recommendation bucket.

    Side effects: ``dataset`` is re-indexed in place by a ``date`` index
    built from its existing index.

    Args:
        dataset: frame with a ``used`` column and a date-like index.
        host_name: host whose zone and instance id label the image file.

    Returns:
        The maximum of the scaled upper-bound series, or ``None`` if a
        ``ValueError`` is raised while fitting, predicting or plotting.
    """
    dataset['date'] = pd.to_datetime(dataset.index)
    dataset.set_index('date', inplace=True)
    pjme = dataset
    pred = test_dummy(dataset)
    pred['date'] = pd.to_datetime(pred.index)
    pred.set_index('date', inplace=True)

    # Rows from the last 7 days are plotted and re-predicted separately.
    split_date = (date.today() - timedelta(days=7)).strftime('%d-%b-%Y')
    pjme_test = pjme.loc[pjme.index > split_date].copy()

    # Setup and train model and fit
    model = Prophet(changepoint_prior_scale=0.95)
    try:
        model.fit(pjme.reset_index().rename(columns={
            'date': 'ds',
            'used': 'y'
        }))

        future = model.make_future_dataframe(periods=7)
        forecast = model.predict(future)

        # Recommendation series: forecast upper bound with 20% headroom.
        df_past = pd.DataFrame({
            'ds': forecast['ds'],
            'y': forecast['yhat_upper'] * 1.2
        })

        pjme_test_fcst = model.predict(df=pjme_test.reset_index().rename(
            columns={'date': 'ds'}))
        pjme_fut_fcst = model.predict(df=pred.reset_index().rename(
            columns={'date': 'ds'}))

        df_recomm = df_past.set_index('ds')[['y']].resample('W').max()

        f, ax = plt.subplots(1)
        try:
            plt.plot(model.history['ds'].dt.to_pydatetime(),
                     model.history['y'],
                     color='teal',
                     marker='o',
                     linestyle='solid',
                     linewidth=1,
                     label='Actual Utilization')
            plt.plot(df_recomm.index,
                     df_recomm['y'],
                     color='orange',
                     marker='o',
                     linestyle='solid',
                     linewidth=1,
                     label='Recommendation')
            f.set_figheight(5)
            f.set_figwidth(15)
            ax.scatter(pjme_test.index, pjme_test['used'], color='yellow')
            ax.legend(loc='upper left', frameon=False)
            # DataFrame.append was removed in pandas 2.0; concat stacks the same rows.
            model.plot(pd.concat([pjme_test_fcst, pjme_fut_fcst]), ax=ax)

            df = hostname_zone_df()
            zone = " ".join(df[df['Hostname'] == host_name]['Zone'].values)
            instance_id = instance_id_func(host_name)

            image_name = "Memory_{}_{}_{}_{}.png".format(project, zone,
                                                         host_name, instance_id)
            image_path = os.path.join(os.getcwd() + '/images', image_name)
            plt.savefig(image_path)
        finally:
            # Always release the figure, including when plotting fails.
            plt.close(f)

        upload_blob("gcp_cost_recommendation_bucket", image_path, image_name)
        return df_past['y'].max()
    except ValueError:
        return None
Beispiel #24
0
def _inverse_boxcox_columns(forecast, lam):
    """Undo a Box-Cox transform, in place, on the forecast columns that exist."""
    # 'yhat' and 'trend' are always transformed; the component columns only
    # when the fitted model produced them.
    for column in ('yhat', 'trend'):
        forecast[column] = inv_boxcox(forecast[column], lam)
    for column in ('seasonal', 'daily', 'weekly', 'yearly', 'holidays'):
        if column in forecast.columns:
            forecast[column] = inv_boxcox(forecast[column], lam)


def fitForecast(y, h, sumMat, nodes, method, freq, include_history, cap, capF, changepoints, n_changepoints, \
                yearly_seasonality, weekly_seasonality, daily_seasonality, holidays, seasonality_prior_scale, \
                holidays_prior_scale, changepoint_prior_scale, mcmc_samples, interval_width, uncertainty_samples, \
                boxcoxT, skipFitting):
    """
    Fit one Prophet model per hierarchy node and reconcile the forecasts.

    Parameters
    ----------
    y : pd.DataFrame whose first column holds the timestamps and whose
        remaining columns hold one series per node. When `skipFitting` is
        truthy the remaining columns are taken to be forecasts already.
    h : Number of periods to forecast (passed to make_future_dataframe).
    sumMat : Summing matrix; one row per node, one column per bottom-level
        series.
    nodes : Hierarchy description, used by the 'FP' method.
    method : 'BU', 'AHP', 'PHA', 'FP', 'OLS', 'WLSS' or 'WLSV'.
    freq, include_history : Passed to make_future_dataframe.
    cap, capF : Capacity for the history / the future frame. Either a single
        value or a pd.DataFrame with one column per node. A non-None `capF`
        selects logistic growth.
    changepoints : Value passed to Prophet, or a pd.DataFrame with one column
        per node.
    n_changepoints : Value passed to Prophet, or a list with one entry per
        node.
    boxcoxT : None, or the per-node Box-Cox lambdas to invert.
    skipFitting : If truthy, no model is fitted and `y` is used as forecasts.
    (all other parameters are passed to Prophet unchanged)

    Returns
    -------
    dict mapping node index to a DataFrame whose 'yhat' column holds the
    reconciled forecast.

    Raises
    ------
    ValueError if `method` is not one of the supported names.
    """
    forecastsDict = {}
    mse = {}
    resids = {}
    nForecasts = sumMat.shape[0]
    ##
    # If you have a dictionary of Prophet Dataframes already, skip the prophet part, and put all the values into a dictionary
    ##
    if skipFitting:
        for key in range(len(y.columns) - 1):
            forecastsDict[key] = pd.DataFrame(y.iloc[:, key + 1])
            forecastsDict[key] = forecastsDict[key].rename(
                columns={forecastsDict[key].columns[0]: 'yhat'})
    else:
        if method == 'FP':
            nForecasts = sum(list(map(sum, nodes))) + 1

        for node in range(nForecasts):
            nodeToForecast = pd.concat([y.iloc[:, [0]], y.iloc[:, node + 1]],
                                       axis=1)
            # Per-node settings: pick this node's column/entry when a
            # container was supplied, otherwise use the shared value.
            cap1 = cap.iloc[:, node] if isinstance(cap, pd.DataFrame) else cap
            cap2 = capF.iloc[:, node] if isinstance(capF, pd.DataFrame) else capF
            if isinstance(changepoints, pd.DataFrame):
                changepoints1 = changepoints.iloc[:, node]
            else:
                changepoints1 = changepoints
            if isinstance(n_changepoints, list):
                n_changepoints1 = n_changepoints[node]
            else:
                n_changepoints1 = n_changepoints
            ##
            # Put the forecasts into a dictionary of dataframes
            ##
            # Silence anything written to stdout while fitting; the devnull
            # handle is closed when the block exits.
            with open(os.devnull, "w") as devnull, \
                    contextlib.redirect_stdout(devnull):
                # Prophet related stuff
                nodeToForecast = nodeToForecast.rename(
                    columns={nodeToForecast.columns[0]: 'ds'})
                nodeToForecast = nodeToForecast.rename(
                    columns={nodeToForecast.columns[1]: 'y'})
                growth = 'linear' if capF is None else 'logistic'
                m = Prophet(
                    growth=growth,
                    changepoints=changepoints1,
                    n_changepoints=n_changepoints1,
                    yearly_seasonality=yearly_seasonality,
                    weekly_seasonality=weekly_seasonality,
                    daily_seasonality=daily_seasonality,
                    holidays=holidays,
                    seasonality_prior_scale=seasonality_prior_scale,
                    holidays_prior_scale=holidays_prior_scale,
                    changepoint_prior_scale=changepoint_prior_scale,
                    mcmc_samples=mcmc_samples,
                    interval_width=interval_width,
                    uncertainty_samples=uncertainty_samples)
                if capF is not None:
                    nodeToForecast['cap'] = cap1
                m.fit(nodeToForecast)
                future = m.make_future_dataframe(
                    periods=h, freq=freq, include_history=include_history)
                if capF is not None:
                    future['cap'] = cap2
                ##
                # Base Forecasts, Residuals, and MSE
                ##
                forecastsDict[node] = m.predict(future)
                # The last h rows of yhat are the out-of-sample periods.
                resids[node] = y.iloc[:, node +
                                      1] - forecastsDict[node].yhat[:-h].values
                mse[node] = np.mean(np.array(resids[node])**2)
                ##
                # If logistic use exponential function, so that values can be added correctly
                ##
                if capF is not None:
                    forecastsDict[node]['yhat'] = np.exp(
                        forecastsDict[node]['yhat'])
                if boxcoxT is not None:
                    _inverse_boxcox_columns(forecastsDict[node], boxcoxT[node])
    ##
    # Now, Revise them
    ##
    if method in ('BU', 'AHP', 'PHA'):
        nCols = len(forecastsDict) + 1
        numBTS = sumMat.shape[1]
        if method == 'BU':
            # Bottom-up.
            #  Pros:
            #    No information lost due to aggregation
            #  Cons:
            #    Bottom level data can be noisy and more challenging to model and forecast
            # One column per bottom-level forecast (the last numBTS nodes).
            hatMat = np.column_stack([
                np.asarray(forecastsDict[key]['yhat'])
                for key in range(nCols - numBTS - 1, nCols - 1)
            ])
        else:
            # Top-down by historical proportions (AHP / PHA).
            #  Pros:
            #    Creates reliable aggregate forecasts, and good for low count data
            #  Cons:
            #    Unable to capture individual series dynamics
            y1 = y.copy()
            if boxcoxT is not None:
                for column in range(len(y.columns) - 1):
                    y1.iloc[:,
                            column + 1] = inv_boxcox(y1.iloc[:, column + 1],
                                                     boxcoxT[column])
            ##
            # Find Proportions
            ##
            # Convert to ndarray before adding an axis: pandas objects do not
            # support multi-dimensional indexing.
            fcst = np.asarray(forecastsDict[0]['yhat'])[:, np.newaxis]
            btsDat = pd.DataFrame(y1.iloc[:, nCols - numBTS:nCols])
            if method == 'AHP':
                # Average of the per-period proportions.
                divs = np.divide(np.transpose(np.array(btsDat)),
                                 np.array(y1.iloc[:, 1]))
                props = divs.mean(1)
            else:
                # Proportion of the historical totals.
                btsSum = btsDat.sum(0)
                topSum = sum(y1.iloc[:, 1])
                props = np.asarray(btsSum / topSum)
            props = props[:, np.newaxis]
            hatMat = np.dot(fcst, np.transpose(props))

        # Aggregate the bottom-level rows up through the summing matrix.
        newMat = np.empty([hatMat.shape[0], sumMat.shape[0]])
        for i in range(hatMat.shape[0]):
            newMat[i, :] = np.dot(sumMat, np.transpose(hatMat[i, :]))
    elif method == 'FP':
        newMat = forecastProp(forecastsDict, nodes)
    elif method in ('OLS', 'WLSS', 'WLSV'):
        if capF is not None:
            print(
                "An error might occur because of how these methods are defined (They can produce negative values). If it does, then please use another method"
            )
        newMat = optimalComb(forecastsDict, sumMat, method, mse)
    else:
        raise ValueError("Unknown reconciliation method: {!r}".format(method))

    for key in forecastsDict:
        forecastsDict[key]['yhat'] = newMat[:, key]
        ##
        # If Logistic fit values with natural log function to revert back to format of input
        ##
        if capF is not None:
            forecastsDict[key]['yhat'] = np.log(forecastsDict[key]['yhat'])

    return forecastsDict
Beispiel #25
0
# Rename the test columns to 'ds' (date) and 'y' (value), the column names
# Prophet works with.
test = test.rename(columns={'Date' : 'ds', 'MeanHomeValue' : 'y'})

train.head()

# setting uncertainty interval to 95%
z_model = Prophet(interval_width=.95)
z_model.fit(train)

# making new df with future values: 40 extra periods at month-end ('M') frequency

future_home_values = z_model.make_future_dataframe(periods = 40, freq = 'M')
future_home_values.tail()

# base model prediction

forecast = z_model.predict(future_home_values)

# examining data types for forecast df

forecast.info()

# observing the last rows of the forecast, i.e. the furthest-out predictions

forecast.tail()

"""prophet drops the index (zip code) so it is impossible to make projects for the whole country. Instead, I will make projections for each of the top 5 zip codes in the country based on percent change in home value over the relevant time period (2010-present)"""

# taking the zip code (RegionName) plus the '2010-01-31' and '2020-03-31'
# value columns, the two endpoints used for the percent change

top_zip = zillow[['RegionName','2010-01-31','2020-03-31']]
top_zip.head()
Beispiel #26
0
class ProphetRegressor(BaseEstimator, RegressorMixin):
    """
    Scikit-learn wrapper for the Prophet forecaster.

    Parameters
    ----------
    growth: String 'linear' or 'logistic' to specify a linear or logistic
        trend.

    changepoints: List of dates at which to include potential changepoints. If
        not specified, potential changepoints are selected automatically.

    n_changepoints: Number of potential changepoints to include. Not used
        if input `changepoints` is supplied. If `changepoints` is not supplied,
        then n_changepoints potential changepoints are selected uniformly from
        the first 80 percent of the history.

    yearly_seasonality: Fit yearly seasonality.
        Can be 'auto', True, False, or a number of Fourier terms to generate.

    weekly_seasonality: Fit weekly seasonality.
        Can be 'auto', True, False, or a number of Fourier terms to generate.

    daily_seasonality: Fit daily seasonality.
        Can be 'auto', True, False, or a number of Fourier terms to generate.

    holidays: pd.DataFrame with columns holiday (string) and ds (date type)
        and optionally columns lower_window and upper_window which specify a
        range of days around the date to be included as holidays.
        lower_window=-2 will include 2 days prior to the date as holidays. Also
        optionally can have a column prior_scale specifying the prior scale for
        that holiday.

    seasonality_prior_scale: Parameter modulating the strength of the
        seasonality model. Larger values allow the model to fit larger seasonal
        fluctuations, smaller values dampen the seasonality. Can be specified
        for individual seasonalities using add_seasonality.

    holidays_prior_scale: Parameter modulating the strength of the holiday
        components model, unless overridden in the holidays input.

    changepoint_prior_scale: Parameter modulating the flexibility of the
        automatic changepoint selection. Large values will allow many
        changepoints, small values will allow few changepoints.

    mcmc_samples: Integer, if greater than 0, will do full Bayesian inference
        with the specified number of MCMC samples. If 0, will do MAP
        estimation.

    interval_width: Float, width of the uncertainty intervals provided
        for the forecast. If mcmc_samples=0, this will be only the uncertainty
        in the trend using the MAP estimate of the extrapolated generative
        model. If mcmc_samples>0, this will be integrated over all model
        parameters, which will include uncertainty in seasonality.

    uncertainty_samples: Number of simulated draws used to estimate
        uncertainty intervals.

    Attributes
    ----------
    model_ : object
        Underlying Prophet model. None until `fit` has been called.

    preds_ : object
        Prediction DataFrame returned by the Prophet model after forecasting.
        Contains various extra columns that may be useful. None until
        `predict` has been called.
    """
    def __init__(self,
                 growth='linear',
                 changepoints=None,
                 n_changepoints=25,
                 yearly_seasonality='auto',
                 weekly_seasonality='auto',
                 daily_seasonality='auto',
                 holidays=None,
                 seasonality_prior_scale=10.0,
                 holidays_prior_scale=10.0,
                 changepoint_prior_scale=0.05,
                 mcmc_samples=0,
                 interval_width=0.80,
                 uncertainty_samples=1000):
        self.growth = growth
        self.changepoints = changepoints
        self.n_changepoints = n_changepoints
        self.yearly_seasonality = yearly_seasonality
        self.weekly_seasonality = weekly_seasonality
        self.daily_seasonality = daily_seasonality
        self.holidays = holidays
        self.seasonality_prior_scale = seasonality_prior_scale
        self.holidays_prior_scale = holidays_prior_scale
        self.changepoint_prior_scale = changepoint_prior_scale
        self.mcmc_samples = mcmc_samples
        self.interval_width = interval_width
        self.uncertainty_samples = uncertainty_samples
        self.model_ = None
        self.preds_ = None

    def fit(self, X, y):
        """
        Fit the Prophet forecast model.

        Parameters
        ----------
        X : array, shape = (n_samples,)
            Dates.
        y : array, shape = (n_samples,)
            Time series values.

        Returns
        -------
        self : Returns an instance of self.
        """
        # Strip pandas labels first. Otherwise a Series `y` would be aligned
        # on its index when assigned below, and a named Series `X` would not
        # match the requested 'ds' column.
        if isinstance(X, (pd.DataFrame, pd.Series, pd.Index)):
            X = X.values
        if isinstance(y, (pd.DataFrame, pd.Series, pd.Index)):
            y = y.values
        data = pd.DataFrame(X, columns=['ds'])
        data['y'] = y
        # The constructor parameters mirror Prophet's keyword arguments.
        self.model_ = Prophet(**self.get_params())
        self.model_.fit(data)

        return self

    def predict(self, X):
        """
        Predict using the Prophet forecast model.

        Parameters
        ----------
        X : array, shape = (n_samples,)
            Dates to generate predictions.

        Returns
        -------
        C : array, shape = (n_samples,)
            Returns predicted values.

        Raises
        ------
        AttributeError if called before `fit`.
        """
        if self.model_ is None:
            # Same exception type as the implicit failure on None, but with
            # a message that says what went wrong.
            raise AttributeError(
                "This ProphetRegressor instance is not fitted yet; "
                "call fit() before predict().")
        if isinstance(X, (pd.DataFrame, pd.Series, pd.Index)):
            X = X.values
        data = pd.DataFrame(X, columns=['ds'])
        self.preds_ = self.model_.predict(data)

        return self.preds_['yhat'].values
Beispiel #27
0
class tsModel(multiprocessing.Process):
    """
    Fundamental class definition for estimating a model.

    Required initialization:
    pg: dictionary of postgres values for username, password, host, database, and port
    ds_key: DarkSky API key

    Optional initialization:
    bin_window: The size of the time window to model. Specify any valid Pandas offset string.
        All data will be resampled accordingly. Default is '15T', i.e. 15 minute windows.
    include_weather: Boolean to indicate if the model should use weather covariates for building and predicting.
        Defaults to True.
    """
    def __init__(self, pg, ds_key, bin_window='15T', include_weather=True):
        super().__init__()
        self.ds_key = ds_key
        self.include_weather = include_weather
        if include_weather:
            self.init_ds_obj()
        self.bin_window = bin_window
        self.pg_username = pg['username']
        self.pg_password = pg['password']
        self.pg_host = pg['host']
        self.pg_db = pg['database']
        self.pg_port = pg['port']
        # SQLAlchemy engine, used by preds_to_sql for writing.
        self.engine = create_engine(
            f'postgresql://{self.pg_username}:{self.pg_password}@{self.pg_host}:{self.pg_port}/{self.pg_db}'
        )

    def init_ds_obj(self):
        """Thin wrapper to initialize DarkSky object"""
        self.ds = DarkSky(self.ds_key)

    def _read_sql(self, query, params=None):
        """Run a query against postgres and return the result as a DataFrame."""
        conn = psycopg2.connect(database=self.pg_db,
                                user=self.pg_username,
                                password=self.pg_password,
                                port=self.pg_port,
                                host=self.pg_host)
        # `with connection:` in psycopg2 only ends the transaction, so the
        # connection is closed explicitly here.
        try:
            return pd.read_sql_query(query, conn, params=params)
        finally:
            conn.close()

    def get_area_series(self,
                        idx,
                        series='scooter',
                        log_transform=False,
                        window_start=None,
                        window_end=None):
        """
        Function to query postgres for time series data.

        The result is stored in self.area_series.

        Parameters:
        idx: area identifier
        series: which series to query - options are 'scooter' or 'bicycle'
        log_transform: Boolean for whether the usage numbers should be logged. Defaults to False.
        window_start: Arbitrary date for starting the time series. Can pair with window_end
            for any arbitrary, logical window. Default is None (i.e. use the full time series.)
        window_end: See window_start. Only applied when window_start is given;
            if None, the window has no upper bound.
        """
        self.idx = idx
        self.series = series
        if self.series == 'scooter':
            q = "SELECT n, in_use, area, district, tract, time FROM ts WHERE area = %s"
        else:
            q = "SELECT bike_n, bike_in_use, area, district, tract, time FROM ts WHERE area = %s"
        # Values are passed as string parameters, so they reach the server as
        # quoted literals without being spliced into the SQL text.
        params = [str(idx)]
        if window_start is not None:
            q += " AND time >= %s"
            params.append(str(window_start))
            if window_end is not None:
                q += " AND time <= %s"
                params.append(str(window_end))
        self.area_series = self._read_sql(q, params)
        if self.bin_window != "15T":
            self.area_series = self.area_series.set_index('time').resample(
                self.bin_window).sum()
            self.area_series.reset_index(inplace=True)
        if log_transform:
            # log(x + 1), applied in place to both count columns.
            if series == 'scooter':
                count_cols = ['n', 'in_use']
            else:
                count_cols = ['bike_n', 'bike_in_use']
            for col in count_cols:
                self.area_series[col] = np.log(self.area_series[col] + 1)

    def transform_area_series(self, select_var='n'):
        """
        Simple function to select only required variable from time series data.

        select_var: for the scooter series 'n', 'in_use' or 'diff'; otherwise
            'bike_n', 'bike_in_use' or 'diff'. 'diff' adds an 'available'
            column (total minus in use, floored at 0) and drops both counts.
            Any other value leaves the data unchanged.
        """
        if self.series == 'scooter':
            if select_var == 'n':
                self.area_series.drop(columns=['in_use'], inplace=True)
            elif select_var == 'in_use':
                self.area_series.drop(columns=['n'], inplace=True)
            elif select_var == 'diff':
                self.area_series['available'] = self.area_series.apply(
                    lambda x: max([0, x['n'] - x['in_use']]), axis=1)
                self.area_series.drop(columns=['n', 'in_use'], inplace=True)
        else:
            if select_var == 'bike_n':
                self.area_series.drop(columns=['bike_in_use'], inplace=True)
            elif select_var == 'bike_in_use':
                self.area_series.drop(columns=['bike_n'], inplace=True)
            elif select_var == 'diff':
                self.area_series['available'] = self.area_series.apply(
                    lambda x: max([0, x['bike_n'] - x['bike_in_use']]), axis=1)
                self.area_series.drop(columns=['bike_n', 'bike_in_use'],
                                      inplace=True)

    def get_weather_data(self):
        """Simple function to query weather data from postgres into self.weather"""
        start_time = self.area_series['time'].min()
        end_time = self.area_series['time'].max()
        q = "SELECT * FROM weather WHERE time >= %s AND time <= %s"
        self.weather = self._read_sql(q, [str(start_time), str(end_time)])
        if self.bin_window == '15T':
            # Forward-fill the weather rows onto the 15 minute grid.
            self.weather = self.weather.set_index('time').resample(
                '15T').ffill()
        elif self.bin_window == '1H':
            # Left as queried for hourly bins.
            pass
        else:
            self.weather = self.weather.set_index('time').resample(
                self.bin_window).mean()

    def prep_model_data(self):
        """Simple function to guarantee time series data is in correct format for prophet model"""
        if self.include_weather:
            self.dat = pd.merge(self.area_series,
                                self.weather,
                                how='right',
                                on='time')
        else:
            # NOTE: no copy is made, so the in-place operations below also
            # modify self.area_series in this branch.
            self.dat = self.area_series
        self.dat.fillna(0, inplace=True)
        if self.bin_window != '15T':
            self.dat.drop(columns=['district', 'tract'], inplace=True)
        else:
            self.dat.drop(columns=['area', 'district', 'tract'], inplace=True)
        # Whichever value column is left becomes 'y'.
        self.dat.rename(columns={
            'time': 'ds',
            'n': 'y',
            'in_use': 'y',
            'bike_n': 'y',
            'bike_in_use': 'y',
            'available': 'y'
        },
                        inplace=True)

    def make_special_events(self):
        """Simple function to prepare holiday dataframes for model"""
        sxsw = pd.DataFrame({
            'holiday':
            'sxsw',
            'ds':
            pd.to_datetime([
                '2018-03-09', '2018-03-10', '2018-03-11', '2018-03-12',
                '2018-03-13', '2018-03-14', '2018-03-15', '2018-03-16',
                '2018-03-17', '2018-03-18', '2018-03-19', '2019-03-08',
                '2019-03-09', '2019-03-10', '2019-03-11', '2019-03-12',
                '2019-03-13', '2019-03-14', '2019-03-15', '2019-03-16',
                '2019-03-17', '2020-03-13', '2020-03-14', '2020-03-15',
                '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19',
                '2020-03-20', '2020-03-21', '2020-03-22'
            ])
        })
        # Labelled separately from sxsw so the two events are distinct
        # holidays in the combined frame.
        acl = pd.DataFrame({
            'holiday':
            'acl',
            'ds':
            pd.to_datetime([
                '2018-10-05', '2018-10-06', '2018-10-07', '2018-10-12',
                '2018-10-13', '2018-10-14', '2019-10-04', '2019-10-05',
                '2019-10-06', '2019-10-11', '2019-10-12', '2019-10-13',
                '2020-10-02', '2020-10-03', '2020-10-04', '2020-10-09',
                '2020-10-10', '2020-10-11'
            ])
        })
        self.holidays = pd.concat((sxsw, acl))

    def build_model(self, scale=0.05, hourly=False, holidays_scale=10.0):
        """
        Simple function to build model. Allows for specification of model parameters.

        scale: changepoint_prior_scale passed to Prophet
        hourly: if True, add an extra 'hourly' seasonality
        holidays_scale: holidays_prior_scale passed to Prophet
        """
        self.make_special_events()
        self.model = Prophet(changepoint_prior_scale=scale,
                             holidays=self.holidays,
                             holidays_prior_scale=holidays_scale)
        if self.include_weather:
            for v in ['temp', 'wind', 'cloud_cover', 'humidity']:
                self.model.add_regressor(v)
        if hourly:
            # Prophet periods are in days; 0.04167 days is about one hour.
            self.model.add_seasonality(name='hourly',
                                       period=0.04167,
                                       fourier_order=1)

    def train_model(self):
        """Thin wrapper to train model"""
        self.model.fit(self.dat)

    def calculate_periods(self):
        """Determine number of prediction periods required to reach 4 week forecast"""
        # prep_model_data renames 'time' to 'ds' on area_series only when it
        # aliases self.dat, so accept either column name.
        time_col = 'ds' if 'ds' in self.area_series.columns else 'time'
        max_d = self.area_series[time_col].max()
        horizon_end = datetime.datetime.now() + datetime.timedelta(weeks=4)
        t_diff = horizon_end - max_d
        # Four 15 minute periods per hour.
        return int(t_diff.total_seconds() / 3600 * 4)

    def build_prediction_df(self, lat=30.267151, lon=-97.743057, periods=192):
        """
        Simple function to build the prediction dataframe.

        Lat and lon only required if using weather data. Defaults to center of Austin.
        periods: number of 15 minute periods to extend past the history.
        """
        self.future = self.model.make_future_dataframe(periods=periods,
                                                       freq='15T')
        if self.include_weather:
            self.get_weather_pred(lat, lon)
            self.future = pd.merge(self.future,
                                   self.weather,
                                   how='left',
                                   left_on='ds',
                                   right_on='time')
            # NOTE(review): DataFrame.update aligns on index labels and
            # self.future_weather is indexed by time - confirm that
            # self.future's index matches it at this point.
            self.future.update(self.future_weather)

    def get_weather_pred(self, lat, lon):
        """Fetch forecast from DarkSky and store the hourly values in self.future_weather"""
        w_pred = self.ds.get_forecast(
            lat,
            lon,
            extend=False,
            lang=languages.ENGLISH,
            units=units.AUTO,
            exclude=[weather.MINUTELY, weather.ALERTS],
            timezone='UTC')
        hourly = w_pred.hourly.data
        self.future_weather = pd.DataFrame({
            'time': [x.time for x in hourly],
            'temp': [x.temperature for x in hourly],
            'current_rain': [x.precip_intensity for x in hourly],
            'rain_prob': [x.precip_probability for x in hourly],
            'humidity': [x.humidity for x in hourly],
            'wind': [x.wind_speed for x in hourly],
            'cloud_cover': [x.cloud_cover for x in hourly],
            'uv': [x.uv_index for x in hourly]
        })
        if self.bin_window == '15T':
            # Forward-fill the hourly values onto the 15 minute grid.
            self.future_weather = self.future_weather.set_index(
                'time').resample('15T').ffill()
        elif self.bin_window == '1H':
            self.future_weather.set_index('time', inplace=True)
        else:
            self.future_weather = self.future_weather.set_index(
                'time').resample(self.bin_window).mean()
        # Drop the timezone from the index.
        self.future_weather = self.future_weather.tz_convert(None)

    def predict(self):
        """Thin wrapper to produce predictions"""
        self.fcst = self.model.predict(self.future)

    def preds_to_sql(self, var):
        """
        Simple function to write predictions to postgres table

        var: label stored in the 'var' column of every written row.
        Only rows with ds no older than one day before now are appended to
        the 'predictions' table.
        """
        fcst_out = self.fcst[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
        fcst_out.columns = [col.lower() for col in fcst_out.columns]
        fcst_out['area'] = self.idx
        fcst_out['var'] = var
        fcst_out['modified_date'] = pd.to_datetime(
            datetime.datetime.today().strftime("%Y-%m-%d"))
        time_cutoff = pd.to_datetime(datetime.datetime.today() -
                                     datetime.timedelta(days=1))
        fcst_out = fcst_out[fcst_out['ds'] >= time_cutoff]
        fcst_out.to_sql('predictions',
                        self.engine,
                        if_exists='append',
                        index=False)

    def query_preds(self, time_stamp):
        """Simple function for querying previous predictions into self.old_preds"""
        q = "SELECT * FROM predictions WHERE area = %s AND ds >= %s"
        self.old_preds = self._read_sql(q, [str(self.idx), str(time_stamp)])

    def plot_results(self):
        """
        Thin wrapper to plot results.

        Usually it is preferable to use object model and fcst dataframe to plot separately.
        """
        self.fig = self.model.plot(self.fcst)

    def cv(self, initial, period, horizon, log=False):
        """
        Simple function to do walk forward validation.

        Results are stored in self.df_cv and self.df_p.

        Parameters:
        Initial: length of time to train original model
        Period: frequency with which to test beyond the original training period
        Horizon: Length of predictions
        Log: Was the model trained on logged data? Defaults to False.
        """
        self.df_cv = cross_validation(self.model,
                                      initial=initial,
                                      period=period,
                                      horizon=horizon)
        if log:
            # Exponentiate every column except the two timestamp columns.
            self.df_cv = self.df_cv.apply(
                lambda x: np.exp(x) if x.name not in ['ds', 'cutoff'] else x)
        self.df_p = performance_metrics(self.df_cv)

    def save_results(self, save_path):
        """Thin wrapper to save forecast dataframe to pickle object"""
        self.fcst.to_pickle(save_path)
Beispiel #28
0
            weekly_seasonality=True,
            daily_seasonality=True)
# Fit the model on the date ('ds') and target ('y') columns of the training data.
m.fit(train_df[['ds', 'y']])

# Using the helper method `Prophet.make_future_dataframe`, we create a dataframe that contains all dates from the history and also extends `prediction_size` periods into the future (the 92 days that we left out before).

# In[ ]:

future = m.make_future_dataframe(periods=prediction_size)
future.tail(n=3)

# We predict values with Prophet by passing in the dates for which we want to create a forecast. If we also supply the historical dates (as in our case), then in addition to the prediction we will get an in-sample fit for the history. Let's call the model's predict method with our future dataframe as an input:

# In[ ]:

forecast = m.predict(future)
forecast.tail(n=3)

# In the resulting dataframe you can see many columns characterizing the prediction, including trend and seasonality components as well as their uncertainty intervals. The forecast itself is stored in the yhat column.
#
# The Prophet library has its own built-in tools for visualization that enable us to quickly evaluate the result.
#
# - First, there is a method called Prophet.plot that plots all the points from the forecast.
# - Second, the method Prophet.plot_components might be much more useful in our case. It allows us to observe different components of the model separately: trend, yearly and weekly seasonality. In addition, if you supply information about holidays and events to your model, they will also be shown in this plot.
#
# Let's try it out:

# In[ ]:

m.plot(forecast)
m.plot_components(forecast)
        ]

        # Build a multiplicative-seasonality model with the custom holidays
        # plus the built-in US holidays, and one extra regressor per
        # exogenous feature.
        model_fbp = Prophet(seasonality_mode='multiplicative',
                            holidays=holidays)
        model_fbp.add_country_holidays(country_name='US')
        for feature in exogenous_features:
            model_fbp.add_regressor(feature, prior_scale=0.1)

        # Fit with console output suppressed. DATE / SHIPPED_QTY are renamed
        # to the 'ds' / 'y' columns.
        # NOTE(review): a 'cap' column is supplied but no growth argument is
        # passed - presumably cap only matters for logistic growth; confirm.
        with suppress_stdout_stderr():
            model_fbp.fit(
                df_train[["DATE", 'SHIPPED_QTY', 'cap'] +
                         exogenous_features].rename(columns={
                             "DATE": "ds",
                             'SHIPPED_QTY': "y"
                         }))
        # Predict on the validation rows (only DATE is renamed here).
        forecast = model_fbp.predict(
            df_valid[["DATE", 'SHIPPED_QTY', 'cap'] +
                     exogenous_features].rename(columns={"DATE": "ds"}))
        # model_fbp.plot_components(forecast)
        # Predictions and actuals are exponentiated before comparison -
        # presumably SHIPPED_QTY is stored log-transformed; TODO confirm.
        df_valid.loc[:, "Forecast_Prophet"] = np.exp(forecast.yhat).values
        # model_fbp.plot_components(forecast)
        # From here on `forecast` is the summed scalar, not the prediction frame.
        actual, forecast = sum(np.exp(df_valid['SHIPPED_QTY']).values), sum(
            df_valid["Forecast_Prophet"].values)
        sub_error = mape(actual, forecast)
        # Report and record the error for this test period, labelled by
        # year and month name.
        print(datetime.strftime(test_start[i], '%Y-%B'),
              np.round(sub_error, 3))
        error.append([
            datetime.strftime(test_start[i], '%Y-%B'), actual, forecast,
            sub_error
        ])
# Index the price data by date, in chronological order.
df = df.set_index('Date').sort_index()

# Keep the volume series; it is only referenced by the disabled plotting
# code in the string literal below.
vol = df['Volume']

# From here on `df` is just the closing-price series.
df = df['Close']
# The following string literal holds disabled plotting code; it is evaluated
# as a bare expression and has no effect.
"""
#plotar anual
fig,ax = plt.subplots(figsize = (10,5))
df.plot(ax=ax)
plt.show()

# plotar gráfico semanal
fig, ax = plt.subplots(figsize=(10,5))
vol.plot(ax=ax)
plt.show()
"""

# Back to a two-column frame named 'ds' / 'y' for Prophet.
df = df.reset_index().rename(columns={'Date': 'ds', 'Close': 'y'})

# Forecasting: fit a default model with Brazilian country holidays added.
model = Prophet()
model.add_country_holidays(country_name='BR')
model.fit(df)

# Extend the history by 365 periods and predict over the whole range.
future = model.make_future_dataframe(periods=365)
forecast = model.predict(future)

# Plot the forecast, then its individual components.
model.plot(forecast, xlabel='Date', ylabel='Close')

model.plot_components(forecast)
def fbprophet(other_args: List[str], s_ticker: str, df_stock: pd.DataFrame):
    """
    Run FB Prophet model

    Fits a Prophet model on the "5. adjusted close" column of ``df_stock``,
    plots the fit together with the predicted prices and prints the
    prediction. When an end date is supplied (``-e``), the data after that
    date is held out and the prediction is compared against it (backtesting).

    Parameters
    ----------
    other_args: List[str]:
        List of argparse arguments (``-d/--days`` prediction days,
        ``-e/--end`` backtesting end date)
    s_ticker: str
        Stock ticker, used in the plot titles
    df_stock: pd.DataFrame
        Dataframe of prices; it is indexed by date and must contain a
        "5. adjusted close" column

    Returns
    -------
    None
        Results are plotted and printed. Any exception is caught and its
        message printed instead of being propagated.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        prog="fbprophet",
        description="""
            Facebook Prophet is a forecasting procedure that is fast and provides
            completely automated forecasts that can be tuned by hand by data scientists
            and analysts. It was developed by Facebook's data science team and is open
            source.
        """,
    )

    parser.add_argument(
        "-d",
        "--days",
        action="store",
        dest="n_days",
        type=check_positive,
        default=5,
        help="prediction days",
    )
    parser.add_argument(
        "-e",
        "--end",
        action="store",
        type=valid_date,
        dest="s_end_date",
        default=None,
        help="The end date (format YYYY-MM-DD) to select - Backtesting",
    )

    try:
        ns_parser = parse_known_args_and_warn(parser, other_args)
        if not ns_parser:
            return

        # BACKTESTING: validate the end date and split off the held-out data
        if ns_parser.s_end_date:

            if ns_parser.s_end_date < df_stock.index[0]:
                print(
                    "Backtesting not allowed, since End Date is older than Start Date of historical data\n"
                )
                return

            if (ns_parser.s_end_date < get_next_stock_market_days(
                    last_stock_day=df_stock.index[0],
                    n_next_days=5 + ns_parser.n_days)[-1]):
                print(
                    "Backtesting not allowed, since End Date is too close to Start Date to train model\n"
                )
                return

            future_index = get_next_stock_market_days(
                last_stock_day=ns_parser.s_end_date,
                n_next_days=ns_parser.n_days)

            if future_index[-1] > datetime.datetime.now():
                print(
                    "Backtesting not allowed, since End Date + Prediction days is in the future\n"
                )
                return

            # Held-out "real" prices first, then truncate the training data.
            df_future = df_stock[future_index[0]:future_index[-1]]
            df_stock = df_stock[:ns_parser.s_end_date]

        # Reshape into the two-column ds/y frame that is passed to Prophet.
        df_stock = df_stock.sort_index(ascending=True)
        df_stock.reset_index(level=0, inplace=True)
        df_stock = df_stock[["date", "5. adjusted close"]]
        df_stock = df_stock.rename(columns={
            "date": "ds",
            "5. adjusted close": "y"
        })
        df_stock["ds"] = pd.to_datetime(df_stock["ds"])

        model = Prophet(yearly_seasonality=False, daily_seasonality=False)
        model.fit(df_stock)

        l_pred_days = get_next_stock_market_days(
            last_stock_day=pd.to_datetime(df_stock["ds"].values[-1]),
            n_next_days=ns_parser.n_days,
        )
        # NOTE(review): make_future_dataframe is called without a frequency
        # while l_pred_days comes from get_next_stock_market_days; the last
        # n_days predictions are simply relabelled with l_pred_days below.
        # Confirm that both sequences refer to the same dates.
        close_prices = model.make_future_dataframe(periods=ns_parser.n_days)
        forecast = model.predict(close_prices)

        df_pred = forecast["yhat"][
            -ns_parser.n_days:]  # .apply(lambda x: f"{x:.2f} $")
        df_pred.index = l_pred_days

        _, ax = plt.subplots(figsize=plot_autoscale(), dpi=PLOT_DPI)
        model.plot(
            forecast[:-ns_parser.n_days],
            ax=ax,
            xlabel="Time",
            ylabel="Share Price ($)",
        )
        _, _, ymin, ymax = ax.axis()
        # Vertical line at the last training point, shaded band over the
        # prediction window.
        ax.vlines(
            df_stock["ds"].values[-1],
            ymin,
            ymax,
            linewidth=2,
            linestyle="--",
            color="k",
        )
        plt.axvspan(
            df_stock["ds"].values[-1],
            l_pred_days[-1],
            facecolor="tab:orange",
            alpha=0.2,
        )
        plt.ylim(ymin, ymax)
        plt.xlim(df_stock["ds"].values[0],
                 get_next_stock_market_days(l_pred_days[-1], 1)[-1])
        # BACKTESTING
        if ns_parser.s_end_date:
            plt.title(
                f"BACKTESTING: Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction"
            )
        else:
            plt.title(
                f"Fb Prophet on {s_ticker} - {ns_parser.n_days} days prediction"
            )

        # BACKTESTING: overlay the held-out prices and connect them to the
        # last training point
        if ns_parser.s_end_date:
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(
                [df_stock["ds"].values[-1], df_future.index[0]],
                [
                    df_stock["y"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=1,
                c="tab:blue",
                linestyle="--",
            )

        plt.plot(df_pred.index, df_pred.values, lw=2, c="green")

        if gtff.USE_ION:
            plt.ion()

        plt.show()

        # BACKTESTING: second figure with real-vs-predicted prices (top) and
        # the relative prediction error in percent (bottom)
        if ns_parser.s_end_date:
            plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
            plt.subplot(211)
            plt.plot(
                df_future.index,
                df_future["5. adjusted close"],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.plot(df_pred.index, df_pred, lw=2, c="green")
            plt.scatter(
                df_future.index,
                df_future["5. adjusted close"],
                c="tab:blue",
                lw=3,
            )
            plt.plot(
                [df_stock["ds"].values[-1], df_future.index[0]],
                [
                    df_stock["y"].values[-1],
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                c="tab:blue",
                ls="--",
            )
            plt.scatter(df_pred.index, df_pred, c="green", lw=3)
            plt.plot(
                [df_stock["ds"].values[-1], df_pred.index[0]],
                [df_stock["y"].values[-1], df_pred.values[0]],
                lw=2,
                c="green",
                ls="--",
            )
            plt.title("BACKTESTING: Real data price versus Prediction")
            plt.xlim(
                df_stock["ds"].values[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            plt.ylabel("Share Price ($)")
            # The on/off flag is passed positionally: its keyword was renamed
            # from `b` to `visible` in Matplotlib, so the positional form is
            # the one that works on both old and new releases.
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])
            plt.xticks([])

            plt.subplot(212)
            plt.axhline(y=0, color="k", linestyle="--", linewidth=2)

            plt.plot(
                df_future.index,
                100 *
                (df_pred.values - df_future["5. adjusted close"].values) /
                df_future["5. adjusted close"].values,
                lw=2,
                c="red",
            )
            plt.scatter(
                df_future.index,
                100 *
                (df_pred.values - df_future["5. adjusted close"].values) /
                df_future["5. adjusted close"].values,
                c="red",
                lw=5,
            )
            plt.title(
                "BACKTESTING: Error between Real data and Prediction [%]")
            plt.plot(
                [df_stock["ds"].values[-1], df_future.index[0]],
                [
                    0,
                    100 * (df_pred.values[0] -
                           df_future["5. adjusted close"].values[0]) /
                    df_future["5. adjusted close"].values[0],
                ],
                lw=2,
                ls="--",
                c="red",
            )
            plt.xlim(
                df_stock["ds"].values[-1],
                df_pred.index[-1] + datetime.timedelta(days=1),
            )
            plt.xlabel("Time")
            plt.ylabel("Prediction Error (%)")
            plt.grid(True, which="major", color="#666666", linestyle="-")
            plt.minorticks_on()
            plt.grid(True,
                     which="minor",
                     color="#999999",
                     linestyle="-",
                     alpha=0.2)
            plt.legend(["Real data", "Prediction data"])

            if gtff.USE_ION:
                plt.ion()

            plt.show()

            # Refactor prediction dataframe for backtesting print
            df_pred.name = "Prediction"
            df_pred = df_pred.to_frame()
            df_pred["Real"] = df_future["5. adjusted close"]

            if gtff.USE_COLOR:

                patch_pandas_text_adjustment()

                print("Time         Real [$]  x  Prediction [$]")
                print(
                    df_pred.apply(price_prediction_backtesting_color,
                                  axis=1).to_string())
            else:
                print(df_pred[["Real", "Prediction"]].round(2).to_string())

            print("")
            print_prediction_kpis(df_pred["Real"].values,
                                  df_pred["Prediction"].values)
        else:
            print("")
            print("Predicted share price:")
            print(df_pred.to_string())
        print("")

    except Exception as e:
        # Command-style entry point: report the failure instead of raising.
        print(e)
        print("")
# Cached loader: the download result is memoised through st.cache
@st.cache
def load_data(ticker):
    """Download the price history for ``ticker`` between START and TODAY.

    The index of the downloaded frame is reset in place so that it becomes a
    regular column, and the frame is returned.
    """
    downloaded = yf.download(ticker, START, TODAY)
    downloaded.reset_index(inplace=True)
    return downloaded

# Show a status line while the data loads, then update it so the user can
# see that loading has finished (previously the same text was written twice).
data_load_state = st.text("Loading Data...")
data = load_data(selected_stock)
data_load_state.text("Loading Data... done!")

# Prediction of prices using Prophet: keep only the date and closing price
# and rename them to the 'ds' / 'y' columns.
df_train = data[['Date','Close']]
df_train = df_train.rename(columns = {"Date":"ds", "Close":'y'})

m = Prophet()
m.fit(df_train)
# `period` is defined outside this snippet; it sets how many periods are
# appended after the history.
future = m.make_future_dataframe(periods = period)

forecast = m.predict(future)

st.write(f'Forecast plot for {n_years} years')
fig1 = plot_plotly(m,forecast)
st.plotly_chart(fig1)

# Extra components graphs
st.write("COMPONENTS")
fig2 = m.plot_components(forecast)
st.write(fig2)
def create_prophet_m(app_name,z1,cpu_perc_list,delay=24):
    """Fit Prophet on ``z1.bw`` twice: once for a forward forecast, once for a hold-out evaluation.

    Parameters
    ----------
    app_name
        Label printed with the metrics and used as the index of the metrics frame.
    z1
        Object exposing a ``bw`` series; ``len(z1)`` and ``len(z1.bw)`` are
        both used as the number of observations.
    cpu_perc_list
        List that receives one ``py.cpu_percent()`` sample after the first
        fit. NOTE(review): the list is then rebound locally to
        ``[max(cpu_perc_list)]``, so the second sample and the max-collapse
        are NOT visible to the caller — confirm whether that is intended.
    delay
        Number of trailing observations held out for testing, and number of
        periods forecast past the end of the data.

    Returns
    -------
    tuple
        ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics frame
        (empty when no test row has a prediction), the model fitted on the
        training part, its full forecast, the test rows merged with their
        predictions, and the ``delay``-period forecast of the full-data model.
    """

    ### --- For realtime pred ---###

    full_df = z1.bw.iloc[0:len(z1)]
    full_df = full_df.reset_index()
    full_df.columns = ['ds','y']

    # Removing outliers: when the maximum is at least 2*median above the
    # median, blank out every value >= 2*median so it is treated as missing.
    q50 = full_df.y.median()
    q100 = full_df.y.quantile(1)

    if((q100-q50) >= (2*q50)):

        full_df.loc[full_df.y>=(2*q50),'y'] = None

    #-- Realtime prediction --##
    #model
    model_r = Prophet(yearly_seasonality=False,changepoint_prior_scale=.1,seasonality_prior_scale=0.05)
    model_r.fit(full_df)

    cpu_perc_list.append(py.cpu_percent())
    # Rebinds the local name only; the caller's list keeps the appended sample.
    cpu_perc_list = [max(cpu_perc_list)]

    future_r = model_r.make_future_dataframe(periods=delay,freq='D')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Forecast rows that lie past the end of the observed data
    pred_r = pd.DataFrame(forecast_r['yhat'][len(z1):(len(z1)+delay)])
    pred_r=pred_r.reset_index()
    #--- completes realtime pred ---#

    # Hold-out evaluation: the last `delay` observations form the test set.
    train_end_index=len(z1.bw)-delay
    train_df=z1.bw.iloc[0:train_end_index]

    test_df=z1.bw.iloc[train_end_index:len(z1)]

    train_df=train_df.reset_index()
    test_df=test_df.reset_index()

    train_df.columns=['ds','y']

    #--- removing outliers in trainset (same rule as above) ---#

    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)

    if((q100-q50) >= (2*q50)):

        train_df.loc[train_df.y>=(2*q50),'y'] = None

    test_df.columns=['ds','y']
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    #model
    model = Prophet(yearly_seasonality=False,changepoint_prior_scale=.1,seasonality_prior_scale=0.05)
    model.fit(train_df)

    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]


    future = model.make_future_dataframe(periods=len(test_df),freq='D')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    # Forecast rows covering the test window
    pred = pd.DataFrame(forecast['yhat'][train_end_index:len(z1)])

    print('length forecasted non realtime=',len(pred))
    pred=pred.reset_index()
    pred_df=pd.merge(test_df,pred,on='ds',how='left')

    # Drop test rows with a missing value or without a matching prediction.
    pred_df.dropna(inplace=True)


    df=pd.DataFrame()

    if(len(pred_df)>0):

        pred_df['error_test']=pred_df.y-pred_df.yhat

        MSE=mse(pred_df.y,pred_df.yhat)
        RMSE=math.sqrt(MSE)
        # Absolute percentage error per row
        pred_df['APE']=abs(pred_df.error_test*100/pred_df.y)
        MAPE=pred_df.APE.mean()
        # Error rates are expressed as fractions (APE / 100).
        min_error_rate = pred_df['APE'].quantile(0)/100
        max_error_rate = pred_df['APE'].quantile(1)/100
        median_error_rate = pred_df['APE'].quantile(.50)/100
        print("App name:",app_name)
        #print("MSE  :",MSE)
        print("RMSE :",RMSE)
        print("MAPE :",MAPE)

        # MAPE over the rows whose APE is below the 98th percentile
        mape_q98=pred_df['APE'][pred_df.APE<pred_df['APE'].quantile(0.98)].mean()
        std_MAPE = math.sqrt(((pred_df.APE-MAPE)**2).mean())

        df = pd.DataFrame({'length':len(z1),
                             'test_rmse':RMSE,
                             'test_mape':MAPE,
                             'std_mape':std_MAPE, # standard deviation of the APE values
                             'min_error_rate':min_error_rate ,
                             'max_error_rate':max_error_rate ,
                             'median_error_rate':median_error_rate,

                 'test_mape_98':mape_q98},

                          index=[app_name])

    return(df,model,forecast,pred_df,pred_r)
Beispiel #34
0
def predict_series(df, date, special_dates, n=100):
    """Fit Prophet on part of ``df.total_cases`` and return a Plotly figure.

    Parameters
    ----------
    df
        Frame with ``date`` and ``total_cases`` columns. The ``date`` values
        are compared against a ``datetime.date`` cutoff.
    date
        Cutoff as a string whose first 10 characters are ``YYYY-MM-DD``.
    special_dates
        Frame with ``date``, ``value`` and ``event`` columns, drawn as markers.
    n
        Unused; kept only so existing callers keep working (the training size
        has always been derived from ``date``).

    Returns
    -------
    plotly Figure with the original series, the forecast and the event markers.
    """
    date = date[:10]
    cutoff = datetime.datetime.strptime(date, '%Y-%m-%d').date()

    # Training size: everything when no row is later than the cutoff,
    # otherwise the row count minus the LAST positional index of a later row.
    later_rows = [i for i, d in enumerate(df.date) if d > cutoff]
    if not later_rows:
        train_size = df.shape[0]
    else:
        train_size = df.shape[0] - later_rows[-1]
    # Rows left out of training; the forecast horizon extends 20 periods
    # beyond them.
    left_out = df.shape[0] - train_size

    series = pd.DataFrame({'ds': df.date.values, 'y': df.total_cases.values})
    series_train = series.tail(train_size).copy()
    m = Prophet()
    m.fit(series_train)
    future = m.make_future_dataframe(periods=left_out + 20)

    forecast = m.predict(future)
    forecast = forecast[['ds', 'yhat', 'yhat_lower',
                         'yhat_upper']].tail(left_out + 6 + 20)

    # Plotly receives plain Python datetimes on the x axes.
    x = [pd.Timestamp(d, tz=None).to_pydatetime() for d in series['ds'].values]
    x1 = [
        pd.Timestamp(d, tz=None).to_pydatetime()
        for d in special_dates['date'].values
    ]
    x2 = [
        pd.Timestamp(d, tz=None).to_pydatetime() for d in forecast['ds'].values
    ]
    trace1 = go.Scatter(
        x=x1,
        y=special_dates['value'].values,
        text=['{}'.format(i) for i in special_dates['event'].values],
        hovertemplate='<br>date: %{x}<br>' + '<b>%{text}</b>',
        mode='markers',
        name='events',
        marker=dict(color='navy',
                    size=15,
                    line=dict(color='MediumPurple', width=1)))

    trace2 = go.Scatter(x=x,
                        y=series['y'].values,
                        mode='lines',
                        name='original',
                        line=dict(color='dodgerblue', width=4),
                        opacity=0.5)

    trace3 = go.Scatter(x=x2,
                        y=forecast['yhat'].values,
                        mode='lines',
                        name='predicted',
                        line=dict(color='firebrick', width=3, dash='dash'),
                        opacity=0.7)

    # The former `total_tests` trace was built but never added to the figure;
    # it has been removed so callers no longer need that column.
    fig = go.Figure()
    fig.add_trace(trace2)
    fig.add_trace(trace3)
    fig.add_trace(trace1)
    fig.update_layout(title="Prediction from " + date)

    fig.update_layout(
        xaxis_range=[
            datetime.datetime(2020, 1, 1),
            datetime.datetime(2020, 6, 1)
        ],
        xaxis={'type': 'date'},
    )
    fig.update_xaxes(title_text="Date")
    fig.update_yaxes(title_text="Total cases")
    fig.update_layout(hovermode='x unified')
    fig.update_layout(height=600,
                      margin=dict(l=10, r=10, t=50, b=30),
                      paper_bgcolor='rgba(0,0,0,0)',
                      plot_bgcolor='rgba(0,0,0,0)',
                      font=dict(
                          family="Courier New, monospace",
                          size=10,
                          color="#7f7f7f",
                      ))

    fig.update_xaxes(showgrid=False, )

    return fig
Beispiel #35
0
 def test_added_regressors(self):
     # Exercises add_regressor end to end: fitting requires every registered
     # regressor column, the stored standardisation parameters are checked,
     # then the feature matrix, and finally the forecast components.
     m = Prophet()
     m.add_regressor('binary_feature', prior_scale=0.2)
     m.add_regressor('numeric_feature', prior_scale=0.5)
     m.add_regressor(
         'numeric_feature2', prior_scale=0.5, mode='multiplicative'
     )
     m.add_regressor('binary_feature2', standardize=True)

     df = DATA.copy()
     df['binary_feature'] = [0] * 255 + [1] * 255
     df['numeric_feature'] = range(510)
     df['numeric_feature2'] = range(510)
     # Fitting must fail while 'binary_feature2' is still missing from df.
     with self.assertRaises(ValueError):
         m.fit(df)
     df['binary_feature2'] = [1] * 100 + [0] * 410
     m.fit(df)

     # Standardisation parameters recorded for each regressor
     expected_binary = {
         'prior_scale': 0.2,
         'mu': 0,
         'std': 1,
         'standardize': 'auto',
         'mode': 'additive',
     }
     self.assertEqual(m.extra_regressors['binary_feature'], expected_binary)

     numeric = m.extra_regressors['numeric_feature']
     self.assertEqual(numeric['prior_scale'], 0.5)
     self.assertEqual(numeric['mu'], 254.5)
     self.assertAlmostEqual(numeric['std'], 147.368585, places=5)

     self.assertEqual(
         m.extra_regressors['numeric_feature2']['mode'], 'multiplicative')

     binary2 = m.extra_regressors['binary_feature2']
     self.assertEqual(binary2['prior_scale'], 10.)
     self.assertAlmostEqual(binary2['mu'], 0.1960784, places=5)
     self.assertAlmostEqual(binary2['std'], 0.3974183, places=5)

     # Standardisation applied to the data itself
     df2 = m.setup_dataframe(df.copy())
     self.assertEqual(df2['binary_feature'][0], 0)
     self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
     self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)

     # Feature matrix and prior scales
     seasonal_features, prior_scales, component_cols, modes = (
         m.make_all_seasonality_features(df2)
     )
     self.assertEqual(seasonal_features.shape[1], 30)
     names = ['binary_feature', 'numeric_feature', 'binary_feature2']
     true_priors = [0.2, 0.5, 10.]
     for name, expected_prior in zip(names, true_priors):
         self.assertIn(name, seasonal_features)
         self.assertEqual(sum(component_cols[name]), 1)
         self.assertEqual(
             sum(np.array(prior_scales) * component_cols[name]),
             expected_prior,
         )

     # Forecast components: predicting also needs every regressor column.
     future = pd.DataFrame({
         'ds': ['2014-06-01'],
         'binary_feature': [0],
         'numeric_feature': [10],
         'numeric_feature2': [10],
     })
     with self.assertRaises(ValueError):
         m.predict(future)
     future['binary_feature2'] = 0
     fcst = m.predict(future)
     self.assertEqual(fcst.shape[1], 37)
     self.assertEqual(fcst['binary_feature'][0], 0)

     additive_regressors = (
         fcst['numeric_feature'][0] + fcst['binary_feature2'][0]
     )
     self.assertAlmostEqual(
         fcst['extra_regressors_additive'][0], additive_regressors)
     self.assertAlmostEqual(
         fcst['extra_regressors_multiplicative'][0],
         fcst['numeric_feature2'][0],
     )

     additive_total = (
         fcst['yearly'][0] + fcst['weekly'][0]
         + fcst['extra_regressors_additive'][0]
     )
     self.assertAlmostEqual(fcst['additive_terms'][0], additive_total)
     self.assertAlmostEqual(
         fcst['multiplicative_terms'][0],
         fcst['extra_regressors_multiplicative'][0],
     )

     yhat_expected = (
         fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0])
         + fcst['additive_terms'][0]
     )
     self.assertAlmostEqual(fcst['yhat'][0], yhat_expected)

     # A regressor that is constant at 0 must still fit, with std recorded as 1.
     df['constant_feature'] = 0
     m = Prophet()
     m.add_regressor('constant_feature')
     m.fit(df)
     self.assertEqual(m.extra_regressors['constant_feature']['std'], 1)
Beispiel #36
0
def create_prophet_m(source_name,z1,delay=24):
    """Fit Prophet on ``z1.app_count`` for a forward forecast and a hold-out evaluation.

    The last ``delay`` observations are held out as the test set. One model is
    fitted on the full series and forecast ``delay`` hourly periods ahead; a
    second model is fitted on the training part and scored on the test part.

    Returns ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics
    frame (empty when no test row has a prediction), the training model, its
    forecast, the test rows merged with predictions, and the forward forecast.
    """

    def _as_ds_y(series):
        # Move the index into a column and label the two columns 'ds' / 'y'.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    n_obs = len(z1)
    train_end_index = len(z1.app_count) - delay

    train_df = _as_ds_y(z1.app_count.iloc[0:train_end_index])
    test_df = _as_ds_y(z1.app_count.iloc[train_end_index:n_obs])
    full_df = _as_ds_y(z1.app_count.iloc[0:n_obs])

    # -- Real-time prediction: model trained on the complete series --
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='H'))
    forecast_r.index = forecast_r['ds']
    # Keep only the rows that lie past the observed data.
    pred_r = pd.DataFrame(forecast_r['yhat'][n_obs:(n_obs + delay)])
    pred_r = pred_r.reset_index()

    # -- Hold-out evaluation: model trained on the training part only --
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'][train_end_index:n_obs])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) <= 0:
        return (df, model, forecast, pred_df, pred_r)

    pred_df['error_test'] = pred_df.y - pred_df.yhat

    MSE = mse(pred_df.y, pred_df.yhat)
    RMSE = math.sqrt(MSE)
    # Absolute percentage error per row
    pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
    MAPE = pred_df.APE.mean()
    print("App name:",source_name)
    print("MSE  :",MSE)
    print("RMSE :",RMSE)
    print("MAPE :",MAPE)

    # MAPE restricted to rows strictly below the 98th APE percentile
    q98 = pred_df['APE'].quantile(0.98)
    mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()

    metrics = {
        'test_rmse': RMSE,
        'test_mape': MAPE,
        'test_mape_98': mape_q98,
    }
    df = pd.DataFrame(metrics, index=[source_name])

    return (df, model, forecast, pred_df, pred_r)
# In[29]:

# Model that uses the `promotions` frame as holidays and forces weekly and
# daily seasonality on.
model = Prophet(holidays=promotions,
                weekly_seasonality=True,
                daily_seasonality=True)
model.fit(df)

# In[30]:

# Extend the history by 24 periods.
# NOTE(review): freq='m' is presumably meant as a monthly frequency — confirm
# the lowercase alias is accepted by the installed pandas version.
future = model.make_future_dataframe(periods=24, freq='m')
future.tail()

# In[31]:

forecast = model.predict(future)

# In[32]:

forecast.tail()

# In[33]:

# Point forecast together with the lower/upper bound columns.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

# In[34]:

model.plot(forecast)

# In[35]:
# Fit a default Prophet model on the prepared frame.
model_prophet = Prophet()
model_prophet.fit(
    total_performa
)  # The total_performa DataFrame was obtained in the step that renamed the columns to "ds" & "y"
"""Setelah sebelumnya mengimport fbprophet pada pengujian ini, saya akan melanjutkan kembali yaitu melakukan tahapan prediksi untuk 1 tahun ke depan dengan memanfaatkan metode dari fbprophet yaitu .make_future_dataFrame dengan memakai perhitungan "M" atau bukan. Jadi 1 tahun= 12 bulan."""

# Build the frame of dates to predict on: the history plus 12 more periods.
prediksi_karyawan_masa_depan = model_prophet.make_future_dataframe(
    periods=12, freq="M"
)  # M = month, while periods is the number of months in one year
prediksi_karyawan_masa_depan.tail()

prediksi_karyawan_masa_depan["ds"].describe()
"""Setelah melakukan pemodelan dengan prophet pada DataFrame prediksi_karyawan_masa_depan, saya melakukan langkah prediksi kembali agar mendapatkan rentang waktu selama 12 bulan ke depan (mengikuti periode pada DataFrame prediksi_karyawan_masa_depan)."""

# Predict over the extended frame and show the last rows of the main columns.
nilai_prediksi_karyawan = model_prophet.predict(prediksi_karyawan_masa_depan)
nilai_prediksi_karyawan[[
    "ds", "trend", "yearly", "yhat", "yhat_lower", "yhat_upper"
]].tail()
"""Output yang dihasilkan pada DataFrame nilai_prediksi_karyawan terdapat ds, yearly, yhat, yhat_lower & yhat_upper yang bisa diartikan sebagai berikut:
1. Ds= kolom yang menyatakan waktu. Yang dimana kolom tersebut adalah year_graduated.
2. Trend = merupakan naik dan turunnya suatu prediksi yang diperoleh dari perubahan dari waktu ke waktu.
3. Yearly = Musim tahunan yang menyatakan tingkat kinerja dari para karyawan.
4. yhat = Nilai prediksi
5. Yhat_lower & yhat_upper = Merupakan range error pada interval data prediksi.

Untuk lebih detailnya saya akan membuatkan sebuah visualisasi dari variabel DataFrame nilai_prediksi_karyawan menggunakan salah satu library yaitu matplotlib. 
"""

# Plot the prediction frame with the model's built-in plot method.
fig = model_prophet.plot(nilai_prediksi_karyawan)
def fitting_model(data,
                  weather_included=False,
                  holiday_included=False,
                  CNY_season=False,
                  last_7_days_validation=False,
                  four_season=False):
    """Fit a Prophet model on the 'Reserve' column of ``data`` and forecast 8 periods ahead.

    Parameters
    ----------
    data
        Frame with a 'Date' column in ``YYYYMMDD`` form and a 'Reserve' column.
    weather_included
        Currently has no effect: the weather regressor code is disabled.
    holiday_included
        Load holidays from "datas/All_Holiday.csv" and attach them to the model.
    CNY_season
        Replace the built-in weekly/yearly seasonality with two conditional
        weekly seasonalities ("CNY season" / "Other season").
    last_7_days_validation
        Hold out the last 7 rows from training and print the RMSE of the
        forecast on those rows.
    four_season
        Replace the built-in weekly/yearly seasonality with one conditional
        seasonality per season (Spring, Summer, Autumn, Winter).

    Returns
    -------
    The frame returned by ``Prophet.predict`` for the history plus 8 periods.
    """
    # Parse the YYYYMMDD dates column-wise instead of one row at a time.
    data_copy = data.copy()
    data_copy['Date'] = pd.to_datetime(data_copy['Date'].astype(str),
                                       format='%Y%m%d')
    data_copy = remove_outlier(data_copy, 'Reserve')

    # Configure the model
    m = Prophet(daily_seasonality=False)
    if holiday_included:
        m.holidays = getting_holiday_info("datas/All_Holiday.csv")

    if CNY_season:
        m.weekly_seasonality = False
        m.yearly_seasonality = False
        m.add_seasonality(name="Weekly on CNY Season",
                          period=7,
                          fourier_order=3,
                          condition_name="CNY season")
        m.add_seasonality(name="Weekly on other dates",
                          period=7,
                          fourier_order=3,
                          condition_name="Other season")

    if four_season:
        m.weekly_seasonality = False
        m.yearly_seasonality = False
        for season in ('Spring', 'Summer', 'Autumn', 'Winter'):
            m.add_seasonality(name=f"{season} Season",
                              period=91.5,
                              fourier_order=5,
                              prior_scale=1,
                              condition_name=season)

    data_history = pd.DataFrame({
        'ds': data_copy["Date"],
        "y": data_copy['Reserve']
    })

    if last_7_days_validation:
        # Hold the last 7 rows out of training.
        data_history = data_history[:-7]

    # NOTE: `weather_included` is accepted but does nothing. The weather
    # regressor was only ever present as disabled code and has been dropped.

    # Condition columns required by the conditional seasonalities
    if CNY_season:
        data_history["CNY season"], data_history[
            "Yearly season"] = on_CNY_season(data_history)
        data_history["Other season"] = ~data_history["CNY season"]
    if four_season:
        data_history["Spring"], data_history["Summer"], data_history[
            "Autumn"], data_history["Winter"] = summer_wintter_spring_auttum(
                data_history)

    # Fit, then extend the training history by 8 periods
    m.fit(data_history)
    future_date = m.make_future_dataframe(periods=8)

    # The future frame needs the same condition columns as the history.
    if CNY_season:
        future_date["CNY season"], future_date[
            "Yearly season"] = on_CNY_season(future_date)
        future_date["Other season"] = ~future_date["CNY season"]

    if four_season:
        future_date["Spring"], future_date["Summer"], future_date[
            "Autumn"], future_date["Winter"] = summer_wintter_spring_auttum(
                future_date)

    # Predict over history + future
    future = m.predict(future_date)

    if last_7_days_validation:
        # Match forecasts to the held-out rows BY DATE. Eight periods are
        # forecast, so the last 7 forecast rows are shifted one period past
        # the 7 held-out rows and must not be compared positionally.
        holdout = data_copy[['Date', 'Reserve']].tail(7)
        scored = holdout.merge(future[['ds', 'yhat']],
                               left_on='Date',
                               right_on='ds',
                               how='inner')
        if scored.empty:
            print("RMSE: no forecasts overlap the held-out dates")
        else:
            print(f"RMSE:{rmse(scored['yhat'], scored['Reserve'])}")
    return future
def main():
    """Render the Streamlit stock dashboard.

    The sidebar menu switches between two pages:

    * "Home" renders the most-active, top-gainer and top-loser tables
      returned by ``si``.
    * "Stock Prediction using ML" downloads the price history of a
      user-supplied ticker (from 2015-01-01 up to today), plots the Open and
      Close series, fits Prophet on the Close series and renders the
      forecast together with its components.

    Returns:
        None. Everything is rendered through Streamlit.
    """
    st.title("Welcome to Predict Future of Stocks.")

    menu = ["Home", "Stock Prediction using ML"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        st.subheader("Recommendations")

        # Each si.* call is a remote request, so every table is fetched
        # exactly once instead of being requested, discarded and requested
        # again.
        st.subheader("Today's Most Active Users")
        st.write(si.get_day_most_active())

        st.subheader("Today's Top Gainers")
        st.write(si.get_day_gainers())

        st.subheader("Today's Top Losers")
        st.write(si.get_day_losers())

    elif choice == "Stock Prediction using ML":
        st.subheader("Stock Prediction using ML")

        START = "2015-01-01"
        TODAY = date.today().strftime("%Y-%m-%d")

        selected_stock = st.text_input("Type Stocks's name...")

        submit = st.button('Search')
        if submit:
            ticker = selected_stock.strip()
            if not ticker:
                # Nothing to look up; avoid sending an empty symbol to the
                # data providers.
                st.warning("Please enter a stock ticker symbol.")
                return

            st.write("Live Price : ", si.get_live_price(ticker))
            st.write("Market state : ", si.get_market_status())

            # NOTE(review): st.button is only True on the run triggered by
            # the click. Moving this slider causes a rerun in which `submit`
            # is False, so this whole section disappears - consider moving
            # the slider above the button or keeping state in
            # st.session_state.
            n_years = st.slider("Years of prediction:", 1, 10)
            period = n_years * 365

            def load_data(ticker):
                """Download the history for *ticker* with the date as a column."""
                data = yf.download(ticker, START, TODAY)
                data.reset_index(inplace=True)
                return data

            data_load_state = st.text('Loading data...')
            data = load_data(ticker)
            data_load_state.text('Loading data... done!')

            if data.empty:
                # An empty frame cannot be plotted or fitted; report it
                # instead of letting Prophet raise.
                st.error(f"No price data found for '{ticker}'.")
                return

            st.subheader('Raw data')
            st.write(data.tail())

            # Plot raw data

            def plot_raw_data():
                """Plot the Open and Close series with a range slider."""
                fig = go.Figure()
                fig.add_trace(
                    go.Scatter(x=data['Date'],
                               y=data['Open'],
                               name="stock_open"))
                fig.add_trace(
                    go.Scatter(x=data['Date'],
                               y=data['Close'],
                               name="stock_close"))
                fig.layout.update(
                    title_text='Time Series data with Rangeslider',
                    xaxis_rangeslider_visible=True)
                st.plotly_chart(fig)

            plot_raw_data()

            # Predict forecast with Prophet, which expects the columns to be
            # named ds (date) and y (value).
            df_train = data[['Date', 'Close']]
            df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})

            m = Prophet()
            m.fit(df_train)
            future = m.make_future_dataframe(periods=period)
            forecast = m.predict(future)

            # Show and plot forecast
            st.subheader('Forecast data')
            st.write(forecast.tail())

            st.write(f'Forecast plot for {n_years} years')
            fig1 = plot_plotly(m, forecast)
            st.plotly_chart(fig1)

            st.write("Forecast components")
            fig2 = m.plot_components(forecast)
            st.write(fig2)
Beispiel #41
0
# Finish and display the raw-sensor plot started earlier in the script.
matplotlib.pyplot.legend()
matplotlib.pyplot.ylabel(sensor)
matplotlib.pyplot.show()

# Prophet .fit(df) requires columns ds (dates) and y
df['y'] = df['raw']
df['ds'] = df.index

# All built-in seasonalities are switched off; a single custom seasonality
# named "daily" (period=1, fourier_order=5) is registered instead.
# changepoint_range=1.0 with n_changepoints=150 lets changepoints be placed
# across the whole history.
m = Prophet(
    changepoint_range=1.0,
    n_changepoints=150,
    changepoint_prior_scale=30,
    seasonality_prior_scale=35,
    growth='linear',
    holidays=None,
    daily_seasonality=False,
    weekly_seasonality=False,
    yearly_seasonality=False,
).add_seasonality(name="daily", period=1, fourier_order=5, prior_scale=10)
m.fit(df)
# predict() is called without a dataframe; per Prophet's documentation the
# prediction is then made over the fitted history (no future dates).
forecast = m.predict()

# data, model, and changepoint plot
fig1 = m.plot(forecast)
a = add_changepoints_to_plot(fig1.gca(), m, forecast)
fig1.show()

# component plot
fig2 = m.plot_components(forecast)
fig2.show()
	           between 1997 and 2018""".format(curr_ticker)

	X_train = pd.io.sql.read_sql(query, conn)
	X_train.columns = ['ds', 'y']
	try:
		model = Prophet()
		model.fit(X_train)
	except:
		print('####################')
		print('Modeling Error on ', curr_ticker)
		error_log['Ticker'].append(curr_ticker)
		error_log['TransactionDate'].append('')
		error_log['Issue'].append('Modeling Error')
		continue

	pred = model.predict(ds_test.copy())
	for index, row in pred.iterrows():
		try:
			insert_pred(conn, 'pred_price_sp500base', curr_ticker, row)
		except:
			print('####################')
			print('Prediction Insertion Error on ', curr_ticker)
			error_log['Ticker'].append(curr_ticker)
			error_log['TransactionDate'].append(row['ds'])
			error_log['Issue'].append('Prediction Insertion')
			# To end the query
			conn.commit()
	curr_endtime = datetime.now()
	curr_time = str(curr_endtime - curr_starttime)
	
	time_log['Ticker'].append(curr_ticker)
# Plot the raw daily closing price, indexed by chart date.
jfc.set_index('CHART_DATE').CLOSE.plot(figsize=(15, 10))
plt.title('Jollibee Daily Closing Price', fontsize=25)

from fbprophet import Prophet
#Forecasting closing prices
# Prophet requires the columns to be named ds (date) and y (value).
ts = jfc[['CHART_DATE', 'CLOSE']]
ts.columns = ['ds', 'y']
ts.head()

# Rows dated on or after this day are held out of training and used for
# evaluation below.
HOLDOUT_START = '2019-03-01'

# Fit on the pre-holdout rows only, then extend 7*4*12 = 336 daily periods
# past the end of the training data.
m = Prophet(daily_seasonality=True, yearly_seasonality=True).fit(ts[ts.ds < HOLDOUT_START])
future = m.make_future_dataframe(periods=7*4*12, freq='D')

pred = m.predict(future)

from matplotlib import pyplot as plt
fig1 = m.plot(pred)
plt.title('Jollibee: Forecasted Daily Closing Price', fontsize=25)

# Align predictions and actuals on ds for the holdout window; dropna() keeps
# only the dates present in both series.
pred_holdout = pred[(pred.ds >= HOLDOUT_START)&(pred.ds <= ts.ds.max())].set_index('ds').yhat
target_holdout = ts[ts.ds >= HOLDOUT_START].set_index('ds')
comb = pd.concat([pred_holdout, target_holdout], axis=1).dropna()
comb

import numpy as np
# Root-mean-squared error over the holdout window.
rmse_holdout = np.sqrt(comb.yhat.subtract(comb.y).pow(2).mean())
rmse_holdout

# Mean absolute error over the holdout window.
mae_holdout = np.mean(np.abs(comb.yhat.subtract(comb.y)))
Beispiel #44
0
    'Microsoft':'MSFT',
    'Tesla':'TSLA'
}

# Landing section: title, tagline and banner image.
st.markdown("# Welcome to Stock4Cast!")
st.write('Stock4cast lets you forecast the stock prices of top companies for the next 2 years.')
image = Image.open('stock.jpg')
st.image(image, use_column_width=True)

if action:
    if len(company[choice]) > 1:
        end = dt.datetime.now()
        # NOTE(review): `start` is an integer year, not a date; presumably the
        # reader interprets it as the start of that year - confirm.
        start = end.year-1
        df = reader.get_data_yahoo(company[choice],start,end)
        st.markdown(f'## {choice} statistics')
        st.write(df.sort_index(ascending=False))
        # Prophet requires the columns to be named ds (date) and y (value).
        df.reset_index(inplace=True)
        df = df[['Date','Close']]
        df = df.rename(columns={'Date':'ds','Close':'y'})
        # create prophet model
        model = Prophet(yearly_seasonality=True, daily_seasonality=True)
        model.fit(df)
        # freq='MS' generates month-start dates for the forecast horizon.
        future_dates = model.make_future_dataframe(periods=input_periods, freq='MS')
        forecast = model.predict(future_dates)
        st.markdown(f'## {choice} close price forecast')
        # Hand the figures to Streamlit explicitly: calling st.pyplot() with
        # no argument relies on matplotlib's global figure, which Streamlit
        # has deprecated.
        forecast_fig = model.plot(forecast, uncertainty=True)
        st.pyplot(forecast_fig)
        components_fig = model.plot_components(forecast)
        st.pyplot(components_fig)

    
Beispiel #45
0
def predict(values, timestamp, n_pred):
	"""Forecast the next ``n_pred`` points of a time series with Prophet.

	The series is resampled to its inferred frequency (``'1H'`` when pandas
	cannot infer one), fitted with Prophet, and extended ``n_pred`` periods
	into the future. A second model is fitted on the first ~70% of the rows
	and scored on the remaining ~30% to obtain a MAPE.

	Args:
		values: Observations; each must be convertible to ``float``.
		timestamp: Timestamps matching ``values`` one-to-one.
		n_pred: Number of future periods to forecast.

	Returns:
		``(predictions, mape, engine)`` where ``predictions`` is a list of
		dicts with the keys ``Value``, ``Value_Upper``, ``Value_Lower`` and
		``Timestamp`` (one per future period), ``mape`` is rounded to three
		decimals and ``engine`` is ``'Prophet'``. If anything fails inside
		the forecasting step the error is logged and
		``(None, None, None)`` is returned.
	"""
	import logging

	df = pd.DataFrame(columns = ['value', 'api_timestamp'])
	df['value'] = values
	df['api_timestamp'] = timestamp
	df['value'] = df.value.astype(float)
	df = df.set_index('api_timestamp')

	#### try to infer freq; default to 1H when it cannot be inferred
	pred_freq = pd.infer_freq(df.index) or '1H'

	#### try prophet forecasting engine
	try:
		forecasting_engine = 'Prophet'

		#### resample to make predictions consistent.
		df = df.resample(pred_freq).mean()

		#### prophet requires specific naming of columns
		df = df.reset_index()
		df = df.rename(index=str, columns={'api_timestamp': 'ds', 'value': 'y'})

		#### prophet fails with inf values. Replacing them with nan.
		#### replace() returns a new frame, so the result must be kept.
		df = df.replace([np.inf, -np.inf], np.nan)

		#### Instantiate the prophet object and fit
		m = Prophet()
		m.fit(df)

		#### Make forecast
		future = m.make_future_dataframe(periods=n_pred, freq=pred_freq)
		forecast = m.predict(future)

		#### Compute MAPE by refitting on 70% and testing on 30%
		df_mape = df.iloc[0: len(df)-round(len(df)/100 * 30)]
		m = Prophet()
		m.fit(df_mape)

		#### The held-out rows are regenerated as predictions
		n_pred_mape = len(df) - len(df_mape)

		#### Predict the 30%
		future_mape = m.make_future_dataframe(periods=n_pred_mape, freq=pred_freq)
		forecast_mape = m.predict(future_mape)

		#### Compute MAPE
		mape = np.round(mean_absolute_percentage_error(df.y.values[len(df_mape):len(df)],
		                      forecast_mape.yhat.values[len(df_mape):len(df)]),3)

		#### Gather the forecasted values. The forecast frame holds the
		#### len(df) history rows first, so the future periods are the
		#### n_pred rows that follow them.
		future_rows = forecast.iloc[len(df):len(df) + n_pred]

		temp = []
		for ds, yhat, yhat_upper, yhat_lower in zip(future_rows.ds.values,
		                                            future_rows.yhat.values,
		                                            future_rows.yhat_upper.values,
		                                            future_rows.yhat_lower.values):
			temp.append({
				'Value': np.round(yhat,3),
				'Value_Upper':  np.round(yhat_upper,3),
				'Value_Lower':  np.round(yhat_lower,3),
				'Timestamp': pd.to_datetime(ds).strftime('%Y-%m-%d %H:%M:%S')
			})

		return temp, mape, forecasting_engine

	except Exception:
		#### Best effort: callers get an all-None triple, but the failure is
		#### logged instead of being silently dropped.
		logging.getLogger(__name__).exception('Prophet forecasting failed')
		return None, None, None
    # prepare train and test sets
    # The first 90% of the rows are used for training.
    # NOTE(review): the test slice starts at train_size + 1, so the row at
    # position train_size belongs to neither split - confirm this is intended.
    train_size = int(prophet_df.shape[0] * 0.9)
    train_df = prophet_df.iloc[:train_size]
    test_df = prophet_df.iloc[train_size + 1:]

    # build a prophet model
    pro_model = Prophet()

    # fit the model
    pro_model.fit(train_df)

    # prepare a future dataframe
    # One future period per test row; no freq is passed, so Prophet's default
    # frequency applies - TODO confirm it matches the spacing of the data.
    test_dates = pro_model.make_future_dataframe(periods=test_df.shape[0])

    # forecast values
    forecast_df = pro_model.predict(test_dates)

    # plot the forecast
    pro_model.plot(forecast_df)
    plt.show()

    # plot against true data
    plt.plot(forecast_df.yhat, c='r', label='Forecast')
    plt.plot(forecast_df.yhat_lower.iloc[train_size + 1:],
             linestyle='--',
             c='b',
             alpha=0.3,
             label='Confidence Interval')
    plt.plot(forecast_df.yhat_upper.iloc[train_size + 1:],
             linestyle='--',
             c='b',
Beispiel #47
0
    0,
})
# Second holiday table. Despite the variable name, every row is labelled
# 'chunjie' and covers two seven-day runs (2016-02-07..13 and
# 2017-01-27..02-02). Both windows are 0, so each date stands for itself only.
superbowls = pd.DataFrame({
    'holiday':
    'chunjie',
    'ds':
    pd.to_datetime([
        '2016-02-07', '2016-02-08', '2016-02-09', '2016-02-10', '2016-02-11',
        '2016-02-12', '2016-02-13', '2017-01-27', '2017-01-28', '2017-01-29',
        '2017-01-30', '2017-01-31', '2017-02-01', '2017-02-02'
    ]),
    'lower_window':
    0,
    'upper_window':
    0,
})
# Combine both holiday tables into the single frame passed to Prophet.
holidays = pd.concat((playoffs, superbowls))
prophet = Prophet(
    holidays=holidays,
    changepoint_prior_scale=0.01,
    seasonality_prior_scale=0.009,
    holidays_prior_scale=25.0,
)  #yearly_seasonality=True
prophet.fit(df)
# include_history=False: the frame contains only the 90 future periods, so
# the forecast (and the mean printed below) covers future dates only.
future = prophet.make_future_dataframe(periods=90, include_history=False)
forecast = prophet.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])
# Mean of the point forecast over the 90 future periods.
print('ISIR =', np.mean(forecast['yhat']))
prophet.plot_components(forecast)
plt.show()
Beispiel #48
0
def create_prophet_m(source_name,z1,delay):
    """Fit Prophet on ``z1.bw`` and score it on the last ``delay`` points.

    Two models are fitted:

    * a "realtime" model on the whole series, used to forecast ``delay``
      periods past the end of the data;
    * an evaluation model on everything except the last ``delay`` points,
      whose forecast is compared against those held-out points.

    Future frames are generated with ``freq='H'``.

    Args:
        source_name: Label used in the printed report and as the index of the
            returned metrics frame.
        z1: DataFrame with a ``bw`` column; its index supplies the ``ds``
            values.
        delay: Number of trailing points to hold out and number of future
            periods to forecast.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)``: the one-row metrics
        frame (empty when no held-out row could be matched), the evaluation
        model, its forecast, the merged actual/predicted hold-out rows, and
        the realtime forecast for the ``delay`` periods after the data.
    """
    import math

    train_end_index = len(z1.bw) - delay

    # Train / full / test frames in the ds, y layout Prophet requires.
    train_df = z1.bw.iloc[0:train_end_index].reset_index()
    train_df.columns = ['ds', 'y']

    full_df = z1.bw.iloc[0:len(z1)].reset_index()
    full_df.columns = ['ds', 'y']

    test_df = z1.bw.iloc[train_end_index:len(z1)].reset_index()
    test_df.columns = ['ds', 'y']

    #--- removing outliers in trainset  ---#
    # When the maximum sits at least two medians above the median, values at
    # or above twice the median are blanked out in the training set.
    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)
    print(max(train_df.y))
    if (q100 - q50) >= (2 * q50):
        print('ind')
        train_df.loc[train_df.y >= (2 * q50), 'y'] = None

    ##-- Realtime prediction --##
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Rows after the history are the realtime forecast; .iloc makes the
    # positional slice explicit on the datetime-indexed frame.
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[len(z1):(len(z1) + delay)])
    pred_r = pred_r.reset_index()

    ##-- Evaluation model --##
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:len(z1)])
    pred = pred.reset_index()
    # Left merge keeps every test row; rows without a matching forecast (or
    # with a missing actual) are dropped before scoring.
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)

        ape = pred_df['APE']
        MAPE = ape.mean()
        # The error-rate statistics are taken from the APE column only.
        # Calling quantile() on the whole frame returns one value per column,
        # which cannot be stored in the one-row summary below.
        min_error_rate = ape.quantile(0) / 100
        max_error_rate = ape.quantile(1) / 100
        median_error_rate = ape.quantile(.50) / 100

        std_MAPE = math.sqrt(((ape - MAPE) ** 2).mean())
        print("App name:", source_name)
        print("MSE  :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE over the rows strictly below the 98th APE percentile.
        mape_q98 = ape[ape < ape.quantile(0.98)].mean()

        df = pd.DataFrame({'length': len(z1),
                           'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'std_mape': std_MAPE,  # standard deviation of APE
                           'min_error_rate': min_error_rate,
                           'max_error_rate': max_error_rate,
                           'median_error_rate': median_error_rate,
                           'test_mape_98': mape_q98},
                          index=[source_name])

    return(df,model,forecast,pred_df,pred_r)
Beispiel #49
0
                                               end=pd.to_datetime(max(test_arima.index)), dynamic=True)
            # Confidence intervals of the ARIMA test-window prediction.
            pred_test_ci = pred_test.conf_int()

            # creating the ensembled test result
            # test result
            # NOTE: result_test is the same object as `test` (no copy is
            # made), so the columns added below are added to `test` as well.
            result_test = test
            # The first element of the ARIMA prediction is dropped.
            # NOTE(review): presumably because the prediction range starts
            # one step before the test window - confirm.
            result_test['y_ARIMA'] = np.array(pred_test.predicted_mean)[1:]

            # prophet
            m = Prophet(weekly_seasonality=False, yearly_seasonality=False, changepoint_prior_scale=5)
            m.fit(train);

            # creating pred train and test data frame
            # periods=0 yields only the history dates; the test dates are
            # taken directly from `test`.
            past = m.make_future_dataframe(periods=0, freq='W')
            future = pd.DataFrame(test['ds'])
            pf_train_pred = m.predict(past)
            pf_test_pred = m.predict(future)
            # Re-index the predictions with the index of the frames they were
            # predicted from.
            pf_train_pred = pf_train_pred[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].set_index([past.index])
            pf_test_pred = pf_test_pred[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].set_index([future.index])

            # creating the ensembled test result
            # test result
            result_test['y_Prophet'] = np.array(pf_test_pred.yhat)

            # Ensemble: row-wise mean of the ARIMA and Prophet predictions.
            result_test['y_Ensembled'] = result_test[["y_ARIMA", "y_Prophet"]].mean(axis=1)

            # Advance the windows: `train` grows by test_points rows, and the
            # next `test` window is computed from the already-extended train.
            train = prod[:(max(train.index) + 1 + test_points)]
            test = prod[(max(train.index) + 1):(max(train.index) + 1 + test_points)]
            rem_data = prod[(max(train.index) + test_points):]
Beispiel #50
0
    def create_prophet_m(self,app_name,z1,delay=24):
        """Fit Prophet on ``z1.bw`` and score it on the last ``delay`` points.

        Two models are fitted, both after outlier masking:

        * a "realtime" model on the whole series, used to forecast ``delay``
          periods past the end of the data;
        * an evaluation model on everything except the last ``delay``
          points, whose forecast is compared against those held-out points.

        Future frames are generated with ``freq='H'``.

        Args:
            app_name: Label used as the index of the returned metrics frame.
            z1: DataFrame with a ``bw`` column; its index supplies the ``ds``
                values.
            delay: Number of trailing points to hold out and number of future
                periods to forecast. Defaults to 24.

        Returns:
            Tuple ``(df, model, forecast, pred_df, pred_r)``: the one-row
            metrics frame (empty when no held-out row could be matched), the
            evaluation model, its forecast, the merged actual/predicted
            hold-out rows, and the realtime forecast for the ``delay``
            periods after the data.
        """
        # Only the modules this method actually uses are imported, so it no
        # longer fails when unrelated packages are not installed.
        import math
        import warnings

        import pandas as pd
        from fbprophet import Prophet
        from sklearn.metrics import mean_squared_error as mse

        # NOTE: this silences warnings process-wide, not just in this method.
        warnings.filterwarnings("ignore")

        def _mask_outliers(frame):
            """Blank out ``y`` values at or above twice the 75th percentile.

            Applied in place, and only when the maximum sits at least that
            far above the median.
            """
            q50 = frame.y.median()
            q100 = frame.y.quantile(1)
            q75 = frame.y.quantile(.75)
            if (q100 - q50) >= (2 * q75):
                frame.loc[frame.y >= (2 * q75), 'y'] = None

        ### --- For realtime pred ---###
        full_df = z1.bw.iloc[0:len(z1)].reset_index()
        full_df.columns = ['ds', 'y']
        _mask_outliers(full_df)

        model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
        model_r.fit(full_df)
        future_r = model_r.make_future_dataframe(periods=delay, freq='H')
        forecast_r = model_r.predict(future_r)
        forecast_r.index = forecast_r['ds']
        # Rows after the history are the realtime forecast; .iloc makes the
        # positional slice explicit on the datetime-indexed frame.
        pred_r = pd.DataFrame(forecast_r['yhat'].iloc[len(z1):(len(z1) + delay)])
        pred_r = pred_r.reset_index()
        #--- completes realtime pred ---#

        # Hold out the last `delay` points for evaluation.
        train_end_index = len(z1.bw) - delay

        train_df = z1.bw.iloc[0:train_end_index].reset_index()
        train_df.columns = ['ds', 'y']
        #--- removing outliers in trainset  ---#
        _mask_outliers(train_df)

        test_df = z1.bw.iloc[train_end_index:len(z1)].reset_index()
        test_df.columns = ['ds', 'y']

        model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
        model.fit(train_df)
        future = model.make_future_dataframe(periods=len(test_df), freq='H')
        forecast = model.predict(future)
        forecast.index = forecast['ds']
        pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:len(z1)])
        pred = pred.reset_index()
        # Left merge keeps every test row; rows without a matching forecast
        # (or with a missing actual) are dropped before scoring.
        pred_df = pd.merge(test_df, pred, on='ds', how='left')
        pred_df.dropna(inplace=True)

        df = pd.DataFrame()

        if len(pred_df) > 0:
            pred_df['error_test'] = pred_df.y - pred_df.yhat

            MSE = mse(pred_df.y, pred_df.yhat)
            RMSE = math.sqrt(MSE)
            pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
            MAPE = pred_df.APE.mean()

            # MAPE over the rows strictly below the 98th APE percentile.
            mape_q98 = pred_df['APE'][pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

            df = pd.DataFrame({'length': len(z1),
                               'test_rmse': RMSE,
                               'test_mape': MAPE,
                               'test_mape_98': mape_q98},
                              index=[app_name])

        return(df,model,forecast,pred_df,pred_r)
Beispiel #51
0
# ----- set DATES index as a column ----- #
# Move the index into a column and rename 'datetime' to the 'ds' name
# Prophet requires.
swe_for_prophet = swe_for_prophet.reset_index().rename(
    columns={'datetime': 'ds'})

# --------- #
# Fit Model #
# --------- #

swe_model.fit(swe_for_prophet)

# ---------------------------------------- #
#          Forecasting to the Future       #
# ---------------------------------------- #

# First pass: 100 future periods, used only for the plot below.
# No freq is passed, so Prophet's default frequency applies.
test = swe_model.make_future_dataframe(periods=100)
f = swe_model.predict(test)

fig = swe_model.plot(f)
plt.show()

# Second pass: 24 weekly periods; this is the forecast kept for the merge.
swe_forecast = swe_model \
    .make_future_dataframe(periods=24, freq='W')
swe_forecast = swe_model.predict(swe_forecast)

# Prefix every forecast column with 'SWE_' on a copy; swe_forecast itself
# keeps its original column names.
swe_names = ['SWE_%s' % column for column in swe_forecast.columns]

merge_swe_forecast = swe_forecast.copy()
merge_swe_forecast.columns = swe_names

# Same forecast with 'ds' renamed to 'Date'.
forecast = swe_forecast \
    .rename(columns={'ds': 'Date'})