Ejemplo n.º 1
0
def add_prophet_features(df_shop):
    """Attach rolling Prophet forecasts to a shop frame as ``prophet_*`` columns.

    For every ``m`` from ``biweek_max - 1`` down to ``1`` a Prophet model is
    fitted on the rows whose ``biweek_id >= m`` and asked for a 14-step
    forecast beyond that training window.  Windows with fewer than seven
    non-null ``pays_count`` values are skipped.  All forecasts are stacked,
    their columns prefixed with ``prophet_`` and left-merged onto ``df_shop``
    by day.

    Args:
        df_shop: DataFrame with at least the columns ``day``, ``pays_count``,
            ``biweek_id`` and ``days_from_beginning``.

    Returns:
        A DataFrame with one row per row of ``df_shop`` plus the Prophet
        forecast columns (``prophet_yhat``, ``prophet_trend``, ...).

    Raises:
        ValueError: if no window had enough data to fit a model.
        Exception: if the merged frame does not have the same number of rows
            as ``df_shop``.
    """
    df = df_shop[['day', 'pays_count']].rename(
        columns={'day': 'ds', 'pays_count': 'y'})

    results = []
    biweek_max = df_shop.biweek_id.max()

    for m in range(biweek_max - 1, 0, -1):
        df_train = df[df_shop.biweek_id >= m]

        # Too few observations to fit anything meaningful.
        if df_train.y.notnull().sum() < 7:
            continue

        p = Prophet().fit(df_train)
        future = p.make_future_dataframe(14, include_history=False)
        results.append(p.predict(future))

    if not results:
        raise ValueError(
            "no Prophet forecast could be produced: every training window "
            "had fewer than 7 non-null observations")

    df_res = pd.concat(results).add_prefix('prophet_')

    df_res = df_shop.merge(df_res, how='left',
                           left_on='day', right_on='prophet_ds')
    # 'prophet_t' is only emitted by some Prophet releases, so its absence
    # must not be an error.
    df_res = df_res.drop(columns=['prophet_t', 'prophet_ds'], errors='ignore')

    # A day can be covered by several forecasts; keep the one appended last,
    # i.e. the one produced with the smallest ``m``.
    df_res = df_res.drop_duplicates(subset='days_from_beginning', keep='last')

    if len(df_res) != len(df_shop):
        raise Exception("size doesn't match")

    return df_res
def get_predictions(validate, train):
    """Forecast unit sales with Prophet for a fixed sample of items in store 47.

    For each sampled item present in ``validate`` for store 47, the last
    365 days of (gap-filled) training history are fitted with Prophet, the
    next 16 days are predicted, and the predictions are joined to the
    item's validation rows by date.  Predictions are clipped to
    ``[0, 999999]``.

    Args:
        validate: DataFrame with columns ``id``, ``date``, ``store_nbr`` and
            ``item_nbr`` describing the rows to predict.
        train: DataFrame with columns ``date``, ``store_nbr``, ``item_nbr``
            and ``unit_sales``.

    Returns:
        Tuple ``(result, problem_pairs)``: ``result`` is a DataFrame with
        columns ``id`` and ``unit_sales``; ``problem_pairs`` lists the
        ``(item_nbr, store_nbr)`` pairs Prophet could not be fitted on.
    """
    cv_size = 16  # if you make it bigger, fill missing dates in cv with 0 if any
    train_size = 365
    store_nbr = 47
    example_items = [510052, 1503899, 2081175, 1047674, 215327, 1239746,
                     765520, 1463867, 1010755, 1473396]

    # Loop-invariant: the full calendar used to fill gaps in each series.
    total_dates = train['date'].unique()

    frames = []
    problem_pairs = []
    store47examples = validate.loc[
        (validate.store_nbr == store_nbr)
        & (validate.item_nbr.isin(example_items))]
    print("ONLY PREDICTING ITEMS {} IN STORE NO. 47!".format(example_items))

    # Group by a scalar key (not a one-element list) so each group name is
    # the item number itself rather than a 1-tuple.
    for name, y in store47examples.groupby('item_nbr'):
        item_nbr = int(name)
        df = train[(train.item_nbr == item_nbr)
                   & (train.store_nbr == store_nbr)]
        df = fill_missing_date(df, total_dates)
        df = df.sort_values(by=['date'])
        X = df[-train_size:][['date', 'unit_sales']].rename(
            columns={'date': 'ds', 'unit_sales': 'y'})

        m = Prophet(yearly_seasonality=True)
        try:
            m.fit(X)
        except ValueError:
            print("problem for this item store pair")
            problem_pairs.append((item_nbr, store_nbr))
            continue

        future = m.make_future_dataframe(periods=cv_size)
        pred = m.predict(future)
        # Inner join: only dates present in the validation rows survive.
        data = pred[['ds', 'yhat']].merge(y, left_on='ds', right_on='date')
        data['unit_sales'] = data['yhat'].fillna(0).clip(0, 999999)
        frames.append(data[['id', 'unit_sales']])

    if frames:
        result = pd.concat(frames)
    else:
        result = pd.DataFrame(columns=['id', 'unit_sales'])
    return (result, problem_pairs)
Ejemplo n.º 3
0
    def test_make_future_dataframe(self):
        """Future frames without history start right after the training data.

        The model is fitted on the first half of ``DATA``; three future
        periods are then requested at daily and at monthly frequency and the
        resulting ``ds`` values are compared with the expected dates.
        """
        half = 468 // 2
        forecaster = Prophet()
        forecaster.fit(DATA.head(half))

        # (frequency alias, expected future dates), checked in this order.
        cases = (
            ('D', ['2013-04-26', '2013-04-27', '2013-04-28']),
            ('M', ['2013-04-30', '2013-05-31', '2013-06-30']),
        )
        for freq, expected_dates in cases:
            future = forecaster.make_future_dataframe(
                periods=3, freq=freq, include_history=False)
            expected = pd.DatetimeIndex(expected_dates)
            self.assertEqual(len(future), 3)
            for row, wanted in enumerate(expected):
                self.assertEqual(future.iloc[row]['ds'], wanted)
Ejemplo n.º 4
0
def hello():
    """Fit Prophet on the CSV at ``url`` and return a one-year forecast as JSON.

    The CSV (read from the module-level ``url``) must provide the ``ds`` and
    ``y`` columns Prophet expects; ``y`` is log-transformed before fitting,
    so the returned forecast is on the log scale.

    Returns:
        str: the forecast (history plus 365 future periods) serialised with
        ``DataFrame.to_json(orient='table')``.
    """
    print('Hello, world!')
    df = pd.read_csv(url)
    # NOTE(review): np.log yields -inf/NaN for non-positive values; assumes
    # y is strictly positive -- confirm against the data source.
    df['y'] = np.log(df['y'])

    m = Prophet()
    m.fit(df)
    future = m.make_future_dataframe(periods=365)
    forecast = m.predict(future)
    return forecast.to_json(orient='table')
Ejemplo n.º 5
0
def build_forecast(
        data,
        forecast_range,
        truncate_range=0
):
    """build a forecast for publishing

    Note:
        ``data['date']`` is converted to datetimes in place, so the caller's
        frame is modified.

    Args:
        data (:obj:`pandas.data_frame`): data to build prediction; must
            contain the columns ``date`` and ``avgPrice``
        forecast_range (int): how many periods into the future to forecast
        truncate_range (int, optional): when > 0, keep only rows newer than
            ``truncate_range`` days before the last observed date

    Returns:
        pandas.DataFrame: collection of data + forecast info
            ['date', 'avgPrice', 'yhat', 'yhat_low', 'yhat_high', 'prediction']

    """
    data['date'] = pd.to_datetime(data['date'])
    # Last observed date: everything after it is a prediction.
    filter_date = data['date'].max()

    ## Build DataFrame ##
    predict_df = pd.DataFrame()
    predict_df['ds'] = data['date']
    predict_df['y'] = data['avgPrice']

    ## Run prediction ##
    # https://facebookincubator.github.io/prophet/docs/quick_start.html#python-api
    model = Prophet()
    model.fit(predict_df)
    future = model.make_future_dataframe(periods=forecast_range)
    # Predict once and reuse the result: predicting is expensive.
    forecast = model.predict(future)

    # Right join keeps every forecast row; 'y' is missing for future dates.
    predict_df = pd.merge(
        predict_df, forecast,
        on='ds',
        how='right'
    )

    ## Build report for endpoint ##
    report = pd.DataFrame()
    report['date'] = pd.to_datetime(predict_df['ds'], format='%Y-%m-%d')
    report['avgPrice'] = predict_df['y']
    report['yhat'] = predict_df['yhat']
    report['yhat_low'] = predict_df['yhat_lower']
    report['yhat_high'] = predict_df['yhat_upper']
    report['prediction'] = False
    report.loc[report.date > filter_date, 'prediction'] = True

    if truncate_range > 0:
        cut_date = filter_date - timedelta(days=truncate_range)
        report = report.loc[report.date > cut_date]

    return report
Ejemplo n.º 6
0
def train_prophet(df, modelDir, confidence=0.99):
    """Fit Prophet on ``df`` and cache the in-sample forecast in ``modelDir``.

    Args:
        df: training frame with the ``ds`` and ``y`` columns Prophet expects.
        modelDir: directory that receives ``forecasted_<confidence>.csv``.
        confidence: width of the uncertainty interval (``interval_width``).

    Returns:
        The forecast DataFrame over the historical dates only (``periods=0``),
        with the observed values added as column ``y``.
    """
    m = Prophet(
        yearly_seasonality=True, daily_seasonality=True, interval_width=confidence
    )
    # Fitting is noisy on stdout/stderr; silence it.
    with suppress_stdout_stderr():
        m.fit(df)

    # Single-argument call form: prints the same on Python 2 and Python 3.
    print("PREDICTING!")
    future = m.make_future_dataframe(periods=0)
    forecast = m.predict(future)
    # Merge in the historical data.
    # NOTE(review): this assignment aligns on index labels; it presumably
    # relies on df having a default RangeIndex in ds order -- confirm.
    forecast["y"] = df.y.astype(float)
    # Persist the forecast (not the model object) for later reuse.
    forecast.to_csv(
        pJoin(modelDir, "forecasted_{}.csv".format(confidence)), index=False
    )
    return forecast
Ejemplo n.º 7
0
# Use the 'ggplot' style for every figure drawn below.
plt.style.use('ggplot')

# Load the price history; the 'Date' column is parsed as datetimes and used
# as the index.
market_df = pd.read_csv('UNVR.JK2.csv', index_col='Date', parse_dates=True)

# Bare expression: the result is discarded when this runs as a script.
market_df.head()

# Prophet expects the timestamp column to be named 'ds' and the target 'y'.
df = market_df.reset_index().rename(columns={'Date': 'ds', 'Close': 'y'})
#df['y'] = np.log(df['y'])

df.head()

# Quick look at the raw series.
df.set_index('ds').y.plot()

# Fit on the whole history and forecast 366 periods past its end.
model = Prophet()
model.fit(df)
future = model.make_future_dataframe(periods=366)
forecast = model.predict(future)

model.changepoints

# Forecast plot with a dashed vertical line at every changepoint.
figure = model.plot(forecast)
for changepoint in model.changepoints:
    plt.axvline(changepoint, ls='--', lw=1)

# Bar chart of the fitted 'delta' parameter averaged over axis 0; the axis
# labels below describe it as the rate change per potential changepoint.
deltas = model.params['delta'].mean(0)
fig = plt.figure(facecolor='w')
ax = fig.add_subplot(111)
ax.bar(range(len(deltas)), deltas)
ax.grid(True, which='major', c='gray', ls='-', lw=1, alpha=0.2)
ax.set_ylabel('Rate change')
ax.set_xlabel('Potential changepoint')
Ejemplo n.º 8
0
def _mask_outliers(frame):
    """Replace extreme ``y`` values of *frame* with missing values, in place.

    Only acts when the maximum exceeds the median by at least twice the
    median; every ``y`` that is >= 2 * median is then blanked out.
    """
    median = frame.y.median()
    maximum = frame.y.quantile(1)
    if (maximum - median) >= (2 * median):
        frame.loc[frame.y >= (2 * median), 'y'] = None


def _fit_hourly_forecast(history, periods):
    """Fit Prophet on *history* and forecast *periods* hourly steps past it.

    Returns ``(model, forecast)``; the forecast covers the history plus the
    future steps and is indexed by its ``ds`` column.
    """
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(history)
    future = model.make_future_dataframe(periods=periods, freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    return model, forecast


def create_prophet_m(app_name, z1, delay=24):
    """Build a realtime forecast and a back-tested model for one application.

    Two Prophet models are fitted on ``z1.app_rsp_time``:

    * a "realtime" model on the full series, used to predict the next
      ``delay`` hours;
    * a back-test model on everything except the last ``delay`` points,
      evaluated on those held-out points.

    Outliers are masked in each training set before fitting.

    Args:
        app_name: label used in the printed report and as the index of the
            metrics frame.
        z1: DataFrame with an ``app_rsp_time`` column; its index supplies
            the timestamps.
        delay: number of hourly steps to forecast / hold out.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)``:
        ``df`` holds ``test_rmse``, ``test_mape`` and ``test_mape_98`` for
        ``app_name`` (empty when nothing could be evaluated); ``model`` and
        ``forecast`` belong to the back-test; ``pred_df`` is the held-out
        data joined with its predictions; ``pred_r`` is the realtime
        prediction (columns ``ds`` and ``yhat``).
    """
    n_obs = len(z1)

    # --- Realtime prediction: train on everything, predict `delay` ahead ---
    full_df = z1.app_rsp_time.iloc[0:n_obs].reset_index()
    full_df.columns = ['ds', 'y']
    _mask_outliers(full_df)
    _, forecast_r = _fit_hourly_forecast(full_df, delay)
    pred_r = pd.DataFrame(
        forecast_r['yhat'][n_obs:(n_obs + delay)]).reset_index()

    # --- Back-test: hold out the last `delay` observations ---
    train_end_index = len(z1.app_rsp_time) - delay
    train_df = z1.app_rsp_time.iloc[0:train_end_index].reset_index()
    test_df = z1.app_rsp_time.iloc[train_end_index:n_obs].reset_index()
    train_df.columns = ['ds', 'y']
    test_df.columns = ['ds', 'y']
    _mask_outliers(train_df)

    model, forecast = _fit_hourly_forecast(train_df, len(test_df))
    pred = pd.DataFrame(
        forecast['yhat'][train_end_index:n_obs]).reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:
        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        # NOTE(review): the percentage error is infinite/NaN wherever the
        # observed y is 0 -- confirm that cannot happen for response times.
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
        MAPE = pred_df.APE.mean()
        print("App name:", app_name)
        print("MSE  :", MSE)
        print("RMSE :", RMSE)
        print("MAPE :", MAPE)

        # MAPE over the errors below the 98th percentile (drops the worst).
        mape_q98 = pred_df['APE'][
            pred_df.APE < pred_df['APE'].quantile(0.98)].mean()

        df = pd.DataFrame({'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'test_mape_98': mape_q98},
                          index=[app_name])

    return (df, model, forecast, pred_df, pred_r)
Ejemplo n.º 9
0
# Graph the data to get an idea of suitable parameter settings for the model.
# Both frames already carry the 'ds'/'y' column names Prophet expects.
store_1_df.plot(x='ds', y='y', color='red', kind='scatter')
store_2_df.plot(x='ds', y='y', color='blue', kind='scatter')
plt.show()

# Fit one Prophet model per store (default parameters are used here).
# Model for store_1.
m = Prophet()
m.fit(store_1_df)

# Model for store_2.
n = Prophet()
n.fit(store_2_df)

# Future dataframes (history plus 30 extra periods) to hold the predictions
# for both stores.
future_1 = m.make_future_dataframe(periods=30)

future_2 = n.make_future_dataframe(periods=30)

# Make the predictions for the dates in the future dataframes.
forecast_1 = m.predict(future_1)

forecast_2 = n.predict(future_2)

# Plot the results of the prediction.
fig1 = m.plot(forecast_1)
fig2 = n.plot(forecast_2)
plt.show()

# Show the decomposition of the model (store_1 only).
fig3 = m.plot_components(forecast_1)
Ejemplo n.º 10
0
                     period=91.5,
                     fourier_order=7,
                     prior_scale=0.02)
# Fit the model configured just above on its training set.
m_TS.fit(train_df_TunnelStudy)
# Model for the TunnelMiscellaneous series: 95% uncertainty interval, yearly
# and weekly seasonality enabled, plus a custom 'quarterly' seasonality with
# a period of 91.5 days.
m_TM = Prophet(interval_width=0.95,
               yearly_seasonality=True,
               weekly_seasonality=True,
               changepoint_prior_scale=2)
m_TM.add_seasonality(name='quarterly',
                     period=91.5,
                     fourier_order=7,
                     prior_scale=0.02)
m_TM.fit(train_df_TunnelMiscellaneous)

# Predict a future dataframe by quarter.
# Each horizon equals the length of the matching test set, so the forecast
# covers exactly the held-out quarters; the tail is printed as a sanity check.
future_AirHoliday = m_AH.make_future_dataframe(periods=len(test_df_AirHoliday),
                                               freq='Q')
print(future_AirHoliday.tail())
future_AirBusiness = m_AB.make_future_dataframe(
    periods=len(test_df_AirBusiness), freq='Q')
print(future_AirBusiness.tail())
future_AirVFR = m_AV.make_future_dataframe(periods=len(test_df_AirVFR),
                                           freq='Q')
print(future_AirVFR.tail())
future_AirStudy = m_AS.make_future_dataframe(periods=len(test_df_AirStudy),
                                             freq='Q')
print(future_AirStudy.tail())
future_AirMiscellaneous = m_AM.make_future_dataframe(
    periods=len(test_df_AirMiscellaneous), freq='Q')
print(future_AirMiscellaneous.tail())

future_SeaHoliday = m_SH.make_future_dataframe(periods=len(test_df_SeaHoliday),
Ejemplo n.º 11
0
    def prophetForecast(self, x, y, z):
        """Forecast per-minute counts with Prophet and export them to CSV.

        History is read from ``temp.csv`` (columns ``time_interval`` and
        ``count``); extra inclement-weather dates are read from
        ``weather_dates.csv``.  A fixed calendar of holidays/events is passed
        to Prophet so their effect can be estimated.

        Args:
            x: number of future one-minute steps to forecast.
            y: string label; it names the prediction column
                (``<y>_min_prediction``) and the output file
                (``<y>MinPredictionOutput.csv``).
            z: Prophet ``changepoint_prior_scale`` (trend flexibility;
                Prophet's default is 0.05).

        Returns:
            None. The forecast is written to ``<y>MinPredictionOutput.csv``.
        """

        def holiday_frame(name, dates, lower_window=0, upper_window=1):
            # Prophet extends a holiday to the days in
            # [lower_window, upper_window] around each date: lower_window
            # (<= 0) counts days before it, upper_window (>= 0) days after.
            return pd.DataFrame({
                'holiday': name,
                'ds': pd.to_datetime(dates),
                'lower_window': lower_window,
                'upper_window': upper_window,
            })

        df = pd.read_csv('temp.csv')
        # Prophet requires the column names 'ds' and 'y'.
        df = df.rename(columns={'time_interval': 'ds', 'count': 'y'})
        #df['y']= np.log(df['y']) #log the y column for better prediction
        w_df = pd.read_csv('weather_dates.csv')

        # Holiday/event dataframes to look for effects on those dates.
        Thanksgiving = holiday_frame('Thanksgiving', [
            '2017-11-23', '2018-11-22', '2019-11-28', '2020-11-26',
            '2021-11-25', '2022-11-24'
        ])
        Christmas = holiday_frame('Christmas', [
            '2017-12-25', '2018-12-25', '2019-12-25', '2020-12-25',
            '2021-12-25', '2022-12-25'
        ])
        Superbowl = holiday_frame('Superbowl', [
            '2018-02-04', '2019-02-03', '2020-02-02', '2021-02-07',
            '2022-02-06'
        ])  # dates subject to change
        NewYears = holiday_frame('NewYears', [
            '2018-01-01', '2019-01-01', '2020-01-01', '2021-01-01',
            '2022-01-01'
        ])
        # Two separate date strings: a single comma-joined string is not a
        # parseable date.  The window covers the six days before each date.
        SpringBreak = holiday_frame(
            'SpringBreak', ['2018-03-11', '2019-03-12'],
            lower_window=-6, upper_window=0)  # dates subject to change
        # St. Patrick's Day falls on March 17.
        StPatricks = holiday_frame('StPatricks', [
            '2018-03-17', '2019-03-17', '2020-03-17', '2021-03-17',
            '2022-03-17'
        ])
        Valentines = holiday_frame('Valentines', [
            '2018-02-14', '2019-02-14', '2020-02-14', '2021-02-14',
            '2022-02-14'
        ])
        ForthOfJuly = holiday_frame('ForthOfJuly', [
            '2018-07-04', '2019-07-04', '2020-07-04', '2021-07-04',
            '2022-07-04'
        ])

        # Freezing rain day.
        # NOTE(review): Prophet documents holiday windows in whole days; the
        # fractional lower_window is presumably truncated -- confirm.
        InclementWeather = holiday_frame('InclementWeather', [
            '2018-02-06 12:00:00', '2018-02-06 15:00:00',
            '2018-02-06 18:00:00', '2018-02-06 21:00:00'
        ], lower_window=-.125, upper_window=0)

        # Append the dates from weather_dates.csv and keep the last entry
        # for each timestamp.
        # NOTE(review): w_df is read without date parsing, so its 'ds'
        # values are presumably strings and would not de-duplicate against
        # the datetimes above -- confirm the file's schema.
        InclementWeather = pd.concat(
            [InclementWeather, w_df], ignore_index=True
        )
        InclementWeather.drop_duplicates(subset=['ds'],
                                         inplace=True,
                                         keep='last')

        holidays = pd.concat([
            Thanksgiving, Christmas, Superbowl, NewYears, SpringBreak,
            StPatricks, Valentines, ForthOfJuly, InclementWeather
        ])

        m = Prophet(yearly_seasonality=False,
                    holidays=holidays,
                    changepoint_prior_scale=z)
        df['floor'] = 0  #set floor
        print("before")
        m.fit(df)  # reported by the original author to freeze here
        print("after")
        # x future steps at one-minute frequency, history excluded.
        future = m.make_future_dataframe(
            periods=x, freq='1min', include_history=False
        )
        forecast = m.predict(future)
        forecast['floor'] = 0
        #m.plot(forecast); #display graph
        #m.plot_components(forecast); #display seasonality/holiday information
        prediction_column = y + '_min_prediction'
        forecast = forecast.rename(columns={
            'ds': 'date_and_time',
            'yhat': prediction_column
        })
        # Export Prophet's predictions.
        forecast[['date_and_time', prediction_column]].to_csv(
            y + 'MinPredictionOutput.csv')
        return
Ejemplo n.º 12
0
def stock():
    """Flask view for the stock forecast page.

    GET renders the selection form.  POST reads the chosen market, ticker
    code, learning period (years) and prediction period (days) from the
    form, downloads the closing prices from Yahoo, fits Prophet, saves the
    forecast plot to ``static/img/stock.png`` and renders the result page.

    Returns:
        The rendered template for the form (GET) or for the result (POST).
    """
    menu = {
        'ho': 0,
        'da': 1,
        'ml': 0,
        'se': 0,
        'co': 0,
        'cg': 0,
        'cr': 0,
        'st': 1,
        'wc': 0
    }
    if request.method == 'GET':
        return render_template('stock/stock.html',
                               menu=menu,
                               weather=get_weather(),
                               kospi=kospi_dict,
                               kosdaq=kosdaq_dict,
                               nyse=nyse_dict,
                               nasdaq=nasdaq_dict)

    # Korean tickers need a market suffix; US tickers are used as is.
    market = request.form['market']
    if market == 'KS':
        code = request.form['kospi_code']
        company = kospi_dict[code]
        code += '.KS'
    elif market == 'KQ':
        code = request.form['kosdaq_code']
        company = kosdaq_dict[code]
        code += '.KQ'
    elif market == 'NY':
        code = request.form['nyse_code']
        company = nyse_dict[code]
    else:
        code = request.form['nasdaq_code']
        company = nasdaq_dict[code]
    learn_period = int(request.form['learn'])
    pred_period = int(request.form['pred'])
    current_app.logger.debug(
        f'{market}, {code}, {learn_period}, {pred_period}')

    # Train on the last `learn_period` years, up to yesterday.
    today = datetime.now()
    start_learn = today - timedelta(days=learn_period * 365)
    end_learn = today - timedelta(days=1)

    stock_data = pdr.DataReader(code,
                                data_source='yahoo',
                                start=start_learn,
                                end=end_learn)
    current_app.logger.info(f"get stock data: {company}({code})")
    # Prophet wants plain 'ds'/'y' columns; drop the date index instead of
    # turning it into an extra column that has to be deleted again.
    df = pd.DataFrame({'ds': stock_data.index, 'y': stock_data.Close})
    df = df.reset_index(drop=True)

    model = Prophet(daily_seasonality=True)
    model.fit(df)
    future = model.make_future_dataframe(periods=pred_period)
    forecast = model.predict(future)

    # NOTE(review): every request writes the same file name, so concurrent
    # requests can overwrite each other's plot -- confirm this is acceptable.
    from matplotlib import pyplot as plt
    fig = model.plot(forecast)
    img_file = os.path.join(current_app.root_path, 'static/img/stock.png')
    fig.savefig(img_file)
    # Release the figure; otherwise pyplot keeps one alive per request.
    plt.close(fig)
    # The modification time is handed to the template with the result.
    mtime = int(os.stat(img_file).st_mtime)

    # NOTE(review): the GET branch calls get_weather() while this branch
    # calls get_weather_main() -- confirm both helpers exist.
    return render_template('stock/stock_res.html',
                           menu=menu,
                           weather=get_weather_main(),
                           mtime=mtime,
                           company=company,
                           code=code)
def fbprophet(ts_obj,
              gaussian_window_size,
              step_size,
              changepoint_prior_scale=.05,
              growth='linear',
              yearly_seasonality='auto',
              weekly_seasonality='auto',
              daily_seasonality='auto',
              holidays=None,
              seasonality_mode='additive',
              seasonality_prior_scale=10,
              holidays_prior_scale=10,
              plot_anomaly_score=False,
              plot_forecast=False,
              grid_search_mode=False):
    """Fit Prophet on a whole time series and score it for anomalies.

    The model is trained on the entire series and then asked to predict the
    same time steps (``periods=0``).  Depending on ``grid_search_mode`` the
    in-sample RMSE or the anomaly scores derived from the prediction errors
    are returned.

    Args:
        ts_obj: time-series object exposing ``dataframe`` (columns
            ``timestamp`` and ``value``), ``miss``, ``dateformat``,
            ``timestep``, ``get_length()``, ``get_probationary_index()``
            and ``set_miss(fill=...)``.
        gaussian_window_size: forwarded to
            ``ah.determine_anomaly_scores_error``.
        step_size: forwarded to ``ah.determine_anomaly_scores_error``.
        changepoint_prior_scale, growth, yearly_seasonality,
        weekly_seasonality, daily_seasonality, holidays, seasonality_mode,
        seasonality_prior_scale, holidays_prior_scale: passed unchanged to
            the ``Prophet`` constructor.
        plot_anomaly_score: plot the anomaly scores and the series.
        plot_forecast: plot the forecast with its uncertainty band.
        grid_search_mode: when true, return only the RMSE.

    Returns:
        The RMSE when ``grid_search_mode`` is true; otherwise a dict with
        the keys ``"Anomaly Scores"``, ``"Time"`` (elapsed seconds) and
        ``"Predictions"``.
    """
    start = time.time()

    fb_prophet_model = Prophet(changepoint_prior_scale=changepoint_prior_scale,
                               growth=growth,
                               yearly_seasonality=yearly_seasonality,
                               weekly_seasonality=weekly_seasonality,
                               daily_seasonality=daily_seasonality,
                               holidays=holidays,
                               seasonality_mode=seasonality_mode,
                               seasonality_prior_scale=seasonality_prior_scale,
                               holidays_prior_scale=holidays_prior_scale)
    if ts_obj.miss:
        # https://facebook.github.io/prophet/docs/outliers.html
        # Prophet has no problem with missing data
        # You set the missing values to NaNs in the training data
        # But you LEAVE the dates in the prediction
        ref_date_range = ch.get_ref_date_range(ts_obj.dataframe,
                                               ts_obj.dateformat,
                                               ts_obj.timestep)
        data_copy = copy.deepcopy(ts_obj.dataframe)
        data_copy["timestamp"] = pd.to_datetime(data_copy["timestamp"],
                                                format=ts_obj.dateformat)
        data_copy.set_index('timestamp', inplace=True)
        # Reindexing on the full date range inserts NaN rows for the gaps.
        data_copy = data_copy.reindex(ref_date_range, fill_value=np.nan)
        # use entire time series for training; "count" is the row position
        fb_df_train = pd.DataFrame({
            "count": list(range(len(data_copy))),
            "ds": ref_date_range,
            "y": data_copy["value"]
        })
    else:
        # use entire time series for training
        fb_df_train = pd.DataFrame({
            "ds": ts_obj.dataframe["timestamp"],
            "y": ts_obj.dataframe["value"]
        })

    fb_prophet_model.fit(fb_df_train, verbose=False)

    # periods=how much further you want to extend from the training dataset
    # this is not periodicity relating to seasonality
    future = fb_prophet_model.make_future_dataframe(periods=0,
                                                    freq=ts_obj.timestep)
    # make a forecast over the entire time series
    fcst = fb_prophet_model.predict(future)

    predictions = fcst["yhat"].values

    # get RMSE
    if grid_search_mode:
        if ts_obj.miss:
            # remove the predictions from missing time steps
            inds = fb_df_train.loc[
                pd.isna(fb_df_train["y"]), :]["count"].values
            print(inds)
            # Set lookup keeps the filter linear in the series length.
            missing_positions = set(inds.tolist())
            nonmissing_predictions = [
                value for position, value in enumerate(predictions)
                if position not in missing_positions
            ]
            # Root of the MSE; avoids the deprecated ``squared=False``.
            rmse = np.sqrt(mean_squared_error(
                ts_obj.dataframe["value"].values, nonmissing_predictions))
            print("RMSE: ", rmse)

        else:
            rmse = np.sqrt(mean_squared_error(
                ts_obj.dataframe["value"].values, predictions))
            print("RMSE: ", rmse)
        return rmse

    # get anomaly scores
    if ts_obj.miss:
        # you HAVE to interpolate to get a gaussian window
        new_ts_obj = copy.deepcopy(ts_obj)
        new_ts_obj.set_miss(fill=True)
        actual = list(new_ts_obj.dataframe["value"])
    else:
        actual = ts_obj.dataframe["value"]

    anomaly_scores = ah.determine_anomaly_scores_error(
        actual, predictions, ts_obj.get_length(), gaussian_window_size,
        step_size)

    # Plotting below is deliberately excluded from the reported time.
    end = time.time()

    if plot_forecast:
        forecast_steps = list(range(len(fcst)))
        plt.plot(forecast_steps, fcst["yhat"])
        plt.fill_between(forecast_steps,
                         fcst["yhat_lower"],
                         fcst["yhat_upper"],
                         facecolor='blue',
                         alpha=.3)
        observed = data_copy["value"] if ts_obj.miss \
            else ts_obj.dataframe["value"]
        plt.plot(list(range(len(predictions))), observed, alpha=.5)
        plt.xticks(rotation=90)
        plt.show()

    if plot_anomaly_score:
        plt.subplot(211)
        plt.title("Anomaly Scores")
        plt.plot(anomaly_scores)
        plt.ylim([.99, 1])
        plt.subplot(212)
        plt.title("Time Series")
        plt.plot(ts_obj.dataframe["value"].values)
        plt.axvline(ts_obj.get_probationary_index(),
                    color="black",
                    label="probationary line")
        plt.tight_layout()
        plt.show()

    return {
        "Anomaly Scores": anomaly_scores,
        "Time": end - start,
        "Predictions": predictions
    }
Ejemplo n.º 14
0
df.head()


# Trend flexibility: changepoint_prior_scale=0.05; the larger the value, the more closely the fit follows the data, at the risk of overfitting.
# Uncertainty interval: interval_width=0.8 (default); the smaller the value, the narrower the band between the upper and lower bounds.
# Growth type: growth='linear' or growth="logistic"; the default should be linear.
# Markov chain Monte Carlo sampling (MCMC): mcmc_samples=0; it makes the computation very slow. Its exact meaning is unclear to the author.
# Changepoint range: changepoint_range=0.9 -- according to the author, changepoints are searched in the first 90% of the data by default
# (NOTE(review): Prophet documents 0.8 as the default -- confirm). When forecasting this sine curve the result is wrong unless
# changepoint_range=1 is set; the reason is unknown.

m = Prophet(changepoint_prior_scale=0.9,interval_width=0.9,growth='linear',changepoint_range=1)          
m.fit(df);

# periods: usually chosen from the real-world meaning of the data. Key point: the forecast horizon is the length of one period.
# freq: values seen so far include 'MS', H and M; forecasting the sine curve needs H. Personal understanding: use H when the data changes quickly.
future = m.make_future_dataframe(periods=120, freq='H') # set via freq='MS' or H

future['cap'] = 1 # only used for logistic ('log') forecasts? Can be added for linear as well.
future['floor'] = -1# only used for logistic ('log') forecasts? Can be added for linear as well.

# Plotting
future.tail()

forecast = m.predict(future)
forecast.tail()
fig=m.plot(forecast)
plt.savefig('./out/'+filename+'_1.jpg',dpi=500)
m.plot_components(forecast)
plt.savefig('./out/'+filename+'_2.jpg',dpi=500)
#print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']])  # print to console
 
Ejemplo n.º 15
0
def forecastr(data, forecast_settings, column_headers, freq_val,
              build_settings):
    """
    Background: This function will take the data from the csv and forecast out x number of periods.

    Note: ``data`` is modified in place: its date and metric columns are
    renamed to ``ds`` and ``y``, its index labels are converted to strings,
    and ``cap``/``floor`` columns are added for logistic models.

    Input:

    data: This is a pandas dataframe containing time series data (2 columns: date and metric)
    forecast_settings: Sequence read by position:
        [0] model type ('linear' or 'logistic'), [1] forecast period length,
        [2] carrying capacity, [3] saturated minimum (both only read for
        logistic models), [4] seasonality mode, [5] seasonality prior scale,
        [6] (daily, weekly, yearly) seasonality flags, [7] number of
        changepoints, [8] changepoint prior scale
    column_headers: List containing the name of the date and metric
    freq_val: String containing "D","M","Y"
    build_settings: String determining whether this is an initial or updated forecast (currently not used).


    Output:

    [y_hat, dates, m, csv_ready_for_export, forecasted_vals, forecasted_vals_mean]:
    A list containing the forecasted data, the dates, the fitted model, the
    records for the csv export, and two [forecast, actual, difference]
    summaries (sum and mean) over the last ``fs_period`` rows.


    """

    ##### Variables, Model Settings & Facebook Prophet Hyper Parameters #####

    dimension = column_headers[0]  # date
    metric = column_headers[1]  # metric name

    # Rename the columns so we can use FB Prophet
    data.rename(index=str,
                columns={
                    dimension: "ds",
                    metric: "y"
                },
                inplace=True)

    # Hyper-parameters
    fs_model_type = forecast_settings[0]  # linear or logistic
    fs_period = int(forecast_settings[1])  # int
    fs_seasonality_mode = forecast_settings[4]  # additive or multiplicative
    fs_daily_seasonality = forecast_settings[6][0]  # True or False
    fs_weekly_seasonality = forecast_settings[6][1]  # True or False
    fs_yearly_seasonality = forecast_settings[6][2]  # True or False

    # Carrying capacity and saturated minimum are only needed (and only
    # read) for logistic growth.
    is_logistic = fs_model_type == 'logistic'
    if is_logistic:
        fs_carrying_capacity = int(forecast_settings[2])  # int
        fs_saturated_minimum = int(forecast_settings[3])  # int
        data['cap'] = fs_carrying_capacity
        data['floor'] = fs_saturated_minimum
    else:
        print('no cap or floor needed as it is a linear model.')

    # Additional hyper-parameters: values that were not set from within the
    # UI are normalised by check_val_of_forecast_settings.
    fs_seasonality_prior_scale = check_val_of_forecast_settings(
        forecast_settings[5])
    fs_n_changepoints = check_val_of_forecast_settings(forecast_settings[7])
    fs_changepoints_prior_scale = check_val_of_forecast_settings(
        forecast_settings[8])

    #### End of Hyper Parameters Settings ####

    # Candidate keyword arguments for Prophet().
    model_arg_vals = {
        'growth': fs_model_type,
        'seasonality_mode': fs_seasonality_mode,
        'seasonality_prior_scale': fs_seasonality_prior_scale,
        'daily_seasonality': fs_daily_seasonality,
        'weekly_seasonality': fs_weekly_seasonality,
        'yearly_seasonality': fs_yearly_seasonality,
        'n_changepoints': fs_n_changepoints,
        'changepoint_prior_scale': fs_changepoints_prior_scale,
    }

    ###### CHECK TO SEE WHAT VALUES WERE SET FROM WITHIN THE UI ######

    # Values that are empty, 0, False or 'auto' are left out so Prophet
    # falls back to its own default for that argument.
    # NOTE(review): this also drops an explicit False for a seasonality
    # flag, so a seasonality cannot be switched off here -- confirm that
    # this is intended.
    prophet_arg_vals = {}

    for key, value in model_arg_vals.items():
        if (value == "") or (value == False) or (value == 0) or (value
                                                                 == 'auto'):
            print('skipping this key value pair')
        else:
            prophet_arg_vals[key] = value

    ##### TIME TO INSTANTIATE, FIT AND PREDICT WITH FACEBOOK PROPHET ######

    m = Prophet(**prophet_arg_vals)

    # Fit the model and report how long it took.
    start = time.time()
    m.fit(data)
    end = time.time()
    print(end - start)

    # Status update
    emit('processing', {'data': 'model has been fit'})

    # History plus the requested number of future periods at the requested
    # frequency (eg. "D", "M", "Y").
    future = m.make_future_dataframe(periods=fs_period, freq=freq_val)

    # Logistic growth needs cap and floor on the future frame as well.
    if is_logistic:
        future['cap'] = fs_carrying_capacity
        future['floor'] = fs_saturated_minimum
    else:
        print('no cap or floor needed as it is a linear model.')

    # Let's predict the future :)
    forecast = m.predict(future)

    ##### Removed Cross-Validation for this release - see v3 for previous implementation #####

    ##### Send y_hat and dates to a list, so that they can be graphed easily when set in ChartJS

    y_hat = forecast['yhat'].tolist()
    dates = forecast['ds'].apply(lambda x: str(x).split(' ')[0]).tolist()

    ##### Lets see how the forecast compares to historical performance #####

    # Sum and mean of the forecasted metric over the forecast horizon ...
    forecast_sum = forecast['yhat'][-fs_period:].sum()
    forecast_mean = forecast['yhat'][-fs_period:].mean()

    # ... and of the actuals over the same number of most recent rows.
    actual_sum = float(data['y'][-fs_period:].sum())
    actual_mean = float(data['y'][-fs_period:].mean())

    difference = '{0:.1%}'.format(((forecast_sum - actual_sum) / forecast_sum))
    difference_mean = '{0:.1%}'.format(
        ((forecast_mean - actual_mean) / forecast_mean))

    forecasted_vals = [
        '{0:.1f}'.format(forecast_sum), '{0:.1f}'.format(actual_sum),
        difference
    ]
    forecasted_vals_mean = [
        '{0:.1f}'.format(forecast_mean), '{0:.1f}'.format(actual_mean),
        difference_mean
    ]

    ####### Formatting data for CSV Export Functionality ##########

    # First, let's merge the original and forecast dataframes
    data_for_csv_export = pd.merge(forecast, data, on='ds', how='left')

    # Select the export columns and rename y / yhat to the actual metric
    # names.
    export_formatted = data_for_csv_export[[
        'ds', 'y', 'yhat', 'yhat_upper', 'yhat_lower'
    ]].rename(index=str,
              columns={
                  'ds': 'date',
                  'y': metric,
                  'yhat': metric + '_forecast',
                  'yhat_upper': metric + '_upper_forecast',
                  'yhat_lower': metric + '_lower_forecast'
              })

    # replace NaN with an empty val
    export_formatted = export_formatted.replace(np.nan, '', regex=True)

    # Format timestamp: keep only the date part
    export_formatted['date'] = export_formatted['date'].apply(
        lambda x: str(x).split(' ')[0])

    # Create dictionary format for sending to csv
    csv_ready_for_export = export_formatted.to_dict('records')

    # print(y_hat)
    # print(csv_ready_for_export)
    print(forecasted_vals)
    print(forecasted_vals_mean)

    return [
        y_hat, dates, m, csv_ready_for_export, forecasted_vals,
        forecasted_vals_mean
    ]
Ejemplo n.º 16
0
def PlotSeries():
    """Render the forecast page, generating a Prophet plot when the form is filled in.

    Reads the "Anos", "Delegacias" and "Crimes" form fields. When all three
    are present and non-empty, the series is loaded with ``getDataAtDB``, a
    Prophet model is fitted, the forecast plot is saved under ``static/`` and
    ``previsao.html`` is rendered with the image path. Otherwise the template
    is rendered without an image.

    Returns:
        The result of ``render_template("previsao.html", ...)``.

    Raises:
        ValueError: if the "Anos" field is not an integer string.
    """
    # Read the values of the page's select fields.
    select_ano = request.form.get("Anos", None)
    # select_mun = request.form.get("Municipios", None)
    select_mun = 'Santos'
    select_dp = request.form.get("Delegacias", None)
    select_crime = request.form.get("Crimes", None)

    # Guard clause: the year is needed both for the file name and for the
    # forecast horizon, so it is required together with the other two fields.
    if not (select_ano and select_dp and select_crime):
        return render_template("previsao.html")

    # File name for the plot.
    # NOTE(review): the name is built from raw form values; confirm they are
    # constrained to known options before they reach the file system.
    img = 'static/plot' + select_ano + select_mun + select_dp + select_crime + '.png'
    print(select_dp)

    # Load the dataframe.
    # Assumes getDataAtDB returns exactly two columns, 'datas' first and the
    # count second — TODO confirm against its definition.
    df = getDataAtDB(select_mun, select_dp, select_crime)
    #print(df.head())
    df['datas'] = pd.to_datetime(df['datas'])

    # Prophet expects the columns to be named ds / y.
    df.columns = ["ds", "y"]

    # Build the model.
    m = Prophet(changepoint_prior_scale=0.05,
                changepoint_range=0.8,
                seasonality_prior_scale=0.05,
                seasonality_mode='additive')
    m.add_country_holidays(country_name='BR')
    m.fit(df)

    # Forecast monthly up to the selected year; a current or past year
    # yields a zero-length horizon instead of a negative period count.
    months_ahead = max(0, 12 * (int(select_ano) - date.today().year))
    future = m.make_future_dataframe(periods=months_ahead, freq='MS')
    forecast = m.predict(future)

    # Draw and save the plot.
    fig = m.plot(forecast, figsize=(8, 4))
    plt.xlabel('Data')
    plt.ylabel('Ocorrencias')
    plt.gca().set_ylim(bottom=0)
    if select_dp != 'Todos':
        plt.title("Série temporal das ocorrências de " + select_crime +
                  " registradas no " + select_dp)
    else:
        plt.title("Série temporal das ocorrências de " + select_crime +
                  " registradas na cidade de " + select_mun)
    plt.savefig(img, bbox_inches='tight')

    # Close the figure (not just clear it) so figures do not accumulate
    # across requests.
    plt.close(fig)

    # df_cv = cross_validation(m, initial='3600 days', horizon = '1200 days', parallel="processes")
    # df_p = performance_metrics(df_cv)
    # print(df_p.head())

    # Hyper-parameter optimisation
    # params_df = create_param_combinations(**param_grid)
    # print(len(params_df.values))
    # for param in params_df.values:
    #     param_dict = dict(zip(params_df.keys(), param))
    #     cv_df = single_cv_run(df, metrics, param_dict, parallel="processes")
    #     results.append(cv_df)
    # results_df = pd.concat(results).reset_index(drop=True)
    # best_param = results_df.loc[results_df['rmse'] == min(results_df['rmse']), ['params']]
    # print(f'\n The best param combination is {best_param.values[0][0]}')
    # print(results_df)

    return render_template("previsao.html", image=img)
Ejemplo n.º 17
0
# Prophet model for the daily "kantor" series, with Indonesian country
# holidays and an extra monthly seasonality.
model_holiday_indo_kantor = Prophet(
    weekly_seasonality=3,
    yearly_seasonality=10,
    #                                       holidays=holiday_kantor,
    changepoint_range=0.8,  # default = 0.8 Recommended range: [0.8, 0.95]
    changepoint_prior_scale=
    0.095  # default =0.05 Recommended range: [0.001, 0.5]
)
model_holiday_indo_kantor.add_seasonality(name='monthly',
                                          period=30.5,
                                          fourier_order=5)
model_holiday_indo_kantor.add_country_holidays(country_name='ID')
model_holiday_indo_kantor.fit(daily_kas_kantor)

# Forecast 31 days beyond the history.
future_kantor = model_holiday_indo_kantor.make_future_dataframe(periods=31,
                                                                freq='D')
forecast_kantor = model_holiday_indo_kantor.predict(future_kantor)

# Display-only expression (notebook style): holidays seen during training.
model_holiday_indo_kantor.train_holiday_names

# plot_plotly(model_holiday_indo_kantor, forecast_kantor)
# plot_components_plotly(model_holiday_indo_kantor, forecast_kantor)

from sklearn.metrics import mean_squared_log_error

# In-sample error: compare the fitted values with the observed history.
# The number of rows is taken from the history itself instead of a
# hard-coded 425, so the script keeps working when the data grows.
# Assumes daily_kas_kantor is sorted by ds with unique dates, so the first
# rows of the forecast line up with it — TODO confirm.
n_history = len(daily_kas_kantor)
err = np.sqrt(
    mean_squared_log_error(daily_kas_kantor.loc[:, 'y'],
                           forecast_kantor['yhat'].head(n_history)))
print('log mse:', err)
Ejemplo n.º 18
0
                               stream=byte_stream)
# Rewind the stream so read_excel starts from its beginning, then load the
# sheet with its first column as the index.
byte_stream.seek(0)
ser = pd.read_excel(byte_stream, index_col=0)
byte_stream.close()
ser.head()
#ser = pd.read_excel('Copy of DB-O.xlsx',sheet_name='SSEDB1-O',index_col=0)
# Multiply the first 21 rows of the first four columns by 1000.
for i in range(0, 4):
    ser.iloc[:21, i] = ser.iloc[:21, i].apply(lambda x: x * 1000)
    #print(i)
######################## FBPROPHET ####################
# NOTE: revdf is an alias of ser at this point, so the 'ds' column added
# below also appears on ser.
revdf = ser
revdf['ds'] = revdf.index
revdf = revdf.rename(columns={"Total Sum of Revenue": 'y'})
my_model = Prophet(interval_width=0.95, changepoint_prior_scale=4)
my_model.fit(revdf[['ds', 'y']])
# Extend the timeline by 6 month-start periods and predict over all of it.
future_dates = my_model.make_future_dataframe(periods=6, freq='MS')
forecast = my_model.predict(future_dates)
# Display-only expression (notebook style).
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
from sklearn.metrics import mean_squared_error
# In-sample RMSE: fitted values for the historical rows vs. observed y.
rms = np.sqrt(
    mean_squared_error(revdf['y'], forecast['yhat'][:len(revdf['y'])]))
#print(rms)
# The last 6 predictions, i.e. the rows added by make_future_dataframe.
adrf = forecast['yhat'].tail(6)
my_model.plot(forecast, uncertainty=True)
########################### predicting FTE ##################################
A = ser['Total Sum of BFTE']
#from plotly.plotly import plot_mpl
from statsmodels.tsa.seasonal import seasonal_decompose
# NOTE(review): newer statsmodels releases appear to name this argument
# `period` rather than `freq` — confirm against the installed version.
result = seasonal_decompose(A, model='additive', freq=12)
fig = result.plot()
from statsmodels.tsa.stattools import adfuller
Ejemplo n.º 19
0
def estimate_and_predict_prophet_PR(calendar,
                                    punched_df,
                                    end_train_date,
                                    start_test_date,
                                    daily_view=False,
                                    target_column='cost',
                                    pred_days=120,
                                    horizon=8,
                                    missing_val=201735):
    """Fit one regressor-free Prophet model per club and forecast bi-weekly.

    This serves as the first layer of the mixed model.

    Args:
        calendar: frame merged onto the data via ``calendar_date``; it must
            supply the ``wm_yr_wk_nbr`` column used below.
        punched_df: input data. If it has no ``club_nbr`` column, ``club`` is
            used instead; if it has no ``posting_date`` column, one is
            derived from ``wm_yr_wk_nbr`` with ``getDatesFromWMWks``. Note
            that those missing columns are added to the caller's frame.
        end_train_date: last ``wm_yr_wk_nbr`` (inclusive) used for training.
        start_test_date: accepted for interface compatibility; not used.
        daily_view: if true, the bi-weekly data is expanded to daily data
            with ``gen_daily_data``. Not recommended.
        target_column: name of the column that is summed and forecast.
        pred_days: number of days ahead to predict.
        horizon: number of 14-day steps after the last training date that
            are kept from the daily forecast.
        missing_val: ``wm_yr_wk_nbr`` that is interpolated when absent.

    Returns:
        DataFrame with columns ``['ds', 'yhat', 'club']`` (ds is the posting
        date, yhat the prediction), or an empty DataFrame when there are no
        clubs.
    """
    if 'club_nbr' not in punched_df.columns:
        punched_df['club_nbr'] = punched_df['club']
        punched_df = punched_df.drop('club', axis=1)
    if 'posting_date' not in punched_df.columns:
        punched_df['posting_date'] = getDatesFromWMWks(
            punched_df['wm_yr_wk_nbr'])

    # Total of the target per club and posting date, enriched with the
    # calendar columns.
    punched_pro = punched_df.groupby(['club_nbr',
                                      'posting_date'])[target_column].sum()
    punched_pro = punched_pro.reset_index()
    punched_pro = pd.merge(left=punched_pro,
                           right=calendar,
                           how='left',
                           left_on='posting_date',
                           right_on='calendar_date').drop('calendar_date',
                                                          axis=1)
    punched_pro = removehurricane(
        target_column, punched_pro, 201733, 201739,
        sales=False)  #201735 is missing in the SAP data, recover below

    frames = []
    for club in punched_pro.club_nbr.unique():
        # Explicit copy: the frame is modified below.
        punched_pro_club = punched_pro[punched_pro.club_nbr.isin([club])].copy()

        # Recover the missing week: date = two weeks earlier + 14 days,
        # value = mean of the weeks two before and two after.
        # The positional list assumes the column order
        # [club_nbr, posting_date, <target>, wm_yr_wk_nbr] — TODO confirm
        # that calendar contributes only wm_yr_wk_nbr.
        if missing_val not in punched_pro_club.wm_yr_wk_nbr.values.tolist():
            before = punched_pro_club.loc[punched_pro_club.wm_yr_wk_nbr ==
                                          wm_nbr_add(missing_val, -2)]
            after = punched_pro_club.loc[punched_pro_club.wm_yr_wk_nbr ==
                                         wm_nbr_add(missing_val, 2)]
            punched_pro_club.loc[-1] = [
                club,
                before.iloc[0, 1] + timedelta(days=14),
                0.5 * before.iloc[0, 2] + 0.5 * after.iloc[0, 2],
                missing_val
            ]  # adding a row
            punched_pro_club.index = punched_pro_club.index + 1  # shifting index

        punched_pro_club = punched_pro_club.sort_values(by='wm_yr_wk_nbr')
        punched_pro_club = punched_pro_club.drop(
            'club_nbr', axis=1).reset_index().drop('index', axis=1)
        if daily_view:
            # NOTE(review): day_sep is not defined in this function; it is
            # presumably a module-level name — confirm.
            punched_pro_club = gen_daily_data(punched_pro_club, day_sep)

        trainset = punched_pro_club.loc[
            punched_pro_club.wm_yr_wk_nbr <= end_train_date].drop(
                ['wm_yr_wk_nbr'], axis=1)
        trainset = trainset.reindex(columns=["posting_date", target_column])
        trainset.columns = ["ds", "y"]

        m = Prophet(yearly_seasonality=True)
        m.fit(trainset)
        future = m.make_future_dataframe(periods=pred_days)
        forecast = m.predict(future)
        result = forecast[['ds', 'yhat']].tail(pred_days)

        # Keep only the dates 14, 28, ... days after the last training date.
        last_train_date = trainset['ds'].iloc[-1]
        weeklist = [
            last_train_date + timedelta(days=14 * (step + 1))
            for step in range(horizon)
        ]
        result = result[result.ds.isin(weeklist)].copy()
        result['club'] = club
        frames.append(result)

    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of growing the result inside the loop.
    return pd.concat(frames, axis=0)
Ejemplo n.º 20
0
# For every code in `corporation`: fit a Prophet model on the data returned
# by make_am_data, build the minute-level timestamps to predict for, then
# start preparing the data returned by make_pm_data.
for n, index_code in enumerate(corporation):

    # temp, what_day = make_am_data(index_code, 20200917)
    temp, what_day = make_am_data(index_code, int(date))

    amf = pd.DataFrame(columns=['ds', 'y'])

    amf['ds'] = temp['DateTime']  # build the training dataframe

    amf['y'] = temp['체결가']  # the y values used from here on

    amf['y'].plot()
    # NOTE: the same file name is written on every iteration.
    plt.savefig('weekend_to_am.png', dpi=400)

    am_model = Prophet(changepoint_range=0.8).fit(amf)
    future = am_model.make_future_dataframe(periods=1260,
                                            freq='min')  # excluding 12:00-16:00

    # Keep only the timestamps whose day-of-month equals what_day ...
    future2 = future[(future['ds'].dt.day == what_day)]

    # ... and, of those, only the hours 09:00 up to (not including) 12:00.
    am = future2[(future2['ds'].dt.hour >= 9) & (future2['ds'].dt.hour < 12)]

    # temp, what_day = make_pm_data(index_code, 20200917)
    temp, what_day = make_pm_data(index_code, int(date))

    pmf = pd.DataFrame(columns=['ds', 'y'])

    pmf['ds'] = temp['DateTime']  # build the training dataframe

    pmf['y'] = temp['체결가']  # the y values used from here on

    pmf['y'].plot()
Ejemplo n.º 21
0
## Train
print("Start training...")

# External regressors, registered on the model and later copied into the
# future frame in this order.
_regressor_names = (
    'monetary_base_diff',
    'cpi_diff',
    'fed_fund',
    'saudi_production',
)

model_imporvement2 = Prophet(holidays=holidays,
                             weekly_seasonality=False,
                             yearly_seasonality=20)
model_imporvement2.add_seasonality(name='monthly',
                                   period=30.5,
                                   fourier_order=5)
for _regressor in _regressor_names:
    model_imporvement2.add_regressor(_regressor)

model_imporvement2.fit(data_with_regressors)

# Extend the timeline by 365 periods. Each regressor column is copied from
# the training data (aligned on the index) and the rows beyond the history
# are forward-filled with the last known value.
future_imporvement2 = model_imporvement2.make_future_dataframe(periods=365)
for _regressor in _regressor_names:
    future_imporvement2[_regressor] = data_with_regressors[_regressor]
    future_imporvement2[_regressor] = future_imporvement2[_regressor].fillna(
        method='pad')
from fbprophet import Prophet


# Build the Prophet input: dates from stock_return's index, y = natural log
# of the closing price.
df = pd.DataFrame()
df['ds'] = stock_return.index
#df['y_orig']=daily_df.Pageviews.values
df['y'] = graph['Close'].apply(lambda x: np.log(x)).values
df.tail()


m0 = Prophet(yearly_seasonality=True)
m0.fit(df)
#n_add = 365 - len()
n_add = 100
print("adding {n} days to reach the end of 2017.".format(n=n_add))
future = m0.make_future_dataframe(periods=n_add) # generate frame going to end of 2017; 112 added on 9/11/2017
future.tail()


forecast = m0.predict(future)
forecast[['ds','yhat','yhat_lower','yhat_upper']].tail()


# Raw string: "\l" is not a valid escape sequence in a normal string
# literal. The text passed to matplotlib is unchanged.
forcast = m0.plot(forecast, ylabel=r'$\ln($stock_return$)$')


forcast.savefig('/home/ubuntu/Desktop/TelegramBot/charts/OILforcast.jpeg', dpi=400, bbox_inches='tight')


trend = m0.plot_components(forecast)
Ejemplo n.º 23
0
from fbprophet import Prophet
from fbprophet.plot import plot_plotly, plot_components_plotly
from sklearn.metrics import mean_absolute_error
import pandas as pd

# Load the price table and keep the date plus the BOVA column, renamed to
# the ds / y pair Prophet expects.
dataset = pd.read_csv('acoes.csv')
# dataset.set_index(keys=['Date'], inplace=True)

dataset = dataset[['Date', 'BOVA']].rename(columns={'Date': 'ds', 'BOVA': 'y'})

# Model
modelo = Prophet()
modelo.fit(dataset)

# Forecast 90 periods beyond the history.
futuro = modelo.make_future_dataframe(periods=90)
previsoes = modelo.predict(futuro)

# Plot of the forecasts
modelo.plot(previsoes, xlabel='Data', ylabel='Preço')

modelo.plot_components(previsoes)

plot_plotly(modelo, previsoes)
plot_components_plotly(modelo, previsoes)

# Model evaluation: predict over the historical dates only (no future rows).
pred = modelo.make_future_dataframe(periods=0)
previsoes = modelo.predict(pred)

# Compare the last 365 fitted values with `teste`.
# NOTE(review): `teste` is not defined in this snippet and the metric's
# return value is discarded — confirm where `teste` comes from.
previsoes = previsoes['yhat'].tail(365)
mean_absolute_error(teste, previsoes)
Ejemplo n.º 24
0
def prophet(df, var_season, item=np.nan, title='error'):
    """Fit a Prophet model with on/off-season weekly seasonality and forecast 32 weeks.

    The frame gets boolean ``on_season`` / ``off_season`` columns (added to
    the caller's ``df`` in place). They gate two conditional weekly
    seasonalities. After prediction, rows from the current calendar year
    onwards are adjusted: off-season months are capped at the historical
    off-season median, and values below the historical fitted minimum or in
    months absent from the history are set to zero. The plot is saved as
    ``<title>.png`` and shown.

    Args:
        df: frame with ``ds`` (dates formatted ``YYYY-MM-DD[ ...]``) and ``y``.
        var_season: passed to ``item_season`` when ``item`` is given.
        item: product code; when NaN, ``is_season`` with the months present
            in ``df`` is used instead of ``item_season``.
        title: plot title and base name of the saved PNG.

    Returns:
        DataFrame with ``ds``, ``yhat``, ``yhat_upper``, ``yhat_lower`` and a
        ``prdt_cd`` column holding ``item``.
    """
    print(df)
    # Prophet settings
    period = 32  # forecast horizon (weekly periods)
    # Flexibility; default = 0.05. Larger is more flexible (helps
    # under-fitting), smaller is more rigid (helps over-fitting).
    changepoint_prior_scale = 0.07
    # Constant seasonality = additive, growing seasonality = multiplicative.
    seasonality_mode = 'additive'

    # Months that occur in the history.
    months = df['ds'].apply(
        lambda d: dt.datetime.strptime(str(d).split(' ')[0], '%Y-%m-%d')
    ).dt.month
    lst_season = sorted(months.unique())

    def season_flag(d):
        """Return the on-season flag for one date."""
        if pd.isna(item):
            return is_season(d, lst_season)
        return item_season(d, var_season)

    # Prophet requires boolean condition columns; off_season is the exact
    # complement of on_season.
    df['on_season'] = df['ds'].apply(season_flag).astype(bool)
    df['off_season'] = ~df['on_season']

    m = Prophet(
        growth='linear',
        seasonality_mode=seasonality_mode,
        changepoint_prior_scale=changepoint_prior_scale,
        daily_seasonality=False,
        weekly_seasonality=False,
        yearly_seasonality=False,
    ).add_seasonality(
        name='monthly',
        period=30.5,
        fourier_order=12
    ).add_seasonality(
        name='yearly',
        period=365.25,
        fourier_order=10
    ).add_seasonality(
        name='quarterly',
        period=365.25 / 4,
        fourier_order=5,
        prior_scale=15
    ).add_seasonality(
        # condition_name ties each seasonality to its flag column; without
        # it the on_season / off_season columns are ignored by the model.
        name='on_season',
        period=7,
        fourier_order=20,
        condition_name='on_season'
    ).add_seasonality(
        name='off_season',
        period=7,
        fourier_order=20,
        condition_name='off_season'
    )

    # Fit the model.
    m.fit(df)

    future = m.make_future_dataframe(periods=period, freq='W')
    # Same flag logic as for training, including the negation for off_season.
    future['on_season'] = future['ds'].apply(season_flag).astype(bool)
    future['off_season'] = ~future['on_season']
    forecast = m.predict(future)

    # Post-corrections:
    #   - off-season months: cap at the historical off-season median
    #   - negative/low values: never drop below the historical fitted minimum
    #   - months with no history at all: force to zero
    forecast['upper_gap'] = abs(forecast['yhat_upper'] - forecast['yhat'])
    forecast['lower_gap'] = abs(forecast['yhat'] - forecast['yhat_lower'])
    forecast['min'] = forecast[forecast['ds'] < df.tail(1).iloc[0]['ds']]['yhat'].min()
    forecast['off_season_max'] = df.loc[df['off_season'], 'y'].median()

    off_season_months = set(
        df.loc[df['off_season'], 'ds'].apply(lambda d: pd.to_datetime(d).month))

    current_year = dt.datetime.today().year
    for i, row in forecast.iterrows():
        # Only rows from the current calendar year onwards are adjusted.
        if row['ds'].year < current_year:
            continue
        month = row['ds'].month
        if month in off_season_months and row['yhat'] > row['off_season_max']:
            forecast.loc[i, 'yhat'] = row['off_season_max']
            forecast.loc[i, 'yhat_upper'] = row['off_season_max'] + abs(row['upper_gap'])
            forecast.loc[i, 'yhat_lower'] = row['off_season_max'] - abs(row['lower_gap'])
        # `row` still holds the unadjusted values, as in the original loop.
        if row['yhat'] < row['min'] or month not in lst_season:
            forecast.loc[i, 'yhat'] = 0
            forecast.loc[i, 'yhat_upper'] = abs(row['upper_gap'])
            forecast.loc[i, 'yhat_lower'] = -abs(row['lower_gap'])

    forecast_temp = forecast[['ds', 'yhat', 'yhat_upper', 'yhat_lower']]
    forecast_temp = forecast_temp.assign(prdt_cd=item)

    # Draw the plot.
    fig1 = m.plot(forecast, uncertainty=True)
    plt.title(title)
    add_changepoints_to_plot(fig1.gca(), m, forecast)
    title = title + '.png'
    plt.savefig(title)
    plt.show()

    return forecast_temp
Ejemplo n.º 25
0
# Attach the per-timestamp tweet counts; timestamps without tweets get 0.
df = df.merge(tweets_group, how='left', on='DATETIME_CONVERTED')
# Plain assignment instead of a chained inplace fillna, which may act on a
# temporary copy.
df['TWEET_COUNT'] = df['TWEET_COUNT'].fillna(0)
df.columns = ['DATETIME_CONVERTED', 'Volume_(BTC)', 'y', 'ds', 'TWEET_COUNT']
# Split the data: first 80% of the rows for training, last 20% for testing.
rows = int(df.shape[0] * 0.8)

train_df = df[:rows]
test_df = df[rows:]

from datetime import datetime
from fbprophet import Prophet
print(datetime.now())
df_prophet = Prophet(changepoint_prior_scale=0.15, daily_seasonality=True)
df_prophet.fit(train_df)

fcast_time = 144  # 144 steps of 10 minutes = 1 day

print(datetime.now())
df_forecast = df_prophet.make_future_dataframe(periods=fcast_time,
                                               freq='10min')
df_forecast.tail(10)

df_forecast = df_prophet.predict(df_forecast)

import pickle
# Context manager so the file is flushed and closed deterministically.
with open("prophet_model.pickle", "wb") as model_file:
    pickle.dump(df_prophet, model_file)
# df_forecast.to_parquet('../Data/df_forcast.parquet')
forecast = df_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
fig1 = df_prophet.plot(forecast)
fig1.show()
Ejemplo n.º 26
0
 # Name the first two columns ds / y, the pair Prophet expects.
 dat = dat.rename(columns={dat.columns[0]: 'ds'})
 dat = dat.rename(columns={dat.columns[1]: 'y'})
 # Change the step from 20 to 5 if you want to run on just DJIA total
 for j in range(550, 750, 20):
     print(j)
     # The first j rows train the model; the remaining rows are held out.
     train_index = list(range(j))
     test_index = list(range(j, len(dJIA)))
     testLen = len(test_index)
     # Skip splits that do not leave enough rows on both sides.
     if not (testLen >= 260 and len(train_index) >= 522):
         continue
     ##
     # Run through Prophet and record the absolute error at each horizon
     ##
     m = Prophet()
     m.fit(dat.iloc[train_index, :])
     future = m.make_future_dataframe(periods=testLen + extraPred, freq="B")
     pred = m.predict(future)
     pred = pred[~pred.ds.isin(leftOvers)].reset_index(drop=True)
     # Offsets 0, 4, 19 and 64 into the test window feed the D1, D5, D20
     # and D65 error lists respectively.
     for offset, errors in ((0, maeD1), (4, maeD5), (19, maeD20),
                            (64, maeD65)):
         errors.append(
             abs(pred.yhat[j + offset] - dat.iloc[test_index[offset], 1]))
Ejemplo n.º 27
0
# Fit Prophet on the rows after the training start date, with German
# holidays and two extra regressors, then predict 10 future periods.
df_test = df

# Start of the training period; only rows with a later index are kept.
trainings_zeitraum_von = '2020-8-01'
df_test = df_test.loc[df_test.index > trainings_zeitraum_von].copy()

# Rename to the ds / y columns Prophet expects.
df_test = df_test.rename(columns={"7TIW": "y"})
df_test = df_test.reset_index()
df_test = df_test.rename(columns={"Date": "ds"})

m = Prophet(changepoint_prior_scale=2)
m.add_country_holidays(country_name='DE')
m.add_regressor('Temperatur')
m.add_regressor('ResNbg')
m.fit(df_test)

# Future rows only (history excluded), indexed by date for the join below.
future = m.make_future_dataframe(periods=10, include_history=False)
future = future.set_index('ds')

# NOTE(review): assigns the joined frame to a single column — presumably
# WetterVorhersage contributes exactly one column; confirm.
future['Temperatur'] = future.join(WetterVorhersage)
# Carry the last observed ResNbg value forward over the whole horizon.
future['ResNbg'] = df_test.ResNbg.iloc[-1]

future = future.reset_index()

forecast = m.predict(future)

#fig = m.plot(forecast)
#ax = fig.gca()
#ax.set_title(today, size=34)
#plt.tight_layout()
#fig.savefig('Vorhersagen/'+str(today)+'.png')
Ejemplo n.º 28
0
def runmodels():
    """Train the regression and Prophet models, persist them and render the results.

    Reads ``train.csv``, ``test.csv`` and ``store.csv`` from the working
    directory, fits a random-forest pipeline (wrapped in a
    ``TransformedTargetRegressor``) on 80% of the training rows and predicts
    the test set, fits a Prophet model on the data returned by
    ``prophetData`` and forecasts 6 weeks ahead. Both models are pickled to
    timestamped files.

    Returns:
        The rendered ``model.html`` template with the regression predictions
        (``labels``/``values``) and the Prophet forecast
        (``linelabels``/``linevalues``).
    """
    # load the data
    dfTrain = pd.read_csv('train.csv', low_memory=False)
    dfTest = pd.read_csv('test.csv', low_memory=False)
    dfStore = pd.read_csv("store.csv", low_memory=False)

    # dropping the zero sales and closed stores
    dfTrain = dfTrain[(dfTrain.Open != 0) & (dfTrain.Sales != 0)]

    sales, holidays = prophetData(dfTrain)

    # Fill the NaN values in the CompetitionDistance column with the median.
    # Plain assignment: an inplace fillna on a column accessor may act on a
    # temporary copy and leave the frame unchanged.
    dfStore['CompetitionDistance'] = dfStore['CompetitionDistance'].fillna(
        dfStore['CompetitionDistance'].median())

    # replace all the other NaN values with zeros
    dfStore.fillna(0, inplace=True)

    # fill the missing values
    dfTest.fillna(1, inplace=True)

    # merge train and test dataset with store data
    dfTrainStore = merge(dfTrain, dfStore)
    dfTestStore = merge(dfTest, dfStore)

    # Set the target column
    Y = dfTrainStore['Sales']
    Id = dfTestStore['Id']

    # remove dataset specific columns
    dfTrainStore = dfTrainStore.drop(['Customers', 'Sales'], axis=1)
    dfTestStore = dfTestStore.drop(['Id'], axis=1)

    # Split into a training set and a validation set. The validation part
    # is not used further (the original computed predictions on it and
    # discarded them); the split is kept so the model still trains on the
    # same 80% of the rows.
    xTrain, _xValid, yTrain, _yValid = train_test_split(dfTrainStore,
                                                        Y,
                                                        test_size=0.20,
                                                        random_state=42)

    pipe = Pipeline(steps=[('multipleTrans', multipleTransformer()),
                           ('randomForest',
                            RandomForestRegressor(n_estimators=128,
                                                  criterion='mse',
                                                  max_depth=20,
                                                  min_samples_split=10,
                                                  min_samples_leaf=1,
                                                  min_weight_fraction_leaf=0.0,
                                                  max_features='auto',
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  bootstrap=True,
                                                  oob_score=False,
                                                  n_jobs=4,
                                                  random_state=35,
                                                  verbose=0,
                                                  warm_start=False))])

    regModel = TransformedTargetRegressor(regressor=pipe,
                                          func=targetTransform,
                                          inverse_func=reverseTargetTransform)

    # training the Regression Model
    regModel.fit(xTrain, yTrain)

    # predict on the testStore set
    predictions = regModel.predict(dfTestStore)

    # turn the predictions into a dataframe
    dfPreds = pd.DataFrame({'Id': Id, 'Sales': predictions})

    # training the prophet Model
    pModel = Prophet(interval_width=0.5, holidays=holidays)
    pModel.fit(sales)

    # dataframe that extends into future 6 weeks
    future_dates = pModel.make_future_dataframe(periods=6 * 7)

    # prophet model predictions
    forecast = pModel.predict(future_dates)

    # Isolate the predictions. The original followed this with a rename
    # whose mapping keys ('Date', 'Forecast') matched no column, so it did
    # nothing; the template below reads 'ds' and 'yhat' directly.
    fc = forecast[['ds', 'yhat']]

    # get the current time and turn it into a string
    now = datetime.datetime.now().strftime('%d-%m-%Y-%H-%M-%S-%f')[:-3]

    # Save the models; context managers make sure the files are closed.
    filenameReg = 'regModel-' + now + '.pkl'
    filenamePro = 'pModel-' + now + '.pkl'
    with open(filenameReg, 'wb') as reg_file:
        pickle.dump(regModel, reg_file)
    with open(filenamePro, 'wb') as pro_file:
        pickle.dump(pModel, pro_file)

    return render_template('model.html',
                           labels=dfPreds['Id'],
                           values=dfPreds['Sales'],
                           linelabels=fc['ds'],
                           linevalues=fc['yhat'])
Ejemplo n.º 29
0
import pandas as pd
import numpy as np
from fbprophet import Prophet

# Prep the dataset

data = pd.read_csv("/home/dusty/Econ8310/DataSets/chicagoBusRiders.csv")
# Explicit copy so the assignments below modify route3 itself rather than a
# view of `data`.
route3 = data[data.route=='3'][['date','rides']].copy()
route3['date'] = pd.to_datetime(route3['date'], infer_datetime_format=True)
# A flat list renames the two columns. The original nested list
# ([['ds', 'y']]) builds a MultiIndex, so route3['ds'] is no longer a
# plain column.
route3.columns = ['ds', 'y']

# Initialize Prophet instance and fit to data

m = Prophet()
m.fit(route3)

# Create timeline for 1 year in future, then generate predictions based on that timeline

future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# Create plots of forecast and truth, as well as component breakdowns of the trends

# NOTE: `plt` here is the Figure returned by Prophet, not matplotlib.pyplot.
plt = m.plot(forecast)
plt.show()

comp = m.plot_components(forecast)
comp.show()
Ejemplo n.º 30
0
    fig.layout.update(title_text='Time Series Data',
                      xaxis_rangeslider_visible=True)
    st.plotly_chart(fig)


plot_raw_data()

# --- Forecast with Prophet ---
# https://facebook.github.io/prophet/docs/quick_start.html#python-api
# Select the date and closing-price columns and rename them to ds / y,
# the pair Prophet expects.
df_train = data[['Date', 'Close']].rename(
    columns={"Date": "ds", "Close": "y"})

# Fit the model, then predict over the history plus `period` future rows.
m = Prophet().fit(df_train)
future = m.make_future_dataframe(periods=period)
forecast = m.predict(future)

# Last rows of the raw forecast table.
st.subheader('Raw Forecast Data')
st.write(forecast.tail())

# Interactive forecast chart.
st.write(f'Forecast plot for {n_years} years')
fig1 = plot_plotly(m, forecast)
st.plotly_chart(fig1)

# Component plots produced by the model.
st.write("Forecast components")
fig2 = m.plot_components(forecast)
st.write(fig2)
Ejemplo n.º 31
0
# For every county (fips): sum the cases of the counties listed in its
# adjacency row, fit Prophet on that series and collect the forecast.
# Frames are gathered in a list and concatenated once; DataFrame.append no
# longer exists in pandas 2.x, and the original bare `except:` would have
# reported that as a "Failed" line for every county.
_RESULT_COLUMNS = ['fips', 'date', 'adj_cases']
forecast_frames = []

for fips in df.fips.unique():
    try:
        # Adjacency row for this county, flattened, with NaN padding removed.
        adjacent = adj_df.loc[adj_df.county ==
                              fips, ].values.flatten().tolist()
        adjacent = [x for x in adjacent if str(x) != 'nan']
        sub_df = df[df.fips.isin(adjacent)]
        sub_df = sub_df.groupby('date')['cases'].sum().to_frame()
        sub_df.reset_index(inplace=True)
        sub_df = sub_df.rename(columns={'cases': 'y', 'date': 'ds'})
        sub_df = sub_df.dropna()

        print(sub_df)

        # Modeling
        model_p = Prophet(yearly_seasonality=True)
        model_p.fit(sub_df)
        future = model_p.make_future_dataframe(periods=115)
        forecast = model_p.predict(future)
        # rename() returns a new frame, so the edits below do not touch
        # `forecast`.
        test = forecast[['ds', 'yhat']].rename(columns={
            'ds': 'date',
            'yhat': 'adj_cases'
        })
        test.insert(0, 'fips', fips)
        test['adj_cases'] = test['adj_cases'].astype(int)
        # Case counts cannot be negative.
        test.loc[test['adj_cases'] < 0, 'adj_cases'] = 0
        forecast_frames.append(test)
    except Exception as exc:
        # Best effort per county: report the reason and continue. Unlike a
        # bare except, KeyboardInterrupt / SystemExit still stop the run.
        print(str(fips) + ": Failed", repr(exc))

if forecast_frames:
    ret_df = pd.concat(forecast_frames)
else:
    ret_df = pd.DataFrame(columns=_RESULT_COLUMNS)

ret_df.to_csv('../data/processed/adj_sum.csv', index=False)
Ejemplo n.º 32
0
# Report the Sharpe ratio taken from position 2 of `w`.
st.write("Sharpe Ratio: {:.2f}".format(w[2]))

# Log returns per column, their row-wise sum, and the running total of
# that sum.
returns = risk_models.returns_from_prices(data_, log_returns=True)
returns["sum"] = returns.sum(axis=1)
returns["cum"] = returns['sum'].cumsum(axis=0)
returns = returns.reset_index()
plt.figure(figsize=(12, 6))
plt.plot(returns.cum)
st.pyplot()

# NOTE: self-assignment, has no effect.
shift_d = shift_d
# NOTE: Prop is an alias of returns here, so the 'ds' and 'y' columns are
# added to returns as well (they are read from it further down).
Prop = returns
# Assumes reset_index() above produced a column named 't' — TODO confirm.
Prop['ds'] = Prop['t']
Prop['y'] = Prop['cum']
# Keep only the last two columns, i.e. the ds and y just added.
Prop = Prop.iloc[:, -2:]

m = Prophet(n_changepoints=n_changepoints)
m.fit(Prop)
future = m.make_future_dataframe(periods=shift_d)
forecast = m.predict(future)
fig = add_changepoints_to_plot((m.plot(forecast)).gca(), m, forecast)
st.pyplot()

# Drawdown: relative distance of the series from its running maximum.
prices = returns.set_index('ds')
prices = prices.y
peeks = prices.cummax()
drowdown = (prices - peeks) / peeks
plt.plot(drowdown)
st.pyplot()
# Largest drawdown (the most negative value).
st.write(drowdown.min())
Ejemplo n.º 33
0
def _null_out_high_outliers(frame):
    """Set extreme ``y`` values of *frame* to missing, in place.

    When the gap between the maximum and the median of ``y`` is at least
    twice the median, every value greater than or equal to twice the median
    is replaced by ``None``.
    """
    q50 = frame.y.median()
    q100 = frame.y.quantile(1)
    if (q100 - q50) >= (2 * q50):
        frame.loc[frame.y >= (2 * q50), 'y'] = None


def create_prophet_m(app_name,z1,cpu_perc_list,delay=24):
    """Fit Prophet on ``z1.bw`` for a real-time forecast and a back-test.

    Two models are fitted: one on the full series, forecasting ``delay``
    periods beyond it, and one on everything except the last ``delay``
    rows, which is scored against those held-out rows.

    Args:
        app_name: label printed with the metrics and used as the index of
            the metrics frame.
        z1: frame whose ``bw`` column is the series; its index supplies the
            ``ds`` values.
        cpu_perc_list: list that receives a CPU-percentage sample. Only the
            first ``append`` reaches the caller's list; the later
            ``cpu_perc_list = [max(...)]`` rebinds a local name.
            NOTE(review): the running maximum is therefore never seen by
            the caller — confirm whether that is intended.
        delay: number of held-out / forecast periods.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)``: the one-row
        metrics frame (empty when no test row could be matched), the
        back-test model, its full forecast, the test rows merged with their
        predictions, and the real-time predictions.
    """
    ### --- For realtime pred ---###

    full_df = z1.bw.iloc[0:len(z1)]
    full_df = full_df.reset_index()
    full_df.columns = ['ds','y']

    # removing outliers
    _null_out_high_outliers(full_df)

    #-- Realtime prediction --##
    # model
    model_r = Prophet(yearly_seasonality=False,changepoint_prior_scale=.1,seasonality_prior_scale=0.05)
    model_r.fit(full_df)

    # `py` is presumably an alias for psutil — TODO confirm.
    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    future_r = model_r.make_future_dataframe(periods=delay,freq='D')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # forecast: the `delay` rows that follow the history
    pred_r = pd.DataFrame(forecast_r['yhat'][len(z1):(len(z1)+delay)])
    pred_r=pred_r.reset_index()
    #--- completes realtime pred ---#

    # Back-test split: the last `delay` rows are held out.
    train_end_index=len(z1.bw)-delay
    train_df=z1.bw.iloc[0:train_end_index]

    test_df=z1.bw.iloc[train_end_index:len(z1)]

    train_df=train_df.reset_index()
    test_df=test_df.reset_index()

    train_df.columns=['ds','y']

    #--- removing outliers in trainset  ---#
    _null_out_high_outliers(train_df)

    test_df.columns=['ds','y']
    test_df['ds'] = pd.to_datetime(test_df['ds'])

    # model
    model = Prophet(yearly_seasonality=False,changepoint_prior_scale=.1,seasonality_prior_scale=0.05)
    model.fit(train_df)

    cpu_perc_list.append(py.cpu_percent())
    cpu_perc_list = [max(cpu_perc_list)]

    future = model.make_future_dataframe(periods=len(test_df),freq='D')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    # forecast: the rows that correspond to the held-out window
    pred = pd.DataFrame(forecast['yhat'][train_end_index:len(z1)])

    print('length forecasted non realtime=',len(pred))
    pred=pred.reset_index()
    pred_df=pd.merge(test_df,pred,on='ds',how='left')

    # Drop test rows without a matching prediction (or without a y value).
    pred_df.dropna(inplace=True)

    df=pd.DataFrame()

    if(len(pred_df)>0):

        pred_df['error_test']=pred_df.y-pred_df.yhat

        MSE=mse(pred_df.y,pred_df.yhat)
        RMSE=math.sqrt(MSE)
        # Absolute percentage error per row.
        # NOTE(review): rows with y == 0 yield inf/NaN here.
        pred_df['APE']=abs(pred_df.error_test*100/pred_df.y)
        MAPE=pred_df.APE.mean()
        min_error_rate = pred_df['APE'].quantile(0)/100
        max_error_rate = pred_df['APE'].quantile(1)/100
        median_error_rate = pred_df['APE'].quantile(.50)/100
        print("App name:",app_name)
        #print("MSE  :",MSE)
        print("RMSE :",RMSE)
        print("MAPE :",MAPE)

        # MAPE over the rows below the 98th percentile of APE.
        mape_q98=pred_df['APE'][pred_df.APE<pred_df['APE'].quantile(0.98)].mean()
        std_MAPE = math.sqrt(((pred_df.APE-MAPE)**2).mean())

        df = pd.DataFrame({'length':len(z1),
                             'test_rmse':RMSE,
                             'test_mape':MAPE,
                             'std_mape':std_MAPE, # standard deviation of APE
                             'min_error_rate':min_error_rate ,
                             'max_error_rate':max_error_rate ,
                             'median_error_rate':median_error_rate,

                 'test_mape_98':mape_q98},

                          index=[app_name])

    return df, model, forecast, pred_df, pred_r
Ejemplo n.º 34
0
# Order the rows chronologically; the whole frame is used for training.
adf_US_organic = adf_US_organic.sort_values(by='Date')
# Valid = adf[(adf['year'] == 2017) | (adf['year'] == 2018)]
# Train = adf[(adf['year'] != 2017) & (adf['year'] != 2018)]
# NOTE: the frame was already sorted by 'Date' on the line above.
Train = adf_US_organic.sort_values(by='Date')

# In[19]:

from fbprophet import Prophet  #works best with time series & robust to missing data
import matplotlib.pyplot as plt

# In[20]:

# Model 1: forecast 'Total Volume' 365 periods ahead.
m = Prophet()
date_volume = Train.rename(columns={'Date': 'ds', 'Total Volume': 'y'})
m.fit(date_volume)
future = m.make_future_dataframe(periods=365)
forecast = m.predict(future)

# In[21]:

fig1 = m.plot(forecast)

# In[22]:

fig2 = m.plot_components(forecast)

# In[23]:

# Model 2: same procedure with 'Total Bags' as the target.
n = Prophet()
date_bags = Train.rename(columns={'Date': 'ds', 'Total Bags': 'y'})
n.fit(date_bags)
Ejemplo n.º 35
0
def create_prophet_m(source_name,z1,delay=24):
    """Fit Prophet on ``z1.app_count`` for a realtime forecast and a hold-out score.

    Two models are built with the same settings. The first is trained on the
    whole series and forecasts ``delay`` hourly steps past its end. The second
    is trained on everything except the last ``delay`` rows and is scored
    against those rows.

    Args:
        source_name: Label printed with the metrics and used as the index of
            the returned metrics frame.
        z1: DataFrame with an ``app_count`` column; its index becomes the
            ``ds`` column handed to Prophet.
        delay: Number of trailing rows held out, and number of hourly steps
            forecast beyond the data.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame (empty when no hold-out row survives the merge
        and ``dropna``), ``model``/``forecast`` belong to the hold-out model,
        ``pred_df`` is the hold-out data joined with its predictions, and
        ``pred_r`` holds the realtime predictions.
    """

    def _to_prophet_frame(series):
        # Prophet reads the timestamp from 'ds' and the target from 'y'.
        frame = series.reset_index()
        frame.columns = ['ds', 'y']
        return frame

    n_rows = len(z1)
    train_end_index = len(z1.app_count) - delay

    train_df = _to_prophet_frame(z1.app_count.iloc[0:train_end_index])
    test_df = _to_prophet_frame(z1.app_count.iloc[train_end_index:n_rows])
    full_df = _to_prophet_frame(z1.app_count.iloc[0:n_rows])

    # -- Realtime prediction: train on everything, look `delay` hours ahead --
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    forecast_r = model_r.predict(
        model_r.make_future_dataframe(periods=delay, freq='H'))
    forecast_r.index = forecast_r['ds']
    # Rows past the end of the history are the future part of the forecast.
    pred_r = pd.DataFrame(
        forecast_r['yhat'][n_rows:(n_rows + delay)]).reset_index()

    # -- Hold-out evaluation: train without the last `delay` rows --
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    forecast = model.predict(
        model.make_future_dataframe(periods=len(test_df), freq='H'))
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'][train_end_index:n_rows]).reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    if len(pred_df) == 0:
        # Nothing to score: hand back an empty metrics frame.
        return (pd.DataFrame(), model, forecast, pred_df, pred_r)

    pred_df['error_test'] = pred_df.y - pred_df.yhat
    MSE = mse(pred_df.y, pred_df.yhat)
    RMSE = math.sqrt(MSE)
    pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
    MAPE = pred_df.APE.mean()
    print("App name:",source_name)
    print("MSE  :",MSE)
    print("RMSE :",RMSE)
    print("MAPE :",MAPE)

    # MAPE restricted to errors strictly below the 98th percentile.
    q98 = pred_df['APE'].quantile(0.98)
    mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()

    metrics = {
        'test_rmse': RMSE,
        'test_mape': MAPE,
        'test_mape_98': mape_q98,
    }
    df = pd.DataFrame(metrics, index=[source_name])
    return (df, model, forecast, pred_df, pred_r)
Ejemplo n.º 36
0
    # The test window starts exactly where the training window ends and spans
    # test_periods rows.
    EndTimeTrain = StartTimeTrain + train_periods
    StartTimeTest = EndTimeTrain
    EndTimeTest = StartTimeTest + test_periods

    # NOTE(review): none of the statements below advance the window bounds;
    # the update that lets this loop terminate is presumably further down the
    # loop body - confirm.
    while EndTimeTest < TimeTotal:

        # prepare train & test sets (positional row slices of df_adm)
        df_to_fit = df_adm.iloc[StartTimeTrain:EndTimeTrain]
        df_to_test = df_adm.iloc[StartTimeTest:EndTimeTest]

        # initialize model, train & predict
        # uncertainty_samples=0: per the Prophet docs this skips the
        # uncertainty-interval estimation; only yhat is used below.
        m = Prophet(weekly_seasonality=False,
                    daily_seasonality=False,
                    uncertainty_samples=0)
        m.fit(df_to_fit)
        future = m.make_future_dataframe(periods=test_periods, freq='M')
        forecast = m.predict(future)

        # prepare dataframe with test data & predictions
        # Only the last test_periods forecast rows are kept before the merge
        # on 'ds'; rows whose 'ds' does not match on both sides are dropped
        # (default inner merge).
        df_result = pd.merge(df_to_test,
                             forecast.iloc[-test_periods:],
                             on='ds')

        # compute results (evaluation metrics)
        results = evaluate(df_result['y'], df_result['yhat'], metrics=metrics)

        # add metadata to results
        # NOTE(review): values[0] raises IndexError if the merge above left no
        # rows - confirm the dates always line up.
        results['adm'] = df_result['ADM1_EN'].values[0]
        results['train_periods'] = train_periods
        results['test_periods'] = test_periods
        results['test_dates'] = df_to_test.ds.tolist()
def get_outliers(my_dict, all_intents_text_df):
    """Flag turns whose CES value exceeds a Prophet upper bound and tabulate them.

    For every key of ``my_dict`` (visited in sorted order) a Prophet model
    with all seasonalities disabled is fitted to that entry's ``"CES"``
    values. A turn counts as an outlier when its CES value lies above the
    fitted ``yhat_upper`` bound (``interval_width=.999``). Turns whose text
    contains ``"xx"`` are never reported.

    Args:
        my_dict: Mapping whose values hold three index-aligned sequences
            under the keys ``"CES"``, ``"Texts"`` and ``"WebSessionIDs"``.
        all_intents_text_df: DataFrame with ``WebSessionID``,
            ``Asked_Date_Time`` and ``Input`` columns, used to look up the
            full conversation of a flagged session.

    Returns:
        DataFrame with the columns ``WebSessionID``, ``Full Conversation``
        and ``Preprocessed Turn Predicted to be Anomalous``. Every reported
        item is preceded by a separator row of empty strings.
    """
    # The three lists become the columns of the result, so every branch below
    # must extend all of them by the same number of entries.
    websessionids = []
    convs = []
    turn_predicted = []

    for i in sorted(my_dict):
        print(i)
        entry = my_dict[i]
        ces = entry["CES"]

        fb_prophet_model = Prophet(growth='linear',
                                   yearly_seasonality=False,
                                   weekly_seasonality=False,
                                   daily_seasonality=False,
                                   interval_width=.999)
        # Prophet needs a date column; the scores carry none, so made-up
        # dates are spread evenly over 2018.
        dates = [
            str(item)
            for item in pd.date_range(start='2018-01-01',
                                      end='2018-12-31',
                                      periods=len(ces))
        ]
        fb_df_train = pd.DataFrame({"ds": dates, "y": ces})
        fb_prophet_model.fit(fb_df_train, verbose=False)
        # periods=0: predict on the history only, no future rows.
        future = fb_prophet_model.make_future_dataframe(periods=0)
        fcst = fb_prophet_model.predict(future)

        # Positions where the observed value is above the upper bound.
        indices = [
            k for k, upper in enumerate(fcst["yhat_upper"]) if upper < ces[k]
        ]

        for q in indices:
            session_id = entry["WebSessionIDs"][q]
            text = entry["Texts"][q]
            print(session_id)
            print(text)

            if session_id == "DIFFERENT INTENT":
                if "xx" not in text:
                    websessionids += ["", "DIFFERENT INTENT"]
                    convs += ["", text]
                    # Keep the third column in step with the other two;
                    # without this the DataFrame below cannot be built.
                    turn_predicted += ["", text]
                continue

            # Regular session: fetch its conversation in chronological order.
            mask = all_intents_text_df['WebSessionID'] == session_id
            test = all_intents_text_df.loc[mask]
            test = test.sort_values(by=['Asked_Date_Time'])
            conversation = list(test["Input"].values)
            print(test["Input"].values)

            # Report each session once.
            if session_id in websessionids or "xx" in text:
                continue

            websessionids.append("")
            convs.append("")
            turn_predicted.append("")

            convs += conversation
            websessionids += len(conversation) * [session_id]

            # The flagged turn is written next to the first conversation row.
            turn_predicted_value = [""] * len(conversation)
            if turn_predicted_value:
                turn_predicted_value[0] = text
            turn_predicted += turn_predicted_value

        print("\n\n---------------------------------\n\n")

    tagging_df = pd.DataFrame({
        "WebSessionID": websessionids,
        "Full Conversation": convs,
        "Preprocessed Turn Predicted to be Anomalous": turn_predicted,
    })
    return tagging_df
# Prophet reads the target from 'y' and the timestamp from 'ds'.
# The target is modelled on the log scale.
view_hour['y'] = np.log(view_hour['distinct_freq_sum'])
view_hour['ds'] = view_hour['date_hour']
view_hour.head(5)

#%%
## Prophet1
# NOTE(review): the model is built with default arguments, so the uncertainty
# interval stays at Prophet's default of 80%; pass interval_width=0.95 if the
# 95% interval is wanted.
m = Prophet()
# NOTE(review): Prophet's `period` is expressed in days, so this adds a 24-day
# cycle named 'hourly' - confirm that is intended (a 24-hour cycle would be
# period=1).
m.add_seasonality(name='hourly', period=24, fourier_order=2)
m.fit(view_hour);


#%%
## Create a dataframe that extends the history by 24 hourly steps.
## tail() only displays the appended time periods; they carry no forecast yet.
future = m.make_future_dataframe(periods=24,freq='H')
future.tail()

#%%
## This is the forecast that is exponentiated further below.
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()

#%%
## This is the forecast that retains the log transform.
## Note that predict() creates a dataframe containing many component
##   columns (e.g., trend, daily, hourly, weekly, seasonal) along with
##   their _upper and _lower bounds. Run .info() on the dataframe to
##   see all of them.
## The code that follows narrows it down to just 4 of those columns.
forecast1 = m.predict(future)
Ejemplo n.º 39
0
def _fit_log_forecast(raw_data, column, num_days, seasonality):
    """Fit Prophet on log(``column``) and return its ``predict`` frame."""
    history = raw_data.filter(['Date', column], axis=1)
    history.columns = ['ds', 'y']
    history['y'] = np.log(history['y'])

    model = Prophet()
    model.add_seasonality(name='monthly',
                          period=seasonality,
                          fourier_order=10)
    model.fit(history)

    future = model.make_future_dataframe(periods=num_days)
    return model.predict(future)


def _smoothed_level(forecast, column):
    """Return exp(yhat), Gaussian-smoothed, as a one-column frame ``column``."""
    level = np.exp(forecast['yhat'])
    # The filter width is the standard deviation of the back-transformed
    # forecast itself.
    return pd.DataFrame(filters.gaussian_filter1d(level, level.std()),
                        columns=[column])


def prophet_forecast(raw_data_with_filter: pd.DataFrame,
                     num_days,
                     seasonality=30):
    """Forecast Close, High, Low and Volume ``num_days`` periods ahead.

    Each column is modelled independently: Prophet is fitted on the log of
    the column with an extra ``'monthly'`` seasonality, and the prediction is
    mapped back with ``exp``. The three price columns are additionally
    smoothed with a Gaussian filter; Volume is not.

    Args:
        raw_data_with_filter: Frame with ``Date``, ``Close``, ``High``,
            ``Low`` and ``Volume`` columns. The value columns must be
            positive for the log transform to be defined.
        num_days: Number of future periods to forecast and to return.
        seasonality: Period passed to the ``'monthly'`` seasonality.

    Returns:
        DataFrame with the columns ``Date``, ``Close``, ``High``, ``Low``
        and ``Volume``, limited to the last ``num_days`` forecast rows.
    """
    # The four models are fitted in the same order as before (Close, High,
    # Low, Volume).
    close_forecast = _fit_log_forecast(raw_data_with_filter, 'Close',
                                       num_days, seasonality)
    forecast_output_df = pd.DataFrame(close_forecast.ds).join(
        _smoothed_level(close_forecast, 'Close'))
    forecast_output_df.columns = ['Date', 'Close']

    for column in ('High', 'Low'):
        column_forecast = _fit_log_forecast(raw_data_with_filter, column,
                                            num_days, seasonality)
        forecast_output_df = forecast_output_df.join(
            _smoothed_level(column_forecast, column))

    # Volume is only back-transformed, never smoothed.
    volume_forecast = _fit_log_forecast(raw_data_with_filter, 'Volume',
                                        num_days, seasonality)
    forecast_output_df = forecast_output_df.join(
        np.exp(volume_forecast['yhat']).to_frame('Volume'))

    # The history rows come first; the future rows are the last num_days.
    return forecast_output_df.tail(num_days)
Ejemplo n.º 40
0
def create_prophet_m(source_name,z1,delay):
    """Fit Prophet on ``z1.bw`` for a realtime forecast and a hold-out score.

    Two models are built with the same settings. The first is trained on the
    whole series and forecasts ``delay`` hourly steps past its end. The second
    is trained on everything except the last ``delay`` rows (after masking
    large values, see below) and is scored against those rows.

    Args:
        source_name: Label printed with the metrics and used as the index of
            the returned metrics frame.
        z1: DataFrame with a ``bw`` column; its index becomes the ``ds``
            column handed to Prophet.
        delay: Number of trailing rows held out, and number of hourly steps
            forecast beyond the data.

    Returns:
        Tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is a
        one-row metrics frame (empty when no hold-out row survives the merge
        and ``dropna``), ``model``/``forecast`` belong to the hold-out model,
        ``pred_df`` is the hold-out data joined with its predictions, and
        ``pred_r`` holds the realtime predictions.
    """

    import math

    train_end_index = len(z1.bw) - delay

    train_df = z1.bw.iloc[0:train_end_index].reset_index()
    train_df.columns = ['ds', 'y']
    test_df = z1.bw.iloc[train_end_index:len(z1)].reset_index()
    test_df.columns = ['ds', 'y']
    full_df = z1.bw.iloc[0:len(z1)].reset_index()
    full_df.columns = ['ds', 'y']

    #--- removing outliers in trainset  ---#
    # When the maximum sits at least 2x the median above the median, every
    # value >= 2x the median is blanked so Prophet treats it as missing.
    q50 = train_df.y.median()
    q100 = train_df.y.quantile(1)
    print(max(train_df.y))
    if (q100 - q50) >= (2 * q50):
        print('ind')
        train_df.loc[train_df.y >= (2 * q50), 'y'] = None

    ##-- Realtime prediction --##
    model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model_r.fit(full_df)
    future_r = model_r.make_future_dataframe(periods=delay, freq='H')
    forecast_r = model_r.predict(future_r)
    forecast_r.index = forecast_r['ds']
    # Rows past the end of the history are the future part of the forecast.
    pred_r = pd.DataFrame(forecast_r['yhat'].iloc[len(z1):(len(z1) + delay)])
    pred_r = pred_r.reset_index()

    ##-- Hold-out evaluation --##
    model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
    model.fit(train_df)
    future = model.make_future_dataframe(periods=len(test_df), freq='H')
    forecast = model.predict(future)
    forecast.index = forecast['ds']
    pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:len(z1)])
    pred = pred.reset_index()
    pred_df = pd.merge(test_df, pred, on='ds', how='left')
    pred_df.dropna(inplace=True)

    df = pd.DataFrame()

    if len(pred_df) > 0:

        pred_df['error_test'] = pred_df.y - pred_df.yhat

        MSE = mse(pred_df.y, pred_df.yhat)
        RMSE = math.sqrt(MSE)
        pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)

        MAPE = pred_df.APE.mean()
        # The error rates are quantiles of the APE column alone, expressed as
        # fractions. Taking the quantile of the whole frame would return a
        # Series that cannot be placed in the single-row metrics frame below.
        ape = pred_df['APE']
        min_error_rate = ape.quantile(0) / 100
        max_error_rate = ape.quantile(1) / 100
        median_error_rate = ape.quantile(.50) / 100

        # Population standard deviation of the APE values around MAPE.
        std_MAPE = math.sqrt(((pred_df.APE - MAPE) ** 2).mean())
        print("App name:",source_name)
        print("MSE  :",MSE)
        print("RMSE :",RMSE)
        print("MAPE :",MAPE)

        # MAPE restricted to errors strictly below the 98th percentile.
        mape_q98 = ape[ape < ape.quantile(0.98)].mean()

        df = pd.DataFrame({'length': len(z1),
                           'test_rmse': RMSE,
                           'test_mape': MAPE,
                           'std_mape': std_MAPE,  # standard deviation of mape
                           'min_error_rate': min_error_rate,
                           'max_error_rate': max_error_rate,
                           'median_error_rate': median_error_rate,
                           'test_mape_98': mape_q98},
                          index=[source_name])

    return(df,model,forecast,pred_df,pred_r)
Ejemplo n.º 41
0
def plot_raw_data():
	"""Draw data['y'] against data['date'] with a range slider in Streamlit."""
	series_trace = go.Scatter(x=data['date'], y=data['y'], name="Close")

	fig = go.Figure()
	fig.add_trace(series_trace)
	fig.layout.update(
		title_text='raw data with Rangeslider',
		xaxis_rangeslider_visible=True,
	)

	# Hand the finished figure to Streamlit for rendering.
	st.plotly_chart(fig)

# The body runs only when st.button("predict") returns True.
if st.button("predict"):
    # Model settings collected in one place, then unpacked into Prophet.
    prophet_options = dict(
        changepoint_range=0.8,
        yearly_seasonality='auto',
        weekly_seasonality='auto',
        daily_seasonality=True,
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.05,
    )
    model = Prophet(**prophet_options)
    model.fit(df)

    # Extend the history by no_of_days periods and predict over all of it.
    future = model.make_future_dataframe(periods=no_of_days)
    forecast = model.predict(future)

    # Section 1: first 30 rows of the prediction frame.
    st.subheader("Prediction Data")
    st.write(forecast.head(30))

    # Section 2: interactive forecast plot.
    st.subheader(f'Forecast plot for {no_of_days} days')
    fig1 = plot_plotly(model, forecast)
    st.write(fig1)

    # Section 3: component plot.
    st.subheader("Forecast components")
    fig2 = model.plot_components(forecast)
    st.write(fig2)
Ejemplo n.º 42
0
    def create_prophet_m(self,app_name,z1,delay=24):
        """Fit Prophet on ``z1.bw`` for a realtime forecast and a hold-out score.

        Two models are built with the same settings, each on data with large
        values masked first. The first is trained on the whole series and
        forecasts ``delay`` hourly steps past its end. The second is trained
        on everything except the last ``delay`` rows and is scored against
        those rows.

        Args:
            app_name: Label used as the index of the returned metrics frame.
            z1: DataFrame with a ``bw`` column; its index becomes the ``ds``
                column handed to Prophet.
            delay: Number of trailing rows held out, and number of hourly
                steps forecast beyond the data.

        Returns:
            Tuple ``(df, model, forecast, pred_df, pred_r)`` where ``df`` is
            a one-row metrics frame (empty when no hold-out row survives the
            merge and ``dropna``), ``model``/``forecast`` belong to the
            hold-out model, ``pred_df`` is the hold-out data joined with its
            predictions, and ``pred_r`` holds the realtime predictions.
        """

        # Only the modules this method actually uses are imported, so it no
        # longer fails when unrelated packages are not installed.
        import pandas as pd
        import warnings
        # NOTE: this silences warnings for the whole process, not just here.
        warnings.filterwarnings("ignore")
        from fbprophet import Prophet
        from sklearn.metrics import mean_squared_error as mse
        import math

        def _to_prophet_frame(series):
            # Prophet reads the timestamp from 'ds' and the target from 'y'.
            frame = series.reset_index()
            frame.columns = ['ds', 'y']
            return frame

        def _mask_outliers(frame):
            # When the maximum sits at least 2x the 75th percentile above the
            # median, blank every value >= 2x the 75th percentile so Prophet
            # treats it as missing.
            q50 = frame.y.median()
            q75 = frame.y.quantile(.75)
            q100 = frame.y.quantile(1)
            if (q100 - q50) >= (2 * q75):
                frame.loc[frame.y >= (2 * q75), 'y'] = None
            return frame

        ### --- For realtime pred ---###
        full_df = _mask_outliers(_to_prophet_frame(z1.bw.iloc[0:len(z1)]))

        model_r = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
        model_r.fit(full_df)
        future_r = model_r.make_future_dataframe(periods=delay, freq='H')
        forecast_r = model_r.predict(future_r)
        forecast_r.index = forecast_r['ds']
        # Rows past the end of the history are the future part of the forecast.
        pred_r = pd.DataFrame(
            forecast_r['yhat'].iloc[len(z1):(len(z1) + delay)])
        pred_r = pred_r.reset_index()
        #--- completes realtime pred ---#

        train_end_index = len(z1.bw) - delay
        train_df = _mask_outliers(
            _to_prophet_frame(z1.bw.iloc[0:train_end_index]))
        # The hold-out rows are left untouched: no outlier masking.
        test_df = _to_prophet_frame(z1.bw.iloc[train_end_index:len(z1)])

        model = Prophet(yearly_seasonality=False, changepoint_prior_scale=.2)
        model.fit(train_df)
        future = model.make_future_dataframe(periods=len(test_df), freq='H')
        forecast = model.predict(future)
        forecast.index = forecast['ds']
        pred = pd.DataFrame(forecast['yhat'].iloc[train_end_index:len(z1)])
        pred = pred.reset_index()
        pred_df = pd.merge(test_df, pred, on='ds', how='left')
        pred_df.dropna(inplace=True)

        df = pd.DataFrame()

        if len(pred_df) > 0:

            pred_df['error_test'] = pred_df.y - pred_df.yhat

            MSE = mse(pred_df.y, pred_df.yhat)
            RMSE = math.sqrt(MSE)
            pred_df['APE'] = abs(pred_df.error_test * 100 / pred_df.y)
            MAPE = pred_df.APE.mean()

            # MAPE restricted to errors strictly below the 98th percentile.
            q98 = pred_df['APE'].quantile(0.98)
            mape_q98 = pred_df['APE'][pred_df.APE < q98].mean()

            df = pd.DataFrame({'length': len(z1),
                               'test_rmse': RMSE,
                               'test_mape': MAPE,
                               'test_mape_98': mape_q98},
                              index=[app_name])

        return(df,model,forecast,pred_df,pred_r)
Ejemplo n.º 43
0
    def run(
        self,
        database="timeseries",
        source_table="retail_sales",
        target_table="predicted_sales",
        start_date="1993-01-01",
        end_date="2016-05-31",
        period=365,
        with_aws=False,
    ):
        """Fit Prophet on past sales and write the forecast to a table.

        :param database: Database the client connects to, defaults to
          "timeseries"
        :type database: str
        :param source_table: Table the ``ds``/``y`` history is read from,
          defaults to "retail_sales"
        :type source_table: str
        :param target_table: Table the forecast is written to (overwritten if
          it exists), defaults to "predicted_sales"
        :type target_table: str
        :param start_date: First date of the training range, defaults to
          "1993-01-01"
        :type start_date: str
        :param end_date: Last date of the training range, defaults to
          "2016-05-31"
        :type end_date: str
        :param period: Number of periods to forecast, defaults to 365
        :type period: int
        :param with_aws: If True, the model and forecast are passed to
          ``self._upload_graph``, defaults to False
        :type with_aws: bool
        """

        import pytd
        import pandas as pd
        from fbprophet import Prophet

        # Coerce the horizon to an integer before anything else uses it
        horizon = int(period)

        # API key and endpoint come from the environment
        client = pytd.Client(
            apikey=os.getenv("TD_API_KEY"),
            endpoint=os.getenv("TD_API_SERVER"),
            database=database,
        )

        # Prophet requires a `ds` column (date) and a `y` column (target)
        sales_query = f"""
            select ds, y
            from {source_table}
            where ds between '{start_date}' and '{end_date}'
            """
        history = pd.DataFrame(**client.query(sales_query))

        # Train, then predict over the history extended by `horizon` periods
        model = Prophet(seasonality_mode="multiplicative")
        model.fit(history)
        forecast = model.predict(
            model.make_future_dataframe(periods=horizon))

        # Optional graph upload
        if with_aws:
            self._upload_graph(model, forecast)

        # Timestamps cannot be serialized on upload (TypeError), so the `ds`
        # column is converted to plain strings first
        forecast["ds"] = forecast["ds"].apply(str)

        # Replace the target table with the new forecast
        client.load_table_from_dataframe(
            forecast,
            target_table,
            if_exists="overwrite",
        )