Example #1
 def test_performance_metrics(self):
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(
         set(df_none.columns),
         {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     self.assertEqual(df_horizon.shape[0], 14)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv, metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
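The three aggregation levels the test exercises map directly onto rolling_window. A minimal sketch of the same calls outside the test harness, assuming df_cv comes from diagnostics.cross_validation as above:

# rolling_window=0: one row per horizon step, no smoothing
df_raw = diagnostics.performance_metrics(df_cv, rolling_window=0)
# rolling_window=0.2: each value aggregates a window spanning 20% of the predictions
df_smooth = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
# rolling_window=1: a single row aggregating the entire cross-validation set
df_total = diagnostics.performance_metrics(df_cv, rolling_window=1)
assert df_total.shape[0] == 1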
Example #2
 def test_performance_metrics(self):
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(m,
                                          horizon='4 days',
                                          period='10 days',
                                          initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(
         set(df_none.columns),
         {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     self.assertEqual(df_horizon.shape[0], 14)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
Example #3
 def test_performance_metrics(self):
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(m,
                                          horizon='4 days',
                                          period='10 days',
                                          initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=-1)
     self.assertEqual(
         set(df_none.columns),
         {'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse'},
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0
     df_0 = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(len(df_0), 4)
     self.assertEqual(len(df_0['horizon'].unique()), 4)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon), 4)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertAlmostEqual(df_all[metric].values[0],
                                df_none[metric].mean())
     self.assertAlmostEqual(df_all['mdape'].values[0],
                            df_none['mdape'].median())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
     # Skip MAPE
     df_cv.loc[0, 'y'] = 0.
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mape'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'horizon'},
     )
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['mape'],
     )
     self.assertIsNone(df_horizon)
     # List of metrics containing non-valid metrics
     with self.assertRaises(ValueError):
         diagnostics.performance_metrics(
             df_cv,
             metrics=['mse', 'error_metric'],
         )
Example #4
def FB_Model(train_x, train_y, train_ratio, predict_period, changepoint_prior_scale, analysisornot, holiday):
    time_split = int((train_x.shape[0]) * train_ratio)
    df = pd.DataFrame(columns=['ds', 'y'])
    df['ds'] = train_x[:time_split]
    df['y'] = train_y[:time_split]
    df['y'] = np.log(np.asarray(df['y'], dtype=float))  # the log transform improves forecast quality

    Hoday = pd.DataFrame({
        'holiday': 'weekend',
        'ds': pd.to_datetime(holiday),
        'lower_window': 0,
        'upper_window': 1,
    })

    # training
    model = Prophet(changepoint_prior_scale=changepoint_prior_scale, mcmc_samples=0, holidays=Hoday)  # mcmc_samples defaults to 0; increasing it replaces MAP estimation with MCMC sampling, but greatly increases training time
    model.fit(df)
    future = model.make_future_dataframe(freq='D', periods=predict_period)  # build the prediction frame at daily granularity, predict_period days ahead
    forecast = model.predict(future)
    model.plot(forecast).show()  # plot the forecast
    model.plot_components(forecast).show()

    # analyze the results
    if analysisornot:
        df_cv = cross_validation(model, initial='1100 days', period='20 days', horizon='20 days')
        df_p = performance_metrics(df_cv)
        print(df_cv.head())
        print(df_p.head())
        fig = plot_cross_validation_metric(df_cv, metric='mape')
        fig.show()
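Note that FB_Model fits on np.log(y) but never maps the forecast back, so the plotted values stay on the log scale. A minimal sketch of the inverse transform, assuming the forecast frame produced above:

# undo the log transform applied before fitting (sketch, not part of the original)
for col in ['yhat', 'yhat_lower', 'yhat_upper']:
    forecast[col] = np.exp(forecast[col])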
Example #5
    def fit_cv(
        self,
        param_grid=None,
        initial="80 days",
        horizon="14 days",
        period="14 day",
        rolling_window=1,
    ):

        if param_grid is None:
            param_grid = {
                "changepoint_prior_scale": [0.001, 0.01, 0.1, 0.5],
                "seasonality_prior_scale": [0.01, 0.1, 1.0, 10.0],
                "holidays_prior_scale": [0.01, 0.1, 1.0, 10.0],
            }

        all_params = [
            dict(zip(param_grid.keys(), v))
            for v in itertools.product(*param_grid.values())
        ]

        # We manually set this cap, since otherwise the plots are hard to
        # read. If we wanted the correct cap, we would have used the total
        # population:
        # self.df['cap'] = get_region_pop(province, pop_prov_df, prov_list_df)
        if self.growth == "logistic":
            if self.cap is None:
                self.cap = self.df["y"].max() + 20000
            self.df["cap"] = self.cap

        rmses, mae, mse = [], [], []
        for params in all_params:
            m = Prophet(self.growth, **params)

            if self.holidays:
                m.add_country_holidays(country_name="IT")

            m.fit(self.df)

            df_cv = cross_validation(m,
                                     initial=initial,
                                     horizon=horizon,
                                     period=period,
                                     parallel="processes")

            df_p = performance_metrics(df_cv, rolling_window=rolling_window)

            rmses.append(df_p["rmse"].values[0])
            mae.append(df_p["mae"].values[0])
            mse.append(df_p["mse"].values[0])

        tuning_results = pd.DataFrame(all_params)
        tuning_results["rmse"] = rmses
        tuning_results["mae"] = mae
        tuning_results["mse"] = mse

        self.tuning_results = tuning_results
        self.best_params = all_params[np.argmin(rmses)]

        self.fit(**self.best_params)
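A hedged usage sketch for fit_cv; the forecaster instance and its construction are assumptions, only the attributes set above are given by the code:

forecaster.fit_cv(horizon="14 days")                  # run the grid search
print(forecaster.tuning_results.sort_values("rmse"))  # every combination, scored
print(forecaster.best_params)                         # combination with the lowest RMSE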
Example #6
def validate_model(model, dates):
    """
    Background:

    This model validation function is still under construction and will be
    updated in a future release.
    """
    
    count_of_time_units = len(dates)
    #print(count_of_time_units)
    initial_size = str(int(count_of_time_units * 0.20)) + " days"
    horizon_size = str(int(count_of_time_units * 0.10)) + " days"
    period_size = str(int(count_of_time_units * 0.05)) + " days"
    
    df_cv = cross_validation(model, initial=initial_size, horizon=horizon_size, period=period_size)
    #df_cv = cross_validation(model,initial='730 days', period='180 days', horizon = '365 days')
    df_p = performance_metrics(df_cv)
    
    #print(df_cv.head(100))
    #print(df_p.head(100))
    
    mape_score_avg = str(round(df_p['mape'].mean() * 100, 2)) + "%"

    return mape_score_avg
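To make the window arithmetic concrete: with, say, 1000 daily observations, the splits work out as below (a worked sketch of the fractions used above):

count_of_time_units = 1000
initial_size = str(int(count_of_time_units * 0.20)) + " days"  # "200 days" of history before the first cutoff
horizon_size = str(int(count_of_time_units * 0.10)) + " days"  # "100 days" forecast horizon
period_size = str(int(count_of_time_units * 0.05)) + " days"   # "50 days" between cutoffs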
Example #7
 def getparameters_4ph(self):  # return best_params and override default parameters
     param_grid = {
         'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.3, 0.5, 1],
         'seasonality_prior_scale': [0.01, 0.1, 1, 3, 6, 10, 20],
     }
     all_params = [
         dict(zip(param_grid.keys(), v))
         for v in itertools.product(*param_grid.values())
     ]
     rmses = []
     # Use cross validation to evaluate all parameters
     for params in all_params:
         m = Prophet(**params,
                     daily_seasonality=True,
                     weekly_seasonality=True,
                     yearly_seasonality=True,
                     growth='linear')
         m.add_seasonality(name='yearly', period=365.25, fourier_order=20)
         m = m.fit(self.ph_train)
         df_cv = cross_validation(m,
                                  initial='730 days',
                                  period='90 days',
                                  horizon='180 days')
         df_p = performance_metrics(df_cv, rolling_window=1)
         rmses.append(df_p['rmse'].values[0])
     # Find the best parameters
     tuning_results = pd.DataFrame(all_params)
     tuning_results['rmse'] = rmses
     if disp: print(tuning_results)  # `disp` is a verbosity flag assumed to be defined at module level
     best_params = all_params[np.argmin(rmses)]
     if disp: print(best_params)
     self.ph_bestparameters = best_params
     return best_params
Example #8
    def train(self, metric_dict, oldest_data_datetime):
        """Train the Prophet model and store the predictions in predicted_df."""
        prediction_freq = "1MIN"

        # convert incoming metric to Metric Object
        metric = Metric(metric_dict, oldest_data_datetime)

        self._model = Prophet(daily_seasonality=True,
                              weekly_seasonality=True,
                              yearly_seasonality=True)

        _LOGGER.info("training data range: %s - %s", metric.start_time,
                     metric.end_time)

        _LOGGER.debug("begin training")

        df_fit = self._model.fit(metric.metric_values)

        if True:
            df_cv = cross_validation(self._model,
                                     horizon="1 day",
                                     period="8 hours",
                                     initial="4 days")
            df_p = performance_metrics(df_cv)
            _LOGGER.info("Performance data: %s %s", metric.metric_name, df_p)
Example #9
 def cross_validation(self):
     if self.cv is None:
         LOGGER.info("Cross validation not configured, skipping")
         return None
     try:
         horizon = self._get_timedelta(self.cv["horizon"])
         period = self._get_timedelta(self.cv.get("period"))
         initial = self._get_timedelta(self.cv.get("initial"))
     except KeyError:
         raise ValueError(
             "'horizon' is a required config key for cross validation")
     self.cv_data = cross_validation(self.model,
                                     horizon=horizon,
                                     period=period,
                                     initial=initial)
     if self.cv_output_uri is not None:
         self.save(self.cv_data, "cv_output_uri")
         LOGGER.info(f"Cross validation data saved to {self.cv_output_uri}")
     rolling_window = self.cv.get("rolling_window") or 0.1
     metrics = self.cv.get("metrics")
     self.cv_metrics = performance_metrics(self.cv_data,
                                           rolling_window=rolling_window,
                                           metrics=metrics)
     if self.cv_metrics_uri is not None:
         self.save(self.cv_metrics, "cv_metrics_uri")
         LOGGER.info(
             f"Cross validation metrics saved to {self.cv_metrics_uri}")
Example #10
def prophet_cv_performance(model, train, minmax_pipe):

    #fit the model
    model.fit(train)

    #run cross validation
    #cv_results = cross_validation(model, initial='365 days', period='24 hours', horizon='24 hours')

    cv_results = cross_validation(model,
                                  initial='366 days',
                                  period='24 hours',
                                  horizon='24 hours')

    # inverse transform the target and prediction columns
    cv_results[['yhat', 'y']] = minmax_pipe.inverse_transform(cv_results[['yhat', 'y']])

    #get performance results on the cv output
    cv_performance = performance_metrics(cv_results)

    # display the results
    print('Model Mean Hourly MAE {0:.2f}'.format(np.mean(cv_performance['mae'])))
    print('Model Mean Hourly MAPE {0:.2f}'.format(np.mean(cv_performance['mape'])))

    return cv_results, cv_performance
Example #11
def evaluate_model(model):
    df_cv = cross_validation(model,
                             initial="700 days",
                             period="92 days",
                             horizon="8 days")
    df_p = performance_metrics(df_cv)
    return df_cv, df_p
Example #12
def fbp(df, p, freq):
    model = fbprophet.Prophet()
    model.fit(df)
    future = model.make_future_dataframe(periods=p,
                                         freq=freq,
                                         include_history=True)
    # future.tail()
    forecast = model.predict(future)
    # model.plot(forecast)
    # model.plot_components(forecast)
    # print(forecast)
    if freq == 'Y':
        time_format = '%Y'
    elif freq == 'M':
        time_format = '%Y-%m'
    elif freq == 'D':
        time_format = '%Y-%m-%d'
    else:
        time_format = '%Y-%m-%d'  # fallback so time_format is always defined
    df_cv = cross_validation(model, horizon='30 days')
    df_pe = performance_metrics(df_cv)
    df_cv.to_csv('C:/Users/47135/Desktop/df_cv.csv', encoding='UTF-8')
    df_pe.to_csv('C:/Users/47135/Desktop/df_pe.csv', encoding='UTF-8')
    forecast['ds'] = forecast['ds'].dt.strftime(time_format)
    result = forecast.to_dict(orient='list')
    # print(result)
    return result
Example #13
def random_search(df, param_grid, max_evals=MAX_EVALS):
    """Random search for hyperparameter optimization"""
    results = pd.DataFrame(columns=['mape', 'params', 'iteration', 'model'],
                           index=list(range(max_evals + 1)))  # +1 row for the untuned baseline below

    for i in range(max_evals):

        # Choose random hyperparameters
        hyperparameters = {k: sample(v, 1)[0] for k, v in param_grid.items()}
        # Evaluate randomly selected hyperparameters
        eval_results = objective(df, hyperparameters, i)
        results.loc[i, :] = eval_results

    # Model without param tuning
    m = Prophet()
    m.fit(df)
    df_cv = cross_validation(m,
                             initial='{} days'.format(round(len(df) * 0.75)),
                             horizon='{} days'.format(round(len(df) * 0.1)))
    df_p = performance_metrics(df_cv)
    mape = df_p.mape.mean()
    results.loc[max_evals, :] = [mape, None, None, m]

    # Sort with best score on top
    results.sort_values('mape', inplace=True)
    results.reset_index(inplace=True)
    return results
Example #14
def model_prophet(series,
                  holidays,
                  weekly=0,
                  monthly=0,
                  dev_length=729,
                  period='1 days',
                  horizon='1 days',
                  cvf=True,
                  perf=True,
                  val_dates=None):

    #define model
    modelf = Prophet(interval_width=0.95,
                     daily_seasonality=False,
                     weekly_seasonality=False,
                     yearly_seasonality=False,
                     holidays=holidays)
    if weekly != 0:
        modelf.add_seasonality(name='weeklyx', period=7, fourier_order=weekly)
    if monthly != 0:
        modelf.add_seasonality(name='monthlyx',
                               period=30.5,
                               fourier_order=monthly)

    modelf.fit(series)

    #test for horizon
    if cvf:
        cvf = cross_validation(modelf,
                               initial=str(dev_length) + ' days',
                               period=period,
                               horizon=horizon)
    if perf: perf = performance_metrics(cvf)  # assumes cvf=True so that cvf now holds the CV frame

    return modelf, cvf, perf
Example #15
def evaluate_metrics(model):
    df_cv = cross_validation(model,
                             initial='730 days',
                             period='90 days',
                             horizon='180 days')
    df_p = performance_metrics(df_cv)
    return df_p
Example #16
    def plot_residuals(self):

        print("CROSS VALIDATION RESULTS")
        df_cv = cross_validation(self.m,
                                 initial='365.25 days',
                                 period='365.25 days',
                                 horizon='365.25 days')
        # df_cv = cross_validation(self.m, initial='180 days', period='180 days', horizon='180 days')
        # self.out_table = df_cv
        print(pdtabulate(df_cv))

        print("PERFORMANCE METRICS")
        df_p = performance_metrics(df_cv)
        print(pdtabulate(df_p))

        # Mean absolute percentage error
        fig3 = plot_cross_validation_metric(df_cv,
                                            metric='mape',
                                            figsize=(11, 6))
        ax3 = fig3.gca()
        blue_patch = mpatches.Patch(color='#5F86BC',
                                    label='Accuracy ~= ((1 - MAPE) * 100) %')
        plt.legend(handles=[blue_patch])
        # ax3.legend("Accuracy ~= ((1 - MAPE) * 100) %")
        ax3.set_title(
            label="Prophet Forecast - Mean Absolute Percentage Error - " +
            self.name,
            fontsize=24)
        ax3.set_xlabel(xlabel="Forecast Horizon (days)", fontsize=16)
        ax3.set_ylabel(ylabel="MAPE", fontsize=16)
        ax3.set_ylim([0.0, 0.8])
        plt.show()
Example #17
def cross_validation_worker(model, initial, period, horizon, metric):
    df_cv = cross_validation(model,
                             initial=initial,
                             period=period,
                             horizon=horizon)
    df_p = performance_metrics(df_cv)
    average_metric = df_p[metric].mean()
    return average_metric
Example #18
def test_model(model):

	# Making 90 forecasts with cutoffs between 2008-03-11 00:00:00 and 2010-08-18 00:00:00
	df_cv = cross_validation(model, initial='450 days', period='10 days', horizon = '100 days')
	df_cv.head()

	df_p = performance_metrics(df_cv)
	df_p.head()
Example #19
    def __cv_run(self, params):
        # cross-validation procedure
        model = Prophet(**params, holidays=self.holidays)
        model.fit(self.data)
        future = model.make_future_dataframe(freq=self.freq, periods=1)
        forecast = model.predict(future)
        data_merage = pd.concat([self.data, forecast[['yhat']]],
                                axis=1,
                                join='inner')
        mape_train = np.abs(data_merage['yhat'] / data_merage['y'] - 1).mean()

        initial_value = (self.data['ds'].max() -
                         self.data['ds'].min()).days / 1.9
        if 1.5 < initial_value < 5 or 7 < initial_value < 194 or initial_value > 368:
            initial_value = (self.data['ds'].max() -
                             self.data['ds'].min()) / 1.9
            horizon_weight = 1 - 1 / 1.9
            df_cv = cross_validation(
                model,
                initial=(self.data['ds'].max() - self.data['ds'].min()) / 1.9,
                period=(self.data['ds'].max() - self.data['ds'].min()) / 4.2,
                horizon=(self.data['ds'].max() - self.data['ds'].min()) / 2.2)
        elif initial_value <= 1.5:
            initial_value = '25 hours'
            period_value = '25 hours'
            horizon_value = self.data['ds'].max() - self.data['ds'].min(
            ) - datetime.timedelta(hours=25)
            horizon_weight = horizon_value / (self.data['ds'].max() -
                                              self.data['ds'].min())
            df_cv = cross_validation(model,
                                     initial=initial_value,
                                     period=period_value,
                                     horizon=horizon_value)
        elif initial_value <= 7:
            initial_value = '8 days'
            period_value = '8 days'
            horizon_value = self.data['ds'].max() - self.data['ds'].min(
            ) - datetime.timedelta(days=8)
            horizon_weight = horizon_value / (self.data['ds'].max() -
                                              self.data['ds'].min())
            df_cv = cross_validation(model,
                                     initial=initial_value,
                                     period=period_value,
                                     horizon=horizon_value)
        else:
            initial_value = '367 days'
            period_value = '367 days'
            horizon_value = self.data['ds'].max() - self.data['ds'].min(
            ) - datetime.timedelta(days=367)
            horizon_weight = horizon_value / (self.data['ds'].max() -
                                              self.data['ds'].min())
            df_cv = cross_validation(model,
                                     initial=initial_value,
                                     period=period_value,
                                     horizon=horizon_value)
        mape = (horizon_weight *
                performance_metrics(df_cv, metrics=['mape'])['mape'].mean() +
                (1 - horizon_weight) * mape_train)
        return mape
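The returned score blends in-sample and out-of-sample error: mape = horizon_weight * mean(CV MAPE) + (1 - horizon_weight) * train MAPE, where horizon_weight is the fraction of the series span covered by the CV horizon. Longer simulated horizons therefore weight the out-of-sample error more heavily.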
Example #20
    def ts_diagnose(self):
        """Diagnoses the fitted model"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._prophet_logger.exception(
                "Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

        if self._diagnose:
            if input(
                    "Run cross validation y/n? Note, depending on parameters provided "
                    "this can take some time...").strip().lower() == 'y':
                start = time()
                self._prophet_logger.info(
                    "Running cross validation using parameters provided....")
                if self._history is not None:
                    try:
                        self._prophet_cv = cross_validation(
                            self.model_fit,
                            initial=self._history,
                            period=self._step,
                            horizon=self._horizon)
                    except Exception:
                        self._prophet_logger.exception(
                            "Prophet cross validation error: check your "
                            "parameters 'history', 'horizon', 'step'!")
                else:
                    try:
                        self._prophet_cv = cross_validation(
                            self.model_fit,
                            period=self._step,
                            horizon=self._horizon)
                    except Exception:
                        self._prophet_logger.exception(
                            "Prophet cross validation error: "
                            "check your parameters 'horizon', 'step'!")

                self._prophet_logger.info("Time elapsed: {}".format(time() -
                                                                    start))
                simu_intervals = self._prophet_cv.groupby('cutoff')['ds'].agg([
                    ('forecast_start', 'min'), ('forecast_till', 'max')
                ])
                self._prophet_logger.info(
                    "The following time windows and cutoffs have been set up:\n")
                print(simu_intervals)
                #
                plot_cross_validation_metric(self._prophet_cv, metric='mape')
                #
                self._prophet_logger.info("Running performance metrics...")
                self._prophet_p = performance_metrics(self._prophet_cv)

            else:
                self._prophet_logger.info("OK")
                return
Example #21
 def cross_val(self):
     df_cv = cross_validation(self.m,
                              initial='62 days',
                              period='1 days',
                              horizon='7 days')
     # for col in ['yhat', 'yhat_lower', 'yhat_upper', 'y']:
     #     df_cv[col] = inv_boxcox(df_cv[col], lmbda)
     print(df_cv.sort_values('ds').tail())
     df_p = performance_metrics(df_cv)
     print(df_p)
Example #22
def cross_validate(fitted_model, training_range, forecast_range, cv_interval):
    '''
    Input --> a fitted Prophet model
    Output --> average MAPE
    '''
    # need to make sure that each initial/horizon covers a full week??
    df_cv = cross_validation(fitted_model, initial=training_range, horizon=forecast_range, period=cv_interval)
    df_p = performance_metrics(df_cv, rolling_window=1/7)
    
    return df_p['mape'].mean()
Example #23
 def test_cross_validation_uncertainty_disabled(self):
     df = self.__df.copy()
     for uncertainty in [0, False]:
         m = Prophet(uncertainty_samples=uncertainty)
         m.fit(df, algorithm='Newton')
         df_cv = diagnostics.cross_validation(
             m, horizon='4 days', period='4 days', initial='115 days')
         expected_cols = ['ds', 'yhat', 'y', 'cutoff']
         self.assertTrue(all(col in expected_cols for col in df_cv.columns.tolist()))
         df_p = diagnostics.performance_metrics(df_cv)
         self.assertTrue('coverage' not in df_p.columns)
Example #24
    def validator(self):
        self.__cv_metrics.df_cv = cross_validation(self.__model,
                                                   initial="1 days",
                                                   period="120 days",
                                                   horizon="15 days")

        self.__cv_metrics.df_perf = performance_metrics(
            self.__cv_metrics.df_cv)

        self.__cv_metrics.manual_mape = self.mean_absolute_percentage_error(
            self.__cv_metrics.df_cv.y, self.__cv_metrics.df_cv.yhat)
Example #25
def test_prophecy(ticker):
    df = get_daily_data(ticker, 90)
    df.rename(columns={'time': 'ds', 'close': 'y'}, inplace=True)

    m = Prophet()  # the original snippet assumed a module-level model; instantiate it here
    m.fit(df)

    df_cv = cross_validation(m, horizon='10 days')
    df_p = performance_metrics(df_cv)
    df_p.head(5)

    plot_cross_validation_metric(df_cv, metric='mape')
    plt.show()
Example #26
def cross_validate(df):

    prophet = Prophet()
    prophet.fit(df)

    df_cv = cross_validation(prophet, initial='30 days', period='4 days', horizon='7 days')
    df_performance = performance_metrics(df_cv)
    fig_performance = plot_cross_validation_metric(df_cv, metric='mape')

    return df_performance

Example #27
def cross_validate(df):

    prophet = Prophet()
    prophet.fit(df)

    df_cv = cross_validation(
        prophet, initial="30 days", period="4 days", horizon="7 days"
    )
    df_performance = performance_metrics(df_cv)
    fig_performance = plot_cross_validation_metric(df_cv, metric="mape")

    return df_performance
Example #28
 def fit_hex_model(hex_data: pd.DataFrame) -> Prophet:
     fit_data = hex_data[['timestamp', 'y']].copy()
     fit_data.rename(columns={'timestamp': 'ds'}, inplace=True)
     fit_data['cap'] = 1
     fit_data['floor'] = 0
     model = Prophet(n_changepoints=0, growth='logistic')
     forecaster = model.fit(fit_data, algorithm='Newton')
     df_cv = cross_validation(model,
                              initial='40 days',
                              period='7 days',
                              horizon='7 days')
     df_metrics = performance_metrics(df_cv)
     print(df_metrics.mdape.mean())
     return model  # the -> Prophet annotation implies the fitted model should be returned
Example #29
def process_job(conn, job):
    assert job['type'].lower() in ['cases', 'deaths', 'tests']

    print(f"{time.strftime('%H:%M:%S')} Starting job={job}")

    data = query_data(conn, job)

    df = prepare_data(job, data)
    m = create_model(job)

    m.fit(df)

    # predict a third of the look-back window into the future
    future_days = round(job['days_to_look_back'] / 3)
    future = m.make_future_dataframe(periods=future_days)

    future['cap'] = df['cap'][0]
    forecast = m.predict(future)

    # region debug
    if os.getenv('DOCKER_ACTIVE') is None:
        fig = m.plot(forecast)
        add_changepoints_to_plot(fig.gca(), m, forecast)
        fig.savefig(f"../job/prediction-{job['id']}.png")
    # endregion

    change_points = m.changepoints.dt.date.tolist()
    store_prediction(conn, job, forecast, change_points)

    # cross validate and create score
    if job['with_score']:
        # compute period to have 5-6 simulated forecasts
        horizon = pd.Timedelta("14 days")
        initial = horizon * 3
        period = (df.iloc[-1]['ds'] - df.iloc[0]['ds'] - horizon - initial) / 5

        df_cv = cross_validation(m,
                                 initial=initial,
                                 horizon=horizon,
                                 period=period,
                                 parallel='processes')
        df_p = performance_metrics(df_cv)

        # region debug
        if os.getenv('DOCKER_ACTIVE') is None:
            fig = plot_cross_validation_metric(df_cv, metric='mape')
            fig.savefig(f"../job/score-{job['id']}.png")
        # endregion

        score = df_p.iloc[-1]['mape']
        store_score(conn, job, score)
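A quick check of the cutoff arithmetic in the comment above, assuming for illustration 154 days of history:

import pandas as pd

span = pd.Timedelta("154 days")          # hypothetical history length
horizon = pd.Timedelta("14 days")
initial = horizon * 3                    # 42 days
period = (span - horizon - initial) / 5  # 19.6 days between cutoffs
# cutoffs run from `initial` up to `span - horizon`, i.e. about
# (span - initial - horizon) / period + 1 = 6 simulated forecasts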
Example #30
def cv_cases(state):

    df = prepdata_cases(state)

    prophet = Prophet()
    prophet.fit(df)

    df_cv = cross_validation(
        prophet, initial="50 days", period="4 days", horizon="7 days"
    )
    df_performance = performance_metrics(df_cv)
    fig_performance = plot_cross_validation_metric(df_cv, metric="mape")

    return plt.show()
Example #31
def single_cv_run(history_df, metrics, param_dict, parallel):
    m = Prophet(**param_dict)
    m.add_country_holidays(country_name='BR')
    history_df['cap'] = 2 * history_df["y"].max()
    m.fit(history_df)
    df_cv = cross_validation(m,
                             initial='3600 days',
                             horizon='1200 days',
                             parallel=parallel)
    df_p = performance_metrics(df_cv, rolling_window=1)
    df_p['params'] = str(param_dict)
    print(df_p.head())
    df_p = df_p.loc[:, metrics]
    return df_p
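A hedged call sketch for single_cv_run; the parameter values are placeholders, and history_df is assumed to hold the usual ds/y columns:

df_scores = single_cv_run(
    history_df,
    metrics=['rmse', 'mae', 'params'],            # columns to keep from performance_metrics
    param_dict={'changepoint_prior_scale': 0.1},  # forwarded to Prophet(**param_dict)
    parallel='processes',
)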
Example #32
def plot_cross_validation_metric(df_cv, metric, rolling_window=0.1, ax=None):
    """Plot a performance metric vs. forecast horizon from cross validation.

    Cross validation produces a collection of out-of-sample model predictions
    that can be compared to actual values, at a range of different horizons
    (distance from the cutoff). This computes a specified performance metric
    for each of those predictions, aggregated over a rolling window across the horizon.

    This uses fbprophet.diagnostics.performance_metrics to compute the metrics.
    Valid values of metric are 'mse', 'rmse', 'mae', 'mape', and 'coverage'.

    rolling_window is the proportion of data included in the rolling window of
    aggregation. The default value of 0.1 means 10% of data are included in the
    aggregation for computing the metric.

    As a concrete example, if metric='mse', then this plot will show the
    squared error for each cross validation prediction, along with the MSE
    averaged over rolling windows of 10% of the data.

    Parameters
    ----------
    df_cv: The output from fbprophet.diagnostics.cross_validation.
    metric: Metric name, one of ['mse', 'rmse', 'mae', 'mape', 'coverage'].
    rolling_window: Proportion of data to use for rolling average of metric.
        In [0, 1]. Defaults to 0.1.
    ax: Optional matplotlib axis on which to plot. If not given, a new figure
        will be created.

    Returns
    -------
    a matplotlib figure.
    """
    if ax is None:
        fig = plt.figure(facecolor='w', figsize=(10, 6))
        ax = fig.add_subplot(111)
    else:
        fig = ax.get_figure()
    # Get the metric at the level of individual predictions, and with the rolling window.
    df_none = performance_metrics(df_cv, metrics=[metric], rolling_window=0)
    df_h = performance_metrics(df_cv, metrics=[metric], rolling_window=rolling_window)

    # Some work because matplotlib does not handle timedelta
    # Target ~10 ticks.
    tick_w = max(df_none['horizon'].astype('timedelta64[ns]')) / 10.
    # Find the largest time resolution that has <1 unit per bin.
    dts = ['D', 'h', 'm', 's', 'ms', 'us', 'ns']
    dt_names = [
        'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds',
        'nanoseconds'
    ]
    dt_conversions = [
        24 * 60 * 60 * 10 ** 9,
        60 * 60 * 10 ** 9,
        60 * 10 ** 9,
        10 ** 9,
        10 ** 6,
        10 ** 3,
        1.,
    ]
    for i, dt in enumerate(dts):
        if np.timedelta64(1, dt) < np.timedelta64(tick_w, 'ns'):
            break

    x_plt = df_none['horizon'].astype('timedelta64[ns]').astype(np.int64) / float(dt_conversions[i])
    x_plt_h = df_h['horizon'].astype('timedelta64[ns]').astype(np.int64) / float(dt_conversions[i])

    ax.plot(x_plt, df_none[metric], '.', alpha=0.5, c='gray')
    ax.plot(x_plt_h, df_h[metric], '-', c='b')
    ax.grid(True)

    ax.set_xlabel('Horizon ({})'.format(dt_names[i]))
    ax.set_ylabel(metric)
    return fig
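A minimal usage sketch, assuming a fitted Prophet model m and the fbprophet imports used throughout these examples:

from fbprophet.diagnostics import cross_validation
from fbprophet.plot import plot_cross_validation_metric

df_cv = cross_validation(m, initial='730 days', period='180 days', horizon='365 days')
fig = plot_cross_validation_metric(df_cv, metric='rmse', rolling_window=0.1)
fig.savefig('cv_rmse.png')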