Example 1
 def test_cross_validation(self):
     m = Prophet()
     m.fit(self.__df)
     # Calculate the number of cutoff points (k)
     horizon = pd.Timedelta('4 days')
     period = pd.Timedelta('10 days')
     initial = pd.Timedelta('115 days')
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='115 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
     self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
     self.assertTrue(min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
     dc = df_cv['cutoff'].diff()
     dc = dc[dc > pd.Timedelta(0)].min()
     self.assertTrue(dc >= period)
     self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
     # Each y in df_cv and self.__df with same ds should be equal
     df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     self.assertAlmostEqual(
         np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='135 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
     with self.assertRaises(ValueError):
         diagnostics.cross_validation(
             m, horizon='10 days', period='10 days', initial='140 days')
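For reference, the cutoff count asserted above can be reproduced by hand: Prophet places the last cutoff one horizon before the end of the history and steps backwards in increments of period, keeping only cutoffs preceded by at least initial days of training data. A minimal sketch of that rule (an illustration, not Prophet's actual implementation; start and end stand for the first and last ds in the history):

def expected_cutoffs(start, end, horizon, period, initial):
    # walk backwards from end - horizon in steps of `period`, keeping
    # cutoffs that still have `initial` training data in front of them
    cutoffs = []
    cutoff = end - horizon
    while cutoff - start >= initial:
        cutoffs.append(cutoff)
        cutoff -= period
    return sorted(cutoffs)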
Example 2
 def test_cross_validation_default_value_check(self):
     m = Prophet()
     m.fit(self.__df)
     # Default value of initial should be equal to 3 * horizon
     df_cv1 = diagnostics.cross_validation(
         m, horizon='32 days', period='10 days')
     df_cv2 = diagnostics.cross_validation(
         m, horizon='32 days', period='10 days', initial='96 days')
     self.assertAlmostEqual(
         ((df_cv1['y'] - df_cv2['y']) ** 2).sum(), 0.0)
     self.assertAlmostEqual(
         ((df_cv1['yhat'] - df_cv2['yhat']) ** 2).sum(), 0.0)
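Here the default applies because 3 x 32 days = 96 days, so both calls train on identical windows and the assertions confirm the forecasts match exactly.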
Example 3
def run_prophet(target_variable, cap, floor, initial, period, horizon):
    df = prepare.wrangle_fitbit_data()
    df = prep_fb(df, target_variable)

    # use .loc for datetime row slicing (bare df["2018-11"] is no longer
    # supported for row selection) and copy to avoid SettingWithCopyWarning
    train = df.loc[:"2018-10"].copy()
    validate = df.loc["2018-11"]
    test = df.loc["2018-12"]

    train["cap"] = cap
    train["floor"] = floor

    train = train.reset_index()
    validate = validate.reset_index()
    test = test.reset_index()

    m = Prophet(growth='logistic',
                weekly_seasonality=True,
                daily_seasonality=False,
                changepoint_range=0.8)
    m.add_country_holidays(country_name='US')
    m.fit(train)

    df_cv = cross_validation(m, initial=initial, period=period, horizon=horizon)

    return m, df_cv
Example 4
def fbp(df, p, freq):
    model = fbprophet.Prophet()
    model.fit(df)
    future = model.make_future_dataframe(periods=p,
                                         freq=freq,
                                         include_history=True)
    # future.tail()
    forecast = model.predict(future)
    # model.plot(forecast)
    # model.plot_components(forecast)
    # print(forecast)
    if freq == 'Y':
        time_format = '%Y'
    elif freq == 'M':
        time_format = '%Y-%m'
    else:
        # default to daily so time_format is always defined
        time_format = '%Y-%m-%d'
    df_cv = cross_validation(model, horizon='30 days')
    df_pe = performance_metrics(df_cv)
    df_cv.to_csv('C:/Users/47135/Desktop/df_cv.csv', encoding='UTF-8')
    df_pe.to_csv('C:/Users/47135/Desktop/df_pe.csv', encoding='UTF-8')
    forecast['ds'] = forecast['ds'].dt.strftime(time_format)
    result = forecast.to_dict(orient='list')
    # print(result)
    return result
Example 5
def evaluate_model(model):
    df_cv = cross_validation(model,
                             initial="700 days",
                             period="92 days",
                             horizon="8 days")
    df_p = performance_metrics(df_cv)
    return df_cv, df_p
Example 6
 def test_performance_metrics(self):
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(m,
                                          horizon='4 days',
                                          period='10 days',
                                          initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=-1)
     self.assertEqual(
         set(df_none.columns),
         {'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse'},
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0
     df_0 = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(len(df_0), 4)
     self.assertEqual(len(df_0['horizon'].unique()), 4)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon), 4)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertAlmostEqual(df_all[metric].values[0],
                                df_none[metric].mean())
     self.assertAlmostEqual(df_all['mdape'].values[0],
                            df_none['mdape'].median())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
     # Skip MAPE
     df_cv.loc[0, 'y'] = 0.
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['coverage', 'mape'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'horizon'},
     )
     df_horizon = diagnostics.performance_metrics(
         df_cv,
         metrics=['mape'],
     )
     self.assertIsNone(df_horizon)
     # List of metrics containing non-valid metrics
     with self.assertRaises(ValueError):
         diagnostics.performance_metrics(
             df_cv,
             metrics=['mse', 'error_metric'],
         )
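Taken together, the assertions above document the rolling_window contract: -1 keeps one unaggregated row per forecast horizon, 0 aggregates rows sharing the same horizon, a fraction such as 0.2 aggregates over a rolling share of the data, and 1 collapses everything to a single row in which each metric is a mean except mdape, which is a median.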
Example 7
    def test_check_single_cutoff_forecast_func_calls(self):
        m = Prophet()
        m.fit(self.__df)
        mock_predict = pd.DataFrame({
            'ds':
            pd.date_range(start='2012-09-17', periods=3),
            'yhat':
            np.arange(16, 19),
            'yhat_lower':
            np.arange(15, 18),
            'yhat_upper':
            np.arange(17, 20),
            'y':
            np.arange(16.5, 19.5),
            'cutoff': [datetime.date(2012, 9, 15)] * 3
        })

        # cross-validation with 3 and 7 forecasts
        for args, forecasts in ((['4 days', '10 days', '115 days'], 3),
                                (['4 days', '4 days', '115 days'], 7)):
            with patch('fbprophet.diagnostics.single_cutoff_forecast'
                       ) as mock_func:
                mock_func.return_value = mock_predict
                df_cv = diagnostics.cross_validation(m, *args)
                # check single forecast function called expected number of times
                self.assertEqual(diagnostics.single_cutoff_forecast.call_count,
                                 forecasts)
Example 8
def validate_model(model, dates):
    """

    Background:

    This model validation function is still under construction and will be updated during a future release.


    """

    count_of_time_units = len(dates)
    #print(count_of_time_units)
    initial_size = str(int(count_of_time_units * 0.20)) + " days"
    horizon_size = str(int(count_of_time_units * 0.10)) + " days"
    period_size = str(int(count_of_time_units * 0.05)) + " days"

    df_cv = cross_validation(model,
                             initial=initial_size,
                             horizon=horizon_size,
                             period=period_size)
    #df_cv = cross_validation(model,initial='730 days', period='180 days', horizon = '365 days')
    df_p = performance_metrics(df_cv)

    #print(df_cv.head(100))
    #print(df_p.head(100))

    mape_score_avg = str(round(df_p['mape'].mean() * 100, 2)) + "%"

    return mape_score_avg
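Note that the helper converts 20%, 10%, and 5% of the timestamp count directly into day strings, so it implicitly assumes the series has daily frequency.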
Example 9
 def test_performance_metrics(self):
     m = Prophet()
     m.fit(self.__df)
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='10 days', initial='90 days')
     # Aggregation level none
     df_none = diagnostics.performance_metrics(df_cv, rolling_window=0)
     self.assertEqual(
         set(df_none.columns),
         {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
     )
     self.assertEqual(df_none.shape[0], 16)
     # Aggregation level 0.2
     df_horizon = diagnostics.performance_metrics(df_cv, rolling_window=0.2)
     self.assertEqual(len(df_horizon['horizon'].unique()), 4)
     self.assertEqual(df_horizon.shape[0], 14)
     # Aggregation level all
     df_all = diagnostics.performance_metrics(df_cv, rolling_window=1)
     self.assertEqual(df_all.shape[0], 1)
     for metric in ['mse', 'mape', 'mae', 'coverage']:
         self.assertEqual(df_all[metric].values[0], df_none[metric].mean())
     # Custom list of metrics
     df_horizon = diagnostics.performance_metrics(
         df_cv, metrics=['coverage', 'mse'],
     )
     self.assertEqual(
         set(df_horizon.columns),
         {'coverage', 'mse', 'horizon'},
     )
Example 10
def prophetForecast(rawData, startDate, modelDir, partitions):
	"""Forecasting with fbprophet"""
	from fbprophet import Prophet
	from fbprophet.diagnostics import cross_validation

	partitions = int(partitions)
	# initiate model
	prophet = Prophet()

	# put dates in df
	dates = pd.date_range(start=startDate, periods=len(rawData), freq="H")
	input_df = pd.DataFrame(rawData, columns=["y", "temp"])
	input_df["ds"] = dates.to_pydatetime()
	input_df.to_csv(pJoin(modelDir, "prophetin.csv"))

	# give prophet the input data
	with suppress_stdout_stderr():
		prophet.fit(input_df)

	# determine partition length for the cross-validation
	total_hours = len(input_df.ds)
	hp = total_hours // partitions  # horizon and period
	init = total_hours % partitions  # leftover hours seed the initial window

	# train prophet w/ those partitions
	# note: cross_validation expects the durations as strings
	out_df = cross_validation(
		prophet,
		initial="%d hours" % init,
		horizon="%d hours" % hp,
		period="%d hours" % hp,
	)
	out_df.to_csv(pJoin(modelDir, "prophetout.csv"))
	return (list(out_df.yhat), list(out_df.yhat_lower), list(out_df.yhat_upper))
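With this arithmetic, horizon and period both equal total_hours // partitions and the leftover hours seed the initial training window, so the simulated forecast windows tile the post-initial history without overlap.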
Example 11
def last6monthscrossval(dataframe, init, per, hor, cps, period):
    m = simpleforecastwithoutplot(cps, period, dataframe)
    df_cv = cross_validation(m, initial=str(init) + ' days',
                             period=str(per) + ' days',
                             horizon=str(hor) + ' days')
    # keep the most recent six months (181 days) of simulated forecasts
    last6 = df_cv.iloc[-181:]
    last6 = last6.reset_index(drop=True)
    return last6
Example 12
 def test_cross_validation_extra_regressors(self):
     df = self.__df.copy()
     df['extra'] = range(df.shape[0])
     df['is_conditional_week'] = np.arange(df.shape[0]) // 7 % 2
     m = Prophet()
     m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     m.add_seasonality(name='conditional_weekly',
                       period=7,
                       fourier_order=3,
                       prior_scale=2.,
                       condition_name='is_conditional_week')
     m.add_regressor('extra')
     m.fit(df)
     df_cv = diagnostics.cross_validation(m,
                                          horizon='4 days',
                                          period='4 days',
                                          initial='135 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
     period = pd.Timedelta('4 days')
     dc = df_cv['cutoff'].diff()
     dc = dc[dc > pd.Timedelta(0)].min()
     self.assertTrue(dc >= period)
     self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
     df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     self.assertAlmostEqual(
         np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)
Example 13
def cross_validation_worker(model, initial, period, horizon, metric):
    df_cv = cross_validation(model,
                             initial=initial,
                             period=period,
                             horizon=horizon)
    df_p = performance_metrics(df_cv)
    average_metric = df_p[metric].mean()
    return average_metric
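A hypothetical driver for the worker above, sweeping one Prophet hyperparameter (the grid values, the df history frame, and the window sizes are illustrative assumptions, not from the source):

from fbprophet import Prophet

scores = {}
for cps in [0.001, 0.01, 0.1, 0.5]:  # illustrative grid
    model = Prophet(changepoint_prior_scale=cps).fit(df)  # df: ds/y history
    scores[cps] = cross_validation_worker(model, '365 days', '90 days',
                                          '30 days', 'rmse')
best_cps = min(scores, key=scores.get)  # smallest average RMSE wins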
Example 14
def test_model(model):

	# Making 90 forecasts with cutoffs between 2008-03-11 00:00:00 and 2010-08-18 00:00:00
	df_cv = cross_validation(model, initial='450 days', period='10 days', horizon='100 days')
	df_cv.head()

	df_p = performance_metrics(df_cv)
	df_p.head()
Example 15
    def ts_diagnose(self):
        """Diagnoses the fitted model"""
        try:
            assert self.model_fit is not None
        except AssertionError:
            self._prophet_logger.exception("Model has to be fitted first! Please call ts_fit(...)")
            sys.exit("STOP")

        self.plot_residuals()

        if self._diagnose:
            if input("Run cross validation y/n? Note, depending on parameters provided "
                     "this can take some time...").strip().lower() == 'y':
                start = time()
                self._prophet_logger.info("Running cross validation using parameters provided....")
                if self._history is not None:
                    try:
                        self._prophet_cv = cross_validation(self.model_fit, initial=self._history,
                                                            period=self._step,
                                                            horizon=self._horizon)
                    except Exception:
                        self._prophet_logger.exception("Prophet cross validation error: check your "
                                                       "parameters 'history', 'horizon', 'step'!")
                else:
                    try:
                        self._prophet_cv = cross_validation(self.model_fit, period=self._step,
                                                            horizon=self._horizon)
                    except Exception:
                        self._prophet_logger.exception("Prophet cross validation error: "
                                                       "check your parameters 'horizon', 'step'!")

                self._prophet_logger.info("Time elapsed: {}".format(time() - start))
                simu_intervals = self._prophet_cv.groupby('cutoff')['ds'].agg(
                    [('forecast_start', 'min'),
                     ('forecast_till', 'max')])
                self._prophet_logger.info("Following time windows and cutoffs have been set-up:\n")
                print(simu_intervals)
                #
                plot_cross_validation_metric(self._prophet_cv, metric='mape')
                #
                self._prophet_logger.info("Running performance metrics...")
                self._prophet_p = performance_metrics(self._prophet_cv)

            else:
                self._prophet_logger.info("OK")
                return
Example 16
def cross_validating(model,
                     initial_days=730,
                     period_days=180,
                     horizon_days=365):
    df_cv = cross_validation(model,
                             initial=str(initial_days) + ' days',
                             period=str(period_days) + ' days',
                             horizon=str(horizon_days) + ' days')
    return df_cv
Example 17
 def test_cross_validation_uncertainty_disabled(self):
     df = self.__df.copy()
     for uncertainty in [0, False]:
         m = Prophet(uncertainty_samples=uncertainty)
         m.fit(df)
         df_cv = diagnostics.cross_validation(
             m, horizon='4 days', period='4 days', initial='115 days')
         expected_cols = ['ds', 'yhat', 'y', 'cutoff']
         self.assertTrue(all(col in expected_cols for col in df_cv.columns.tolist()))
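With uncertainty_samples set to 0 (or False), Prophet skips interval estimation entirely, so yhat_lower and yhat_upper never appear in the cross-validation output; the column check above verifies exactly that.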
Example 18
    def test_cross_validation(self):
        m = Prophet()
        m.fit(self.__df)
        # Calculate the number of cutoff points (k)
        horizon = pd.Timedelta('4 days')
        period = pd.Timedelta('10 days')
        initial = pd.Timedelta('115 days')
        methods = [None, 'processes', 'threads', CustomParallelBackend()]

        try:
            from dask.distributed import Client
            client = Client(processes=False)  # noqa
            methods.append("dask")
        except ImportError:
            pass

        for parallel in methods:
            df_cv = diagnostics.cross_validation(m,
                                                 horizon='4 days',
                                                 period='10 days',
                                                 initial='115 days',
                                                 parallel=parallel)
            self.assertEqual(len(np.unique(df_cv['cutoff'])), 3)
            self.assertEqual(max(df_cv['ds'] - df_cv['cutoff']), horizon)
            self.assertTrue(
                min(df_cv['cutoff']) >= min(self.__df['ds']) + initial)
            dc = df_cv['cutoff'].diff()
            dc = dc[dc > pd.Timedelta(0)].min()
            self.assertTrue(dc >= period)
            self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
            # Each y in df_cv and self.__df with same ds should be equal
            df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
            self.assertAlmostEqual(
                np.sum((df_merged['y_x'] - df_merged['y_y'])**2), 0.0)
            df_cv = diagnostics.cross_validation(m,
                                                 horizon='4 days',
                                                 period='10 days',
                                                 initial='135 days')
            self.assertEqual(len(np.unique(df_cv['cutoff'])), 1)
            with self.assertRaises(ValueError):
                diagnostics.cross_validation(m,
                                             horizon='10 days',
                                             period='10 days',
                                             initial='140 days')

        # invalid alias
        with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
            diagnostics.cross_validation(m, horizon="4 days", parallel="bad")

        # no map method
        with self.assertRaisesRegex(ValueError, "'parallel' should be one"):
            diagnostics.cross_validation(m,
                                         horizon="4 days",
                                         parallel=object())
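The CustomParallelBackend above is defined elsewhere in the test module; the final assertion implies that any object passed as parallel must expose a map method. A minimal sketch of such a backend (an assumption, not the test suite's actual definition):

class CustomParallelBackend:
    def map(self, func, *iterables):
        # sequential stand-in for a parallel map
        return [func(*args) for args in zip(*iterables)]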
Example 19
 def cross_val(self):
     df_cv = cross_validation(self.m,
                              initial='62 days',
                              period='1 days',
                              horizon='7 days')
     # for col in ['yhat', 'yhat_lower', 'yhat_upper', 'y']:
     #     df_cv[col] = inv_boxcox(df_cv[col], lmbda)
     print(df_cv.sort_values('ds').tail())
     df_p = performance_metrics(df_cv)
     print(df_p)
Example 20
def cross_validate(fitted_model, training_range, forecast_range, cv_interval):
    '''
    Input --> a fitted model plus window sizes
    Output --> avg MAPE
    '''
    # need to make sure that each initial/horizon covers a full week??
    df_cv = cross_validation(fitted_model, initial=training_range, horizon=forecast_range, period=cv_interval)
    df_p = performance_metrics(df_cv, rolling_window=1/7)
    
    return df_p['mape'].mean()
Example 21
 def cross_validate(self, model, horizon, save=False):
     
     # cross-validation
     from fbprophet.diagnostics import cross_validation
     
     df_cv = cross_validation(model, horizon=horizon)
     if save is True:
         df_cv.to_csv('c:\\scnguh\\cross_validate.csv', index=False)
     else:
         return df_cv
Example 22
    def validator(self):
        self.__cv_metrics.df_cv = cross_validation(self.__model,
                                                   initial="1 days",
                                                   period="120 days",
                                                   horizon="15 days")

        self.__cv_metrics.df_perf = performance_metrics(
            self.__cv_metrics.df_cv)

        self.__cv_metrics.manual_mape = self.mean_absolute_percentage_error(
            self.__cv_metrics.df_cv.y, self.__cv_metrics.df_cv.yhat)
Example 23
 def test_cross_validation_custom_cutoffs(self):
     m = Prophet()
     m.fit(self.__df)
     # When specifying a list of cutoffs,
     #  the cutoff dates in df_cv are those specified
     df_cv1 = diagnostics.cross_validation(
         m,
         horizon='32 days',
         period='10 days',
         cutoffs=[pd.Timestamp('2012-07-31'), pd.Timestamp('2012-08-31')])
     self.assertEqual(len(df_cv1['cutoff'].unique()), 2)
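When an explicit cutoffs list is supplied, forecasts are simulated from exactly those dates rather than from cutoffs generated out of initial and period, which is why df_cv1 contains precisely the two cutoffs asserted.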
Example 24
    def test_shim(self):
        m = Prophet()
        m.fit(DATA)
        future = m.make_future_dataframe(10, include_history=False)
        fcst = m.predict(future)
 
        df_cv = cross_validation(
            model=m, horizon='4 days', period='10 days', initial='115 days',
        )

        fig = plot.plot_forecast_component(m=m, fcst=fcst, name='weekly')
Example 25
 def test_cross_validation_logistic(self):
     df = self.__df.copy()
     df['cap'] = 40
     m = Prophet(growth='logistic').fit(df)
     df_cv = diagnostics.cross_validation(
         m, horizon='1 days', period='1 days', initial='140 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
     self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
     df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     self.assertAlmostEqual(
         np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)
Example 27
def variable_selection(df, best_params):
    """ Determine which left-out variable has the biggest impact on the error
        (this function may be skipped if there are no additional variables).
    :param df: The dataset
    :param best_params: Best parameters for the model yielded from hyperparameter tuning
    :return variable_sel_results: Returns a result dataframe
    """
    variable_sel_results = pd.DataFrame()

    # Change column names for Prophet
    temp_df = df[['timestamp', 'total_daily_arrivals']].copy()
    temp_df.columns = ['ds', 'y']

    for i, feature_leftout in enumerate(FEATURES):
        variables = get_variables(df, feature_leftout)
        start1 = time.time()
        print('-----------------------\nRound {} without feature: {}'.format(
            i, feature_leftout))

        with suppress_stdout_stderr():
            model = Prophet(**best_params,
                            uncertainty_samples=0,
                            daily_seasonality=False,
                            yearly_seasonality=True,
                            holidays=variables).fit(temp_df)
            df_crossv = cross_validation(model,
                                         initial='1100 days',
                                         horizon='1 days',
                                         period='1 days',
                                         parallel='processes')

        pred = df_crossv['yhat']
        ground_truth = temp_df['y'][1101:]

        mape = compute_mape(ground_truth, pred)
        rmse = np.sqrt(mean_squared_error(ground_truth, pred))
        mae = mean_absolute_error(ground_truth, pred)

        new_row = {
            'Variable_left_out': feature_leftout,
            'MAPE': mape,
            'RMSE': rmse,
            'MAE': mae
        }
        # DataFrame.append was removed in pandas 2.0; use concat instead
        variable_sel_results = pd.concat(
            [variable_sel_results, pd.DataFrame([new_row])], ignore_index=True)
        end1 = time.time()
        print(
            'Training and validation took {} minutes. Feature leftout: {}. Yielded MAPE: {}'
            .format((end1 - start1) / 60, feature_leftout, mape))

    for index, row in variable_sel_results.iterrows():
        print(row['Variable_left_out'], row['MAPE'], row['RMSE'], row['MAE'])
    return variable_sel_results
Example 28
def test_prophecy(ticker):
    df = get_daily_data(ticker, 90)
    df.rename(columns={'time': 'ds', 'close': 'y'}, inplace=True)

    m = Prophet()  # the original relied on a global `m`; instantiate locally
    m.fit(df)

    df_cv = cross_validation(m, horizon='10 days')
    df_p = performance_metrics(df_cv)
    df_p.head(5)

    plot_cross_validation_metric(df_cv, metric='mape')
    plt.show()
Example 29
def cross_validate(df):

    prophet = Prophet()
    prophet.fit(df)

    df_cv = cross_validation(
        prophet, initial="30 days", period="4 days", horizon="7 days"
    )
    df_performance = performance_metrics(df_cv)
    fig_performance = plot_cross_validation_metric(df_cv, metric="mape")

    return df_performance
Example 30
def cross_validate(df):

    prophet = Prophet()
    prophet.fit(df)

    df_cv = cross_validation(prophet, initial='30 days', period='4 days', horizon='7 days')
    df_performance = performance_metrics(df_cv)
    fig_performance = plot_cross_validation_metric(df_cv, metric='mape')

    return df_performance

#print(predict(df_cases_fb))
Example 31
 def fit_hex_model(hex_data: pd.DataFrame) -> Prophet:
     fit_data = hex_data[['timestamp', 'y']].copy()
     fit_data.rename(columns={'timestamp': 'ds'}, inplace=True)
     fit_data['cap'] = 1
     fit_data['floor'] = 0
     model = Prophet(n_changepoints=0, growth='logistic')
     model.fit(fit_data, algorithm='Newton')
     df_cv = cross_validation(model,
                              initial='40 days',
                              period='7 days',
                              horizon='7 days')
     df_metrics = performance_metrics(df_cv)
     print(df_metrics.mdape.mean())
     return model  # the annotated return type (Prophet) requires a return
Example 32
def process_job(conn, job):
    assert job['type'].lower() in ['cases', 'deaths', 'tests']

    print(f"{time.strftime('%H:%M:%S')} Starting job={job}")

    data = query_data(conn, job)

    df = prepare_data(job, data)
    m = create_model(job)

    m.fit(df)

    # predict a third of the look-back window into the future
    future_days = round(job['days_to_look_back'] / 3)
    future = m.make_future_dataframe(periods=future_days)

    future['cap'] = df['cap'][0]
    forecast = m.predict(future)

    # region debug
    if os.getenv('DOCKER_ACTIVE') is None:
        fig = m.plot(forecast)
        add_changepoints_to_plot(fig.gca(), m, forecast)
        fig.savefig(f"../job/prediction-{job['id']}.png")
    # endregion

    change_points = m.changepoints.dt.date.tolist()
    store_prediction(conn, job, forecast, change_points)

    # cross validate and create score
    if job['with_score']:
        # compute period to have 5-6 simulated forecasts
        horizon = pd.Timedelta("14 days")
        initial = horizon * 3
        period = (df.iloc[-1]['ds'] - df.iloc[0]['ds'] - horizon - initial) / 5

        df_cv = cross_validation(m,
                                 initial=initial,
                                 horizon=horizon,
                                 period=period,
                                 parallel='processes')
        df_p = performance_metrics(df_cv)

        # region debug
        if os.getenv('DOCKER_ACTIVE') is None:
            fig = plot_cross_validation_metric(df_cv, metric='mape')
            fig.savefig(f"../job/score-{job['id']}.png")
        # endregion

        score = df_p.iloc[-1]['mape']
        store_score(conn, job, score)
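The period arithmetic above spreads the history remaining after the initial and horizon windows across five equal steps, so Prophet, which places one cutoff every period between those bounds, ends up simulating the intended 5-6 forecasts.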
Example 33
def single_cv_run(history_df, metrics, param_dict, parallel):
    m = Prophet(**param_dict)
    m.add_country_holidays(country_name='BR')
    history_df['cap'] = 2 * history_df["y"].max()
    m.fit(history_df)
    df_cv = cross_validation(m,
                             initial='3600 days',
                             horizon='1200 days',
                             parallel=parallel)
    df_p = performance_metrics(df_cv, rolling_window=1)
    df_p['params'] = str(param_dict)
    print(df_p.head())
    df_p = df_p.loc[:, metrics]
    return df_p
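A hypothetical way to drive single_cv_run over a small parameter grid (the grid values and the history_df frame are illustrative assumptions):

import itertools
import pandas as pd

param_grid = {
    'growth': ['logistic'],                  # cap column is added by the helper
    'changepoint_prior_scale': [0.01, 0.1],  # illustrative values
}
all_params = [dict(zip(param_grid, vals))
              for vals in itertools.product(*param_grid.values())]
results = pd.concat([
    single_cv_run(history_df, ['rmse', 'params'], params, parallel='processes')
    for params in all_params
])
print(results.sort_values('rmse'))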
Example 34
def createDiagnosticsLayout(signal, frequency, holidayDropdown, holidayScale, seasonalityScale, changepointScale, seasonalityMode, contents, filename, paramSearch):
    print(signal, frequency, holidayDropdown, holidayScale, seasonalityScale, changepointScale, seasonalityMode, filename, paramSearch)
    if signal == 'VOID':
        return None
    if signal != 'NOTIFY':
        return html.Div('Encountered an error : ' + signal, style = {'margin-left' : '1rem'})
    df = parseContents(contents, filename)
    model = generateModel(frequency, holidayDropdown, holidayScale, seasonalityScale, changepointScale, seasonalityMode, df, paramSearch)
    initial, period, horizon =  getParams(frequency, len(df))
    df_cv = cross_validation(model, initial=initial, period=period, horizon=horizon)
    #df_p = performance_metrics(df_cv, rolling_window = 0)
    #print(df_p.head())
    fig = mpl_to_plotly(plot_cross_validation_metric(df_cv, metric = 'mae', rolling_window = 0))
    return html.Div(children = [html.H6('Mean Absolute Error', style = {'margin-left': '1rem'}), dcc.Graph(figure = fig)])
Example 35
def cv_cases(state):

    df = prepdata_cases(state)

    prophet = Prophet()
    prophet.fit(df)

    df_cv = cross_validation(
        prophet, initial="50 days", period="4 days", horizon="7 days"
    )
    df_performance = performance_metrics(df_cv)
    fig_performance = plot_cross_validation_metric(df_cv, metric="mape")

    return plt.show()
Example 36
 def test_cross_validation_extra_regressors(self):
     df = self.__df.copy()
     df['extra'] = range(df.shape[0])
     m = Prophet()
     m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     m.add_regressor('extra')
     m.fit(df)
     df_cv = diagnostics.cross_validation(
         m, horizon='4 days', period='4 days', initial='135 days')
     self.assertEqual(len(np.unique(df_cv['cutoff'])), 2)
     period = pd.Timedelta('4 days')
     dc = df_cv['cutoff'].diff()
     dc = dc[dc > pd.Timedelta(0)].min()
     self.assertTrue(dc >= period)
     self.assertTrue((df_cv['cutoff'] < df_cv['ds']).all())
     df_merged = pd.merge(df_cv, self.__df, 'left', on='ds')
     self.assertAlmostEqual(
         np.sum((df_merged['y_x'] - df_merged['y_y']) ** 2), 0.0)