def fit_prophet(dtf_train, dtf_test, lst_exog=None, model=None, freq="D", conf=0.95, figsize=(15,10)):
    """
    Fit a Prophet model on the training frame and evaluate it on the test frame.

    :param dtf_train: training frame with columns "ds", "y" (plus "cap" for
        logistic growth and every column named in lst_exog)
    :param dtf_test: test frame with the same columns as dtf_train
    :param lst_exog: optional list of extra-regressor column names
    :param model: optional pre-configured Prophet instance; a default
        multiplicative-seasonality model is built when omitted
    :param freq: frequency string passed to make_future_dataframe
    :param conf: interval width used for the default model and the evaluation
    :param figsize: figure size forwarded to utils_evaluate_ts_model
    :return: (dtf, model) - the evaluated frame returned by
        utils_evaluate_ts_model and the fitted model
    """
    # Local import: the original block never referenced pandas by name.
    import pandas as pd

    ## setup prophet
    if model is None:
        model = Prophet(growth="linear", changepoints=None, n_changepoints=25, seasonality_mode="multiplicative",
                        yearly_seasonality="auto", weekly_seasonality="auto", daily_seasonality="auto",
                        holidays=None, interval_width=conf)
    if lst_exog is not None:
        lst_exog = list(lst_exog)
        for regressor in lst_exog:
            model.add_regressor(regressor)

    ## train
    model.fit(dtf_train)

    ## test
    dtf_prophet = model.make_future_dataframe(periods=len(dtf_test)+10, freq=freq, include_history=True)

    if model.growth == "logistic":
        dtf_prophet["cap"] = dtf_train["cap"].unique()[0]

    if lst_exog is not None:
        # Align regressors by date instead of by position: the future frame is
        # 10 periods longer than the test set, so "the last len(dtf_test) rows"
        # are not the test rows, and a chained iloc[...][cols] assignment
        # writes to a temporary anyway.
        exog_cols = ["ds"] + lst_exog
        dtf_exog = pd.concat([dtf_train[exog_cols], dtf_test[exog_cols]], ignore_index=True)
        dtf_prophet = dtf_prophet.merge(dtf_exog, how="left", on="ds")
        # The periods beyond the test set are never reported (only rows whose
        # "ds" matches train/test are merged back below); carry the last known
        # regressor values forward so predict() does not reject NaNs there.
        dtf_prophet[lst_exog] = dtf_prophet[lst_exog].ffill()

    dtf_prophet = model.predict(dtf_prophet)
    dtf_train = dtf_train.merge(dtf_prophet[["ds","yhat"]], how="left").rename(
        columns={'yhat':'model', 'y':'ts'}).set_index("ds")
    dtf_test = dtf_test.merge(dtf_prophet[["ds","yhat","yhat_lower","yhat_upper"]], how="left").rename(
        columns={'yhat':'forecast', 'y':'ts', 'yhat_lower':'lower', 'yhat_upper':'upper'}).set_index("ds")

    ## evaluate
    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    dtf = pd.concat([dtf_train, dtf_test])
    dtf = utils_evaluate_ts_model(dtf, conf=conf, figsize=figsize, title="Prophet")
    return dtf, model
Beispiel #2
0
 def test_cross_validation_extra_regressors(self):
     """Cross-validation works with an extra regressor and a conditional seasonality."""
     history = self.__df.copy()
     n_rows = history.shape[0]
     history['extra'] = range(n_rows)
     # Flag that alternates week by week; drives the conditional seasonality
     history['is_conditional_week'] = np.arange(n_rows) // 7 % 2

     model = Prophet()
     model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
     model.add_seasonality(
         name='conditional_weekly',
         period=7,
         fourier_order=3,
         prior_scale=2.,
         condition_name='is_conditional_week',
     )
     model.add_regressor('extra')
     model.fit(history)

     cv_result = diagnostics.cross_validation(
         model, horizon='4 days', period='4 days', initial='135 days')
     cutoffs = cv_result['cutoff']
     self.assertEqual(len(np.unique(cutoffs)), 2)

     # Distinct cutoffs must be at least one period apart
     min_period = pd.Timedelta('4 days')
     cutoff_steps = cutoffs.diff()
     smallest_step = cutoff_steps[cutoff_steps > pd.Timedelta(0)].min()
     self.assertTrue(smallest_step >= min_period)

     # Every forecasted date lies strictly after its cutoff
     self.assertTrue((cutoffs < cv_result['ds']).all())

     # The y values carried in the CV output equal the original observations
     joined = pd.merge(cv_result, self.__df, 'left', on='ds')
     squared_error = np.sum((joined['y_x'] - joined['y_y'])**2)
     self.assertAlmostEqual(squared_error, 0.0)
Beispiel #3
0
 def test_seasonality_modes(self):
     """Additive and multiplicative components are assigned to the right mode."""
     # Model with holidays, seasonalities, and extra regressors
     xmas = pd.DataFrame({
         'ds': pd.to_datetime(['2016-12-25']),
         'holiday': ['xmas'],
         'lower_window': [-1],
         'upper_window': [0],
     })
     model = Prophet(seasonality_mode='multiplicative', holidays=xmas)
     model.add_seasonality('monthly', period=30, mode='additive', fourier_order=3)
     model.add_regressor('binary_feature', mode='additive')
     model.add_regressor('numeric_feature')

     # Construct seasonal features
     frame = DATA.copy()
     frame['binary_feature'] = [0] * 255 + [1] * 255
     frame['numeric_feature'] = range(510)
     frame = model.setup_dataframe(frame, initialize_scales=True)
     model.history = frame.copy()
     model.set_auto_seasonalities()
     features = model.make_all_seasonality_features(frame)
     seasonal_features, prior_scales, component_cols, modes = features

     # Number of feature columns feeding each aggregate term
     self.assertEqual(sum(component_cols['additive_terms']), 7)
     self.assertEqual(sum(component_cols['multiplicative_terms']), 29)

     expected_additive = {
         'monthly',
         'binary_feature',
         'additive_terms',
         'extra_regressors_additive',
     }
     expected_multiplicative = {
         'weekly',
         'yearly',
         'xmas',
         'numeric_feature',
         'multiplicative_terms',
         'extra_regressors_multiplicative',
         'holidays',
     }
     self.assertEqual(set(modes['additive']), expected_additive)
     self.assertEqual(set(modes['multiplicative']), expected_multiplicative)
Beispiel #4
0
 def prophet_fit_and_predict_with_exog_and_advance_vars(
         y: [[float]],
         k: int,
         t: [float],
         a: [[float]],
         model_params: dict = None) -> Tuple[List, List, Any, Any]:
     """ Simpler wrapper for testing - univariate w/ advance vars w/ supplied times and future times

     :param y:             observations, one vector per time step; y[i][0] is the target,
                           y[i][1:] are contemporaneous exogenous values
     :param k:             number of steps ahead to predict
     :param t:             epoch times, len(y)+k of them (the last k are future times)
     :param a:             known-in-advance variables, len(y)+k rows
     :param model_params:  optional overrides applied on top of PROPHET_MODEL
     :returns: (x, x_std, forecast, m) - last k yhat values, upper-minus-lower interval
               widths, the forecast frame and the fitted model
     """
     n_obs = len(y)
     assert len(t) == n_obs + k
     assert len(a) == n_obs + k
     assert isinstance(y[0], List)

     # Training frame: advance variables first, then target and exogenous columns
     a_cols = [f'a{i}' for i in range(len(a[0]))]
     df = pd.DataFrame(columns=a_cols, data=a[:-k])
     Y = transpose(y)
     df['y'] = Y[0]
     width = len(y[0])
     n_exog = width - 1
     y_cols = ['y'] + [f'y{i}' for i in range(1, width)]
     for i in range(1, n_exog + 1):
         df[f'y{i}'] = Y[i][:n_obs]
     dt = epoch_to_naive_datetime(t)
     df['ds'] = dt[:n_obs]

     # Model defaults, optionally overridden by the caller
     kwargs_used = dict(PROPHET_MODEL.items())
     if model_params:
         kwargs_used.update(model_params)
     m = Prophet(**kwargs_used)
     for regressor_name in a_cols + y_cols[1:]:
         m.add_regressor(name=regressor_name)
     with no_stdout_stderr():
         m.fit(df)

     # Future frame uses the caller's times and the full advance-variable history
     freq = infer_freq_from_epoch(t)
     future = m.make_future_dataframe(periods=k, freq=freq)
     future['ds'] = dt
     for a_col, a_vals in zip(a_cols, transpose(a)):
         future[a_col] = a_vals
     for i in range(1, n_exog + 1):  # Just bring forward
         last_seen = Y[i][-1]
         future[f'y{i}'] = Y[i] + [last_seen] * k

     forecast = m.predict(future)
     x = forecast['yhat'].values[-k:]  # Use m.plot(forecast) to take a peek
     # NOTE(review): x_std covers every forecast row while x holds only the last k - confirm intent
     uppers = forecast['yhat_upper'].values
     lowers = forecast['yhat_lower'].values
     x_std = [hi - lo for hi, lo in zip(uppers, lowers)]
     return x, x_std, forecast, m
Beispiel #5
0
def estimate(file_path):
    """
    Fit a logistic-growth Prophet model with an 'avg_prc' regressor on the CSV
    at file_path and score it on the held-out tail.

    The CSV is expected to provide 'dt', 'dhour', 'sales' and 'avg_prc'
    columns. Rows are re-indexed onto an hourly grid between START_DATE and
    END_DATE, missing sales become 0 and missing prices the median price.
    Sales are capped at 1.5 x the third quartile, then both series are
    min-max scaled before fitting.

    :param file_path: path (or buffer) accepted by pandas.read_csv
    :return: (rmse, r2score) computed on the last PRED_DAYS rows
    :raises FileNotFoundError: when the CSV cannot be loaded
    """
    try:
        df = pd.read_csv(file_path)
    except Exception as err:
        # Callers rely on FileNotFoundError; keep that type but chain the real
        # cause and stop swallowing KeyboardInterrupt/SystemExit.
        raise FileNotFoundError('파일을 찾을 수 없습니다') from err

    # Build the timestamp column and align the data on a complete hourly grid
    date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='1H')[:-1]
    df['ds'] = df['dt'].map(str) + " " + df['dhour'].map(str) + ":00:00"
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.set_index('ds')
    df = pd.merge(date_range.to_frame(), df, left_index=True, right_index=True, how='left')
    df['ds'] = df.index
    df = df.rename(columns={'sales' : 'y'})
    missing_fill_val = {'avg_prc' : df.avg_prc.median(), 'y' : 0}
    df = df.fillna(missing_fill_val)

    # Clip outliers: anything at or above 1.5 x Q3 is set to that cap
    q3 = df['y'].quantile(q=0.75)
    cap = q3 * 1.5
    df['y'] = df['y'].clip(upper=cap)
    # Explicit copy so the assignments below act on an independent frame
    df = df[['ds', 'y', 'avg_prc']].copy()

    scaler = MinMaxScaler()
    scaled_value = scaler.fit_transform(df[['avg_prc', 'y']].values)
    df[['avg_prc', 'y']] = scaled_value
    df['floor'] = 0
    df['cap'] = 1.2

    # NOTE: despite its name, PRED_DAYS counts rows of the hourly grid here
    train = df[:-PRED_DAYS]
    test = df[-PRED_DAYS:]

    m = Prophet(growth='logistic', holidays=holidays, holidays_prior_scale=1)
    m.add_regressor('avg_prc')
    m.fit(train)
    future = m.make_future_dataframe(periods=PRED_DAYS, freq='H')
    future = pd.merge(future, train, left_on='ds', right_on='ds', how='left')
    future = future[['ds', 'floor', 'cap', 'avg_prc']]
    # Future rows have no regressor values: reuse the last price in df
    future_fill_missing = {'avg_prc' : df.iloc[len(df)-1]['avg_prc'], 'cap' : 1.2, 'floor' : 0}
    future = future.fillna(future_fill_missing)
    forecast = m.predict(future)

    # Copy the tail before editing it so the forecast frame is left untouched
    pred = forecast[['ds', 'yhat']][-PRED_DAYS:].copy()
    pred['yhat'] = np.where(pred['yhat'] < 0, 0, pred['yhat'])

    rmse = math.sqrt(mean_squared_error(test['y'], pred['yhat']))
    r2score = r2_score(test['y'], pred['yhat'])

    return rmse, r2score
Beispiel #6
0
    def test_regressor_coefficients(self):
        """regressor_coefficients reports one row per regressor with degenerate bounds."""
        frame = DATA.copy()
        n_obs = DATA.shape[0]
        np.random.seed(123)
        for column in ('regr1', 'regr2'):
            frame[column] = np.random.normal(size=n_obs)

        model = Prophet()
        model.add_regressor('regr1', mode='additive')
        model.add_regressor('regr2', mode='multiplicative')
        model.fit(frame)

        coefs = regressor_coefficients(model)
        self.assertTrue(coefs.shape == (2, 6))
        # No MCMC sampling, so lower and upper should be the same as mean
        point_estimates = coefs['coef'].values
        for bound in ('coef_lower', 'coef_upper'):
            self.assertTrue(
                np.array_equal(coefs[bound].values, point_estimates))
Beispiel #7
0
def get_model_forecast(info):
    """
    Fit a logistic-growth Prophet model from a serialized record and forecast
    PRED_DAYS periods ahead.

    :param info: mapping with string-encoded Python literals under
        'train_ds', 'train_y', 'train_avg_prc' and 'test_avg_prc', and a
        JSON holidays table under 'holidays'
    :return: (model, forecast) - the fitted Prophet model and its forecast
        frame covering history plus PRED_DAYS future periods
    """
    ds = ast.literal_eval(info['train_ds'])
    y = ast.literal_eval(info['train_y'])
    avg_prc = ast.literal_eval(info['train_avg_prc'])
    test_avg_prc = ast.literal_eval(info['test_avg_prc'])
    data = pd.DataFrame({'ds': ds, 'y': y, 'avg_prc': avg_prc})
    holidays = pd.read_json(info['holidays'])

    ## feature engineering
    # Rescale the price to a 0-100 range relative to the training maximum;
    # a non-positive maximum is shifted by one before dividing.
    peak = data['avg_prc'].max()
    denominator = peak if peak > 0 else peak + 1
    data['avg_prc'] = data['avg_prc'] / denominator * 100
    data['cap'] = 100.0
    data['floor'] = 0.0

    ## run prophet
    model = Prophet(growth='logistic', holidays=holidays)
    model.add_country_holidays(country_name='KR')
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    # The price is only usable as a regressor when it has no gaps
    use_price = data['avg_prc'].isna().sum() == 0
    if use_price:
        model.add_regressor('avg_prc')
    model.fit(data)

    ## get estimation
    future = model.make_future_dataframe(periods=PRED_DAYS)
    if use_price:
        raw_price = pd.concat(
            [pd.Series(avg_prc), pd.Series(test_avg_prc)], ignore_index=True)
        # Apply the training rescaling to the prediction inputs as well;
        # feeding raw prices to a model fitted on rescaled ones would put the
        # regressor on a different scale at predict time.
        future['avg_prc'] = raw_price / denominator * 100
    future['cap'] = 100
    future['floor'] = 0.0

    forecast = model.predict(future)

    return model, forecast
Beispiel #8
0
 def get_model_forecast_pred(self):
     """
     Fit a logistic-growth Prophet model on all but the last PRED_DAYS rows of
     self.data and forecast the held-out span.

     Optional country holidays, a monthly seasonality and the 'avg_prc'
     regressor are enabled through the ADD_COUNTRY_HOLIDAY,
     ADD_MONTHLY_SEASONALITY and PRC flags on the instance.

     :return: (model, forecast, pred) - the fitted model, the full forecast
         frame, and the last PRED_DAYS rows of ('ds', 'yhat') with negative
         predictions replaced by 0
     """
     train = self.data[:-self.PRED_DAYS]
     model = Prophet(growth='logistic',
                     holidays=self.holidays,
                     holidays_prior_scale=self.holiday_weight,
                     seasonality_prior_scale=self.seasonality_weight,
                     changepoint_prior_scale=self.changepoint_weight,
                     changepoint_range=self.changepoint_range,
                     changepoints=self.changepoints if self.changepoints else None,
             )

     if self.ADD_COUNTRY_HOLIDAY:
         model.add_country_holidays(country_name='KR')
     if self.ADD_MONTHLY_SEASONALITY:
         model.add_seasonality(name='montly_seasonality', period=30.5, fourier_order=5)
     if self.PRC:
         # Regressors must be registered before fitting
         model.add_regressor('avg_prc', prior_scale=self.price_weight, standardize=False)

     model.fit(train)
     future = model.make_future_dataframe(periods=self.PRED_DAYS)

     if self.PRC:
         future = pd.merge(future, train, left_on='ds', right_on='ds', how='left')
         # Explicit copy so the column assignment below acts on an independent frame
         future = future[['ds', 'floor', 'cap', 'avg_prc']].copy()
         # Prices for the whole span (history + horizon) come from self.data
         future['avg_prc'] = self.data.avg_prc.values
         future = future.fillna({'cap' : 100, 'floor' : 0.0})
     else:
         future['cap'] = 100
         future['floor'] = 0.0

     forecast = model.predict(future)
     # Copy the tail before editing it so the forecast frame is left untouched
     pred = forecast[['ds', 'yhat']][-self.PRED_DAYS:].copy()
     pred['yhat'] = np.where(pred['yhat'] < 0, 0, pred['yhat'])

     return model, forecast, pred
Beispiel #9
0
 def test_added_regressors(self):
     """Extra regressors: validation, standardization, feature matrix and forecast components.

     The statements are order-sensitive: the first fit is expected to fail
     because one registered regressor column has not been added to the frame yet.
     """
     m = Prophet()
     m.add_regressor('binary_feature', prior_scale=0.2)
     m.add_regressor('numeric_feature', prior_scale=0.5)
     m.add_regressor('numeric_feature2',
                     prior_scale=0.5,
                     mode='multiplicative')
     m.add_regressor('binary_feature2', standardize=True)
     df = DATA.copy()
     # Binary regressor supplied as strings rather than numbers
     df['binary_feature'] = ['0'] * 255 + ['1'] * 255
     df['numeric_feature'] = range(510)
     df['numeric_feature2'] = range(510)
     with self.assertRaises(ValueError):
         # Require all regressors in df
         m.fit(df)
     df['binary_feature2'] = [1] * 100 + [0] * 410
     m.fit(df)
     # Check that standardizations are correctly set
     self.assertEqual(
         m.extra_regressors['binary_feature'],
         {
             'prior_scale': 0.2,
             'mu': 0,
             'std': 1,
             'standardize': 'auto',
             'mode': 'additive',
         },
     )
     self.assertEqual(m.extra_regressors['numeric_feature']['prior_scale'],
                      0.5)
     self.assertEqual(m.extra_regressors['numeric_feature']['mu'], 254.5)
     self.assertAlmostEqual(m.extra_regressors['numeric_feature']['std'],
                            147.368585,
                            places=5)
     self.assertEqual(m.extra_regressors['numeric_feature2']['mode'],
                      'multiplicative')
     self.assertEqual(m.extra_regressors['binary_feature2']['prior_scale'],
                      10.)
     self.assertAlmostEqual(m.extra_regressors['binary_feature2']['mu'],
                            0.1960784,
                            places=5)
     self.assertAlmostEqual(m.extra_regressors['binary_feature2']['std'],
                            0.3974183,
                            places=5)
     # Check that standardization is done correctly
     df2 = m.setup_dataframe(df.copy())
     self.assertEqual(df2['binary_feature'][0], 0)
     self.assertAlmostEqual(df2['numeric_feature'][0], -1.726962, places=4)
     self.assertAlmostEqual(df2['binary_feature2'][0], 2.022859, places=4)
     # Check that feature matrix and prior scales are correctly constructed
     seasonal_features, prior_scales, component_cols, modes = (
         m.make_all_seasonality_features(df2))
     self.assertEqual(seasonal_features.shape[1], 30)
     names = ['binary_feature', 'numeric_feature', 'binary_feature2']
     true_priors = [0.2, 0.5, 10.]
     for i, name in enumerate(names):
         # Each regressor contributes exactly one feature column with its own prior
         self.assertIn(name, seasonal_features)
         self.assertEqual(sum(component_cols[name]), 1)
         self.assertEqual(
             sum(np.array(prior_scales) * component_cols[name]),
             true_priors[i],
         )
     # Check that forecast components are reasonable
     future = pd.DataFrame({
         'ds': ['2014-06-01'],
         'binary_feature': [0],
         'numeric_feature': [10],
         'numeric_feature2': [10],
     })
     # Predicting without 'binary_feature2' must be rejected
     with self.assertRaises(ValueError):
         m.predict(future)
     future['binary_feature2'] = 0
     fcst = m.predict(future)
     self.assertEqual(fcst.shape[1], 37)
     self.assertEqual(fcst['binary_feature'][0], 0)
     self.assertAlmostEqual(
         fcst['extra_regressors_additive'][0],
         fcst['numeric_feature'][0] + fcst['binary_feature2'][0],
     )
     self.assertAlmostEqual(
         fcst['extra_regressors_multiplicative'][0],
         fcst['numeric_feature2'][0],
     )
     self.assertAlmostEqual(
         fcst['additive_terms'][0],
         fcst['yearly'][0] + fcst['weekly'][0] +
         fcst['extra_regressors_additive'][0],
     )
     self.assertAlmostEqual(
         fcst['multiplicative_terms'][0],
         fcst['extra_regressors_multiplicative'][0],
     )
     # yhat = trend * (1 + multiplicative terms) + additive terms
     self.assertAlmostEqual(
         fcst['yhat'][0],
         fcst['trend'][0] * (1 + fcst['multiplicative_terms'][0]) +
         fcst['additive_terms'][0],
     )
     # Check works if constant extra regressor at 0
     df['constant_feature'] = 0
     m = Prophet()
     m.add_regressor('constant_feature')
     m.fit(df)
     self.assertEqual(m.extra_regressors['constant_feature']['std'], 1)
Beispiel #10
0
    def prophet_iskater_factory(y: [[float]],
                                k: int,
                                a: List = None,
                                t: List = None,
                                e=None,
                                freq: str = None,
                                n_max=1000,
                                recursive: bool = False,
                                model_params: dict = None,
                                return_forecast=True):
        """
        Fit Prophet on the (truncated) history and predict k steps ahead.

        :param y:           A list of observations, each a vector (a list of floats is wrapped).
        :param k:           Number of steps ahead to predict
        :param a:           Known in advance observations - should be k more of these than y's
        :param t:           Epoch times of observations y. If len(t)=len(y)+k the last k are interpreted as future times.
        :param e:           Unused by this function
        :param freq:        'D', '5T' etc, see https://github.com/pandas-dev/pandas/blob/master/pandas/tseries/frequencies.py
        :param n_max:       Maximum number of observations to use, should you wish to prevent prophet from slowing down
        :param recursive    If True, exogenous variables y[1], y[2],... will be predicted forward in time
                                 (obviously this adds to computation time)
        :param model_params:    Optional overrides applied on top of PROPHET_MODEL
        :param return_forecast: If False, only (x, x_std) are returned
        :returns: x         k-vector of predictions
                  x_std     k-vector of upper-minus-lower interval widths
                  forecast  full forecast dataframe, familiar to users of fbprophet
                  m         the fitted Prophet model
        """
        if a:
            assert len(a) == len(y) + k

        if isinstance(y[0], float):
            y = [wrap(yj) for yj in y]

        # Conversion of epoch times to UTC datetime
        # User must supply times, len(y) or len(y)+k, or a valid frequency str
        if t is None:
            if freq is None or not freq:
                freq = PROPHET_META['freq']  # Just assume away ...
            else:
                assert is_valid_freq(
                    freq), 'Freq ' + str(freq) + ' is not a valid frequency'
            dt = pd.date_range(start=EPOCH, periods=len(y), freq=freq)  # UTC
        else:
            freq = infer_freq_from_epoch(t)
            dt = epoch_to_naive_datetime(t)

        if len(dt) == len(y) + k:
            ta = dt
            dt = dt[:len(y)]
        else:
            assert len(dt) == len(
                y), 'Time vector t should be len(y) or len(y)+k'
            ta = None

        # Truncate history so that prophet doesn't take forever to fit
        y_shorter = y[-n_max:]
        a_shorter = a[-(n_max + k):] if a is not None else []  # may be empty
        dt_shorter = dt[-n_max:]

        # Massage data into Prophet friendly dataframe with columns y, y1, ..., yk, a0,...aj
        y_cols = [
            'y' + str(i) if i > 0 else 'y' for i in range(len(y_shorter[-1]))
        ]
        if a:
            a_cols = ['a' + str(i) for i in range(len(a_shorter[-1]))]
            data = [
                list(yi) + list(ai)
                for yi, ai in zip(y_shorter, a_shorter[:-k])
            ]
            df = pd.DataFrame(columns=y_cols + a_cols, data=data)
        else:
            data = [list(yi) for yi in y_shorter]
            df = pd.DataFrame(columns=y_cols, data=data)
        df['ds'] = dt_shorter

        # Instantiate Prophet model, ensure defaults are what we think they are
        kwargs_used = dict(PROPHET_MODEL.items())
        if model_params:
            kwargs_used.update(model_params)
        m = Prophet(**kwargs_used)

        # Add regressors
        for y_col in y_cols[1:]:
            m.add_regressor(name=y_col)
        if a:
            for a_col in a_cols:
                m.add_regressor(name=a_col)

        # Fit the model every invocation ... there isn't any other way
        with no_stdout_stderr():
            m.fit(df)

        # Make future dataframe, adding known-in-advance variables
        future = m.make_future_dataframe(periods=k, freq=freq)
        if a:
            for j, a_col in enumerate(a_cols):
                future[a_col] = [ai[j] for ai in a_shorter]  # Known in advance
        if ta is not None:
            # Override with user supplied future times. ta spans the full
            # history plus k, while the future frame only covers the truncated
            # history plus k, so keep just the matching tail; assigning all of
            # ta fails with a length mismatch once len(y) exceeds n_max.
            future['ds'] = ta[-len(future):]

        # Next, we wish to add contemporaneously observed variables
        #
        # This is somewhat problematic, for how should we bring exogenously observed variables forward?
        # The simplest answer is, don't use them - only supply 1-vector y observations
        # prophet implicitly assumes all exogenous are known, which is a pretty big shortcoming.
        #
        # However, if we are trying to support y[1:], ...
        #   - It seems consistent to use prophet to predict these forward,
        #   - It also seems likely that this will lead to over-fitting.
        # I'm open to ideas here. Perhaps some hackery could effect attenuation of the coefficients
        # assigned to y[1],... such as jiggling past observations. For now we use prophet on each
        # one individually, feeding them the known in advance 'a' variables.

        n_exog = len(y[0]) - 1
        if n_exog > 0:
            for j, y_col in enumerate(y_cols):
                if j == 0:
                    continue  # column 0 is the target, not a regressor
                yj = [yi[j] for yi in y_shorter]
                if recursive:
                    # Only the point predictions of the sub-model are needed
                    yj_hat, _, _, _ = prophet_iskater_factory(
                        y=yj,
                        k=k,
                        a=a_shorter,
                        freq=freq,
                        n_max=n_max,
                        recursive=False)
                else:
                    # Carry the last observed value forward
                    yj_hat = [yj[-1]] * k
                future[y_col] = yj + list(yj_hat)

        # Call the prediction function
        forecast = m.predict(future)
        x = list(forecast['yhat'].values[-k:]
                 )  # Use m.plot(forecast) to take a peek

        # Interpret confidence level difference as scale to be returned. TODO: set alpha properly so this really is 1-std
        x_std = [
            u - l for u, l in zip(forecast['yhat_upper'].values[-k:],
                                  forecast['yhat_lower'].values[-k:])
        ]

        if return_forecast:
            return x, x_std, forecast, m
        else:
            return x, x_std
Beispiel #11
0
    def test_copy(self):
        """prophet_copy reproduces constructor settings, seasonalities and regressors."""
        df = DATA_all.copy()
        df['cap'] = 200.
        df['binary_feature'] = [0] * 255 + [1] * 255
        # These values are created except for its default values
        holiday = pd.DataFrame({
            'ds': pd.to_datetime(['2016-12-25']),
            'holiday': ['x']
        })
        # One list of candidates per positional Prophet argument, in order
        argument_grid = [
            ['linear', 'logistic'],  # growth
            [None, pd.to_datetime(['2016-12-25'])],  # changepoints
            [3],  # n_changepoints
            [0.9],  # changepoint_range
            [True, False],  # yearly_seasonality
            [True, False],  # weekly_seasonality
            [True, False],  # daily_seasonality
            [None, holiday],  # holidays
            ['additive', 'multiplicative'],  # seasonality_mode
            [1.1],  # seasonality_prior_scale
            [1.1],  # holidays_prior_scale
            [0.1],  # changepoint_prior_scale
            [100],  # mcmc_samples
            [0.9],  # interval_width
            [200],  # uncertainty_samples
        ]
        seasons = ('yearly', 'weekly', 'daily')
        leading_attrs = ('growth', 'n_changepoints', 'changepoint_range')
        trailing_attrs = (
            'country_holidays',
            'seasonality_mode',
            'seasonality_prior_scale',
            'changepoint_prior_scale',
            'holidays_prior_scale',
            'mcmc_samples',
            'interval_width',
            'uncertainty_samples',
        )
        # Values should be copied correctly
        for args in itertools.product(*argument_grid):
            m1 = Prophet(*args)
            m1.country_holidays = 'US'
            m1.history = m1.setup_dataframe(df.copy(), initialize_scales=True)
            m1.set_auto_seasonalities()
            m2 = diagnostics.prophet_copy(m1)

            for attr in leading_attrs:
                self.assertEqual(getattr(m1, attr), getattr(m2, attr))

            if m1.changepoints is not None:
                self.assertTrue(m1.changepoints.equals(m2.changepoints))
            else:
                self.assertEqual(m1.changepoints, m2.changepoints)

            # The copy turns the auto flags off and carries the seasonalities explicitly
            for season in seasons:
                self.assertEqual(False, getattr(m2, season + '_seasonality'))
            for season in seasons:
                self.assertEqual(getattr(m1, season + '_seasonality'),
                                 season in m2.seasonalities)

            if m1.holidays is not None:
                self.assertTrue((m1.holidays == m2.holidays).values.all())
            else:
                self.assertEqual(m1.holidays, m2.holidays)

            for attr in trailing_attrs:
                self.assertEqual(getattr(m1, attr), getattr(m2, attr))

        # Check for cutoff and custom seasonality and extra regressors
        all_changepoints = pd.date_range('2012-06-15', '2012-09-15')
        cutoff = pd.Timestamp('2012-07-25')
        m1 = Prophet(changepoints=all_changepoints)
        m1.add_seasonality('custom', 10, 5)
        m1.add_regressor('binary_feature')
        m1.fit(df)
        m2 = diagnostics.prophet_copy(m1, cutoff=cutoff)
        kept_changepoints = all_changepoints[all_changepoints < cutoff]
        self.assertTrue((kept_changepoints == m2.changepoints).all())
        self.assertTrue('custom' in m2.seasonalities)
        self.assertTrue('binary_feature' in m2.extra_regressors)
Beispiel #12
0
class FBProphetModel(PredictionModel):
    """Facebook's Prophet prediction model."""

    def __init__(self, params: dict, transformation: str = "none"):
        """Store the optional Prophet-specific settings found in `params`.

        `params["model_parameters"]["fbprophet_parameters"]` is used when
        present; otherwise the model runs with Prophet defaults.
        """
        super().__init__(params, name="FBProphet", transformation=transformation)

        # Stuff needed to make Prophet shut up during training.
        self.suppress_stdout_stderr = suppress_stdout_stderr
        self.fbmodel = Prophet()
        try:
            self.fbprophet_parameters = params["model_parameters"]["fbprophet_parameters"]
        except KeyError:
            self.fbprophet_parameters = None

    def train(self, input_data: DataFrame, extra_regressors: DataFrame = None):
        """Overrides PredictionModel.train()

        Builds a fresh Prophet model (with holidays when configured) and fits
        it on `input_data`, whose index supplies the dates and whose first
        column supplies the target. Columns of `extra_regressors`, when given,
        are joined in and registered as additional regressors.
        """

        if self.fbprophet_parameters is not None:
            try:
                timeseries_name = input_data.columns[0]
                date_format = self.fbprophet_parameters["holidays_dataframes"]["date_format"]
                holidays = pd.read_csv(self.fbprophet_parameters["holidays_dataframes"][timeseries_name])
                # Store the parsed dates: the conversion result used to be
                # discarded, so the configured date_format was never applied.
                holidays["ds"] = pd.to_datetime(holidays["ds"], format=date_format)
                self.fbmodel = Prophet(holidays=holidays)
                log.debug("Using a dataframe for holidays...")
            except KeyError:
                # No holidays dataframe configured for this time-series
                self.fbmodel = Prophet()

            try:
                holiday_country = self.fbprophet_parameters["holiday_country"]
                self.fbmodel.add_country_holidays(country_name=holiday_country)
                log.debug(f"Set {holiday_country} as country for holiday calendar...")
            except KeyError:
                pass

        else:
            self.fbmodel = Prophet()

        if extra_regressors is not None:
            # We could apply self.transformation also on the extra regressors.
            # From tests, it looks like it doesn't change much/it worsens the forecasts.
            input_data = input_data.join(extra_regressors).reset_index()
            # After reset_index the first two columns are the dates and the target
            old_names = input_data.columns[[0, 1]]
            input_data = input_data.rename(columns=dict(zip(old_names, ['ds', 'y'])))
            for col in extra_regressors.columns:
                self.fbmodel.add_regressor(col)

        else:
            # Work on a new frame so the caller's DataFrame is not modified
            input_data = input_data.reset_index()
            input_data.columns = ['ds', 'y']

        with self.suppress_stdout_stderr():
            self.fbmodel.fit(input_data)

        # NOTE: a commented-out sketch of a cross-validated grid search over
        # changepoint_prior_scale / seasonality_prior_scale used to live here;
        # it was dead code and has been removed.

    def predict(self, future_dataframe: DataFrame, extra_regressors: DataFrame = None) -> DataFrame:
        """Overrides PredictionModel.predict()

        Returns Prophet's forecast for the dates in the index of
        `future_dataframe`, indexed by 'ds'. `extra_regressors`, when given,
        is joined on that same index before predicting.
        """
        future = future_dataframe.reset_index()
        future.rename(columns={'index': 'ds'}, inplace=True)

        if extra_regressors is not None:
            future.set_index('ds', inplace=True)
            future = future.join(extra_regressors.copy())
            future.reset_index(inplace=True)

        forecast = self.fbmodel.predict(future)
        forecast.set_index('ds', inplace=True)

        return forecast
Beispiel #13
0
import pandas as pd
from prophet import Prophet

# Load the example time-series used throughout this script.
# float_precision='high' required for pd.read_csv to match precision of Rover.read_csv
df = pd.read_csv('examples/example_wp_log_peyton_manning.csv',
                 float_precision='high')


def nfl_sunday(ds):
    """Return 1 when `ds` is a Sunday in September through January, else 0."""
    day = pd.to_datetime(ds)
    is_sunday = day.weekday() == 6
    in_season = day.month > 8 or day.month < 2
    return int(is_sunday and in_season)


# Flag every historical date that falls on an in-season Sunday
df['nfl_sunday'] = df['ds'].map(nfl_sunday)

# The regressor has to be registered before the model is fitted
m = Prophet()
m.add_regressor(name='nfl_sunday')
m.fit(df)

# The regressor must also be supplied for the dates being forecast
future = m.make_future_dataframe(periods=365)
future['nfl_sunday'] = future['ds'].map(nfl_sunday)

forecast = m.predict(future)

# Save the forecast plot and the per-component plot
m.plot(forecast).savefig('/tmp/py_regressors.png')
m.plot_components(forecast).savefig('/tmp/py_regressors2.png')