예제 #1
0
    def save(self):
        """Pickle this object to ``./config/models/<self.id()>``.

        While pickling, ``self.fbmodel`` is temporarily replaced by the
        output of ``serialize.model_to_json`` so that the JSON form is what
        ends up in the pickle. The original ``fbmodel`` object is put back
        afterwards, whether or not writing the file succeeded.
        """
        fbmodel = self.fbmodel
        self.fbmodel = serialize.model_to_json(fbmodel)

        # Save the model to disk
        try:
            # exist_ok=True avoids the check-then-create race of
            # ``os.path.exists`` followed by ``os.makedirs``.
            os.makedirs('./config/models', exist_ok=True)
            # Write-only binary mode is enough; the file is never read back here.
            with open(f'./config/models/{self.id()}', 'wb') as f:
                pickle.dump(self, f)
        finally:
            # Always restore the live model on the instance.
            self.fbmodel = fbmodel
예제 #2
0
    def _discover_model(self):
        """Grid-search Prophet hyper-parameters, then fit and save the best model.

        Every combination of ``changepoint_prior_scale`` and
        ``seasonality_prior_scale`` is fitted with logistic growth and scored
        by cross-validated RMSE. The best combination is refitted on the full
        training frame and written to
        ``<self.temp_output>/<file stem>_prf.json``.

        The output of ``model_to_json`` is passed through ``json.dump``, so
        the file holds a JSON-encoded string; read it back with
        ``json.load`` before handing it to ``model_from_json``.

        Returns:
            dict: ``{'loss': <best RMSE>}``.

        Raises:
            ValueError: if no parameter combination could be evaluated.
        """
        df_train, self.max_cap = self._transform_features(self.log)
        # Horizon: time span between the row at the 80% position and the
        # latest timestamp; folds are spaced half a horizon apart.
        days = df_train.ds.max() - df_train.iloc[int(len(df_train) * 0.8)].ds
        periods = days * 0.5

        param_grid = {
            'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
            'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0]
        }

        # Generate all combinations of parameters
        all_params = [
            dict(zip(param_grid.keys(), v))
            for v in itertools.product(*param_grid.values())
        ]
        rmses = []  # One entry per element of all_params, in the same order

        # Use cross validation to evaluate all parameters
        for params in all_params:
            params['growth'] = 'logistic'
            try:
                m = Prophet(**params).fit(
                    df_train)  # Fit model with given params
                df_cv = cross_validation(m,
                                         horizon=days,
                                         period=periods,
                                         parallel="processes")
                df_p = performance_metrics(df_cv, rolling_window=1)
                rmse = df_p['rmse'].values[0]
            except Exception:
                # Report the failure but keep searching. Recording +inf keeps
                # rmses aligned with all_params so argmin picks the right
                # parameter set and the results frame gets one score per row.
                traceback.print_exc()
                rmse = np.inf
            rmses.append(rmse)

        if not np.isfinite(rmses).any():
            raise ValueError(
                'Cross validation failed for every parameter combination')

        # Find the best parameters
        tuning_results = pd.DataFrame(all_params)
        tuning_results['rmse'] = rmses
        best_idx = int(np.argmin(rmses))
        best_params = all_params[best_idx]
        m = Prophet(**best_params).fit(df_train)

        out_path = os.path.join(
            self.temp_output,
            self.parms['file'].split('.')[0] + '_prf.json')
        with open(out_path, 'w') as fout:
            json.dump(model_to_json(m), fout, cls=NumpyEncoder)  # Save model
        return {'loss': tuning_results.iloc[best_idx].rmse}
예제 #3
0
    def test_simple_serialize(self):
        """Round-trip a default model through JSON and compare with the original.

        Checks the serialized size and version tag, attribute-by-attribute
        equality of the restored model, and that both models produce the
        same ``yhat`` forecast.
        """
        m = Prophet()
        days = 30
        N = DATA.shape[0]
        df = DATA.head(N - days)
        m.fit(df)

        future = m.make_future_dataframe(2, include_history=False)
        fcst = m.predict(future)

        model_str = model_to_json(m)
        # Make sure json doesn't get too large in the future
        self.assertLess(len(model_str), 200000)
        z = json.loads(model_str)
        self.assertEqual(z['__fbprophet_version'], '1.0')

        m2 = model_from_json(model_str)

        # Check that m and m2 are equal
        self.assertEqual(m.__dict__.keys(), m2.__dict__.keys())
        for k, v in m.__dict__.items():
            if k in ['stan_fit', 'stan_backend']:
                # Asserted to be None on the restored model below.
                continue
            if k == 'params':
                self.assertEqual(v.keys(), m2.params.keys())
                for kk, vv in v.items():
                    self.assertTrue(np.array_equal(vv, m2.params[kk]))
            elif k in PD_SERIES and v is not None:
                self.assertTrue(v.equals(m2.__dict__[k]))
            elif k in PD_DATAFRAME and v is not None:
                pd.testing.assert_frame_equal(v, m2.__dict__[k])
            elif k == 'changepoints_t':
                # Compare against the restored model; comparing m with itself
                # would pass regardless of what was deserialized.
                self.assertTrue(np.array_equal(v, m2.__dict__[k]))
            else:
                self.assertEqual(v, m2.__dict__[k])
        self.assertIsNone(m2.stan_fit)
        self.assertIsNone(m2.stan_backend)

        # Check that m2 makes the same forecast
        future2 = m2.make_future_dataframe(2, include_history=False)
        fcst2 = m2.predict(future2)

        self.assertTrue(
            np.array_equal(fcst['yhat'].values, fcst2['yhat'].values))
예제 #4
0
def train_prophet(train_df, model_location_prophet):
    """Fit a Prophet model on ``train_df`` and save it to disk.

    Every column of ``train_df`` other than ``ds`` and ``y`` is registered
    as an extra regressor. The fitted model is serialized with
    ``model_to_json`` and written through ``json.dump`` to
    ``model_location_prophet``. The value returned by ``c.stop()`` is
    printed as the elapsed time.

    Args:
        train_df: training frame containing ``ds``, ``y`` and regressor columns.
        model_location_prophet: path of the file to write the model to.

    Returns:
        The fitted Prophet model.
    """
    c.start()
    regressor_frame = train_df.drop(columns=['ds', 'y'])

    model = Prophet(
        growth='linear',
        seasonality_mode='multiplicative',
        weekly_seasonality=True,
    )
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.add_country_holidays(country_name='US')

    # Regressors must be registered before fitting.
    for regressor_name in regressor_frame.columns:
        model.add_regressor(regressor_name)

    model.fit(train_df)

    with open(model_location_prophet, 'w') as file_out:
        json.dump(model_to_json(model), file_out)

    elapsed = c.stop()
    print(f'Prophet model fitted: {elapsed} seconds')

    return model
예제 #5
0
    def test_full_serialize(self):
        """Round-trip a fully configured model through JSON.

        The model uses custom and country holidays, explicit changepoints,
        conditional and regular extra seasonalities, and four extra
        regressors. The restored model must match attribute by attribute and
        produce the same ``yhat`` on the held-out rows.
        """
        # Construct a model with all attributes
        holidays = pd.DataFrame({
            'ds':
            pd.to_datetime(['2012-06-06', '2013-06-06']),
            'holiday': ['seans-bday'] * 2,
            'lower_window': [0] * 2,
            'upper_window': [1] * 2,
        })
        # Test with holidays and country_holidays
        m = Prophet(
            holidays=holidays,
            seasonality_mode='multiplicative',
            changepoints=['2012-07-01', '2012-10-01', '2013-01-01'],
        )
        m.add_country_holidays(country_name='US')
        m.add_seasonality(name='conditional_weekly',
                          period=7,
                          fourier_order=3,
                          prior_scale=2.,
                          condition_name='is_conditional_week')
        m.add_seasonality(name='normal_monthly',
                          period=30.5,
                          fourier_order=5,
                          prior_scale=2.)
        df = DATA.copy()
        df['is_conditional_week'] = [0] * 255 + [1] * 255
        m.add_regressor('binary_feature', prior_scale=0.2)
        m.add_regressor('numeric_feature', prior_scale=0.5)
        m.add_regressor('numeric_feature2',
                        prior_scale=0.5,
                        mode='multiplicative')
        m.add_regressor('binary_feature2', standardize=True)
        df['binary_feature'] = ['0'] * 255 + ['1'] * 255
        df['numeric_feature'] = range(510)
        df['numeric_feature2'] = range(510)
        df['binary_feature2'] = [1] * 100 + [0] * 410

        train = df.head(400)
        test = df.tail(100)

        m.fit(train)
        # Exercised for its own sake; the forecast below uses the held-out
        # rows, which already carry the regressor and condition columns.
        m.make_future_dataframe(periods=100, include_history=False)
        fcst = m.predict(test)
        # Serialize!
        m2 = model_from_json(model_to_json(m))

        # Check that m and m2 are equal
        self.assertEqual(m.__dict__.keys(), m2.__dict__.keys())
        for k, v in m.__dict__.items():
            if k in ['stan_fit', 'stan_backend']:
                # Asserted to be None on the restored model below.
                continue
            if k == 'params':
                self.assertEqual(v.keys(), m2.params.keys())
                for kk, vv in v.items():
                    self.assertTrue(np.array_equal(vv, m2.params[kk]))
            elif k in PD_SERIES and v is not None:
                self.assertTrue(v.equals(m2.__dict__[k]))
            elif k in PD_DATAFRAME and v is not None:
                pd.testing.assert_frame_equal(v, m2.__dict__[k])
            elif k == 'changepoints_t':
                # Compare against the restored model; comparing m with itself
                # would pass regardless of what was deserialized.
                self.assertTrue(np.array_equal(v, m2.__dict__[k]))
            else:
                self.assertEqual(v, m2.__dict__[k])
        self.assertIsNone(m2.stan_fit)
        self.assertIsNone(m2.stan_backend)

        # Check that m2 makes the same forecast
        m2.make_future_dataframe(periods=100, include_history=False)
        fcst2 = m2.predict(test)

        self.assertTrue(
            np.array_equal(fcst['yhat'].values, fcst2['yhat'].values))
예제 #6
0
# Forecast, plot, score the overlap with the observed series, then round-trip
# the fitted model through a JSON file.
forecast = m.predict(future)
print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())
plt.figure(figsize=(15, 10))
fig1 = m.plot(forecast)
# pyplot.show() displays the open figures; it does not take a figure argument.
plt.show()

a = plot_yearly(m)

# Normalise timestamps to strings so the frames can be matched on 'ds'.
pm25_UP012_test['ds'] = [str(x) for x in pm25_UP012_test['ds']]
forecast['ds'] = [str(x) for x in forecast['ds']]

pm25_UP012['ds'] = [str(x) for x in pm25_UP012['ds']]

# Row position of the first occurrence of each timestamp, built once so the
# matching loop does dictionary lookups instead of re-scanning whole columns.
observed_pos = {}
for pos, stamp in enumerate(pm25_UP012['ds']):
    observed_pos.setdefault(stamp, pos)
forecast_pos = {}
for pos, stamp in enumerate(forecast['ds']):
    forecast_pos.setdefault(stamp, pos)

y = []
yhat = []

for stamp in pm25_UP012['ds']:
    if stamp in forecast_pos:
        print(stamp)
        # The last column of each frame is taken as the value to compare.
        y.append(pm25_UP012.iloc[observed_pos[stamp], -1])
        yhat.append(forecast.iloc[forecast_pos[stamp], -1])

# Print the score; as a bare expression it would be silently discarded
# when this runs as a script.
print(r2_score(y, yhat))

with open('serialized_model.json', 'w') as fout:
    json.dump(model_to_json(m), fout)  # Save model

with open('serialized_model.json', 'r') as fin:
    m = model_from_json(json.load(fin))  # Load model
예제 #7
0
def save_model(model, path):
    """Serialize ``model`` to ``<path>/model.json`` and report where it went.

    The output of ``model_to_json`` is written through ``json.dump``.

    Args:
        model: the model to serialize with ``model_to_json``.
        path: directory that receives ``model.json``.

    Returns:
        str: a confirmation message containing the file location.
    """
    target = f"{path}/model.json"
    with open(target, 'w') as fout:
        json.dump(model_to_json(model), fout)
    return f'saved model at {target}'
예제 #8
0
def model_train():
    """
    Train one Prophet model per country and record the run.

    Loads the aavail dataframe, splits it into per-country frames, fits a
    default Prophet model on each, writes every model to
    ``MODEL_DIR/model_<name>.json`` and finally updates the train log with
    a data shape, the runtime and the model version information.
    """

    # Start timer for the runtime report.
    started_at = time.time()

    # TODO: remove debug print
    print('calling load aavail data')
    # Data ingestion.
    frame = load_aavail_data()

    # TODO: remove debug print
    print('splitting dfs into individual country dfs')

    # One dataframe per country.
    per_country = split_preprocessed_df(frame, top10countries)

    # TODO: remove debug print
    print('now about to train models')

    # NOTE(review): the loop rebinds ``frame``, so the shape logged at the
    # end is that of the last per-country frame, not the full dataset.
    # Confirm this is intended.
    for country, frame in per_country.items():

        # TODO: remove debug prints
        print(f'to train: {country!s}')
        print(frame.head())
        print('NaNs:')
        print(frame.isna().sum())

        model = Prophet()
        model.fit(frame)

        modelname = f'model_{country!s}.json'

        # TODO: remove debug print
        print(f'{modelname} trained')

        with open(os.path.join(MODEL_DIR, modelname), 'w') as f:
            json.dump(model_to_json(model), f)
            print(f'saved {modelname}')

    # Format the elapsed time as HHH:MM:SS.
    total_minutes, seconds = divmod(time.time() - started_at, 60)
    hours, minutes = divmod(total_minutes, 60)
    runtime = "%03d:%02d:%02d" % (hours, minutes, seconds)

    # TODO: remove debug print
    print('updating train log')

    # Update the log file.
    update_train_log(
        frame['ds'].shape,
        runtime,
        MODEL_VERSION,
        MODEL_VERSION_NOTE,
    )

    # TODO: remove debug print
    print('updated train log')