def arimax_by_month_fit_predict(y_train,y_test,month,param):
    """Fit an ARIMAX eviction model and forecast three successive months.

    The regression part of the model uses the exogenous column CASANF0URN
    (unemployment rate in previous year) plus an intercept; the time-series
    part uses lagged AR and MA terms.

    Parameters:
    y_train -- training frame; must contain Eviction_Notice and CASANF0URN
    y_test -- out-of-sample frame passed to predict(); its Eviction_Notice
              column supplies the actual values
    month -- first month being predicted; the two following months are
             derived with pd.offsets.MonthBegin
    param -- sequence of (AR, I, MA) orders. Only param[0] (AR) and
             param[2] (MA) are passed to the model; param[1] is not used.

    Output:
    DataFrame with columns actual_evictions, predicted_evictions,
    months_ahead (1, 2, 3) and month_year, one row per forecast month.
    """
    arimax = pf.ARIMAX(data=y_train, formula='Eviction_Notice~1+CASANF0URN',
                       ar=param[0], ma=param[2])
    arimax.fit()
    forecast = arimax.predict(h=3, oos_data=y_test)

    result = pd.DataFrame({
        'actual_evictions': y_test.Eviction_Notice.values.tolist(),
        'predicted_evictions': forecast.Eviction_Notice.values.tolist(),
        'months_ahead': [1, 2, 3],
    })

    # The first entry is `month` itself; the next two are the following month starts.
    forecast_months = [month] + [month + pd.offsets.MonthBegin(step) for step in (1, 2)]
    result['month_year'] = pd.Series(forecast_months).values

    return result
Exemple #2
0
def test2_predict_is_length():
    """
    Checks that predict_is(h=5) on a two-regressor ARIMAX model returns
    a dataframe with exactly 5 rows (one per step)
    """
    steps = 5
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit()
    in_sample = model.predict_is(h=steps)
    assert in_sample.shape[0] == steps
Exemple #3
0
    def time_series(self, train_X, train_y, test_X, test_y, ar=1, ma=1, gridsearch=False, gridsearch_training_frac=0.7):
        """
        Fit a pyflux ARIMAX model on the training data and return predictions for the training and test set.

        The formula regresses train_y (by its name) on an intercept plus every column of train_X. When gridsearch
        is True, ar and ma are replaced by the values returned from self.gridsearch_time_series (the third returned
        value is not used here).

        Returns (pred_train, pred_test): single-column DataFrames indexed like train_y / test_y. The first
        max(ar, ma) rows of pred_train are NaN; the remaining rows come from model.predict called with the
        training frame as oos_data.
        """
        if gridsearch:
            ar, ma, _ = self.gridsearch_time_series(train_X, train_y, gridsearch_training_frac=gridsearch_training_frac,
                                                    error='mse')

        target = train_y.name
        formula = target + '~1+' + "+".join(train_X.columns)

        # Work on copies so the caller's feature frames are not modified.
        fit_frame = copy.deepcopy(train_X)
        fit_frame[target] = train_y
        eval_frame = copy.deepcopy(test_X)
        eval_frame[test_y.name] = test_y

        model = pf.ARIMAX(data=fit_frame, formula=formula, ar=ar, ma=ma)
        model.fit()

        lag = max(ar, ma)
        train_forecast = model.predict(h=len(train_y.index) - lag, oos_data=fit_frame)

        # Pad the leading `lag` rows with NaN so the result lines up with train_y's index.
        padded = np.full((len(train_forecast) + lag, 1), np.nan)
        padded[lag:] = train_forecast.values
        pred_train = pd.DataFrame(padded, index=train_y.index, columns=[target])

        test_forecast = model.predict(h=len(test_y.index), oos_data=eval_frame)
        pred_test = pd.DataFrame(test_forecast.values, index=test_y.index, columns=[test_y.name])

        return pred_train, pred_test
Exemple #4
0
def test_bbvi_elbo():
    """
    Checks that, after 200 BBVI iterations with ELBO recording on, the
    last recorded ELBO is larger than the first
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    results = model.fit('BBVI', iterations=200, record_elbo=True)
    first_elbo = results.elbo_records[0]
    last_elbo = results.elbo_records[-1]
    assert last_elbo > first_elbo
Exemple #5
0
def multivariate_arima():
    """
    Reads the data, fits an ARIMAX model and prints the accuracy score.

    The frame returned by preprocessing.main() is split by position: the
    first 52 * 3 rows are used for fitting, the remainder for forecasting.
    The model regresses milk on an intercept plus cheese, dry, corn and
    Value with ar=9, ma=0, integ=1, fitted by MLE. The 38-step forecast is
    turned into "forecast > 0" flags and scored against "difference > 0"
    flags of the first test column.

    Inputs:
        None

    Outputs:
        None
    """
    data = preprocessing.main()
    # NOTE(review): 52 * 3 looks like three years of weekly rows — confirm.
    n_train_rows = 52 * 3
    train = data.iloc[:n_train_rows, :]
    test = data.iloc[n_train_rows:, :]

    model = pf.ARIMAX(data=train, formula='milk~1+cheese+dry+corn+Value',
                      ar=9, ma=0, integ=1)
    results = model.fit("MLE")
    results.summary()

    yhat = model.predict(h=38, oos_data=test)
    pred_chg = yhat > 0
    # Uses column 0 of the test frame, last row excluded.
    # NOTE(review): assumes column 0 is the target and the lengths match — confirm.
    actual_chg = test.iloc[:-1, 0].diff() > 0
    # print() call form: valid in both Python 2 and Python 3.
    print(accuracy_score(actual_chg, pred_chg))
Exemple #6
0
def test2_predict_is_nans():
    """
    Tests that the predictions in-sample are not NaNs
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    results = model.fit()
    results.summary()
    # Compute the in-sample predictions once and reuse them for the NaN
    # check rather than calling predict_is twice.
    in_sample = model.predict_is(h=5).values
    assert not np.isnan(in_sample).any()
Exemple #7
0
def test2_ppc():
    """
    Checks that the value returned by model.ppc() after a 100-iteration
    BBVI fit lies in the closed interval [0, 1]
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('BBVI', iterations=100)
    ppc_value = model.ppc()
    assert 0.0 <= ppc_value <= 1.0
Exemple #8
0
def test_predict_length():
    """
    Checks that predict(h=5) with out-of-sample data returns a dataframe
    with exactly 5 rows (one per step)
    """
    steps = 5
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=2, ma=2, family=pf.t())
    results = model.fit()
    results.summary()
    forecast = model.predict(h=steps, oos_data=data_oos)
    assert forecast.shape[0] == steps
Exemple #9
0
def test_predict_nans():
    """
    Tests that the predictions are not NaNs
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=2, ma=2, family=pf.t())
    results = model.fit()
    results.summary()
    # Compute the forecast once and reuse it for the NaN check rather than
    # calling predict twice.
    forecast = model.predict(h=5, oos_data=data_oos).values
    assert not np.isnan(forecast).any()
Exemple #10
0
def test2_predict_is_nonconstant():
    """
    In-sample predictions should not all be identical to the first one.
    A constant output would point to the predict function not iterating forward
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit()
    predicted = model.predict_is(h=10, intervals=False).values
    all_same_as_first = np.all(predicted == predicted[0])
    assert not all_same_as_first
Exemple #11
0
def test2_sample_model():
    """
    Checks the shape of the array returned by model.sample(nsims=100)
    after a 100-iteration BBVI fit
    """
    n_draws = 100
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('BBVI', iterations=100)
    draws = model.sample(nsims=n_draws)
    n_rows, n_cols = draws.shape[0], draws.shape[1]
    assert n_rows == n_draws
    # NOTE(review): the "- 2" presumably reflects the max(ar, ma) = 2 leading rows — confirm.
    assert n_cols == len(data) - 2
def build_model(data, ar=4, ma=4, integ=0, target=None):
    """
    Build (without fitting) a pyflux ARIMAX model for `target`.

    Parameters:
    data -- DataFrame containing the target column and the regressors
    ar -- autoregressive order forwarded to pf.ARIMAX
    ma -- moving-average order forwarded to pf.ARIMAX
    integ -- differencing order forwarded to pf.ARIMAX
    target -- name of the dependent column. It is formatted straight into
              the formula, so leaving it as None yields a formula that
              starts with the literal text "None~".

    Output:
    The unfitted pf.ARIMAX model. The formula regresses `target` on at
    most the first 10 columns of `data` other than `target`.
    """
    regressors = [col for col in data.columns if col != target][:10]
    formula = '{}~'.format(target) + '+'.join(regressors)
    # Forward the caller's orders instead of hard-coding 4/4/0; the
    # defaults keep the previous behaviour for callers that pass nothing.
    return pf.ARIMAX(data=data, formula=formula, ar=ar, ma=ma, integ=integ)
Exemple #13
0
def test_no_terms():
    """
    Fits an ARIMAX model with ar=0 and ma=0 and checks that it has
    3 latent variables, none of whose values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=0, ma=0)
    model.fit()
    latent = model.latent_variables.z_list
    assert len(latent) == 3
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #14
0
def test_laplace():
    """
    Fits an ARIMAX model with the 'Laplace' method and checks that it has
    6 latent variables, none of whose values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit('Laplace')
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #15
0
def test_mh():
    """
    Fits an ARIMAX model with the 'M-H' method (nsims=300) and checks that
    it has 6 latent variables, none of whose values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit('M-H', nsims=300)
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #16
0
def test2_pml():
    """
    Fits a two-regressor ARIMAX model with the 'PML' method and checks that
    it has 7 latent variables, none of whose values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=1, ma=1, family=pf.t())
    model.fit('PML')
    latent = model.latent_variables.z_list
    assert len(latent) == 7
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #17
0
def test_bbvi_mini_batch():
    """
    Fits an ARIMAX model with BBVI (100 iterations, mini_batch=32) and
    checks that it has 6 latent variables, none of whose values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit('BBVI', iterations=100, mini_batch=32)
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #18
0
def test_couple_terms():
    """
    Fits an ARIMAX model with 1 AR and 1 MA term using the default fit
    method and checks that it has 6 latent variables, none of whose
    values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.t())
    model.fit()
    latent = model.latent_variables.z_list
    assert len(latent) == 6
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #19
0
def test2_bbvi():
    """
    Fits a two-regressor ARIMAX model with BBVI (100 iterations) and checks
    that it has 7 latent variables, none of whose values is nan
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=1, ma=1, family=pf.t())
    model.fit('BBVI', iterations=100)
    latent = model.latent_variables.z_list
    assert len(latent) == 7
    values = np.array([lv.value for lv in latent])
    assert not np.isnan(values).any()
Exemple #20
0
def test2_predict_is_intervals_mh():
    """
    After an 'M-H' fit, checks that the in-sample prediction columns are
    strictly ordered: 99% > 95% > point prediction > 5% > 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('M-H', nsims=400)
    predictions = model.predict_is(h=10, intervals=True)

    # Columns listed from highest to lowest; each must exceed the next one.
    ordered_columns = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        model.data_name,
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(ordered_columns, ordered_columns[1:]):
        assert np.all(predictions[upper].values > predictions[lower].values)
def a_test_bbvi_elbo():
    """
    Checks that, for a pf.Exponential() family model fitted by BBVI
    (300 iterations, map_start=False), the last recorded ELBO is larger
    than the first
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.Exponential())
    results = model.fit('BBVI', iterations=300, record_elbo=True, map_start=False)
    first_elbo = results.elbo_records[0]
    last_elbo = results.elbo_records[-1]
    assert last_elbo > first_elbo
def arimax_by_zip_fit_predict(y_train,y_test,param):
    """Fit an ARIMAX eviction model and forecast three steps ahead.

    The model regresses Eviction_Notice on an intercept and the exogenous
    column CASANF0URN, with AR order param[0] and MA order param[2]
    (param[1] is not used).

    Returns (predicted, actual): the forecast Eviction_Notice values and the
    Eviction_Notice values of y_test, both as plain lists.
    """
    arimax = pf.ARIMAX(
        data=y_train,
        formula='Eviction_Notice~1+CASANF0URN',
        ar=param[0],
        ma=param[2],
    )
    arimax.fit()
    forecast = arimax.predict(h=3, oos_data=y_test)

    predicted = forecast.Eviction_Notice.values.tolist()
    actual = y_test.Eviction_Notice.values.tolist()
    return predicted, actual
Exemple #23
0
def test2_predict_intervals_bbvi():
    """
    After a BBVI fit, checks that the out-of-sample prediction columns are
    strictly ordered: 99% > 95% > point prediction > 5% > 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.t())
    model.fit('BBVI', iterations=100)
    predictions = model.predict(h=10, oos_data=data_oos, intervals=True)

    # Columns listed from highest to lowest; each must exceed the next one.
    ordered_columns = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        model.data_name,
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(ordered_columns, ordered_columns[1:]):
        assert np.all(predictions[upper].values > predictions[lower].values)
def a_test_predict_is_nonconstant():
    """
    In-sample predictions from a pf.Exponential() family model should not
    all be identical to the first one. A constant output would point to the
    predict function not iterating forward
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.Exponential())
    model.fit('BBVI', iterations=200)
    predicted = model.predict_is(h=10, fit_method='BBVI', intervals=False).values
    all_same_as_first = np.all(predicted == predicted[0])
    assert not all_same_as_first
def test2_predict_nonconstant():
    """
    Out-of-sample predictions from a pf.Poisson() family model should not
    all be identical to the first one. A constant output would point to the
    predict function not iterating forward. The predictions are printed
    before the check
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=1, ma=1, family=pf.Poisson())
    model.fit('BBVI', iterations=200)
    predictions = model.predict(h=10, oos_data=data_oos, intervals=False)
    print(predictions)
    predicted = predictions.values
    all_same_as_first = np.all(predicted == predicted[0])
    assert not all_same_as_first
Exemple #26
0
def test_predict_is_intervals():
    """
    After a default fit, checks that the in-sample prediction columns are
    strictly ordered: 99% > 95% > point prediction > 5% > 1%
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=2, ma=2)
    model.fit()
    predictions = model.predict_is(h=10, intervals=True)

    # Columns listed from highest to lowest; each must exceed the next one.
    ordered_columns = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        model.data_name,
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(ordered_columns, ordered_columns[1:]):
        assert np.all(predictions[upper].values > predictions[lower].values)
def test_bbvi_mini_batch_elbo():
    """
    Checks that, for a pf.Poisson() family model fitted by mini-batch BBVI
    (300 iterations, mini_batch=32, map_start=False), the last recorded
    ELBO is larger than the first
    """
    model = pf.ARIMAX(formula="y ~ x1", data=data, ar=1, ma=1, family=pf.Poisson())
    fit_options = dict(iterations=300, mini_batch=32, record_elbo=True, map_start=False)
    results = model.fit('BBVI', **fit_options)
    first_elbo = results.elbo_records[0]
    last_elbo = results.elbo_records[-1]
    assert last_elbo > first_elbo
Exemple #28
0
def arimax_base_rmse_mode(train_input, train_target, test_input, test_target):
    """
    Fit an ARIMAX baseline on first differences and score one forecast.

    Each column of train_input and train_target is first-differenced and
    the results are assembled into one DataFrame. Input columns are named
    'price_<i>' for even i and 'totaltx_<i>' for odd i; the target column is
    'current_price'. The model regresses current_price on totaltx_5 with
    ar=2, ma=2, integ=0 and is fitted by MLE.

    The score is mean_squared_error between the value at row 0, column 6 of
    the combined test frame and train_target[99] + pred.current_price[99].
    It is printed and returned.

    NOTE(review): the indices 99 and 6 and the regressor totaltx_5 are
    hard-coded, so this presumably expects 100 training rows and 6 input
    columns — confirm against the caller.
    NOTE(review): the result is named "rmse" but comes straight from
    mean_squared_error with no square root — confirm which is intended.
    """
    n_features = int(train_input.shape[1])
    feature_names = [
        ('price_' if idx % 2 == 0 else 'totaltx_') + str(idx)
        for idx in range(n_features)
    ]

    # Stack the per-column differences along the third axis, one column at a time.
    stacked_diffs = np.array([])
    for idx in range(n_features):
        column_diff = np.diff(train_input[:, idx])
        if idx == 0:
            stacked_diffs = column_diff
        else:
            stacked_diffs = np.dstack((stacked_diffs, column_diff))

    stacked_diffs = np.dstack((stacked_diffs, np.diff(train_target)))
    all_names = feature_names + ['current_price']
    # dstack produced a leading axis of length 1; [0] drops it to get a 2-D table.
    train_frame = pd.DataFrame(stacked_diffs[0], columns=all_names)

    model = pf.ARIMAX(data=train_frame, formula="current_price~totaltx_5", ar=2, ma=2, integ=0)
    fit_results = model.fit("MLE")
    fit_results.summary()

    test_features = pd.DataFrame(test_input, columns=feature_names)
    test_labels = pd.DataFrame(test_target, columns=['current_price'])
    test_input_target = pd.concat([test_features, test_labels], axis=1)

    pred = model.predict(h=test_input_target.shape[0], oos_data=test_input_target, intervals=True)

    observed = [test_input_target.iloc[0, 6]]
    # The model works on differences, so add the forecast to a training target value.
    forecast = [(train_target[99]) + pred.current_price[99]]
    arimax_base_rmse = mean_squared_error(observed, forecast)
    print("arimax_base_rmse:", arimax_base_rmse)
    return arimax_base_rmse
def test2_predict_is_intervals():
    """
    For a pf.Poisson() family model with a default fit, checks that the
    in-sample prediction interval columns are ordered: 99% >= 95% >= 5% >= 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.Poisson())
    model.fit()
    predictions = model.predict_is(h=10, intervals=True)

    # Interval columns from highest to lowest; ties are allowed.
    ordered_columns = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(ordered_columns, ordered_columns[1:]):
        assert np.all(predictions[upper].values >= predictions[lower].values)
def a_test2_predict_is_intervals_bbvi():
    """
    For a pf.Exponential() family model fitted by BBVI, checks that the
    in-sample prediction interval columns are ordered: 99% >= 95% >= 5% >= 1%
    """
    model = pf.ARIMAX(formula="y ~ x1 + x2", data=data, ar=2, ma=2, family=pf.Exponential())
    model.fit('BBVI', iterations=100)
    predictions = model.predict_is(h=10, intervals=True)

    # Interval columns from highest to lowest; ties are allowed.
    ordered_columns = [
        '99% Prediction Interval',
        '95% Prediction Interval',
        '5% Prediction Interval',
        '1% Prediction Interval',
    ]
    for upper, lower in zip(ordered_columns, ordered_columns[1:]):
        assert np.all(predictions[upper].values >= predictions[lower].values)