예제 #1
0
def test_readme_examples():
    """Smoke-test the two README usage snippets on random data.

    A random-forest NARX model is always exercised; the XGBoost-based
    DirectAutoRegressor snippet only runs when ``has_xgboost`` is truthy.
    Both are expected to return one prediction per input sample.
    """
    n_samples = 100
    exog = np.random.randn(n_samples, 2)
    target = np.random.randn(n_samples)

    # Non-linear autoregression with exogenous inputs, random forest as the
    # base model, evaluated with a 3-step-ahead prediction.
    forest_narx = NARX(RandomForestRegressor(n_estimators=10),
                       auto_order=2,
                       exog_order=[2, 2],
                       exog_delay=[1, 1])
    forest_narx.fit(exog, target)
    forest_pred = forest_narx.predict(exog, target, step=3)
    assert len(forest_pred) == 100

    if not has_xgboost:
        return

    # Direct multi-step autoregression with XGBRegressor as the base model;
    # the prediction step is fixed at construction time.
    direct_xgb = DirectAutoRegressor(XGBRegressor(n_estimators=10),
                                     auto_order=2,
                                     exog_order=[2, 2],
                                     exog_delay=[1, 1],
                                     pred_step=3)
    direct_xgb.fit(exog, target)
    xgb_pred = direct_xgb.predict(exog, target)
    assert len(xgb_pred) == 100
예제 #2
0
def test_forecast():
    """Check that a 10-step forecast yields exactly 10 values."""
    horizon = 10
    exog = np.random.randn(100, 1)
    target = np.random.randn(100)

    model = NARX(LinearRegression(), auto_order=2, exog_order=[2])
    model.fit(exog, target)

    # Future exogenous inputs are supplied for one step fewer than the horizon.
    future_exog = np.random.randn(horizon - 1, 1)
    forecast = model.forecast(exog, target, step=horizon, X_future=future_exog)
    assert len(forecast) == horizon
def narx(df):
    """Fit a random-forest NARX model on ``df`` and return its 3-step prediction.

    ``df`` is passed as both the exogenous input and the target series.
    """
    model = NARX(
        RandomForestRegressor(),
        auto_order=2,
        exog_order=[2],
        exog_delay=[1],
    )
    model.fit(df, df)
    return model.predict(df, df, step=3)
예제 #4
0
def test_TimeSeriesRegressor_grid_search():
    """Run ``grid_search`` over Ridge ``alpha`` values on seeded random data.

    There is no assertion: the test passes when the search completes
    without raising.
    """
    np.random.seed(0)
    exog = pd.DataFrame(np.random.randn(100, 2))
    target = pd.Series(np.random.randn(100))

    model = NARX(Ridge(),
                 auto_order=3,
                 exog_order=[3, 3],
                 exog_delay=[1, 1])

    alpha_grid = {'alpha': [0, 0.1, 0.3]}
    model.grid_search(exog, target, alpha_grid)
예제 #5
0
def test_TimeSeriesRegressor_create_features(na, nb, nk):
    """Compare ``NARX._preprocess_data`` with the ``helper_preprocess`` reference.

    ``na``, ``nb`` and ``nk`` are forwarded to NARX as ``auto_order``,
    ``exog_order`` and ``exog_delay`` and to the helper unchanged.
    """
    np.random.seed(0)
    exog = pd.DataFrame(np.random.randn(100, 2))
    target = pd.Series(np.random.randn(100))

    model = NARX(LinearRegression(),
                 auto_order=na,
                 exog_order=nb,
                 exog_delay=nk)

    # The model is fed raw arrays, the helper the pandas objects.
    features_got, target_got = model._preprocess_data(exog.values,
                                                      target.values)
    features_want, target_want = helper_preprocess(exog, target, na, nb, nk)

    comparisons = ((features_got, features_want), (target_got, target_want))
    for got, want in comparisons:
        np.testing.assert_array_equal(got, want)
예제 #6
0
def test_NARX():
    """Fit and print 3-step predictions for lag orders 2 and then 1.

    Fresh random data is drawn for each order; the same value is used for
    both the autoregressive order and the single exogenous order.
    """
    for lag_order in (2, 1):
        exog = np.random.randn(100, 1)
        target = np.random.randn(100)
        model = NARX(RandomForestRegressor(),
                     auto_order=lag_order,
                     exog_order=[lag_order])
        model.fit(exog, target)
        print(model.predict(exog, target, step=3))
예제 #7
0
def test_TimeSeriesRegressor_predict():
    """Check NARX 2-step prediction against a hand-rolled computation.

    The model's ``predict(..., step=2)`` output is compared with a manual
    two-stage prediction built from a plain ``LinearRegression`` fitted on
    the lag features returned by ``helper_preprocess``.
    """
    np.random.seed(0)
    X = pd.DataFrame(np.random.randn(100, 2))
    y = pd.Series(np.random.randn(100))
    na = 3
    nb = [3, 3]
    nk = [1, 1]
    step = 2
    mdl = NARX(LinearRegression(), auto_order=na, exog_order=nb, exog_delay=nk)

    mdl.fit(X, y)
    ypred_act = mdl.predict(X, y, step=step)
    # The scores are discarded; these calls only exercise both scoring methods.
    mdl.score(X, y, step=step, method="r2")
    mdl.score(X, y, step=step, method="mse")

    # -------- manual computation ---------------
    kernel_mdl = LinearRegression()
    # Keep rows containing NaN (removeNA=False) so rows stay aligned with X / y.
    Xfeatures_exp, ytarget_exp = helper_preprocess(X,
                                                   y,
                                                   na,
                                                   nb,
                                                   nk,
                                                   removeNA=False)
    # Here mask is True for rows that have a NaN in the target or any feature.
    mask = np.isnan(ytarget_exp) | np.isnan(Xfeatures_exp).any(axis=1)
    kernel_mdl.fit(Xfeatures_exp[~mask, :], ytarget_exp[~mask])

    # First-stage prediction; rows with missing data stay NaN.
    ypred_exp1 = np.empty(X.shape[0]) * np.nan
    ypred_exp1[~mask] = kernel_mdl.predict(Xfeatures_exp[~mask, :])

    # NOTE(review): X1 is never used below (only in the commented-out line).
    X1 = copy.deepcopy(Xfeatures_exp)
    X2 = copy.deepcopy(Xfeatures_exp)
    # Xfeatures_updated = mdl._update_lag_features(X1, ypred_exp1)

    # Build the second-stage feature matrix by shifting each 3-column group.
    # Presumably columns 0-2 are lags of y and 3-5 / 6-8 are lags of the two
    # exogenous inputs (na=3, nb=[3, 3]) -- confirm against helper_preprocess.
    # Target group: move each column one place right, insert the prediction.
    X2[:, 1:3] = X2[:, 0:2]
    X2[:, 0] = ypred_exp1

    # First exogenous group: move right, then refill the leading column via
    # shift(..., -1) (presumably the value one sample ahead -- confirm).
    X2[:, 4:6] = X2[:, 3:5]
    X2[:, 3] = shift(X2[:, 3], -1)

    # Second exogenous group: same treatment.
    X2[:, 7:9] = X2[:, 6:8]
    X2[:, 6] = shift(X2[:, 6], -1)
    # mask is redefined with the opposite sense: True for rows WITHOUT NaN.
    mask = ~np.isnan(X2).any(axis=1)

    ypred_exp2 = np.empty(X2.shape[0]) * np.nan
    ypred_exp2[mask] = kernel_mdl.predict(X2[mask, :])
    # Prepend two NaNs and truncate so the result has len(y) entries.
    ypred_exp2 = np.concatenate([np.empty(2) * np.nan, ypred_exp2])[0:len(y)]

    # print(X2)
    # print(ypred_act)
    np.testing.assert_array_almost_equal(ypred_act, ypred_exp2)
예제 #8
0
def test_preprocess_data():
    """Verify ``_preprocess_data`` on a small hand-computed example.

    Seven samples with two exogenous inputs are expected to yield two
    feature rows and two targets for the given lag orders and delays; the
    model is then fitted on the same data as a smoke test.
    """
    exog = np.array([[1., 3.], [2., 7.], [4., 6.], [3., 8.], [5., 5.],
                     [2.5, 4.5], [3., 3.8]])
    target = np.array([1., 5., 7., 4., 6., 3., 2.])

    expected_target = np.array([3., 2.])
    expected_features = np.array([[6., 4., 3., 4., 6., 7., 3.],
                                  [3., 6., 5., 3., 8., 6., 7.]])

    model = NARX(LinearRegression(),
                 auto_order=2,
                 exog_order=[2, 3],
                 exog_delay=[1, 2])
    got_features, got_target = model._preprocess_data(exog, target)

    np.testing.assert_array_equal(got_target, expected_target)
    np.testing.assert_array_equal(got_features, expected_features)

    model.fit(exog, target)
예제 #9
0
def test_forecast_and_predict_consistency():
    """Check that ``forecast`` reproduces the last value of ``predict``.

    For 1, 2 and 3 steps, the final in-sample prediction must match the
    final forecast made from the series with the last ``step`` samples
    held out.
    """
    np.random.seed(0)
    exog = np.random.randn(10, 1)
    target = np.random.randn(10)
    model = NARX(LinearRegression(), auto_order=2, exog_order=[2])
    model.fit(exog, target)

    # 1-step: no future exogenous inputs are passed.
    one_step_pred = model.predict(exog, target, step=1)
    one_step_forecast = model.forecast(exog[:-1, :], target[:-1], step=1)
    np.testing.assert_almost_equal(one_step_pred[-1], one_step_forecast[-1])

    # 2- and 3-step: the held-out exogenous rows, except the final one, are
    # passed as X_future.
    for horizon in (2, 3):
        predicted = model.predict(exog, target, step=horizon)
        future_exog = exog[-horizon:-1, :]
        forecast = model.forecast(exog[:-horizon, :],
                                  target[:-horizon],
                                  step=horizon,
                                  X_future=future_exog)
        np.testing.assert_almost_equal(predicted[-1], forecast[-1])
def narx_rf(df):
    """Grid-search a random-forest NARX model on ``df``, refit, and predict.

    ``df`` is used as both the exogenous input and the target. Returns the
    3-step prediction of the refitted model.
    """
    model = NARX(
        RandomForestRegressor(),
        auto_order=2,
        exog_order=[2],
        exog_delay=[1],
    )

    n_estimators_grid = {'n_estimators': [10, 30, 100]}
    model.grid_search(df, df, n_estimators_grid, verbose=2)

    # Best hyper-parameters are set after grid search; printing the model
    # shows the difference.
    print(model)

    model.fit(df, df)
    return model.predict(df, df, step=3)
예제 #11
0
import jsonpreprocess as jp
import pandas
from datetime import datetime
from fireTS.models import NARX
from sklearn.linear_model import LinearRegression

if __name__ == "__main__":
    # Demo script: fit a linear NARX model on the first `train_size` rows of
    # the fetched data and print a 3-step prediction for the remaining rows.
    train_size = 300
    # Columns used as exogenous inputs. pandas.concat needs exactly one key
    # per concatenated Series, so only these two names are passed as keys.
    feature_columns = ["Today", "Volume"]

    data = jp.getJSONObjectCP("link-chainlink", "2019-02-20", "2020-02-08")
    df = jp.convertJSONToDataFrame(data, datetime(2019, 2, 20),
                                   datetime(2020, 2, 8))

    xtrain = pandas.concat([df[col][0:train_size] for col in feature_columns],
                           axis=1,
                           keys=feature_columns)
    ytrain = df["Today"][0:train_size]
    xtest = pandas.concat([df[col][train_size:] for col in feature_columns],
                          axis=1,
                          keys=feature_columns)
    ytest = df["Today"][train_size:]
    print(xtrain)

    narx_mdl = NARX(LinearRegression(),
                    auto_order=6,
                    exog_order=[2, 2],
                    exog_delay=[0, 0])
    narx_mdl.fit(xtrain, ytrain)
    ypred = narx_mdl.predict(xtest, ytest, step=3)
    print(ypred)
예제 #12
0
def test_forecast_exog_delay():
    """Pin the 1-step forecast value for exogenous delays 0, 1 and 2.

    The same seeded data is used for every delay; each model is fitted on
    the full series and forecasts from the series without its last sample.
    """
    np.random.seed(0)
    exog = np.random.randn(10, 1)
    target = np.random.randn(10)

    # (exog_delay, expected 1-step forecast)
    expected_by_delay = (
        (0, -0.50000582),
        (1, -0.53345719),
        (2, -0.61640028),
    )

    for delay, expected in expected_by_delay:
        model = NARX(LinearRegression(),
                     auto_order=2,
                     exog_order=[2],
                     exog_delay=[delay])
        model.fit(exog, target)
        forecast = model.forecast(exog[:-1, :], target[:-1], step=1)
        np.testing.assert_almost_equal(forecast, [expected])
예제 #13
0
# Scale data to range from 0 to 1.
# scale_dataset returns the scaled values together with the scaler object;
# pos_scaler is reused below to undo the scaling of the predictions.
comp_scaled, comp_scaler = modeling_utilities.scale_dataset(
    comp_df['average_compound'].to_numpy())
pos_rate_scaled, pos_scaler = modeling_utilities.scale_dataset(
    pos_df['pos_rate'].to_numpy())

# Split data into training and testing.
# shuffle=False keeps the samples in their original order
# (presumably sklearn's train_test_split -- confirm the import).
x_train, x_test, y_train, y_test = train_test_split(comp_scaled,
                                                    pos_rate_scaled,
                                                    test_size=0.20,
                                                    random_state=None,
                                                    shuffle=False)

# Create and train NARX model (random-forest base model, training split only)
model = NARX(RandomForestRegressor(),
             auto_order=2,
             exog_order=[2],
             exog_delay=[1])
model.fit(x_train, y_train)

# Use the model to create a prediction and plot the results.
# The 3-step prediction is made over the full series, not just the test split,
# and mapped back to the original scale before plotting.
full_prediction = model.predict(comp_scaled, pos_rate_scaled, step=3)
full_pred_rescaled = pos_scaler.inverse_transform(
    full_prediction.reshape(-1, 1))
# len(y_train) is passed to the plot helper (presumably to mark the
# train/test boundary -- confirm against plot_prediction).
modeling_utilities.plot_prediction(
    comp_df['day'], pos_df['pos_rate'], full_pred_rescaled, len(y_train),
    'Prediction of COVID-19 Positivity Rate with NARX Model')

# Print MSE for both training and testing.
# Only the training MSE is computed in the code visible here. The slice skips
# the first 5 samples (presumably because the earliest predictions are NaN
# while the lag window fills -- confirm) and stops at the end of training.
mse = mean_squared_error(pos_rate_scaled[5:len(y_train)],
                         full_prediction[5:len(y_train)])
print('Training MSE =', mse)