from pmdarima.arima import ARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import FourierFeaturizer
from pmdarima.model_selection import RollingForecastCV, \
    SlidingWindowForecastCV, cross_val_score, cross_val_predict
from pmdarima.datasets import load_airpassengers
import pytest
import numpy as np
from unittest import mock

y = load_airpassengers()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=120, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True),
    ARIMA(order=(1, 1, 2),
          seasonal_order=(0, 1, 1, 12),
          maxiter=2,
          simple_differencing=True,
          suppress_warnings=True),
    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arima", ARIMA(order=(2, 1, 0), maxiter=2, simple_differencing=True))
    ])
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    scores = cross_val_score(est,
                             y,
                             exogenous=exog,
                             scoring='mean_squared_error',
                             cv=cv,
                             verbose=verbose)
    assert isinstance(scores, np.ndarray)


def test_cross_val_predict_error():
    cv = SlidingWindowForecastCV(step=24, h=1)
    with pytest.raises(ValueError):
        cross_val_predict(ARIMA(order=(2, 1, 0), maxiter=3), y, cv=cv)
Example #3
        assert not os.path.exists(new_loc)

        # Show we get an OSError now
        with pytest.raises(OSError) as ose:
            joblib.load(pkl_file)
        assert "Does it still" in str(ose), ose

    finally:
        _unlink_if_exists(pkl_file)
        _unlink_if_exists(new_loc)


@pytest.mark.parametrize(
    'model', [
        # ARMA
        ARIMA(order=(1, 0, 0)),

        # ARIMA
        ARIMA(order=(1, 1, 2)),

        # SARIMAX
        ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12))
    ]
)
def test_issue_104(model):
    # Issue 104 shows that observations were not being updated appropriately.
    # We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX)
    endog = wineind
    train, test = endog[:125], endog[125:]

    model.fit(train)
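    # A hedged sketch of how this truncated check might continue (this is not
    # the original test body): after updating with the held-out observations,
    # the fitted results should reflect the full series, not just the train split.
    model.update(test)
    assert model.arima_res_.data.endog.shape[0] == endog.shape[0]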
Example #4
def test_to_dict_raises_attribute_error_on_unfit_model():
    modl = ARIMA(order=(1, 1, 0))
    with pytest.raises(AttributeError):
        modl.to_dict()
Example #5
def test_oob_for_issue_28():
    # Continuation of above: can we do one with an exogenous array, too?
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(order=(2, 1, 2),
                  suppress_warnings=True,
                  out_of_sample_size=10).fit(
        y=hr, exogenous=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # Assert that the endog shapes match. First is equal to the original,
    # and the second is the differenced array, with original shape - d.
    assert np.allclose(arima.arima_res_.data.endog, hr, rtol=1e-2)
    assert arima.arima_res_.model.endog.shape[0] == hr.shape[0] - 1

    # Now assert the same for exog
    assert np.allclose(arima.arima_res_.data.exog, xreg, rtol=1e-2)
    assert arima.arima_res_.model.exog.shape[0] == xreg.shape[0] - 1

    # Compare the OOB score to an equivalent fit on data - 10 obs, but
    # without any OOB scoring, and we'll show that the OOB scoring in the
    # first IS in fact only applied to the first (train - n_out_of_bag)
    # samples
    arima_no_oob = ARIMA(
        order=(2, 1, 2), suppress_warnings=True,
        out_of_sample_size=0).fit(y=hr[:-10],
                                  exogenous=xreg[:-10, :])

    scoring = get_callable(arima_no_oob.scoring, VALID_SCORING)
    preds = arima_no_oob.predict(n_periods=10, exogenous=xreg[-10:, :])
    assert np.allclose(oob, scoring(hr[-10:], preds), rtol=1e-2)

    # Show that the model parameters are not the same because the model was
    # updated.
    xreg_test = rs.rand(5, 4)
    assert not np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # Now assert on the forecast differences.
    with_oob_forecasts = arima.predict(n_periods=5, exogenous=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5,
                                            exogenous=xreg_test)

    with pytest.raises(AssertionError):
        assert_array_almost_equal(with_oob_forecasts, no_oob_forecasts)

    # But after we update the no_oob model with the latest data, we should
    # be producing the same exact forecasts

    # First, show we'll fail if we try to add observations with no exogenous
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-10:], None)

    # Also show we'll fail if we try to add mis-matched shapes of data
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-10:], xreg_test)

    # Show we fail if we try to add observations with a different dim exog
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-10:], xreg_test[:, :2])

    # Actually add them now, and compare the forecasts (should be the same)
    arima_no_oob.update(hr[-10:], xreg[-10:, :])
    assert np.allclose(with_oob_forecasts,
                       arima_no_oob.predict(n_periods=5,
                                            exogenous=xreg_test),
                       rtol=1e-2)
    labels_windows = reader.label_windows.get(dataset_name)

    # Evaluate the fit residuals to identify outliers.
    whole_df = df.copy()
    df = df.iloc[10:, :]

    models_dir = "fitted_models"
    if not os.path.isdir(models_dir):
        os.mkdir(models_dir)
    fname = "{}_model.pkl".format(dataset_name[:-4])
    fpath = os.path.join(models_dir, fname)
    if not os.path.isfile(fpath) or args.overwrite:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            arima = ARIMA(
                order=saved_parameters["order"],
                seasonal_order=saved_parameters["seasonal_order"]
            )
            arima.fit(df.value)
            print("Saving fitted model on disk")
            joblib.dump(arima, fpath, compress=3)
    else:
        print("Reading model from disk")
        arima = joblib.load(fpath)

    gt_pred, gt_windows = get_gt_arrays(
        df.index, df.index, labels, labels_windows
    )

    # Compute metrics
    metrics_columns = ["precision", "recall", "f_score", "nab_score"]
    Metrics = collections.namedtuple("Metrics", metrics_columns)
Example #7
class Model:
    def select_data(self):
        # Merge columns into a single dataframe of observed values, based on date
        dataset = files.data_main.join(files.data_exo.set_index('date'),
                                       on='date').dropna()
        # Select part of the precipitation dataframe that corresponds to the forecast
        obs_end = dataset.tail(1)['date'].values[0]
        exo_prev = files.data_exo[(files.data_exo['date'] > obs_end)]
        # Select predict dates
        self.dates_prev = exo_prev['date']
        # Reshape
        endo_obs = np.array(dataset['endo_value'])
        self.endo_obs = endo_obs.reshape(-1, 1)
        exo_obs = np.array(dataset['exo_value'])
        self.exo_obs = exo_obs.reshape(-1, 1)
        exo_prev = np.array(exo_prev['exo_value'])
        self.exo_prev = exo_prev.reshape(-1, 1)

    def normalize(self):
        # Apply a Box-Cox transform only if the series has no non-positive values
        n_zeros = len(self.endo_obs[self.endo_obs <= 0])
        if n_zeros == 0:
            self.endo_obs2, lambda_ = boxcox(self.endo_obs.ravel())
            # Keep lambda as a length-1 array so self.lambda_boxcox[0] works downstream
            self.lambda_boxcox = np.array([lambda_])
            # Limit lambda values: fall back to the raw series for extreme lambdas
            if abs(self.lambda_boxcox[0]) > 1:
                self.endo_obs2 = self.endo_obs
                self.lambda_boxcox = np.array([-999.0])
        else:
            self.endo_obs2 = self.endo_obs
            self.lambda_boxcox = np.array([-999.0])
        #print(self.endo_obs2, self.lambda_boxcox)

    def run_auto(self):
        self.arima_model = auto_arima(self.endo_obs2,
                                      start_p=0,
                                      start_d=0,
                                      start_q=0,
                                      max_p=3,
                                      max_d=1,
                                      max_q=3,
                                      start_P=0,
                                      start_Q=0,
                                      D=1,
                                      seasonal=False,
                                      m=1,
                                      exogenous=self.exo_obs,
                                      trace=True,
                                      error_action='ignore',
                                      suppress_warnings=True,
                                      stepwise=True)
        #print(model.arima_model.summary())
        # Compile parameters to list
        self.parameters = [
            self.arima_model.order, self.arima_model.seasonal_order,
            self.lambda_boxcox[0],
            self.arima_model.aic()
        ]
        print(self.parameters)
        return (self.arima_model)

    def run_auto_arimax(self):
        lower_aic = float(99999)
        best_pdq = [0, 0, 0]
        param = list(itertools.product(range(0, 4), range(0, 2), range(0, 4)))
        for pdq in param:
            #print(pdq)
            try:
                self.arima_model = ARIMA(order=pdq,
                                         suppress_warnings=True).fit(
                                             y=self.endo_obs2,
                                             exogenous=self.exo_obs)
                if self.arima_model.aic() < lower_aic:
                    lower_aic = self.arima_model.aic()
                    best_pdq = tuple(self.arima_model.order)
            except:
                continue
        #print(model.arima_model.summary())
        # Compile parameters to list
        self.parameters = [best_pdq, self.lambda_boxcox[0], lower_aic]
        print(self.parameters)
        return (self.arima_model)

    def run_auto_sarimax(self):
        lower_aic = float(99999)
        best_pdq = [0, 0, 0]
        best_spdq = [0, 0, 0]
        param = list(itertools.product(range(0, 4), range(0, 2), range(0, 4)))
        m = 1  # frequency
        param_seasonal = [(x[0], x[1], x[2], m) for x in list(
            itertools.product(range(0, 4), range(0, 2), range(0, 4)))]
        for pdq in param:
            for spdq in param_seasonal:
                try:
                    mod = sm.tsa.statespace.SARIMAX(
                        self.endo_obs2,
                        exog=self.exo_obs,
                        order=pdq,
                        seasonal_order=spdq,
                        enforce_stationarity=False,
                        enforce_invertibility=False)
                    self.arima_model = mod.fit(disp=0)
                    print('ARIMA{}x{}{} - AIC:{}'.format(
                        pdq, spdq, m, self.arima_model.aic))
                    if self.arima_model.aic < lower_aic:
                        lower_aic = self.arima_model.aic
                        best_pdq = tuple(pdq)
                        best_spdq = tuple(spdq)
                except:
                    continue
        #print(model.arima_model.summary())
        # Compile parameters to list
        self.parameters = [
            best_pdq, best_spdq, self.lambda_boxcox[0], lower_aic
        ]
        print(self.parameters)
        return (self.arima_model)

    def forecast(self):
        #self.predict = self.arima_model.predict(n_periods=self.exo_prev.shape[0], exogenous=self.exo_prev)
        self.predict = self.arima_model.predict(
            n_periods=self.exo_prev.shape[0],
            exogenous=self.exo_prev,
            return_conf_int=True,
            alpha=0.7)

    def renormalize(self):
        if self.lambda_boxcox[0] == float(-999):
            self.predict_mean = self.predict[0]
            self.predict_down = self.predict[1][:, 0]
            self.predict_up = self.predict[1][:, 1]
        else:
            self.predict_mean = inv_boxcox(self.predict[0], self.lambda_boxcox)
            self.predict_down = inv_boxcox(self.predict[1][:, 0],
                                           self.lambda_boxcox)
            self.predict_up = inv_boxcox(self.predict[1][:, 1],
                                         self.lambda_boxcox)
        # Join predict dates with values into a dataframe
        df_final = pd.DataFrame(self.predict_mean, self.dates_prev)
        df_final.columns = ['endo_value']
        return (df_final)
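

# --- A minimal usage sketch (not part of the original module) ---
# The Model class relies on a module-level `files` object exposing `data_main`
# and `data_exo` DataFrames with 'date'/'endo_value' and 'date'/'exo_value'
# columns, plus the pandas/numpy/scipy/itertools/pmdarima (1.x `exogenous=`
# API) imports used above. The stand-in data below is purely illustrative.
if __name__ == "__main__":
    import types
    import numpy as np
    import pandas as pd

    dates = pd.date_range("2020-01-01", periods=48, freq="D")
    files = types.SimpleNamespace(
        data_main=pd.DataFrame({"date": dates[:36],
                                "endo_value": np.random.rand(36) + 1.0}),
        data_exo=pd.DataFrame({"date": dates,
                               "exo_value": np.random.rand(48) + 1.0}),
    )

    model = Model()
    model.select_data()         # build endo_obs / exo_obs / exo_prev arrays
    model.normalize()           # optional Box-Cox transform of the endogenous series
    model.run_auto_arimax()     # grid-search ARIMAX orders (or run_auto / run_auto_sarimax)
    model.forecast()            # forecast over the exo_prev horizon
    print(model.renormalize())  # back-transformed forecasts indexed by date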
if args.fit or args.auto_fit:
    if args.auto_fit:
        arima = auto_arima(
            train,
            stepwise=True,
            trace=1,
            m=args.period,
            information_criterion="aicc",
            seasonal=args.seasonal,
            error_action="ignore",
            suppress_warnings=True,
        )
    elif args.fit:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            arima = ARIMA(order=args.order,
                          seasonal_order=args.seasonal_order)
            arima.fit(train)

    print(arima.summary())

    residuals = arima.resid()
    print("train lengths: data={} resid={}".format(
        train_len, residuals.shape[0]))
    len_delta = train_len - residuals.shape[0]

    # Diagnostics plot
    arima.plot_diagnostics(lags=50)
    box_ljung(residuals, nlags=20).format()
    plt.gcf().suptitle('Diagnostics Plot')
    plt.figure()
    plt.plot(df.value.index[len_delta:train_len],
Example #9
# It may be a lost cause, but by all means prove me wrong.


def example_pmd():
    s = {}
    y, a = hospital_with_exog(k=3)
    x = [pmd_exogenous(y=yj, s=s, k=3, a=aj) for yj, aj in zip(y[:500], a)]
    return s


def arima_res_to_dict(arima_res):
    state = arima_res.__dict__
    return state


def pmd_to_dict(pmd):
    pmd['model'] = pmd['model'].__getstate__()
    pmd['model']['arima_res_'] = arima_res_to_dict(pmd['model']['arima_res_'])
    return pmd


def pmd_from_dict(pmd):
    pmd['model']['arima_res_'] = ''


if __name__ == '__main__':
    pmd = example_pmd()
    model = pmd['model']
    model1 = ARIMA(**model.get_params())
    prms = model.__dict__['arima_res_'].__dict__['_results'].params
Example #10
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer
import pytest

y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2, max_q=0,
                                start_q=0, start_p=1,
                                maxiter=3, error_action='ignore'))
        ])
    ]
)
def test_clonable(est):
    # fit it, then clone it
    est.fit(y)
    est2 = clone(est)
    assert isinstance(est2, est.__class__)
    assert est is not est2
from pmdarima.arima import ARIMA
from pmdarima.datasets import load_lynx

import pytest
import os
import platform

lynx = load_lynx()

# test images directories
travis = os.environ.get("TESTING_ON_TRAVIS", "false").lower() == "true"

# Do not test on travis because they hate MPL
if not travis:

    # base images are created on Mac/Darwin. Windows needs a higher tolerance
    if platform.system() == "Windows":
        tolerance = 10
    else:
        tolerance = 5

    @pytest.mark.parametrize('model_type,model', [
        pytest.param('arma', ARIMA(order=(1, 0, 0))),
        pytest.param('arima', ARIMA(order=(1, 1, 0))),
        pytest.param('sarimax',
                     ARIMA(order=(1, 1, 0), seasonal_order=(1, 0, 0, 12)))
    ])
    @pytest.mark.mpl_image_compare(tolerance=tolerance)
    def test_plot_diagnostics(model_type, model):
        model.fit(lynx)
        return model.plot_diagnostics(figsize=(15, 12))
Example #12
def run():
    symbol = input("Enter ticker symbol: ")

    now = dt.datetime.now()
    timeFinish = now + dt.timedelta(minutes=minutes)

    while (now < timeFinish):
        try:
            now = dt.datetime.now()

            client = Client(environment=PRACTICE,
                            account_id="",
                            access_token=ACCESS_TOKEN)

            json_data = []

            json_data = client.get_instrument_history(instrument=symbol,
                                                      granularity=timeframe,
                                                      candle_format="midpoint",
                                                      count=1440)
            json_data = json_data['candles']
            df = pd.DataFrame(json_data)

            data = df.copy()
            data = data.set_index('time')[['closeMid']]
            data = data.set_index(pd.to_datetime(data.index))
            data.columns = [CLOSE]

            # Rescale data
            lnprice = np.log(data)

            # Create and fit the model
            model_temp = auto_arima(lnprice.values,
                                    start_p=1,
                                    start_q=1,
                                    max_p=1,
                                    max_q=1,
                                    m=4,
                                    start_P=0,
                                    seasonal=False,
                                    d=1,
                                    D=1,
                                    trace=True,
                                    error_action='ignore',
                                    suppress_warnings=True,
                                    stepwise=True)

            model = ARIMA(order=model_temp.order)
            fit = model.fit(lnprice.values)

            # Predict
            future_forecast = fit.predict(n_periods=n_periods_ahead)
            future_forecast = np.exp(future_forecast)

            # Calculations
            lowest = min(future_forecast[0], future_forecast[-1])
            highest = max(future_forecast[0], future_forecast[-1])
            current = data[CLOSE].iloc[-1]
            x = ((future_forecast[0] - future_forecast[-1]) /
                 future_forecast[0]) * 100
            slope = (future_forecast[0] -
                     future_forecast[-1]) / n_periods_ahead
            degree = math.degrees(math.atan(slope))

            # Trending
            if (x > 0):
                trending = "Positivly / Call"
            else:
                trending = "Negativaly / Put"

            # View
            print("==========================")
            print("Current Price: ", current)
            print("Highest price: ", highest)
            print("Lowest Price: ", lowest)
            print("Trending: ", trending)
            print("Degrees: ", degree)
            print("==========================" + "\n")
        except Exception as e:
            print(e)

        time.sleep(SLEEP)

    return 0
Example #13
def test_not_fitted_error():
    with pytest.raises(sk.NotFittedError) as nfe:
        mod = ARIMA((0, 1, 0))
        sk.check_is_fitted(mod, "arima_res_")
    assert "Model has not been fit!" in pytest_error_str(nfe)
from pmdarima.arima import ARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.preprocessing import FourierFeaturizer
from pmdarima.model_selection import RollingForecastCV, \
    SlidingWindowForecastCV, cross_val_score
from pmdarima.datasets import load_wineind
import pytest
import numpy as np
from unittest import mock

y = load_wineind()
exogenous = np.random.RandomState(1).rand(y.shape[0], 2)


@pytest.mark.parametrize('cv', [
    SlidingWindowForecastCV(window_size=100, step=24, h=1),
    RollingForecastCV(initial=150, step=12, h=1),
])
@pytest.mark.parametrize('est', [
    ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)),
    ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)),
    Pipeline([("fourier", FourierFeaturizer(m=12)),
              ("arima", ARIMA(order=(2, 1, 0), maxiter=3))])
])
@pytest.mark.parametrize('verbose', [0, 2, 4])
@pytest.mark.parametrize('exog', [None, exogenous])
def test_cv_scores(cv, est, verbose, exog):
    scores = cross_val_score(est,
                             y,
                             exogenous=exog,
                             scoring='mean_squared_error',
                             cv=cv,
                             verbose=verbose)
    assert isinstance(scores, np.ndarray)
    }

    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **kwargs)
    assert "'n_periods'" in pytest_error_str(ve)

    # Assert that we can update the model
    pipeline.update(test, maxiter=5)

    # And that the fourier transformer was updated properly...
    assert pipeline.steps_[0][1].n_ == wineind.shape[0]


@pytest.mark.parametrize('pipeline', [
    Pipeline([
        ("arma", ARIMA(order=(2, 0, 0)))
    ]),

    Pipeline([
        ("arima", ARIMA(order=(2, 1, 0)))
    ]),

    Pipeline([
        ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)))
    ]),

    Pipeline([
        ("fourier", FourierFeaturizer(m=12)),
        ("arma", ARIMA(order=(2, 0, 0)))
    ]),
Example #16
    with pytest.raises(ValueError) as ve:
        pipeline.predict(3, **kwargs)
    assert "'n_periods'" in pytest_error_str(ve)

    # Assert that we can update the model
    pipeline.update(test, maxiter=5)

    # And that the fourier transformer was updated properly...
    assert pipeline.steps_[0][1].n_ == wineind.shape[0]


@pytest.mark.parametrize(
    'pipeline',
    [
        Pipeline([("arma", ARIMA(order=(2, 0, 0)))]),
        Pipeline([("arima", ARIMA(order=(2, 1, 0)))]),
        Pipeline([
            ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)))
        ]),
        Pipeline([("fourier", FourierFeaturizer(m=12)),
                  ("arma", ARIMA(order=(2, 0, 0)))]),
        Pipeline([("fourier", FourierFeaturizer(m=12)),
                  ("arima", ARIMA(order=(2, 1, 0)))]),

        # one with a boxcox transformer
        Pipeline([("boxcox", BoxCoxEndogTransformer()),
                  ("fourier", FourierFeaturizer(m=12)),
                  ("arima",
                   AutoARIMA(seasonal=False,
                             stepwise=True,
Example #17
    X = np.random.RandomState(1).rand(vec.shape[0], 2)
    auto_arima(vec,
               X=X,
               out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, X=X)


@pytest.mark.parametrize(
    # will be m - d
    'model',
    [
        ARIMA(order=(2, 0, 0)),  # arma
        ARIMA(order=(2, 1, 0)),  # arima
        ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)),  # sarimax
    ])
def test_predict_in_sample_conf_int(model):
    model.fit(wineind)
    expected_m_dim = wineind.shape[0]
    preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05)
    assert preds.shape[0] == expected_m_dim
    assert confints.shape == (expected_m_dim, 2)


@pytest.mark.parametrize(
    'model',
    [
        ARIMA(order=(2, 0, 0)),  # arma
Example #18
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139],
        [-0.10692387, 0.98852139]
    ])

    _, intervals = arma.predict(n_periods=10, return_conf_int=True,
                                alpha=0.05)
    assert_array_almost_equal(intervals, expected_intervals)


@pytest.mark.parametrize(
    # will be m - d
    'model, expected_m_dim', [
        pytest.param(ARIMA(order=(2, 0, 0)), wineind.shape[0]),  # arma
        pytest.param(ARIMA(order=(2, 1, 0)), wineind.shape[0] - 1),  # arima
        pytest.param(ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)),
                     wineind.shape[0]),  # sarimax
    ]
)
def test_predict_in_sample_conf_int(model, expected_m_dim):
    model.fit(wineind)
    preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05)
    assert preds.shape[0] == expected_m_dim
    assert confints.shape == (expected_m_dim, 2)


def test_with_oob():
    # show we can fit with CV (kinda)
    arima = ARIMA(order=(2, 1, 2),
Example #19
class ARIMAModel(BaseModel):
    def __init__(self):
        """
        Initialize Model
        """
        self.seasonal = True
        self.metric = 'mse'
        self.model = None
        self.model_init = False

    def _build(self, **config):
        """
        Build the model and initialize it.
        :param config: hyperparameters for the model
        """
        p = config.get('p', 2)
        d = config.get('d', 0)
        q = config.get('q', 2)
        self.seasonal = config.get('seasonality_mode', True)
        P = config.get('P', 1)
        D = config.get('D', 0)
        Q = config.get('Q', 1)
        m = config.get('m', 7)
        self.metric = config.get('metric', self.metric)

        order = (p, d, q)
        if not self.seasonal:
            seasonal_order = (0, 0, 0, 0)
        else:
            seasonal_order = (P, D, Q, m)

        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)

    def fit_eval(self, data, validation_data, **config):
        """
        Fit on the training data from scratch.
        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :return: the evaluation metric value
        """

        if not self.model_init:
            # Estimating differencing term (d) and seasonal differencing term (D)
            kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
            adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
            d = max(adf_diffs, kpss_diffs)
            D = 0 if not self.seasonal else nsdiffs(data, m=7, max_D=12)
            config.update(d=d, D=D)

            self._build(**config)
            self.model_init = True

        self.model.fit(data)
        val_metric = self.evaluate(x=None,
                                   target=validation_data,
                                   metrics=[self.metric])[0].item()
        return {self.metric: val_metric}

    def predict(self, x=None, horizon=24, update=False, rolling=False):
        """
        Predict horizon time-points ahead of the training data passed to fit_eval.
        :param x: ARIMA predicts the horizon steps forward from the training data,
            so x should be None as it is not used.
        :param horizon: the number of steps forward to predict
        :param update: whether to update the original model
        :param rolling: whether to use rolling prediction
        :return: predicted result of length horizon
        """
        if x is not None:
            raise ValueError("x should be None")
        if update and not rolling:
            raise Exception(
                "We don't support updating model without rolling prediction currently"
            )
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        if not update and not rolling:
            forecasts = self.model.predict(n_periods=horizon)
        elif rolling:
            if not update:
                self.save("tmp.pkl")

            forecasts = []
            for step in range(horizon):
                fc = self.model.predict(n_periods=1).item()
                forecasts.append(fc)

                # Updates the existing model with a small number of MLE steps for rolling prediction
                self.model.update(fc)

            if not update:
                self.restore("tmp.pkl")
                os.remove("tmp.pkl")

        return forecasts

    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate the forecasts against the target. The model forecasts len(target)
        time-points ahead of the training data passed to fit_eval, and those
        forecasts are scored against the target.
        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training data,
            so x should be None as it is not used.
        :param metrics: a list of metrics in string format
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)

        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]

    def save(self, checkpoint_file):
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling save")
        with open(checkpoint_file, 'wb') as fout:
            pickle.dump(self.model, fout)

    def restore(self, checkpoint_file):
        with open(checkpoint_file, 'rb') as fin:
            self.model = pickle.load(fin)
        self.model_init = True
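

# --- A minimal usage sketch (not part of the original module) ---
# Assumes the surrounding environment that provides BaseModel, Evaluator,
# ndiffs, nsdiffs, pickle and os used above; the data is purely illustrative
# and the 'mse' metric is assumed to be supported by Evaluator.
if __name__ == "__main__":
    import numpy as np

    train_data = np.random.rand(300)
    val_data = np.random.rand(24)

    model = ARIMAModel()
    # Fit on train_data and report the validation metric (default 'mse')
    print(model.fit_eval(train_data, val_data, p=2, q=2, seasonality_mode=False))

    # One-shot forecast of the next 24 points; rolling=True would instead
    # forecast one step at a time, updating the model after each step
    forecasts = model.predict(horizon=24)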
Example #20
def test_oob_sarimax():
    xreg = rs.rand(wineind.shape[0], 2)
    fit = ARIMA(order=(1, 1, 1),
                seasonal_order=(0, 1, 1, 12),
                maxiter=5,
                out_of_sample_size=15).fit(y=wineind, exogenous=xreg)

    fit_no_oob = ARIMA(order=(1, 1, 1),
                       seasonal_order=(0, 1, 1, 12),
                       out_of_sample_size=0,
                       maxiter=5,
                       suppress_warnings=True).fit(y=wineind[:-15],
                                                   exogenous=xreg[:-15, :])

    # now assert some of the same things here that we did in the former test
    oob = fit.oob()

    # compare scores:
    scoring = get_callable(fit_no_oob.scoring, VALID_SCORING)
    no_oob_preds = fit_no_oob.predict(n_periods=15, exogenous=xreg[-15:, :])
    assert np.allclose(oob, scoring(wineind[-15:], no_oob_preds), rtol=1e-2)

    # show params are no longer the same
    assert not np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # show we can add the new samples and get the exact same forecasts
    xreg_test = rs.rand(5, 2)
    fit_no_oob.update(wineind[-15:], xreg[-15:, :])
    assert np.allclose(fit.predict(5, xreg_test),
                       fit_no_oob.predict(5, xreg_test),
                       rtol=1e-2)

    # And also the params should be close now after updating
    assert np.allclose(fit.params(), fit_no_oob.params())

    # Show we can get a confidence interval out here
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    assert all(isinstance(a, np.ndarray) for a in (preds, conf))
Example #21
def ragged_fill_series(
    series,
    function=np.nanmean,
    backup_fill_method=np.nanmean,
    est_series=None,
    fitted_arma=None,
    arma_full_series=None,
):
    """Filling in the ragged ends of a series, adhering to the periodicity of the series. If there is only one observation and periodicity cannot be determined, series will be returned unchanged.

    parameters:
            :series: list/pandas Series: the series to fill the ragged edges of. Missings should be np.nans
    :function: the function to fill nas with (e.g. np.nanmean, etc.). Use "ARMA" for ARMA filling
    :backup_fill_method: function: which function to fill ragged edges with in case ARMA can't be estimated
    :est_series: list/pandas Series: optional, the series to calculate the fillna and/or ARMA function on. Should not have nas filled in yet by any method. E.g. a train set. If None, will calculated based on itself.
    :fitted_arma: optional, fitted ARMA model if available to avoid reestimating every time in the `gen_ragged_X` function
    :arma_full_series: optional, for_full_arma_dataset output of `gen_dataset` function. Fitting the ARMA model on the full series history rather than just the series provided

    output:
            :return: pandas Series with filled ragged edges
    """
    result = pd.Series(series).copy()
    if est_series is None:
        est_series = result.copy()

    # periodicity of the series, to see which to fill in
    nonna_bools = ~pd.isna(series)
    nonna_indices = list(
        nonna_bools.index[nonna_bools])  # existing indices with values
    # if there is only one non-na observation, can't determine periodicity or position in full series, don't fill anything
    if len(nonna_indices) > 1:
        periodicity = int(
            (pd.Series(result[~pd.isna(result)].index) -
             (pd.Series(result[~pd.isna(result)].index)).shift()
             ).mode()[0])  # how often data comes (quarterly, monthly, etc.)
        last_nonna = result.index[result.notna()][-1]
        fill_indices = nonna_indices + [
            int(nonna_indices[-1] + periodicity * i)
            for i in range(1, (len(series) - last_nonna))
        ]  # indices to be filled in, including only the correct periodicity
        fill_indices = [x for x in fill_indices if x in series.index
                        ]  # cut down on the indices if went too long

        if function == "ARMA":
            # estimate the model if not given
            if fitted_arma is None:
                fitted_arma = estimate_arma(est_series)
            # instantiate model with previously estimated parameters (i.e. on train set)
            arma = ARIMA(order=fitted_arma.order)
            arma.set_params(**fitted_arma.get_params())

            # refit the model on the full series to this point
            if arma_full_series is not None:
                y = list(arma_full_series[~pd.isna(arma_full_series)])
                present = list(result[~pd.isna(result)])
                # limit the series to the point where actuals are
                end_index = 0
                for i in range(len(present), len(y) + 1):
                    if list(y[(i - len(present)):i]) == list(present):
                        end_index = i
                y = y[:end_index]
            # refit model on just this series
            else:
                y = list(result[~pd.isna(result)])  # refit the model on data
                present = y.copy()
            # can fail if not enough datapoints for order of ARMA process
            try:
                arma.fit(y, error_action="ignore")
                preds = arma.predict(n_periods=int(len(series) - last_nonna))
                fills = list(present) + list(preds)
                fills = fills[:len(fill_indices)]
            except:
                fills = list(result[~pd.isna(result)]) + [
                    backup_fill_method(est_series)
                ] * (len(series) - last_nonna)
                fills = fills[:len(fill_indices)]
            result[fill_indices] = fills
        else:
            fills = list(result[~pd.isna(result)]) + [function(est_series)] * (
                len(series) - last_nonna)
            fills = fills[:len(fill_indices)]
            result[fill_indices] = fills

    return result, fitted_arma
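

# --- A minimal usage sketch (not part of the original module) ---
# Assumes the module-level pandas/numpy imports used by the function above;
# the series is purely illustrative. With a plain np.nanmean fill, only the
# trailing slots that match the series' periodicity are filled.
if __name__ == "__main__":
    import numpy as np
    import pandas as pd

    raw = pd.Series([1.0, np.nan, np.nan, 2.0, np.nan, np.nan,
                     3.0, np.nan, np.nan, np.nan, np.nan, np.nan])

    filled, arma_model = ragged_fill_series(raw, function=np.nanmean)
    print(filled)  # index 9 (the next in-period slot) is filled with the mean, 2.0
    # arma_model is None here because the ARMA path was not used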