Exemple #1
0
def test_oob_sarimax():
    """Out-of-sample scoring on a seasonal model with exogenous regressors.

    Fits one model on the full series with the last 15 observations held
    out for scoring, and a second model on the series minus those 15
    observations with no hold-out, then checks that the two agree on the
    hold-out score and (after the second model is updated) on forecasts.
    """
    n_holdout = 15
    n_ahead = 5
    xreg = rs.rand(wineind.shape[0], 2)

    # Model scored on the trailing hold-out window
    with_oob = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        out_of_sample_size=n_holdout,
    ).fit(y=wineind, exogenous=xreg)

    # Reference model that never sees the trailing window during fit
    without_oob = ARIMA(
        order=(1, 1, 1),
        seasonal_order=(0, 1, 1, 12),
        out_of_sample_size=0,
        suppress_warnings=True,
    ).fit(y=wineind[:-n_holdout], exogenous=xreg[:-n_holdout, :])

    oob_score = with_oob.oob()

    # The hold-out score must match scoring the reference model's
    # forecasts of the same window
    score_func = get_callable(without_oob.scoring, VALID_SCORING)
    reference_preds = without_oob.predict(n_periods=n_holdout,
                                          exogenous=xreg[-n_holdout:, :])
    reference_score = score_func(wineind[-n_holdout:], reference_preds)
    assert np.allclose(oob_score, reference_score, rtol=1e-2)

    # The two fits should not have ended up with the same parameters
    params_match = np.allclose(with_oob.params(), without_oob.params(),
                               rtol=1e-2)
    assert not params_match

    # Feeding the held-out samples to the reference model should bring its
    # forecasts in line with the hold-out model's
    xreg_test = rs.rand(n_ahead, 2)
    without_oob.update(wineind[-n_holdout:], xreg[-n_holdout:, :])
    forecasts_with = with_oob.predict(n_ahead, xreg_test)
    forecasts_without = without_oob.predict(n_ahead, xreg_test)
    assert np.allclose(forecasts_with, forecasts_without, rtol=1e-2)

    # Confidence intervals can be requested alongside the forecasts
    preds, conf = with_oob.predict(n_ahead, xreg_test, return_conf_int=True)
    assert isinstance(preds, np.ndarray) and isinstance(conf, np.ndarray)
Exemple #2
0
class ARIMAModel(BaseModel):
    """
    Thin wrapper around an ``ARIMA`` estimator that exposes fit/evaluate,
    multi-step and rolling prediction, and pickle-based save/restore.
    """

    def __init__(self):
        """
        Initialize Model
        """
        self.seasonal = True
        self.metric = 'mse'
        self.model = None
        self.model_init = False

    def _build(self, **config):
        """
        build the models and initialize.
        :param config: hyperparameters for the model. Recognized keys are
            'p', 'd', 'q', 'P', 'D', 'Q', 'm', 'seasonality_mode' and
            'metric'; missing keys fall back to the defaults below.
        """
        p = config.get('p', 2)
        d = config.get('d', 0)
        q = config.get('q', 2)
        self.seasonal = config.get('seasonality_mode', True)
        P = config.get('P', 1)
        D = config.get('D', 0)
        Q = config.get('Q', 1)
        m = config.get('m', 7)
        self.metric = config.get('metric', self.metric)

        order = (p, d, q)
        if not self.seasonal:
            seasonal_order = (0, 0, 0, 0)
        else:
            seasonal_order = (P, D, Q, m)

        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)

    def fit_eval(self, data, validation_data, **config):
        """
        Fit on the training data from scratch.

        On the first call the differencing terms are estimated from ``data``
        and override any 'd' / 'D' entries in ``config`` before the model is
        built; later calls reuse the already-built model.

        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :return: a dict mapping the metric name to its value on
            validation_data
        """

        if not self.model_init:
            # Estimating differencing term (d) and seasonal differencing term (D)
            kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
            adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
            d = max(adf_diffs, kpss_diffs)

            # Use the same seasonality switch and period that _build will
            # use, so D is estimated for the period the model is built with
            # and the seasonal test is skipped when it would be discarded.
            seasonal = config.get('seasonality_mode', self.seasonal)
            m = config.get('m', 7)
            D = nsdiffs(data, m=m, max_D=12) if seasonal and m > 1 else 0
            config.update(d=d, D=D)

            self._build(**config)
            self.model_init = True

        self.model.fit(data)
        val_metric = self.evaluate(x=None,
                                   target=validation_data,
                                   metrics=[self.metric])[0].item()
        return {self.metric: val_metric}

    def predict(self, x=None, horizon=24, update=False, rolling=False):
        """
        Predict horizon time-points ahead the input x in fit_eval
        :param x: ARIMA predicts the horizon steps forward from the training data.
            So x should be None as it is not used.
        :param horizon: the number of steps forward to predict
        :param update: whether to update the original model
        :param rolling: whether to use rolling prediction
        :return: predicted result of length horizon; the value returned by
            the underlying model when rolling is False, a list of scalars
            when rolling is True
        """
        if x is not None:
            raise ValueError("x should be None")
        if update and not rolling:
            raise Exception(
                "We don't support updating model without rolling prediction currently"
            )
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        if not rolling:
            return self.model.predict(n_periods=horizon)

        # When the caller does not want the model changed, keep an in-memory
        # snapshot. A fixed on-disk temp file would be shared between
        # concurrent callers and left behind if the loop raised.
        snapshot = None if update else pickle.dumps(self.model)

        forecasts = []
        try:
            for _ in range(horizon):
                fc = self.model.predict(n_periods=1).item()
                forecasts.append(fc)

                # Updates the existing model with a small number of MLE steps for rolling prediction
                self.model.update(fc)
        finally:
            # Roll back even on failure so a non-updating predict never
            # leaves a partially updated model behind.
            if snapshot is not None:
                self.model = pickle.loads(snapshot)

        return forecasts

    def evaluate(self, target, x=None, metrics=None, rolling=False):
        """
        Evaluate on the prediction results and target. We predict
        len(target) time-points ahead of the training data before evaluation.
        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training data.
            So x should be None as it is not used.
        :param metrics: a list of metrics in string format; defaults to
            ['mse'] when None
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        """
        if metrics is None:
            metrics = ['mse']
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)

        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]

    def save(self, checkpoint_file):
        """
        Pickle the underlying model to checkpoint_file.
        :param checkpoint_file: path of the file to write
        """
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling save")
        with open(checkpoint_file, 'wb') as fout:
            pickle.dump(self.model, fout)

    def restore(self, checkpoint_file):
        """
        Load a pickled model from checkpoint_file and mark it as initialized.
        :param checkpoint_file: path of the file to read
        """
        # NOTE: unpickling can execute arbitrary code; only restore
        # checkpoints that come from a trusted source.
        with open(checkpoint_file, 'rb') as fin:
            self.model = pickle.load(fin)
        self.model_init = True
Exemple #3
0
def test_oob_for_issue_28():
    """Out-of-sample scoring with an exogenous array.

    Fits a model on the full series with the last 10 observations held out
    for scoring and compares it against a model fit on the series minus
    those observations: hold-out score, parameters, forecasts before and
    after the second model is updated, and rejection of invalid updates.
    """
    n_holdout = 10
    n_ahead = 5
    xreg = rs.rand(hr.shape[0], 4)

    with_oob = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=n_holdout,
    ).fit(y=hr, exogenous=xreg)

    oob_score = with_oob.oob()
    assert not np.isnan(oob_score)

    # The fitted result must carry the complete endog and exog arrays,
    # i.e. the held-out samples were added back in after scoring
    results = with_oob.arima_res_
    assert np.allclose(results.data.endog, hr, rtol=1e-2)
    assert results.model.endog.shape[0] == hr.shape[0]
    assert np.allclose(results.data.exog, xreg, rtol=1e-2)
    assert results.model.exog.shape[0] == xreg.shape[0]

    # Reference model: same order, fit without the last observations and
    # without hold-out scoring. Scoring its forecasts of the missing window
    # should reproduce the hold-out score of the first model.
    without_oob = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=0,
    ).fit(y=hr[:-n_holdout], exogenous=xreg[:-n_holdout, :])

    score_func = val.get_scoring_metric(without_oob.scoring)
    holdout_preds = without_oob.predict(n_periods=n_holdout,
                                        exogenous=xreg[-n_holdout:, :])
    assert np.allclose(oob_score,
                       score_func(hr[-n_holdout:], holdout_preds),
                       rtol=1e-2)

    # The parameters differ between the two models
    xreg_test = rs.rand(n_ahead, 4)
    assert not np.allclose(with_oob.params(), without_oob.params(),
                           rtol=1e-2)

    # ... and so do the forecasts
    with_oob_forecasts = with_oob.predict(n_periods=n_ahead,
                                          exogenous=xreg_test)
    no_oob_forecasts = without_oob.predict(n_periods=n_ahead,
                                           exogenous=xreg_test)
    with pytest.raises(AssertionError):
        assert_array_almost_equal(with_oob_forecasts, no_oob_forecasts)

    # Invalid updates must be rejected, in turn: no exogenous array at all,
    # an exogenous array with the wrong number of rows, and one with the
    # wrong number of columns
    new_obs = hr[-n_holdout:]
    for bad_exog in (None, xreg_test, xreg_test[:, :2]):
        with pytest.raises(ValueError):
            without_oob.update(new_obs, bad_exog)

    # A valid update with the held-out samples aligns the forecasts
    without_oob.update(hr[-n_holdout:], xreg[-n_holdout:, :])
    updated_forecasts = without_oob.predict(n_periods=n_ahead,
                                            exogenous=xreg_test)
    assert np.allclose(with_oob_forecasts, updated_forecasts, rtol=1e-2)
Exemple #4
0
def model_plot(days):
    """Build a Plotly figure of New York electricity consumption forecasts.

    Reads the consumption and weather CSV files, fits a seasonal ARIMA model
    on 2016-2019 daily totals minus the last 100 days, then forecasts five
    consecutive windows of ``days`` days from that hold-out, updating the
    model with each window's true values before forecasting the next one.

    :param days: length of each forecast window; converted with ``int``
    :return: a ``go.Figure`` with the last 200 training values, their
        in-sample predictions, and the forecast vs. true hold-out values
    """
    horizon = int(days)
    pd.plotting.register_matplotlib_converters()

    # Consumption: index by timestamp and aggregate to daily totals
    consumption = pd.read_csv('data/new_york.csv')
    consumption['Date'] = pd.to_datetime(consumption['Date'])
    consumption.index = consumption.Date
    consumption = consumption.drop('Date', axis=1)
    consumption = consumption.resample('D').sum().tz_localize(None)

    # Weather: index by date and drop the descriptive columns
    weather = pd.read_csv('data/weather/weatherNY.csv')
    weather['DATE'] = pd.to_datetime(weather['DATE'])
    weather = weather.set_index('DATE')
    weather.drop(['NAME', 'STATION'], axis=1, inplace=True)
    weather = weather['2015-07-01':'2020-08-10']

    consumption = consumption[:'2020-08-10']

    # Restrict both sources to the modelling period
    consumption = consumption["2016":"2019"]
    weather = weather["2016":"2019"]

    y = consumption.Consumption

    # Exogenous regressors: weekend flag, minimum temperature and Fourier
    # terms with periods 638 and 516 over the day of the year
    exog = pd.DataFrame({'date': y.index})
    exog = exog.set_index(pd.PeriodIndex(exog['date'], freq='D'))
    exog['is_weekend'] = np.where(exog.index.dayofweek < 5, 0, 1)
    exog['TMIN'] = weather['TMIN'].values

    day_of_year = exog.index.dayofyear
    fourier_terms = (
        ('sin1', 'cos1', 2, 638),
        ('sin2', 'cos2', 4, 638),
        ('sin3', 'cos3', 2, 516),
        ('sin4', 'cos4', 4, 516),
    )
    for sin_name, cos_name, multiplier, period in fourier_terms:
        angle = multiplier * np.pi * day_of_year / period
        exog[sin_name] = np.sin(angle)
        exog[cos_name] = np.cos(angle)

    exog = exog.drop(columns=['date'])

    # Everything before the last 100 observations is training data
    holdout_start = len(y) - 100
    y_to_train = y.iloc[:holdout_start]
    exog_to_train = exog.iloc[:holdout_start]

    def window(k):
        # Positional slice of the k-th forecast window inside the hold-out
        return slice(holdout_start + k * horizon,
                     holdout_start + (k + 1) * horizon)

    real_values = []
    predictions = []
    dates = []
    steps = []

    for i in range(5):
        if i == 0:
            # First pass: train the model.
            # NOTE(review): the training regressors are handed to the ARIMA
            # constructor and not to fit() - confirm the model is actually
            # fit with them.
            arima_exog_model = ARIMA(
                order=(3, 0, 1),
                seasonal_order=(2, 0, 0, 7),
                exogenous=exog_to_train,
                error_action='ignore',
                initialization='approximate_diffuse',
                suppress_warnings=True,
            ).fit(y=y_to_train)
            preds = arima_exog_model.predict_in_sample(exog_to_train)
        else:
            # Later passes: feed the previous window's true values to the model
            previous = window(i - 1)
            arima_exog_model.update(y.iloc[previous],
                                    exogenous=exog.iloc[previous])

        current = window(i)
        actual = y.iloc[current]
        forecast = arima_exog_model.predict(n_periods=horizon,
                                            exogenous=exog.iloc[current])

        # Collect results already flattened across windows
        dates.extend(actual.index)
        steps.append(actual.index[-1])
        predictions.extend(forecast.tolist())
        real_values.extend(actual.values)

    # Only the tail of the training period is drawn
    recent_actual = y_to_train[-200:]
    recent_fitted = preds[-200:]

    fig = go.Figure()
    # Create and style traces
    fig.add_trace(go.Scatter(
        x=recent_actual.index, y=recent_actual, name='True values',
        line={'color': 'firebrick', 'width': 4, 'dash': 'dot'}))
    fig.add_trace(go.Scatter(
        x=recent_actual.index, y=recent_fitted, name='In-sample Prediction',
        line={'color': 'royalblue', 'width': 4}))
    fig.add_trace(go.Scatter(
        x=dates, y=predictions, name='Prediction',
        line={'color': 'green', 'width': 4}))
    fig.add_trace(go.Scatter(
        x=dates, y=real_values, name='True',
        line={'color': 'firebrick', 'width': 4, 'dash': 'dot'}))

    fig.update_layout(
        title='Electricity Consumption in New York',
        xaxis_title='Date',
        yaxis_title='Consumption',
        xaxis_showgrid=True,
        yaxis_showgrid=True,
        paper_bgcolor=app_colors['background'],
        plot_bgcolor=app_colors['background'],
    )

    return fig