_check_scoring, cross_validate, cross_val_predict, _check_averaging from pmdarima.datasets import load_airpassengers import pytest import numpy as np from unittest import mock y = load_airpassengers() exogenous = np.random.RandomState(1).rand(y.shape[0], 2) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=120, step=12, h=1), ]) @pytest.mark.parametrize('est', [ ARIMA(order=(2, 1, 1), maxiter=2, simple_differencing=True), ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12), maxiter=2, simple_differencing=True, suppress_warnings=True), Pipeline([ ("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=2, simple_differencing=True)) ]) ]) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('exog', [None, exogenous]) def test_cv_scores(cv, est, verbose, exog): scores = cross_val_score(est, y,
def test_cross_val_predict_error():
    """``cross_val_predict`` raises ``ValueError`` for this splitter/data pair."""
    # The sliding-window splitter is built with only ``step`` and ``h`` given.
    splitter = SlidingWindowForecastCV(step=24, h=1)
    with pytest.raises(ValueError):
        cross_val_predict(ARIMA(order=(2, 1, 0), maxiter=3), y, cv=splitter)
assert not os.path.exists(new_loc) # Show we get an OSError now with pytest.raises(OSError) as ose: joblib.load(pkl_file) assert "Does it still" in str(ose), ose finally: _unlink_if_exists(pkl_file) _unlink_if_exists(new_loc) @pytest.mark.parametrize( 'model', [ # ARMA ARIMA(order=(1, 0, 0)), # ARIMA ARIMA(order=(1, 1, 2)), # SARIMAX ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)) ] ) def test_issue_104(model): # Issue 104 shows that observations were not being updated appropriately. # We need to make sure they update for ALL models (ARMA, ARIMA, SARIMAX) endog = wineind train, test = endog[:125], endog[125:] model.fit(train)
def test_to_dict_raises_attribute_error_on_unfit_model():
    """``to_dict`` on an ARIMA that has not been fit raises ``AttributeError``."""
    unfit_model = ARIMA(order=(1, 1, 0))
    with pytest.raises(AttributeError):
        unfit_model.to_dict()
def test_oob_for_issue_28():
    """Out-of-bag scoring with an exogenous array (continuation of issue 28).

    Fits one model with ``out_of_sample_size=10`` on the full data and one
    without OOB scoring on the data minus the last 10 observations, then shows
    the two agree once the second model is updated with the held-out rows.
    """
    n_oob = 10
    xreg = rs.rand(hr.shape[0], 4)
    arima = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=n_oob,
    ).fit(y=hr, exogenous=xreg)

    oob = arima.oob()
    assert not np.isnan(oob)

    # Endog shapes: ``data`` holds the original array, ``model`` holds the
    # differenced array (original length - d).
    fit_res = arima.arima_res_
    assert np.allclose(fit_res.data.endog, hr, rtol=1e-2)
    assert fit_res.model.endog.shape[0] == hr.shape[0] - 1

    # Same checks for exog.
    assert np.allclose(fit_res.data.exog, xreg, rtol=1e-2)
    assert fit_res.model.exog.shape[0] == xreg.shape[0] - 1

    # An equivalent fit on (data - 10 obs) with no OOB scoring: its forecast
    # score on the held-out rows must match the OOB score above, showing the
    # OOB score is computed from a fit on the first (train - n_oob) samples.
    arima_no_oob = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=0,
    ).fit(y=hr[:-n_oob], exogenous=xreg[:-n_oob, :])

    scoring = get_callable(arima_no_oob.scoring, VALID_SCORING)
    held_out_preds = arima_no_oob.predict(n_periods=n_oob,
                                          exogenous=xreg[-n_oob:, :])
    assert np.allclose(oob, scoring(hr[-n_oob:], held_out_preds), rtol=1e-2)

    # The parameters differ because the OOB model was updated after scoring.
    xreg_test = rs.rand(5, 4)
    assert not np.allclose(arima.params(), arima_no_oob.params(), rtol=1e-2)

    # ... and therefore so do the forecasts.
    with_oob_forecasts = arima.predict(n_periods=5, exogenous=xreg_test)
    no_oob_forecasts = arima_no_oob.predict(n_periods=5, exogenous=xreg_test)
    with pytest.raises(AssertionError):
        assert_array_almost_equal(with_oob_forecasts, no_oob_forecasts)

    # Updating the no-OOB model with the latest data should bring the
    # forecasts back in line. First, the failure modes of ``update``:

    # no exogenous array supplied
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-n_oob:], None)

    # mis-matched number of rows
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-n_oob:], xreg_test)

    # exog with a different number of columns
    with pytest.raises(ValueError):
        arima_no_oob.update(hr[-n_oob:], xreg_test[:, :2])

    # Now the real update, after which the forecasts should match.
    arima_no_oob.update(hr[-n_oob:], xreg[-n_oob:, :])
    updated_forecasts = arima_no_oob.predict(n_periods=5, exogenous=xreg_test)
    assert np.allclose(with_oob_forecasts, updated_forecasts, rtol=1e-2)
labels_windows = reader.label_windows.get(dataset_name) # Evaluate the fit residuals to identify outliers. whole_df = df.copy() df = df.iloc[10:, :] models_dir = "fitted_models" if not os.path.isdir(models_dir): os.mkdir(models_dir) fname = "{}_model.pkl".format(dataset_name[:-4]) fpath = os.path.join(models_dir, fname) if not os.path.isfile(fpath) or args.overwrite: with warnings.catch_warnings(): warnings.simplefilter("ignore") arima = ARIMA( order=saved_parameters["order"], seasonal_order=saved_parameters["seasonal_order"] ) arima.fit(df.value) print("Saving fitted model on disk") joblib.dump(arima, fpath, compress=3) else: print("Reading model from disk") arima = joblib.load(fpath) gt_pred, gt_windows = get_gt_arrays( df.index, df.index, labels, labels_windows ) # Compute metrics metrics_columns = ["precision", "recall", "f_score", "nab_score"] Metrics = collections.namedtuple("Metrics", metrics_columns)
class Model:
    """Box-Cox + ARIMA forecasting workflow for one endogenous/exogenous pair.

    The attributes each step reads imply this call order:
    ``select_data`` -> ``normalize`` -> one of the ``run_auto*`` methods ->
    ``forecast`` -> ``renormalize``.
    """

    # Sentinel stored in ``lambda_boxcox`` when no Box-Cox transform is applied.
    _NO_BOXCOX = -999.0

    def _lambda_value(self):
        """Return ``lambda_boxcox`` as a plain float (scalar or 1-element array)."""
        return float(np.ravel(self.lambda_boxcox)[0])

    @staticmethod
    def _aic_of(fitted):
        """Return the AIC of a fitted model, whether ``aic`` is a method or a value."""
        aic = fitted.aic
        return aic() if callable(aic) else aic

    def select_data(self):
        """Build the observed and forecast-period arrays from ``files``.

        Sets ``dates_prev``, ``endo_obs``, ``exo_obs`` and ``exo_prev``; the
        three arrays are reshaped to a single column.
        """
        # Merge columns into a single dataframe of observed values, based on date
        dataset = files.data_main.join(
            files.data_exo.set_index('date'), on='date').dropna()

        # Select part of the precipitation dataframe that corresponds to the
        # forecast: every exogenous row dated after the last observed row.
        obs_end = dataset.tail(1)['date'].values[0]
        exo_prev = files.data_exo[(files.data_exo['date'] > obs_end)]

        # Select predict dates
        self.dates_prev = exo_prev['date']

        # Reshape
        self.endo_obs = np.array(dataset['endo_value']).reshape(-1, 1)
        self.exo_obs = np.array(dataset['exo_value']).reshape(-1, 1)
        self.exo_prev = np.array(exo_prev['exo_value']).reshape(-1, 1)

    def normalize(self):
        """Box-Cox transform ``endo_obs`` into ``endo_obs2`` when that is usable.

        The transform is applied only if every observation is strictly
        positive and the fitted lambda lies in ``[-1, 1]``. Otherwise
        ``endo_obs2`` is the untransformed ``endo_obs`` and ``lambda_boxcox``
        holds the sentinel ``-999``.

        ``lambda_boxcox`` is always stored as a one-element array so that
        ``lambda_boxcox[0]`` is valid in both cases (indexing the bare
        sentinel used to raise ``TypeError``).
        """
        lmbda = self._NO_BOXCOX
        transformed = self.endo_obs

        # Calculate lambda only if doesn't have zero (or negative) values
        if not np.any(self.endo_obs <= 0):
            # boxcox requires 1-D input; flatten, then restore the column shape.
            candidate, candidate_lmbda = boxcox(np.ravel(self.endo_obs))
            candidate_lmbda = float(np.ravel(candidate_lmbda)[0])
            # Limit lambda values
            if abs(candidate_lmbda) <= 1:
                transformed = candidate.reshape(np.shape(self.endo_obs))
                lmbda = candidate_lmbda

        self.endo_obs2 = transformed
        self.lambda_boxcox = np.array([lmbda])

    def run_auto(self):
        """Select a model with ``auto_arima`` and store it in ``arima_model``.

        Also sets ``parameters`` to
        ``[order, seasonal_order, boxcox_lambda, aic]``.

        :return: the fitted model.
        """
        # NOTE(review): ``start_d`` is forwarded unchanged from the original
        # call; confirm auto_arima actually accepts it.
        self.arima_model = auto_arima(
            self.endo_obs2,
            start_p=0, start_d=0, start_q=0,
            max_p=3, max_d=1, max_q=3,
            start_P=0, start_Q=0, D=1,
            seasonal=False, m=1,
            # Was misspelled ``exogeneous``; every other call in this class
            # uses ``exogenous``.
            exogenous=self.exo_obs,
            trace=True,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=True)

        # Compile parameters to list
        self.parameters = [
            self.arima_model.order,
            self.arima_model.seasonal_order,
            self._lambda_value(),
            self.arima_model.aic(),
        ]
        print(self.parameters)
        return (self.arima_model)

    def run_auto_arimax(self):
        """Grid-search ``(p, d, q)`` and keep the lowest-AIC ARIMA fit.

        Candidates that fail to fit are skipped. ``arima_model`` is set to the
        best fit (previously it was left as whichever candidate fitted last),
        and ``parameters`` to ``[best_order, boxcox_lambda, best_aic]``. If no
        candidate fits, ``arima_model`` is left untouched.

        :return: ``self.arima_model``.
        """
        lower_aic = float(99999)
        best_pdq = [0, 0, 0]
        best_model = None

        for pdq in itertools.product(range(0, 4), range(0, 2), range(0, 4)):
            try:
                candidate = ARIMA(order=pdq, suppress_warnings=True).fit(
                    y=self.endo_obs2, exogenous=self.exo_obs)
                candidate_aic = candidate.aic()
            except Exception:
                # Best effort: an order that cannot be fitted is not a candidate.
                continue
            if candidate_aic < lower_aic:
                lower_aic = candidate_aic
                best_pdq = tuple(candidate.order)
                best_model = candidate

        if best_model is not None:
            self.arima_model = best_model

        # Compile parameters to list
        self.parameters = [best_pdq, self._lambda_value(), lower_aic]
        print(self.parameters)
        return (self.arima_model)

    def run_auto_sarimax(self):
        """Grid-search order and seasonal order with statsmodels ``SARIMAX``.

        Keeps the lowest-AIC fit in ``arima_model`` and sets ``parameters`` to
        ``[best_order, best_seasonal_order, boxcox_lambda, best_aic]``.
        Combinations that fail to fit are skipped; if none fits,
        ``arima_model`` is left untouched.

        NOTE(review): ``forecast`` calls ``predict`` with pmdarima-style
        keywords; confirm a statsmodels result accepts them.

        :return: ``self.arima_model``.
        """
        lower_aic = float(99999)
        best_pdq = [0, 0, 0]
        best_spdq = [0, 0, 0]
        best_model = None

        m = 1  # frequency
        grid = list(itertools.product(range(0, 4), range(0, 2), range(0, 4)))
        param_seasonal = [(x[0], x[1], x[2], m) for x in grid]

        for pdq in grid:
            for spdq in param_seasonal:
                try:
                    mod = sm.tsa.statespace.SARIMAX(
                        self.endo_obs2,
                        exog=self.exo_obs,
                        order=pdq,
                        seasonal_order=spdq,
                        enforce_stationarity=False,
                        enforce_invertibility=False)
                    candidate = mod.fit(disp=0)
                    # ``aic`` was printed as an attribute but then called as a
                    # method; the resulting TypeError was swallowed, so no
                    # best model was ever recorded. Accept either form.
                    candidate_aic = self._aic_of(candidate)
                except Exception:
                    continue

                print('ARIMA{}x{}{} - AIC:{}'.format(
                    pdq, spdq, m, candidate_aic))
                if candidate_aic < lower_aic:
                    lower_aic = candidate_aic
                    best_pdq = tuple(pdq)
                    best_spdq = tuple(spdq)
                    best_model = candidate

        if best_model is not None:
            self.arima_model = best_model

        # Compile parameters to list
        self.parameters = [
            best_pdq, best_spdq, self._lambda_value(), lower_aic
        ]
        print(self.parameters)
        return (self.arima_model)

    def forecast(self):
        """Forecast one step per row of ``exo_prev`` and store it in ``predict``.

        ``predict`` holds whatever the model returns for
        ``return_conf_int=True``; ``renormalize`` reads it as
        ``(mean, intervals)``.
        """
        # NOTE(review): alpha=0.7 is passed through unchanged; confirm the
        # intended interval width.
        self.predict = self.arima_model.predict(
            n_periods=self.exo_prev.shape[0],
            exogenous=self.exo_prev,
            return_conf_int=True,
            alpha=0.7)

    def renormalize(self):
        """Undo the Box-Cox transform on the forecast.

        Sets ``predict_mean``, ``predict_down`` and ``predict_up``.

        :return: a DataFrame indexed by ``dates_prev`` with one column,
            ``endo_value``, holding the mean forecast.
        """
        mean = self.predict[0]
        lower = self.predict[1][:, 0]
        upper = self.predict[1][:, 1]

        lmbda = self._lambda_value()
        if lmbda != self._NO_BOXCOX:
            mean = inv_boxcox(mean, lmbda)
            lower = inv_boxcox(lower, lmbda)
            upper = inv_boxcox(upper, lmbda)

        self.predict_mean = mean
        self.predict_down = lower
        self.predict_up = upper

        # Join predict dates with values into a dataframe
        df_final = pd.DataFrame(self.predict_mean, self.dates_prev)
        df_final.columns = ['endo_value']
        return (df_final)
if args.fit or args.auto_fit: if args.auto_fit: arima = auto_arima( train, stepwise=True, trace=1, m=args.period, information_criterion="aicc", seasonal=args.seasonal, error_action="ignore", suppress_warnings=True, ) elif args.fit: with warnings.catch_warnings(): warnings.simplefilter("ignore") arima = ARIMA(order=args.order, seasonal_order=args.seasonal_order) arima.fit(train) print(arima.summary()) residuals = arima.resid() print("train lengths: data={} resid={}".format( train_len, residuals.shape[0])) len_delta = train_len - residuals.shape[0] # Diagnostics plot arima.plot_diagnostics(lags=50) box_ljung(residuals, nlags=20).format() plt.gcf().suptitle('Diagnostics Plot') plt.figure() plt.plot(df.value.index[len_delta:train_len],
# It may be a lost cause, but by all means prove me wrong.


def example_pmd():
    """Run ``pmd_exogenous`` over the first 500 hospital points and return its state."""
    state = {}
    y, a = hospital_with_exog(k=3)
    # The per-step return values are discarded; only ``state`` is of interest.
    for yj, aj in zip(y[:500], a):
        pmd_exogenous(y=yj, s=state, k=3, a=aj)
    return state


def arima_res_to_dict(arima_res):
    """Return the instance ``__dict__`` of ``arima_res`` (not a copy)."""
    return arima_res.__dict__


def pmd_to_dict(pmd):
    """Replace ``pmd['model']`` with its ``__getstate__()`` dict, in place.

    The nested ``'arima_res_'`` entry is likewise replaced by its ``__dict__``.
    Returns the same ``pmd`` object that was passed in.
    """
    model_state = pmd['model'].__getstate__()
    pmd['model'] = model_state
    model_state['arima_res_'] = arima_res_to_dict(model_state['arima_res_'])
    return pmd


def pmd_from_dict(pmd):
    """Blank out ``pmd['model']['arima_res_']`` with an empty string, in place."""
    pmd['model']['arima_res_'] = ''


if __name__ == '__main__':
    pmd = example_pmd()
    model = pmd['model']
    model1 = ARIMA(**model.get_params())
    prms = model.__dict__['arima_res_'].__dict__['_results'].params
# -*- coding: utf-8 -*-

from sklearn.base import clone
from pmdarima.arima import ARIMA, AutoARIMA
from pmdarima.pipeline import Pipeline
from pmdarima.datasets import load_wineind
from pmdarima.preprocessing import FourierFeaturizer

import pytest

y = load_wineind()


@pytest.mark.parametrize(
    'est', [
        ARIMA(order=(2, 1, 1)),
        AutoARIMA(seasonal=False, maxiter=3),
        Pipeline([
            ("fourier", FourierFeaturizer(m=12)),
            ("arima", AutoARIMA(seasonal=False, stepwise=True,
                                suppress_warnings=True, d=1, max_p=2, max_q=0,
                                start_q=0, start_p=1,
                                maxiter=3, error_action='ignore'))
        ])
    ]
)
def test_clonable(est):
    """A fitted estimator clones into a distinct object of the same class."""
    # Fit first, so the clone is taken from an already-fitted estimator.
    est.fit(y)
    duplicate = clone(est)
    assert isinstance(duplicate, est.__class__)
    assert est is not duplicate
from pmdarima.arima import ARIMA

import pytest
import os
import platform

lynx = load_lynx()

# test images directories
travis = os.environ.get("TESTING_ON_TRAVIS", "false").lower() == "true"

# Do not test on travis because they hate MPL
if not travis:
    # base images are created on Mac/Darwin. Windows needs a higher tolerance
    tolerance = 10 if platform.system() == "Windows" else 5

    @pytest.mark.parametrize('model_type,model', [
        pytest.param('arma', ARIMA(order=(1, 0, 0))),
        pytest.param('arima', ARIMA(order=(1, 1, 0))),
        pytest.param('sarimax', ARIMA(order=(1, 1, 0),
                                      seasonal_order=(1, 0, 0, 12)))
    ])
    @pytest.mark.mpl_image_compare(tolerance=tolerance)
    def test_plot_diagnostics(model_type, model):
        """Fit ``model`` on the lynx data and return its diagnostics figure.

        The returned figure is what the ``mpl_image_compare`` marker receives.
        """
        model.fit(lynx)
        figure = model.plot_diagnostics(figsize=(15, 12))
        return figure
def run():
    """Poll price history for a ticker, fit an ARIMA model and print a summary.

    Prompts for a ticker symbol, then loops until ``minutes`` have elapsed.
    Each pass downloads 1440 candles, fits an ARIMA on the log close price,
    forecasts ``n_periods_ahead`` steps and prints the result. Any exception
    raised inside a pass is printed and the loop carries on after sleeping
    ``SLEEP`` seconds.

    :return: ``0`` once the time budget is exhausted.
    """
    symbol = input("Enter ticker symbol: ")

    now = dt.datetime.now()
    timeFinish = now + dt.timedelta(minutes=minutes)

    while now < timeFinish:
        try:
            now = dt.datetime.now()

            client = Client(environment=PRACTICE, account_id="",
                            access_token=ACCESS_TOKEN)
            history = client.get_instrument_history(instrument=symbol,
                                                    granularity=timeframe,
                                                    candle_format="midpoint",
                                                    count=1440)
            candles = pd.DataFrame(history['candles'])

            # Keep only the close price, indexed by timestamp.
            data = candles.set_index('time')[['closeMid']]
            data = data.set_index(pd.to_datetime(data.index))
            data.columns = [CLOSE]

            # Rescale data
            lnprice = np.log(data)

            # Create and fit the model: auto_arima picks the order, then a
            # plain ARIMA with that order is fitted on the same values.
            model_temp = auto_arima(lnprice.values, start_p=1, start_q=1,
                                    max_p=1, max_q=1, m=4,
                                    start_P=0, seasonal=False,
                                    d=1, D=1, trace=True,
                                    error_action='ignore',
                                    suppress_warnings=True,
                                    stepwise=True)
            fit = ARIMA(order=model_temp.order).fit(lnprice.values)

            # Predict, then undo the log transform
            future_forecast = np.exp(fit.predict(n_periods=n_periods_ahead))
            first = future_forecast[0]
            last = future_forecast[-1]

            # Calculations (only the two forecast endpoints are compared)
            lowest = min(first, last)
            highest = max(first, last)
            current = data[CLOSE].iloc[-1]
            x = ((first - last) / first) * 100
            slope = (first - last) / n_periods_ahead
            degree = math.degrees(math.atan(slope))

            # Trending
            # NOTE(review): x is positive when the first forecast exceeds the
            # last, i.e. when the forecast path falls - confirm this label
            # mapping is the intended one.
            if x > 0:
                trending = "Positivly / Call"
            else:
                trending = "Negativaly / Put"

            # View
            print("==========================")
            print("Current Price: ", current)
            print("Highest price: ", highest)
            print("Lowest Price: ", lowest)
            print("Trending: ", trending)
            print("Degrees: ", degree)
            print("==========================" + "\n")

        except Exception as e:
            print(e)

        time.sleep(SLEEP)

    return 0
def test_not_fitted_error():
    """``check_is_fitted`` on a fresh ARIMA raises ``NotFittedError`` with the expected text."""
    with pytest.raises(sk.NotFittedError) as nfe:
        unfit = ARIMA((0, 1, 0))
        sk.check_is_fitted(unfit, "arima_res_")

    message = pytest_error_str(nfe)
    assert "Model has not been fit!" in message
_check_scoring, cross_validate from pmdarima.datasets import load_wineind import pytest import numpy as np from unittest import mock y = load_wineind() exogenous = np.random.RandomState(1).rand(y.shape[0], 2) @pytest.mark.parametrize('cv', [ SlidingWindowForecastCV(window_size=100, step=24, h=1), RollingForecastCV(initial=150, step=12, h=1), ]) @pytest.mark.parametrize('est', [ ARIMA(order=(2, 1, 1), seasonal_order=(0, 0, 0, 1)), ARIMA(order=(1, 1, 2), seasonal_order=(0, 1, 1, 12)), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0), maxiter=3))]) ]) @pytest.mark.parametrize('verbose', [0, 2, 4]) @pytest.mark.parametrize('exog', [None, exogenous]) def test_cv_scores(cv, est, verbose, exog): scores = cross_val_score(est, y, exogenous=exog, scoring='mean_squared_error', cv=cv, verbose=verbose) assert isinstance(scores, np.ndarray)
} with pytest.raises(ValueError) as ve: pipeline.predict(3, **kwargs) assert "'n_periods'" in pytest_error_str(ve) # Assert that we can update the model pipeline.update(test, maxiter=5) # And that the fourier transformer was updated properly... assert pipeline.steps_[0][1].n_ == wineind.shape[0] @pytest.mark.parametrize('pipeline', [ Pipeline([ ("arma", ARIMA(order=(2, 0, 0))) ]), Pipeline([ ("arima", ARIMA(order=(2, 1, 0))) ]), Pipeline([ ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12))) ]), Pipeline([ ("fourier", FourierFeaturizer(m=12)), ("arma", ARIMA(order=(2, 0, 0))) ]),
with pytest.raises(ValueError) as ve: pipeline.predict(3, **kwargs) assert "'n_periods'" in pytest_error_str(ve) # Assert that we can update the model pipeline.update(test, maxiter=5) # And that the fourier transformer was updated properly... assert pipeline.steps_[0][1].n_ == wineind.shape[0] @pytest.mark.parametrize( 'pipeline', [ Pipeline([("arma", ARIMA(order=(2, 0, 0)))]), Pipeline([("arima", ARIMA(order=(2, 1, 0)))]), Pipeline([ ("sarimax", ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12))) ]), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arma", ARIMA(order=(2, 0, 0)))]), Pipeline([("fourier", FourierFeaturizer(m=12)), ("arima", ARIMA(order=(2, 1, 0)))]), # one with a boxcox transformer Pipeline([("boxcox", BoxCoxEndogTransformer()), ("fourier", FourierFeaturizer(m=12)), ("arima", AutoARIMA(seasonal=False, stepwise=True,
X = np.random.RandomState(1).rand(vec.shape[0], 2) auto_arima(vec, X=X, out_of_sample_size=1, seasonal=False, suppress_warnings=True) # This is a way to force it: ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, X=X) @pytest.mark.parametrize( # will be m - d 'model', [ ARIMA(order=(2, 0, 0)), # arma ARIMA(order=(2, 1, 0)), # arima ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), # sarimax ]) def test_predict_in_sample_conf_int(model): model.fit(wineind) expected_m_dim = wineind.shape[0] preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05) assert preds.shape[0] == expected_m_dim assert confints.shape == (expected_m_dim, 2) @pytest.mark.parametrize( 'model', [ ARIMA(order=(2, 0, 0)), # arma
[-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139], [-0.10692387, 0.98852139] ]) _, intervals = arma.predict(n_periods=10, return_conf_int=True, alpha=0.05) assert_array_almost_equal(intervals, expected_intervals) @pytest.mark.parametrize( # will be m - d 'model, expected_m_dim', [ pytest.param(ARIMA(order=(2, 0, 0)), wineind.shape[0]), # arma pytest.param(ARIMA(order=(2, 1, 0)), wineind.shape[0] - 1), # arima pytest.param(ARIMA(order=(2, 1, 0), seasonal_order=(1, 0, 0, 12)), wineind.shape[0]), # sarimax ] ) def test_predict_in_sample_conf_int(model, expected_m_dim): model.fit(wineind) preds, confints = model.predict_in_sample(return_conf_int=True, alpha=0.05) assert preds.shape[0] == expected_m_dim assert confints.shape == (expected_m_dim, 2) def test_with_oob(): # show we can fit with CV (kinda) arima = ARIMA(order=(2, 1, 2),
class ARIMAModel(BaseModel):
    """Wrapper around ``ARIMA`` exposing fit/predict/evaluate/save/restore."""

    def __init__(self):
        """
        Initialize Model
        """
        self.seasonal = True
        self.metric = 'mse'
        self.model = None
        self.model_init = False

    def _build(self, **config):
        """
        build the models and initialize.
        :param config: hyperparameters for the model. Recognised keys (with
            defaults): p (2), d (0), q (2), seasonality_mode (True), P (1),
            D (0), Q (1), m (7), metric (current ``self.metric``).
        """
        p = config.get('p', 2)
        d = config.get('d', 0)
        q = config.get('q', 2)
        self.seasonal = config.get('seasonality_mode', True)
        P = config.get('P', 1)
        D = config.get('D', 0)
        Q = config.get('Q', 1)
        m = config.get('m', 7)
        self.metric = config.get('metric', self.metric)

        order = (p, d, q)
        # A non-seasonal model gets an all-zero seasonal order.
        seasonal_order = (P, D, Q, m) if self.seasonal else (0, 0, 0, 0)

        self.model = ARIMA(order=order,
                           seasonal_order=seasonal_order,
                           suppress_warnings=True)

    def fit_eval(self, data, validation_data, **config):
        """
        Fit on the training data from scratch.
        :param data: A 1-D numpy array as the training data
        :param validation_data: A 1-D numpy array as the evaluation data
        :param config: hyperparameters, see ``_build``. ``d`` and ``D`` are
            overwritten with estimated values on the first call.
        :return: a dict ``{metric_name: value}`` with the evaluation metric
        """
        if not self.model_init:
            # Estimating differencing term (d) and seasonal differencing term (D)
            kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
            adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
            d = max(adf_diffs, kpss_diffs)
            # Estimate D with the same seasonal period the model is built
            # with (this used to be hard-coded to 7 regardless of ``m``).
            m = config.get('m', 7)
            D = 0 if not self.seasonal else nsdiffs(data, m=m, max_D=12)
            config.update(d=d, D=D)

            self._build(**config)
            self.model_init = True

        self.model.fit(data)
        val_metric = self.evaluate(x=None, target=validation_data,
                                   metrics=[self.metric])[0].item()
        return {self.metric: val_metric}

    def predict(self, x=None, horizon=24, update=False, rolling=False):
        """
        Predict horizon time-points ahead the input x in fit_eval
        :param x: ARIMA predicts the horizon steps forward from the training
            data. So x should be None as it is not used.
        :param horizon: the number of steps forward to predict
        :param update: whether to update the original model
        :param rolling: whether to use rolling prediction
        :return: predicted result of length horizon (a list when ``rolling``
            is set, otherwise whatever ``ARIMA.predict`` returns)
        :raises ValueError: if ``x`` is not None
        :raises Exception: if ``update`` is requested without ``rolling``, or
            if no model has been fitted or restored yet
        """
        if x is not None:
            raise ValueError("x should be None")
        if update and not rolling:
            raise Exception(
                "We don't support updating model without rolling prediction currently"
            )
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling predict"
            )

        if not rolling:
            return self.model.predict(n_periods=horizon)

        # Rolling prediction mutates the model one step at a time. When the
        # caller does not want that to persist, snapshot it first. The
        # snapshot lives in memory rather than in a fixed "tmp.pkl" in the
        # working directory, which could clobber a user file and was unsafe
        # when several predictions ran concurrently.
        snapshot = None if update else pickle.dumps(self.model)
        forecasts = []
        try:
            for _ in range(horizon):
                fc = self.model.predict(n_periods=1).item()
                forecasts.append(fc)
                # Updates the existing model with a small number of MLE steps
                # for rolling prediction
                self.model.update(fc)
        finally:
            # Restore even if predict/update raised, so a failed call never
            # leaves the model half-updated.
            if snapshot is not None:
                self.model = pickle.loads(snapshot)
        return forecasts

    def evaluate(self, target, x=None, metrics=['mse'], rolling=False):
        """
        Evaluate on the prediction results and y. We predict horizon
        time-points ahead the input x in fit_eval before evaluation, where the
        horizon length equals the length of target.
        :param target: target for evaluation.
        :param x: ARIMA predicts the horizon steps forward from the training
            data. So x should be None as it is not used.
        :param metrics: a list of metrics in string format (only iterated,
            never mutated)
        :param rolling: whether to use rolling prediction
        :return: a list of metric evaluation results
        :raises ValueError: if ``x`` is not None or ``target`` is None
        :raises Exception: if no model has been fitted or restored yet
        """
        if x is not None:
            raise ValueError("We don't support input x currently")
        if target is None:
            raise ValueError("Input invalid target of None")
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling evaluate"
            )

        forecasts = self.predict(horizon=len(target), rolling=rolling)
        return [Evaluator.evaluate(m, target, forecasts) for m in metrics]

    def save(self, checkpoint_file):
        """
        Pickle the underlying model to ``checkpoint_file``.
        :raises Exception: if no model has been fitted or restored yet
        """
        if self.model is None:
            raise Exception(
                "Needs to call fit_eval or restore first before calling save")
        with open(checkpoint_file, 'wb') as fout:
            pickle.dump(self.model, fout)

    def restore(self, checkpoint_file):
        """
        Load a model pickled by ``save`` and mark this wrapper as initialized.
        """
        # NOTE: unpickling executes code from the file; only restore
        # checkpoints from a trusted source.
        with open(checkpoint_file, 'rb') as fin:
            self.model = pickle.load(fin)
        self.model_init = True
def test_oob_sarimax():
    """Out-of-bag scoring behaves the same for a seasonal model with exog.

    Mirrors the non-seasonal OOB test: one model holds out the last 15
    observations for scoring, the other is fit on the data without them.
    """
    n_oob = 15
    shared_kwargs = dict(order=(1, 1, 1),
                         seasonal_order=(0, 1, 1, 12),
                         maxiter=5)

    xreg = rs.rand(wineind.shape[0], 2)
    fit = ARIMA(out_of_sample_size=n_oob,
                **shared_kwargs).fit(y=wineind, exogenous=xreg)

    fit_no_oob = ARIMA(out_of_sample_size=0,
                       suppress_warnings=True,
                       **shared_kwargs).fit(y=wineind[:-n_oob],
                                            exogenous=xreg[:-n_oob, :])

    # now assert some of the same things here that we did in the former test
    oob = fit.oob()

    # compare scores:
    scoring = get_callable(fit_no_oob.scoring, VALID_SCORING)
    no_oob_preds = fit_no_oob.predict(n_periods=n_oob,
                                      exogenous=xreg[-n_oob:, :])
    expected_score = scoring(wineind[-n_oob:], no_oob_preds)
    assert np.allclose(oob, expected_score, rtol=1e-2)

    # show params are no longer the same
    assert not np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # show we can add the new samples and get the exact same forecasts
    xreg_test = rs.rand(5, 2)
    fit_no_oob.update(wineind[-n_oob:], xreg[-n_oob:, :])
    assert np.allclose(fit.predict(5, xreg_test),
                       fit_no_oob.predict(5, xreg_test),
                       rtol=1e-2)

    # And also the params should be close now after updating
    assert np.allclose(fit.params(), fit_no_oob.params())

    # Show we can get a confidence interval out here
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    for returned in (preds, conf):
        assert isinstance(returned, np.ndarray)
def ragged_fill_series(
    series,
    function=np.nanmean,
    backup_fill_method=np.nanmean,
    est_series=None,
    fitted_arma=None,
    arma_full_series=None,
):
    """Filling in the ragged ends of a series, adhering to the periodicity of
    the series. If there is only one observation and periodicity cannot be
    determined, series will be returned unchanged.

    The index arithmetic (``len(series) - last index label``) expects integer
    index labels, e.g. the default ``RangeIndex``.

    parameters:
        :series: list/pandas Series: the series to fill the ragged edges of.
            Missings should be np.nans
        :function: the function to fill nas with (e.g. np.nanmean, etc.).
            Use "ARMA" for ARMA filling
        :backup_fill_method: function: which function to fill ragged edges
            with in case ARMA can't be estimated
        :est_series: list/pandas Series: optional, the series to calculate the
            fillna and/or ARMA function on. Should not have nas filled in yet
            by any method. E.g. a train set. If None, will be calculated based
            on ``series`` itself.
        :fitted_arma: optional, fitted ARMA model if available to avoid
            reestimating every time in the `gen_ragged_X` function
        :arma_full_series: optional, for_full_arma_dataset output of
            `gen_dataset` function. Fitting the ARMA model on the full series
            history rather than just the series provided

    output:
        :return: tuple ``(filled, fitted_arma)``: a pandas Series with filled
            ragged edges, and the ARMA model used (the one passed in, a newly
            estimated one, or None if ARMA filling was not requested)
    """
    result = pd.Series(series).copy()
    if est_series is None:
        est_series = result.copy()

    # Work from ``result`` (always a Series) rather than the raw ``series``
    # argument, so that plain-list input has an index to read.
    observed_mask = result.notna()
    nonna_indices = list(result.index[observed_mask])  # labels holding values

    # if there is only one non-na observation, can't determine periodicity or
    # position in full series, don't fill anything
    if len(nonna_indices) <= 1:
        return result, fitted_arma

    # periodicity of the series: the most common gap between observed labels
    # (quarterly, monthly, etc.)
    observed_labels = pd.Series(nonna_indices)
    periodicity = int((observed_labels - observed_labels.shift()).mode()[0])

    last_nonna = nonna_indices[-1]
    n_trailing = int(len(result) - last_nonna)

    # labels to be written: the observed ones, then every ``periodicity``-th
    # label after the last observation, cut down to labels that really exist
    existing_labels = set(result.index)
    fill_indices = [
        label
        for label in nonna_indices + [
            int(last_nonna + periodicity * i) for i in range(1, n_trailing)
        ]
        if label in existing_labels
    ]

    observed_values = list(result[observed_mask])

    if isinstance(function, str) and function == "ARMA":
        # estimate the model if not given
        if fitted_arma is None:
            fitted_arma = estimate_arma(est_series)

        # instantiate model with previously estimated parameters
        # (i.e. on train set)
        arma = ARIMA(order=fitted_arma.order)
        arma.set_params(**fitted_arma.get_params())

        if arma_full_series is not None:
            # refit on the full history, limited to the point where this
            # series' actuals end: the last window of ``y`` equal to them
            y = list(pd.Series(arma_full_series).dropna())
            present = observed_values
            width = len(present)
            end_index = 0
            for i in range(width, len(y) + 1):
                if y[i - width:i] == present:
                    end_index = i
            y = y[:end_index]
        else:
            # refit model on just this series
            y = list(observed_values)
            present = list(y)

        # can fail if not enough datapoints for order of ARMA process
        try:
            arma.fit(y, error_action="ignore")
            preds = arma.predict(n_periods=n_trailing)
            fills = list(present) + list(preds)
        except Exception:
            # Best-effort fallback; Exception (not bare except) so that
            # KeyboardInterrupt/SystemExit still propagate.
            fills = observed_values + [backup_fill_method(est_series)
                                       ] * n_trailing
    else:
        fills = observed_values + [function(est_series)] * n_trailing

    result.loc[fill_indices] = fills[:len(fill_indices)]
    return result, fitted_arma