def build_ARIMA(self, optimize = False, verbose = 0):
    """Fit or refit the ARIMA(p, 1, 0) model on the smoothed training data.

    ``self.train`` is smoothed with a rolling mean of width ``self.window``
    and the NaN entries of the result are dropped.  When ``optimize`` is
    true or ``self.current_time == 0``, the number of AR terms ``self.p``
    is chosen with ``gridsearch_arima`` and the model is fit.  Otherwise
    the model is refit with the existing ``self.p`` only when
    ``self.refit`` is non-zero and ``self.current_time`` is a multiple of
    it.  In every other case the existing ``self.arima_model`` is kept.

    If any step fails, ``self.build_iter`` and ``self.p_ub`` are each
    incremented and the build is retried with a fresh grid search.
    ``self.build_iter`` is reset to 0 after a successful build.

    Parameters
    ----------
    optimize : bool
        Force a grid search for ``self.p`` even after the first period.
    verbose : int
        Not used by this method; kept for interface compatibility.

    Raises
    ------
    ValueError
        If the method is entered while ``self.build_iter > 2``, i.e. the
        retries have been exhausted.
    """
    if self.build_iter > 2:
        raise ValueError('Unable to fit arima model to dataset, try using different training data')

    try:
        smoothed = pd.Series(self.train).rolling(self.window).mean().values
        smoothed = smoothed[~np.isnan(smoothed)]

        run_search = optimize or self.current_time == 0
        # A refit is due every ``self.refit`` periods; 0 disables refitting.
        refit_due = self.refit != 0 and self.current_time % self.refit == 0

        if run_search:
            # Grid search for the number of AR terms p.
            self.p = gridsearch_arima(self.p_ub, smoothed, self.how)
        if run_search or refit_due:
            # A plain refit reuses the order from the previous fit.
            self.arima_model = arima(smoothed, order=(self.p, 1, 0)).fit()
    except Exception:
        # Only ordinary errors trigger a retry; KeyboardInterrupt and
        # SystemExit must propagate instead of being turned into retries.
        self.build_iter += 1
        self.p_ub += 1
        self.build_ARIMA(optimize=True, verbose=1)
    self.build_iter = 0
def gridsearch_arima(p_ub, train, how = 'AIC'):
    '''
    Helper function that performs a grid search over the number of AR terms
    to find a good ARIMA(p, 1, 0) model for forecasting demand.

    Candidate values p = 1 .. p_ub are each fit on ``train`` and compared
    with the chosen criterion; the p with the lowest score is returned.
    Candidates that cannot be fit are skipped.

    Parameters
    ----------
    p_ub : int
        Inclusive upper bound on the number of AR terms to try.
    train : array-like
        Training series passed to ``arima``.
    how : str
        'AIC' (default, recommended by the original authors' testing) or
        'BIC'; any other value selects the mean squared residual (MSE).

    Returns
    -------
    int
        The best-scoring number of AR terms.

    Raises
    ------
    ValueError
        If no candidate in the range could be fit and scored.
    '''
    import warnings

    min_score = np.inf
    p_best = 0  # ideal number of AR terms

    # Silence fit warnings only while searching, then restore the caller's
    # warning filters instead of disabling warnings process-wide.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        for p in range(1, p_ub + 1):
            try:
                fit = arima(train, order=(p, 1, 0)).fit()
                if how == 'AIC':
                    score = fit.aic
                elif how == 'BIC':
                    score = fit.bic
                else:
                    # np.asarray accepts both ndarray and pandas residuals;
                    # ``.values`` exists only on the pandas variant.
                    score = np.mean(np.asarray(fit.resid) ** 2)  # MSE
            except Exception:
                # The model could not be fit with p terms; try the next p.
                continue
            else:
                if score < min_score:
                    min_score = score
                    p_best = p

    if min_score == np.inf:
        # Never was able to fit a model.
        raise ValueError('Unable to fit model in the provided bounds')
    return p_best
def ARIMAWrapper(x):
    """
    Estimate an autoregression coefficient for a series, with fallbacks.

    NaN entries are removed first.  An ``ar`` model fit (MLE, BFGS,
    ``maxlag=1``) is attempted; if that fails for any reason an
    ``arima`` model of order (1, 0, 0) is fit with the CSS method instead.
    If both attempts fail, the constant 0.92 is returned.

    Parameters
    ----------
    x: 1D array
        one dimensional array, may contain NaN

    Returns
    -------
    rhoval: float
        autoregression coefficient (second entry of the fitted ``params``),
        normally it lies in the interval [-1, 1]
    """
    finite = x[~isnan(x)]

    # First choice: plain AR fit.
    try:
        ar_fit = ar(finite).fit(disp=False, method='mle', solver='bfgs', maxlag=1)
        return ar_fit.params[1]
    except Exception:
        pass  # fall through to the ARIMA attempt below

    # Second choice: ARIMA(1, 0, 0); last resort is a fixed default.
    try:
        arima_fit = arima(finite, (1, 0, 0)).fit(disp=False, method='css', solver='bfgs')
        return arima_fit.params[1]
    except Exception:
        return 0.92
def arimaOptimization(self):
    """Try to fit an ARIMA(p, d, q) model for every order in a small grid.

    Every combination of p in 0..3, d in 0..2 and q in 0..3 is fit on
    ``self.trainData``.  The fitted models are neither stored nor scored
    here; the method only prints a message for each order that cannot be
    fit.  It returns ``None``.
    """
    from itertools import product

    # product() iterates p slowest and q fastest, like the nested loops
    # it replaces.
    for p, d, q in product(range(4), range(3), range(4)):
        try:
            arima(self.trainData, (p, d, q)).fit()
        except Exception:
            # Report ordinary fit failures but let KeyboardInterrupt and
            # SystemExit propagate so a long search can be aborted.
            print('Unable to fit a ARIMA({},{},{}).'.format(p, d, q))
# Use KPSS to test for stationarity.
from statsmodels.tsa.stattools import kpss

# kpss() is unpacked into four values; only the p-value is reported.
kpss_stat, p_value, lags, crit = kpss(xrp)
print("XRP p_value:", p_value)
kpss_stat, p_value, lags, crit = kpss(eth)
print("ETH p_value:", p_value)

from statsmodels.tsa.arima_model import ARIMA as arima

# Remember if we use d=0 we're back to using the ARMA model!
xrp_model_arima = arima(xrp, order=(4, 1, 2))
# Built with the same ARIMA class as the XRP model: an ARMA model takes a
# two-element (p, q) order, so passing (3, 1, 2) to it would not give the
# intended ARIMA(3, 1, 2).
eth_model_arima = arima(eth, order=(3, 1, 2))

xrp_results_arima = xrp_model_arima.fit()
eth_results_arima = eth_model_arima.fit()

xrp_results_arima.summary()
eth_results_arima.summary()
def fit_forecast(self, y, h):
    """Fit an ARIMA model on ``y`` and return the forecast ``h`` steps ahead.

    The model order is taken from ``self.p``, ``self.d`` and ``self.q``.
    The fit is run with ``disp=0``.  ``forecast(steps=h)`` is indexed with
    ``[0]`` (presumably the array of point forecasts in the legacy
    statsmodels API -- confirm against the installed version) and the last
    entry of that array is returned.
    """
    order = (self.p, self.d, self.q)
    fitted = arima(y, order=order).fit(disp=0)
    horizon_values = fitted.forecast(steps=h)[0]
    return horizon_values[-1]