def grid_search_best_model_timeseries_arma(df, grid, cv):
    """Grid-search ARMA orders using time-series cross-validation.

    Every combination of the values in ``grid`` is passed as ``order`` to
    ``arima_model.ARMA``.  A candidate's score is the mean out-of-sample RMSE
    over the ``TimeSeriesSplit`` folds; folds on which fitting or prediction
    raises are skipped.  The candidate with the lowest mean score is refitted
    on all of ``df`` and its in-sample RMSE is printed.

    Args:
        df: Observations; accessed through ``.iloc``, ``.index``, ``.shape``
            and ``.values.reshape(-1)``.
        grid: Mapping of parameter name to an iterable of candidate values.
            The Cartesian product of the values (in the mapping's iteration
            order) forms the candidate ``order`` tuples.
        cv: Number of splits handed to ``TimeSeriesSplit``.

    Returns:
        ``(estimator, res)`` -- the ARMA model built on ``df`` with the best
        order and the result of fitting it -- or ``(None, None)`` when there
        are no candidates or no candidate produced a score.
    """
    # product() of no iterables yields a single empty tuple, which is not a
    # usable order, so an empty grid is treated as "no candidates".
    params = [tuple(v) for v in product(*grid.values())] if grid else []
    print(params)
    if not params:
        return None, None

    best_param = None
    best_score = np.inf  # np.infty alias no longer exists in NumPy 2.0
    tsp = TimeSeriesSplit(n_splits=cv)
    for param in params:
        scores = []
        for train_ind, test_ind in tsp.split(df):
            train_data = df.iloc[train_ind]
            test_data = df.iloc[test_ind]
            try:
                estimator = arima_model.ARMA(train_data, order=param)
                res = estimator.fit()
                # Out-of-sample predictions spanning the test fold's index.
                y_pred = estimator.predict(res.params, test_data.index[0],
                                           test_data.index[-1])
                y_test = test_data.values.reshape(-1)
                score = math.sqrt(metrics.mean_squared_error(y_test, y_pred))
            except Exception:
                # Best effort: a fold that cannot be fitted or scored is
                # skipped.  Exception (not a bare except) keeps
                # KeyboardInterrupt / SystemExit working.
                continue
            scores.append(score)

        if scores:
            mean_score = np.mean(scores)
            if mean_score < best_score:
                best_score = mean_score
                best_param = param

    if best_param is None:
        return None, None

    estimator = arima_model.ARMA(df, order=best_param)
    res = estimator.fit()
    print("best parameters:" + str(best_param))
    print("validation rmse:" + str(best_score))
    # In-sample predictions over the full positional range of df.
    y_pred = estimator.predict(res.params, start=0, end=df.shape[0] - 1)
    y_train = df.values.reshape(-1)
    train_rmse = math.sqrt(metrics.mean_squared_error(y_train, y_pred))
    print("train rmse:" + str(train_rmse))
    return estimator, res
def _fit(self, ord, ix, if_plot=False):
    """Fit an ARMA model to ``self.y[ix]`` and return the fitted parameters.

    Args:
        ord: Two-element order; ``ord[0] + ord[1]`` sets the minimum sample
            size and ``ord`` is passed unchanged to ``arima_model.ARMA``.
        ix: Index/selector applied to ``self.y`` to pick the data to fit.
        if_plot: When true, call ``plot_predict()`` on the fit result.

    Returns:
        The ``params`` attribute of the fit result.

    Raises:
        AssertionError: if the selected data has no more than
            ``2 * (ord[0] + ord[1])`` elements.
    """
    sample = self.y[ix]
    assert len(sample) > 2 * (ord[0] + ord[1]), 'not enough data to fit'

    # Imported lazily, only once the size check has passed.
    from statsmodels.tsa import arima_model

    # trend='nc': the fit is requested without a constant term.
    fitted = arima_model.ARMA(sample, ord).fit(trend='nc')
    if if_plot:
        fitted.plot_predict()
    return fitted.params
def fit_ARIMA(self, col, order=(1, 1)):
    """Fit an ARMA model to ``self.ss[col]`` and store fitted values and residuals.

    On success two entries are added to ``self.ss``:
    ``'<col>_ARIMA_fitted'`` (``result.fittedvalues``) and
    ``'<col>_ARIMA_resid'`` (``result.resid``).

    Args:
        col: Key of the series in ``self.ss`` to model.
        order: Order handed to ``ARIMA.ARMA``; defaults to ``(1, 1)``.

    Returns:
        None.  If ``col`` is not a key of ``self.ss`` a warning is printed
        and nothing is stored.
    """
    # Only the lookup is guarded: a KeyError raised while fitting is a
    # different problem and must not be reported as a bad key.
    try:
        series = self.ss[col]
    except KeyError:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Warning: %s is a bad key, ignoring" % col)
        return

    # The order belongs to the model constructor; fit() takes no order.
    model = ARIMA.ARMA(series, order=order)
    result = model.fit()
    self.ss['%s_ARIMA_fitted' % col] = result.fittedvalues
    self.ss['%s_ARIMA_resid' % col] = result.resid
def garch_group(Y, q0=1, p=1, q=1, do_plots=False):
    """Fit an ARMA(p, q) model to the averaged standardised squared AR residuals.

    For every column of ``Y`` an ``ar_model.AR`` model is fitted with
    ``q0`` passed to ``fit``; its squared residuals are centred, divided by
    their standard deviation and accumulated.  The accumulated series is
    averaged over the columns and an ``arima_model.ARMA`` model of order
    ``(p, q)`` is fitted to it.

    Args:
        Y: 2-D array; the code iterates over ``np.transpose(Y)`` and divides
            by ``Y.shape[1]``, so each column is one series.
        q0: Value passed to ``AR.fit``.  The accumulator has length
            ``Y.shape[0] - q0`` -- assumes each AR fit yields that many
            residuals; TODO confirm against the statsmodels version in use.
        p: AR order of the ARMA model.
        q: MA order of the ARMA model.
        do_plots: When true, print the p-values and plot the fitted values.

    Returns:
        ``(residuals, r2)``: the averaged standardised squared residuals and
        the ARMA fit result.  Returned in both modes (previously nothing was
        returned when ``do_plots`` was true).
    """
    residuals = np.zeros(Y.shape[0] - q0)
    for y in np.transpose(Y):
        et = ar_model.AR(y).fit(q0).resid ** 2
        residuals += (et - np.mean(et)) / np.std(et)
    residuals /= Y.shape[1]

    r2 = arima_model.ARMA(residuals, (p, q)).fit()
    if do_plots:
        # Single-argument print() behaves the same on Python 2 and 3.
        print(r2.pvalues)
        pylab.plot(r2.fittedvalues)
        pylab.show()
    return residuals, r2
def arma_construct(p, q, x_train, y_train, x_valid, y_valid, num_data):
    """Estimate an ARMA(p, q) model on ``y_train`` and print a report.

    Args:
        p: The order of the AR model.
        q: The order of the MA model.
        x_train: Partition of the noise set used for training.
        y_train: Training data evaluated using the original model.
        x_valid: Partition of the noise set used for validation.
        y_valid: Validation data evaluated using the original model.
        num_data: Amount of data being generated; the summed relative error
            is scaled by ``1 / (num_data / 2)``.

    Returns:
        None.  The coefficients and both error figures are printed.
    """
    fitted = arima_model.ARMA(y_train, order=(p, q)).fit(disp=0)

    # The fit result omits the leading 1 of each polynomial, so prepend it;
    # the AR parameters are negated before being used as the filter
    # denominator.
    ma_co = [1] + fitted.maparams.tolist()
    ar_co = [1] + list(-fitted.arparams)

    print('**** ARMA({0},{1}) ****'.format(p, q))
    print('AR Coefficients: ', ["%.3f" % item for item in ar_co])
    print('MA Coefficients: ', ["%.3f" % item for item in ma_co])

    # Training error first, then validation error: run the noise through the
    # estimated filter and compare with the reference output.
    report = (
        ('Training Error: %.3f', x_train, y_train),
        ('Validation Error: %.3f', x_valid, y_valid),
    )
    for template, noise, reference in report:
        filtered = signal.lfilter(ma_co, ar_co, noise)
        error = (1 / (num_data / 2)) * np.sum(
            np.abs((reference - filtered) / filtered))
        print(template % error)
def fit_row(self, row):
    """Fit an ARMA(self.p, self.q) model to ``row``.

    Returns:
        A ``(pred, params)`` tuple: the predictions for positions ``0`` to
        ``len(row) - 1`` and the ``params`` of the fit result.
    """
    fitted = arima_model.ARMA(row, (self.p, self.q)).fit()
    last = len(row) - 1
    return (fitted.predict(start=0, end=last), fitted.params)
def garch(y, q0=1, p=1, q=1):
    """Fit an ARMA(p, q) model to the standardised squared AR residuals of ``y``.

    An ``ar_model.AR`` model is fitted to ``y`` with ``q0`` passed to
    ``fit``; its squared residuals are centred on their mean and divided by
    their standard deviation before the ARMA fit.

    Returns:
        The result of ``arima_model.ARMA(...).fit()``.
    """
    ar_fit = ar_model.AR(y).fit(q0)
    sq_resid = ar_fit.resid ** 2
    centred = sq_resid - sum(sq_resid) / len(sq_resid)
    standardised = centred / np.std(sq_resid)
    return arima_model.ARMA(standardised, (p, q)).fit()
# Tail of the report for the preceding estimate: MA coefficients, sigma^2 and
# the method name (paraMA / paraSigma2 are computed before this fragment).
print(' Coefficents of MA: {0}\n'.format(formatIter(paraMA)))
print(' Sigma^2: %.4f\n' % paraSigma2)
print(' Method:Moment Estimation / Inverse Correlation Function Method')
''' ## (2) use arima_model.ARMA.fit() to compute MLE we may encounter warnings, i.e. non-positive definite Hessian ,but ignore it we select start_param of optimization algorithm by arima_model.ARMA.fit() instead of results of (1) '''
filterwarnings('ignore')  # ignore warnings
# Draw a fresh 100-point sample and fit ARMA(4, 2) by MLE; whenever the fit
# raises ValueError, discard the sample and try again.
# NOTE(review): the loop has no retry limit -- it only ends once a fit succeeds.
while True:
    series = arima_process.arma_generate_sample(ar, ma, 100, sigma=2)
    armaModel = arima_model.ARMA(series, (4, 2))
    try:
        # trend='nc': fit without a constant; disp=0 silences optimiser output.
        armaResult = armaModel.fit(method='mle', trend='nc', disp=0, maxiter=10000)
    except ValueError as e:
        continue
    else:
        # Keep the estimates of the first successful fit and stop retrying.
        paraAR_mle, paraMA_mle = armaResult.arparams, armaResult.maparams
        paraSigma2_mle = armaResult.sigma2
        break
# Report the maximum-likelihood estimates.
print('\n----3.1 (2)----\n')
print(' Coefficents of AR: {0}\n'.format(formatIter(paraAR_mle)))
print(' Coefficents of MA: {0}\n'.format(formatIter(paraMA_mle)))
print(' Sigma^2: %.4f\n' % paraSigma2_mle)
print(' Method:Maximum Likelihood Estimation')
self.reactor_size = (24 * 1.0, 'in^3') else: self.reactor_size = (None, None) def _load_timeseries_data(self): self.gts = df.Dataframe() self.gts.SQL_load_data( self.interface_proc, 'gas_proc_data_tbl', conditions=[ "timestamp >= '%s'" % self.run_info.info['ss_start'], "timestamp < '%s'" % self.run_info.info['ss_stop'] ]) #This line needs to automatically load the units if __name__ == "__main__": user = raw_input('User: '******'timestamp'] model = ARIMA.ARMA(test.gts['mass_flow_brush_feeder'], order=(1, 1)) result = model.fit() print result.summary() print test.gts['mass_flow_brush_feeder'] print result.fittedvalues print result.resid
data = p.read_csv(src, sep="|") #print(data) # MUST GIVE COLUMN HEADERS in CSV FILE BEFORE WE DO THIS five_year_data = data[["5Y"]] print(five_year_data) # how do we get this to work as a 1D array? #five_year_data_1d = p.Series.ravel(five_year_data) #print(five_year_data_1d) acf = calc_acf(five_year_data) print(acf[0]) print("ACF is", acf) # plt.xlim(1, 10) acf_plt = s.plot_acf(acf) pacf = calc_pacf(five_year_data, 10) print(pacf) pacf_plt = s.plot_pacf(pacf) plt.show() # Param estimation #model = a(five_year_data, order=(10,1,0)) # model_fit = model.fit(disp=0) # print(model_fit.summary()) ar_model = ar.AR(five_year_data) ar_model_fit = ar_model.fit(10) print("Params") print(ar_model_fit.params) ma_model = ma.ARMA(five_year_data.values, (0, 10)) ma_model_fit = ma_model.fit() print("MA Params") print(ma_model_fit.params)
def fit_row(self, row: np.ndarray) -> base.ApproxAndParams:
    """Fit an ARMA(self._p, self._q) model to ``row``.

    Returns:
        ``base.ApproxAndParams`` built from the predictions for positions
        ``0`` to ``len(row) - 1`` and the ``params`` of the fit result.
    """
    order = (self._p, self._q)
    fitted = arima_model.ARMA(row, order).fit()
    in_sample = fitted.predict(start=0, end=len(row) - 1)
    return base.ApproxAndParams(in_sample, fitted.params)
method='SLSQP', bounds=bnds, options={'maxiter': MAX_MLE_ITER}) time_mymle_end = time.time() time_mymle = time_mymle_end - time_mymle_start assert res.success mu_hat = res.x[0] phi_hat = res.x[1] theta_hat = res.x[2] sigma2_hat = res.x[3] ### Now, for comparison purposes, estimate using the StatsModels package ### sm = arima_model.ARMA(y_vec, order=(1, 1)) time_sm_start = time.time() fittedsm = sm.fit(disp=0) time_sm_end = time.time() time_sm = time_sm_end - time_sm_start ### Display results ### print('(mu, phi, theta, sigma2) = (%f, %f, %f, %f)' % (mu, phi, theta, sigma2)) print('(mu, phi, theta, sigma2)_0 = (%f, %f, %f, %f)' % (mu_guess, phi_guess, theta_guess, sigma2_guess)) print('(mu, phi, theta, sigma2)^hat = (%f, %f, %f, %f)' % (mu_hat, phi_hat, theta_hat, sigma2_hat)) print('(mu, phi, theta, sigma2)^hatSM = (%f, %f, %f, %f)' %
# Notebook export (Python 2 print statements); "# In[n]:" marks a cell.
plt.show()
# Autocorrelation plot of the training ticket counts.
autocorrelation_plot(dfLeadsTrain['Count_Tickets'])
plt.show()

# In[10]:

print dfLeadsTrain.columns
print dfLeadsTest.tail()

# In[11]:

# Columns passed to the model as exogenous regressors.
exog_cols = [
    u'Day_of_Week_1', u'Day_of_Week_2', u'Day_of_Week_3', u'Day_of_Week_4',
    u'Day_of_Week_5', u'Day_of_Week_6', u'MONTH_END', u'HOLIDAY'
]
# ARMA of order (2, 1) on Count_Tickets with the exogenous columns above.
modelARMA = am.ARMA(dfLeadsTrain.Count_Tickets, (2, 1), exog=dfLeadsTrain[exog_cols])
resultsARMA = modelARMA.fit()
print(resultsARMA.summary())
# Durbin-Watson on results shows no/low positive autocorrelation

# In[12]:

# Information criteria of the fit, then the Durbin-Watson statistic of its
# residuals.
print resultsARMA.aic, resultsARMA.bic, resultsARMA.hqic
print sm.stats.durbin_watson(resultsARMA.resid.values)

# In[13]:

# Plot the residuals on a 12x8 figure.
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax = resultsARMA.resid.plot(ax=ax)
# Simulated series to fit (util.simulate is defined elsewhere).
y = util.simulate(poly=100, sinusoids=(10, 100, -20)).values
# 365 * 96 samples spaced 0.25 apart, and the same axis scaled by 3600
# (presumably hours and seconds -- TODO confirm).
hr = np.arange(365 * 96) * .25
t = hr * 3600
# Three synthetic signals of increasing complexity: noise + offset + one
# sine; the same with an added cosine term; and the latter with larger noise
# plus a cumulative-sum (random walk) component.
# NOTE(review): `sinusoids` is not used again in the visible code.
sinusoids = [
    np.random.normal(0.0, 0.1, 365 * 96) + 10 + 3 * np.sin(hr * 2 * np.pi / 96 / .25),
    np.random.normal(0.0, 0.1, 365 * 96) + 15 + 3 * np.sin(hr * 2 * np.pi / 96 / .25) + 3 * np.cos(t * 2 * np.pi / 96. / .25 / 365.),
    np.random.normal(0.0, 1.0, 365 * 96) + 15 + 3 * np.sin(hr * 2 * np.pi / 96 / .25) + 3 * np.cos(t * 2 * np.pi / 96. / .25 / 365.) + np.random.normal(0.0, 1e-5, 365 * 96).cumsum()
]
# Fit ARMA with order (2, 0) to y.
arma20 = arima_model.ARMA(y, (2, 0)).fit()
# NOTE(review): `arma` is not defined in the visible code -- possibly meant
# to be `arma20`; confirm.
y2 = arma.predict(start=10 * 96, end=12 * 96)
y1 = y[10 * 96 - 1:12 * 96]
# Plot observed (y1) against predicted (y2) over the same window.
# NOTE(review): on Python 3 zip() returns an iterator, not a list -- confirm
# plt.plot accepts it in the target environment.
plt.plot(t[10 * 96 - 1:12 * 96], zip(*[y1, y2]))
plt.show()
# NOTE(review): `arma30` is never assigned in the visible code -- confirm it
# is created elsewhere.
y2 = arma30.predict(start=10 * 96, end=12 * 96)
plt.plot(t[10 * 96 - 1:12 * 96], zip(*[y1, y2]))
plt.show()
# Residual plots: raw residuals, then residuals relative to y2 and to y.
arma30.resid.plot()
plt.plot(arma30.resid)
plt.show()
plt.plot(arma30.resid / y2)
plt.plot(arma30.resid / y)
plt.show()
plt.plot(arma30.resid / y)
plt.show()
# stats: -0.582869706432, pvalue: 0.920441499758 # h0: rho == 1 is rejected. There is no unit root. # for AAPL # MA(15) fits model the best. # stats: 3.04749309416, pvalue: 1.0 # stats: 5.14080783224, pvalue: 0.999999999468 # h0: rho == 1 is not rejected. There is unit root. test_series = goog_df from arch.unitroot import PhillipsPerron # fit MA on resid. import statsmodels.tsa.arima_model as arma for ma_lag in [7, 8, 9, 10, 11, 12, 13, 15, 20]: model = arma.ARMA(test_series, (0, ma_lag)).fit() print 'lag: {}, aic: {}'.format(ma_lag, model.aic) pp = PhillipsPerron(test_series, trend='c', lags=10, test_type='tau') print 'stats: {}, pvalue: {}'.format(pp.stat, pp.pvalue) pp = PhillipsPerron(test_series, trend='c', lags=10, test_type='rho') print 'stats: {}, pvalue: {}'.format(pp.stat, pp.pvalue) ### using adf test ### goog and aapl t-value # 0.876758903592 # 0.999070159449 test_series = goog_df from arch.unitroot import ADF adf = ADF(goog_df, lags=24) print adf.pvalue
def get_arma_coefficients(series, order=(2, 3)):
    """Fit an ARMA model of the given ``order`` to ``series``.

    The fit is run with ``disp=False``.

    Returns:
        The ``params`` attribute of the fit result.
    """
    fitted = arima_model.ARMA(series, order).fit(disp=False)
    return fitted.params