def ARFit(self):
    """Fit an autoregressive (AR) model and record its predictions.

    An AR model is fitted on ``self.trainData`` (the fit is timed as the
    training time) and used to predict the span covered by
    ``self.testData``. A second AR model is fitted on ``self.data`` and
    used to forecast ``self.horizon`` steps past the end of the data.
    Fitted values, test predictions and forecasts are all rounded up with
    ``np.ceil``.

    Side effects: sets ``self.fittedModel`` and ``self.fittedModelFinal``,
    appends the run-time object to ``self.runTimeList`` and registers the
    outcome through ``self.setModelResults``.
    """
    modelName = 'AR'
    runTimeObj = obj.ModelsRunTime(modelName)

    # Step 1: fit on the training split and time the fit.
    startTime = dt.datetime.now()
    self.fittedModel = ar.AR(self.trainData).fit()
    runTimeObj.setTrainingTime(dt.datetime.now() - startTime)
    trainingFit = pd.Series(np.ceil(self.fittedModel.fittedvalues))

    # Predict the test span (indices right after the training data) and
    # time the prediction separately.
    trainSize = len(self.trainData)
    startTime = dt.datetime.now()
    testPredictions = pd.Series(
        np.ceil(
            self.fittedModel.predict(start=trainSize,
                                     end=trainSize + len(self.testData) - 1,
                                     dynamic=False)))
    totalTime = dt.datetime.now() - startTime

    # Step 2: train again with all data for accurate forecasts.
    self.fittedModelFinal = ar.AR(self.data).fit()
    dataSize = len(self.data)
    forecasts = pd.Series(
        np.ceil(
            self.fittedModelFinal.predict(start=dataSize,
                                          end=dataSize + self.horizon - 1,
                                          dynamic=False)))

    # Step 3: set the error data for AR. The fitted values of the model
    # are shorter than trainData.
    # NOTE(review): the original note here said the initial training
    # predictions are filled with the real data; no such filling happens in
    # this method - confirm that setErrorData copes with the shorter series.
    errorObjs = self.setErrorData(trainingFit, testPredictions, runTimeObj)

    # Add the prediction time measured above to the test time already
    # stored on the run-time object.
    runTimeObj.setTestTime(runTimeObj.getTestTime() + totalTime)
    self.runTimeList.append(runTimeObj)

    # Add to ModelsResult list.
    self.setModelResults(modelName, errorObjs, trainingFit, testPredictions,
                         forecasts)
def onBars(self, bars):
    """Handle a new set of bars: predict, score, then record the bars.

    Once ``self.bar_counter`` has reached ``self.history_size`` an AR model
    is fitted on the instrument history and asked for the single value at
    index ``self.history_size``. The predicted and the actual price moves
    are discretized with ``self.discretize_price_diff`` and appended to
    ``self.prediction_history`` / ``self.actual_history``. Whenever the
    counter is a multiple of 10 a confusion matrix is printed, pairing each
    prediction with the actual category recorded one step later.

    The counter is incremented and ``bars`` is appended to ``self.history``
    on every call.
    """
    if self.bar_counter >= self.history_size:
        close_price = bars[self.instrument].getClose()
        past_prices = self.inst_history(self.instrument)
        next_index = self.history_size

        fitted = ar_mod.AR(past_prices).fit()
        forecast = fitted.predict(start=next_index, end=next_index,
                                  dynamic=True)
        prediction = forecast[0]

        # Compute both categories before touching either history list.
        predicted_category = self.discretize_price_diff(prediction,
                                                        close_price)
        actual_category = self.discretize_price_diff(close_price,
                                                     past_prices[-1])
        self.prediction_history.append(predicted_category)
        self.actual_history.append(actual_category)

        if self.bar_counter % 10 == 0:
            # Shift by one: prediction k is scored against actual k + 1.
            matrix = confusion_matrix(self.actual_history[1:],
                                      self.prediction_history[:-1],
                                      labels=[1, -1, 0])
            self.print_cm(matrix)

    self.bar_counter += 1
    self.history.append(bars)
def fit_row(self, row):
    """Approximate ``row`` with an AR model of at most ``self.maxlag`` lags.

    The first ``self.maxlag`` samples are copied from ``row`` unchanged; the
    remaining samples are the model's predictions for indices
    ``self.maxlag`` through ``len(row) - 1``.

    Returns a ``(prediction, params)`` tuple, where ``params`` are the
    fitted AR parameters.
    """
    fitted = ar_model.AR(row).fit(maxlag=self.maxlag)
    leading = row[:self.maxlag]
    predicted_rest = fitted.predict(start=self.maxlag, end=len(row) - 1)
    approximation = np.concatenate((leading, predicted_rest))
    return (approximation, fitted.params)
def find_optimal_lag_length(
    self, cols, time, min_lag=1, max_lag=8, criterion="aic"
):
    """Pick the lag length that minimises an information criterion.

    ``cols`` is first looked up as a single column; if that lookup fails it
    is treated as a collection of columns (located through its first
    element) and a ``VAR`` model is used instead of a univariate AR model.
    The model is fitted once per lag in ``min_lag..max_lag`` and the lag
    with the smallest ``criterion`` value is reported.

    Parameters:
        cols: a column name, or a sequence of column names.
        time: name of the column to use as the index, when available.
        min_lag, max_lag: inclusive range of lag lengths to try.
        criterion: ``"aic"`` or ``"bic"``.

    Returns:
        A ``QueryResult`` holding the optimal lag and a sentence describing
        it, or ``None`` (after printing an error) when ``criterion`` is not
        supported.
    """
    # Best-effort lookup: fall back to multi-column mode on any failure,
    # but no longer swallow KeyboardInterrupt/SystemExit.
    try:
        s = self.map_column_to_sheet(cols)
        multi = False
    except Exception:
        s = self.map_column_to_sheet(cols[0])
        multi = True

    df = s.df
    # Prefer the data indexed by the time column; fall back to the bare
    # column(s) when that selection fails.
    try:
        args_vector = np.append(cols, time) if multi else np.array([cols, time])
        data = df[args_vector]
        data = data.set_index(time)
    except Exception:
        data = df[cols]
    model = VAR(data) if multi else s_ar.AR(data)

    n_lags = max_lag - min_lag + 1
    info_loss = np.zeros(n_lags)
    if criterion not in ("aic", "bic"):
        print("ERROR: Criterion argument not supported.")
        return None
    for i in range(n_lags):
        # ``criterion`` names the attribute of the fit result to read.
        info_loss[i] = getattr(model.fit(i + min_lag), criterion)

    optimal = np.argsort(info_loss)[0] + min_lag
    utterance = (
        "The optimal lag length according to the "
        + str(criterion)
        + " criterion is "
        + str(optimal)
        + "."
    )
    return QueryResult(optimal, utterance)
def PlotData(InputName):
    """Plot observed counts against HTM, rolling-mean and AR predictions.

    Reads the first three columns of the CSV file ``InputName`` (the first
    becomes a datetime index; the others are accessed as ``Ct`` and
    ``prediction``), builds two baseline predictions (a shifted rolling
    mean and an AR model) and draws two stacked plots sharing the x axis:
    the series themselves on top and their ``MSE`` curves below.

    Relies on the module-level names ``MSE``, ``WndwDys``, ``DyStps`` and
    ``PredAhead``. Blocks in ``plt.show()``; returns nothing.
    """
    # HTM prediction, as read from file.
    pred_htm = pd.read_csv(InputName,
                           na_values="",
                           index_col=0,
                           usecols=[0, 1, 2])
    pred_htm.index = pd.to_datetime(pred_htm.index)
    pred_htm['MSE'] = MSE(pred_htm.Ct, pred_htm.prediction, WndwDys, DyStps)

    # Work on fresh frames rather than slices of pred_htm so that adding
    # columns below does not raise SettingWithCopyWarning.
    pred = pred_htm[['Ct', 'prediction']].rename(
        columns={"prediction": "HTM"})
    pred_mse = pd.DataFrame(pred_htm.MSE).rename(columns={"MSE": "HTM"})

    # Rolling mean of Ct over a WndwDys-row window, shifted forward by the
    # prediction horizon.
    pred_mn = pred_htm[['Ct']].copy()
    pred_mn['prediction'] = pred_mn.Ct.rolling(WndwDys, center=False).mean()
    pred_mn['prediction'] = pred_mn['prediction'].shift(PredAhead * DyStps)
    pred_mn['MSE'] = MSE(pred_mn.Ct, pred_mn.prediction, WndwDys, DyStps)
    pred['RollMn'] = pred_mn.prediction
    pred_mse['RollMn'] = pred_mn.MSE

    # AR model fitted on Ct alone (the frame still has only that column
    # when the model is built).
    pred_ar = pred_htm[['Ct']].copy()
    ar_mdl = ar_model.AR(pred_ar)
    ar_fit = ar_mdl.fit(maxlag=(WndwDys * DyStps))
    pred_ar['prediction'] = ar_fit.predict()
    pred_ar['MSE'] = MSE(pred_ar.Ct, pred_ar.prediction, WndwDys, DyStps)
    pred['AR'] = pred_ar.prediction
    pred_mse['AR'] = pred_ar.MSE

    # Matplotlib date numbers for the shared x axis.
    pred['dates'] = [date2num(date) for date in pred.index]
    pred_mse['dates'] = [date2num(date) for date in pred_mse.index]

    gs = gridspec.GridSpec(2, 1, height_ratios=[3, 1])
    ax0 = plt.subplot(gs[0])
    ax1 = plt.subplot(gs[1], sharex=ax0)
    # The lower plot has no 'Ct' line; its cycle starts at orange so each
    # method keeps the same colour in both plots.
    ax0.set_prop_cycle('color', ['b', 'orange', 'g', 'r'])
    ax1.set_prop_cycle('color', ['orange', 'g', 'r'])

    MainGraph = pred.plot(x='dates', y=['Ct', 'RollMn', 'HTM', 'AR'], ax=ax0)
    AnomalyGraph = pred_mse.plot(x='dates',
                                 y=['RollMn', 'HTM', 'AR'],
                                 ax=ax1)

    dateFormatter = DateFormatter('%m/%d/%y')
    MainGraph.xaxis.set_major_formatter(dateFormatter)
    AnomalyGraph.xaxis.set_major_formatter(dateFormatter)

    ax1.set_xlabel('Dates')
    ax1.set_ylabel('MSE')
    ax0.set_ylabel('Traige Cts')

    MainGraph.legend(tuple(
        ['Show up', 'Rolling Mean', 'HTM', 'Auto Regression']),
                     loc=1)
    AnomalyGraph.legend(tuple(['Rolling Mean', 'HTM', 'Auto Regression']),
                        loc=1)

    plt.draw()
    plt.show()
def fit_row(self, row: np.ndarray) -> base.ApproxAndParams:
    """Approximate ``row`` with an AR model of at most ``self._degree`` lags.

    The first ``self._degree`` samples are taken from ``row`` as-is and the
    rest are the model's predictions for indices ``self._degree`` through
    ``len(row) - 1``. The approximation and the fitted parameters are
    returned wrapped in ``base.ApproxAndParams``.
    """
    fitted = ar_model.AR(row).fit(maxlag=self._degree)
    leading = row[:self._degree]
    predicted_rest = fitted.predict(start=self._degree, end=len(row) - 1)
    approximation = np.concatenate((leading, predicted_rest))
    return base.ApproxAndParams(approximation, fitted.params)
def bitUnPackDPCMModel(a):
    """Unpack a DPCM model and its encoded signal from a bit sequence.

    ``a`` must support bit slicing and expose ``.int``, ``.uint`` and
    ``.float`` on its slices (presumably a bitstring-style object - confirm
    against the caller). The fields are read in this order:

    * 8 bits: predictor order (signed int)
    * ``order + 1`` x 32 bits: predictor parameters (floats)
    * 8 bits: number of codewords (signed int)
    * ``nCodeWords`` x 32 bits: codebook entries (floats)
    * 32 bits: signal length (signed int)
    * ``sigLen`` x ``ceil(log2(nCodeWords))`` bits: encoded samples (uints)

    Returns ``(codebook, predictor, encodedx)``, where ``predictor`` is an
    AR fit result whose ``params`` have been overwritten with the unpacked
    parameters.
    """
    def read_floats(first_bit, how_many):
        # Build the offsets before allocating, as the original did.
        offsets = range(first_bit, first_bit + 32 * how_many, 32)
        values = np.zeros(how_many)
        for slot, offset in enumerate(offsets):
            values[slot] = a[offset:offset + 32].float
        return values

    order = a[0:8].int
    cursor = 8

    params = read_floats(cursor, order + 1)
    cursor += 32 * (order + 1)

    nCodeWords = a[cursor:cursor + 8].int
    cursor += 8

    codebook = read_floats(cursor, nCodeWords)
    cursor += 32 * nCodeWords

    sigLen = a[cursor:cursor + 32].int
    cursor += 32

    # Each encoded sample is a codebook index stored in nBits bits.
    nBits = int(np.ceil(np.log2(nCodeWords)))
    sample_offsets = range(cursor, cursor + nBits * sigLen, nBits)
    encodedx = np.zeros(sigLen)
    for slot, offset in enumerate(sample_offsets):
        encodedx[slot] = a[offset:offset + nBits].uint

    # Fit on a dummy ramp only to obtain a result object of the right
    # order; its parameters are then replaced by the unpacked ones.
    predictor = ar_model.AR(range(0, sigLen)).fit(order)
    predictor.params = params
    return (codebook, predictor, encodedx)
def ARFit(self):
    """Fit an autoregressive (AR) model and record its predictions.

    An AR model is fitted on ``self.trainData``. Test predictions are made
    either in one go for the whole test span (``self.stepType == 'multi'``)
    or through ``ModelSelector.oneStepARPrediction`` otherwise. A second AR
    model fitted on ``self.data`` forecasts ``self.horizon`` steps past the
    end of the data.

    Side effects: sets ``self.fittedModel`` and ``self.fittedModelFinal``
    and registers the outcome through ``self.setModelResults``.
    """
    modelName = 'AR'

    # Step 1: fit on the training split and predict the test span.
    self.fittedModel = ar.AR(self.trainData).fit()
    trainingFit = pd.Series(self.fittedModel.fittedvalues)
    trainSize = len(self.trainData)

    if self.stepType == 'multi':
        testPredictions = pd.Series(
            self.fittedModel.predict(start=trainSize,
                                     end=trainSize + len(self.testData) - 1,
                                     dynamic=False))
    else:
        testPredictions = ModelSelector.oneStepARPrediction(
            self.data, self.fittedModel.params, self.start,
            len(self.testData))

    # Step 2: train again with all data for accurate forecasts.
    self.fittedModelFinal = ar.AR(self.data).fit()
    dataSize = len(self.data)
    forecasts = pd.Series(
        self.fittedModelFinal.predict(start=dataSize,
                                      end=dataSize + self.horizon - 1,
                                      dynamic=False))

    # Step 3: set the error data for AR. The model's fitted values are
    # shorter than trainData, so the missing leading entries are filled
    # with the real observations. Per the original note, this does not
    # affect the evaluation metrics.
    initialValues = self.data[:trainSize - len(trainingFit)]
    # Series.append was removed in pandas 2.0; concat is the equivalent.
    trainingFit = pd.concat([initialValues, trainingFit])
    errorObjs = self.setErrorData(trainingFit, testPredictions)

    # Add to ModelsResult list.
    self.setModelResults(modelName, errorObjs, trainingFit, testPredictions,
                         forecasts)
def __init__(self, z):
    """Regress ``z['ceaza']`` on selected AR(1) residuals of ``z['smap']``.

    Attributes set:
        r: residuals of an AR(1) fit to ``z['smap']`` (with ``z.index``
            passed as the dates), as a one-column frame.
        i: index labels where the residual exceeds its standard deviation
            and the following residual is positive.
        b: result tuple of ``np.linalg.lstsq`` regressing ``z['ceaza']`` at
            those labels on the selected residuals.
        x: ``z['ceaza']`` side by side with the regression's fitted values.
    """
    self.r = ar_model.AR(z['smap'], z.index).fit(1).resid.to_frame()
    self.i = self.r[(self.r > self.r.std())
                    & (self.r.shift(-1) > 0)].dropna().index
    # Regressor: the residuals at the selected labels. Earlier experiments
    # (previously left as commented-out code) also paired the residual
    # with its one-step lead or with z['temp'].
    x = self.r.loc[self.i]
    self.b = np.linalg.lstsq(x, z.loc[self.i, 'ceaza'])
    # axis must be passed by keyword: pandas 2.0 no longer accepts it
    # positionally in concat.
    self.x = pd.concat((z['ceaza'], x.dot(self.b[0])), axis=1)
def ar_features(data, order=30):
    '''
    Fit an AR model to a single channel and return its parameters.

    :param data: preprocessed single-channel data
    :param order: value passed to the fit as ``maxlag``
    :return: the ``params`` of the fitted model
    '''
    return ar_model.AR(data).fit(maxlag=order).params
def ar_order_sel(data, maxorder=5000):
    """Select an AR order per channel by AIC and return the most common one.

    ``data`` must be two-dimensional; each column is treated as a channel.
    The return value is the first element of ``mode(...)`` applied to the
    per-channel orders (i.e. the mode, not the maximum).
    """
    _, channel_count = data.shape
    best_orders = np.empty(channel_count)
    # Determine the best order for each channel.
    for channel in range(channel_count):
        channel_model = ar_model.AR(data[:, channel])
        best_orders[channel] = channel_model.select_order(maxorder, ic='aic')
    most_common, _ = mode(best_orders)
    return most_common
def fit_ar_model(data):
    """Fit an AIC-selected AR model to the log returns of ``data``.

    The log returns come from ``get_log_return(data)`` and are indexed by
    the ``data['date']`` strings parsed as ``%Y-%m-%d``.

    Returns ``(order, params, roots, sigma2)`` of the fitted model.
    """
    returns = get_log_return(data)
    timestamps = [datetime.strptime(day, '%Y-%m-%d') for day in data['date']]
    series = Series(returns, index=timestamps)
    fitted = ar.AR(endog=series).fit(ic='aic',
                                     trend='c',
                                     full_output=1,
                                     disp=1)
    return fitted.k_ar, fitted.params, fitted.roots, fitted.sigma2
def auto_reg(self, y, dates, p, clean_data="greedy"):
    """Fit an AR model with ``p`` lags to column ``y`` indexed by ``dates``.

    The sheet holding ``y`` is looked up, its data cleaned with the given
    ``clean_data`` strategy for the ``y`` and ``dates`` columns, and the
    result indexed by ``dates`` before fitting.

    Returns the fit result of the AR model.
    """
    sheet = self.map_column_to_sheet(y)
    columns = np.append(np.copy(y), dates)

    # Prepare data.
    cleaned = sheet.cleanData(columns, clean_data)
    indexed = cleaned[columns].set_index(dates)

    return s_ar.AR(indexed).fit(p)
def garch_group(Y, q0=1, p=1, q=1, do_plots=False):
    """Fit an ARMA(p, q) model to the averaged squared AR residuals of ``Y``.

    Each column of ``Y`` is fitted with an AR model of ``q0`` lags; its
    squared residuals are centred, scaled by their standard deviation and
    averaged across columns. An ARMA(``p``, ``q``) model is then fitted to
    that average.

    Parameters:
        Y: 2-D array with one series per column.
        q0: number of lags of the per-column AR model.
        p, q: orders of the ARMA model fitted to the averaged residuals.
        do_plots: when true, print the ARMA p-values and plot its fitted
            values.

    Returns:
        ``(residuals, r2)``: the averaged standardised squared residuals and
        the ARMA fit result. (Previously nothing was returned when
        ``do_plots`` was true.)
    """
    # The AR(q0) residuals are q0 samples shorter than the input series.
    residuals = np.zeros(Y.shape[0] - q0)
    for y in np.transpose(Y):
        et = ar_model.AR(y).fit(q0).resid**2
        residuals += (et - sum(et) / len(et)) / np.std(et)
    residuals /= Y.shape[1]

    r2 = arima_model.ARMA(residuals, (p, q)).fit()

    if do_plots:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(r2.pvalues)
        pylab.plot(r2.fittedvalues)
        pylab.show()
    return residuals, r2
def armodel(y, cutlist):
    """Fit an AR model (maxlag 3) to a segment of ``y`` around each cut.

    For every interior entry of ``cutlist`` (the first and last are
    skipped), the maximum of ``y`` within 20 samples either side of the cut
    is located, and the segment from 200 samples before to 100 samples
    after that maximum is copied. All segments are extracted first; each is
    then fitted with conditional maximum likelihood (``method='cmle'``).

    ``y`` must support ``.index`` and ``.copy`` on its slices (e.g. a list).
    Returns the list of fit results, one per segment.
    """
    half_window = 20
    segments = []
    for cut in cutlist[1:-1]:
        cut = int(cut)
        window = y[cut - half_window:cut + half_window]
        # Position of the window maximum, translated to an index into y.
        peak = cut + window.index(max(window)) - half_window
        segments.append(y[peak - 200:peak + 100].copy())

    return [
        ar_model.AR(segment).fit(maxlag=3, method='cmle', disp=0)
        for segment in segments
    ]
def ar_pred_err(data, pred_window_size=1000, order=30):
    '''
    Compare the energy of the tail of ``data`` with the energy of the AR
    prediction error over that tail.

    The AR model is fitted on everything except the last
    ``pred_window_size`` samples and then predicts the last
    ``pred_window_size + 1`` positions of ``data``.

    :param data: input signal
    :param pred_window_size: number of trailing samples held out of the fit
    :param order: value passed to the fit as ``maxlag``
    :return: ``(fun_energy, err_energy)`` as computed by ``function_energy``
    '''
    model_ar = ar_model.AR(data[:-pred_window_size])
    fitted = model_ar.fit(maxlag=order)

    sample_count = np.size(data, axis=0)
    forecast = model_ar.predict(fitted.params,
                                start=sample_count - pred_window_size - 1,
                                end=sample_count - 1)

    fun_energy = function_energy(data[-pred_window_size - 1:])
    err_energy = function_energy(data[-pred_window_size - 1:] - forecast)
    return fun_energy, err_energy
def dpcmopt(sig, order, nCodeWords):
    """Build a DPCM predictor, codebook and partition for ``sig``.

    An AR model with ``order`` lags predicts the signal; the prediction
    error is clustered with ``kmeans2`` into ``nCodeWords`` centroids,
    retrying until a run emits no warnings. The sorted centroids form the
    codebook and the midpoints between neighbouring centroids form the
    partition.

    Returns ``(partition, codebook, predictor)``.
    """
    predictor = ar_model.AR(sig).fit(order)

    forecast = predictor.predict(order, len(sig))
    # Pad the front with zeros where no prediction is available.
    forecast = np.append(np.zeros(order - 1), forecast)
    err = sig - forecast

    # Repeat the clustering until kmeans2 finishes without any warning.
    while True:
        with warnings.catch_warnings(record=True) as caught:
            codebook, _ = kmeans2(err, nCodeWords)
        if len(caught) == 0:
            break

    codebook.sort()
    size = len(codebook)
    partition = (codebook[1:size] + codebook[0:size - 1]) / 2
    partition.sort()
    return (partition, codebook, predictor)
def ARmodel(t, val, degree=2, scale=0.5):
    '''Fit an auto-regressive (AR) model to data and return some parameters

    The input data can be irregularly binned; it is resampled onto a
    regular grid with bin width ``scale`` (by linear interpolation) and
    passed through ``normalize`` before fitting.

    Parameters
    -----------
    t : np.ndarray
        input times
    val : np.ndarray
        input values
    degree : int
        degree of AR model
    scale : float
        binning of the resampled lightcurve

    Returns
    -------
    params : list of ``(degree + 1)`` floats
        parameters of the model
    sigma2 : float
        sigma of the Gaussian component of the model
    aic : float
        value of the Akaike information criterion

    Raises
    ------
    ValueError
        if ``t`` and ``val`` differ in length
    ImportError
        if statsmodels is not available
    '''
    if len(t) != len(val):
        raise ValueError('Time t and vector val must have same length.')
    if not _has_statsmodels:
        raise ImportError('statsmodels not found')

    grid = np.arange(np.min(t), np.max(t), scale)
    resampled = normalize(np.interp(grid, t, val))

    fitted = ar.AR(resampled).fit(degree)
    return fitted.params, fitted.sigma2, fitted.aic
# NOTE(review): the next two statements use ``total_return`` and ``i`` from
# code outside this view; they presumably close a loop that fills
# ``portfolio_return`` - confirm their indentation against the full file.
total_return = [i for i in total_return]
portfolio_return[i] = total_return

# Daily data for index code '000001' over 2016, used here only for its
# 'date' column.
sh_data = ts.get_k_data(code='000001',
                        index=True,
                        start='2016-01-01',
                        end='2017-01-01',
                        ktype='D')
date = [i for i in sh_data['date']]
xs = [datetime.strptime(d, '%Y-%m-%d') for d in date]

# For every portfolio, the AR order selected by AIC (0 when it has no
# returns).
portfolio_return_p = [0 for i in range(portfolio_return.__len__())]
for i in range(portfolio_return.__len__()):
    if len(portfolio_return[i]) > 0:
        # The first date is dropped from the index; presumably the returns
        # have one element fewer than the dates - verify.
        obj = Series(portfolio_return[i], index=xs[1:])
        a = ar.AR(endog=obj)
        fit_model = a.fit(ic='aic', trend='c', full_output=1, disp=1)
        order = fit_model.k_ar
        portfolio_return_p[i] = order
    else:
        portfolio_return_p[i] = 0

# Keep the portfolios whose selected order equals their 1-based position.
ar_portfolio = []
ar_portfolio_order = []
for i in range(portfolio_return_p.__len__()):
    if portfolio_return_p[i] == i + 1:
        ar_portfolio.append(classification[i])
        ar_portfolio_order.append(i + 1)
print(ar_portfolio)
# ``pred`` and ``pred_htm`` are defined before this fragment.
pred.rename(columns={"prediction": "HTM"}, inplace=True)
pred_mse = pd.DataFrame(pred_htm.MSE)
pred_mse.rename(columns={"MSE": "HTM"}, inplace=True)

# %% Mean Ct over past 30 days per time step
# Copy the slice so that adding columns does not raise
# SettingWithCopyWarning.
pred_mn = pred_htm[['Ct']].copy()
pred_mn['prediction'] = pred_mn.Ct.rolling(WndwDys, center=False).mean()
# Shift the rolling mean forward by the prediction horizon.
pred_mn['prediction'] = pred_mn['prediction'].shift(PredAhead * DyStps)
pred_mn['MSE'] = MSE(pred_mn.Ct, pred_mn.prediction, WndwDys, DyStps)
pred['RollMn'] = pred_mn.prediction
pred_mse['RollMn'] = pred_mn.MSE

# %% AR Model
# The model is built while the frame still holds only the 'Ct' column.
pred_ar = pred_htm[['Ct']].copy()
ar_mdl = ar_model.AR(pred_ar)
ar_fit = ar_mdl.fit(maxlag=(WndwDys * DyStps))
pred_ar['prediction'] = ar_fit.predict()
pred_ar['MSE'] = MSE(pred_ar.Ct, pred_ar.prediction, WndwDys, DyStps)
pred['AR'] = pred_ar.prediction
pred_mse['AR'] = pred_ar.MSE

# %% HTM Prediction
InputName = 'E:\\MyDocuments\\GitHub\\HTM\\Tests\\PredictionCheck\\gymdata_out.csv'
pred_htm = pd.read_csv(InputName,
                       na_values="",
                       index_col=0,
                       usecols=[0, 1, 2])
#pred_htm.index = pd.to_datetime(pred_htm.index, format = "%Y-%m-%d %H:%M:%S", errors = 'coerce')
pred_htm.index = pd.to_datetime(pred_htm.index)
pred_htm['MSE'] = MSE(pred_htm.Ct, pred_htm.prediction, WndwDys, DyStps)
# NOTE(review): these two prints read ``qbeta`` and ``qmu`` before the
# assignments below; they presumably belong to a definition that starts
# outside this view - confirm their indentation against the full file.
print("beta: ", [qb.value().eval() for qb in qbeta])
print("mu: ", qmu.value().eval())

print("setting up variational distributions")
# One Normal per parameter; softplus keeps each scale positive.
qmu = Normal(loc=tf.Variable(0.), scale=tf.nn.softplus(tf.Variable(0.)))
qbeta = [
    Normal(loc=tf.Variable(0.), scale=tf.nn.softplus(tf.Variable(0.)))
    for i in range(p)
]

print("constructing inference object")
# Pair every model variable with its variational approximation.
vdict = {mu: qmu}
vdict.update({b: qb for b, qb in zip(beta, qbeta)})
inference_vb = ed.KLqp(vdict,
                       data={xt: xt_true
                             for xt, xt_true in zip(x, x_true)})

print("running inference")
inference_vb.run()

print("parameter estimates:")
for j in range(p):
    print(
        "beta[%d]: " % j,
        qbeta[j].mean().eval(),
    )
# Report the mean, as for the betas above; this line used to print
# qmu.variance() under the "mu" label.
print("mu: ", qmu.mean().eval())

# Reference fit with statsmodels for comparison.
ar2_sm = ar_model.AR(x_true)
res = ar2_sm.fit(maxlag=2, ic=None, trend='c')
print("statsmodels AR(2) params: ", res.params)
def analyze_lags(self,
                 cols,
                 time,
                 preferred_criterion="aic",
                 min_lag=1,
                 max_lag=8):
    """Report AIC and BIC for a range of lag lengths.

    ``cols`` is first looked up as a single column; if that lookup fails it
    is treated as a collection of columns (located through its first
    element) and a ``VAR`` model is used instead of a univariate AR model.
    The model is fitted once per lag in ``min_lag..max_lag``.

    Parameters:
        cols: a column name, or a sequence of column names.
        time: name of the column to use as the index, when available.
        preferred_criterion: criterion handed to
            ``find_optimal_lag_length`` to choose the reported optimum.
        min_lag, max_lag: inclusive range of lag lengths to try.

    Returns:
        A ``QueryResult`` holding the optimal lag and a text listing the
        AIC and BIC per lag, followed by the values of
        ``find_prob_given_AIC`` for each lag relative to the lowest AIC.
    """
    # Best-effort lookup: fall back to multi-column mode on any failure,
    # but no longer swallow KeyboardInterrupt/SystemExit.
    try:
        s = self.map_column_to_sheet(cols)
        multi = False
    except Exception:
        s = self.map_column_to_sheet(cols[0])
        multi = True

    df = s.df
    # Prefer the data indexed by the time column; fall back to the bare
    # column(s) when that selection fails.
    try:
        args_vector = np.append(cols, time) if multi else np.array([cols, time])
        data = df[args_vector]
        data = data.set_index(time)
    except Exception:
        data = df[cols]
    model = VAR(data) if multi else s_ar.AR(data)

    n_lags = max_lag - min_lag + 1
    aic = np.zeros(n_lags)
    bic = np.zeros(n_lags)
    for i in range(n_lags):
        fit = model.fit(i + min_lag)
        aic[i] = fit.aic
        bic[i] = fit.bic

    lags = range(min_lag, max_lag + 1)
    # Lowest AIC, used as the reference for the relative probabilities.
    champ = aic[np.argsort(aic)[0]]

    # Collect the pieces and join once instead of growing a string.
    parts = [
        "AIC (" + str(lag) + " lags): " + str(value) + "\n"
        for lag, value in zip(lags, aic)
    ]
    parts.append("\n\n")
    parts.extend(
        "BIC (" + str(lag) + " lags): " + str(value) + "\n"
        for lag, value in zip(lags, bic)
    )
    parts.append("\n\n")
    parts.append(
        "Using AIC, here are the estimated proportional probabilities, using the best as a reference:"
    )
    parts.append("\n")
    parts.extend(
        str(lag) + " lags: " + str(find_prob_given_AIC(champ, value)) + "\n"
        for lag, value in zip(lags, aic)
    )
    utterance = "".join(parts)

    optimal = self.find_optimal_lag_length(
        cols, time, min_lag=min_lag, max_lag=max_lag,
        criterion=preferred_criterion
    ).get_denotation()
    return QueryResult(optimal, utterance)
# Finish the plot started before this fragment (``ax`` is defined there);
# the title is the AR(2) equation.
ax.legend()
ax.set_title(r'$y_t= \phi_1 y_{t-1} + \phi_2 y_{t-2} + \epsilon_t$')
plt.show()

# Plot the ACF and PACF as bar charts on two stacked axes; there are
# ncorr + 1 bars in each.
fig2, axes = plt.subplots(2)
fig2.subplots_adjust(hspace=0.5)
axes[0].bar(np.arange(ncorr + 1), y_acf)
axes[0].set_title("Autocorrelation")
axes[1].bar(np.arange(ncorr + 1), y_pacf)
axes[1].set_title("Partial Autocorrelation")
plt.show()

# Organize the correlogram as a two-column table and print it.
cgram = pd.DataFrame([y_acf, y_pacf]).transpose()
cgram.columns = 'acf', 'pacf'
print 'Correlogram'
print cgram

# Regression using an AR model: report the order BIC selects (up to 6
# lags), then fit with the same limit and criterion.
reg_model = ar_model.AR(y)
print '\nBIC selects order {}.'.format(reg_model.select_order(6, 'bic'))
reg_results = reg_model.fit(maxlag=6, ic='bic')

# Print out the results by hand (ar_model doesn't come with a summary
# function).
print 'Regression results:\nNumber of observations (T-k): {}\nOrder: {}\n'.format(
    reg_results.nobs, reg_results.k_ar)
print 'coeff: ', reg_results.params
print 'std err: ', reg_results.bse
print 't-stat: ', reg_results.tvalues
print 'p-value: ', reg_results.pvalues
data = p.read_csv(src, sep="|") #print(data) # MUST GIVE COLUMN HEADERS in CSV FILE BEFORE WE DO THIS five_year_data = data[["5Y"]] print(five_year_data) # how do we get this to work as a 1D array? #five_year_data_1d = p.Series.ravel(five_year_data) #print(five_year_data_1d) acf = calc_acf(five_year_data) print(acf[0]) print("ACF is", acf) # plt.xlim(1, 10) acf_plt = s.plot_acf(acf) pacf = calc_pacf(five_year_data, 10) print(pacf) pacf_plt = s.plot_pacf(pacf) plt.show() # Param estimation #model = a(five_year_data, order=(10,1,0)) # model_fit = model.fit(disp=0) # print(model_fit.summary()) ar_model = ar.AR(five_year_data) ar_model_fit = ar_model.fit(10) print("Params") print(ar_model_fit.params) ma_model = ma.ARMA(five_year_data.values, (0, 10)) ma_model_fit = ma_model.fit() print("MA Params") print(ma_model_fit.params)
use arima_process.arma_generate_sample() for generating sample of ARMA
use ar_model.AR() for create AR model from sample
use ar_model.AR.select_order() for determing order of AR by AIC or BIC
use stattools.acovf() for computing sample acovf of series
use armaME() for getting moment (Yule-Walker) estimation with sample acovf
these functions are all in statsmodel.tsa except custom function armaME()
'''
print('\n*********************\n chapter6: 1.5\n*********************')


def dist(x):
    '''Return uniform random numbers on [-4, 4).

    ``x`` is passed to ``np.random.random_sample`` as the output size; the
    [0, 1) samples are scaled by 8 and shifted by -4.
    '''
    return 8 * np.random.random_sample(x) - 4


# Coefficients handed to arma_generate_sample: five values for the AR side
# and a single 1 for the MA side (presumably lag-polynomial coefficients
# including the leading 1 - confirm the sign convention in the statsmodels
# docs). 500 samples are drawn, with ``dist`` supplying the noise.
ar, ma = (1, 0.9, 1.4, 0.7, 0.6), (1, )
series = arima_process.arma_generate_sample(ar, ma, 500, distrvs=dist)
ARmodel = ar_model.AR(series)
maxlag = 12

# Order selection by AIC with trend 'nc', then a moment estimate from the
# sample autocovariances up to the selected order.
print('\n----order selection using AIC----\n')
print('upper bound of order: %d' % maxlag)
ARorder_aic = ARmodel.select_order(maxlag, 'aic', trend='nc')
print('order: %d' % ARorder_aic)
armaAcovf = stattools.acovf(series, nlag=ARorder_aic, fft=False)
armaYW = armaME(ARorder_aic, 0, armaAcovf)

# Same procedure using BIC; armaAcovf and armaYW are overwritten.
print('----order selection using BIC----\n')
print('upper bound of order: %d' % maxlag)
ARorder_bic = ARmodel.select_order(maxlag, 'bic', trend='nc')
print('order: %d' % ARorder_bic)
armaAcovf = stattools.acovf(series, nlag=ARorder_bic, fft=False)
armaYW = armaME(ARorder_bic, 0, armaAcovf)
def garch(y, q0=1, p=1, q=1):
    """Fit an ARMA(p, q) model to the standardised squared AR residuals.

    ``y`` is fitted with an AR model of ``q0`` lags; its squared residuals
    are centred on their mean and divided by their standard deviation
    before the ARMA fit.

    Returns the ARMA fit result.
    """
    ar_fit = ar_model.AR(y).fit(q0)
    et = ar_fit.resid**2
    standardised = (et - sum(et) / len(et)) / np.std(et)
    return arima_model.ARMA(standardised, (p, q)).fit()
# Compare ModelSelector's AR results with a direct statsmodels fit.
series = pd.Series(data, index)
# First index of the test span: the series is split 80/20.
start = int(len(series.values) * 0.8)

print('Série real:')
print(series)
print()

seletor = ms.ModelSelector(series, 1, ['AR'], 80, stepType='multi')
seletor.fit()

print('Resultados do seletor:')
print(seletor.modelsResult[0].trainingPrediction)
print(seletor.modelsResult[0].testPrediction)
print(seletor.modelsResult[0].error[0].value)

print('Ajuste do AR:')
# Direct fit on the training part, predicting the remainder.
AR = ar.AR(series[:start])
AR = AR.fit()
trainingFit = pd.Series(AR.fittedvalues)
testPredictions = pd.Series(
    AR.predict(start=start, end=len(series) - 1, dynamic=False))
print(AR.fittedvalues)

# Result object over the whole series that carries the order, trend and
# parameters of the training-only fit.
AR2 = ar.AR(series)
AR2 = AR2.fit(maxlag=AR.k_ar)
AR2.k_ar = AR.k_ar
# This used to assign to the misspelled attribute ``k_tren``.
AR2.k_trend = AR.k_trend
AR2.params = AR.params

teste2 = ms.ModelSelector.oneStepARPrediction(series, AR2.params, start,
                                              len(series) - start)
print('Teste')
# ``appt`` and ``appt_htm`` are defined before this fragment.
appt_mse = pd.DataFrame(appt_htm.MSE)
appt_mse.rename(columns = {"MSE":"HTM"},inplace=True)

# %% Mean Ct over past 30 days per time step
# Copy the slice so that adding columns does not raise
# SettingWithCopyWarning.
appt_mn = appt_htm[['Ct']].copy()
appt_mn['prediction'] = appt_mn.Ct.rolling(WndwDys,center=False).mean()
# Shift the rolling mean forward by the prediction horizon.
appt_mn['prediction'] = appt_mn['prediction'].shift(PredAhead*DyStps)
appt_mn['MSE'] = MSE(appt_mn.Ct,appt_mn.prediction,WndwDys,DyStps)
appt['RollMn'] = appt_mn.prediction
appt_mse['RollMn'] = appt_mn.MSE

# %% AR Model
# The model is built while the frame still holds only the 'Ct' column.
appt_ar = appt_htm[['Ct']].copy()
ar_mdl = ar_model.AR(appt_ar)
ar_fit = ar_mdl.fit(maxlag=(WndwDys*DyStps))
appt_ar['prediction'] = ar_fit.predict()
appt_ar['MSE'] = MSE(appt_ar.Ct,appt_ar.prediction,WndwDys,DyStps)
appt['AR'] = appt_ar.prediction
appt_mse['AR'] = appt_ar.MSE

# %% Plot
# Matplotlib date numbers for the x axis.
appt['dates'] = [date2num(date) for date in appt.index]
appt_mse['dates'] = [date2num(date) for date in appt_mse.index]

gs = gridspec.GridSpec(2,1, height_ratios=[3, 1])
plt.figure(figsize=(10,5))
# %% Mean Ct over past 30 days per time step
# Copy the slice so that adding columns does not raise
# SettingWithCopyWarning.
ed_3h_mn = ed_3h_htm[['Ct']].copy()
# Start as float: the rolling means assigned below are floats, and writing
# them into an integer column is deprecated in recent pandas.
ed_3h_mn['prediction'] = 0.0
# Rolling mean computed separately for each 3-hourly slot of the day.
for i in range(0,24,3):
    ed_3h_mn.loc[ed_3h_mn.index.hour == i,'prediction'] = \
        ed_3h_mn.loc[ed_3h_mn.index.hour == i,'Ct'].rolling(WndwDys,center=False).mean()
# Shift forward by the prediction horizon.
ed_3h_mn['prediction'] = ed_3h_mn['prediction'].shift(PredAhead*DyStps)
ed_3h_mn['MSE'] = MSE(ed_3h_mn.Ct,ed_3h_mn.prediction,WndwDys,DyStps)
ed_3h['RollMn'] = ed_3h_mn.prediction
ed_3h_mse['RollMn'] = ed_3h_mn.MSE

# %% AR Model
# The model is built while the frame still holds only the 'Ct' column.
ed_3h_ar = ed_3h_htm[['Ct']].copy()
ar_mdl = ar_model.AR(ed_3h_ar)
ar_fit = ar_mdl.fit(maxlag=(WndwDys*DyStps))
ed_3h_ar['prediction'] = ar_fit.predict()
ed_3h_ar['MSE'] = MSE(ed_3h_ar.Ct,ed_3h_ar.prediction,WndwDys,DyStps)
ed_3h['AR'] = ed_3h_ar.prediction
ed_3h_mse['AR'] = ed_3h_ar.MSE

# %% ARMA model
ed_3h_arima = ed_3h_htm[['Ct']].copy()
arima_mdl = arima_model.ARIMA(ed_3h_arima,(30,1,30))
arima_fit = arima_mdl.fit()
# The predictions come from the fit result; this used to call .predict()
# on the DataFrame itself, which has no such method.
# NOTE(review): with d=1 the default predict() may return differences
# rather than levels - check whether typ='levels' is wanted here.
ed_3h_arima['prediction'] = arima_fit.predict()