def manaus(start='1983-01-31', end='1995-01-31', p=4, q=4):
    """
    Plot the ARMA(p,q) model of the River Negro height data using
    statsmodels built-in ARMA class.

    Two orders are selected, one minimising AIC and one minimising BIC.
    Each selected order is fitted with ``method='mle'`` and its prediction
    over ``start``..``end`` is drawn in its own figure.

    Parameters:
        start (str): the date at which to begin forecasting
        end (str): the date at which to stop forecasting
        p (int): max_ar parameter
        q (int): max_ma parameter

    Return:
        aic_min_order (tuple): optimal order based on AIC
        bic_min_order (tuple): optimal order based on BIC
    """
    # Get dataset
    raw = pydata('manaus')

    # Make DateTimeIndex (monthly stamps) and keep only the measurement
    # column; column 0 of the raw values is dropped.
    levels = pd.DataFrame(
        raw.values,
        index=pd.date_range('1903-01', '1993-01', freq='M'),
    )
    levels = levels.drop(0, axis=1)

    # Reset column names
    levels.columns = ['Water Level']

    # Selecting the best order under both information criteria
    order = order_select(
        levels.values,
        max_ar=p,
        max_ma=q,
        ic=['aic', 'bic'],
        fit_kw={'method': 'mle'},
    )
    aic = order['aic_min_order']
    bic = order['bic_min_order']

    # Fit and plot once per criterion; the AIC figure is shown first,
    # then the BIC figure, exactly as before.
    for criterion, selected_order in (('AIC', aic), ('BIC', bic)):
        model = ARMA(levels, selected_order).fit(method='mle')
        _, ax = plt.subplots(figsize=(13, 7))
        model.plot_predict(start=start, end=end, ax=ax)
        ax.set_title('Manaus Dataset {}'.format(criterion))
        ax.set_xlabel('Year')
        ax.set_ylabel('Water Level')
        plt.show()

    return aic, bic
# Remove rows with missing values before modelling.
df.dropna(inplace=True)
LocalTransmission = df['LocalTransmission'].astype('int32')
print(df.index)

# order=(0, 1): no autoregressive terms and one moving-average term.
result = ARMA(df, order=(0, 1)).fit(disp=False)
print(result.summary())

predictions = result.predict(start="2020-03-01", end="2020-05-01")
print(predictions)

result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for postive cases in Egypt \n Algorithm used: MA', fontsize=12)
plt.show()


def mean_forecast_error(LocalTransmission, predictions):
    """Return the mean of (observed - predicted).

    Parameters:
        LocalTransmission: observed values (a pandas Series in this script).
        predictions: predicted values for the same quantity.

    Returns:
        The mean of the element-wise differences.

    NOTE(review): with pandas Series the subtraction aligns on the index,
    so labels present in only one operand yield NaN, which ``mean()``
    skips by default. Confirm both inputs share the intended date range.
    """
    # The previous body, mean(sum(LocalTransmission, predictions)), passed
    # `predictions` as the *start* value of sum() and never subtracted.
    return LocalTransmission.sub(predictions).mean()


# Report the metric; the bare call previously discarded the result.
print(mean_forecast_error(LocalTransmission, predictions))
# Predict positions 90 through 99 with each of the three fitted models.
y_pred_ar = ar.predict(start=90, end=99)
y_pred_ma = ma.predict(start=90, end=99)
y_pred_arma = arma.predict(start=90, end=99)

# Print 0.1 * (sum of squared errors) per model.
# NOTE(review): the 0.1 factor equals a mean only if 10 points are predicted.
for _template, _forecast in (
    ("MSE AR: {:.2f}", y_pred_ar),
    ("MSE MA: {:.2f}", y_pred_ma),
    ("MSE ARMA: {:.2f}", y_pred_arma),
):
    print(_template.format(0.1 * np.sum(np.power(y_test - _forecast, 2))))

# Show the results for AR and MA: one stacked panel per model, shared x axis.
fig, ax = plt.subplots(2, 1, figsize=(18, 20), sharex=True)
_panels = zip(ax, (ar, ma), ("AR(15) prediction", "MA(15) prediction"))
for _axis, _fitted, _title in _panels:
    # Draw the reference data first, then overlay the model's prediction.
    _axis.plot(y_test, linewidth=1.0, color="r", label="Data")
    _fitted.plot_predict(start=90, end=99, plot_insample=False, ax=_axis)
    _axis.set_title(_title, fontsize=16)
    _axis.set_ylabel("Measure", fontsize=16)
    _axis.legend(fontsize=16)

# Only the bottom panel carries the x-axis label.
ax[1].set_xlabel("Time", fontsize=16)
plt.show()

# Show the result for ARMA
# Fit ARIMA(2, 0, 2) and inspect its residuals.
model = ARIMA(arima202, (2, 0, 2)).fit()
# Parenthesised print works on both Python 2 and Python 3.
print(model.summary())
model.resid.plot()
plot_acf(model.resid, lags=100)
# `plt.show` without parentheses only referenced the function and never
# displayed anything; call it.
plt.show()

# Lag-1 autocorrelation of the first-differenced series.
df.ExplosivityIndexMax.diff(1).autocorr(1)  # -0.46688
df.ExplosivityIndexMax.diff(1).plot()
plt.show()

# predictions of explosivity of volcanic eruptions
model.plot_predict(1, 35)  # ok for up to (1,35) (1,40 onwards returns error)
# (1,30) shows 1960 - 2003
model.plot_predict(10, 30)  # 1979-1995
model.plot_predict(10, 35)  # 1981-2003

fig, ax = plt.subplots()
# NOTE(review): assumes df has a DatetimeIndex and '1960' selects the rows of
# that year. `.loc` is used because selecting rows with df['1960'] is no
# longer supported in pandas 2.0+; confirm '1960' is not a column name.
ax = df.loc['1960'].plot(ax=ax)
fig = model.plot_predict(1, 35, ax=ax, plot_insample=False)
# the predictive model doesn't seem to work. No error!

# TODO - to do the code for splitting the data for training / test set
df.head()
# result_trend.k_trend   1 when the model has a constant, 0 when it does not
# result_trend.llf       value of the log-likelihood function
# result_trend.maparams  MA parameter values
# result_trend.nobs      number of observations used in the fit
# result_trend.params    model parameters, ordered as: trend coefficients,
#                        k_exog exogenous coefficients, AR coefficients,
#                        MA coefficients.
# It is better to use params to inspect the results.
# result_trend.pvalues   p-values of the coefficients, based on the z statistic
# result_trend.resid     model residuals
# result_trend.sigma2    variance of the residuals

# 4. Model goodness-of-fit checks
# (1) White-noise test of the residuals
ljungbox_lags = [6, 12]
output3 = acorr_ljungbox(
    result_trend.resid,
    lags=ljungbox_lags,
    boxpierce=True,
    return_df=True,
)
print(output3)

# (2) Significance test of the model parameters
print(result_trend.pvalues)
# This result does not seem fully consistent with R's.

# Plot the data from 1950 onwards and overlay the dynamic prediction
# for '2009' through '2012' on the same axes.
fig, ax = plt.subplots()
since_1950 = data.loc['1950':]
ax = since_1950.plot(ax=ax)
result_trend.plot_predict(
    '2009',
    '2012',
    ax=ax,
    dynamic=True,
    plot_insample=False,
)
plt.show()
# `plt.show` without parentheses never displayed the figure; call it.
plt.show()  # one big negative outlier

# arima (2,0,2)
from statsmodels.tsa.arima_model import ARIMA

model = ARIMA(SO2_eruption[['SO2Mass']], (2, 0, 2)).fit()
# Parenthesised print works on both Python 2 and Python 3.
print(model.summary())
model.resid.plot()
plot_acf(model.resid, lags=50)
plt.show()  # this time the autocorrelation has no negative "outlier"

# predicting with arima(2,0,2)
# don't understand why this returns error as compared to volcano explosivity
# and earthquake magnitude
model.plot_predict(1, 10)

# `fig` was misspelled as `ig` here.
fig, ax = plt.subplots()
# NOTE(review): assumes SO2_eruption has a DatetimeIndex and '2015' selects
# that year's rows. `.loc` is used because row selection through
# SO2_eruption['2015'] is no longer supported in pandas 2.0+.
ax = SO2_eruption.loc['2015'].plot(ax=ax)
fig = model.plot_predict(1, 50, ax=ax, plot_insample=False)

predictions = model.predict(
    '2012-01-05',
    '2016-03-30',
    dynamic=True,
)
# Print the error metric; the bare call previously discarded it.
print(mean_absolute_error(test, predictions))
model.summary()
model.resid.plot()
plot_acf(model.resid, lags=50)
# `plt.show` without parentheses never displayed the figure; call it.
plt.show()
# seems like arma(1,0) is sufficient given little fluctuations in
# autocorrelation values after 3

# TODO - to do the code for splitting the data for training / test set
df.head()

# Positional 75% / 25% split of the magnitude column.
n = len(df.mag)
split_at = int(.75 * n)
train = df.mag[:split_at]
test = df.mag[split_at:]

import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error

model = sm.tsa.ARMA(train, (1, 0)).fit()
# The max is (1,35) 40 and above returns error. This is only for two months'
# worth of quakes in 1960 (Jan-Feb)
model.plot_predict(1, 35)
model.plot_predict(1, 20)  # For Jan 1960 only.

fig, ax = plt.subplots()
# NOTE(review): assumes df has a DatetimeIndex and '1960' selects that year's
# rows. `.loc` is used because row selection through df['1960'] is no longer
# supported in pandas 2.0+.
ax = df.loc['1960'].plot(ax=ax)
fig = model.plot_predict(1, 35, ax=ax, plot_insample=False)  # whole of 1960
# Other years, the visualization doesn't seem to work