def predict_wavelet(data_array):
    """Return a one-step-ahead forecast for a price series.

    Parameters
    ----------
    data_array : numpy array
        Stock price series.

    Returns
    -------
    ans : float
        One-step-ahead predicted value.
    """
    # Two-level db4 wavelet decomposition: A = approximation band,
    # B/C = detail bands.
    A, B, C = pywt.wavedec(data_array, 'db4', mode='sym', level=2)
    # Select the ARMA order for each band via the AIC criterion.
    order_A = sm.tsa.arma_order_select_ic(A, ic='aic')['aic_min_order']
    order_B = sm.tsa.arma_order_select_ic(B, ic='aic')['aic_min_order']
    order_C = sm.tsa.arma_order_select_ic(C, ic='aic')['aic_min_order']
    # Fit one ARMA model per band with the selected orders.
    model_A = ARMA(A, order=order_A)
    results_A = model_A.fit()
    model_B = ARMA(B, order=order_B)
    results_B = model_B.fit()
    model_C = ARMA(C, order=order_C)
    results_C = model_C.fit()
    # Extra forecast steps per band; only the finest detail band (C) gets
    # one extra step here.
    lag = [0, 0, 1]
    # Predict the wavelet coefficients (in-sample plus the extra steps).
    # NOTE(review): predict() is called on the unfitted model with the
    # fitted params passed positionally — old statsmodels API; confirm
    # against the statsmodels version in use.
    pre_A = model_A.predict(results_A.params, 1, len(A) + lag[0])
    pre_B = model_B.predict(results_B.params, 1, len(B) + lag[1])
    pre_C = model_C.predict(results_C.params, 1, len(C) + lag[2])
    # Reconstruct the series from predicted coefficients; the last sample
    # is the one-step-ahead forecast.
    predict_array = pywt.waverec([pre_A, pre_B, pre_C], 'db4')
    ans = predict_array[-1]
    return ans
def sm_arma(file='weather.npy', p=3, q=3, n=30):
    """Build an ARMA model with statsmodels and predict future n values.

    Parameters:
        file (str): data file
        p (int): maximum order of autoregressive model
        q (int): maximum order of moving average model
        n (int): number of values to predict
    Return:
        aic (float): aic of optimal model
    """
    # Work on first differences of the raw series.
    z = np.diff(np.load(file))
    l = len(z)
    min_aic = np.inf
    bestp = 0
    bestq = 0
    datetime_col = pd.date_range(start='04-13-2019t19:56', periods=(l),
                                 freq="1h")
    data = pd.DataFrame(z, index=datetime_col, columns=["weather"])
    # Grid-search (p, q) by AIC. Predictions are only needed for the best
    # model, so the (previously dead) per-iteration predict() call is gone.
    for i in range(1, p + 1):
        for j in range(1, q + 1):
            model = ARMA(z, order=(i, j)).fit(method='mle', trend='c')
            if model.aic < min_aic:
                min_aic = model.aic
                bestp, bestq = i, j
    # Refit the winning order and predict.
    model = ARMA(z, order=(bestp, bestq)).fit(method='mle', trend='c')
    # BUG FIX: the forecast horizon was hard-coded to 30, ignoring the
    # ``n`` parameter; use n so the prediction length matches the index
    # built below (l + n + 1 values for start=0..l+n).
    pred = model.predict(start=0, end=(l + n))
    # Plot observed data against the model's predictions.
    plt.figure(figsize=(12, 8))
    datetime_col = pd.date_range(start='04-13-2019t19:56',
                                 periods=(l + n + 1), freq="1h")
    pred_df = pd.DataFrame(pred, index=datetime_col, columns=["weather"])
    data["weather"].plot(label="Data")
    pred_df["weather"].plot(label="Predicted")
    plt.title("Stats ARMA(" + str(bestp) + "," + str(bestq) + ")")
    plt.ylabel("Change in Temperature")
    plt.xlabel("Dates")
    plt.legend()
    plt.show()
    return min_aic
def preTest(data, size, pre_size=1):
    """Wavelet + per-band ARMA forecast over a price DataFrame.

    Parameters
    ----------
    data : DataFrame with 'closeIndex' and 'tradeDate' columns
    size : number of leading rows used for training
    pre_size : number of rows held out / appended for prediction

    Prints the reconstructed predictions against the held-out values.
    """
    # Negative offset: predata[fre_size:] selects the evaluation tail.
    fre_size = pre_size - size
    index_list = np.array(data['closeIndex'])[:size]  # last rows excluded, held out for prediction
    date_list1 = np.array(data['tradeDate'])[:size]
    predata = data
    # Append placeholder rows for the values to be predicted.
    # NOTE(review): DataFrame.append is deprecated in modern pandas, and the
    # appended rows leave 'tradeDate' unset — confirm intended.
    for x in range(0, pre_size):
        predata = predata.append({'closeIndex': 0}, ignore_index=True)
    index_for_predict = np.array(predata['closeIndex'])[fre_size:]  # true values for the predicted range
    date_list2 = np.array(predata['tradeDate'])[fre_size:]
    # Decomposition
    A2,D2,D1 = pywt.wavedec(index_list,'db4',mode='sym',level=2)  # level-2 approximation plus all detail coefficients
    coeff = [A2,D2,D1]
    # Select a model order for each band of wavelet coefficients
    order_A2 = sm.tsa.arma_order_select_ic(A2,ic='aic')['aic_min_order']  # AIC criterion selects (p, q)
    order_D2 = sm.tsa.arma_order_select_ic(D2,ic='aic')['aic_min_order']  # AIC criterion selects (p, q)
    order_D1 = sm.tsa.arma_order_select_ic(D1,ic='aic')['aic_min_order']  # AIC criterion selects (p, q)
    # Build an ARMA model per band of wavelet coefficients.
    # Note: the AIC-selected orders occasionally make fitting fail; in that
    # case adjust the length of the data window.
    model_A2 = ARMA(A2,order=order_A2)  # build the models
    model_D2 = ARMA(D2,order=order_D2)
    model_D1 = ARMA(D1,order=order_D1)
    results_A2 = model_A2.fit()
    results_D2 = model_D2.fit()
    results_D1 = model_D1.fit()
    # Decompose the full (extended) series to size each band's horizon.
    A2_all,D2_all,D1_all = pywt.wavedec(np.array(predata['closeIndex']),'db4',mode='sym',level=2)  # decompose the whole series
    delta = [len(A2_all)-len(A2),len(D2_all)-len(D2),len(D1_all)-len(D1)]  # per-band number of forecast steps the ARMA model must produce
    # Predict wavelet coefficients: in-sample plus the out-of-sample steps.
    pA2 = model_A2.predict(params=results_A2.params,start=1,end=len(A2)+delta[0])
    pD2 = model_D2.predict(params=results_D2.params,start=1,end=len(D2)+delta[1])
    pD1 = model_D1.predict(params=results_D1.params,start=1,end=len(D1)+delta[2])
    print(len(pA2))
    print(len(pD2))
    print(len(pD1))
    # Reconstruct the series from the predicted coefficients.
    coeff_new = [pA2,pD2,pD1]
    denoised_index = pywt.waverec(coeff_new,'db4')
    # Print the predicted values (comparison DataFrame kept commented out
    # from the original):
    # temp_data_wt = {'real_value':index_for_predict,'pre_value_wt':denoised_index[fre_size:],'err_wt':denoised_index[fre_size:]-index_for_predict,'err_rate_wt/%':(denoised_index[fre_size:]-index_for_predict)/index_for_predict*100}
    # predict_wt = pd.DataFrame(temp_data_wt,index =
    # date_list2,columns=['real_value','pre_value_wt','err_wt','err_rate_wt/%'])
    # print(predict_wt)
    print(denoised_index[10:])
    print(index_for_predict[10:])
    print("*********************************************************************")
def getWaves(a, b):
    """Wavelet-decompose the close prices of ``df`` rows [a, b], fit an ARMA
    model per coefficient band, and return each band's in-sample predictions.

    Relies on the module-level DataFrame ``df`` (price in column index 2).

    Parameters
    ----------
    a, b : int
        Inclusive row range of ``df`` to use.

    Returns
    -------
    list of arrays
        Predicted coefficients [A8, D4, D3, D2, D1], same layout as
        ``pywt.wavedec`` output, suitable for ``pywt.waverec``.
    """
    price = np.array(df.iloc[a:b + 1, 2])
    # 4-level db4 decomposition: approximation A8 plus details D4..D1.
    A8, D4, D3, D2, D1 = pywt.wavedec(price, 'db4', mode='sym', level=4)
    coffnew = []
    # BUG FIX: the original fitted the D4 model on D1's coefficients and
    # the D3 model on D2's coefficients (copy-paste error). Each band is
    # now modelled on its own coefficients.
    for band in (A8, D4, D3, D2, D1):
        # AIC-selected (p, q) order for this band.
        order = sm.tsa.arma_order_select_ic(band, ic='aic')['aic_min_order']
        model = ARMA(band, order=order)
        result = model.fit()
        # In-sample predicted coefficients for this band.
        coffnew.append(
            model.predict(params=result.params, start=1, end=len(band)))
    return coffnew
def arma_preds(df2, p_max=5, q_max=5, start_date=-10, end_date=5):
    """Grid-search ARMA(p, q) by AIC and return yearly predictions.

    Parameters
    ----------
    df2 : DataFrame of the series to model (converted to a date index).
    p_max, q_max : int
        Upper bounds (inclusive) of the AR / MA order search.
    start_date, end_date : int
        Year offsets from today that bound the prediction range.

    Returns
    -------
    DataFrame with a 'preds' column indexed by year.
    """
    df2 = index_date_convert(df2)
    for col in df2.columns:
        df2[col] = df2[col].astype(float)
    # Grid-search (p, q); orders whose fit fails are skipped.
    d = {}
    for p in range(1, p_max + 1):
        for q in range(1, q_max + 1):
            try:
                arma_model = ARMA(df2, (p, q)).fit()
                # Only models with positive AIC are considered (as in the
                # original selection logic).
                if arma_model.aic > 0:
                    d[(p, q)] = arma_model.aic
            # BUG FIX: the bare ``except:`` also swallowed
            # KeyboardInterrupt/SystemExit; only runtime fit failures
            # should be ignored.
            except Exception:
                continue
    # Lowest AIC wins (raises IndexError if every fit failed, as before).
    lst = sorted(d.items(), key=lambda x: x[1], reverse=False)
    p, q = lst[0][0]
    # Refit the winning order and predict over the requested year range.
    from_date = str(date.today().year + start_date) + '-01-01'
    to_date = str(date.today().year + end_date) + '-01-01'
    arma_model = ARMA(df2, (p, q)).fit()
    preds = arma_model.predict(from_date, to_date)
    preds_df = pd.DataFrame(preds, columns=['preds'])
    preds_df.index = preds_df.index.year
    return preds_df
def ARMA_mse_predictions(arma_order, channel, training_set=None, test_set=None, training_ixs=None, step_ahead=60):
    """Compute per-trial MSEs of step-ahead ARMA predictions for Exercise 1.

    Fits one ARMA model per training segment and scores its ``step_ahead``
    forecasts against the matching test segment; returns the list of MSEs.
    """
    assert isinstance(arma_order, (tuple, list)) and len(arma_order) == 2, "arma_oder must be a len 2 list."
    assert isinstance(channel, int) and 0 < channel <= 6, "channel should be an int 1,2,3,4,5."
    training_set, test_set, training_ixs = _get_stairs_ar_prediction_data(
        training_set, test_set, training_ixs)
    n = len(training_set)
    assert len(test_set) == n, "training set and test set of different lengths."
    assert len(training_ixs) == n, "training set and training_ixs of different lengths."
    mses = []
    for train_df, test_df, first_ix in zip(training_set, test_set, training_ixs):
        # Fit on the training segment, then forecast starting right where
        # the training data ends.
        fitted = ARMA(train_df[channel].values, order=arma_order).fit(method='css')
        predicted = fitted.predict(start=first_ix, end=first_ix + step_ahead - 1)
        truth = test_df[channel].values[:step_ahead]
        mses.append(np.mean((truth - predicted) ** 2))
    return mses
def draw_ma(ts, w):
    """Fit both MA(w) and AR(w) models to ``ts``, plot their in-sample
    predictions to a PDF, and return the MA prediction."""
    ma_fit = ARMA(ts, order=(0, w)).fit(disp=-1)
    ma_pred = ma_fit.predict()
    ar_fit = ARMA(ts, order=(w, 0)).fit(disp=-1)
    ar_pred = ar_fit.predict()
    plt.clf()
    plt.plot(ar_pred, label="AR")
    plt.plot(ma_pred, label="MA")
    plt.legend(loc="best")
    plt.title("MA Test %s" % w)
    plt.savefig("test_ma_" + str(w) + ".pdf", format='pdf')
    return ma_pred
def run_arma(original_ts, maxar=7, maxma=7):
    """Fit an ARMA model on the log series (differencing until stationary)
    and return (in-sample predictions, 3-step forecast), both recovered to
    the original scale via ``predict_recover``.
    """
    print(original_ts.columns[0], 'start arma')
    original_ts_log = np.log(original_ts)
    # Stationarity test on the log series; p-value < 0.01 means no
    # differencing is needed.
    if test_stationarity(original_ts_log.ix[:, 0]) < 0.01:
        diffn = 0
        diff_original_ts_log = original_ts_log
        print('平稳,不需要差分')
    else:
        # Otherwise find the best differencing order (up to 8) and apply it.
        diffn = best_diff(original_ts_log, maxdiff=8)
        diff_original_ts_log = produce_diffed_timeseries(original_ts_log, diffn)
        print('差分滞后阶数为'+str(diffn)+',已完成差分')
    # Select the ARMA (p, q) order up to (maxar, maxma).
    order = choose_order(diff_original_ts_log, maxar, maxma)
    # order = (2, 3)
    print('模型的阶数为: ' + str(order))
    # NOTE(review): disp='-1' passes a string where statsmodels expects the
    # int -1 — confirm against the statsmodels version in use.
    model = ARMA(diff_original_ts_log.ix[:, 0], order).fit(disp='-1', method='css')
    # 3-step out-of-sample forecast (point estimates only).
    f = model.forecast(steps=3, alpha=0.05)[0]
    p = model.predict()
    # Map differenced log-space outputs back to the original scale.
    predict = predict_recover(p, original_ts_log, diffn, 'predict')
    forecast = predict_recover(f, original_ts_log, diffn, 'forecast')
    # Inspect the fit quality (kept from the original, commented out):
    # p = model.predict()
    # p = predict_recover(p, original_ts_log, diffn, 'predict')
    # plt.plot(p)
    # plt.plot(original_ts)
    # plt.show()
    return predict, forecast
def getPredictWaveCoff(a,b,level):
    """Wavelet-decompose ``df`` rows [a, b], forecast each coefficient band
    with an ARMA model, and return the last 28 reconstructed values.

    Relies on the module-level DataFrame ``df`` (price in column index 2).
    """
    data_train=df.iloc[a:b+1,2]
    data_whole=df.iloc[a:b+29,2]  # training window plus 28 future rows
    adList=pywt.wavedec(data_train, 'db1', level=level)
    adList_all=pywt.wavedec(data_whole,'db1',level=level)
    coffnew=[]
    for i in range(len(adList)):
        # Extra coefficients this band needs to cover the forecast horizon.
        delta=len(adList_all[i])-len(adList[i])
        if b==6899:
            # NOTE(review): debug leftover — consider removing.
            print('aaa')
        # AIC-selected (p, q) for this band.
        order = sm.tsa.arma_order_select_ic(adList[i], ic='aic')['aic_min_order']
        model=ARMA(adList[i],order=order)
        try:
            result=model.fit()
        except:
            # Fall back to ARMA(0, 0) when the AIC-selected order fails to
            # fit. NOTE(review): bare except also catches KeyboardInterrupt.
            print(b+1,'except')
            model = ARMA(adList[i], order=(0, 0))
            result = model.fit()
        # Predict this band's coefficients (in-sample plus delta steps).
        p=model.predict(params=result.params,start=a,end=len(adList[i])+delta)
        coffnew.append(p)
    # Reconstruct the series from predicted coefficients.
    data_predict=pywt.waverec(coffnew,'db1')
    data_predict=list(data_predict)
    l=len(data_predict)
    # print('test',data_predict[-1],data_predict[-2],data_predict[-3],list(data_train)[-1])
    # NOTE(review): returns 28 values and drops the very last one — confirm
    # the off-by-one is intentional.
    return data_predict[-29:-1]
def history_based_recommend():
    """Flask view: forecast next year's monthly usage from the logged-in
    user's history with an ARMA model and render the recommendation page.
    """
    loginUser = User.query.filter_by(username=current_user.username).first()
    if loginUser is None:
        abort(404)
    portrait = Portrait.query.filter_by(user=loginUser).first()
    import pandas as pd
    from math import ceil
    data = pd.read_excel('app/main/analysis/data/timeuse.xls', index_col=0)  # month is the index
    last_year_data = list(data['2016-1-01':]['use'])
    from statsmodels.tsa.arima_model import ARMA
    dta = data.diff(1)[1:]  # first-order differencing to improve stationarity
    arma_mod01 = ARMA(dta, (10, 1)).fit()
    predict_sunspots = arma_mod01.predict('2017-1-01', '2017-12-01', dynamic=True)
    # Undo the differencing: cumulatively add predicted deltas starting
    # from the last observed value, rounding each month up.
    predict_sunspots[0] = ceil(predict_sunspots[0] + data['2016-12-01':]['use'])
    for i in range(len(predict_sunspots) - 1):
        predict_sunspots[i + 1] = ceil(predict_sunspots[i] + predict_sunspots[i + 1])
    data = list(predict_sunspots)
    return render_template('history_based_recommend.html', data=data, last_year_data=last_year_data, portrait=portrait, base64=base64)
def draw_ar(ts, w):
    """Fit an AR(w) model to ``ts`` and save a plot of its in-sample
    prediction against the original series as a PDF."""
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    prediction = fitted.predict()
    plt.clf()
    plt.plot(prediction, label="PDT")
    plt.plot(ts, label="ORG")
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
    plt.savefig("test_ar_" + str(w) + ".pdf", format='pdf')
def draw_ar(ts, w):
    """Fit an AR(w) model to ``ts``, show prediction vs. original, and
    return the fitted model."""
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    prediction = fitted.predict()
    plt.clf()
    plt.plot(prediction, label="PDT")
    plt.plot(ts, label="ORG")
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
    plt.show()
    # fitted.conf_int() yields confidence intervals if callers need them.
    return fitted
def draw_ar(ts, w):
    """Fit an AR(w) model to ``ts`` and plot the 2016-2019 predicted slice
    against the 2010-2015 observed slice."""
    fitted = ARMA(ts, order=(w, 0)).fit(disp=-1)
    prediction = fitted.predict()
    plt.clf()
    plt.plot(prediction['2016':'2019'], 'r:', label="PDT")
    plt.plot(ts['2010':'2015'], '-', label="ORG")
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
    plt.show()
def _ARMA(self, data: pd.Series, AR_q: int = 1, MA_p: int = 1):
    """Fit an ARMA(AR_q, MA_p) model on ``data`` and return the last
    in-sample predicted value; NaN when fitting fails."""
    try:
        fitted = ARMA(data, order=(AR_q, MA_p)).fit(disp=0)
    except Exception as e:
        print(e)
        print("尝试采用其他滞后阶数")
        return np.nan
    return fitted.predict()[-1]
def wavelet_ARMA_model(timeseries, result_length):
    """Forecast the last ``result_length`` points of ``timeseries`` via a
    wavelet decomposition with one ARMA model per coefficient band.

    Returns a Series of predicted values indexed by the held-out dates.
    """
    timeseries = add_predict_term_to_timeseries(timeseries, 0.0)
    index_list = np.array(timeseries)[:-result_length]
    date_list1 = np.array(timeseries.index)[:-result_length]
    index_for_predict = np.array(timeseries)[-result_length:]
    date_list2 = np.array(timeseries.index)[-result_length:]
    # Two-level db4 decomposition of the training portion.
    A2,D2,D1 = pywt.wavedec(index_list,'db4',mode='sym',level=2)
    coeff=[A2,D2,D1]
    # AIC-selected ARMA orders per coefficient band.
    order_A2 = sm.tsa.arma_order_select_ic(A2,ic='aic')['aic_min_order']
    order_D2 = sm.tsa.arma_order_select_ic(D2,ic='aic')['aic_min_order']
    order_D1 = sm.tsa.arma_order_select_ic(D1,ic='aic')['aic_min_order']
    # Fit one ARMA model per band.
    model_A2 = ARMA(A2,order=order_A2)
    model_D2 = ARMA(D2,order=order_D2)
    model_D1 = ARMA(D1,order=order_D1)
    results_A2 = model_A2.fit()
    results_D2 = model_D2.fit()
    results_D1 = model_D1.fit()
    # Decompose the full series to size each band's forecast horizon.
    A2_all,D2_all,D1_all = pywt.wavedec(np.array(timeseries),'db4',mode='sym',level=2)
    delta = [len(A2_all)-len(A2),len(D2_all)-len(D2),len(D1_all)-len(D1)]
    # Predict coefficients (in-sample plus delta extra steps per band).
    pA2 = model_A2.predict(params=results_A2.params,start=1,end=len(A2)+delta[0])
    pD2 = model_D2.predict(params=results_D2.params,start=1,end=len(D2)+delta[1])
    pD1 = model_D1.predict(params=results_D1.params,start=1,end=len(D1)+delta[2])
    # Reconstruct and keep only the held-out tail.
    coeff_new = [pA2,pD2,pD1]
    denoised_index = pywt.waverec(coeff_new,'db4')
    temp_data_wt = {'pre_value':denoised_index[-result_length:]}
    Wavelet_TS = pd.DataFrame(temp_data_wt,index=date_list2,columns=['pre_value'])
    return Wavelet_TS['pre_value']
def arma_forecast(self, ts, p, q,):
    """Fit an ARMA(p, q) model on ``ts``; return the next-step forecast and
    the model summary."""
    fitted = ARMA(ts, order=(p, q)).fit(disp=-1)
    # Kept from the original (feeds the optional plot below if re-enabled).
    in_sample = fitted.predict()
    next_ret = fitted.forecast(1)[0]
    # plt.clf()
    # plt.plot(in_sample, label="Predicted")
    # plt.plot(ts, label="Original")
    # plt.legend(loc="best")
    # plt.title("AR Test {},{}".format(p, q))
    # plt.show()
    return next_ret, fitted.summary2()
def Do_ARMA(WIFIAPTag, p, q, Draw=False):
    """Fit an ARMA(p, q) model on the AP's time series and predict a fixed
    window on 2016-09-14; optionally plot fit and prediction."""
    series = GetTimeSeries(WIFIAPTag)
    # ARMA model
    from statsmodels.tsa.arima_model import ARMA
    fitted = ARMA(series, (p, q)).fit()
    prediction = fitted.predict(start='2016-9-14 15:0:0',
                                end='2016-9-14 17:50:0')
    if Draw == True:
        plt.rc('figure', figsize=(12, 8))
        plt.plot(fitted.fittedvalues, 'r')
        plt.plot(series)
        plt.plot(prediction, 'g-')
    return prediction
def Do_ARMA(WIFIAPTag, p, q, TrainTime, PredictTime):
    """Fit an ARMA(p, q) model on the three days before PredictTime[0]
    (ending 10 minutes early) and predict [PredictTime[0], PredictTime[1]]."""
    series = GetTimeSeries(WIFIAPTag)
    # Training window: 3 days before the prediction start, ending 10
    # minutes before it.
    window = [
        PredictTime[0] - timedelta(3),
        PredictTime[0] - timedelta(0, 0, 0, 0, 10, 0)
    ]
    #window = [pd.datetime(2016,9,11,6,0,0),pd.datetime(2016,9,14,15,0,0)]
    series = Get_Part_of_TimeSeries(series, window)
    # ARMA model
    from statsmodels.tsa.arima_model import ARMA
    fitted = ARMA(series, (p, q)).fit()
    return fitted.predict(start=str(PredictTime[0]), end=str(PredictTime[1]))
def draw_ma(ts, w):
    """Fit an MA(w) model on ``ts``; plot observed 2010-2015 against the
    predicted 2016-2019 slice and return the full prediction."""
    fitted = ARMA(ts, order=(0, w)).fit(disp=-1)
    # ts_predict_ma = fitted.predict('2016', '2019', dynamic=True)
    prediction = fitted.predict()
    plt.clf()
    plt.plot(ts['2010':'2015'], label="ORG")
    plt.plot(prediction['2016':'2019'], ':', label="PDT")
    plt.legend(loc="best")
    plt.title("MA Test %s" % w)
    plt.show()
    return prediction
def fit_model_and_forecast(id_list, config):
    """Fit an ARMA(2,2) per time-series ID with expanding windows, writing
    rolling one-step-ahead forecasts (and the last fitted model) to S3.

    Parameters
    ----------
    id_list : iterable of distinct time-series IDs
    config : dict with S3 host/paths and 'len_series'/'len_eval' lengths
    """
    # Cast collection of distinct time series IDs into Python list
    id_list = list(id_list)
    # Open connections to S3 File System
    s3 = s3fs.S3FileSystem()
    s3_open1 = s3.open
    s3_open2 = boto.connect_s3(host=config['s3_host'])
    # Loop over time series IDs
    for i, id in enumerate(id_list):
        # Determine S3 file path and load data into pandas dataframe
        file_path = s3.glob(config['path_training_data_parquet'] + 'ID=' + str(id) + '/*.parquet')
        df_data = ParquetFile(file_path,open_with=s3_open1).to_pandas()
        # Sort time series data according to original ordering
        df_data = df_data.sort_values('ORDER')
        # Initialize dataframe to store forecast
        df_forecasts = pd.DataFrame(np.nan, index=range(0, config['len_eval']), columns=['FORECAST'])
        # Add columns with ID, true data and ordering information
        df_forecasts.insert(0, 'ID', id, allow_duplicates=True)
        df_forecasts.insert(1, 'ORDER', np.arange(1, config['len_eval'] + 1))
        df_forecasts.insert(2, 'DATA', df_data['DATA'][range((config['len_series'] - config['len_eval']), config['len_series'])].values, allow_duplicates=True)
        # Loop over successive estimation windows (expanding window: one
        # one-step-ahead forecast per window)
        for j, train_end in enumerate(range((config['len_series'] - config['len_eval'] - 1), (config['len_series'] - 1))):
            # Fit ARMA(2,2) model and forecast one-step ahead
            model = ARMA(df_data['DATA'][range(0, train_end+1)], (2, 2)).fit(disp=False)
            df_forecasts.at[j, 'FORECAST'] = model.predict(train_end+1, train_end+1)
        # Write dataframe with forecast to S3 in Parquet file format
        path = config['path_forecasts'] + 'ID=' + str(id) + '.parquet'
        write(path, df_forecasts, write_index=False, append=False, open_with=s3_open1)
        # Save fitted ARMA model to S3 in pickle file format
        # (only the final window's model survives the inner loop)
        path = config['path_models'] + 'ID=' + str(id) + '.model'
        with s3io.open(path, mode='w', s3_connection=s3_open2) as s3_file:
            joblib.dump(model, s3_file)
def Do_ARMA(WIFIAPTag,p,q,Draw = False):
    """Fit an ARMA(p, q) model on a recent slice of the AP's series and
    predict over the configured prediction window.

    NOTE(review): relies on a module-level ``PredictTime`` (not a
    parameter) for both the training-window end and the prediction range —
    confirm it is defined before this is called.
    """
    Tag_Time_Series = GetTimeSeries(WIFIAPTag)
    # Training window: fixed start to 10 minutes before prediction start.
    ARMA_Time = [pd.datetime(2016,9,22,6,0,0),PredictTime[0] - timedelta(0,0,0,0,10,0)]
    #ARMA_Time = [pd.datetime(2016,9,11,6,0,0),pd.datetime(2016,9,14,15,0,0)]
    Tag_Time_Series = Get_Part_of_TimeSeries(Tag_Time_Series,ARMA_Time)
    # ARMA model
    from statsmodels.tsa.arima_model import ARMA
    arma_mod = ARMA(Tag_Time_Series,(p,q)).fit()
    Predict = arma_mod.predict(start=str(PredictTime[0]),end=str(PredictTime[1]))
    if Draw == True:
        plt.rc('figure', figsize=(12, 8))
        plt.plot(arma_mod.fittedvalues,'r')
        plt.plot(Tag_Time_Series)
        plt.plot(Predict,'g-')
    return Predict
def draw_ar(ts, w):
    """Fit an AR(w) model on ``ts``, run a residual white-noise check, and
    plot prediction vs. original.

    NOTE(review): ``xdata_pred``, ``xdata``, ``lagnum`` and
    ``acorr_ljungbox`` are not defined in this function — presumably
    copy-pasted from another script. This raises NameError unless they
    exist at module level; verify before use. The printed messages also
    refer to ARIMA(0,1,1), not AR(w).
    """
    arma = ARMA(ts, order=(w,0)).fit(disp=-1)
    ts_predict = arma.predict()
    pred_error = (xdata_pred-xdata).dropna()  # residuals
    pred_error=pred_error[pred_error>0]
    # Ljung-Box test on the residuals.
    lb,p =acorr_ljungbox(pred_error,lags=lagnum)
    h=(p<0.05).sum()  # p < 0.05 is treated as non-white-noise
    if h>0:
        print('模型ARIMA(0,1,1)不符合白噪声检验')
    else:
        print('模型ARIMA(0,1,1)符合白噪声检验')
    plt.clf()
    plt.plot(ts_predict, label="PDT")
    plt.plot(ts, label = "ORG")
    plt.legend(loc="best")
    plt.title("AR Test %s" % w)
def ARMA_model(datasets, dim, order=(3, 3)):
    """Fit an ARMA model on a subsampled training slice of one dimension
    and return the MSE of its dynamic predictions against the test slice."""
    train = datasets.train_dl[::100, dim]
    test = datasets.ori_data[::10, dim]
    fitted = ARMA(train, order).fit()
    residuals = fitted.fittedvalues - train
    # In-sample fit score in [0, 1]; closer to 1 means a better fit.
    score = 1 - residuals.var() / train.var()
    print('score is', score)
    predicts = fitted.predict(301, 300 + len(test), dynamic=True)
    print(len(predicts))
    mse = mean_squared_error(test, predicts)
    comparison = pd.DataFrame({'original': test, 'predict': predicts})
    comparison.plot()
    print(comparison)
    return mse
class ts_AR:
    """Autoregressive model wrapper around a training time series.

    Parameters
    ----------
    ts_train : training time series
    error_fun : error function supplied by callers (stored, unused here)

    Attributes
    ----------
    fittedModel : fitted ARMA model, populated by ``fit``
    """

    def __init__(self, ts_train, error_fun=None):
        self.ts_train = ts_train  # training time series
        self.error_fun = error_fun
        self.fittedModel = None

    def _adf_summary(self):
        # Augmented Dickey-Fuller test; collect the headline statistics and
        # the per-level critical values into one labelled Series.
        stats = adfuller(self.ts_train, autolag='AIC')
        summary = pd.Series(stats[0:4], index=[
            'Test Statistic', 'p-value', 'Lags Used',
            'Number of Observations Used'
        ])
        for level, critical in stats[4].items():
            summary['Critical Value (%s)' % level] = critical
        return summary

    def fit(self):
        """Fit an AR(w) model where w is the ADF-reported lag count."""
        lags = int(self._adf_summary()['Lags Used'])
        self.fittedModel = ARMA(self.ts_train, order=(lags, 0)).fit(disp=-1)

    def get_fittedvalues(self):
        """Return the in-sample fitted values, rounded to whole units."""
        # Equivalent to self.fittedModel.fittedvalues.
        return np.round(self.fittedModel.predict())

    def predict(self, forcast_period=2):
        """Forecast ``forcast_period`` steps ahead, rounded to whole units."""
        return np.round(self.fittedModel.forecast(forcast_period)[0])
def ARMA_plot_predictions(arma_order, channel, training_set=None, test_set=None, training_ixs=None, title=None):
    """Plot 60-step ARMA predictions for Exercise 1, one subplot per trial."""
    assert isinstance(arma_order, (tuple, list)) and len(arma_order) == 2, "arma_oder must be a len 2 list."
    assert isinstance(channel, int) and 0 < channel <= 6, "channel should be an int 1,2,3,4,5."
    training_set, test_set, training_ixs = _get_stairs_ar_prediction_data(
        training_set, test_set, training_ixs)
    n = len(training_set)
    assert len(test_set) == n, "training set and test set of different lengths."
    assert len(training_ixs) == n, "training set and training_ixs of different lengths."
    fig, axes = plt.subplots(*_splt_gridsize(n))
    axes = axes.ravel()
    for train_df, test_df, first_ix, ax in zip(training_set, test_set, training_ixs, axes):
        fitted = ARMA(train_df[channel].values, order=arma_order).fit(method='css')
        predicted = fitted.predict(start=first_ix, end=first_ix + 59)
        # Last 60 training points, then true and predicted test windows.
        ax.plot(np.arange(first_ix - 60, first_ix), train_df[channel][-60:])
        ax.plot(np.arange(first_ix, first_ix + 60), test_df[channel].values)
        ax.plot(np.arange(first_ix, first_ix + 60), predicted)
        # Mark the train/test boundary.
        ax.axvline(first_ix, color='k', linestyle=':')
    fig.set_size_inches(8, 6)
    if title:
        fig.suptitle(title)
    plt.tight_layout()
def wrangle(self):
    """Build a VRP (variance risk premium) series from S&P 500 OHLC data
    and VIX closes, storing the result in ``self.wrangled_data``.
    """
    print("Wrangling data for %s ... " % self.NAME)
    sp500 = self.row_data['sp500']
    vix = self.row_data['vix']
    # Log-return components: overnight (o), high/low vs open (u/d),
    # open-to-close (c).
    sp500['o'] = np.log(sp500['Open']) - np.log(sp500['Close'].shift(1))
    sp500['u'] = np.log(sp500['High']) - np.log(sp500['Open'])
    sp500['d'] = np.log(sp500['Low']) - np.log(sp500['Open'])
    sp500['c'] = np.log(sp500['Close']) - np.log(sp500['Open'])
    #suppose n=20
    n = 20
    # Intraday range-based variance term, averaged over an n-day window
    # (Rogers-Satchell-style estimator — verify against the reference).
    sp500['rs'] = sp500['u'] * (sp500['u'] - sp500['c']) + sp500['d'] * (
        sp500['d'] - sp500['c'])
    sp500['V_rs'] = sp500['rs'].rolling(window=n).sum() / n
    # Rolling variance of overnight and open-to-close returns.
    series_vo = (sp500['o'] - sp500['o'].rolling(window=n).mean())**2
    sp500['V_o'] = series_vo.rolling(window=n).sum() / (n - 1)
    # NOTE(review): window=20 here vs ``n`` elsewhere — same value today,
    # but consider using n for consistency.
    series_vc = (sp500['c'] - sp500['c'].rolling(window=20).mean())**2
    sp500['V_c'] = series_vc.rolling(window=n).sum() / (n - 1)
    # Weighted combination of the three variance components (presumably a
    # Yang-Zhang-style weighting — TODO confirm).
    k = 0.34 / (1.34 + (n + 1) / (n - 1))
    sp500['V'] = sp500['V_o'] + k * sp500['V_c'] + (1 - k) * sp500['V_rs']
    #GARCH style, actually is ARMA to variance
    model = ARMA(sp500['V'][39:], order=(1, 1)).fit()
    vix = vix.iloc[78:]
    # Annualize the predicted variance and convert to VIX-style percentage
    # points; VRP = observed VIX minus the model-implied value.
    vix['forecast'] = np.sqrt(model.predict() * 252) * 100
    vix['VRP'] = vix['Close'] - vix['forecast']
    vix = vix[(vix['Date'] >= '1990-06-08') & (vix['Date'] <= '2019-11-05')]
    vix = vix.set_index('Date')
    vix.index = pd.to_datetime(vix.index)
    self.wrangled_data = vix[['VRP']]
return tmp_data diffed_ts = diff_ts(dta_log,d=[1,1]) test_stationarity.testStationarity(diffed_ts) test_stationarity.draw_acf_pacf(diffed_ts,l=31) model = arima_model(diffed_ts) pdb.set_trace() model.get_proper_model() print 'bic:',model.bic,'p:',model.p,'q:',model.q print model.properModel.forecast()[0] # print model.forecast_next_day_value(type='day') model2=ARMA(diffed_ts,(model.p,1,model.q)).fit() model2.summary2() predict_sunspots = model2.predict('2090','2100',dynamic=True) a = model2.forecast(5)[0] a_ts = predict_diff_recover(a,d=[1,1]) log_a = np.exp(a_ts) print log_a pdb.set_trace() model.certain_model(6,0) predict_ts = model.properModel.predict() diff_recover_ts = predict_diff_recover(predict_ts,d=[1,1]) log_recover = np.exp(diff_recover_ts)
# Top-level script: fit an MA(1) model to Egypt's daily local-transmission
# counts and plot predictions for March-May 2020.
plt.style.use('fivethirtyeight')
df = pd.read_excel("../00Daily/Egypt.xlsx", squeeze=True, parse_dates=True)
df = df[["Date", "LocalTransmission"]]
df.set_index("Date", inplace=True)
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)
# ARMA(0, 1) is a pure moving-average model of order 1.
result = ARMA(df, order=(0, 1)).fit(disp=False)
print(result.summary())
#print(result.params)
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)
# Built-in forecast plot over the same window.
result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for postive cases in Egypt \n Algorithm used: MA', fontsize=12)
plt.show()
##def mean_forecast_error(y, yhat):
##    return y.sub(yhat).mean()
result_D1 = model_D1.fit()''' '''plt.subplot(312) plt.plot(D2,'red') plt.plot(result_D2.fittedvalues,'blue') plt.title('D2') plt.subplot(313) plt.plot(D1,'red') plt.plot(result_D1.fittedvalues,'blue') plt.title('D1') plt.show() #分解所有的序列 A2_all,D2_all,D1_all = pywt.wavedec(np.array(data['weiyi']),'db4',mode='sym',level=2) detal = [len(A2_all)-len(A2),len(D2_all)-len(D2),len(D1_all)-len(D1)] print(detal)''' pqxwy1 = model_qxwy1.predict(params=result_qxwy1.params,start=0,end=len(data['value'])+10) plt.figure() plt.plot(data['value'],'red') plt.plot(result_qxwy1.fittedvalues,'blue') plt.plot(pqxwy1,'green') plt.title('qxwy') plt.show() '''pD2 = model_D2.predict(params=result_D2.params,start=0,end=len(D2)+detal[1]) pD1 = model_D1.predict(params=result_D1.params,start=0,end=len(D1)+detal[1])''' '''for i in range(1,len(coeff)): coeff[i]=pywt.threshold(coeff[i],value=na[i-1],mode='hard') #重构 rsc_data = pywt.waverec(coeff,'db4',mode='sym') plt.figure
# Finish the PACF plot started above.
plt.title('Partial Autocorrelation Function (p=1)')
plt.tight_layout()
# The bare triple-quoted strings below are narrative notes kept verbatim
# from the original notebook-style script (they are no-op expression
# statements, not comments).
''' In this plot, the two dotted lines on either sides of 0 are the confidence interevals. These can be used to determine the p and q values as: - p: The lag value where the PACF chart crosses the upper confidence interval for the first time, in this case p=1. - q: The lag value where the ACF chart crosses the upper confidence interval for the first time, in this case q=1. '''
''' ### Fit ARMA model with statsmodels 1. Define the model by calling `ARMA()` and passing in the p and q parameters. 2. The model is prepared on the training data by calling the `fit()` function. 3. Predictions can be made by calling the `predict()` function and specifying the index of the time or times to be predicted. '''
from statsmodels.tsa.arima_model import ARMA
# Fit ARMA(1, 1) on x and plot the in-sample prediction with its RSS.
model = ARMA(x, order=(1,1)).fit()  # fit model
print(model.summary())
plt.plot(x)
plt.plot(model.predict(), color='red')
plt.title('RSS: %.4f'% sum((model.fittedvalues-x)**2))
# Continue plotting fitted values for the d2 and d1 detail bands.
plt.plot(d2, 'blue')
plt.plot(result_d2.fittedvalues, 'red')
plt.title('model_d2')
plt.subplot(3, 1, 3)
plt.plot(d1, 'blue')
plt.plot(result_d1.fittedvalues, 'red')
plt.title('model_d1')
plt.show()
# Decompose the full series to find how many extra coefficients each band
# needs when forecasting (delta per band).
a2_all, d2_all, d1_all = pywt.wavedec(list_hourly_load, 'db4', mode='sym', level=2)
delta = [len(a2_all) - len(a2), len(d2_all) - len(d2), len(d1_all) - len(d1)]
print(delta)
# Predict wavelet coefficients per band (in-sample plus delta extra steps).
pa2 = model_a2.predict(params=result_a2.params, start=1, end=len(a2) + delta[0])
pd2 = model_d2.predict(params=result_d2.params, start=1, end=len(d2) + delta[1])
pd1 = model_d1.predict(params=result_d1.params, start=1, end=len(d1) + delta[2])
# Reconstruct the load forecast from the predicted coefficients.
predict_values = pywt.waverec([pa2, pd2, pd1], 'db4')
print(np.shape(predict_values))
# NOTE(review): the label "$Predicted" is missing its closing "$" and will
# render oddly in the legend.
plt.plot(list_hourly_load[20710:20758], label="$Observed$", c='green')
plt.plot(predict_values[20710:20758], label="$Predicted", c='red')
plt.xlabel('Hour')
plt.ylabel('Electricity load, kW')
plt.show()
# mape = statistics.mape([y_test_true[i]*1000 for i in range(0,len(y_test_true))],(predicted_values)*1000
print 'Results of Dickey-Fuller Test:' dftest = adfuller(timeseries, autolag='AIC') dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used']) for key,value in dftest[4].items(): dfoutput['Critical Value (%s)'%key] = value print dfoutput ts_log = np.log(ts) # ts_log_diff = ts_log - ts_log.shift() # ts_log_diff.dropna(inplace=True) # test_stationarity(ts_log_diff) from statsmodels.tsa.arima_model import ARIMA, ARMAResults, ARMA arma_mod30 = ARMA(ts_log , (2,1)).fit() # # print ts_log # # print arma_mod30 predict_sunspots = arma_mod30.predict('26-09-2014 00:00', '26-04-2015 23:00', dynamic=True) p_exp = np.exp(predict_sunspots) print len(p_exp) # from math import round f = open("output.csv","w") f.write("Datetime,Count\n") for i in range(1,len(p_exp)): f.write(datetime_col[i-1] + "," + str(int(round(p_exp[i])))) f.write("\n") f.write("26-04-2015 23:00,"+str(int(round(p_exp[5111])))) f.write("\n")