return tmp_data diffed_ts = diff_ts(dta_log, d=[12, 1]) pdb.set_trace() test_stationarity.testStationarity(diffed_ts) test_stationarity.draw_acf_pacf(diffed_ts, l=3) model = arima_model(diffed_ts) pdb.set_trace() model.get_proper_model() print 'bic:', model.bic, 'p:', model.p, 'q:', model.q # print model.properModel.forecast()[0] # print model.forecast_next_day_value(type='day') # model2=ARMA(diffed_ts,(1,1,1)).fit() model2 = ARMA(dta_log, (2, 1, 1)).fit() model2.summary2() predict_sunspots = model2.predict('6', '7', dynamic=True) a = model2.forecast(5)[0] a_ts = predict_diff_recover(a, d=[1, 1]) log_a = np.exp(a_ts) # pdb.set_trace() # fig = plt.figure(figsize=(12,8)) # ax1 = fig.add_subplot(111) # diff1 = dta.diff(1) # diff1.plot(ax=ax1) # plt.show() # dta = dta.diff(3) # fig = plt.figure(figsize=(12,8))
def namuda(n): for i in range(len(n)): x = sigma*(np.log(n[i]))**0.5 na.append(x) return na na = namuda(n) print(na) #得到ARMA模型系数 arma_qxwy1 = sm.tsa.arma_order_select_ic(test_weiyi,ic = 'aic')['aic_min_order']#根据aic准测选取系数 '''arma_D2 = sm.tsa.arma_order_select_ic(D2,ic = 'aic')['aic_min_order'] arma_D1 = sm.tsa.arma_order_select_ic(D1,ic = 'aic')['aic_min_order'] print(arma_A2,arma_D2,arma_D1)''' #ARMA模型 model_qxwy1 = ARMA(test_weiyi,order=arma_qxwy1) '''model_D2 = ARMA(D2,order=arma_D2) model_D1 = ARMA(D1,order=arma_D1)''' result_qxwy1 = model_qxwy1.fit() '''result_D2 = model_D2.fit() result_D1 = model_D1.fit()''' '''plt.subplot(312) plt.plot(D2,'red') plt.plot(result_D2.fittedvalues,'blue') plt.title('D2') plt.subplot(313) plt.plot(D1,'red') plt.plot(result_D1.fittedvalues,'blue') plt.title('D1') plt.show()
# Flat list of forecasts, in (ampm, route, weekday) iteration order, used to
# build the submission file.
output_lst = []

for ampm in ampms.keys():
    for route in routes:
        for weekday in weekdays:
            # Work on the log scale to smooth the series.
            sub = np.log(tolist(data[route][weekday][ampm]))

            # Choose the ARMA order from the information criteria; the
            # BIC-optimal order is the one actually used.
            order = st.arma_order_select_ic(
                sub, max_ar=5, max_ma=5, ic=['aic', 'bic', 'hqic'])
            model = ARMA(sub, order=order.bic_min_order)
            result_arma = model.fit(disp=-1, method='css')

            # The in-sample prediction may be shorter than the input, so the
            # prediction window is derived from its length.
            predict = result_arma.predict()
            start = len(sub) - len(predict)
            end = start + len(predict) + 6
            # NOTE(review): the last six predicted values are kept; confirm
            # that this window is the intended forecast horizon.
            forecast = result_arma.predict(start, end)[-6:]

            # Undo the log transform before storing the values.
            output_lst.extend(np.exp(forecast))

            print("%s %s %s" % (route, weekday, ampm))
# size = int(len(ts7) * .8) data = diff(ts7) train = data[1:size] test = data[size:] adr = [] rmse2 = [] adr2 = [] rmse22 = [] relmae2 = [] relmae22 = [] for z in range(1, 30): model = ARMA(train, (z, 0)) modelfit = model.fit(maxiter=100, method='css') coef = modelfit.params pred = predictAR(modelfit, test) tru = test[len(coef) - 1:] bench = test[len(coef) - 2:-1] pred2 = antidiffroll(pred, ts7[size + len(coef) - 1:]) tru2 = antidiffroll(tru, ts7[size + len(coef) - 1:]) bench2 = antidiffroll(bench, ts7[size + len(coef) - 2:]) adr.append(adjrsq(pred, tru, coef)) rmse2.append(rmse(pred, tru)) relmae2.append(relmae(pred, tru, bench))
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_process import ArmaProcess

# Simulate 1000 observations from a process with AR polynomial [1] and
# MA polynomial [1, -0.9].
ar1 = np.array([1])
ma1 = np.array([1, -0.9])
MA_object1 = ArmaProcess(ar1, ma1)
simulated_data_1 = MA_object1.generate_sample(nsample=1000)

'''
INSTRUCTIONS

* Import the class ARMA in the module statsmodels.tsa.arima_model
* Create an instance of the ARMA class called mod using the simulated data
  simulated_data_1 and the (p,q) order of the model (in this case, for an
  MA(1), order=(0,1)
* Fit the model mod using the method .fit() and save it in a results object
  called res
* Plot the in-sample and out-of-sample forecasts of the data using the
  .plot_predict() method
* Start the forecast 10 data points before the end of the 1000 point series
  at 990, and end the forecast 10 data points after the end of the series at
  point 1010
'''

# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Fit an MA(1), i.e. ARMA with (p, q) = (0, 1), to the simulated series.
mod = ARMA(simulated_data_1, order=(0, 1))
res = mod.fit()

# Plot from observation 990 (in-sample) through 1010 (out-of-sample).
res.plot_predict(start=990, end=1010)
plt.show()
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_process import ArmaProcess

# Simulate 1000 observations from a process with AR polynomial [1, -0.9] and
# MA polynomial [1].
ar1 = np.array([1, -0.9])
ma1 = np.array([1])
AR_object1 = ArmaProcess(ar1, ma1)
simulated_data_1 = AR_object1.generate_sample(nsample=1000)

'''
INSTRUCTIONS

* Import the class ARMA in the module statsmodels.tsa.arima_model
* Create an instance of the ARMA class called mod using the simulated data
  simulated_data_1 and the order (p,q) of the model (in this case, for an
  AR(1) order=(1,0)
* Fit the model mod using the method .fit() and save it in a results object
  called res
* Plot the in-sample and out-of-sample forecasts of the data using the
  plot_predict() method
* Start the forecast 10 data points before the end of the 1000 point series
  at 990, and end the forecast 10 data points after the end of the series at
  point 1010
'''

# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Fit an AR(1), i.e. ARMA with (p, q) = (1, 0), to the simulated series.
mod = ARMA(simulated_data_1, order=(1, 0))
res = mod.fit()

# Plot from observation 990 (in-sample) through 1010 (out-of-sample).
res.plot_predict(start=990, end=1010)
plt.show()
# Second panel: a 100-point sample from a process with MA polynomial
# [1, -0.9] and AR polynomial [1].
plt.subplot(2, 1, 2)
ar2 = np.array([1])
ma2 = np.array([1, -0.9])
MA_object2 = ArmaProcess(ar2, ma2)
simulated_data_2 = MA_object2.generate_sample(nsample=100)
plt.plot(simulated_data_2)
plt.show()

# Autocorrelation plots of both simulated series.
plot_acf(simulated_data_1)
plot_acf(simulated_data_2)

### TO ESTIMATE PARAMETERS FROM DATA
from statsmodels.tsa.arima_model import ARMA

# order=(2, 0) would be an AR(2) model; order=(0, 1) is an MA(1) model.
mode = ARMA(simulated_data_1, order=(0, 1))
mode_result = mode.fit()
mode_result.summary()
mode_result.params
mode_result.plot_predict(start=80, end=120)

# Same MA(1) specification on the month-end 'Adj Close' values of df1.
monthly_close = df1['Adj Close'].resample('M').last().dropna()
df1mode = ARMA(monthly_close, order=(0, 1))
df1mode_result = df1mode.fit()
df1mode_result.params
df1mode_result.plot_predict(start='1997-09-30', end='2018-01-31', alpha=.05)

####FORECAST FUTURE VALUE WITH CONFIDENCE INTERVAL
"""
Let's Forecast Interest Rates

You will now use the forecasting techniques you learned in the last exercise
and apply it to real data rather than simulated data. You will revisit a
dataset from the first chapter: the annual data of 10-year interest rates
going back 56 years, which is in a Series called interest_rate_data. Being
able to forecast interest rates is of enormous importance, not only for bond
investors but also for individuals like new homeowners who must decide
between fixed and floating rate mortgages.

You saw in the first chapter that there is some mean reversion in interest
rates over long horizons. In other words, when interest rates are high, they
tend to drop and when they are low, they tend to rise over time. Currently
they are below long-term rates, so they are expected to rise, but an AR model
attempts to quantify how much they are expected to rise.

Instructions (100 XP)

* Import the class ARMA in the module statsmodels.tsa.arima_model.
* Create an instance of the ARMA class called mod using the annual interest
  rate data and choosing the order for an AR(1) model.
* Fit the model mod using the method .fit() and save it in a results object
  called res.
* Plot the in-sample and out-of-sample forecasts of the data using the
  .plot_predict() method.
* Pass the arguments start=0 to start the in-sample forecast from the
  beginning, and choose end to be '2022' to forecast several years in the
  future.
"""

# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# AR(1) is ARMA with (p, q) = (1, 0).
mod = ARMA(interest_rate_data, order=(1, 0))
res = mod.fit()

# Plot the original series together with the forecast through '2022'.
res.plot_predict(start=0, end='2022')
plt.legend(fontsize=8)
plt.show()
### Auto Regression and Moving Average (ARMA) model ###

# Train / test split: the test set is the last `test_length` observations
# (the original note describes this as the last 5 years of Adj. Close).
# ts_data = ts_log  # log transformed data
ts_data = ts_log_diff  # log transformed residuals
test_length = 1265
split_at = len(ts_data) - test_length
# The training slice starts at index 1, so the first element is left out.
train = ts_data[1:split_at]
test = ts_data[split_at:]

# Fit the ARMA model.
# Order notes from the original author:
#   AR order 2, MA order 0 for log transformed data
#   AR order 4, MA order 2 for log residuals
arma = ARMA(train, order=(4, 2)).fit()

# Predict one value per test observation and align it with the test index.
predictions = arma.predict(start=len(train), end=len(train) + len(test) - 1)
predictions.index = test.index

# Plot the fitted values over the full differenced series.
plt.figure(figsize=(15, 10))
plt.plot(ts_log_diff, label="Log transformed Residuals")
plt.plot(arma.fittedvalues, color='red', label="ARMA model fitted Residuals")
plt.title(
    "Auto Regression and Moving Average model ARMA(4,2) - log Fitted Residuals"
)
plt.legend()
plt.savefig("plots_lena/ARMA log Fitted Residuals")
plt.show()

# plot AR model prediction results
def get_prediction(train_data, p, q):
    """Return the one-step-ahead forecast of an ARMA(p, q) fit.

    Args:
        train_data: series the model is fitted on.
        p: autoregressive order.
        q: moving-average order.

    Returns:
        The first element of the point-forecast array returned by
        ``forecast(1)``.
    """
    # All warnings raised while fitting and forecasting are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        fitted = ARMA(train_data, (p, q)).fit(disp=0)
        point_forecasts = fitted.forecast(1)[0]
        return point_forecasts[0]
def test_arma(timeseries):
    """Return the one-step-ahead forecast of a fixed ARMA(7, 2) fit.

    Args:
        timeseries: series the model is fitted on.

    Returns:
        The first element of the point-forecast array returned by
        ``forecast(1)``.
    """
    # All warnings raised while fitting and forecasting are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        # The order is hard-coded; automatic selection was left disabled:
        # order = st.arma_order_select_ic(timeseries, max_ar=5, max_ma=5,
        #                                 ic=['aic', 'bic', 'hqic'])
        fitted = ARMA(timeseries, (7, 2)).fit()
        point_forecasts = fitted.forecast(1)[0]
        return point_forecasts[0]
# Information criteria (k = number of parameters, n = sample size):
#   * AIC = -2 ln(L) + 2 k           Akaike information criterion
#   * BIC = -2 ln(L) + ln(n) * k     Bayesian information criterion
#   * HQ  = -2 ln(L) + ln(ln(n)) * k Hannan-Quinn criterion

# Order selection by AIC + ARMA.
# !!! If the criterion is changed here, change it in the model-checking
# step as well !!!
pmax = int(len(xdata2) / 10)  # orders normally do not exceed length / 10
qmax = int(len(xdata2) / 10)  # orders normally do not exceed length / 10
matrix = []  # AIC matrix: rows are p, columns are q
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Some (p, q) combinations cannot be fitted and raise; record None
        # for those. `Exception` (not a bare except) keeps Ctrl-C and
        # SystemExit working during the long grid search.
        try:
            # tmp.append(ARMA(xdata2, (p, q)).fit().bic)  # BIC variant
            tmp.append(ARMA(xdata2, (p, q)).fit().aic)  # AIC variant
            # tmp.append(ARMA(xdata2, (p, q)).fit().hq)  # HQ variant
        except Exception:
            tmp.append(None)
    matrix.append(tmp)

matrix = pd.DataFrame(matrix)  # the minimum can be read off this table
# Call form of print works on both Python 2 and Python 3 for one argument.
print(matrix)
print(matrix.stack())

# In[5]:

# Step 4 (C drive) -- model checking
# Once a model is chosen, check whether its residual series is white noise.
# If it is not, the residuals still contain useful information and the model
# must be revised or more structure extracted.
# If the residuals are not white noise, pick new p, q values and refit.
# Report the previously fitted model (its prediction is `yhat3`).
print(yhat3)
print(model_fit.summary())
print("BIC: ", model_fit.bic)
mse = np.square(np.subtract(test["Close_Value"], yhat3)).mean()
print("MSE: ", mse)

# Plot actual vs. predicted. The lines carry labels so that plt.legend()
# has entries to show; without them the legend is empty.
x = list(range(len(test)))
plt.plot(x, test["Close_Value"], c='blue', label='Actual')
plt.plot(x, yhat3, c='green', label='Predicted')
plt.legend()
plt.show()

# MOVING AVERAGE
from statsmodels.tsa.arima_model import ARMA

# Fit an MA(1) model, i.e. ARMA with (p, q) = (0, 1).
model = ARMA(train["Close_Value"], order=(0, 1))
model_fit = model.fit(disp=False)

# Make the prediction.
# NOTE(review): the 1..2548 range is hard-coded; confirm that it lines up
# with the rows of `test` used in the MSE below.
yhat = model_fit.predict(1, 2548)
print(yhat)
print(model_fit.summary())
print("BIC: ", model_fit.bic)
mse = np.square(np.subtract(test["Close_Value"], yhat)).mean()
print("MSE: ", mse)

# Plot actual vs. predicted for the MA(1) model.
plt.plot(x, test["Close_Value"], c='blue', label='Actual')
plt.plot(x, yhat, c='green', label='Predicted')
plt.legend()
plt.show()

# AUTO-REGRESSIVE MOVING AVERAGE
def machinelearning(prediction):
    """Refresh the stored price predictions for Ethereum and Litecoin.

    Clears ``prediction``, then for each coin downloads roughly two years of
    daily rows from coinmarketcap, fits an ARMA(8, 1) model on the 7-day
    price differences and an RBF support-vector regression on the raw
    prices, and inserts one document per coin.

    Args:
        prediction: collection object exposing ``delete_many`` and
            ``insert_one``.

    Each inserted document has the keys ``Name``, ``arma``, ``svrrbf``,
    ``todayopen`` and ``date``.
    """
    prediction.delete_many({})

    now = datetime.datetime.now()
    todaydash = now.strftime('%Y-%m-%d')
    today = now.strftime('%Y%m%d')
    earlier_2years = (now - datetime.timedelta(days=730)).strftime('%Y%m%d')

    for item in ['Ethereum', 'Litecoin']:
        url = ('https://coinmarketcap.com/currencies/' + item +
               '/historical-data/?start=' + earlier_2years + '&end=' + today)
        # Close the HTTP response as soon as the page has been parsed.
        with urllib.request.urlopen(url) as page:
            soup = bs4.BeautifulSoup(page, 'html.parser')
        cost_2years = soup.find_all("tr", class_="text-right")

        # Each table row's text is split on newlines; field 1 is parsed as
        # the date and field 2 as the price.
        date = []
        open_cost = []
        for row in cost_2years:
            fields = row.text.split('\n')
            date.append(parser.parse(fields[1]))
            open_cost.append(float(fields[2]))

        # The first row is kept aside as today's open and excluded from the
        # training data.
        today_open = open_cost.pop(0)
        date.pop(0)

        cost = pd.DataFrame(open_cost, date)
        cost.columns = ['Price at 12am']
        cost.index.name = 'Date'

        # Sort the data in ascending date order.
        sorting = cost.index.sort_values(ascending=True)
        cost2 = cost.reindex(sorting)
        chg_cost = cost2.diff(7).dropna()

        # ARMA model on the 7-day differences.
        mod = ARMA(chg_cost, order=(8, 1))
        result = mod.fit()
        forecast = result.forecast()[0]
        # Undo diff(7): the forecast is the change for position len(cost2),
        # so the reference price is the one 7 positions earlier, at
        # len(cost2) - 7 (the previous code used len(cost2) - 8).
        invert = len(cost2) - 7
        prediction_arma = forecast + cost2.iloc[invert, 0]

        # Support Vector Machine RBF model on the position index.
        b = np.arange(len(open_cost)).reshape(-1, 1)
        svr_rbf = SVR(kernel='rbf', C=1e3, gamma=.05)
        svr_rbf.fit(b, cost2.values.ravel())
        # predict() needs a 2-D (n_samples, n_features) array, not a scalar.
        # NOTE(review): training positions run 0..len-1, so the next
        # position would be len(cost2); the original `+ 1` is kept --
        # confirm which was intended.
        svrrbf = svr_rbf.predict(np.array([[len(cost2) + 1]]))[0]

        post = {
            'Name': item,
            'arma': round(prediction_arma[0], 2),
            'svrrbf': round(svrrbf, 2),
            'todayopen': today_open,
            'date': todaydash
        }
        prediction.insert_one(post)
327.45 , 328.19 , 330.92] # Autoregression (AR) example # fit model modelAR = AR(data) modelAR_fit = modelAR.fit() # make prediction yhatAR = modelAR_fit.predict(len(data), len(data)) print(yhatAR) # End Autoregression # Moving Average (MA) example # fit model modelMA = ARMA(data, order=(0, 1)) modelMA_fit = modelMA.fit(disp=False) # make prediction yhatMA = modelMA_fit.predict(len(data), len(data)) print(yhatMA) # End Moving Average # # Autoregressive Moving Average (ARMA) example # # fit model # modelARMA = ARMA(data, order=(2, 1)) # modelARMA_fit = modelARMA.fit(disp=False) # # make prediction # yhatARMA = modelARMA_fit.predict(len(data), len(data)) # print(yhatARMA) # # End Autoregressive Moving Average
# test_predict[i][j] = pre
print('======================= ARMA for test ===============================')
loss = 0
error_count = 0
index_all = np.zeros([run_times, 2])          # (i, j) sampled in each run
error_index = np.zeros(run_times)             # run numbers whose fit failed
test_target = np.zeros([run_times, output_steps])
test_prediction = np.zeros([run_times, output_steps])

for r in range(run_times):
    print('run ' + str(r))
    # Pick a random series i and a random window start j.
    i = np.random.randint(data.shape[0])
    j = np.random.randint(test_data.shape[-1] - output_steps)
    train_df = pd.DataFrame(data[i][j:split[0] + split[1] + j])
    train_df.index = pd.DatetimeIndex(timestamps[j:split[0] + split[1] + j])

    # A failed fit is recorded and the run skipped. `Exception` (not a bare
    # except) so that Ctrl-C / SystemExit still stop the loop.
    try:
        results = ARMA(train_df, order=(2, 2)).fit(trend='nc', disp=-1)
    except Exception:
        error_index[error_count] = r
        error_count += 1
        continue

    # forecast() returns a 3-tuple; only the point forecasts are used.
    pre, _, _ = results.forecast(output_steps)
    test_real = test_data[i][j:j + output_steps]
    index_all[r] = [i, j]
    test_target[r] = test_real
    test_prediction[r] = pre
    loss += np.sum(np.square(pre - test_real))

print('================ calculate rmse for test data ============')
#n_rmse_val = np.sqrt(np.sum(np.square(val_predict - val_real))*1.0/np.prod(val_real.shape))
#n_rmse_test = np.sqrt(np.sum(np.square(test_predict - test_real))*1.0/np.prod(test_real.shape))
#rmse_val = pre_process.real_loss(n_rmse_val)
#rmse_test = pre_process.real_loss(n_rmse_test)
# make prediction yhat = model_fit.predict(len(monthlyGrossSet), len(monthlyGrossSet), typ='levels') print(yhat) if __name__ == "__main__": # there are 52 records(weeks) for each year testmonth = ["Jan", "Mar", "May", "Jul", "Sep", "Oct", "Dec"] # for month in testmonth: # monthlySimpleAvg(month) # wma1(month) # wma2(month) # ar1(month) # movingAverage(month) # arma1((month)) # arima1(month) # ar3(month) # ma3(month) # arma3(month) # arima3(month) set = [1, 2, 3, 4, 5, 6] # fit model model = ARMA(set, order=(0, 1)) model_fit = model.fit(disp=False) # make prediction res = model_fit.predict(len(set), len(set)) print(res)
# Import the module for estimating an ARMA model
from statsmodels.tsa.arima_model import ARMA

# AR(1): fit and report the AIC.
mod_ar1 = ARMA(chg_temp, order=(1, 0))
res_ar1 = mod_ar1.fit()
print("The AIC for an AR(1) is: ", res_ar1.aic)

# AR(2): fit and report the AIC.
mod_ar2 = ARMA(chg_temp, order=(2, 0))
res_ar2 = mod_ar2.fit()
print("The AIC for an AR(2) is: ", res_ar2.aic)

# ARMA(1,1): fit and report the AIC.
mod_arma11 = ARMA(chg_temp, order=(1, 1))
res_arma11 = mod_arma11.fit()
print("The AIC for an ARMA(1,1) is: ", res_arma11.aic)
from matplotlib import style
import math
from statistics import mean

plt.style.use('fivethirtyeight')

# `squeeze=True` is no longer passed: the option was removed from
# read_excel in pandas 2.0, and it had no effect here because two columns
# are selected from the result right below.
df = pd.read_excel("../00Daily/Egypt.xlsx", parse_dates=True)
df = df[["Date", "LocalTransmission"]]
df.set_index("Date", inplace=True)
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)

# MA(1) model, i.e. ARMA with (p, q) = (0, 1), on the date-indexed frame.
result = ARMA(df, order=(0, 1)).fit(disp=False)
print(result.summary())
#print(result.params)

# Predict and plot the same date window.
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)
result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for postive cases in Egypt \n Algorithm used: MA',
             fontsize=12)
plt.show()

##def mean_forecast_error(y, yhat):
# Create the data: 90 values, indexed below with the dates '1901'..'1990'.
data = [
    3821, 4236, 3758, 6783, 4664, 2589, 2538, 3542, 4626, 5886, 6233, 4199,
    3561, 2335, 5636, 3524, 4327, 6064, 3912, 1356, 4305, 4379, 4592, 4233,
    4281, 1613, 1233, 4514, 3431, 2159, 2322, 4239, 4733, 2268, 5397, 5821,
    6115, 6631, 6474, 4134, 2728, 5753, 7130, 7860, 6991, 7499, 5301, 2808,
    6755, 6658, 6944, 6372, 8380, 7366, 6352, 8333, 8281, 11548, 10823,
    13642, 9973, 6723, 13416, 12205, 13942, 9590, 11693, 9276, 6519, 6863,
    8237, 10122, 8646, 9749, 5346, 4836, 9806, 7502, 9387, 11078, 9832, 6886,
    4285, 8351, 9725, 11844, 12387, 10666, 7072, 6429
]
data = pd.Series(data)
data_index = sm.tsa.datetools.dates_from_range('1901', '1990')
#print(data_index)

# Plot the raw series.
data.index = pd.Index(data_index)
data.plot(figsize=(12, 8))
plt.show()

# Build the ARMA model with a manually chosen order: p=7, q=0.
arma = ARMA(data, (7, 0)).fit()
print('AIC: %0.4lf' % arma.aic)

# Predict from '1990' through '2000'.
predict_y = arma.predict('1990', '2000')

# Plot the observed series together with the prediction.
# `.loc` replaces the `.ix` indexer, which was removed in pandas 1.0.
fig, ax = plt.subplots(figsize=(12, 8))
ax = data.loc['1901':].plot(ax=ax)
predict_y.plot(ax=ax)
plt.show()
raise ValueError('What you input is not pd.Series type!') tmp_data.dropna(inplace=True) return tmp_data diffed_ts = diff_ts(dta_log,d=[1,1]) test_stationarity.testStationarity(diffed_ts) test_stationarity.draw_acf_pacf(diffed_ts,l=31) model = arima_model(diffed_ts) pdb.set_trace() model.get_proper_model() print 'bic:',model.bic,'p:',model.p,'q:',model.q print model.properModel.forecast()[0] # print model.forecast_next_day_value(type='day') model2=ARMA(diffed_ts,(model.p,1,model.q)).fit() model2.summary2() predict_sunspots = model2.predict('2090','2100',dynamic=True) a = model2.forecast(5)[0] a_ts = predict_diff_recover(a,d=[1,1]) log_a = np.exp(a_ts) print log_a pdb.set_trace() model.certain_model(6,0) predict_ts = model.properModel.predict()
# Toggle: side-by-side ACF / PACF plots (currently switched off).
if False:
    plt.subplot(121)
    plt.plot(lag_acf)
    plt.title('ACF (q=1)')
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.title('PACF (q=1)')
    plt.tight_layout()
    plt.show()

# Prediction with ARMA models
# 1. define the model by calling ARMA
# 2. The model is prepared on the training data by calling the fit() function
# 3. Predictions can be made by calling the predict() function and specifying
#    the index of the time or times to be predicted.
from statsmodels.tsa.arima_model import ARMA

x = df['Gym'].astype('float')
model = ARMA(x, order=(1, 1)).fit()
print(model.summary())

# Toggle: original series vs. in-sample prediction, with the residual sum of
# squares in the title.
if True:
    rss = sum((model.fittedvalues - x)**2)
    plt.plot(x, label='Gym original')
    plt.plot(model.predict(), color='red', label='predicted')
    plt.title('RSS: %.4f' % rss)
    plt.legend(loc='best')
    plt.show()

print('The End.')
# Fourth panel: the yearly series.
plt.subplot(224)
plt.plot(df_year.Price, '-', label='按年')
plt.legend()
plt.show()

# Candidate orders: every (p, q) with p and q in 0..2.
ps = range(0, 3)
qs = range(0, 3)
parameters = product(ps, qs)
parameters_list = list(parameters)

# Search for the ARMA order with the smallest AIC.
results = []
best_aic = float("inf")  # positive infinity
for param in parameters_list:
    try:
        model = ARMA(df_month.Price, order=param).fit()
    except ValueError:
        # Orders that cannot be fitted are reported and skipped.
        print('参数错误:', param)
        continue
    aic = model.aic
    results.append([param, aic])
    if aic < best_aic:
        best_model, best_aic, best_param = model, aic, param

# Print the best model.
print('最优模型: ', best_model.summary())

# Set future_month, the number of months to predict, for the date list
# built afterwards.
df_month2 = df_month[['Price']]
future_month = 3
def __init__(self, interval, ticker, kwargs):
    '''
    Collect historical data and calculate parameters for volume
    predictions and AR(1).

    Args:
        interval: how many secs between volume percentage predictions;
            must be a multiple of 5 and divide 7200
        ticker: name of the ticker; must be a directory under
            kwargs['DATA_PATH']
        kwargs: dict with the keys
            TODAY: a datetime object of today
            T_START_TIME: today's market opening time
            T_END_TIME: today's market closing time
            LASSO_LAMBDA: lambda of lasso method
            N_TICK_THRESHOLD: the least number of tick data a valid file
                should contain
            DATA_PATH: path to the tick data directory

    Attributes set here:
        _interval: how many secs to update volume percentage prediction
        _interval_timedelta: interval in timedelta format
        _semi_n_interval: half of the total number of intervals today
        _n_interval: the total number of intervals today
        _features_to_train: (11, 3) array of CA, rolling mean and rolling
            linear prediction
        _histo_volume: 10 valid days of traded volume in each interval
        _intraday_percentage: expected trading volume in each interval
        _AR_pars: parameters of the fitted AR(1) model
        _CA_today: today's traded volume during the call auction
        _predicted_V: today's total predicted trading volume
        _is_V_predicted: a flag of whether V is predicted
        _iter: the interval of the currently pushed tick
        _last_update: the interval last updated in VWAP_log
        _datetime_index: the index for VWAP_log
        _today_vol: a list of today's trading volume in each interval
        _p_per: a list of predicted trading volume percentage
        _p_vol: a list of predicted trading volume
        _VWAP_log: the log of the predicted volume, its percentage and
            the true volume in each interval

    Raises:
        ValueError: if interval is not a multiple of 5 that divides 7200
        Exception: if there is no data directory for the ticker, or the
            historical files run out before enough valid days are found
    '''
    if (interval % 5 != 0) or (7200 % interval != 0):
        raise ValueError(
            'interval must be a multiple of 5 secs and can divide 2 hours')
    if ticker not in listdir(kwargs['DATA_PATH']):
        raise Exception('no data for %s' % ticker)

    self.TODAY = kwargs['TODAY']
    self.T_START_TIME = kwargs['T_START_TIME']
    self.T_END_TIME = kwargs['T_END_TIME']
    self.LASSO_LAMBDA = kwargs['LASSO_LAMBDA']
    self.N_TICK_THRESHOLD = kwargs['N_TICK_THRESHOLD']
    self.DATA_PATH = kwargs['DATA_PATH'] + ticker + '/'

    self._interval = interval
    self._interval_timedelta = timedelta(seconds=self._interval)
    self._semi_n_interval = int(self.HALFTIME / self._interval_timedelta)
    self._n_interval = 2 * self._semi_n_interval
    self._features_to_train = np.ones((11, 3), dtype=float)  # CA, M, L
    self._histo_volume = np.full((10, self._n_interval), 0,
                                 dtype=float)  # historical trading volume
    self._intraday_percentage = [1 / self._n_interval] * self._n_interval
    self._AR_pars = [0., 1.]
    self._CA_today = 0
    self._predicted_V = 0.
    self._is_V_predicted = 0
    self._last_update = 0
    self._iter = 0

    # Morning session runs to 11:30, afternoon session starts at 13:00.
    step = timedelta(seconds=self._interval)
    morning_end = self.T_START_TIME.replace(
        hour=11, minute=30, second=0, microsecond=0)
    afternoon_start = self.T_START_TIME.replace(
        hour=13, minute=0, second=0, microsecond=0)
    self._datetime_index = (
        [str(dt)
         for dt in datetime_range(self.T_START_TIME, morning_end, step)] +
        [str(dt)
         for dt in datetime_range(afternoon_start, self.T_END_TIME, step)])

    self._today_vol = [0.] * self._n_interval
    self._p_per = [0.] * self._n_interval
    self._p_vol = [0] * self._n_interval
    self._VWAP_log = {}

    files = set(
        filename for filename in listdir(self.DATA_PATH)
        if filename.endswith('.csv'))
    history_date = self.TODAY
    # Interval end points in seconds after 09:30: 0..7200 for the morning,
    # 12600..19800 for the afternoon.
    x_output = np.append(
        np.arange(0 + self._interval, 7200 + self._interval,
                  self._interval),
        np.arange(12600 + self._interval, 19800 + self._interval,
                  self._interval))
    past_days = 0

    # Get data for intraday prediction: walk backwards one calendar day at
    # a time until 10 valid days are stored (slot 1 = most recent).
    slot = 1
    while slot < 11:
        if not files:
            raise Exception('Insufficient historical data')
        history_date = history_date - timedelta(days=1)
        past_days += 1
        if history_date.weekday() in (5, 6):  # skip Saturday / Sunday
            continue
        day_str = str(history_date.strftime('%Y-%m-%d'))
        filename = str(ticker) + day_str + '.csv'
        if filename not in files:
            continue
        files.remove(filename)
        try:
            dat = pd.read_csv(self.DATA_PATH + filename)
        except Exception:
            print('Error in reading %s, go to the previous day.' % filename)
            continue
        if dat.shape[0] < self.N_TICK_THRESHOLD:
            print('File %s has few data for prediction' % filename)
            continue
        if past_days > 20:
            warnings.warn(
                'Lack historical data. Time span of data for predicting intraday_volume of today has exceeded 20 days.'
                'We are using data %d days from today' % past_days)
        try:
            dat.columns = ['DateTime', 'Volume']
            dat.DateTime = [
                datetime.strptime(day_str + ' ' + dt, "%Y-%m-%d %H:%M:%S")
                for dt in dat.DateTime
            ]
            # Convert datetimes to seconds since that day's 09:30.
            self.H_START_TIME = history_date.replace(
                hour=9, minute=30, second=0, microsecond=0)
            dat['TimeStamp'] = [(dt - self.H_START_TIME).total_seconds()
                                for dt in dat.DateTime]
            # `.values` replaces DataFrame.as_matrix, removed in pandas 1.0.
            dat = dat[['TimeStamp', 'Volume']].values
            # Ticks before 09:30 are the call auction (CA).
            datCA = dat[dat[:, 0] < 0]
            self._features_to_train[10 - slot, 0] = datCA[:, 1].sum()
            dat = dat[dat[:, 0] > 0]
            # Collapse everything at/after t=19800 (the last grid point)
            # into a single row at 19800, so the cumulative-volume curve is
            # defined up to the last grid point. The previous code compared
            # against and assigned 198000, a typo for 19800.
            late = dat[:, 0] >= 19800
            dat = np.vstack((dat[~late], [19800, dat[late, 1].sum()]))
            # Interpolate cumulative volume onto the interval grid, then
            # difference it to get per-interval volume.
            x_input = np.append(0, dat[:, 0])
            volume_cumsum = np.append(0, dat[:, 1].cumsum())
            y_interp = scipy.interpolate.interp1d(x_input, volume_cumsum)
            intraday_volume = y_interp(x_output)
            intraday_volume = np.append(
                intraday_volume[0],
                (intraday_volume[1:] - intraday_volume[:-1]))
            self._histo_volume[10 - slot] = intraday_volume
        except Exception:
            print('Error when read file %s, you may check its format' %
                  filename)
            continue
        slot += 1

    # Get data for roll_mean and roll_linear: 5 further valid days of
    # total volume, older than the 10 days collected above.
    slot = 11
    volume_sums = np.zeros(5, dtype=float)
    history_date = self.TODAY - timedelta(days=past_days)
    while slot < 16:
        if not files:
            raise Exception('Insufficient historical data')
        history_date = history_date - timedelta(days=1)
        past_days += 1
        if history_date.weekday() in (5, 6):
            continue
        day_str = str(history_date.strftime('%Y-%m-%d'))
        filename = str(ticker) + day_str + '.csv'
        if filename not in files:
            continue
        files.remove(filename)
        try:
            dat = pd.read_csv(self.DATA_PATH + filename)
        except Exception:
            print('Error in reading %s, go to the previous day.' % filename)
            continue
        if past_days > 30:
            warnings.warn(
                'Lack historical data. Time span of data for predicting total trading volume of today has exceeded 30 days.'
            )
        try:
            # The frame read above is reused; it was previously re-read
            # from disk a second time here.
            dat.columns = ['DateTime', 'Volume']
            self.H_START_TIME = history_date.replace(
                hour=9, minute=30, second=0, microsecond=0)
            dat.DateTime = [
                datetime.strptime(day_str + ' ' + dt, "%Y-%m-%d %H:%M:%S")
                for dt in dat.DateTime
            ]
            volume_sums[15 - slot] = dat[
                dat.DateTime > self.H_START_TIME].Volume.sum()
        except Exception:
            print('Error when read file ' + filename +
                  ', you may check its format')
            continue
        slot += 1

    # Prepare the sample for predicting today's total volume.
    self.volume_to_train = self._histo_volume.sum(axis=1)
    volume_sums = np.append(volume_sums, self.volume_to_train)
    self._features_to_train[:, 1] = rolling_mean(volume_sums)
    self._features_to_train[:, 2] = rolling_linear(volume_sums)

    # Get the intraday pattern and initialise the intraday prediction.
    # The pattern is scaled so that its entries sum to _n_interval.
    intraday_mean = self._histo_volume.mean(axis=0)
    self._p_vol[0] = float(intraday_mean[0])
    self._p_vol[self._semi_n_interval] = float(
        intraday_mean[self._semi_n_interval])
    self._intraday_percentage = list(
        np.divide(intraday_mean, intraday_mean.sum()) * self._n_interval)
    if any(i < 1 / (self._n_interval * 10)
           for i in self._intraday_percentage):
        warnings.warn(
            'adjust intraday trading volume pattern for irregular data')
    tmp = np.divide(intraday_mean, intraday_mean.sum()) * self._n_interval
    small = tmp < 0.1
    if np.any(small):
        warnings.warn(
            'adjust intraday trading volume pattern for irregular data')
        # Use one `>= 0.1` mask on both sides of the assignment; the old
        # code mixed `>=` and `>`, which fails with a shape mismatch when
        # an entry equals 0.1 exactly.
        # NOTE(review): the scale factor sums (_n_interval - x) over the
        # small entries; confirm this is the intended renormalisation.
        big = tmp >= 0.1
        tmp[big] = tmp[big] * sum(self._n_interval - tmp[small]) / sum(
            tmp[big])
        tmp[small] = 0.1
    self._intraday_percentage = list(tmp)

    self._p_per[0] = self._intraday_percentage[0] / self._n_interval
    self._p_per[self._semi_n_interval] = self._intraday_percentage[
        self._semi_n_interval] / self._n_interval
    self._VWAP_log[self._datetime_index[0]] = get_log(
        None, self._p_vol[0], self._p_per[0])

    # Compute AR: fit an AR(1) to the most recent day's volume divided by
    # the intraday pattern.
    arma = ARMA((self._histo_volume[-1] /
                 self._intraday_percentage)[0:self._n_interval],
                order=(1, 0))
    self._AR_pars = arma.fit().params.tolist()
# Training data: years up to and including 2015; test data: later years.
training_set = delhi[delhi["Year"] <= 2015]
test_set = delhi[delhi["Year"] > 2015]

# ACF and PACF plots of the temperature series.
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf

acf = plot_acf(delhi["AvgTemperature"], lags=9000)
pacf = plot_pacf(delhi["AvgTemperature"], lags=10)
fig5 = plt.savefig("/home/vaishnavi/Desktop/Final/Screenshots")
plt.show()

# MA(2) model, i.e. ARMA with (p, q) = (0, 2), fitted on the training years.
from statsmodels.tsa.arima_model import ARMA

model_MA = ARMA(training_set["AvgTemperature"], order=(0, 2))
model_fit_MA = model_MA.fit()

# Predict over the span of the test index.
first_test_label = test_set.index[0]
last_test_label = test_set.index[-1]
predictions_MA = model_fit_MA.predict(first_test_label, last_test_label)

# Plot the test data against the predictions.
fig5 = plt.figure(figsize=(15, 5))
plt.ylabel("Temperature", fontsize=20)
plt.plot(test_set["AvgTemperature"], label="Original Data")
plt.plot(predictions_MA, label="Predictions")
fig6 = plt.savefig("/home/vaishnavi/Desktop/Final/Screenshots")
plt.show()
#plt.legend()

# RMSE for the MA model.
mse = mean_squared_error(predictions_MA, test_set["AvgTemperature"])
print(mse**0.5)
print 'p-value: ', d_order0[1] print'Critical values: ', d_order0[4] if d_order0[0] > d_order0[4]['5%']: print 'Time Series is nonstationary' else: print 'Time Series is stationary' # # selecting parameter order = sm.tsa.arma_order_select_ic(ts_diff_1, max_ar=6, max_ma=3, ic=['aic']) # print order try: # ARMA model model = ARMA(ts_diff_1,(order['aic_min_order'][0],order['aic_min_order'][1])) predict_diff_1 = model.fit(disp=False).forecast(14)[0] # restore predict = np.cumsum(predict_diff_1) predict = predict + np.mean(ts[-7:]) # use continuity is better stander = sklearn.preprocessing.StandardScaler() predict = stander.fit_transform(predict) predict = stander.fit(ts[-7:]).inverse_transform(predict) predict = np.round(predict) print predict predict_result = np.vstack((predict_result,predict))
else: list_hourly_load[j] = sum - 3 print(k) list_hourly_load = np.array(list_hourly_load) shifted_value = list_hourly_load.mean() list_hourly_load -= shifted_value a2, d2, d1 = pywt.wavedec(list_hourly_load[:-48], 'db4', mode='sym', level=2) # lhl = pywt.waverec([a2, d2, d1], 'db4') # print(np.shape(a2),np.shape(d2),np.shape(d1),np.shape(lhl)) # order_a2 = sm.tsa.arma_order_select_ic(a2, ic='aic')['aic_min_order'] # order_d2 = sm.tsa.arma_order_select_ic(d2, ic='aic')['aic_min_order'] # order_d1 = sm.tsa.arma_order_select_ic(d1, ic='aic')['aic_min_order'] order_a2 = [3, 2] # p ,q order_d2 = [4, 1, 2] # p, d ,q order_d1 = [4, 1, 2] print(order_a2, order_d2, order_d1) model_a2 = ARMA(a2, order=order_a2) model_d2 = ARIMA(d2, order=order_d2) model_d1 = ARIMA(d1, order=order_d1) result_a2 = model_a2.fit() result_d2 = model_d2.fit() result_d1 = model_d1.fit() plt.figure(figsize=(10, 15)) plt.subplot(3, 1, 1) plt.plot(a2, 'blue') plt.plot(result_a2.fittedvalues, 'red') plt.title('model_a2') plt.subplot(3, 1, 2) plt.plot(d2, 'blue') plt.plot(result_d2.fittedvalues, 'red') plt.title('model_d2') plt.subplot(3, 1, 3)
plt.legend()

# Fourth panel: the yearly series.
plt.subplot(224)
plt.plot(df_year.Weighted_Price, '-', label='按年')
plt.legend()
plt.show()

# Candidate orders: every (p, q) with p and q in 0..2.
ps = range(0, 3)
qs = range(0, 3)
parameters = product(ps, qs)
parameters_list = list(parameters)

# Search for the ARMA order with the smallest AIC.
results = []
best_aic = float("inf")  # positive infinity
for param in parameters_list:
    try:
        model = ARMA(df_month.Weighted_Price, order=param).fit()
    except ValueError:
        # Orders that cannot be fitted are reported and skipped.
        print('参数错误:', param)
        continue
    aic = model.aic
    results.append([param, aic])
    if aic < best_aic:
        best_model, best_aic, best_param = model, aic, param

# Print the best model, with every tried order and its AIC in a table.
result_table = pd.DataFrame(results)
result_table.columns = ['parameters', 'aic']
print('最优模型: ', best_model.summary())

# Bitcoin forecast
df_month2 = df_month[['Weighted_Price']]
D_data.plot()  # time-series plot
plt.show()
plot_acf(D_data).show()  # autocorrelation plot
plt.show()
plot_pacf(D_data).show()  # partial autocorrelation plot
plt.show()

# ADF test on the differenced column and Ljung-Box white-noise test.
print(u'1阶差分序列的ADF检验结果为:', ADF(D_data[u'dst差分']))
print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1))

data[u'dst'] = data[u'dst'].astype(float)

# BIC grid over p, q in 0..len(data)/10.
pmax = int(len(data) / 10)
qmax = int(len(data) / 10)
bic_matrix = []
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        # Orders that cannot be fitted are recorded as None. `Exception`
        # (not a bare except) keeps Ctrl-C / SystemExit working during the
        # grid search.
        try:
            tmp.append(ARMA(data, (p, q)).fit().bic)
        except Exception:
            tmp.append(None)
    bic_matrix.append(tmp)

bic_matrix = pd.DataFrame(bic_matrix)  # the minimum is looked up below
# print(bic_matrix)

# stack() flattens the table so idxmin() returns the (p, q) of the minimum.
p, q = bic_matrix.stack().idxmin()
print(u'bic最小的P值和q值为:%s、%s' % (p, q))

model = ARMA(data, (p, q)).fit()
model.summary2()  # model report
# 5-step forecast: returns predictions, standard errors and confidence
# intervals.
forecast = model.forecast(5)
print(forecast)
plt.show()
print()

# ---------------------------------------------------------------------------
# AR example: fit the model, then predict positions 90..110.
model = AR(ydata)
model_fit = model.fit()
#yhat = model_fit.predict(len(xdata), len(ydata))
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Auto Regression ', yhat)
print("=" * 50)

# ---------------------------------------------------------------------------
# MA example: ARMA with (p, q) = (0, 1), same prediction window.
model = ARMA(ydata, order=(0, 1))
model_fit = model.fit(disp=False)
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Moving Average 0,1 ', yhat)
print("=" * 50)

# ---------------------------------------------------------------------------
# ARMA example on 99 fresh random values, with the order changed to (2, 1).
newdata = [random() for x in range(1, 100)]
model = ARMA(newdata, order=(2, 1))
model_fit = model.fit(disp=False)
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Moving Average 2,1 ', yhat)
print("=" * 50)