Example #1
    return tmp_data


diffed_ts = diff_ts(dta_log, d=[12, 1])
pdb.set_trace()
test_stationarity.testStationarity(diffed_ts)
test_stationarity.draw_acf_pacf(diffed_ts, l=3)
model = arima_model(diffed_ts)
pdb.set_trace()
model.get_proper_model()
print('bic:', model.bic, 'p:', model.p, 'q:', model.q)
# print model.properModel.forecast()[0]
# print model.forecast_next_day_value(type='day')

# model2=ARMA(diffed_ts,(1,1,1)).fit()
model2 = ARMA(dta_log, (2, 1, 1)).fit()
model2.summary2()
predict_sunspots = model2.predict('6', '7', dynamic=True)
a = model2.forecast(5)[0]
a_ts = predict_diff_recover(a, d=[1, 1])
log_a = np.exp(a_ts)

# pdb.set_trace()
# fig = plt.figure(figsize=(12,8))
# ax1 = fig.add_subplot(111)
# diff1 = dta.diff(1)
# diff1.plot(ax=ax1)
# plt.show()

# dta = dta.diff(3)
# fig = plt.figure(figsize=(12,8))
def namuda(n):
    # `sigma` (and the input sequence `n`) are assumed to be defined earlier in the original script
    na = []
    for i in range(len(n)):
        x = sigma * (np.log(n[i])) ** 0.5
        na.append(x)
    return na


na = namuda(n)
print(na)


# Obtain the ARMA model order
arma_qxwy1 = sm.tsa.arma_order_select_ic(test_weiyi, ic='aic')['aic_min_order']  # select the order by the AIC criterion
'''arma_D2 = sm.tsa.arma_order_select_ic(D2,ic = 'aic')['aic_min_order']
arma_D1 = sm.tsa.arma_order_select_ic(D1,ic = 'aic')['aic_min_order']
print(arma_A2,arma_D2,arma_D1)'''
# ARMA model
model_qxwy1 = ARMA(test_weiyi, order=arma_qxwy1)
'''model_D2 = ARMA(D2,order=arma_D2)
model_D1 = ARMA(D1,order=arma_D1)'''
result_qxwy1 = model_qxwy1.fit()
'''result_D2 = model_D2.fit()
result_D1 = model_D1.fit()'''

'''plt.subplot(312)
plt.plot(D2,'red')
plt.plot(result_D2.fittedvalues,'blue')
plt.title('D2')
plt.subplot(313)
plt.plot(D1,'red')
plt.plot(result_D1.fittedvalues,'blue')
plt.title('D1')
plt.show()'''
Example #3
output_lst = []
# generate submit version
for ampm in ampms.keys():
    for route in routes:
        for weekday in weekdays:
            # log-smooth
            sub = np.log(tolist(data[route][weekday][ampm]))
            # sub = np.log(tolist(data['C-3'][6]['am']))
            # fit best model
            order = st.arma_order_select_ic(sub,
                                            max_ar=5,
                                            max_ma=5,
                                            ic=['aic', 'bic', 'hqic'])

            model = ARMA(sub, order=order.bic_min_order)
            result_arma = model.fit(disp=-1, method='css')
            predict = result_arma.predict()

            start = len(sub) - len(predict)
            end = start + len(predict) + 6
            # fig = result_arma.plot_predict(start, end)
            # fig.suptitle("%s %s %s" %(route,weekday,ampm))

            forecast = result_arma.predict(start, end)[-6:]
            # print (np.exp(sub))
            for x in np.exp(forecast):
                output_lst.append(x)
            # actual = test.get_group((route,weekday,ampm))[-6:]['avg-time'].values.tolist()
            print("%s %s %s" % (route, weekday, ampm))
Example #4

#
size = int(len(ts7) * .8)
data = diff(ts7)
train = data[1:size]
test = data[size:]

adr = []
rmse2 = []
adr2 = []
rmse22 = []
relmae2 = []
relmae22 = []
for z in range(1, 30):
    model = ARMA(train, (z, 0))
    modelfit = model.fit(maxiter=100, method='css')
    coef = modelfit.params

    pred = predictAR(modelfit, test)
    tru = test[len(coef) - 1:]
    bench = test[len(coef) - 2:-1]

    pred2 = antidiffroll(pred, ts7[size + len(coef) - 1:])
    tru2 = antidiffroll(tru, ts7[size + len(coef) - 1:])
    bench2 = antidiffroll(bench, ts7[size + len(coef) - 2:])

    adr.append(adjrsq(pred, tru, coef))
    rmse2.append(rmse(pred, tru))
    relmae2.append(relmae(pred, tru, bench))
Example #5
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.arima_process import ArmaProcess

ar1 = np.array([1])
ma1 = np.array([1, -0.9])

MA_object1 = ArmaProcess(ar1, ma1)
simulated_data_1 = MA_object1.generate_sample(nsample=1000)
'''
INSTRUCTIONS

*   Import the class ARMA in the module statsmodels.tsa.arima_model
*   Create an instance of the ARMA class called mod using the simulated data simulated_data_1 and the (p,q) order of the model (in this case, for an MA(1), order=(0,1))
*   Fit the model mod using the method .fit() and save it in a results object called res
*   Plot the in-sample and out-of-sample forecasts of the data using the .plot_predict() method
*   Start the forecast 10 data points before the end of the 1000 point series at 990, and end the forecast 10 data points after the end of the series at point 1010
'''

# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Forecast the first MA(1) model
mod = ARMA(simulated_data_1, order=(0, 1))

res = mod.fit()
res.plot_predict(start=990, end=1010)

plt.show()
Example #6
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.arima_process import ArmaProcess

ar1 = np.array([1, -0.9])
ma1 = np.array([1])

AR_object1 = ArmaProcess(ar1, ma1)
simulated_data_1 = AR_object1.generate_sample(nsample=1000)

'''
INSTRUCTIONS

*   Import the class ARMA in the module statsmodels.tsa.arima_model
*   Create an instance of the ARMA class called mod using the simulated data simulated_data_1 and the order (p,q) of the model (in this case, for an AR(1), order=(1,0))
*   Fit the model mod using the method .fit() and save it in a results object called res
*   Plot the in-sample and out-of-sample forecasts of the data using the plot_predict() method
*   Start the forecast 10 data points before the end of the 1000 point series at 990, and end the forecast 10 data points after the end of the series at point 1010
'''

# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Forecast the first AR(1) model
mod = ARMA(simulated_data_1, order=(1, 0))

res = mod.fit()

res.plot_predict(start=990, end=1010)

plt.show()
Example #7
plt.subplot(2,1,2)
ar2 = np.array([1])
ma2 = np.array([1, -0.9])
MA_object2 = ArmaProcess(ar2, ma2)
simulated_data_2 = MA_object2.generate_sample(nsample=100)
plt.plot(simulated_data_2)
plt.show()

plot_acf(simulated_data_1)
plot_acf(simulated_data_2)



### TO ESTIMATE PARAMETERS FROM DATA
from statsmodels.tsa.arima_model import ARMA
mode = ARMA(simulated_data_1, order=(0, 1))  # order=(2,0) means AR(2); order=(0,1) means MA(1)
mode_result = mode.fit()
mode_result.summary()
mode_result.params
mode_result.plot_predict(start = 80, end = 120)



df1mode = ARMA(df1['Adj Close'].resample('M').last().dropna(), order = (0,1))
df1mode_result = df1mode.fit()
df1mode_result.params
df1mode_result.plot_predict(start='1997-09-30', end='2018-01-31', alpha=.05)  # forecast future values with a confidence interval
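# A small follow-up sketch (not in the original snippet): the same confidence
# intervals shown by plot_predict() can also be obtained numerically with
# .forecast(); this assumes df1mode_result from the fit above is available.
fc, stderr, conf_int = df1mode_result.forecast(steps=12, alpha=0.05)
print(fc)        # point forecasts for the next 12 steps
print(conf_int)  # lower/upper 95% bounds for each step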



Example #8
"""
Let's Forecast Interest Rates
You will now use the forecasting techniques you learned in the last exercise and apply them to real data rather than simulated data. You will revisit a dataset from the first chapter: the annual data of 10-year interest rates going back 56 years, which is in a Series called interest_rate_data. Being able to forecast interest rates is of enormous importance, not only for bond investors but also for individuals like new homeowners who must decide between fixed and floating rate mortgages.

You saw in the first chapter that there is some mean reversion in interest rates over long horizons. In other words, when interest rates are high, they tend to drop and when they are low, they tend to rise over time. Currently they are below long-term rates, so they are expected to rise, but an AR model attempts to quantify how much they are expected to rise.

Instructions
100 XP
Import the class ARMA in the module statsmodels.tsa.arima_model.
Create an instance of the ARMA class called mod using the annual interest rate data and choosing the order for an AR(1) model.
Fit the model mod using the method .fit() and save it in a results object called res.
Plot the in-sample and out-of-sample forecasts of the data using the .plot_predict() method.
Pass the arguments start=0 to start the in-sample forecast from the beginning, and choose end to be '2022' to forecast several years in the future.
"""
# Import the ARMA module from statsmodels
from statsmodels.tsa.arima_model import ARMA

# Forecast interest rates using an AR(1) model
mod = ARMA(interest_rate_data, order=(1, 0))
res = mod.fit()

# Plot the original series and the forecasted series
res.plot_predict(start=0, end='2022')
plt.legend(fontsize=8)
plt.show()
Example #9
### Auto Regression and Moving Average (ARMA) model ###

# train / test split
# test - predictions of last 5 years of the Adj. Close price

# ts_data = ts_log # log transformed data
ts_data = ts_log_diff  # log transformed residuals
test_length = 1265
train, test = ts_data[1:len(ts_data) - test_length], ts_data[len(ts_data) -
                                                             test_length:]

# fit ARMA model
# AR order 2, MA order 0 for log transformed data
# AR order 4, MA order 2 for log residuals
arma = ARMA(train, order=(4, 2)).fit()
predictions = arma.predict(start=len(train), end=len(train) + len(test) - 1)
predictions.index = test.index

# plot AR model fitted values
plt.figure(figsize=(15, 10))
plt.plot(ts_log_diff, label="Log transformed Residuals")
plt.plot(arma.fittedvalues, color='red', label="ARMA model fitted Residuals")
plt.title(
    "Auto Regression and Moving Average model ARMA(4,2) - log Fitted Residuals"
)
plt.legend()
plt.savefig("plots_lena/ARMA log Fitted Residuals")
plt.show()

# plot AR model prediction results
def get_prediction(train_data, p, q):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        model = ARMA(train_data, (p, q)).fit(disp=0)
    return model.forecast(1)[0][0]


def test_arma(timeseries):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        #order = st.arma_order_select_ic(timeseries, max_ar=5,max_ma=5,ic=['aic', 'bic', 'hqic'])
        model = ARMA(timeseries, (7, 2)).fit()
    return model.forecast(1)[0][0]
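

# Hypothetical usage of the helpers above (not part of the original snippet):
# a one-step-ahead forecast of the log-differenced training series, assuming
# `train` and the (4, 2) order from earlier in this example are available.
next_log_diff = get_prediction(train, 4, 2)
print(next_log_diff)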
Example #12
# AIC = -2 ln(L) + 2k               Akaike information criterion (AIC)
# BIC = -2 ln(L) + ln(n) * k        Bayesian information criterion (BIC)
# HQ  = -2 ln(L) + 2 ln(ln(n)) * k  Hannan-Quinn criterion (HQ)
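# A minimal sketch (not in the original snippet) of computing these criteria by
# hand from a fitted model's log-likelihood; it assumes ARMA and the series
# xdata2 used below are available, and simply takes k as the number of
# estimated parameters.
import numpy as np

_res = ARMA(xdata2, (1, 1)).fit()
_k = len(_res.params)
_n = _res.nobs
aic_manual = -2 * _res.llf + 2 * _k
bic_manual = -2 * _res.llf + np.log(_n) * _k
hq_manual = -2 * _res.llf + 2 * np.log(np.log(_n)) * _k
print(aic_manual, bic_manual, hq_manual)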

# Use AIC as the information criterion for ARMA order selection
# (the model-checking step further below must be changed accordingly)

pmax = int(len(xdata2) / 10)  # rule of thumb: the order rarely exceeds length/10
qmax = int(len(xdata2) / 10)  # rule of thumb: the order rarely exceeds length/10

matrix = []  # AIC matrix
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:  # some fits fail (e.g. missing values or non-convergence) and raise
            #             tmp.append(ARMA(xdata2, (p,q)).fit().bic)  # BIC criterion
            tmp.append(ARMA(xdata2, (p, q)).fit().aic)  # AIC criterion
#             tmp.append(ARMA(xdata2, (p,q)).fit().hq)  # HQ criterion
        except:
            tmp.append(None)

    matrix.append(tmp)

matrix = pd.DataFrame(matrix)  # the minimum can be read off this table
print(matrix)
print(matrix.stack())


# Step 4: model checking
# Once the order is fixed, check whether the residual series is white noise. If it is not,
# the residuals still contain useful information, so the model should be revised or more structure extracted.
# If the residuals are not white noise, pick new values of p and q and refit.
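
# A minimal sketch (not in the original snippet) of the residual white-noise
# check described above, assuming xdata2 and the (p, q) chosen from the AIC
# table are available; it uses the Ljung-Box test from statsmodels.
from statsmodels.stats.diagnostic import acorr_ljungbox

resid = ARMA(xdata2, (p, q)).fit().resid
lb_test = acorr_ljungbox(resid, lags=12)
# If the Ljung-Box p-values are clearly above 0.05, the residuals look like
# white noise; otherwise go back and try a different (p, q).
print(lb_test)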
Example #13
print(yhat3)
print(model_fit.summary())
print("BIC: ", model_fit.bic)
mse = np.square(np.subtract(test["Close_Value"], yhat3)).mean()
print("MSE: ", mse)
#plot
x = list(range(len(test)))
plt.plot(x, test["Close_Value"], c='blue')
plt.plot(x, yhat3, c='green')
plt.legend()
plt.show()

# MOVING AVERAGE
from statsmodels.tsa.arima_model import ARMA
# fit model
model = ARMA(train["Close_Value"], order=(0, 1))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.predict(1, 2548)
print(yhat)
print(model_fit.summary())
print("BIC: ", model_fit.bic)
mse = np.square(np.subtract(test["Close_Value"], yhat)).mean()
print("MSE: ", mse)
#plot
plt.plot(x, test["Close_Value"], c='blue')
plt.plot(x, yhat, c='green')
plt.legend()
plt.show()

# AUTO-REGRESSIVE MOVING AVERAGE
def machinelearning(prediction):
    prediction.delete_many({})

    today = datetime.datetime.now()
    todaydash = today.strftime('%Y-%m-%d')
    DY = datetime.timedelta(days=730)
    earlier = today - DY
    today = today.strftime('%Y%m%d')
    earlier_2years = earlier.strftime('%Y%m%d')

    name = ['Ethereum', 'Litecoin']
    for item in name:
        url = 'https://coinmarketcap.com/currencies/' + item + '/historical-data/?start=' + earlier_2years + '&end=' + today
        page = urllib.request.urlopen(url)
        soup = bs4.BeautifulSoup(page, 'html.parser')
        cost_2years = soup.find_all("tr", class_="text-right")

        date = []
        open_cost = []
        for i in range(len(cost_2years)):
            datenum = (cost_2years[i].text).split('\n')[1]
            datenum = parser.parse(datenum)
            date.append(datenum)
            open_cost.append(float((cost_2years[i].text).split('\n')[2]))

        today_open = open_cost.pop(0)
        date.pop(0)
        cost = pd.DataFrame(open_cost, date)
        cost.columns = ['Price at 12am']
        cost.index.name = 'Date'

        #sort data in ascending order
        sorting = cost.index.sort_values(ascending=True)
        cost2 = cost.reindex(sorting)

        chg_cost = cost2.diff(7)
        chg_cost = chg_cost.dropna()

        #ARMA model
        mod = ARMA(chg_cost, order=(8, 1))
        result = mod.fit()
        forecast = result.forecast()[0]
        invert = len(cost2) - 8
        prediction_arma = forecast + cost2.iloc[invert, 0]

        #Support Vector Machine RBF Model
        b = np.array(range(0, len(open_cost)))
        b = b.reshape(-1, 1)
        svr_rbf = SVR(kernel='rbf', C=1e3, gamma=.05)
        svr_rbf.fit(b, cost2)
        svrrbf = svr_rbf.predict([[len(cost2) + 1]])[0]  # predict expects a 2-D array-like

        post = {
            'Name': item,
            'arma': round(prediction_arma[0], 2),
            'svrrbf': round(svrrbf, 2),
            'todayopen': today_open,
            'date': todaydash
        }

        prediction.insert_one(post)
327.45,
328.19,
330.92]

# Autoregression (AR) example
# fit model
modelAR = AR(data)
modelAR_fit = modelAR.fit()
# make prediction
yhatAR = modelAR_fit.predict(len(data), len(data))
print(yhatAR)
# End Autoregression

# Moving Average (MA) example
# fit model
modelMA = ARMA(data, order=(0, 1))
modelMA_fit = modelMA.fit(disp=False)
# make prediction
yhatMA = modelMA_fit.predict(len(data), len(data))
print(yhatMA)
# End Moving Average

# # Autoregressive Moving Average (ARMA) example
# # fit model
# modelARMA = ARMA(data, order=(2, 1))
# modelARMA_fit = modelARMA.fit(disp=False)
# # make prediction
# yhatARMA = modelARMA_fit.predict(len(data), len(data))
# print(yhatARMA)
# # End Autoregressive Moving Average
Example #16
#         test_predict[i][j] = pre
print('======================= ARMA for test ===============================')
loss = 0
error_count = 0
index_all = np.zeros([run_times, 2])
error_index = np.zeros(run_times)
test_target = np.zeros([run_times, output_steps])
test_prediction = np.zeros([run_times, output_steps])
for r in range(run_times):
    print('run ' + str(r))
    i = np.random.randint(data.shape[0])
    j = np.random.randint(test_data.shape[-1] - output_steps)
    train_df = pd.DataFrame(data[i][j:split[0] + split[1] + j])
    train_df.index = pd.DatetimeIndex(timestamps[j:split[0] + split[1] + j])
    try:
        results = ARMA(train_df, order=(2, 2)).fit(trend='nc', disp=-1)
    except:
        error_index[error_count] = r
        error_count += 1
        continue
    pre, _, _ = results.forecast(output_steps)
    test_real = test_data[i][j:j + output_steps]
    index_all[r] = [i, j]
    test_target[r] = test_real
    test_prediction[r] = pre
    loss += np.sum(np.square(pre - test_real))
print('================ calculate rmse for test data ============')
#n_rmse_val = np.sqrt(np.sum(np.square(val_predict - val_real))*1.0/np.prod(val_real.shape))
#n_rmse_test = np.sqrt(np.sum(np.square(test_predict - test_real))*1.0/np.prod(test_real.shape))
#rmse_val = pre_process.real_loss(n_rmse_val)
#rmse_test = pre_process.real_loss(n_rmse_test)
Example #17
    # make prediction
    yhat = model_fit.predict(len(monthlyGrossSet),
                             len(monthlyGrossSet),
                             typ='levels')
    print(yhat)


if __name__ == "__main__":
    # there are 52 records(weeks) for each year
    testmonth = ["Jan", "Mar", "May", "Jul", "Sep", "Oct", "Dec"]
    # for month in testmonth:
    # monthlySimpleAvg(month)
    # wma1(month)
    # wma2(month)
    # ar1(month)
    # movingAverage(month)
    # arma1((month))
    # arima1(month)
    # ar3(month)
    # ma3(month)
    # arma3(month)
    # arima3(month)

    values = [1, 2, 3, 4, 5, 6]
    # fit model
    model = ARMA(values, order=(0, 1))
    model_fit = model.fit(disp=False)
    # make prediction
    res = model_fit.predict(len(values), len(values))
    print(res)
Example #18
# Import the module for estimating an ARMA model
from statsmodels.tsa.arima_model import ARMA

# Fit the data to an AR(1) model and print AIC:
mod_ar1 = ARMA(chg_temp, order=(1, 0))
res_ar1 = mod_ar1.fit()
print("The AIC for an AR(1) is: ", res_ar1.aic)

# Fit the data to an AR(2) model and print AIC:
mod_ar2 = ARMA(chg_temp, order=(2, 0))
res_ar2 = mod_ar2.fit()
print("The AIC for an AR(2) is: ", res_ar2.aic)

# Fit the data to an ARMA(1,1) model and print AIC:
mod_arma11 = ARMA(chg_temp, order=(1, 1))
res_arma11 = mod_arma11.fit()
print("The AIC for an ARMA(1,1) is: ", res_arma11.aic)
Example #19
from matplotlib import style
import math
from statistics import mean

plt.style.use('fivethirtyeight')

df = pd.read_excel("../00Daily/Egypt.xlsx", squeeze=True, parse_dates=True)
df = df[["Date", "LocalTransmission"]]
df.set_index("Date", inplace=True)
df.dropna(inplace=True)
##df['Date'] = pd.to_datetime(df['Date'])
LocalTransmission = df['LocalTransmission'].astype('int32')
#print (df.head())
print(df.index)

result = ARMA(df, order=(0, 1)).fit(disp=False)
print(result.summary())
#print(result.params)
predictions = result.predict(start="2020-03-01", end="2020-05-01")
#accuracy = result.score()
print(predictions)
##accuracy = result.score()
#print (accuracy)

result.plot_predict(start="2020-03-01", end="2020-05-01")
plt.suptitle('Prediction for positive cases in Egypt \n Algorithm used: MA',
             fontsize=12)

plt.show()

##def mean_forecast_error(y, yhat):
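# A minimal sketch (not in the original snippet) of the mean_forecast_error
# helper hinted at above; it assumes equal-length sequences of actuals y and
# forecasts yhat, and uses `mean` imported from statistics earlier.
def mean_forecast_error(y, yhat):
    # positive values mean the forecasts under-predict on average
    return mean([y[i] - yhat[i] for i in range(len(y))])

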
# create the data
data = [
    3821, 4236, 3758, 6783, 4664, 2589, 2538, 3542, 4626, 5886, 6233, 4199,
    3561, 2335, 5636, 3524, 4327, 6064, 3912, 1356, 4305, 4379, 4592, 4233,
    4281, 1613, 1233, 4514, 3431, 2159, 2322, 4239, 4733, 2268, 5397, 5821,
    6115, 6631, 6474, 4134, 2728, 5753, 7130, 7860, 6991, 7499, 5301, 2808,
    6755, 6658, 6944, 6372, 8380, 7366, 6352, 8333, 8281, 11548, 10823, 13642,
    9973, 6723, 13416, 12205, 13942, 9590, 11693, 9276, 6519, 6863, 8237,
    10122, 8646, 9749, 5346, 4836, 9806, 7502, 9387, 11078, 9832, 6886, 4285,
    8351, 9725, 11844, 12387, 10666, 7072, 6429
]
data = pd.Series(data)
data_index = sm.tsa.datetools.dates_from_range('1901', '1990')
#print(data_index)

# plot the data
data.index = pd.Index(data_index)
data.plot(figsize=(12, 8))
plt.show()
# build the ARMA model
# manually specify p=7, q=0
arma = ARMA(data, (7, 0)).fit()
print('AIC: %0.4lf' % arma.aic)
# model forecast
predict_y = arma.predict('1990', '2000')
# plot the forecast results
fig, ax = plt.subplots(figsize=(12, 8))
ax = data.loc['1901':].plot(ax=ax)
predict_y.plot(ax=ax)
plt.show()
Example #21
                raise ValueError('What you input is not pd.Series type!')
        tmp_data.dropna(inplace=True)
    return tmp_data


diffed_ts = diff_ts(dta_log, d=[1, 1])
test_stationarity.testStationarity(diffed_ts)
test_stationarity.draw_acf_pacf(diffed_ts, l=31)
model = arima_model(diffed_ts)
pdb.set_trace()
model.get_proper_model()
print('bic:', model.bic, 'p:', model.p, 'q:', model.q)
print(model.properModel.forecast()[0])
# print(model.forecast_next_day_value(type='day'))

model2 = ARMA(diffed_ts, (model.p, model.q)).fit()
model2.summary2()
predict_sunspots = model2.predict('2090', '2100', dynamic=True)
a = model2.forecast(5)[0]
a_ts = predict_diff_recover(a, d=[1, 1])
log_a = np.exp(a_ts)


print(log_a)
pdb.set_trace()



model.certain_model(6,0)

predict_ts = model.properModel.predict()
Example #22
if False:
    plt.subplot(121)
    plt.plot(lag_acf)
    plt.title('ACF (q=1)')

    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.title('PACF (q=1)')
    plt.tight_layout()
    plt.show()

# Prediction with ARMA models
# 1. define the model by calling ARMA
# 2. The model is prepared on the training data by calling the fit() function
# 3. Predictions can be made by calling the predict() function and specifying
#    the index of the time or times to be predicted.
from statsmodels.tsa.arima_model import ARMA

x = df['Gym'].astype('float')
model = ARMA(x, order=(1, 1)).fit()
print(model.summary())
if True:
    plt.plot(x, label='Gym original')
    plt.plot(model.predict(), color='red', label='predicted')
    plt.title('RSS: %.4f' % sum((model.fittedvalues - x)**2))
    plt.legend(loc='best')
    plt.show()

print('The End.')
plt.subplot(224)
plt.plot(df_year.Price, '-', label='yearly')
plt.legend()
plt.show()

# set the parameter search range
ps = range(0, 3)
qs = range(0, 3)
parameters = product(ps, qs)
parameters_list = list(parameters)
# search for the best ARMA parameters, i.e. the ones minimising best_aic
results = []
best_aic = float("inf")  # 正无穷
for param in parameters_list:
    try:
        model = ARMA(df_month.Price, order=(param[0], param[1])).fit()
    except ValueError:
        print('Invalid parameters:', param)
        continue
    aic = model.aic
    if aic < best_aic:
        best_model = model
        best_aic = aic
        best_param = param
    results.append([param, model.aic])
# print the best model
print('Best model: ', best_model.summary())

# set future_month and the list of dates (date_list) to forecast
df_month2 = df_month[['Price']]
future_month = 3
Example #24
    def __init__(self, interval, ticker, kwargs):
        '''
        Collect historical data and calculate parameters for volume predictions and AR1

        Args:
            TODAY: a datetime object of today
            T_START_TIME: today's market opening time
            T_END_TIME: today's market closing time
            LASSO_LAMBDA: lambda of lasso method
            N_TICK_THRESHOLD: the least number of tick data a valid file should contain
            DATA_PATH: the absolute path to tick data for this ticker
            _interval: how many secs to update volume percentage prediction
            _interval_timedelta: interval in timedelta format
            _semi_n_interval: half of the total number of intervals today
            _n_interval: the total number of intervals today 
            _features_to_train: 10 valid days of CA, rolling mean and rolling linear prediction
            _histo_volume: 10 valid days of traded volume in each interval
            _intraday_percentage: expected trading volume in each interval
            _AR_pars_am: mu and phi for AR(1) in the morning
            _AR_pars_pm: mu and phi for AR(1) in the afternoon
            _CA_today: today's traded volumes during today's call auction
            _predicted_V: today's total predicted trading volume
            _is_V_predicted: a flag of is V predicted
            _iter: the interval index of the most recently pushed tick
            _last_update: the interval last updated in VWAP_log
            _datetime_index: the index for VWAP_log
            _today_vol: a list of today's trading volume in each interval
            _p_per: a list of predicted trading volume percentage
            _p_vol: a list of predicted trading volume
            _VWAP_log: the log of the predicted volume, its percentage, and the true volume in each interval
        
        Methods:
            pred_V: predict today's total trading volume
            push_tick: push tick data
            get_prediction: print out the current prediction
        '''
        if (interval % 5 != 0) or (7200 % interval != 0):
            raise ValueError(
                'interval must be a multiple of 5 secs and can divide 2 hours')

        if not ticker in listdir(kwargs['DATA_PATH']):
            raise Exception('no data for %s' % ticker)

        self.TODAY = kwargs['TODAY']
        # self.TODAY = datetime.strptime(today_for_test, "%Y-%m-%d")  # Tracey to notice
        self.T_START_TIME = kwargs['T_START_TIME']
        # self.T_START_TIME = self.TODAY.replace(hour = 9, minute = 30, second = 0, microsecond = 0)
        self.T_END_TIME = kwargs['T_END_TIME']
        # self.T_END_TIME = self.TODAY.replace(hour = 15, minute = 00, second = 0, microsecond = 0)
        self.LASSO_LAMBDA = kwargs['LASSO_LAMBDA']
        self.N_TICK_THRESHOLD = kwargs['N_TICK_THRESHOLD']
        self.DATA_PATH = kwargs['DATA_PATH'] + ticker + '/'
        # self.DATA_PATH = './data_path/' # Tracey to notice
        self._interval = interval
        self._interval_timedelta = timedelta(seconds=self._interval)
        self._semi_n_interval = int(self.HALFTIME / self._interval_timedelta)
        self._n_interval = 2 * self._semi_n_interval
        self._features_to_train = np.ones((11, 3), dtype=float)  # CA, M, L, A
        self._histo_volume = np.full((10, self._n_interval), 0,
                                     dtype=float)  # historical trading volume
        self._intraday_percentage = [
            1 / self._n_interval
        ] * self._n_interval  # notice .sum() =self._n_interval
        # self._AR_pars = np.array([1,0],dtype =float) # (u and phi)
        self._AR_pars = [0., 1.]
        self._CA_today = 0
        self._predicted_V = 0.
        self._is_V_predicted = 0
        self._last_update = 0
        self._iter = 0
        self._datetime_index = ([
            str(dt) for dt in datetime_range(
                self.T_START_TIME,
                self.T_START_TIME.replace(
                    hour=11, minute=30, second=0, microsecond=0),
                timedelta(seconds=self._interval))
        ] + [
            str(dt) for dt in datetime_range(
                self.T_START_TIME.replace(
                    hour=13, minute=0, second=0, microsecond=0),
                self.T_END_TIME, timedelta(seconds=self._interval))
        ])
        self._today_vol = [0.] * self._n_interval
        self._p_per = [0.] * self._n_interval
        self._p_vol = [0] * self._n_interval
        self._VWAP_log = {}

        files = set([
            filename for filename in listdir(self.DATA_PATH)
            if filename.endswith('.csv')
        ])
        history_date = self.TODAY
        x_output = np.append(
            np.arange(0 + self._interval, 7200 + self._interval,
                      self._interval),
            np.arange(12600 + self._interval, 19800 + self._interval,
                      self._interval))

        past_days = 0
        iter = 1

        # get data for intraday prediction
        while iter < 11:

            if not bool(files):
                raise Exception('Insufficient historical data')

            history_date = history_date - timedelta(days=1)
            past_days += 1

            if history_date.weekday() in set([5, 6]):
                continue

            filename = str(ticker) + str(history_date.strftime(
                '%Y-%m-%d')) + '.csv'  ## Tracey to notice

            if filename in files:
                files.remove(filename)
            else:
                continue

            try:
                dat = pd.read_csv(self.DATA_PATH + filename)
            except Exception:
                print('Error in reading %s, go to the previous day.' %
                      filename)
                continue

            if dat.shape[0] < self.N_TICK_THRESHOLD:
                print('File %s has few data for prediction' % filename)
                continue

            if past_days > 20:
                warnings.warn(
                    'Lack historical data. Time span of data for predicting intraday_volume of today has exceeded 20 days.'
                    'We are using data %d days from today' % past_days)

            try:
                dat.columns = ['DateTime',
                               'Volume']  # there will be Microsecond
                dat.DateTime = [
                    datetime.strptime(
                        str(history_date.strftime('%Y-%m-%d')) + ' ' + dt,
                        "%Y-%m-%d %H:%M:%S") for dt in dat.DateTime
                ]

                # datetime to time difference
                self.H_START_TIME = history_date.replace(hour=9,
                                                         minute=30,
                                                         second=0,
                                                         microsecond=0)
                dat['TimeStamp'] = [(dt - self.H_START_TIME).total_seconds()
                                    for dt in dat.DateTime]
                dat = dat.as_matrix(columns=['TimeStamp', 'Volume'])

                datCA = dat[dat[:, 0] < 0]
                self._features_to_train[10 - iter, 0] = datCA[:, 1].sum()
                dat = dat[dat[:, 0] > 0]

                # Tracey by reviewing the data from ctp finds it impossible
                if any(t >= 19800 for t in dat[:, 0]):
                    dat = np.vstack((dat[dat[:, 0] < 19800],
                                     [19800, dat[dat[:, 0] >= 19800,
                                                 1].sum()]))
                dat[-1, 0] = 19800
                x_input = np.append(0, dat[:, 0])
                volume_cumsum = np.append(0, dat[:, 1].cumsum())
                y_interp = scipy.interpolate.interp1d(
                    x_input, volume_cumsum)  # ,interval)
                intraday_volume = y_interp(x_output)
                intraday_volume = np.append(
                    intraday_volume[0],
                    (intraday_volume[1:] - intraday_volume[:-1]))
                self._histo_volume[10 - iter] = intraday_volume
            except Exception:
                print('Error when read file %s, you may check its format' %
                      filename)
                continue

            iter += 1

        iter = 11  # not actually needed

        # get data for roll_mean and roll_linear
        volume_sums = np.zeros(5, dtype=float)
        history_date = self.TODAY - timedelta(days=past_days)
        while iter < 16:

            if not bool(files):
                raise Exception('Insufficient historical data')

            history_date = history_date - timedelta(days=1)
            past_days += 1

            if history_date.weekday() in set([5, 6]):
                continue

            # filename = str(history_date.strftime('%Y-%m-%d'))+'.csv' ## Tracey to notice
            filename = str(ticker) + str(history_date.strftime(
                '%Y-%m-%d')) + '.csv'  ## Tracey to notice
            if filename in files:
                files.remove(filename)
            else:
                continue

            try:
                dat = pd.read_csv(self.DATA_PATH + filename)
            except Exception:
                print('Error in reading %s, go to the previous day.' %
                      filename)
                continue

            if past_days > 30:
                warnings.warn(
                    'Lack historical data. Time span of data for predicting total trading volume of today has exceeded 30 days.'
                )

            try:
                dat = pd.read_csv(self.DATA_PATH + filename)
                dat.columns = ['DateTime', 'Volume']
                self.H_START_TIME = history_date.replace(hour=9,
                                                         minute=30,
                                                         second=0,
                                                         microsecond=0)
                dat.DateTime = [
                    datetime.strptime(
                        str(history_date.strftime('%Y-%m-%d')) + ' ' + dt,
                        "%Y-%m-%d %H:%M:%S") for dt in dat.DateTime
                ]
                volume_sums[15 - iter] = dat[
                    dat.DateTime > self.H_START_TIME].Volume.sum()
            except Exception:
                print('Error when read file ' + filename +
                      ', you may check its format')
                continue

            iter += 1

        # preparing sample for predicting today's total volume
        self.volume_to_train = self._histo_volume.sum(axis=1)
        volume_sums = np.append(volume_sums, self.volume_to_train)
        self._features_to_train[:, 1] = rolling_mean(volume_sums)
        self._features_to_train[:, 2] = rolling_linear(volume_sums)

        # get intraday pattern and intialize intraday prediction
        intraday_mean = self._histo_volume.mean(axis=0)
        self._p_vol[0] = float(intraday_mean[0])
        self._p_vol[self._semi_n_interval] = float(
            intraday_mean[self._semi_n_interval])
        self._intraday_percentage = list(
            np.divide(intraday_mean, intraday_mean.sum()) * self._n_interval)
        if any(i < 1 / (self._n_interval * 10)
               for i in self._intraday_percentage):
            warnings.warn(
                'adjust intraday trading volume pattern for irregular data')
        tmp = np.divide(intraday_mean, intraday_mean.sum()) * self._n_interval
        if np.any(tmp < 0.1):
            warnings.warn(
                'adjust intraday trading volume pattern for irregular data')
            tmp[tmp >= 0.1] = tmp[tmp > 0.1] * sum(self._n_interval -
                                                   tmp[tmp < 0.1]) / sum(
                                                       tmp[tmp >= 0.1])
            tmp[tmp < 0.1] = 0.1
        self._intraday_percentage = list(tmp)
        self._p_per[0] = self._intraday_percentage[0] / self._n_interval
        self._p_per[self._semi_n_interval] = self._intraday_percentage[
            self._semi_n_interval] / self._n_interval
        self._VWAP_log[self._datetime_index[0]] = get_log(
            None, self._p_vol[0], self._p_per[0])

        # compute AR
        arma = ARMA((self._histo_volume[-1] /
                     self._intraday_percentage)[0:self._n_interval],
                    order=(1, 0))
        self._AR_pars = arma.fit().params.tolist()
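        # Note (not part of the original snippet): with the default trend='c',
        # statsmodels reports ARMA(1, 0) parameters as [const, ar.L1], where the
        # constant is the series mean, so _AR_pars holds [mu, phi] as described
        # in the docstring above.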
Example #25
#Defining training and testing data
training_set = delhi[delhi["Year"] <= 2015]
test_set = delhi[delhi["Year"] > 2015]

#acf and pacf plots
from statsmodels.graphics.tsaplots import plot_acf
acf = plot_acf(delhi["AvgTemperature"], lags=9000)
from statsmodels.graphics.tsaplots import plot_pacf
pacf = plot_pacf(delhi["AvgTemperature"], lags=10)
fig5 = plt.savefig("/home/vaishnavi/Desktop/Final/Screenshots")
plt.show()

#MA model
from statsmodels.tsa.arima_model import ARMA
model_MA = ARMA(training_set["AvgTemperature"], order=(0, 2))
model_fit_MA = model_MA.fit()
predictions_MA = model_fit_MA.predict(test_set.index[0], test_set.index[-1])

fig5 = plt.figure(figsize=(15, 5))
plt.ylabel("Temperature", fontsize=20)
plt.plot(test_set["AvgTemperature"], label="Original Data")
plt.plot(predictions_MA, label="Predictions")
fig6 = plt.savefig("/home/vaishnavi/Desktop/Final/Screenshots")
plt.show()
#plt.legend()

#RMSE for MA model
mse = mean_squared_error(predictions_MA, test_set["AvgTemperature"])
print(mse**0.5)
Example #26
    print('p-value: ', d_order0[1])
    print('Critical values: ', d_order0[4])

    if d_order0[0] > d_order0[4]['5%']:
        print('Time Series is nonstationary')
    else:
        print('Time Series is stationary')

    # # selecting parameter
    order = sm.tsa.arma_order_select_ic(ts_diff_1, max_ar=6, max_ma=3, ic=['aic'])
    # print order

    try:

        # ARMA model
        model = ARMA(ts_diff_1, (order['aic_min_order'][0], order['aic_min_order'][1]))
        predict_diff_1 = model.fit(disp=False).forecast(14)[0]

        # restore
        predict = np.cumsum(predict_diff_1)
        predict = predict + np.mean(ts[-7:])

        # use continuity is better
        stander = sklearn.preprocessing.StandardScaler()
        predict = stander.fit_transform(predict)
        predict = stander.fit(ts[-7:]).inverse_transform(predict)
        predict = np.round(predict)

        print(predict)
        predict_result = np.vstack((predict_result,predict))
Example #27
        else: list_hourly_load[j] = sum - 3
print(k)
list_hourly_load = np.array(list_hourly_load)
shifted_value = list_hourly_load.mean()
list_hourly_load -= shifted_value
a2, d2, d1 = pywt.wavedec(list_hourly_load[:-48], 'db4', mode='sym', level=2)
# lhl = pywt.waverec([a2, d2, d1], 'db4')
# print(np.shape(a2),np.shape(d2),np.shape(d1),np.shape(lhl))
# order_a2 = sm.tsa.arma_order_select_ic(a2, ic='aic')['aic_min_order']
# order_d2 = sm.tsa.arma_order_select_ic(d2, ic='aic')['aic_min_order']
# order_d1 = sm.tsa.arma_order_select_ic(d1, ic='aic')['aic_min_order']
order_a2 = [3, 2]  # p ,q
order_d2 = [4, 1, 2]  # p, d ,q
order_d1 = [4, 1, 2]
print(order_a2, order_d2, order_d1)
model_a2 = ARMA(a2, order=order_a2)
model_d2 = ARIMA(d2, order=order_d2)
model_d1 = ARIMA(d1, order=order_d1)
result_a2 = model_a2.fit()
result_d2 = model_d2.fit()
result_d1 = model_d1.fit()
plt.figure(figsize=(10, 15))
plt.subplot(3, 1, 1)
plt.plot(a2, 'blue')
plt.plot(result_a2.fittedvalues, 'red')
plt.title('model_a2')
plt.subplot(3, 1, 2)
plt.plot(d2, 'blue')
plt.plot(result_d2.fittedvalues, 'red')
plt.title('model_d2')
plt.subplot(3, 1, 3)
Example #28
plt.legend()
plt.subplot(224)
plt.plot(df_year.Weighted_Price, '-', label='yearly')
plt.legend()
plt.show()
# set the parameter search range
ps = range(0, 3)
qs = range(0, 3)
parameters = product(ps, qs)
parameters_list = list(parameters)
# search for the best ARMA parameters, i.e. the ones minimising best_aic
results = []
best_aic = float("inf")  # 正无穷
for param in parameters_list:
    try:
        model = ARMA(df_month.Weighted_Price, order=(param[0], param[1])).fit()
    except ValueError:
        print('Invalid parameters:', param)
        continue
    aic = model.aic
    if aic < best_aic:
        best_model = model
        best_aic = aic
        best_param = param
    results.append([param, model.aic])
# print the best model
result_table = pd.DataFrame(results)
result_table.columns = ['parameters', 'aic']
print('Best model: ', best_model.summary())
# Bitcoin forecast
df_month2 = df_month[['Weighted_Price']]
Example #29
D_data.plot()  # time series plot
plt.show()
plot_acf(D_data).show()  # autocorrelation plot
plt.show()
plot_pacf(D_data).show()  # partial autocorrelation plot
plt.show()
print(u'ADF test result for the first-order differenced series:', ADF(D_data[u'dst差分']))
print(u'White-noise (Ljung-Box) test result for the differenced series:', acorr_ljungbox(D_data, lags=1))

data[u'dst'] = data[u'dst'].astype(float)
pmax = int(len(data) / 10)
qmax = int(len(data) / 10)
bic_matrix = []
for p in range(pmax + 1):
    tmp = []
    for q in range(qmax + 1):
        try:
            tmp.append(ARMA(data, (p, q)).fit().bic)
        except:
            tmp.append(None)
    bic_matrix.append(tmp)
bic_matrix = pd.DataFrame(bic_matrix)  # the minimum can be read off this table
#  print(bic_matrix)
p, q = bic_matrix.stack().idxmin()
print(u'p and q with the smallest BIC: %s, %s' % (p, q))

model = ARMA(data, (p, q)).fit()
model.summary2()  # produce a model report
forecast = model.forecast(5)  # 5-day forecast: returns the forecasts, standard errors and confidence intervals
print(forecast)
plt.show()
print()
#-------------------------------
# fit model
model = AR(ydata)
model_fit = model.fit()
#-------------------------------
# make prediction
#yhat = model_fit.predict(len(xdata), len(ydata))
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Auto Regression ', yhat)
print("="*50)
#-------------------------------------------------------------------------------------------------
# MA example
# fit model
model = ARMA(ydata, order=(0, 1))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for Moving Average (0,1): ', yhat)
print("="*50)
#-------------------------------------------------------------------------------------------------
# ARMA example
from random import random
newdata = [random() for x in range(1, 100)]
# change order
model = ARMA(newdata, order=(2, 1))
model_fit = model.fit(disp=False)
# make prediction
yhat = model_fit.predict(start=90, end=110)
print('Predicted value for ARMA (2,1): ', yhat)
print("="*50)