def summary_arma_estimate(self, maximum_p, date, count):
    """Sweep the AR order p over [1, maximum_p) and score each one-step forecast.

    For each p, fit AR(p) models (ARMA(p, 0)) to the factor-return and bias
    histories, forecast one step ahead, reconstruct tomorrow's cross-sectional
    return as ``weights * exposure + bias`` and record the squared loss and
    direction-hit rate against realised returns.

    Parameters
    ----------
    maximum_p : int
        Exclusive upper bound on the AR order tried.
    date : label
        Trading date used as "today"; tomorrow is the next row of MktData.
    count : int
        History length passed to ``get_factor_ret_vector``.

    Returns
    -------
    pd.DataFrame indexed by p with columns
    ['weights_predict', 'bias_predict', 'loss', 'direction_precision'].

    Note: relies on module-level ``test_regression``, ``factor_real`` and
    ``MktData`` — TODO confirm those globals are initialised before calling.
    """
    factor_ret_vector, bias_vector = test_regression.get_factor_ret_vector(
        factor_real, date=date, count=count)
    result_df_arma = pd.DataFrame(
        index=range(1, maximum_p),
        columns=['weights_predict', 'bias_predict', 'loss', 'direction_precision'],
        dtype='float')
    descendant = list(factor_real.columns)
    today_factor_exposure = factor_real.loc[date, :]
    today_index = MktData.index.get_loc(date)
    tommorow_index = today_index + 1
    # (descendant, 'ret') selects the 'ret' field of each stock from the
    # MultiIndex columns; drop the field level so the index is stock-only.
    tommorow_ret = MktData.loc[MktData.index[tommorow_index], (descendant, 'ret')]
    tommorow_ret.index = tommorow_ret.index.droplevel(1)
    for p in range(1, maximum_p):
        # disp=-1 silences the optimiser output, matching the bias fit below
        # (the original inconsistently passed disp=1 here).
        arma_weights = ARMA(factor_ret_vector, order=(p, 0)).fit(disp=-1)
        # .item() replaces np.asscalar, which was removed in NumPy 1.23.
        weights_predict_arma = np.asarray(arma_weights.forecast(1)[0]).item()
        result_df_arma.loc[p, 'weights_predict'] = weights_predict_arma
        arma_bias = ARMA(bias_vector, order=(p, 0)).fit(disp=-1)
        bias_predict_arma = np.asarray(arma_bias.forecast(1)[0]).item()
        result_df_arma.loc[p, 'bias_predict'] = bias_predict_arma
        # Single-factor linear model: predicted return per stock.
        predict_tommorow_ret = weights_predict_arma * today_factor_exposure + bias_predict_arma
        loss = np.sum((predict_tommorow_ret - tommorow_ret) ** 2)
        direction_precision = (predict_tommorow_ret * tommorow_ret > 0).sum() / len(tommorow_ret)
        result_df_arma.loc[p, 'loss'] = loss
        result_df_arma.loc[p, 'direction_precision'] = direction_precision
    return result_df_arma
def multi_regression_arma(self, factor_ts, Mkt_ts, date, param_df, maximum_p, previous_load_count):
    """Estimate daily single-factor regressions over a trailing window, then
    model the resulting coefficient series with AR(p) and score the one-step
    forecast of tomorrow's returns for each p in [1, maximum_p).

    Parameters
    ----------
    factor_ts : pd.DataFrame
        Factor exposures, dates x stocks.
    Mkt_ts : pd.DataFrame
        Market data with MultiIndex columns (stock, field); field 'ret' is used.
    date : label
        "Today"; tomorrow is the following row of Mkt_ts.
    param_df : pd.DataFrame
        Output frame, written in place at rows p with columns
        'weights', 'intercept', 'direction_precision', 'loss'.
    maximum_p : int
        Exclusive upper bound on AR order.
    previous_load_count : int
        Number of trailing daily regressions to run.

    Returns nothing; results are written into param_df.
    """
    group_stock = list(factor_ts.columns)
    today_factor_index = factor_ts.index.get_loc(date)
    today_ret_index = Mkt_ts.index.get_loc(date)
    tommorrow_ret_index = today_ret_index + 1
    today_factor_exposure = factor_ts.loc[date, :]
    tommorrow_ret = Mkt_ts.loc[Mkt_ts.index[tommorrow_ret_index], (group_stock, 'ret')]
    # Drop the 'ret' field level so the Series index is stock-only.
    tommorrow_ret.index = tommorrow_ret.index.droplevel(level=1)
    weights_ts = []
    intercept_ts = []
    # Walk backwards in lockstep over both calendars; the asserts guard that
    # factor and market indices stay aligned date-for-date.
    for factor_index, mkt_index in zip(
            range(today_factor_index, today_factor_index - previous_load_count - 1, -1),
            range(today_ret_index, today_ret_index - previous_load_count - 1, -1)):
        assert Mkt_ts.index[mkt_index] == factor_ts.index[factor_index]
        assert Mkt_ts.index[mkt_index + 1] == factor_ts.index[factor_index + 1]
        # Previous day's exposure is regressed against this day's return,
        # i.e. exposure leads return by one period.
        temp_factor_exposure = factor_ts.loc[factor_ts.index[factor_index - 1], group_stock]
        temp_factor_exposure = temp_factor_exposure.fillna(0)
        temp_mkt_ret = Mkt_ts.loc[Mkt_ts.index[mkt_index], (group_stock, 'ret')]
        temp_mkt_ret = temp_mkt_ret.fillna(0)
        temp_mkt_ret.index = temp_mkt_ret.index.droplevel(level=1)
        try:
            # Cross-sectional OLS: return ~ exposure, one slope + intercept per day.
            clf = LinearRegression()
            clf.fit(np.array(temp_factor_exposure).reshape(-1, 1), temp_mkt_ret)
            weights = clf.coef_[0]
            intercept = clf.intercept_
            weights_ts.append(weights)
            intercept_ts.append(intercept)
        except ValueError:
            # Diagnostic dump when sklearn rejects the inputs (e.g. NaNs).
            print(temp_factor_exposure.isna())
            print(temp_mkt_ret.isna())
    for p in range(1, maximum_p):
        # Model the coefficient time series with AR(p) and forecast one step.
        arma_weights = ARMA(weights_ts, order=(p, 0)).fit(disp=-1)
        # NOTE(review): np.asscalar was removed in NumPy 1.23 — .item() is the
        # modern equivalent; confirm the pinned NumPy version before upgrading.
        predict_factor_ret = np.asscalar(arma_weights.forecast(1)[0])
        arma_intercept = ARMA(intercept_ts, order=(p, 0)).fit(disp=-1)
        predict_factor_intercept = np.asscalar(
            arma_intercept.forecast(1)[0])
        param_df.loc[p, 'weights'] = predict_factor_ret
        param_df.loc[p, 'intercept'] = predict_factor_intercept
        # Reconstruct tomorrow's cross-sectional return and score it.
        predict_ret = predict_factor_ret * today_factor_exposure + predict_factor_intercept
        square_loss = np.sum((predict_ret - tommorrow_ret)**2)
        mul = tommorrow_ret * predict_ret
        direction_precision = (mul > 0).sum() / len(mul)
        param_df.loc[p, 'direction_precision'] = direction_precision
        param_df.loc[p, 'loss'] = square_loss
def run_arma(original_ts, maxar=7, maxma=7):
    """Log-transform a series, difference it until stationary, pick an ARMA
    order and produce in-sample predictions plus a 3-step forecast.

    Parameters
    ----------
    original_ts : pd.DataFrame
        Single-column time series (only column 0 is modelled).
    maxar, maxma : int
        Search bounds handed to ``choose_order``.

    Returns
    -------
    (predict, forecast)
        In-sample predictions and the 3-step forecast, both mapped back to the
        original scale by ``predict_recover``.
    """
    print(original_ts.columns[0], 'start arma')
    original_ts_log = np.log(original_ts)
    # .iloc replaces the DataFrame.ix indexer, which was removed in pandas 1.0.
    if test_stationarity(original_ts_log.iloc[:, 0]) < 0.01:
        diffn = 0
        diff_original_ts_log = original_ts_log
        print('平稳,不需要差分')
    else:
        diffn = best_diff(original_ts_log, maxdiff=8)
        diff_original_ts_log = produce_diffed_timeseries(original_ts_log, diffn)
        print('差分滞后阶数为' + str(diffn) + ',已完成差分')
    order = choose_order(diff_original_ts_log, maxar, maxma)
    print('模型的阶数为: ' + str(order))
    # disp expects an int; the original passed the string '-1'.
    model = ARMA(diff_original_ts_log.iloc[:, 0], order).fit(disp=-1, method='css')
    f = model.forecast(steps=3, alpha=0.05)[0]
    p = model.predict()
    predict = predict_recover(p, original_ts_log, diffn, 'predict')
    forecast = predict_recover(f, original_ts_log, diffn, 'forecast')
    return predict, forecast
def installment_year_add_trend_feature(features, gr, feature_name, prefix):
    """Add linear-trend and ARMA features derived from gr[feature_name] to *features*.

    Eight keys are written: '<prefix><name>' / '_predict' (trend and one-step
    linear extrapolation over all rows), '_non' / '_non_predict' (same on the
    non-zero rows only), '_ma' / '_ma_predict' (MA(1) coefficient and forecast)
    and '_ar' / '_ar_predict' (AR(1) coefficient and forecast).  Any sub-model
    that fails falls back to NaN for its pair of keys.

    Parameters
    ----------
    features : dict
        Mutated in place and also returned.
    gr : pd.DataFrame
        Group frame; gr[feature_name] is NaN-filled with 0 in place (side
        effect on the caller's frame, preserved from the original).
    feature_name, prefix : str
    """
    gr[feature_name].fillna(0, inplace=True)
    y = gr[feature_name].values
    try:
        x = np.arange(0, len(y)).reshape(-1, 1)
        lr = LinearRegression()
        lr.fit(x, y)
        trend = lr.coef_[0]
        # BUG FIX: predict() requires a 2-D sample array; the original passed
        # the bare int x.shape[0], which raises in sklearn and made the
        # "_predict" feature silently NaN via the except clause.
        predict = lr.predict(np.array([[x.shape[0]]]))[0]
    except Exception:
        trend = np.nan
        predict = np.nan
    features['{}{}'.format(prefix, feature_name)] = trend
    features['{}{}'.format(prefix, feature_name) + "_predict"] = predict
    try:
        y = gr.loc[gr[feature_name] != 0, feature_name].values
        x = np.arange(0, len(y)).reshape(-1, 1)
        # Fresh estimator: the one above may be unbound if its try failed early.
        lr = LinearRegression()
        lr.fit(x, y)
        tr = lr.coef_[0]
        # Same 2-D fix as above (original: lr.predict(y.shape[0])).
        pr = lr.predict(np.array([[y.shape[0]]]))[0]
    except Exception:
        tr = np.nan
        pr = np.nan
    features['{}{}'.format(prefix, feature_name) + "_non"] = tr
    features['{}{}'.format(prefix, feature_name) + "_non_predict"] = pr
    try:
        y = gr.loc[gr[feature_name] != 0, feature_name].values
        arma = ARMA(y, order=(0, 1)).fit()
        ma_tr = arma.maparams[0]
        ma_pr = arma.forecast(steps=1)[0][0]
    except Exception:
        ma_tr = np.nan
        ma_pr = np.nan
    features['{}{}'.format(prefix, feature_name) + "_ma"] = ma_tr
    features['{}{}'.format(prefix, feature_name) + "_ma_predict"] = ma_pr
    try:
        y = gr.loc[gr[feature_name] != 0, feature_name].values
        arma = ARMA(y, order=(1, 0)).fit()
        ar_tr = arma.arparams[0]
        ar_pr = arma.forecast(steps=1)[0][0]
    except Exception:
        ar_tr = np.nan
        ar_pr = np.nan
    features['{}{}'.format(prefix, feature_name) + "_ar"] = ar_tr
    features['{}{}'.format(prefix, feature_name) + "_ar_predict"] = ar_pr
    return features
def func2(data, order):
    """First-difference *data*, fit an ARMA model on the differences, forecast
    35 steps, and undo the differencing with a cumulative sum.

    The fitted model is also published through the module-level ``model``.
    """
    global model
    # Keep the first observation: it anchors the cumsum that reverses diff(1).
    initial_value = data.iloc[0]
    differenced = data.diff(1).iloc[1:]
    model = ARMA(differenced, order=order).fit(disp=0)
    forecast_diffs = model.forecast(steps=35)[0]
    # Prepend the anchor, cumulate, then drop the anchor itself.
    recovered = np.append(np.array([initial_value]), forecast_diffs).cumsum()
    return recovered[1:]
def build_model(time_series, n_steps, p, q, start):
    """Fit an ARMA model and build an n_steps-ahead forecast frame with 95%
    and 99% confidence intervals, indexed by daily dates starting at *start*.

    NOTE(review): the ``p`` and ``q`` parameters are ignored — they are
    immediately shadowed by the hard-coded ``(1, 3, 1)`` below, and the ARMA
    order actually used is the hard-coded ``(0, 1)``.  Confirm whether the
    caller-supplied order was meant to be used before changing this.
    """
    # arma = sm.tsa.ARMA(time_series,(p,q)).fit()
    # show_prediction(arma, time_series)
    p, d, q = (1, 3, 1)  # shadows the arguments; ``d`` is never used
    arma = ARMA(time_series, (0, 1)).fit(disp=-1, maxiter=100)
    f, err95, ci95 = arma.forecast(steps=n_steps)  # 95% CI (default alpha=0.05)
    _, err99, ci99 = arma.forecast(steps=n_steps, alpha=0.01)  # 99% CI
    idx = pd.date_range(start, periods=n_steps, freq='D')
    fc_95 = pd.DataFrame(np.column_stack([f, ci95]),
                         index=idx,
                         columns=['forecast', 'lower_ci_95', 'upper_ci_95'])
    fc_99 = pd.DataFrame(np.column_stack([ci99]),
                         index=idx,
                         columns=['lower_ci_99', 'upper_ci_99'])
    # Merge the two frames on the shared date index.
    fc_all = fc_95.combine_first(fc_99)
    # show_forecast(fc_all)
    return fc_all
def get_factor_ret_predict_arma(self, factor_ts, date, p, count):
    """Predict tomorrow's cross-sectional returns with AR(p) forecasts of the
    factor return and bias series.

    Parameters
    ----------
    factor_ts : pd.DataFrame
        Factor exposures, dates x stocks.
    date : label
        "Today"; tomorrow is the next row of the module-level ``MktData``.
    p : int
        AR order for both the weights and bias models.
    count : int
        History length for ``get_factor_ret_vector``.

    Returns the per-stock predicted return Series; also prints the squared
    loss and direction-hit rate against realised returns.
    """
    descendant = list(factor_ts.columns)
    today_factor_exposure = factor_ts.loc[date, :]
    today_index = MktData.index.get_loc(date)
    tommorow_index = today_index + 1
    tommorow_ret = MktData.loc[MktData.index[tommorow_index], (descendant, 'ret')]
    tommorow_ret.index = tommorow_ret.index.droplevel(1)
    factor_ret_vector, bias_vector = self.get_factor_ret_vector(
        factor_ts, date, count=count)
    # disp=-1 silences optimiser output, consistent with the rest of the file
    # (the original passed disp=1 on both fits).
    arma_weights = ARMA(factor_ret_vector, order=(p, 0)).fit(disp=-1)
    # .item() replaces np.asscalar, removed in NumPy 1.23.
    weights_predict_arma = np.asarray(arma_weights.forecast(1)[0]).item()
    arma_bias = ARMA(bias_vector, order=(p, 0)).fit(disp=-1)
    bias_predict_arma = np.asarray(arma_bias.forecast(1)[0]).item()
    predict_tommorow_ret = weights_predict_arma * today_factor_exposure + bias_predict_arma
    loss = np.sum((predict_tommorow_ret - tommorow_ret) ** 2)
    direction_precision = (predict_tommorow_ret * tommorow_ret > 0).sum() / len(tommorow_ret)
    print(loss, direction_precision)
    return predict_tommorow_ret
def arma_forecast(self, ts, p, q,):
    """Fit an ARMA(p, q) model to *ts* and forecast one step ahead.

    Returns a tuple of (one-step forecast array, statsmodels summary2 table).
    """
    fitted = ARMA(ts, order=(p, q)).fit(disp=-1)
    # In-sample prediction kept from the original implementation (unused here,
    # retained so behaviour — including any fit-time side effects — matches).
    in_sample = fitted.predict()
    one_step_ahead = fitted.forecast(1)[0]
    #print("Forecast stock extra return of next day: ", next_ret)
    return one_step_ahead, fitted.summary2()
def _get_volume_stats(self, daily_volume: pd.DataFrame):
    """Forecast the next day's log-volume and report recent log-volume variance.

    Parameters
    ----------
    daily_volume : pd.DataFrame
        Must have a ``volume`` column; extra columns ('lv', 'mu_lv',
        'excess_lv') are added in place — note the caller's frame is mutated.

    Returns
    -------
    (predicted_daily_lv, variance of the last ROLLING_WINDOW log-volumes).

    ROLLING_WINDOW is a module-level constant — TODO confirm its value.
    """
    daily_volume["lv"] = np.log(daily_volume.volume)
    if len(daily_volume) > ROLLING_WINDOW * 3:
        # Ceiling of the rolling mean log-volume serves as the baseline level.
        daily_volume["mu_lv"] = daily_volume.lv.rolling(
            ROLLING_WINDOW, min_periods=1).mean().transform(np.ceil)
        daily_volume["excess_lv"] = daily_volume.lv - daily_volume.mu_lv
        # Model the de-trended (excess) log-volume with ARMA(2, 1) and add the
        # one-step forecast back onto the latest baseline.
        model = ARMA(daily_volume.reset_index().excess_lv.dropna(),
                     (2, 1)).fit(disp=False)
        predicted_excess = model.forecast(1)[0]
        predicted_daily_lv = daily_volume.mu_lv.array[-1] + predicted_excess
    else:
        # fallback to volume geometric mean if there are too few observations
        predicted_daily_lv = daily_volume.volume[-ROLLING_WINDOW:].mean()
    return predicted_daily_lv, daily_volume.lv[-ROLLING_WINDOW:].var()
class ts_AR:
    """Autoregressive model wrapper whose AR order is taken from the ADF test.

    Parameters
    ------------
    ts_train : training time series
    error_fun : error function to use (stored but not used here)

    Attributes
    ------------
    fittedModel : the fitted statsmodels ARMA result (None until fit()).
    """

    def __init__(self, ts_train, error_fun=None):
        self.ts_train = ts_train  # training time series
        self.error_fun = error_fun
        self.fittedModel = None

    def __adf_test(self):
        """Run the augmented Dickey-Fuller test and return a labelled Series."""
        adftest = adfuller(self.ts_train, autolag='AIC')
        # adftest layout: (test stat, p-value, lags used, n obs, {crit values}, icbest), e.g.
        # (-0.0, 0.958532086060056, 9, 10, {'1%': -4.331573, '5%': -3.23295, '10%': -2.7487}, -582.412544847778)
        adf_res = pd.Series(adftest[0:4], index=[
            'Test Statistic', 'p-value', 'Lags Used',
            'Number of Observations Used'
        ])
        for key, value in adftest[4].items():
            adf_res['Critical Value (%s)' % key] = value
        return adf_res

    def fit(self):
        """Fit AR(w) where w is the lag count chosen by the ADF test."""
        adf_res = self.__adf_test()
        w = int(adf_res['Lags Used'])
        self.fittedModel = ARMA(self.ts_train, order=(w, 0)).fit(disp=-1)

    def get_fittedvalues(self):
        """Return the rounded in-sample fitted values."""
        fittedvalues = self.fittedModel.predict(
        )  # equivalent to self.fittedModel.fittedvalues (a Series)
        fittedvalues = np.round(fittedvalues)  # round predicted sales to integers
        return fittedvalues  # array-like

    def predict(self, forcast_period=2):
        """Forecast ``forcast_period`` steps ahead, rounded to integers."""
        y_forcast = self.fittedModel.forecast(forcast_period)[0]
        y_forcast = np.round(y_forcast)  # round predicted sales to integers
        return y_forcast
def predict_by_all(timestamps, data, test_data, num_sample, split, output_steps):
    """Fit an ARMA(2, 2) per (time slot, station) on a sliding window and
    collect forecasts against the test data.  (Python 2 code: uses xrange.)

    Parameters — assumed shapes, TODO confirm against callers:
    timestamps : sequence of datetimes aligned with ``data``'s second axis
    data : array, stations x time
    test_data : array, stations x time
    num_sample : number of stations sampled per time slot
    split : sequence; split[0] is the training-window length
    output_steps : forecast horizon

    Returns (real, predict, index_all, error_all, valid_num).

    NOTE(review): ``real[t, r] = test_real`` assigns an output_steps-long
    slice into a single cell — this only works when output_steps == 1;
    confirm intended horizon.
    """
    warnings.filterwarnings("ignore")
    index_all = np.zeros([test_data.shape[1] - output_steps, num_sample])
    valid_num = np.zeros(test_data.shape[1] - output_steps)
    error_all = []
    real = np.zeros([test_data.shape[1] - output_steps, num_sample])
    predict = np.zeros([test_data.shape[1] - output_steps, num_sample])
    #station_sample = np.random.randint(data.shape[0], size=num_sample)
    station_sample = np.arange(data.shape[0])
    #widgets = ['Train: ', Percentage(), ' ', Bar('-'), ' ', ETA()]
    #pbar = ProgressBar(widgets=widgets, maxval=test_data.shape[0]-output_steps).start()
    for t in xrange(test_data.shape[1] - output_steps):
        #pbar.update(t)
        if t % 10 == 0:
            print(t)  # coarse progress indicator
        error_index = []
        for r in xrange(num_sample):
            # t: which time slot
            # i: which station
            i = station_sample[r]
            # Training window: split[0] observations ending just before the
            # forecast origin.
            train_df = pd.DataFrame(data[i][t:split[0] + t])
            train_df.index = pd.DatetimeIndex(timestamps[t:split[0] + t])
            try:
                results = ARMA(train_df, order=(2, 2)).fit(trend='c', disp=-1)
            except:
                # Non-convergent / degenerate series: record and skip.
                error_index.append(r)
                continue
            pre, _, _ = results.forecast(output_steps)
            test_real = test_data[i][t:t + output_steps]
            real[t, r] = test_real
            predict[t, r] = pre
        index_all[t] = station_sample
        error_all.append(error_index)
        valid_num[t] = num_sample - len(error_index)
    #pbar.finish()
    return real, predict, index_all, error_all, valid_num
def test_arma(timeseries):
    """Fit an ARMA(7, 2) model on *timeseries* and return the scalar
    one-step-ahead forecast, with statsmodels warnings suppressed."""
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        #order = st.arma_order_select_ic(timeseries, max_ar=5,max_ma=5,ic=['aic', 'bic', 'hqic'])
        fitted = ARMA(timeseries, (7, 2)).fit()
        point_forecast = fitted.forecast(1)[0]
        return point_forecast[0]
def get_prediction(train_data, p, q):
    """Fit an ARMA(p, q) model to *train_data* and return the scalar
    one-step-ahead forecast, with fit warnings suppressed."""
    with warnings.catch_warnings():
        # statsmodels' ARMA emits noisy convergence warnings; silence locally.
        warnings.filterwarnings("ignore")
        fitted = ARMA(train_data, (p, q)).fit(disp=0)
        point_forecast = fitted.forecast(1)[0]
        return point_forecast[0]
print('在AIC矩阵中去掉[%s,%s]组合,重新进行计算' % (p, q)) matrix.iloc[p, q] = np.nan arimafail = arma continue else: print(p, q) print(u'模型ARMA(%s,%s)符合白噪声检验' % (p, q)) break # In[6]: arma.summary() # 当p,q值为0,0时,summary方法报错 # In[7]: forecast_values, forecasts_standard_error, forecast_confidence_interval = arma.forecast( 5) forecast_values # In[8]: predictdata = pd.DataFrame(xtest_value) predictdata.insert(1, 'CWXT_DB:184:C:\\_predict', forecast_values) predictdata.rename(columns={ 'CWXT_DB:184:C:\\': u'实际值', 'CWXT_DB:184:C:\_predict': u'预测值' }, inplace=True) predictdata.info() # In[10]:
def arma_forecast(self, ts, p, q):
    """Fit an ARMA(p, q) model to *ts*; return (one-step forecast array,
    statsmodels summary2 table)."""
    fitted = ARMA(ts, order=(p, q)).fit(disp=-1)
    # ts_predict = arma.predict()
    one_step_ahead = fitted.forecast(1)[0]
    return one_step_ahead, fitted.summary2()
# Fit ARMA(1, 1) on the twice-differenced series, then invert every transform
# (two first-order diffs, a moving average, and the log) to recover a
# prediction on the original scale.  (Python 2 code: print statements.)
model = ARMA(ts_diff_2, order=(1, 1))
result_arma = model.fit(disp=-1, method='css')
predict_ts = result_arma.predict()
# Undo the first first-order differencing.
diff_shift_ts = ts_diff_1.shift(1)
diff_recover_1 = predict_ts.add(diff_shift_ts)
# Undo the second first-order differencing.
rol_shift_ts = rol_mean.shift(1)
diff_recover = diff_recover_1.add(rol_shift_ts)
# Undo the moving average (12-term rolling mean: value = mean*12 - sum of
# the previous 11 terms).
rol_sum = ts_log.rolling(window=11).sum()
rol_recover = diff_recover * 12 - rol_sum.shift(1)
# Undo the log transform.
log_recover = np.exp(rol_recover)
log_recover.dropna(inplace=True)
print log_recover
ts = ts[log_recover.index]  # keep only records that have a prediction
plt.figure(facecolor='white')
log_recover.plot(color='blue', label='Predict')
ts.plot(color='red', label='Original')
plt.legend(loc='best')
plt.title('RMSE: %.4f' % np.sqrt(sum((log_recover - ts)**2) / ts.size))
plt.show()
print test_stationarity.proper_model(ts_log)
# NOTE(review): ``model`` here is the *unfitted* ARMA object; forecast()
# presumably belongs on ``result_arma`` — confirm before relying on this line.
print model.forecast()
# 将训练好的模型保存到train_model.m中 joblib.dump(model, joblib_path) model_name="statsmodels_ES_model.pkl" pickle_path=os.path.join(model_root_path,model_name) # 将训练好的模型保存到train_model.pkl中 pickle.dump(model,open(pickle_path,"wb")) ''' ############################################################################ # 测试ARIMA index = pd.date_range('5/1/2018', periods=20, freq='d') ts = pd.Series([1.0, 2, 3, 4, 3, 6, 3, 7, 3, 5, 1, 2, 3, 4, 3, 6, 3, 7, 3, 5], index=index) model = ARMA(ts, order=(2, 1)) model = model.fit(disp=-1, method='css') y_fit = model.predict() y_hat = model.forecast(4)[0] print(y_fit) print(y_hat) model_root_path = "C://Users//Kang//Desktop//model_management//saved_models" model_name = "statsmodels_ARMA_model.m" joblib_path = os.path.join(model_root_path, model_name) # 将训练好的模型保存到train_model.m中 joblib.dump(model, joblib_path) model_name = "statsmodels_ARMA_model.pkl" pickle_path = os.path.join(model_root_path, model_name) # 将训练好的模型保存到train_model.pkl中 pickle.dump(model, open(pickle_path, "wb"))
dfa1 = dfa[:500].copy() ################# all_num = 0 predict_list = [] vol_list = [] except_num = 0 for i in range(len(dfa1) - 5): try: pq = st.arma_order_select_ic(list(dfa1['r'][i:i + 5]), max_ar=3, max_ma=3, ic=['bic']).bic_min_order arma = ARMA(list(dfa1['r'][i:i + 5]), pq).fit(disp=False) output1 = arma.forecast() garch = arch_model(dfa1['r'][i:i + 5], vol='Garch', p=1, o=0, q=1, dist='Normal').fit() output2 = garch.forecast() vol = np.sqrt(output2.variance.tail(1).values) vol_list = np.append(vol_list, vol) if vol >= dfa1['std'][i + 4]: output = output1[0] - 2 * vol else: output = output1[0] + 8 * vol
D_data.plot() # 时序图 plt.show() plot_acf(D_data).show() # 自相关图 plt.show() plot_pacf(D_data).show() # 偏自相关图 plt.show() print(u'1阶差分序列的ADF检验结果为:', ADF(D_data[u'dst差分'])) print(u'差分序列的白噪声检验结果为:', acorr_ljungbox(D_data, lags=1)) data[u'dst'] = data[u'dst'].astype(float) pmax = int(len(data) / 10) qmax = int(len(data) / 10) bic_matrix = [] for p in range(pmax + 1): tmp = [] for q in range(qmax + 1): try: tmp.append(ARMA(data, (p, q)).fit().bic) except: tmp.append(None) bic_matrix.append(tmp) bic_matrix = pd.DataFrame(bic_matrix) # 从中可以找出最小值 # print(bic_matrix) p, q = bic_matrix.stack().idxmin() print(u'bic最小的P值和q值为:%s、%s' % (p, q)) model = ARMA(data, (p, q)).fit() model.summary2() # 给出一份模型报告 forecast = model.forecast(5) # 作为期5天的预测,返回预测结果、标准误差、置信区间 print(forecast)
# Monte-Carlo evaluation: repeatedly pick a random (station, window),
# fit ARMA(2, 2) without trend on the window, and forecast output_steps ahead.
# NOTE(review): relies on ``run_times``, ``error_count``, ``loss``,
# ``index_all`` (and data arrays) defined earlier in the file — confirm they
# are initialised before this chunk runs.
error_index = np.zeros(run_times)
test_target = np.zeros([run_times, output_steps])
test_prediction = np.zeros([run_times, output_steps])
for r in range(run_times):
    print('run ' + str(r))
    i = np.random.randint(data.shape[0])  # random station
    j = np.random.randint(test_data.shape[-1] - output_steps)  # random window start
    train_df = pd.DataFrame(data[i][j:split[0] + split[1] + j])
    train_df.index = pd.DatetimeIndex(timestamps[j:split[0] + split[1] + j])
    try:
        results = ARMA(train_df, order=(2, 2)).fit(trend='nc', disp=-1)
    except:
        # Record the failed run index and move on.
        error_index[error_count] = r
        error_count += 1
        continue
    pre, _, _ = results.forecast(output_steps)
    test_real = test_data[i][j:j + output_steps]
    index_all[r] = [i, j]
    test_target[r] = test_real
    test_prediction[r] = pre
    loss += np.sum(np.square(pre - test_real))
print('================ calculate rmse for test data ============')
#n_rmse_val = np.sqrt(np.sum(np.square(val_predict - val_real))*1.0/np.prod(val_real.shape))
#n_rmse_test = np.sqrt(np.sum(np.square(test_predict - test_real))*1.0/np.prod(test_real.shape))
#rmse_val = pre_process.real_loss(n_rmse_val)
#rmse_test = pre_process.real_loss(n_rmse_test)
#print('val loss is ' + str(n_rmse_val) + ' , ' + str(rmse_val))
#print('test loss is ' + str(n_rmse_test) + ' , ' + str(rmse_test))
#print('val loss is ' + str(n_rmse_val))
print('run times: ' + str(run_times))
print('error count: ' + str(error_count))
def ts_arma(ts, p, q, start, end):
    """Fit an ARMA(p, q) model on *ts*, print a 5-step forecast and the model
    summary, and return the in-sample predictions over [start, end]."""
    fitted_model = ARMA(ts, order=(p, q)).fit(disp=-1)
    print("未来五年:", fitted_model.forecast(5)[0])
    in_sample_prediction = fitted_model.predict(start, end)
    print(fitted_model.summary())
    return in_sample_prediction
# find the lag with the smallest aic lag = min(aics, key=aics.get) print('Optimal lag = {}'.format(lag)) # train-test procedure using moving window series_len = len(series_monthly) train_len = len(train) test_len = len(test) y_pred = pd.Series([]) coefficients = [] confidence_intervals = [[], []] for i in range(test_len): print('Train - test iteration: i = {} from {}'.format(i + 1, test_len)) dynamic_train = series_monthly.iloc[i:i + train_len] model = ARMA(dynamic_train, order=(lag, 0)).fit(**PARAMS) results = model.forecast(1) confidence_intervals[0].extend([results[2][0][0]]) confidence_intervals[1].extend([results[2][0][1]]) y_pred = y_pred.append(pd.Series(results[0], index=[test.index[i]]), verify_integrity=True) coefficients.append(model.params) # plot test-predicted data f = plt.figure() plt.plot(test, color='blue') plt.plot(y_pred, color='orange') plt.fill_between(test.index, confidence_intervals[0], confidence_intervals[1], color='lightgrey') plt.gcf().set_size_inches(10, plt.gcf().get_size_inches()[1])
#模型拟合 from statsmodels.tsa.arima_model import ARMA Ct_ARMA = ARMA(Ct["Column2"], order=(4,2)).fit() print(Ct_ARMA.summary()) # In[116]: plt.plot(TSdata['Column1'], TSdata["Column2"], "o-") plt.plot(Ct["Column1"][0:53], Ct_ARMA.fittedvalues, 'o-') foresee = Ct_ARMA.forecast(6)[0].tolist() plt.plot(TSdata["Column1"][53:66],foresee,'*-') plt.show() # In[118]: print(TSdata[53:66]) print(foresee) # In[ ]:
ar1.summary() # the above creates the model ARMA, p =3, q=1 # next we want to plot the fitted values from the ARMA # model against the actual values in udiff # the udiff values are the log return values. plt.figure(figsize=(12, 8)) plt.plot(udiff.values, color='blue') preds = ar1.fittedvalues plt.plot(preds, color='red') plt.show() # next, let's make a 2 step ahead forecast, and plot it steps = 2 forecast = ar1.forecast(steps=steps)[0] plt.figure(figsize=(12, 8)) plt.plot(udiff.values, color='blue') preds = ar1.fittedvalues plt.plot(preds, color='red') plt.plot(pd.DataFrame(np.array([preds[-1], forecast[0]]).T, index=range( len(udiff.values) + 1, len(udiff.values) + 3)), color='green') plt.plot(pd.DataFrame(forecast, index=range( len(udiff.values) + 1, len(udiff.values) + 1 + steps)), color='green') plt.title('Display the predictions with the ARMA model')
diffed_ts = diff_ts(dta_log, d=[12, 1]) # diffed_ts = diff_ts(dta,d=[12,1]) # test_stationarity.testStationarity(diffed_ts) # test_stationarity.draw_acf_pacf(diffed_ts,l=30) model = arima_model(diffed_ts) model.get_proper_model() print 'bic:', model.bic, 'p:', model.p, 'q:', model.q print model.properModel.forecast()[0] # print model.forecast_next_day_value(type='day') model2 = ARMA(diffed_ts, (model.p, 1, model.q)).fit() model2.summary2() # predict_sunspots = model2.predict('2061','2071',dynamic=True) a = model2.forecast(10)[0][9] a_ts = predict_diff_recover(a, d=[12, 1]) # log_a = a_ts log_a = np.exp(a_ts) print log_a # pdb.set_trace() subdata.list[j].append(log_a) log_a = None np.savez('/home/wuxing/KDD/predict_only13.npz', subdata.list, j) print('--------------------------------' + str(j)) pdb.set_trace()
def func1(data, order):
    """Fit an ARMA model of the given *order* on *data* and return the
    35-step-ahead forecast values.

    The fitted model is also published through the module-level ``model``.
    """
    global model
    fitted = ARMA(data, order=order).fit(disp=0)
    model = fitted
    return fitted.forecast(steps=35)[0]
# Double first-order differencing of the log series, automatic order
# selection, refit with ARMA, and recovery of forecasts/predictions back to
# the original scale.  (Python 2 code: print statements; pdb breakpoints.)
diffed_ts = diff_ts(dta_log, d=[1, 1])
test_stationarity.testStationarity(diffed_ts)
test_stationarity.draw_acf_pacf(diffed_ts, l=31)
model = arima_model(diffed_ts)
pdb.set_trace()
model.get_proper_model()
print 'bic:', model.bic, 'p:', model.p, 'q:', model.q
print model.properModel.forecast()[0]
# print model.forecast_next_day_value(type='day')
# NOTE(review): ARMA takes a 2-tuple (p, q); a 3-tuple (p, 1, q) is being
# passed here — confirm this runs on the pinned statsmodels version.
model2 = ARMA(diffed_ts, (model.p, 1, model.q)).fit()
model2.summary2()
predict_sunspots = model2.predict('2090', '2100', dynamic=True)
a = model2.forecast(5)[0]
a_ts = predict_diff_recover(a, d=[1, 1])  # undo both differencings
log_a = np.exp(a_ts)  # undo the log transform
print log_a
pdb.set_trace()
# Refit with a fixed order (6, 0) and recover in-sample predictions too.
model.certain_model(6, 0)
predict_ts = model.properModel.predict()
diff_recover_ts = predict_diff_recover(predict_ts, d=[1, 1])
log_recover = np.exp(diff_recover_ts)
from statsmodels.tsa.arima_model import ARMA # dta = sm.datasets.sunspots.load_pandas().data # dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008')) # del dta["YEAR"] a_pd = pd.DataFrame(range(10), columns=['value']) arma_mod = sm.tsa.ARMA(a_pd, order=(0, 2)).fit(disp=False, trend='nc') print(arma_mod.params) a = range(10) order = (0, 1) arma_mod2 = ARMA(a_pd, order=order).fit(disp=False, trend='nc') print(arma_mod2.params) # predict_sunspots = arma_mod2.predict(0, 12) arma_mod2.forecast(1) # predict_sunspots = arma_mod.predict('1990', '2012', dynamic=True) # print(predict_sunspots) def proper_model(data_ts, maxLag): init_bic = float("inf") init_p = 0 init_q = 0 init_properModel = None for p in np.arange(maxLag): for q in np.arange(maxLag): model = ARMA(data_ts, order=(p, q)) try: results_ARMA = model.fit(disp=-1, method='css')
from db_tools import *
# Stationarity check, order selection, AR(3) fit, and a May-2018 forecast
# evaluation table.  (Notebook export; trailing statements are from a later
# string-handling demo cell.)
ADF(Ct)
plot_acf(Ct, lags=50);
plot_pacf(Ct, lags=50);
import statsmodels.tsa.stattools as ts
ts.arma_order_select_ic(Ct, max_ar=3, max_ma=3, ic=['aic', 'bic', 'hqic'])
from statsmodels.tsa.arima_model import ARMA
Ct_ARMA = ARMA(Ct, order=(3, 0)).fit()
Ct_ARMA.summary()
plt.plot(Ct, 'o-', Ct_ARMA.fittedvalues);
# Compare actuals vs. a 22-step forecast for May 2018 closing prices.
Ct_05 = pd.DataFrame({' 实际值': TSdata['2018-05'].Close});  # 2018-05 closing prices
Ct_05[' 预测值'] = Ct_ARMA.forecast(22)[0]  # model forecast
Ct_05[' 绝对误差'] = Ct_05[' 实际值'] - Ct_05[' 预测值'];  # absolute error
Ct_05[' 相对误差(%)'] = Ct_05[' 绝对误差'] / Ct_05[' 实际值'] * 100;  # relative error (%)
Ct_05

# Chapter 6: Introduction to big data analytics
## 6.1 The concept of big data
## 6.2 Text preprocessing in Python
### Basic string operations
#### String statistics
len('abc')
S = ["asfef", "qwerty", "yuiop", "b", "stuff.blah.yech"];
len(S)
[len(s) for s in S]