def time_sequence_predict(ts):
    """Model the log of ``ts`` and return a 31-step rolling forecast.

    Three stages run in order; only the last one feeds the return value:

    1. Fit ``ARMA(order=(4, 1))`` to a repeatedly differenced 25-point
       rolling mean of ``log(ts)`` and map the in-sample prediction back
       towards the original scale.
    2. Repeat the in-sample prediction through the ``arima_modeling`` and
       ``predict_diff_recover`` helpers.
    3. Forecast 31 daily values starting at 2016-07-01, rebuilding the
       model on every 7th step.

    Args:
        ts: Series to model. It is passed to ``np.log``, sliced with date
            strings and indexed by a prediction's index, so it is
            presumably a positive-valued pandas Series with a datetime
            index -- TODO confirm against callers.

    Returns:
        ``np.exp`` of the 31 forecast values as a pandas Series indexed by
        the daily dates starting at 2016-07-01.
    """
    log_series = np.log(ts)

    # ---- Stage 1: direct ARMA fit on a smoothed, differenced series ----
    # Smooth with a 25-point rolling mean, then take the first difference.
    rolling_mean = log_series.rolling(window=25).mean().dropna()
    first_diff = rolling_mean.diff(1).dropna()

    # Difference three more times; this series is the model input for all
    # three stages below.
    model_input = first_diff
    for _ in range(3):
        model_input = model_input.diff(1).dropna()

    arma = ARMA(model_input, order=(4, 1))
    arma_fit = arma.fit(disp=-1, method='css')
    in_sample = arma_fit.predict()

    # Add back the lagged series to undo two levels of differencing.
    # NOTE(review): four differences were applied above but only two are
    # added back here -- confirm this is intended.
    undiff_once = in_sample.add(first_diff.shift(1))
    undiff_twice = undiff_once.add(rolling_mean.shift(1))

    # Undo the rolling mean: 25 * mean minus the sum of the previous 24 logs.
    prev_24_sum = log_series.rolling(window=24).sum()
    log_estimate = undiff_twice * 25 - prev_24_sum.shift(1)

    # Undo the log transform and keep only positions that have a prediction.
    log_recover = np.exp(log_estimate).dropna()
    ts = ts[log_recover.index]

    # ---- Stage 2: in-sample prediction via the project helpers ----
    # The return value of diff_ts is not used; the call is kept because the
    # helper may record state that predict_diff_recover reads -- TODO confirm.
    diff_ts(log_series, d=[25, 1])
    model = arima_modeling(model_input)
    model.certain_model(4, 1)
    helper_prediction = model.properModel.predict()
    # NOTE(review): recovery uses d=[25, 1] although the model was built
    # from model_input rather than from the diff_ts output -- verify.
    recovered = predict_diff_recover(helper_prediction, d=[25, 1])
    log_recover = np.exp(recovered)
    ts = ts[log_recover.index]

    # ---- Stage 3: rolling forecast ----
    train_log = log_series[:'2016-06-30']
    forecast_days = pd.date_range('2016-07-01', periods=31, freq='D').tolist()
    # Zero-filled placeholder; only its length and index are used.
    placeholder = pd.Series([0] * 31)
    placeholder.index = pd.Index(forecast_days)
    # As in stage 2, the result is unused and the call is kept as-is.
    diff_ts(train_log, [25, 1])

    forecasts = []
    for step in range(len(forecast_days)):
        if step % 7 == 0:
            # Rebuild the model on every 7th step.
            # NOTE(review): model_input is derived from the full log series,
            # not from train_log -- confirm this is intended.
            model = arima_modeling(model_input)
            model.certain_model(4, 1)
        next_value = forecast_next_day_data(model, type='day')
        forecasts.append(next_value)
        # Pass the forecast back to the helper together with the training data.
        add_today_data(model, train_log, next_value, [25, 1], type='day')

    forecast_series = pd.Series(
        data=forecasts,
        index=placeholder['2016-07-01':].index,
    )
    log_recover = np.exp(forecast_series)

    # Slice of the (filtered) input from 2016-07-01 on; it does not feed the
    # return value and is kept only so the slice is still evaluated.
    observed = ts['2016-07-01':]

    return log_recover