Example #1
0
def time_sequence_predict(ts):
    """Forecast 31 daily values from 2016-07-01 with a rolling ARMA(4, 1) model.

    The series is log-transformed, smoothed with a 25-point rolling mean and
    differenced; the resulting series feeds three stages that run in order:

    1. an in-sample ``ARMA`` fit whose fitted values are mapped back to the
       original scale,
    2. the same kind of in-sample fit through the ``arima_modeling``,
       ``diff_ts`` and ``predict_diff_recover`` helpers,
    3. a one-step-ahead rolling forecast for the 31 days starting on
       2016-07-01, with the model rebuilt every 7 steps.

    Only the stage 3 result is returned.

    NOTE(review): within this function the results of stages 1 and 2 are
    overwritten before the return. They are retained because the helpers they
    call are defined elsewhere and may keep state or raise -- confirm before
    deleting them.

    Args:
        ts: Observed series. It is passed to ``np.log``, uses ``.rolling`` and
            is sliced with date strings, so presumably a pandas Series of
            positive values with a date-like index -- TODO confirm.

    Returns:
        pandas.Series holding ``exp`` of the 31 rolling forecasts, indexed by
        day from 2016-07-01.
    """
    window = 25  # rolling-mean length, also the first lag handed to diff_ts

    # Log transform.
    ts_log = np.log(ts)

    # Model identification: smooth, then difference.
    rol_mean = ts_log.rolling(window=window).mean()
    rol_mean.dropna(inplace=True)
    ts_diff_1 = rol_mean.diff(1)
    ts_diff_1.dropna(inplace=True)

    # Three further first-order differences on top of ts_diff_1.
    ts_diff_2 = ts_diff_1
    for _ in range(3):
        ts_diff_2 = ts_diff_2.diff(1)
        ts_diff_2.dropna(inplace=True)

    # ---- Stage 1: in-sample ARMA fit ------------------------------------
    model = ARMA(ts_diff_2, order=(4, 1))
    result_arma = model.fit(disp=-1, method='css')

    # In-sample fitted values.
    predict_ts = result_arma.predict()
    # Undo a first-order difference.
    # NOTE(review): ts_diff_2 was differenced three times after ts_diff_1,
    # but only one step is added back here -- confirm this is intended.
    diff_shift_ts = ts_diff_1.shift(1)
    diff_recover_1 = predict_ts.add(diff_shift_ts)
    # Undo the first-order difference of the rolling mean.
    rol_shift_ts = rol_mean.shift(1)
    diff_recover = diff_recover_1.add(rol_shift_ts)
    # Undo the rolling mean: x_t = window * mean_t - sum(previous window-1).
    rol_sum = ts_log.rolling(window=window - 1).sum()
    rol_recover = diff_recover * window - rol_sum.shift(1)
    # Undo the log transform.
    log_recover = np.exp(rol_recover)
    log_recover.dropna(inplace=True)

    # Keep only the observations that have a fitted value.
    ts = ts[log_recover.index]

    # ---- Stage 2: in-sample fit through the ARIMA helpers ---------------
    # The return value of diff_ts was never read; the call itself is kept
    # because it presumably records state that predict_diff_recover relies
    # on -- TODO confirm against diff_ts.
    diff_ts(ts_log, d=[window, 1])
    # NOTE(review): the model is built from ts_diff_2, not from the diff_ts
    # output, yet the recovery below uses the [window, 1] scheme -- confirm.
    model = arima_modeling(ts_diff_2)
    model.certain_model(4, 1)
    predict_ts = model.properModel.predict()
    diff_recover_ts = predict_diff_recover(predict_ts, d=[window, 1])
    log_recover = np.exp(diff_recover_ts)

    # Keep only the observations that have a fitted value.
    ts = ts[log_recover.index]

    # ---- Stage 3: rolling forecast --------------------------------------
    ts_train = ts_log[:'2016-06-30']
    forecast_index = pd.Index(
        pd.date_range('2016-07-01', periods=31, freq='D').tolist())

    # As in stage 2, only the call is kept; its result was never read.
    diff_ts(ts_train, [window, 1])
    forecast_list = []

    for step in range(len(forecast_index)):
        if step % 7 == 0:
            # Rebuild the model every 7 steps.
            # NOTE(review): ts_diff_2 derives from the full ts_log rather
            # than from ts_train -- confirm the test period is meant to be
            # part of the fitting data.
            model = arima_modeling(ts_diff_2)
            model.certain_model(4, 1)
        forecast_data = forecast_next_day_data(model, type='day')
        forecast_list.append(forecast_data)
        add_today_data(model, ts_train, forecast_data, [window, 1], type='day')

    predict_ts = pd.Series(data=forecast_list, index=forecast_index)
    # Undo the log transform.
    log_recover = np.exp(predict_ts)
    return log_recover