def multi_emd_ann(lag=3, num_trial=20, hidden=128, epochs=20):
    """Run the "multi" EMD + ANN experiment: one network per sub-sequence.

    In each trial every sub-sequence in the module-level ``imfs`` is turned
    into a lag matrix with ``seq_tf_matrix`` (``lag`` input columns followed
    by one target column), handed to ``ann``, and the per-sub-sequence
    predictions are added together. The summed prediction is passed through
    ``restore_data`` and scored with ``loss_function`` against the last
    ``test_num`` values of ``data``.

    Side effects:
        * writes ``lag_<lag>_multi_emd_ann_data_tf_result.csv`` and
          ``lag_<lag>_multi_emd_ann_real_result.csv`` under
          ``../result/<name_data>/``;
        * appends one JSON line with the per-trial metrics to
          ``../result/<name_data>/multi_emd_ann_result_evaluation.json``.

    Args:
        lag: Number of lagged inputs per sample.
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``ann`` as ``hidden``.
        epochs: Forwarded to ``ann`` as ``epochs``.

    Returns:
        None.
    """
    pre_data_tf_result = pd.DataFrame()  # predictions in the transformed (percentage) domain
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # per-trial run time, in minutes
    mape, mae, mse, rmse = [], [], [], []

    for j in range(num_trial):
        pr = None
        start_time = time.time()
        for imf in imfs:
            d = seq_tf_matrix(imf, n=lag + 1)
            x = d[:, :-1]
            y = d[:, -1]
            sub_pr = ann(x, y, test_num=test_num, hidden=hidden,
                         batch_size=batch_size, epochs=epochs)
            # The sub-sequence predictions are simply added together.
            pr = sub_pr if pr is None else pr + sub_pr
        end_time = time.time()
        t = end_time - start_time

        # Map the prediction back to the original scale; the values preceding
        # each test point are supplied as the second argument.
        restore_value = restore_data(pr, data[-test_num - 1:-1])
        mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:])

        column = str(j + 1) + '_times_lag' + str(lag)
        pre_data_tf_result[column] = pr
        real_result[column] = restore_value

        time_.append(t / 60)  # minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the ground truth next to the predictions before saving.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]  # true transformed values
    real_result['test_value'] = data[-test_num:]  # true original values
    pre_data_tf_result.to_csv('../result/' + name_data + '/data_tf_result/lag_' + str(lag) + '_multi_emd_ann_data_tf_result.csv')
    real_result.to_csv('../result/' + name_data + '/real_result/lag_' + str(lag) + '_multi_emd_ann_real_result.csv')

    # Evaluation metrics, one list entry per trial.
    result_evaluation = {'lag': lag, 'num_sub_sequences': len(imfs), 'time': time_,
                         'mape': mape, 'mae': mae, 'mse': mse, 'rmse': rmse}
    # Context manager guarantees the log file is closed even if the write fails.
    with open('../result/' + name_data + '/multi_emd_ann_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')


# arima(15)
# lag = [3, 4, 5, 6, 7, 8, 9]
# for lag in lag:
#     only_ann(lag, 2)
#     only_lstm(lag, 2)
#     single_emd_ann(lag, 2)
#     single_emd_lstm(lag, 2)
#     multi_emd_ann(lag, 2)
#     multi_emd_lstm(lag, 2)
# single_emd_lstm(3, 2, epochs=20)
# single_emd_lstm(3, 5, epochs=30)
# single_emd_ann(3, 1)
def only_lstm(lag=3, num_trial=20, hidden=128, epochs=20):
    """Run the plain LSTM experiment on the transformed series ``data_tf``.

    ``data_tf`` is converted to a lag matrix with ``seq_tf_matrix`` (``lag``
    input columns followed by one target column). The inputs are reshaped to
    ``(sample, lag, 1)`` and, for each trial, passed to ``lstm``. Each
    prediction is passed through ``restore_data`` and scored with
    ``loss_function`` against the last ``test_num`` values of ``data``.

    Side effects:
        * writes ``lag_<lag>_only_lstm_data_tf_result.csv`` and
          ``lag_<lag>_only_lstm_real_result.csv`` under
          ``../<ada_result>/<name_data>/``;
        * appends one JSON line with the per-trial metrics to
          ``../<ada_result>/<name_data>/only_lstm_result_evaluation.json``.

    Args:
        lag: Number of lagged inputs per sample.
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``lstm`` as ``hidden``.
        epochs: Forwarded to ``lstm`` as ``epochs``.

    Returns:
        None.
    """
    # Lag matrix: n - 1 lagged columns plus the target, n columns in total.
    d = seq_tf_matrix(data_tf, n=lag + 1)
    x = d[:, :-1]
    x = np.reshape(x, (x.shape[0], x.shape[1], 1))  # (sample, lag, 1) network input
    # The target must come from the full matrix. Reading it from ``x`` after
    # the last column was dropped would yield the most recent *input* instead
    # of the value to forecast. ``y`` is 1-D, as in the ANN variants.
    y = d[:, -1]

    pre_data_tf_result = pd.DataFrame()  # predictions in the transformed (percentage) domain
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # per-trial run time, in minutes
    mape, mae, mse, rmse = [], [], [], []

    for i in range(num_trial):
        start_time = time.time()
        pr = lstm(x, y, test_num=test_num, batch_size=batch_size,
                  epochs=epochs, hidden=hidden)
        end_time = time.time()

        # Map the prediction back to the original scale.
        restore_value = restore_data(pr, data[-test_num - 1:-1])
        mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:])

        column = str(i + 1) + '_times_lag' + str(lag)
        pre_data_tf_result[column] = pr
        real_result[column] = restore_value

        # Store the metrics of this trial.
        time_.append((end_time - start_time) / 60)  # minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the ground truth next to the predictions before saving.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]  # true transformed values
    real_result['test_value'] = data[-test_num:]  # true original values
    pre_data_tf_result.to_csv('../' + ada_result + '/' + name_data + '/data_tf_result/lag_' + str(lag) + '_only_lstm_data_tf_result.csv')
    real_result.to_csv('../' + ada_result + '/' + name_data + '/real_result/lag_' + str(lag) + '_only_lstm_real_result.csv')

    # Evaluation metrics, one list entry per trial.
    result_evaluation = {
        'lag': lag,
        'time': time_,
        'mape': mape,
        'mae': mae,
        'mse': mse,
        'rmse': rmse
    }
    # Context manager guarantees the log file is closed even if the write fails.
    with open('../' + ada_result + '/' + name_data + '/only_lstm_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')
def get_data(l, lag=3):
    """Build (inputs, targets) from the EMD decomposition of a series.

    All elements of ``l`` except the last are decomposed with ``EMD``. The
    transposed decomposition is windowed by ``seq_tf_matrix`` using ``lag``
    as the window size, and the last ``len(x)`` elements of ``l`` become the
    one-step-ahead targets.

    Per the original notes, the decomposition holds m IMFs plus one residual,
    so each sample has shape ``(lag, m + 1)``:
    ``[[imf1_1, ..., imfm_1, res_1], ..., [imf1_lag, ..., imfm_lag, res_lag]]``.

    Args:
        l: The input series.
        lag: Number of lagged steps per sample (default 3).

    Returns:
        A tuple ``(x, y)`` of windowed inputs and matching targets.
    """
    # The final observation is withheld from the decomposition.
    components = EMD(l[:-1]).decompose()
    x = seq_tf_matrix(components.T, lag)
    # Targets are taken from the tail of the original series.
    y = l[-len(x):]
    return x, y
def single_emd_ann(lag=3, num_trial=20, hidden=256, epochs=20):
    """Run the "single" EMD + ANN experiment: one network over all sub-sequences.

    The module-level ``imfs`` is transposed and windowed with
    ``seq_tf_matrix``. The last step of every window is dropped, and the
    remaining ``lag`` steps of all sub-sequences are flattened into one
    feature vector per sample. The targets are the trailing values of
    ``data_tf``. For each trial the samples are passed to ``ann``; the
    prediction is passed through ``restore_data`` and scored with
    ``loss_function`` against the last ``test_num`` values of ``data``.

    Side effects:
        * writes ``lag_<lag>_single_emd_ann_data_tf_result.csv`` and
          ``lag_<lag>_single_emd_ann_real_result.csv`` under
          ``../result/<name_data>/``;
        * appends one JSON line with the per-trial metrics to
          ``../result/<name_data>/single_emd_ann_result_evaluation.json``.

    Args:
        lag: Number of lagged steps per sample.
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``ann`` as ``hidden``.
        epochs: Forwarded to ``ann`` as ``epochs``.

    Returns:
        None.
    """
    # Windows of n steps: n - 1 lagged steps plus the step to predict.
    x = seq_tf_matrix(imfs.T, n=lag+1)
    x = x[:, :-1, :]  # keep only the lagged steps
    x = np.reshape(x, (x.shape[0], x.shape[1]*x.shape[2]))  # flatten (lag, components) per sample
    y = data_tf[-len(x):]

    pre_data_tf_result = pd.DataFrame()  # predictions in the transformed (percentage) domain
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # per-trial run time, in minutes
    mape, mae, mse, rmse = [], [], [], []

    for i in range(num_trial):
        start_time = time.time()
        pr = ann(x, y, test_num=test_num, hidden=hidden,
                 batch_size=batch_size, epochs=epochs)
        end_time = time.time()

        # Map the prediction back to the original scale.
        restore_value = restore_data(pr, data[-test_num - 1:-1])
        mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:])

        # Store the predictions of this trial.
        column = str(i+1)+'_times_lag'+str(lag)
        pre_data_tf_result[column] = pr
        real_result[column] = restore_value

        # Store the metrics of this trial.
        time_.append((end_time - start_time) / 60)  # minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the ground truth next to the predictions before saving.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]  # true transformed values
    real_result['test_value'] = data[-test_num:]  # true original values
    pre_data_tf_result.to_csv('../result/'+name_data+'/data_tf_result/lag_'+str(lag)+'_single_emd_ann_data_tf_result.csv')
    real_result.to_csv('../result/'+name_data+'/real_result/lag_'+str(lag)+'_single_emd_ann_real_result.csv')

    # Evaluation metrics, one list entry per trial.
    result_evaluation = {'lag': lag, 'num_sub_sequences': len(imfs), 'time': time_,
                         'mape': mape, 'mae': mae, 'mse': mse, 'rmse': rmse}
    # Context manager guarantees the log file is closed even if the write fails.
    with open('../result/'+name_data+'/single_emd_ann_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')
def multi_emd_aann(lag=3, num_trial=2, hidden=128, epochs=20, ignore=ignore):
    """Run the multi EMD + ANN experiment, re-decomposing for every test point.

    For each of the ``test_num`` test points, only the part of ``data_tf``
    that precedes that point is decomposed with ``EMD``. Every resulting
    sub-sequence gets a placeholder ``0`` appended (standing in for the
    unknown value to predict), is turned into a lag matrix, and is passed to
    ``ann`` with ``test_num=1``. The per-sub-sequence one-step predictions
    are summed to give the forecast for that test point. The collected
    forecasts are passed through ``restore_data`` and scored with
    ``loss_function`` against the last ``test_num`` values of ``data``.

    Side effects:
        * writes ``lag_<lag>_multi_emd_aann_data_tf_result.csv`` and
          ``lag_<lag>_multi_emd_aann_real_result.csv`` under
          ``../<ada_result>/<name_data>/``;
        * appends one JSON line with the per-trial metrics to
          ``../<ada_result>/<name_data>/multi_emd_aann_result_evaluation.json``.

    Args:
        lag: Number of lagged inputs per sample (before ``ignore`` is applied).
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``ann`` as ``hidden``.
        epochs: Forwarded to ``ann`` as ``epochs``.
        ignore: If truthy, the ``ignore`` lag columns closest to the target
            are dropped from the inputs. The default is the value of the
            module-level ``ignore`` at definition time.

    Returns:
        None.
    """
    pre_data_tf_result = pd.DataFrame()  # predictions in the transformed (percentage) domain
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # per-trial run time, in minutes
    mape, mae, mse, rmse = [], [], [], []

    for j in range(num_trial):
        result = []
        start_time = time.time()
        # One decomposition and one forecast per test sample.
        for k in range(test_num):
            # Only data strictly before the k-th test point is decomposed.
            # The result is kept in a local name so the module-level ``imfs``
            # is not shadowed.
            sub_sequences = EMD(data_tf[:-test_num + k]).decompose()
            pr = None
            for sub_sequence in sub_sequences:
                # The trailing 0 is only a placeholder for the unknown target
                # of the final row.
                d = seq_tf_matrix(np.hstack((sub_sequence, [0])), n=lag + 1)
                x = d[:, :-1]
                if ignore:
                    x = x[:, :-ignore]  # drop the lags closest to the target
                y = d[:, -1]
                sub_pr = ann(x, y, test_num=1, batch_size=batch_size,
                             hidden=hidden, epochs=epochs)
                # The sub-sequence predictions are simply added together.
                pr = sub_pr if pr is None else pr + sub_pr
            result.append(pr[0])
        end_time = time.time()

        pr = np.array(result)
        # Map the prediction back to the original scale.
        restore_value = restore_data(pr, data[-test_num - 1:-1])
        mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:])

        # Store the predictions of this trial.
        column = str(j + 1) + '_times_lag' + str(lag)
        pre_data_tf_result[column] = pr
        real_result[column] = restore_value

        # Store the metrics of this trial.
        time_.append((end_time - start_time) / 60)  # minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the ground truth next to the predictions before saving.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]  # true transformed values
    real_result['test_value'] = data[-test_num:]  # true original values
    pre_data_tf_result.to_csv('../' + ada_result + '/' + name_data + '/data_tf_result/lag_' + str(lag) + '_multi_emd_aann_data_tf_result.csv')
    real_result.to_csv('../' + ada_result + '/' + name_data + '/real_result/lag_' + str(lag) + '_multi_emd_aann_real_result.csv')

    # Evaluation metrics, one list entry per trial.
    result_evaluation = {
        'lag': lag,
        'time': time_,
        'mape': mape,
        'mae': mae,
        'mse': mse,
        'rmse': rmse
    }
    # Context manager guarantees the log file is closed even if the write fails.
    with open('../' + ada_result + '/' + name_data + '/multi_emd_aann_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')
# Location of the CSV file for the selected data set.
path = '../data/' + name_data + '.csv'

# Load the raw table; the file is read as GBK-encoded text.
df_01 = pd.read_csv(path, encoding='gbk')

# Keep the closing price indexed by date, then reverse the row order so the
# series is arranged by date (presumably the file is stored newest-first;
# verify against the data).
df_data = df_01[['Date', 'Close']].set_index('Date')
df_data = df_data.iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')

# Flatten the single-column frame into a 1-D array of shape (sample,).
data = np.array(df_data)
data = data.reshape((len(data), ))

diff = data_trans(data)

# print(theta(series_minmax(data)[0]))
# print(theta(series_minmax(diff)[0]))
# print(corrcoef_imfs(series_minmax(data)[0]))
# print(corrcoef_imfs(series_minmax(diff)[0]))

lagged_raw = seq_tf_matrix(data, 10)
print(matrix_cor(lagged_raw))
# Shanghai Composite Index
# [0.9925052213533833, 0.993386133667623, 0.9942571787506765, 0.9950962804128652, 0.9960115824395269,
# 0.9969224666313158, 0.9977260830415531, 0.9984645658860706, 0.9992588904110402, 1.0]
# S&P 500
# [0.9980923986063942, 0.9982712262593705, 0.998458288931568, 0.9986495525239756, 0.9988430656437745,
# 0.9990563718109147, 0.9992787669886709, 0.9994977021953116, 0.9997361622154102, 1.0]

lagged_diff = seq_tf_matrix(diff, 10)
print(matrix_cor(lagged_diff))
# Shanghai Composite Index
# [-0.008651145443581408, 0.006834999895765309, 0.021172706952804994, -0.051951892446751614, 0.003523074227788529,
# 0.07219929582013276, 0.045227720273789414, -0.037241245123885554, 0.03602161724884085, 1.0]
# S&P 500
# [-0.01350694261104616, -0.017031069396570293, -0.008112646905554516, -0.003528964957479074, -0.038046959576339454,
# -0.016926027633093563, 0.007025123013691551, -0.03670731870127638, -0.048211988418971646, 1.0]