Beispiel #1
0
def multi_emd_ann(lag=3, num_trial=20, hidden=128, epochs=20):
    """Run repeated "multi EMD + ANN" forecasts and write the results to disk.

    In every trial one ANN is fitted per sub-sequence of the module-level
    ``imfs`` and the per-sub-sequence predictions are added together. The
    summed prediction is passed through ``restore_data`` and scored with
    ``loss_function`` against the last ``test_num`` values of ``data``.

    Relies on module-level names: ``imfs``, ``data``, ``data_tf``,
    ``test_num``, ``batch_size``, ``name_data`` and the helpers
    ``seq_tf_matrix``, ``ann``, ``restore_data``, ``loss_function``.

    Args:
        lag: Number of lagged inputs per sample (the lag matrix is built
            with ``lag + 1`` columns; the last column is the target).
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``ann`` as ``hidden``.
        epochs: Forwarded to ``ann`` as ``epochs``.

    Side effects:
        Writes two CSV files below ``../result/<name_data>/`` and appends one
        JSON line with the evaluation metrics to
        ``multi_emd_ann_result_evaluation.json``.
    """
    column_suffix = '_times_lag' + str(lag)

    pre_data_tf_result = pd.DataFrame()  # predictions on the transformed (percentage) scale
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # elapsed time per trial, in minutes
    mape, mae, mse, rmse = [], [], [], []
    for j in range(num_trial):
        pr = None
        start_time = time.time()
        for imf in imfs:
            d = seq_tf_matrix(imf, n=lag + 1)
            # All columns but the last are the inputs, the last one is the target.
            sub_pr = ann(d[:, :-1], d[:, -1], test_num=test_num, hidden=hidden,
                         batch_size=batch_size, epochs=epochs)
            # Sub-sequence predictions are simply summed.
            pr = sub_pr if pr is None else pr + sub_pr
        end_time = time.time()

        restore_value = restore_data(pr, data[-test_num - 1:-1])  # restore the predicted values
        mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:])

        pre_data_tf_result[str(j + 1) + column_suffix] = pr
        real_result[str(j + 1) + column_suffix] = restore_value

        time_.append((end_time - start_time) / 60)  # seconds -> minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the true values so predictions and targets sit side by side.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]
    real_result['test_value'] = data[-test_num:]
    pre_data_tf_result.to_csv('../result/' + name_data + '/data_tf_result/lag_' + str(lag) + '_multi_emd_ann_data_tf_result.csv')
    real_result.to_csv('../result/' + name_data + '/real_result/lag_' + str(lag) + '_multi_emd_ann_real_result.csv')

    # Evaluation metrics of all trials.
    result_evaluation = {'lag': lag, 'num_sub_sequences': len(imfs), 'time': time_, 'mape': mape,
                         'mae': mae, 'mse': mse, 'rmse': rmse}

    # ``with`` guarantees the handle is closed even if the write fails.
    with open('../result/' + name_data + '/multi_emd_ann_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')


# arima(15)
# lag = [3, 4, 5, 6, 7, 8, 9]
# for lag in lag:
#     only_ann(lag, 2)
#     only_lstm(lag, 2)
#     single_emd_ann(lag, 2)
#     single_emd_lstm(lag, 2)
#     multi_emd_ann(lag, 2)
#     multi_emd_lstm(lag, 2)
# single_emd_lstm(3, 2, epochs=20)
# single_emd_lstm(3, 5, epochs=30)
# single_emd_ann(3, 1)
Beispiel #2
0
def only_lstm(lag=3, num_trial=20, hidden=128, epochs=20):
    """Run repeated plain-LSTM forecasts on ``data_tf`` and write the results.

    The module-level series ``data_tf`` is turned into a lag matrix with
    ``lag + 1`` columns; the first ``lag`` columns are the inputs and the
    last column is the target. Each trial fits ``lstm``, passes the
    prediction through ``restore_data`` and scores it with ``loss_function``
    against the last ``test_num`` values of ``data``.

    Relies on module-level names: ``data``, ``data_tf``, ``test_num``,
    ``batch_size``, ``name_data``, ``ada_result`` and the helpers
    ``seq_tf_matrix``, ``lstm``, ``restore_data``, ``loss_function``.

    Args:
        lag: Number of lagged inputs per sample.
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``lstm`` as ``hidden``.
        epochs: Forwarded to ``lstm`` as ``epochs``.

    Side effects:
        Writes two CSV files below ``../<ada_result>/<name_data>/`` and
        appends one JSON line with the evaluation metrics to
        ``only_lstm_result_evaluation.json``.
    """
    matrix = seq_tf_matrix(data_tf, n=lag + 1)  # n columns: n-1 lag terms plus the target
    # The target must be read from the full matrix BEFORE the last column is
    # dropped; taking ``x[:, -1]`` afterwards would return the most recent lag
    # input instead of the value to predict.
    y = matrix[:, -1]
    x = matrix[:, :-1]
    x = np.reshape(x, (x.shape[0], x.shape[1], 1))  # reshape into the LSTM input format

    out_dir = '../' + ada_result + '/' + name_data
    column_suffix = '_times_lag' + str(lag)

    pre_data_tf_result = pd.DataFrame()  # predictions on the transformed scale
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # elapsed time per trial, in minutes
    mape, mae, mse, rmse = [], [], [], []

    for i in range(num_trial):
        start_time = time.time()
        pr = lstm(x,
                  y,
                  test_num=test_num,
                  batch_size=batch_size,
                  epochs=epochs,
                  hidden=hidden)
        end_time = time.time()
        restore_value = restore_data(pr, data[-test_num - 1:-1])  # restore the predicted values
        mape_, mae_, mse_, rmse_ = loss_function(restore_value,
                                                 data[-test_num:])
        pre_data_tf_result[str(i + 1) + column_suffix] = pr
        real_result[str(i + 1) + column_suffix] = restore_value
        # Keep the metrics of this trial.
        time_.append((end_time - start_time) / 60)  # seconds -> minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the true values so predictions and targets sit side by side.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]
    real_result['test_value'] = data[-test_num:]
    pre_data_tf_result.to_csv(out_dir +
                              '/data_tf_result/lag_' + str(lag) +
                              '_only_lstm_data_tf_result.csv')
    real_result.to_csv(out_dir +
                       '/real_result/lag_' + str(lag) +
                       '_only_lstm_real_result.csv')

    # Evaluation metrics of all trials.
    result_evaluation = {
        'lag': lag,
        'time': time_,
        'mape': mape,
        'mae': mae,
        'mse': mse,
        'rmse': rmse
    }

    # ``with`` guarantees the handle is closed even if the write fails.
    with open(out_dir + '/only_lstm_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')
Beispiel #3
0
def get_data(l, lag=3):
    """
    Build EMD-based model inputs and the matching targets from series ``l``.

    By default three lags are used to predict one step ahead. The last item
    of ``l`` does not take part in the decomposition.

    Returns a tuple ``(x, y)``: ``x`` is the result of ``seq_tf_matrix`` on
    the transposed decomposition, ``y`` is the last ``len(x)`` items of ``l``.
    """
    history = l[:-1]  # everything except the final observation
    # Per the original notes the decomposition holds m IMFs plus one residual
    # term, so each sample is expected to look like
    # [[imf1_1, ..., imfm_1, res_1], ..., [imf1_lag, ..., imfm_lag, res_lag]],
    # i.e. ``lag`` rows of m+1 values -- TODO confirm against seq_tf_matrix.
    components = EMD(history).decompose()
    x = seq_tf_matrix(components.T, lag)
    # One target (the next-step value) per input sample.
    y = l[-len(x):]
    return x, y
Beispiel #4
0
def single_emd_ann(lag=3, num_trial=20,  hidden=256, epochs=20):
    """Run repeated "single EMD + ANN" forecasts and write the results to disk.

    The transposed module-level ``imfs`` is turned into lagged samples, the
    last step of every sample is dropped, and the remaining
    (lag, component) values are flattened into one feature vector per
    sample. A single ANN is fitted per trial on those features against the
    tail of ``data_tf``; its prediction is passed through ``restore_data``
    and scored with ``loss_function`` against the last ``test_num`` values
    of ``data``.

    Relies on module-level names: ``imfs``, ``data``, ``data_tf``,
    ``test_num``, ``batch_size``, ``name_data`` and the helpers
    ``seq_tf_matrix``, ``ann``, ``restore_data``, ``loss_function``.

    Args:
        lag: Number of lagged steps per sample.
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``ann`` as ``hidden``.
        epochs: Forwarded to ``ann`` as ``epochs``.

    Side effects:
        Writes two CSV files below ``../result/<name_data>/`` and appends one
        JSON line with the evaluation metrics to
        ``single_emd_ann_result_evaluation.json``.
    """
    x = seq_tf_matrix(imfs.T, n=lag+1)  # lagged samples with n steps each (n-1 lag terms)
    x = x[:, :-1, :]  # drop the last step of every sample
    x = np.reshape(x, (x.shape[0], x.shape[1]*x.shape[2]))  # flatten to one vector per sample
    y = data_tf[-len(x):]

    column_suffix = '_times_lag' + str(lag)

    pre_data_tf_result = pd.DataFrame()  # predictions on the transformed (percentage) scale
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # elapsed time per trial, in minutes
    mape, mae, mse, rmse = [], [], [], []
    for i in range(num_trial):
        start_time = time.time()
        pr = ann(x, y, test_num=test_num, hidden=hidden, batch_size=batch_size, epochs=epochs)  # predicted values
        end_time = time.time()
        restore_value = restore_data(pr, data[-test_num - 1:-1])  # restore the predicted values
        mape_, mae_, mse_, rmse_ = loss_function(restore_value, data[-test_num:])
        # Keep the predictions of this trial.
        pre_data_tf_result[str(i+1) + column_suffix] = pr
        real_result[str(i+1) + column_suffix] = restore_value
        # Keep the metrics of this trial.
        time_.append((end_time - start_time) / 60)  # seconds -> minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the true values so predictions and targets sit side by side.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]
    real_result['test_value'] = data[-test_num:]
    pre_data_tf_result.to_csv('../result/'+name_data+'/data_tf_result/lag_'+str(lag)+'_single_emd_ann_data_tf_result.csv')
    real_result.to_csv('../result/'+name_data+'/real_result/lag_'+str(lag)+'_single_emd_ann_real_result.csv')

    # Evaluation metrics of all trials.
    result_evaluation = {'lag': lag, 'num_sub_sequences': len(imfs), 'time': time_, 'mape': mape,
                         'mae': mae, 'mse': mse, 'rmse': rmse}

    # ``with`` guarantees the handle is closed even if the write fails.
    with open('../result/'+name_data+'/single_emd_ann_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')
Beispiel #5
0
def multi_emd_aann(lag=3, num_trial=2, hidden=128, epochs=20, ignore=ignore):
    """Run repeated rolling "multi EMD + ANN" forecasts and write the results.

    For each of the ``test_num`` test points the series ``data_tf`` is cut
    just before that point and decomposed again with ``EMD``. One ANN is
    fitted per sub-sequence with ``test_num=1`` and the one-step predictions
    are summed. The ``test_num`` sums form the trial's prediction, which is
    passed through ``restore_data`` and scored with ``loss_function``
    against the last ``test_num`` values of ``data``.

    Relies on module-level names: ``data``, ``data_tf``, ``test_num``,
    ``batch_size``, ``name_data``, ``ada_result``, ``ignore`` (default value)
    and the helpers ``EMD``, ``seq_tf_matrix``, ``ann``, ``restore_data``,
    ``loss_function``.

    Args:
        lag: Number of lagged inputs per sample.
        num_trial: Number of independent repetitions.
        hidden: Forwarded to ``ann`` as ``hidden``.
        epochs: Forwarded to ``ann`` as ``epochs``.
        ignore: If truthy, the ``ignore`` lag columns closest to the target
            are dropped from the inputs.

    Side effects:
        Writes two CSV files below ``../<ada_result>/<name_data>/`` and
        appends one JSON line with the evaluation metrics to
        ``multi_emd_aann_result_evaluation.json``.
    """
    out_dir = '../' + ada_result + '/' + name_data
    column_suffix = '_times_lag' + str(lag)

    pre_data_tf_result = pd.DataFrame()  # predictions on the transformed (percentage) scale
    real_result = pd.DataFrame()  # predictions restored to the original scale
    time_ = []  # elapsed time per trial, in minutes
    mape, mae, mse, rmse = [], [], [], []
    for j in range(num_trial):
        result = []
        start_time = time.time()
        # One decomposition and one forecast per test sample.
        for k in range(test_num):
            # Only values before the k-th test point take part in the decomposition.
            decomposer = EMD(data_tf[:-test_num + k])
            imfs = decomposer.decompose()  # per the original note: m IMFs plus one residual
            pr = None
            for imf in imfs:
                # Append a dummy 0 as a placeholder for the unknown true value
                # of the step to be predicted.
                d = seq_tf_matrix(np.hstack((imf, [0])), n=lag + 1)
                x = d[:, :-1]
                if ignore:
                    x = x[:, :-ignore]  # drop the `ignore` lags nearest to the target
                y = d[:, -1]
                sub_pr = ann(x,
                             y,
                             test_num=1,
                             batch_size=batch_size,
                             hidden=hidden,
                             epochs=epochs)
                # Sub-sequence predictions are simply summed.
                pr = sub_pr if pr is None else pr + sub_pr
            result.append(pr[0])
        end_time = time.time()
        pr = np.array(result)
        restore_value = restore_data(pr, data[-test_num - 1:-1])  # restore the predicted values
        mape_, mae_, mse_, rmse_ = loss_function(restore_value,
                                                 data[-test_num:])
        # Keep the predictions of this trial.
        pre_data_tf_result[str(j + 1) + column_suffix] = pr
        real_result[str(j + 1) + column_suffix] = restore_value
        # Keep the metrics of this trial.
        time_.append((end_time - start_time) / 60)  # seconds -> minutes
        mape.append(mape_)
        mae.append(mae_)
        mse.append(mse_)
        rmse.append(rmse_)

    # Append the true values so predictions and targets sit side by side.
    pre_data_tf_result['test_percentage'] = data_tf[-test_num:]
    real_result['test_value'] = data[-test_num:]
    pre_data_tf_result.to_csv(out_dir +
                              '/data_tf_result/lag_' + str(lag) +
                              '_multi_emd_aann_data_tf_result.csv')
    real_result.to_csv(out_dir +
                       '/real_result/lag_' + str(lag) +
                       '_multi_emd_aann_real_result.csv')

    # Evaluation metrics of all trials.
    result_evaluation = {
        'lag': lag,
        'time': time_,
        'mape': mape,
        'mae': mae,
        'mse': mse,
        'rmse': rmse
    }

    # ``with`` guarantees the handle is closed even if the write fails.
    with open(out_dir + '/multi_emd_aann_result_evaluation.json', 'a') as fw:
        fw.write(json.dumps(result_evaluation) + '\n')
Beispiel #6
0
# Script section: load the closing-price series named by `name_data`, apply
# `data_trans`, and print `matrix_cor` of the 10-column lag matrix for both the
# raw and the transformed series.
path = '../data/' + name_data + '.csv'  # location of the input data

df_01 = pd.read_csv(path, encoding='gbk')  # read the data
# Keep Date/Close, index by Date and reverse the row order (original note:
# "arrange the data by date" -- presumably the CSV is stored newest-first; confirm).
df_data = df_01[['Date', 'Close']].set_index('Date').iloc[::-1]
df_data['Close'] = df_data['Close'].astype('float64')
data = np.array(df_data)
data = np.reshape(data, (len(data), ))  # flatten to a 1-D array of shape (sample,)

diff = data_trans(data)

# print(theta(series_minmax(data)[0]))
# print(theta(series_minmax(diff)[0]))

# print(corrcoef_imfs(series_minmax(data)[0]))
# print(corrcoef_imfs(series_minmax(diff)[0]))

print(matrix_cor(seq_tf_matrix(data, 10)))
# Recorded output for the Shanghai Composite Index:
# [0.9925052213533833, 0.993386133667623, 0.9942571787506765, 0.9950962804128652, 0.9960115824395269,
# 0.9969224666313158, 0.9977260830415531, 0.9984645658860706, 0.9992588904110402, 1.0]
# Recorded output for the S&P 500:
# [0.9980923986063942, 0.9982712262593705, 0.998458288931568, 0.9986495525239756, 0.9988430656437745,
# 0.9990563718109147, 0.9992787669886709, 0.9994977021953116, 0.9997361622154102, 1.0]
print(matrix_cor(seq_tf_matrix(diff, 10)))
# Recorded output for the Shanghai Composite Index:
# [-0.008651145443581408, 0.006834999895765309, 0.021172706952804994, -0.051951892446751614, 0.003523074227788529,
#  0.07219929582013276, 0.045227720273789414, -0.037241245123885554, 0.03602161724884085, 1.0]
# Recorded output for the S&P 500:
# [-0.01350694261104616, -0.017031069396570293, -0.008112646905554516, -0.003528964957479074, -0.038046959576339454,
#  -0.016926027633093563, 0.007025123013691551, -0.03670731870127638, -0.048211988418971646, 1.0]