Ejemplo n.º 1
0
def getdata_test(df, chunk_size_x, chunk_size_y):
    """Build a standardized, flat test set with integer-coded labels.

    Columns from index 15 onward of ``df`` are converted to float64 and
    handed to ``getDataSet`` together with the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of the reframed result
    are kept; the trailing ``chunk_size_y`` of those are treated as targets
    and everything before them as features.

    Args:
        df: DataFrame holding an ``'Unnamed: 0'`` column and the raw values
            from column index 15 onward.
        chunk_size_x: Forwarded to ``getDataSet``; also sets how many
            reframed columns are kept.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing
            columns used as targets.

    Returns:
        ``(test_x, test_y, scaler)``: features standardized and reshaped to
        ``(samples, 16)``, class codes with the same shape as the target
        columns, and the fitted ``StandardScaler``.
    """
    n_features = 16

    # Presumably row identifiers (e.g. dates); getDataSet is defined
    # elsewhere, so confirm how it uses them.
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = np.array(df.iloc[:, 15:].values.astype('float64'))

    # Reframe the raw values into feature/target rows.
    reframed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True,
                   row_ids).values)
    reframed = reframed[:, :n_features * chunk_size_x + 1]

    # Standardize the feature columns.
    # NOTE(review): the scaler is fit on the very data it transforms here,
    # not on training statistics -- confirm this is intended for a test set.
    features = reframed[:, :-chunk_size_y]
    scaler = StandardScaler()
    features = scaler.fit(features).transform(features)

    # Bucket the targets: <= 0 -> 0, (0, 0.5] -> 1, (0.5, 1.5] -> 2, > 1.5 -> 3.
    targets = reframed[:, -chunk_size_y:]
    test_y = np.zeros_like(targets)
    for threshold, class_code in ((0, 1), (0.5, 2), (1.5, 3)):
        test_y[targets > threshold] = class_code

    # The reshape only succeeds with exactly 16 feature columns per row, so
    # this variant presumably expects chunk_size_x == 1 -- TODO confirm.
    test_x = features.reshape((features.shape[0], n_features))
    return test_x, test_y, scaler
Ejemplo n.º 2
0
def getdata_test(df, chunk_size_x, chunk_size_y):
    """Build a standardized test set shaped for a sequence (LSTM) model.

    Columns from index 15 onward of ``df`` are converted to float64 and
    handed to ``getDataSet`` together with the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of the reframed result
    are kept; the trailing ``chunk_size_y`` of those are treated as targets
    and everything before them as features.

    Args:
        df: DataFrame holding an ``'Unnamed: 0'`` column and the raw values
            from column index 15 onward.
        chunk_size_x: Number of time steps in the returned feature tensor;
            also forwarded to ``getDataSet``.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing
            columns used as targets.

    Returns:
        ``(test_x, test_y, scaler)``: features standardized and reshaped to
        ``(samples, chunk_size_x, 16)``, a flattened 1-D array of class
        codes, and the fitted ``StandardScaler``.
    """
    n_features = 16

    # Presumably row identifiers (e.g. dates); getDataSet is defined
    # elsewhere, so confirm how it uses them.
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = np.array(df.iloc[:, 15:].values.astype('float64'))

    # Reframe the raw values into feature/target rows.
    reframed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True,
                   row_ids).values)
    reframed = reframed[:, :n_features * chunk_size_x + 1]

    # Standardize the feature columns.
    # NOTE(review): the scaler is fit on the very data it transforms here,
    # not on training statistics -- confirm this is intended for a test set.
    features = reframed[:, :-chunk_size_y]
    scaler = StandardScaler()
    features = scaler.fit(features).transform(features)

    # Bucket the targets: <= 0 -> 0, (0, 0.5] -> 1, (0.5, 1.5] -> 2, > 1.5 -> 3.
    targets = reframed[:, -chunk_size_y:]
    codes = np.zeros_like(targets)
    for threshold, class_code in ((0, 1), (0.5, 2), (1.5, 3)):
        codes[targets > threshold] = class_code
    test_y = codes.reshape(-1)

    # Reshape to the layout expected by an LSTM:
    # [samples, time steps, features (16)].
    test_x = features.reshape((features.shape[0], chunk_size_x, n_features))
    return test_x, test_y, scaler
Ejemplo n.º 3
0
def getdata_2(df, chunk_size_x, chunk_size_y):
    """Build train/validation/test sets from rows with a positive target.

    Columns from index 15 onward of ``df`` are converted to float64 and
    handed to ``getDataSet`` together with the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of the reframed result
    are kept. Rows whose target is not greater than 0 are discarded; the
    remaining rows are labelled with three classes and split in their
    existing order: first 60% train, next 20% validation, rest test.

    Args:
        df: DataFrame holding an ``'Unnamed: 0'`` column and the raw values
            from column index 15 onward.
        chunk_size_x: Number of time steps in the returned feature tensors;
            also forwarded to ``getDataSet``.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing
            columns used as targets.

    Returns:
        ``(train_x, train_y, valid_x, valid_y, test_x, test_y, scaler)``.
        The ``*_x`` arrays have shape ``(samples, chunk_size_x, 16)``, the
        ``*_y`` arrays are the ``to_categorical`` encoding with 3 classes,
        and ``scaler`` is the fitted ``StandardScaler``.
    """
    n_features = 16

    # Presumably row identifiers (e.g. dates); getDataSet is defined
    # elsewhere, so confirm how it uses them.
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = np.array(df.iloc[:, 15:].values.astype('float64'))

    # Reframe the raw values into feature/target rows.
    reframed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True,
                   row_ids).values)
    reframed = reframed[:, :n_features * chunk_size_x + 1]

    # Label the targets: (0, 0.5] -> 0, (0.5, 1.5] -> 1, > 1.5 -> 2.
    # Targets <= 0 get no class of their own; those rows are dropped.
    targets = reframed[:, -chunk_size_y:]
    positive = targets > 0
    codes = np.zeros_like(targets)
    for threshold, class_code in ((0.5, 1), (1.5, 2)):
        codes[targets > threshold] = class_code
    codes = codes[positive]

    # Split boundaries are computed on the filtered sample count.
    n_kept = len(codes)
    train_end = int(n_kept * 0.6)
    valid_end = train_end + int(n_kept * 0.2)

    one_hot = to_categorical(codes, num_classes=3)
    train_y = one_hot[:train_end]
    valid_y = one_hot[train_end:valid_end]
    test_y = one_hot[valid_end:]

    # Standardize the feature columns.
    # NOTE(review): the scaler is fit on every row -- including rows that
    # are dropped below and rows that end up in validation/test -- confirm
    # this is intended.
    features = reframed[:, :-chunk_size_y]
    scaler = StandardScaler()
    features = scaler.fit(features).transform(features)
    features = features[positive.reshape(-1)]

    # Reshape each split to the layout expected by an LSTM:
    # [samples, time steps, features (16)].
    train_x, valid_x, test_x = (
        part.reshape((part.shape[0], chunk_size_x, n_features))
        for part in (features[:train_end, :],
                     features[train_end:valid_end, :],
                     features[valid_end:, :]))

    return train_x, train_y, valid_x, valid_y, test_x, test_y, scaler
Ejemplo n.º 4
0
def getdata(df, chunk_size_x, chunk_size_y):
    """Build flat train/validation/test sets with integer-coded labels.

    Columns from index 15 onward of ``df`` are converted to float64 and
    handed to ``getDataSet`` together with the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of the reframed result
    are kept. Rows are split in their existing order: first 70% train,
    next 20% validation, remainder test.

    Args:
        df: DataFrame holding an ``'Unnamed: 0'`` column and the raw values
            from column index 15 onward.
        chunk_size_x: Forwarded to ``getDataSet``; also sets how many
            reframed columns are kept.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing
            columns used as targets.

    Returns:
        ``(train_x, train_y, valid_x, valid_y, test_x, test_y, scaler)``.
        The ``*_x`` arrays have shape ``(samples, 16)``, the ``*_y`` arrays
        hold class codes 0-3 (not one-hot encoded), and ``scaler`` is the
        fitted ``StandardScaler``.
    """
    n_features = 16

    # Presumably row identifiers (e.g. dates); getDataSet is defined
    # elsewhere, so confirm how it uses them.
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = np.array(df.iloc[:, 15:].values.astype('float64'))

    # Reframe the raw values into feature/target rows.
    reframed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True,
                   row_ids).values)
    reframed = reframed[:, :n_features * chunk_size_x + 1]

    # Chronological split boundaries: 70% / 20% / remainder.
    n_rows = len(reframed)
    train_end = int(n_rows * 0.7)
    valid_end = train_end + int(n_rows * 0.2)

    # Standardize the feature columns.
    # NOTE(review): the scaler is fit on all rows, including those that end
    # up in the validation and test splits -- confirm this is intended.
    scaler = StandardScaler()
    features = reframed[:, :-chunk_size_y]
    features = scaler.fit(features).transform(features)

    # Bucket the targets: <= 0 -> 0, (0, 0.5] -> 1, (0.5, 1.5] -> 2, > 1.5 -> 3.
    targets = reframed[:, -chunk_size_y:]
    codes = np.zeros_like(targets)
    for threshold, class_code in ((0, 1), (0.5, 2), (1.5, 3)):
        codes[targets > threshold] = class_code

    train_y = codes[:train_end]
    valid_y = codes[train_end:valid_end]
    test_y = codes[valid_end:]

    # The reshape only succeeds with exactly 16 feature columns per row, so
    # this variant presumably expects chunk_size_x == 1 -- TODO confirm.
    train_x, valid_x, test_x = (
        part.reshape((part.shape[0], n_features))
        for part in (features[:train_end, :],
                     features[train_end:valid_end, :],
                     features[valid_end:, :]))
    return train_x, train_y, valid_x, valid_y, test_x, test_y, scaler
Ejemplo n.º 5
0
def gettrain_rand(df, chunk_size_x, chunk_size_y):
    """Build randomly split train/validation/test sets from the first 90% of rows.

    Only the first ``int(0.9 * rows)`` rows of ``df`` (columns from index 15
    onward) are used. They are converted to float64, reframed by
    ``getDataSet``, standardized, labelled with four one-hot classes and
    then split at random with a fixed seed: 80% train, 10% validation,
    10% test.

    Args:
        df: DataFrame holding an ``'Unnamed: 0'`` column and the raw values
            from column index 15 onward.
        chunk_size_x: Number of time steps in the returned feature tensors;
            also forwarded to ``getDataSet``.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing
            columns used as targets.

    Returns:
        ``(train_x, train_y, valid_x, valid_y, test_x, test_y)``. The
        ``*_x`` arrays have shape ``(samples, chunk_size_x, 16)`` and the
        ``*_y`` arrays are the ``to_categorical`` encoding with 4 classes.
        Unlike its sibling loaders, this function does not return the scaler.
    """
    n_features = 16

    # NOTE(review): the identifier list covers every row of df while the
    # values below are truncated to the first 90% -- confirm getDataSet
    # tolerates the length mismatch.
    row_ids = df['Unnamed: 0'].tolist()
    n_used = int(len(df) * 0.9)
    raw_values = np.array(df.iloc[:n_used, 15:].values.astype('float64'))

    # Reframe the raw values into feature/target rows.
    reframed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True,
                   row_ids).values)
    reframed = reframed[:, :n_features * chunk_size_x + 1]

    # Standardize the feature columns.
    # NOTE(review): the scaler is fit before the random split, so it sees
    # validation/test rows too -- confirm this is intended.
    features = reframed[:, :-chunk_size_y]
    scaler = StandardScaler()
    features = scaler.fit(features).transform(features)

    # Bucket the targets: <= 0 -> 0, (0, 0.5] -> 1, (0.5, 1.5] -> 2, > 1.5 -> 3.
    targets = reframed[:, -chunk_size_y:]
    codes = np.zeros_like(targets)
    for threshold, class_code in ((0, 1), (0.5, 2), (1.5, 3)):
        codes[targets > threshold] = class_code
    one_hot = to_categorical(codes, num_classes=4)

    # Hold out 20% at random, then halve it into validation and test.
    train_x, held_x, train_y, held_y = train_test_split(
        features, one_hot, test_size=0.2, random_state=0)
    valid_x, test_x, valid_y, test_y = train_test_split(
        held_x, held_y, test_size=0.5, random_state=0)

    # Reshape each split to [samples, time steps, features (16)].
    train_x, valid_x, test_x = (
        part.reshape((part.shape[0], chunk_size_x, n_features))
        for part in (train_x, valid_x, test_x))

    return train_x, train_y, valid_x, valid_y, test_x, test_y