def getdata_test(df, chunk_size_x, chunk_size_y, scaler=None):
    """Build a flat, standardized feature matrix and bucketed labels from ``df``.

    Columns of ``df`` from position 15 onward are cast to float64 and handed
    to ``getDataSet`` together with the values of the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of its result are kept;
    the trailing ``chunk_size_y`` of those are used as targets and the
    remaining leading columns as features.

    Targets are bucketed into classes: 1 for values in (0, 0.5], 2 for
    (0.5, 1.5], 3 for values above 1.5, and 0 otherwise.

    NOTE(review): a second function with the same name is defined later in
    this file; at import time that later definition replaces this one.

    Args:
        df: DataFrame with an ``'Unnamed: 0'`` column and the numeric input
            columns starting at position 15.
        chunk_size_x: Forwarded to ``getDataSet``; also determines how many
            columns are kept (presumably the input window length -- confirm
            against ``getDataSet``).
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing columns
            used as targets.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied as-is (e.g. the scaler fitted on the training
            data). When ``None`` a new ``StandardScaler`` is fitted on this
            data, which is the historical behavior.

    Returns:
        Tuple ``(test_x, test_y, scaler)``: features reshaped to
        ``(n_samples, 16)``, class labels with the same shape as the target
        slice, and the scaler that was applied.

    Note:
        The final reshape only succeeds when the feature slice has exactly
        16 columns, e.g. ``chunk_size_x == 1`` and ``chunk_size_y == 1``.
    """
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = df.iloc[:, 15:].values.astype('float64')

    # Build the framed dataset and keep the feature columns plus one extra.
    framed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True, row_ids).values)
    framed = framed[:, :16 * chunk_size_x + 1]

    # Standardize the features; only fit when no scaler was supplied.
    features = framed[:, :-chunk_size_y]
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(features)
    test_x = scaler.transform(features)

    # Later assignments overwrite earlier ones, so thresholds go in
    # ascending order.
    targets = framed[:, -chunk_size_y:]
    test_y = np.zeros_like(targets)
    test_y[targets > 0] = 1
    test_y[targets > 0.5] = 2
    test_y[targets > 1.5] = 3

    test_x = test_x.reshape((test_x.shape[0], 16))
    return test_x, test_y, scaler
def getdata_test(df, chunk_size_x, chunk_size_y, scaler=None):
    """Build standardized 3-D inputs and flat bucketed labels from ``df``.

    Columns of ``df`` from position 15 onward are cast to float64 and handed
    to ``getDataSet`` together with the values of the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of its result are kept;
    the trailing ``chunk_size_y`` of those are used as targets and the
    remaining leading columns as features.

    Targets are bucketed into classes: 1 for values in (0, 0.5], 2 for
    (0.5, 1.5], 3 for values above 1.5, and 0 otherwise.

    Args:
        df: DataFrame with an ``'Unnamed: 0'`` column and the numeric input
            columns starting at position 15.
        chunk_size_x: Forwarded to ``getDataSet``; also used as the
            time-step dimension of the returned features.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing columns
            used as targets.
        scaler: Optional, already fitted scaler exposing ``transform``. When
            given it is applied as-is (e.g. the scaler fitted on the training
            data). When ``None`` a new ``StandardScaler`` is fitted on this
            data, which is the historical behavior.

    Returns:
        Tuple ``(test_x, test_y, scaler)``: features reshaped to
        ``(n_samples, chunk_size_x, 16)``, the class labels flattened to
        1-D, and the scaler that was applied.

    Note:
        The reshape requires the feature slice to have exactly
        ``16 * chunk_size_x`` columns, which holds when ``chunk_size_y == 1``.
    """
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = df.iloc[:, 15:].values.astype('float64')

    # Build the framed dataset and keep the feature columns plus one extra.
    framed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True, row_ids).values)
    framed = framed[:, :16 * chunk_size_x + 1]

    # Standardize the features; only fit when no scaler was supplied.
    features = framed[:, :-chunk_size_y]
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(features)
    test_x = scaler.transform(features)

    # Later assignments overwrite earlier ones, so thresholds go in
    # ascending order.
    targets = framed[:, -chunk_size_y:]
    labels = np.zeros_like(targets)
    labels[targets > 0] = 1
    labels[targets > 0.5] = 2
    labels[targets > 1.5] = 3
    test_y = labels.reshape(-1)

    # Reshape to the layout an LSTM expects:
    # [samples, time steps, features (16)].
    test_x = test_x.reshape((test_x.shape[0], chunk_size_x, 16))
    return test_x, test_y, scaler
def getdata_2(df, chunk_size_x, chunk_size_y):
    """Build 3-class train/validation/test sets, keeping only positive targets.

    Columns of ``df`` from position 15 onward are cast to float64 and handed
    to ``getDataSet`` together with the values of the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of its result are kept;
    the trailing ``chunk_size_y`` of those are used as targets and the
    remaining leading columns as features.

    Rows whose target is not greater than 0 are dropped. The remaining
    targets are bucketed into class 0 for (0, 0.5], class 1 for (0.5, 1.5]
    and class 2 for values above 1.5, then one-hot encoded with
    ``to_categorical(..., num_classes=3)``. The retained rows are split in
    order: first 60% training, next 20% validation, remainder test.

    The scaler is fitted on the retained training rows only, so validation
    and test statistics do not leak into the normalization. If there are no
    training rows it falls back to fitting on every row.

    Args:
        df: DataFrame with an ``'Unnamed: 0'`` column and the numeric input
            columns starting at position 15.
        chunk_size_x: Forwarded to ``getDataSet``; also used as the
            time-step dimension of the returned features.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing columns
            used as targets. The row filter flattens the target mask, so
            this only lines up with the feature rows when it is 1.

    Returns:
        Tuple ``(train_x, train_y, valid_x, valid_y, test_x, test_y,
        scaler)``; each ``*_x`` has shape ``(n, chunk_size_x, 16)`` and
        ``scaler`` is the fitted ``StandardScaler``.
    """
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = df.iloc[:, 15:].values.astype('float64')

    # Build the framed dataset and keep the feature columns plus one extra.
    framed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True, row_ids).values)
    framed = framed[:, :16 * chunk_size_x + 1]

    # Bucket the targets, then keep only the strictly positive ones.
    targets = framed[:, -chunk_size_y:]
    keep = targets > 0
    labels = np.zeros_like(targets)
    labels[targets > 0.5] = 1
    labels[targets > 1.5] = 2
    labels = labels[keep]

    # Ordered split: first 60% training, next 20% validation, rest test.
    train_days = int(len(labels) * 0.6)
    valid_days = int(len(labels) * 0.2)
    valid_end = train_days + valid_days

    labels = to_categorical(labels, num_classes=3)
    train_y = labels[:train_days]
    valid_y = labels[train_days:valid_end]
    test_y = labels[valid_end:]

    all_features = framed[:, :-chunk_size_y]
    keep_rows = keep.reshape(-1)

    # Fit on the retained training rows only; fall back to every row when
    # the training slice is empty so degenerate inputs behave as before.
    scaler = StandardScaler()
    if train_days > 0:
        scaler.fit(all_features[keep_rows][:train_days])
    else:
        scaler.fit(all_features)
    data = scaler.transform(all_features)[keep_rows]

    train_x = data[:train_days, :]
    valid_x = data[train_days:valid_end, :]
    test_x = data[valid_end:, :]

    # Reshape to the layout an LSTM expects:
    # [samples, time steps, features (16)].
    train_x = train_x.reshape((train_x.shape[0], chunk_size_x, 16))
    valid_x = valid_x.reshape((valid_x.shape[0], chunk_size_x, 16))
    test_x = test_x.reshape((test_x.shape[0], chunk_size_x, 16))
    return train_x, train_y, valid_x, valid_y, test_x, test_y, scaler
def getdata(df, chunk_size_x, chunk_size_y):
    """Build flat train/validation/test sets with 4-level bucketed labels.

    Columns of ``df`` from position 15 onward are cast to float64 and handed
    to ``getDataSet`` together with the values of the ``'Unnamed: 0'`` column.
    Only the first ``16 * chunk_size_x + 1`` columns of its result are kept;
    the trailing ``chunk_size_y`` of those are used as targets and the
    remaining leading columns as features.

    Rows are split in order: first 70% training, next 20% validation,
    remainder test. Targets are bucketed into classes: 1 for values in
    (0, 0.5], 2 for (0.5, 1.5], 3 for values above 1.5, and 0 otherwise.

    The scaler is fitted on the training rows only, so validation and test
    statistics do not leak into the normalization. If there are no training
    rows it falls back to fitting on every row.

    Args:
        df: DataFrame with an ``'Unnamed: 0'`` column and the numeric input
            columns starting at position 15.
        chunk_size_x: Forwarded to ``getDataSet``; also determines how many
            columns are kept.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing columns
            used as targets.

    Returns:
        Tuple ``(train_x, train_y, valid_x, valid_y, test_x, test_y,
        scaler)``; each ``*_x`` has shape ``(n, 16)``, each ``*_y`` keeps the
        shape of the target slice, and ``scaler`` is the fitted
        ``StandardScaler``.

    Note:
        The final reshapes only succeed when the feature slice has exactly
        16 columns, e.g. ``chunk_size_x == 1`` and ``chunk_size_y == 1``.
    """
    row_ids = df['Unnamed: 0'].tolist()
    raw_values = df.iloc[:, 15:].values.astype('float64')

    # Build the framed dataset and keep the feature columns plus one extra.
    framed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True, row_ids).values)
    framed = framed[:, :16 * chunk_size_x + 1]

    # Ordered split: first 70% training, next 20% validation, rest test.
    train_days = int(len(framed) * 0.7)
    valid_days = int(len(framed) * 0.2)
    valid_end = train_days + valid_days

    # Fit on the training rows only; fall back to every row when the
    # training slice is empty so degenerate inputs behave as before.
    features = framed[:, :-chunk_size_y]
    scaler = StandardScaler()
    scaler.fit(features[:train_days] if train_days > 0 else features)
    features = scaler.transform(features)

    train_x = features[:train_days, :]
    valid_x = features[train_days:valid_end, :]
    test_x = features[valid_end:, :]

    # Later assignments overwrite earlier ones, so thresholds go in
    # ascending order.
    targets = framed[:, -chunk_size_y:]
    labels = np.zeros_like(targets)
    labels[targets > 0] = 1
    labels[targets > 0.5] = 2
    labels[targets > 1.5] = 3

    train_y = labels[:train_days]
    valid_y = labels[train_days:valid_end]
    test_y = labels[valid_end:]

    train_x = train_x.reshape((train_x.shape[0], 16))
    valid_x = valid_x.reshape((valid_x.shape[0], 16))
    test_x = test_x.reshape((test_x.shape[0], 16))
    return train_x, train_y, valid_x, valid_y, test_x, test_y, scaler
def gettrain_rand(df, chunk_size_x, chunk_size_y):
    """Build randomly split, one-hot labelled train/validation/test sets.

    Only the first 90% of the rows of ``df`` are used. Their columns from
    position 15 onward are cast to float64 and handed to ``getDataSet``
    together with the values of the ``'Unnamed: 0'`` column. Only the first
    ``16 * chunk_size_x + 1`` columns of its result are kept; the trailing
    ``chunk_size_y`` of those are used as targets and the remaining leading
    columns as features.

    Targets are bucketed into classes: 1 for values in (0, 0.5], 2 for
    (0.5, 1.5], 3 for values above 1.5, and 0 otherwise; they are then
    one-hot encoded with ``to_categorical(..., num_classes=4)``. Samples are
    split 80/10/10 with ``train_test_split`` and ``random_state=0``.

    The scaler is fitted on the training part after splitting, so validation
    and test statistics do not leak into the normalization. The partition is
    unchanged because it depends only on the row count and random state.

    NOTE(review): the ``'Unnamed: 0'`` values are taken from every row of
    ``df`` while the features cover only the first 90% of the rows; confirm
    that ``getDataSet`` tolerates the longer list.

    Args:
        df: DataFrame with an ``'Unnamed: 0'`` column and the numeric input
            columns starting at position 15.
        chunk_size_x: Forwarded to ``getDataSet``; also used as the
            time-step dimension of the returned features.
        chunk_size_y: Forwarded to ``getDataSet``; number of trailing columns
            used as targets.

    Returns:
        Tuple ``(train_x, train_y, valid_x, valid_y, test_x, test_y)``; each
        ``*_x`` has shape ``(n, chunk_size_x, 16)``. The scaler is not
        returned.
    """
    row_ids = df['Unnamed: 0'].tolist()
    trainsize = int(df.shape[0] * 0.9)
    raw_values = df.iloc[:trainsize, 15:].values.astype('float64')

    # Build the framed dataset and keep the feature columns plus one extra.
    framed = np.array(
        getDataSet(raw_values, chunk_size_x, chunk_size_y, True, row_ids).values)
    framed = framed[:, :16 * chunk_size_x + 1]

    features = framed[:, :-chunk_size_y]

    # Later assignments overwrite earlier ones, so thresholds go in
    # ascending order.
    targets = framed[:, -chunk_size_y:]
    labels = np.zeros_like(targets)
    labels[targets > 0] = 1
    labels[targets > 0.5] = 2
    labels[targets > 1.5] = 3
    labels = to_categorical(labels, num_classes=4)

    # 80% training, then the remaining 20% halved into validation and test.
    train_x, rest_x, train_y, rest_y = train_test_split(
        features, labels, test_size=0.2, random_state=0)
    valid_x, test_x, valid_y, test_y = train_test_split(
        rest_x, rest_y, test_size=0.5, random_state=0)

    # Fit on the training part only, then apply the same transform to all.
    scaler = StandardScaler()
    scaler.fit(train_x)
    train_x = scaler.transform(train_x)
    valid_x = scaler.transform(valid_x)
    test_x = scaler.transform(test_x)

    # Reshape to [samples, time steps, features (16)].
    train_x = train_x.reshape((train_x.shape[0], chunk_size_x, 16))
    valid_x = valid_x.reshape((valid_x.shape[0], chunk_size_x, 16))
    test_x = test_x.reshape((test_x.shape[0], chunk_size_x, 16))
    return train_x, train_y, valid_x, valid_y, test_x, test_y