def train(train_X, train_Y, validate_X, validate_Y, model, criterion,
          optimizer, epoch, early_stopping, lr_down, device):
    """Train `model` with per-epoch validation and early stopping.

    Args:
        train_X, train_Y: training inputs/targets (tensors).
        validate_X, validate_Y: validation inputs/targets
            (assumed already on `device` — TODO confirm against caller).
        model: network to optimize.
        criterion: loss function.
        optimizer: optimizer over `model.parameters()`.
        epoch: maximum number of epochs.
        early_stopping: callback invoked with (valid_loss, model);
            sets `.early_stop` when patience is exhausted.
        lr_down: unused; kept for interface compatibility.
        device: torch device training tensors are moved to.

    Returns:
        (epochs, losses): epoch indices seen and formatted training losses.
    """
    ep_Ls = []
    losses_Ls = []
    # Device transfer is loop-invariant: do it once, not every epoch.
    # (torch.autograd.Variable is deprecated — plain tensors track autograd.)
    var_x = train_X.to(device)
    var_y = train_Y.to(device)
    for e in range(1, epoch + 1):
        # Forward + backward on the training set.
        out = model(var_x)
        loss = criterion(out, var_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Validation pass needs no gradients — saves memory and compute.
        with torch.no_grad():
            valid_output = model(validate_X)
            valid_loss = criterion(valid_output, validate_Y)
        ep_Ls.append(e)
        losses_Ls.append(zc_util.get_two_float(loss.item(), 6))
        # Early stopping tracks the best validation loss (and checkpoints).
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            break
    return ep_Ls, losses_Ls
def train_AE(train_X, train_Y, model, criterion, optimizer, epoch, lr_down, device):
    """Train an autoencoder to reconstruct `train_X`.

    The model is expected to return `(reconstruction, latent)`; the loss is
    the reconstruction error against the input itself.

    Args:
        train_X: training inputs (tensor).
        train_Y: unused; kept for interface compatibility.
        model: autoencoder returning (out, out_hat).
        criterion: reconstruction loss function.
        optimizer: optimizer over `model.parameters()`.
        epoch: number of epochs (no early stopping here).
        lr_down: unused; kept for interface compatibility.
        device: torch device the input is moved to.

    Returns:
        (epochs, losses): epoch indices and formatted reconstruction losses.
    """
    ep_AE = []
    losses_AE = []
    # Move the input once; deprecated Variable wrapper dropped.
    x = train_X.to(device)
    for i in range(1, epoch + 1):
        # Forward: reconstruction + latent code (latent unused here).
        out, out_hat = model(x)
        loss = criterion(out, x)
        # Backward.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        ep_AE.append(i)
        losses_AE.append(zc_util.get_two_float(loss.item(), 6))
    return ep_AE, losses_AE
def search(config, modelName):
    """Train a GRU forecaster on the server-KPI dataset and report metrics.

    NOTE(review): this module also defines `search(config)` further down,
    which shadows this function at import time — one of them should be
    renamed.

    Args:
        config: dict of hyper-parameters (num_hour, pred_h, epoch,
            patience, lr, hidden_size).
        modelName: basename used for the saved '<modelName>.pkl' checkpoint.

    Returns:
        dict with the trained model name, parameters, loss history, and
        train/test error metrics (MSE/RMSE/MAE/MAPE).
    """
    # --- experiment settings ---
    filename = "服务器性能数据.xlsx"
    KPI = "主机CPU平均负载"
    num_hour = config["num_hour"]  # history window length, e.g. 144
    pred_h = config["pred_h"]      # forecast horizon, e.g. 12
    # GRU hyper-parameters
    input_size = 24
    criterion = nn.MSELoss()
    epoch = config["epoch"]              # e.g. 1000
    patience = config["patience"]        # e.g. 15
    lr = config["lr"]                    # e.g. 1e-2
    hidden_size = config["hidden_size"]  # e.g. 64
    device = torch.device("cuda")
    np.random.seed(113)
    # 1. load raw data
    df_original = zc_util.read_file(filename)
    # 2. deep copy so the original frame stays untouched
    df = df_original.copy(deep=True)
    # 3. impute missing values with k-nearest-neighbour mean
    df[KPI] = zc_util.knn_mean(df[KPI], 24)
    # 4. build the autoregressive design matrix
    dataframe = zc_util.Autoregressive_matrix(df[KPI], num_hour=num_hour, pred_h=pred_h)
    # 5. split train/validate/test 60/20/20; no shuffle preserves time order
    train_data, train_truth, validate_data, validate_truth, test_data, test_truth = \
        zc_util.dataloader(dataframe, (6, 2, 2), is_Shuffle=False, pred_h=pred_h)
    # Min/max taken from the TRAINING split only (avoids test leakage).
    max_value = max(train_data.max().values)
    min_value = min(train_data.min().values)
    # 6. normalize to [0, 1] and convert to tensors on `device`
    train_X, train_Y = zc_util.NormalizeAndToTensor(train_data, num_hour, pred_h, device)
    validate_X, validate_Y = zc_util.NormalizeAndToTensor(validate_data, num_hour, pred_h, device)
    test_X, test_Y = zc_util.NormalizeAndToTensor(test_data, num_hour, pred_h, device)
    # 7. model, optimizer, early stopping
    model = GRU(input_size, hidden_size=hidden_size, output_size=pred_h,
                num_layer=2).to(device)
    optimizer_lstm = torch.optim.Adam(model.parameters(), lr=lr)
    early_stopping = EarlyStopping(patience, verbose=True)
    # 8. train
    epochs, losses = train(train_X=train_X, train_Y=train_Y,
                           validate_X=validate_X, validate_Y=validate_Y,
                           model=model, criterion=criterion,
                           optimizer=optimizer_lstm, epoch=epoch,
                           early_stopping=early_stopping, lr_down=None,
                           device=device)
    # 9. persist and reload the full model.
    # NOTE(review): torch.save/torch.load of a whole model uses pickle —
    # only load checkpoint files you trust.
    torch.save(model, f'{modelName}.pkl')
    model2 = torch.load(f'{modelName}.pkl')
    # 10. evaluate on the test set — inference needs no gradients
    with torch.no_grad():
        outputs = model2(test_X)
    # De-normalize predictions back to the original KPI scale.
    predict = (outputs * (max_value - min_value) + min_value).squeeze().detach().cpu().numpy()
    truth = test_truth.values.reshape(-1, pred_h)
    MSE_test = mean_squared_error(truth, predict)
    MAE_test = mean_absolute_error(truth, predict)
    Verification = {
        "MSE_test": zc_util.get_two_float(MSE_test, 6),
        "RMSE_test": zc_util.get_two_float(np.sqrt(MSE_test), 6),
        "MAE_test": zc_util.get_two_float(MAE_test, 6),
        "MAPE_test": zc_util.get_two_float(zc_util.mape(truth, predict), 6)
    }
    # Same metrics on the training split (reloaded weights are identical,
    # so using `model` here matches the original behavior).
    with torch.no_grad():
        outputs = model(train_X)
    predict2 = (outputs * (max_value - min_value) + min_value).squeeze().detach().cpu().numpy()
    truth2 = train_truth.values.reshape(-1, pred_h)
    MSE2_test = mean_squared_error(truth2, predict2)
    MAE2_test = mean_absolute_error(truth2, predict2)
    training = {
        "MSE_train": zc_util.get_two_float(MSE2_test, 6),
        "RMSE_train": zc_util.get_two_float(np.sqrt(MSE2_test), 6),
        "MAE_train": zc_util.get_two_float(MAE2_test, 6),
        "MAPE_train": zc_util.get_two_float(zc_util.mape(truth2, predict2), 6)
    }
    # Record the effective input size alongside the supplied config.
    config["input_size"] = input_size
    data = {
        "modelName": modelName,
        "Parameter": config,
        "Raw_Data": KPI,
        "epochs": epochs[-1],
        "losses": losses,
        "Verification": Verification,
        "training": training
    }
    return data
def search(config):
    """Train an AE + LSTM pipeline on the server-KPI dataset.

    An autoencoder first compresses each history window to a latent code;
    an LSTM is then trained on the latent codes to forecast `pred_h` steps.

    NOTE(review): this function shadows the earlier `search(config,
    modelName)` defined above in the same module — rename one of them.
    NOTE(review): relies on `train_LSTM`, defined elsewhere in the project.

    Args:
        config: dict of hyper-parameters for both the AE (epoch_AE, lr_AE,
            input_size_AE, hiddenSize_AE, hat_size_AE) and the LSTM
            (epoch_lstm, patience, lr_lstm, LSTM_hidden_size), plus
            num_hour and pred_h.

    Returns:
        dict with parameters, AE/LSTM loss histories, and train/test
        error metrics (MSE/RMSE/MAE/MAPE).
    """
    # --- experiment settings ---
    filename = "服务器性能数据.xlsx"
    KPI = "主机CPU平均负载"
    num_hour = config["num_hour"]  # history window length, e.g. 144
    pred_h = config["pred_h"]      # forecast horizon, e.g. 12
    # Autoencoder hyper-parameters
    criterion_AE = nn.MSELoss()
    epoch_AE = config["epoch_AE"]            # e.g. 3000
    lr_AE = config["lr_AE"]                  # e.g. 1e-2
    input_size_AE = config["input_size_AE"]  # e.g. 24
    hiddenSize_AE = config["hiddenSize_AE"]  # e.g. 256
    hat_size_AE = config["hat_size_AE"]      # latent size, e.g. 84
    # LSTM hyper-parameters: latent code is reshaped into
    # (num_hour / input_size_AE) steps of this feature width.
    input_size_lstm = int(hat_size_AE / (num_hour / input_size_AE))
    criterion_lstm = nn.MSELoss()
    epoch_lstm = config["epoch_lstm"]              # e.g. 1000
    patience = config["patience"]                  # e.g. 15
    lr_lstm = config["lr_lstm"]                    # e.g. 1e-2
    LSTM_hidden_size = config["LSTM_hidden_size"]  # e.g. 64
    device = torch.device("cuda")
    np.random.seed(113)
    # 1. load raw data
    df_original = zc_util.read_file(filename)
    # 2. deep copy so the original frame stays untouched
    df = df_original.copy(deep=True)
    # 3. impute missing values with k-nearest-neighbour mean
    df[KPI] = zc_util.knn_mean(df[KPI], 24)
    # 4. build the autoregressive design matrix
    dataframe = zc_util.Autoregressive_matrix(df[KPI], num_hour=num_hour, pred_h=pred_h)
    # 5. split train/validate/test 60/20/20; no shuffle preserves time order
    train_data, train_truth, validate_data, validate_truth, test_data, test_truth = \
        zc_util.dataloader(dataframe, (6, 2, 2), is_Shuffle=False, pred_h=pred_h)
    # Min/max taken from the TRAINING split only (avoids test leakage).
    max_value = max(train_data.max().values)
    min_value = min(train_data.min().values)
    # 6. normalize to [0, 1] and convert to tensors on `device`
    train_X, train_Y = zc_util.NormalizeAndToTensor(train_data, num_hour, pred_h, device)
    validate_X, validate_Y = zc_util.NormalizeAndToTensor(validate_data, num_hour, pred_h, device)
    test_X, test_Y = zc_util.NormalizeAndToTensor(test_data, num_hour, pred_h, device)
    # 7. build and train the autoencoder
    model_AE = AE(24, hidden_size=hiddenSize_AE, hat_size=hat_size_AE,
                  num_hour=num_hour).to(device)
    optimizer = torch.optim.Adam(model_AE.parameters(), lr=lr_AE)
    # 8. train the AE
    epochs_AE, losses_AE = train_AE(train_X, train_Y, model=model_AE,
                                    criterion=criterion_AE, optimizer=optimizer,
                                    epoch=epoch_AE, lr_down=None, device=device)
    # 10. save the AE (whole-model pickle — only load files you trust)
    torch.save(model_AE, 'AE.pkl')
    # 11. reload it
    model_AE2 = torch.load('AE.pkl')
    # 12. generate latent codes for all three splits.
    # torch.no_grad() both skips useless gradient tracking AND detaches the
    # latents from the AE graph — otherwise the LSTM's loss.backward()
    # would try to backprop through the (already freed) AE graph.
    with torch.no_grad():
        outputs, outputs_hat = model_AE2(train_X)
        train_X2 = outputs_hat.to(device)
        outputs, outputs_hat = model_AE2(validate_X)
        validate_X2 = outputs_hat.to(device)
        outputs, outputs_hat = model_AE2(test_X)
        test_X2 = outputs_hat.to(device)
    # 13. build the LSTM on the latent codes
    model_lstm = lstm(input_size_lstm, hidden_size=LSTM_hidden_size,
                      output_size=pred_h, num_layer=2).to(device)
    optimizer_lstm = torch.optim.Adam(model_lstm.parameters(), lr=lr_lstm)
    early_stopping = EarlyStopping(patience, verbose=True)
    # 14. train the LSTM
    epochs_LSTM, losses_LSTM = train_LSTM(train_X=train_X2, train_Y=train_Y,
                                          validate_X=validate_X2,
                                          validate_Y=validate_Y,
                                          model=model_lstm,
                                          criterion=criterion_lstm,
                                          optimizer=optimizer_lstm,
                                          epoch=epoch_lstm,
                                          early_stopping=early_stopping,
                                          lr_down=None, device=device)
    # 15. persist and reload the LSTM
    torch.save(model_lstm, 'LSTM.pkl')
    model_lstm2 = torch.load('LSTM.pkl')
    # 16. evaluate on the test set — inference needs no gradients
    with torch.no_grad():
        outputs = model_lstm2(test_X2)
    # De-normalize predictions back to the original KPI scale.
    predict = (outputs * (max_value - min_value) + min_value).squeeze().detach().cpu().numpy()
    truth = test_truth.values.reshape(-1, pred_h)
    MSE_test = mean_squared_error(truth, predict)
    MAE_test = mean_absolute_error(truth, predict)
    Verification = {
        "MSE_test": zc_util.get_two_float(MSE_test, 6),
        "RMSE_test": zc_util.get_two_float(np.sqrt(MSE_test), 6),
        "MAE_test": zc_util.get_two_float(MAE_test, 6),
        "MAPE_test": zc_util.get_two_float(zc_util.mape(truth, predict), 6)
    }
    # Same metrics on the training split.
    with torch.no_grad():
        outputs = model_lstm2(train_X2)
    predict2 = (outputs * (max_value - min_value) + min_value).squeeze().detach().cpu().numpy()
    truth2 = train_truth.values.reshape(-1, pred_h)
    MSE2_test = mean_squared_error(truth2, predict2)
    MAE2_test = mean_absolute_error(truth2, predict2)
    training = {
        "MSE_train": zc_util.get_two_float(MSE2_test, 6),
        "RMSE_train": zc_util.get_two_float(np.sqrt(MSE2_test), 6),
        "MAE_train": zc_util.get_two_float(MAE2_test, 6),
        "MAPE_train": zc_util.get_two_float(zc_util.mape(truth2, predict2), 6)
    }
    # 17. record results alongside the supplied config
    config["input_size_lstm"] = input_size_lstm
    data = {
        "Parameter": config,
        "Raw_Data": KPI,
        "epochs_AE": epochs_AE[-1],
        "losses_AE": losses_AE,
        "epochs_LSTM": epochs_LSTM[-1],
        "losses_LSTM": losses_LSTM,
        "Verification": Verification,
        "training": training
    }
    return data