def pred_all():
    # Run the model over the whole rainy test set sequentially and record a
    # rain statistic for every observed and predicted frame via calc_r.
    hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_TEST,
                           sample_mode="sequent",
                           seq_len=in_seq + out_seq,
                           stride=25)
    while not hko_iter.use_up:
        valid_batch, valid_mask, sample_datetimes, _ = \
            hko_iter.sample(batch_size=1)
        # Record the statistic for the 5 input frames.
        for j in range(0, 5):
            calc_r(hko_iter._current_datetime, j, valid_batch[j][0][0],
                   'pred_r.txt')
        valid_batch = torch.from_numpy(valid_batch.astype(np.float32)).to(
            cfg.GLOBAL.DEVICE) / 255.0
        valid_data = valid_batch[:in_seq, ...]
        valid_label = valid_batch[in_seq:in_seq + out_seq, ...]
        mask = torch.from_numpy(valid_mask[in_seq:in_seq + out_seq,
                                           ...].astype(int)).to(
                                               cfg.GLOBAL.DEVICE)
        with torch.no_grad():
            output = encoder_forecaster(valid_data)
            loss = criterion(output, valid_label, mask)
        # Convert the prediction back to 8-bit pixel values.
        output = np.clip(output.cpu().numpy(), 0.0, 1.0)
        out = np.int16(255 * np.float32(output))
        # Record the statistic for the 20 predicted frames.
        for j in range(0, 20):
            calc_r(hko_iter._current_datetime, j + 5, out[j][0][0],
                   'pred_r.txt')
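calc_r is defined elsewhere in the script and is not shown here; from its arguments (timestamp, frame index, a single radar frame, an output file) it appears to log a per-frame rain statistic. The helper below is only a hypothetical stand-in, assuming the HKO-7 convention dBZ = pixel * 70 / 255 - 10 and the Z-R relation Z = 58.53 * R^1.56; the real calc_r may compute something different.

import numpy as np

def calc_r_sketch(current_datetime, frame_idx, frame, out_path,
                  a=58.53, b=1.56):
    # Hypothetical stand-in for calc_r: convert one 8-bit radar frame to a
    # rain rate (mm/h) via pixel -> dBZ -> Z-R inversion and append the
    # frame's mean rain rate to a text file.
    dbz = frame.astype(np.float32) * 70.0 / 255.0 - 10.0
    rain = np.power(10.0, (dbz - 10.0 * np.log10(a)) / (10.0 * b))
    with open(out_path, 'a') as f:
        f.write('{} {} {:.4f}\n'.format(current_datetime, frame_idx,
                                        float(rain.mean())))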
from nowcasting.helpers.visualization import save_hko_movie

# Example usage; the checkpoint path below does not exist as-is and needs to
# be adjusted before running.

# Load the model
encoder = Encoder(encoder_params[0], encoder_params[1]).to(cfg.GLOBAL.DEVICE)
forecaster = Forecaster(forecaster_params[0], forecaster_params[1])
encoder_forecaster = EF(encoder, forecaster).to(cfg.GLOBAL.DEVICE)
encoder_forecaster.load_state_dict(
    torch.load(
        '/home/hzzone/save/trajGRU_frame_weighted_mse/models/encoder_forecaster_45000.pth'
    ))

# Load the data
hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_TEST,
                       sample_mode="random",
                       seq_len=IN_LEN + OUT_LEN)
valid_batch, valid_mask, sample_datetimes, _ = \
    hko_iter.sample(batch_size=1)
valid_batch = valid_batch.astype(np.float32) / 255.0
valid_data = valid_batch[:IN_LEN, ...]
valid_label = valid_batch[IN_LEN:IN_LEN + OUT_LEN, ...]
mask = valid_mask[IN_LEN:IN_LEN + OUT_LEN, ...].astype(int)
torch_valid_data = torch.from_numpy(valid_data).to(cfg.GLOBAL.DEVICE)

# Generate the prediction
with torch.no_grad():
    output = encoder_forecaster(torch_valid_data)
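save_hko_movie is imported above but not called in this snippet, and its signature is not shown here. One simple way to inspect the result is to dump the predicted frames as images; the output directory name below is illustrative.

import os
import cv2
import numpy as np

# output has shape (OUT_LEN, batch=1, channel=1, H, W); write each predicted
# frame of the single sample as an 8-bit PNG.
pred = np.clip(output.cpu().numpy(), 0.0, 1.0)
os.makedirs('pred_frames', exist_ok=True)  # illustrative directory
for t in range(pred.shape[0]):
    frame = (pred[t, 0, 0] * 255.0).astype(np.uint8)
    cv2.imwrite(os.path.join('pred_frames', 'frame_{:02d}.png'.format(t)),
                frame)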
torch.cuda.set_device(0)

in_seq = 5
out_seq = 20

encoder = Encoder(encoder_params[0], encoder_params[1]).to(cfg.GLOBAL.DEVICE)
forecaster = Forecaster(forecaster_params[0], forecaster_params[1])
encoder_forecaster = EF(encoder, forecaster).to(cfg.GLOBAL.DEVICE)
encoder_forecaster.load_state_dict(
    torch.load('encoder_forecaster_45000.pth'))
# Alternative checkpoint: save/models/encoder_forecaster_100000.pth
torch.save(encoder_forecaster, 'full_encoder_forecaster_45000.pth')

criterion = Weighted_mse_mae().to(cfg.GLOBAL.DEVICE)

hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_TEST,
                       sample_mode="random",
                       seq_len=in_seq + out_seq)
valid_batch, valid_mask, sample_datetimes, _ = \
    hko_iter.sample(batch_size=1)


def filter(img):
    # Zero out pixels outside the radar's circular field of view (radius 240
    # around the image centre) and pixels close to the background level.
    # Note: the name shadows the Python built-in filter.
    h, w = img.shape
    for i in range(h):
        for j in range(w):
            if ((i - 240)**2 + (j - 240)**2) >= 240**2:
                img[i][j] = 0
            elif img[i][j] <= (img.min() + 100):
                img[i][j] = 0
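The double Python loop in filter is slow on 480x480 frames. A vectorized NumPy equivalent is sketched below; note it fixes the threshold at the original img.min() + 100 up front, whereas the loop above recomputes img.min() while pixels are being zeroed, so results can differ slightly on frames whose minimum is not already 0.

import numpy as np

def filter_vectorized(img):
    # Zero pixels outside the circular radar footprint (radius 240 around
    # (240, 240)) and pixels at or below the background threshold, without
    # Python-level loops.  Modifies img in place and returns it.
    h, w = img.shape
    yy, xx = np.ogrid[:h, :w]
    outside = (yy - 240) ** 2 + (xx - 240) ** 2 >= 240 ** 2
    low = img <= img.min() + 100
    img[outside | low] = 0
    return img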
    'trajGRU_balanced_mse_mae': encoder_forecaster1,
    'trajGRU_frame_weighted_mse': encoder_forecaster2,
    'last_frame': LastFrame,
    'rover_nonlinear': Rover()
})

model_run_avarage_time = dict()
with torch.no_grad():
    for name, model in models.items():
        is_deeplearning_model = (torch.nn.Module in model.__class__.__bases__)
        if is_deeplearning_model:
            model.eval()
        evaluator = HKOEvaluation(seq_len=OUT_LEN, use_central=False)
        # hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_VALID,
        hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_TEST,
                               sample_mode="sequent",
                               seq_len=IN_LEN + OUT_LEN,
                               stride=cfg.HKO.BENCHMARK.STRIDE)
        model_run_avarage_time[name] = 0.0
        valid_time = 0
        while not hko_iter.use_up:
            valid_batch, valid_mask, sample_datetimes, _ = \
                hko_iter.sample(batch_size=1)
            if valid_batch.shape[1] == 0:
                break
            if not cfg.HKO.EVALUATION.VALID_DATA_USE_UP and valid_time > cfg.HKO.EVALUATION.VALID_TIME:
                break
            valid_batch = valid_batch.astype(np.float32) / 255.0
            valid_data = valid_batch[:IN_LEN, ...]
            valid_label = valid_batch[IN_LEN:IN_LEN + OUT_LEN, ...]
def train_and_test(encoder_forecaster, optimizer, criterion, lr_scheduler,
                   batch_size, max_iterations, test_iteration_interval,
                   test_and_save_checkpoint_iterations, folder_name,
                   probToPixel=None):
    # HKO-7 evaluater and dataloader
    IN_LEN = cfg.HKO.BENCHMARK.IN_LEN
    OUT_LEN = cfg.HKO.BENCHMARK.OUT_LEN
    evaluater = HKOEvaluation(seq_len=OUT_LEN, use_central=False)
    train_hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_TRAIN,
                                 sample_mode="random",
                                 seq_len=IN_LEN + OUT_LEN)
    valid_hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_VALID,
                                 sample_mode="sequent",
                                 seq_len=IN_LEN + OUT_LEN,
                                 stride=cfg.HKO.BENCHMARK.STRIDE)

    train_loss = 0.0
    save_dir = osp.join(cfg.GLOBAL.MODEL_SAVE_DIR, folder_name)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.mkdir(save_dir)
    model_save_dir = osp.join(save_dir, 'models')
    log_dir = osp.join(save_dir, 'logs')
    all_scalars_file_name = osp.join(save_dir, "all_scalars.json")
    pkl_save_dir = osp.join(save_dir, 'pkl')
    if osp.exists(all_scalars_file_name):
        os.remove(all_scalars_file_name)
    if osp.exists(log_dir):
        shutil.rmtree(log_dir)
    if osp.exists(model_save_dir):
        shutil.rmtree(model_save_dir)
    os.mkdir(model_save_dir)

    writer = SummaryWriter(log_dir)

    for itera in tqdm(range(1, max_iterations + 1)):
        lr_scheduler.step()
        train_batch, train_mask, sample_datetimes, _ = \
            train_hko_iter.sample(batch_size=batch_size)
        train_batch = torch.from_numpy(train_batch.astype(np.float32)).to(
            cfg.GLOBAL.DEVICE) / 255.0
        train_data = train_batch[:IN_LEN, ...]
        train_label = train_batch[IN_LEN:IN_LEN + OUT_LEN, ...]
        mask = torch.from_numpy(train_mask[IN_LEN:IN_LEN + OUT_LEN,
                                           ...].astype(int)).to(
                                               cfg.GLOBAL.DEVICE)

        encoder_forecaster.train()
        optimizer.zero_grad()
        output = encoder_forecaster(train_data)
        loss = criterion(output, train_label, mask)
        loss.backward()
        torch.nn.utils.clip_grad_value_(encoder_forecaster.parameters(),
                                        clip_value=50.0)
        optimizer.step()
        train_loss += loss.item()

        train_label_numpy = train_label.cpu().numpy()
        if probToPixel is None:
            # Regression case (no classification loss)
            output_numpy = np.clip(output.detach().cpu().numpy(), 0.0, 1.0)
        else:
            # Classification case (output: S*B*C*H*W): convert class
            # probabilities back to pixel values using the thresholds of the
            # classification loss
            output_numpy = probToPixel(output.detach().cpu().numpy(),
                                       train_label, mask,
                                       lr_scheduler.get_lr()[0])

        evaluater.update(train_label_numpy, output_numpy, mask.cpu().numpy())

        if itera % test_iteration_interval == 0:
            _, _, train_csi, train_hss, _, train_mse, train_mae, train_balanced_mse, train_balanced_mae, _ = evaluater.calculate_stat()
            train_loss = train_loss / test_iteration_interval
            evaluater.clear_all()
            with torch.no_grad():
                encoder_forecaster.eval()
                valid_hko_iter.reset()
                valid_loss = 0.0
                valid_time = 0
                while not valid_hko_iter.use_up:
                    valid_batch, valid_mask, sample_datetimes, _ = \
                        valid_hko_iter.sample(batch_size=batch_size)
                    if valid_batch.shape[1] == 0:
                        break
                    if not cfg.HKO.EVALUATION.VALID_DATA_USE_UP and valid_time > cfg.HKO.EVALUATION.VALID_TIME:
                        break
                    valid_time += 1
                    valid_batch = torch.from_numpy(
                        valid_batch.astype(np.float32)).to(
                            cfg.GLOBAL.DEVICE) / 255.0
                    valid_data = valid_batch[:IN_LEN, ...]
                    valid_label = valid_batch[IN_LEN:IN_LEN + OUT_LEN, ...]
                    mask = torch.from_numpy(valid_mask[IN_LEN:IN_LEN + OUT_LEN,
                                                       ...].astype(int)).to(
                                                           cfg.GLOBAL.DEVICE)
                    output = encoder_forecaster(valid_data)
                    loss = criterion(output, valid_label, mask)
                    valid_loss += loss.item()
                    valid_label_numpy = valid_label.cpu().numpy()
                    if probToPixel is None:
                        output_numpy = np.clip(output.detach().cpu().numpy(),
                                               0.0, 1.0)
                    else:
                        output_numpy = probToPixel(
                            output.detach().cpu().numpy(), valid_label, mask,
                            lr_scheduler.get_lr()[0])
                    evaluater.update(valid_label_numpy, output_numpy,
                                     mask.cpu().numpy())

                _, _, valid_csi, valid_hss, _, valid_mse, valid_mae, valid_balanced_mse, valid_balanced_mae, _ = evaluater.calculate_stat()
                evaluater.clear_all()
                valid_loss = valid_loss / valid_time

            writer.add_scalars("loss", {
                "train": train_loss,
                "valid": valid_loss
            }, itera)
            plot_result(writer, itera,
                        (train_csi, train_hss, train_mse, train_mae,
                         train_balanced_mse, train_balanced_mae),
                        (valid_csi, valid_hss, valid_mse, valid_mae,
                         valid_balanced_mse, valid_balanced_mae))
            writer.export_scalars_to_json(all_scalars_file_name)
            train_loss = 0.0

        if itera % test_and_save_checkpoint_iterations == 0:
            torch.save(
                encoder_forecaster.state_dict(),
                osp.join(model_save_dir,
                         'encoder_forecaster_{}.pth'.format(itera)))

    writer.close()
def train_and_test(encoder_forecaster, optimizer, criterion, lr_scheduler,
                   batch_size, max_iterations, test_iteration_interval,
                   test_and_save_checkpoint_iterations, folder_name,
                   probToPixel=None):
    # HKO-7 evaluater and dataloader
    # Length of the input sequence
    IN_LEN = cfg.HKO.BENCHMARK.IN_LEN
    # Length of the sequence to be predicted
    OUT_LEN = cfg.HKO.BENCHMARK.OUT_LEN
    # Evaluation class provided in the hko package
    evaluater = HKOEvaluation(seq_len=OUT_LEN, use_central=False)
    # Load the training data with the iterator from the dataloader module
    train_hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_TRAIN,
                                 sample_mode="random",
                                 seq_len=IN_LEN + OUT_LEN)
    # Set up the validation set, again using the existing data iterator
    valid_hko_iter = HKOIterator(pd_path=cfg.HKO_PD.RAINY_VALID,
                                 sample_mode="sequent",
                                 seq_len=IN_LEN + OUT_LEN,
                                 stride=cfg.HKO.BENCHMARK.STRIDE)

    train_loss = 0.0
    # Decide where checkpoints and logs will be stored
    save_dir = osp.join(cfg.GLOBAL.MODEL_SAVE_DIR, folder_name)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.mkdir(save_dir)
    model_save_dir = osp.join(save_dir, 'models')
    log_dir = osp.join(save_dir, 'logs')
    all_scalars_file_name = osp.join(save_dir, "all_scalars.json")
    pkl_save_dir = osp.join(save_dir, 'pkl')
    if osp.exists(all_scalars_file_name):
        os.remove(all_scalars_file_name)
    if osp.exists(log_dir):
        shutil.rmtree(log_dir)
    if osp.exists(model_save_dir):
        shutil.rmtree(model_save_dir)
    os.mkdir(model_save_dir)

    writer = SummaryWriter(log_dir)

    # Training loop
    for itera in tqdm(range(1, max_iterations + 1)):
        # Update the optimizer learning rate
        lr_scheduler.step()
        # Draw the data for this iteration; train_batch holds the frames of
        # the current batch (shape: (seq_len, batch_size, 1, height, width))
        train_batch, train_mask, sample_datetimes, _ = \
            train_hko_iter.sample(batch_size=batch_size)
        # Convert from numpy and normalise to [0, 1]
        train_batch = torch.from_numpy(train_batch.astype(np.float32)).to(
            cfg.GLOBAL.DEVICE) / 255.0
        # The first IN_LEN frames are the model input
        train_data = train_batch[:IN_LEN, ...]
        # The following OUT_LEN frames are the training labels
        train_label = train_batch[IN_LEN:IN_LEN + OUT_LEN, ...]
        # Mask matrix
        mask = torch.from_numpy(train_mask[IN_LEN:IN_LEN + OUT_LEN,
                                           ...].astype(int)).to(
                                               cfg.GLOBAL.DEVICE)

        # Switch to training mode (enables BatchNormalization and Dropout)
        encoder_forecaster.train()
        # Zero the gradients
        optimizer.zero_grad()
        # Feed the training data through the network
        output = encoder_forecaster(train_data)
        # Compute the loss
        loss = criterion(output, train_label, mask)
        # Back-propagate
        loss.backward()
        torch.nn.utils.clip_grad_value_(encoder_forecaster.parameters(),
                                        clip_value=50.0)
        # Update the parameters
        optimizer.step()
        train_loss += loss.item()

        train_label_numpy = train_label.cpu().numpy()
        if probToPixel is None:
            # Regression case (no classification loss)
            output_numpy = np.clip(output.detach().cpu().numpy(), 0.0, 1.0)
        else:
            # Classification case (output: S*B*C*H*W): convert class
            # probabilities back to pixel values using the thresholds of the
            # classification loss
            output_numpy = probToPixel(output.detach().cpu().numpy(),
                                       train_label, mask,
                                       lr_scheduler.get_lr()[0])

        evaluater.update(train_label_numpy, output_numpy, mask.cpu().numpy())

        # Run validation and log statistics at this iteration
        if itera % test_iteration_interval == 0:
            _, _, train_csi, train_hss, _, train_mse, train_mae, train_balanced_mse, train_balanced_mae, _ = evaluater.calculate_stat()
            train_loss = train_loss / test_iteration_interval
            evaluater.clear_all()
            with torch.no_grad():
                encoder_forecaster.eval()
                valid_hko_iter.reset()
                valid_loss = 0.0
                valid_time = 0
                while not valid_hko_iter.use_up:
                    valid_batch, valid_mask, sample_datetimes, _ = \
                        valid_hko_iter.sample(batch_size=batch_size)
                    if valid_batch.shape[1] == 0:
                        break
                    if not cfg.HKO.EVALUATION.VALID_DATA_USE_UP and valid_time > cfg.HKO.EVALUATION.VALID_TIME:
                        break
                    valid_time += 1
                    valid_batch = torch.from_numpy(
                        valid_batch.astype(np.float32)).to(
                            cfg.GLOBAL.DEVICE) / 255.0
                    # Data used for the current validation step
                    valid_data = valid_batch[:IN_LEN, ...]
                    # Labels for the current validation step
                    valid_label = valid_batch[IN_LEN:IN_LEN + OUT_LEN, ...]
                    mask = torch.from_numpy(valid_mask[IN_LEN:IN_LEN + OUT_LEN,
                                                       ...].astype(int)).to(
                                                           cfg.GLOBAL.DEVICE)
                    # Generate the prediction
                    output = encoder_forecaster(valid_data)
                    # Compute the loss
                    loss = criterion(output, valid_label, mask)
                    valid_loss += loss.item()
                    valid_label_numpy = valid_label.cpu().numpy()
                    if probToPixel is None:
                        output_numpy = np.clip(output.detach().cpu().numpy(),
                                               0.0, 1.0)
                    else:
                        output_numpy = probToPixel(
                            output.detach().cpu().numpy(), valid_label, mask,
                            lr_scheduler.get_lr()[0])
                    evaluater.update(valid_label_numpy, output_numpy,
                                     mask.cpu().numpy())

                _, _, valid_csi, valid_hss, _, valid_mse, valid_mae, valid_balanced_mse, valid_balanced_mae, _ = evaluater.calculate_stat()
                evaluater.clear_all()
                valid_loss = valid_loss / valid_time

            writer.add_scalars("loss", {
                "train": train_loss,
                "valid": valid_loss
            }, itera)
            # Write the evaluation results to the writer
            plot_result(writer, itera,
                        (train_csi, train_hss, train_mse, train_mae,
                         train_balanced_mse, train_balanced_mae),
                        (valid_csi, valid_hss, valid_mse, valid_mae,
                         valid_balanced_mse, valid_balanced_mae))
            # Dump the scalars stored in the writer to the specified JSON file
            writer.export_scalars_to_json(all_scalars_file_name)
            train_loss = 0.0

        # Save a model checkpoint
        if itera % test_and_save_checkpoint_iterations == 0:
            torch.save(
                encoder_forecaster.state_dict(),
                osp.join(model_save_dir,
                         'encoder_forecaster_{}.pth'.format(itera)))

    writer.close()
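A minimal sketch of how train_and_test could be wired up, reusing the model construction shown earlier; the Adam optimizer, learning rate, milestones, batch size and iteration counts here are illustrative assumptions rather than the settings behind the checkpoints referenced above.

import torch
from torch.optim import lr_scheduler

# Illustrative hyper-parameters (assumed values, not the original settings).
LR = 1e-4
batch_size = 4
max_iterations = 100000

encoder = Encoder(encoder_params[0], encoder_params[1]).to(cfg.GLOBAL.DEVICE)
forecaster = Forecaster(forecaster_params[0], forecaster_params[1])
encoder_forecaster = EF(encoder, forecaster).to(cfg.GLOBAL.DEVICE)

criterion = Weighted_mse_mae().to(cfg.GLOBAL.DEVICE)
optimizer = torch.optim.Adam(encoder_forecaster.parameters(), lr=LR)
scheduler = lr_scheduler.MultiStepLR(optimizer,
                                     milestones=[30000, 60000],
                                     gamma=0.1)

train_and_test(encoder_forecaster, optimizer, criterion, scheduler,
               batch_size, max_iterations,
               test_iteration_interval=1000,
               test_and_save_checkpoint_iterations=5000,
               folder_name='trajGRU_frame_weighted_mse')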