def test(self):
    self.net.eval()

    total_results = {}
    for data_name, data_path in self.te_data_list.items():
        construct_print(f"Testing with testset: {data_name}")
        self.te_loader = create_loader(
            data_path=data_path,
            training=False,
            prefix=self.arg_dict["prefix"],
            get_length=False,
        )
        self.save_path = os.path.join(self.path_dict["save"], data_name)
        if not os.path.exists(self.save_path):
            construct_print(f"{self.save_path} does not exist. Let's create it.")
            os.makedirs(self.save_path)
        results = self._test_process(save_pre=self.save_pre)
        msg = f"Results on the testset({data_name}:'{data_path}'): {results}"
        construct_print(msg)
        write_data_to_file(msg, self.path_dict["te_log"])
        total_results[data_name] = results

    self.net.train()

    if self.arg_dict["xlsx_name"]:
        # Save the collected results into an xlsx file.
        self.xlsx_recorder.write_xlsx(self.exp_name, total_results)
def test(model, mode="test", save_pre=True):
    model.eval()

    test_dataset_dict = user_config["rgb_data"]["te_data_list"]
    if mode == "val":
        test_dataset_dict = user_config["rgb_data"]["val_data_path"]

    total_results = {}
    for data_name, data_path in test_dataset_dict.items():
        construct_print(f"Testing on the dataset: {data_name}, {data_path}")
        test_set = ImageFolder(root=data_path,
                               in_size=user_config["input_size"],
                               training=False)
        length = len(test_set)
        te_loader = create_loader(
            data_set=test_set,
            size_list=None,
            batch_size=batch_size_single_gpu,
            shuffle=False,
            num_workers=user_config["num_workers"],
            sampler=None,
            drop_last=False,
            pin_memory=True,
        )
        save_path = os.path.join(path_config["save"], data_name)
        if not os.path.exists(save_path):
            construct_print(f"{save_path} does not exist. Let's create it.")
            os.makedirs(save_path)
        results = _test_process(
            model=model,
            length=length,
            te_loader=te_loader,
            save_pre=save_pre,
            save_path=save_path,
        )
        msg = f"Results on the {mode}set({data_name}:'{data_path}'):\n{results}"
        write_data_to_file(msg, path_config["te_log"])
        construct_print(msg)
        total_results[data_name.upper()] = results
    return total_results
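# NOTE: Both test() variants above delegate the per-image evaluation loop to a
# _test_process helper defined elsewhere in the repo. The sketch below only
# illustrates what such a helper typically does for SOD testing (forward pass,
# optional saving of predicted saliency maps, metric accumulation). The loader
# output format, the "_sketch" suffix, and the returned dict are assumptions for
# illustration, not the repo's exact API.
import os

import torch
from PIL import Image


@torch.no_grad()
def _test_process_sketch(model, length, te_loader, save_pre, save_path):
    # `length` mirrors the call above; the real helper may use it, e.g. for a progress bar.
    for test_inputs, test_names in te_loader:  # assumed loader output: (images, file names)
        test_inputs = test_inputs.cuda(non_blocking=True)
        test_preds = model(test_inputs).sigmoid().cpu().numpy()
        for pred, name in zip(test_preds, test_names):
            if save_pre:
                # Save the prediction as an 8-bit grayscale map.
                pred_img = Image.fromarray((pred.squeeze() * 255).astype("uint8"))
                pred_img.save(os.path.join(save_path, name + ".png"))
            # Metric accumulation against the ground-truth mask (MAE, F-measure, ...)
            # would happen here in the real helper.
    return {}  # placeholder: the real helper returns a dict of metric values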
def _train_per_epoch(self, curr_epoch, train_loss_record):
    num_iter_per_epoch = len(self.tr_loader)
    for curr_iter_in_epoch, train_data in enumerate(self.tr_loader):
        curr_iter = curr_epoch * num_iter_per_epoch + curr_iter_in_epoch

        self.opti.zero_grad()

        train_inputs, train_masks, _ = train_data
        train_inputs = train_inputs.to(self.dev, non_blocking=True)
        train_masks = train_masks.to(self.dev, non_blocking=True)
        train_preds = self.net(train_inputs)

        train_loss, loss_item_list = get_total_loss(train_preds, train_masks, self.loss_funcs)
        if self.amp:
            with self.amp.scale_loss(train_loss, self.opti) as scaled_loss:
                scaled_loss.backward()
        else:
            train_loss.backward()
        self.opti.step()

        if self.arg_dict["sche_usebatch"]:
            self.sche.step()

        # Only fetch the scalar with item() when accumulating the loss.
        train_iter_loss = train_loss.item()
        train_batch_size = train_inputs.size(0)
        train_loss_record.update(train_iter_loss, train_batch_size)

        # Write curves and images to TensorBoard.
        if (self.arg_dict["tb_update"] > 0
                and (curr_iter + 1) % self.arg_dict["tb_update"] == 0):
            self.tb_recorder.record_curve("trloss_avg", train_loss_record.avg, curr_iter)
            self.tb_recorder.record_curve("trloss_iter", train_iter_loss, curr_iter)
            self.tb_recorder.record_curve("lr", self.opti.param_groups, curr_iter)
            self.tb_recorder.record_image("trmasks", train_masks, curr_iter)
            self.tb_recorder.record_image("trsodout", train_preds.sigmoid(), curr_iter)
            self.tb_recorder.record_image("trsodin", train_inputs, curr_iter)

        # Log the data of each iteration.
        if (self.arg_dict["print_freq"] > 0
                and (curr_iter + 1) % self.arg_dict["print_freq"] == 0):
            lr_str = ",".join([
                f"{param_groups['lr']:.7f}" for param_groups in self.opti.param_groups
            ])
            log = (
                f"{curr_iter_in_epoch}:{num_iter_per_epoch}/"
                f"{curr_iter}:{self.iter_num}/"
                f"{curr_epoch}:{self.end_epoch} "
                f"{self.exp_name}\n"
                f"Lr:{lr_str} "
                f"M:{train_loss_record.avg:.5f} C:{train_iter_loss:.5f} "
                f"{loss_item_list}"
            )
            print(log)
            write_data_to_file(log, self.path_dict["tr_log"])
def train_epoch_prefetch_generator(
    curr_epoch,
    end_epoch,
    loss_funcs,
    model,
    optimizer,
    scheduler,
    tr_loader,
    local_rank,
):
    model.train()
    train_loss_record = AvgMeter()
    for train_batch_id, (train_inputs, train_masks, train_names) in enumerate(
            BackgroundGenerator(tr_loader, max_prefetch=2)):
        curr_iter = curr_epoch * len(tr_loader) + train_batch_id

        if user_config["sche_usebatch"]:
            scheduler.step(optimizer, curr_epoch=curr_iter)

        train_inputs = train_inputs.cuda(non_blocking=True)
        train_masks = train_masks.cuda(non_blocking=True)
        train_preds = model(train_inputs)

        train_loss, loss_item_list = get_total_loss(train_preds, train_masks, loss_funcs)

        optimizer.zero_grad()
        if user_config["use_amp"]:
            with amp.scale_loss(train_loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            train_loss.backward()
        optimizer.step()

        # Average the loss across processes before logging in the distributed setting.
        if user_config["is_distributed"]:
            reduced_loss = allreduce_tensor(train_loss)
        else:
            reduced_loss = train_loss
        train_iter_loss = reduced_loss.item()
        train_loss_record.update(train_iter_loss, train_inputs.size(0))

        if local_rank == 0:
            lr_str = ",".join([
                f"{param_groups['lr']:.7f}" for param_groups in optimizer.param_groups
            ])
            log = (
                f"[I:{train_batch_id}/{len(tr_loader)}/{curr_iter}/{total_iter_num}]"
                f"[E:{curr_epoch}:{end_epoch}]>"
                f"[{exp_name}]"
                f"[Lr:{lr_str}][Avg:{train_loss_record.avg:.5f}|Cur:{train_iter_loss:.5f}|"
                f"{loss_item_list}]\n"
                f"{train_names}"
            )
            if (user_config["print_freq"] > 0
                    and (curr_iter + 1) % user_config["print_freq"] == 0):
                print(log)
            if (user_config["record_freq"] > 0
                    and (curr_iter + 1) % user_config["record_freq"] == 0):
                tb_recorder.record_curve("trloss_avg", train_loss_record.avg, curr_iter)
                tb_recorder.record_curve("trloss_iter", train_iter_loss, curr_iter)
                tb_recorder.record_curve("lr", optimizer.param_groups, curr_iter)
                tb_recorder.record_image("trmasks", train_masks, curr_iter)
                tb_recorder.record_image("trsodout", train_preds.sigmoid(), curr_iter)
                tb_recorder.record_image("trsodin", train_inputs, curr_iter)
                write_data_to_file(log, path_config["tr_log"])
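# NOTE: In the distributed branch above, allreduce_tensor averages the per-rank loss so
# that the logged value matches the single-GPU scale. Below is a minimal sketch of such a
# helper, assuming torch.distributed has already been initialized; the real helper in the
# repo may differ in detail.
import torch.distributed as dist


def allreduce_tensor_sketch(tensor):
    reduced = tensor.clone()
    dist.all_reduce(reduced, op=dist.ReduceOp.SUM)  # sum the value across all ranks
    reduced /= dist.get_world_size()  # divide by the world size to get the mean
    return reduced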