Beispiel #1
0
    def test(self):
        """Evaluate the network on every configured test set and log results.

        For each ``(data_name, data_path)`` pair in ``self.te_data_list`` this
        method builds a loader with ``create_loader``, makes sure the
        per-dataset directory under ``self.path_dict["save"]`` exists, runs
        ``self._test_process`` and reports the result through
        ``construct_print`` and ``write_data_to_file`` (using the ``te_log``
        path). When ``self.arg_dict["xlsx_name"]`` is truthy, the collected
        results are also passed to ``self.xlsx_recorder.write_xlsx``.

        ``self.net.train()`` is called in a ``finally`` clause, so it runs
        even when evaluating one of the datasets raises.
        """
        self.net.eval()

        total_results = {}
        try:
            for data_name, data_path in self.te_data_list.items():
                construct_print(f"Testing with testset: {data_name}")
                # Stored on the instance; presumably read by _test_process
                # (its body is not visible here).
                self.te_loader = create_loader(
                    data_path=data_path,
                    training=False,
                    prefix=self.arg_dict["prefix"],
                    get_length=False,
                )
                self.save_path = os.path.join(self.path_dict["save"], data_name)
                if not os.path.exists(self.save_path):
                    construct_print(
                        f"{self.save_path} do not exist. Let's create it.")
                    # exist_ok guards against another process creating the
                    # directory between the check above and this call.
                    os.makedirs(self.save_path, exist_ok=True)
                results = self._test_process(save_pre=self.save_pre)
                msg = f"Results on the testset({data_name}:'{data_path}'): {results}"
                construct_print(msg)
                write_data_to_file(msg, self.path_dict["te_log"])

                total_results[data_name] = results
        finally:
            # Always undo the eval() call above, even if a dataset failed.
            self.net.train()

        if self.arg_dict["xlsx_name"]:
            # Save the collected results into the xlsx file.
            self.xlsx_recorder.write_xlsx(self.exp_name, total_results)
Beispiel #2
0
def test(model, mode="test", save_pre=True):
    """Evaluate ``model`` on each configured dataset and return the results.

    Args:
        model: Network to evaluate; ``model.eval()`` is called on it and it
            is not switched back by this function.
        mode: ``"val"`` selects ``user_config["rgb_data"]["val_data_path"]``;
            any other value uses ``user_config["rgb_data"]["te_data_list"]``.
            The value is also embedded in the log message.
        save_pre: Forwarded unchanged to ``_test_process``.

    Returns:
        dict mapping each upper-cased dataset name to whatever
        ``_test_process`` returned for that dataset.
    """
    model.eval()

    test_dataset_dict = user_config["rgb_data"]["te_data_list"]
    if mode == "val":
        test_dataset_dict = user_config["rgb_data"]["val_data_path"]

    total_results = {}
    for data_name, data_path in test_dataset_dict.items():
        construct_print(f"Testing on the dataset: {data_name}, {data_path}")
        test_set = ImageFolder(root=data_path,
                               in_size=user_config["input_size"],
                               training=False)
        length = len(test_set)
        te_loader = create_loader(
            data_set=test_set,
            size_list=None,
            batch_size=batch_size_single_gpu,
            shuffle=False,
            num_workers=user_config["num_workers"],
            sampler=None,
            drop_last=False,
            pin_memory=True,
        )
        save_path = os.path.join(path_config["save"], data_name)
        if not os.path.exists(save_path):
            construct_print(f"{save_path} do not exist. Let's create it.")
            # exist_ok guards against another process creating the directory
            # between the check above and this call.
            os.makedirs(save_path, exist_ok=True)
        results = _test_process(
            model=model,
            length=length,
            te_loader=te_loader,
            save_pre=save_pre,
            save_path=save_path,
        )
        msg = f"Results on the {mode}set({data_name}:'{data_path}'):\n{results}"
        write_data_to_file(msg, path_config["te_log"])
        construct_print(msg)

        total_results[data_name.upper()] = results
    return total_results
Beispiel #3
0
    def _train_per_epoch(self, curr_epoch, train_loss_record):
        """Run one pass over ``self.tr_loader``, updating the network.

        Args:
            curr_epoch: Index of the current epoch; combined with the number
                of batches per epoch to derive the global iteration index.
            train_loss_record: Running-average meter; ``update(loss, n)`` is
                called once per batch and ``avg`` is read for logging.

        Every batch is moved to ``self.dev``, passed through ``self.net``,
        and the loss from ``get_total_loss`` is back-propagated (through
        ``self.amp.scale_loss`` when ``self.amp`` is truthy) before
        ``self.opti.step()``. Depending on ``self.arg_dict`` the scheduler is
        stepped per batch, curves/images are sent to ``self.tb_recorder``
        and a text log line is printed and written to the ``tr_log`` file.
        """
        # The loader length does not change while iterating; compute it once.
        num_iter_per_epoch = len(self.tr_loader)

        for curr_iter_in_epoch, train_data in enumerate(self.tr_loader):
            curr_iter = curr_epoch * num_iter_per_epoch + curr_iter_in_epoch

            self.opti.zero_grad()

            # The third element of each batch is not used during training.
            train_inputs, train_masks, _ = train_data
            train_inputs = train_inputs.to(self.dev, non_blocking=True)
            train_masks = train_masks.to(self.dev, non_blocking=True)
            train_preds = self.net(train_inputs)

            train_loss, loss_item_list = get_total_loss(
                train_preds, train_masks, self.loss_funcs)
            if self.amp:
                # NOTE(review): scale_loss(loss, optimizer) looks like the
                # NVIDIA apex amp API; confirm against where self.amp is set.
                with self.amp.scale_loss(train_loss, self.opti) as scaled_loss:
                    scaled_loss.backward()
            else:
                train_loss.backward()
            self.opti.step()

            if self.arg_dict["sche_usebatch"]:
                self.sche.step()

            # Only call item() when accumulating the value for the record.
            train_iter_loss = train_loss.item()
            train_batch_size = train_inputs.size(0)
            train_loss_record.update(train_iter_loss, train_batch_size)

            # Send curves and images to tensorboard.
            if (self.arg_dict["tb_update"] > 0
                    and (curr_iter + 1) % self.arg_dict["tb_update"] == 0):
                self.tb_recorder.record_curve("trloss_avg",
                                              train_loss_record.avg, curr_iter)
                self.tb_recorder.record_curve("trloss_iter", train_iter_loss,
                                              curr_iter)
                self.tb_recorder.record_curve("lr", self.opti.param_groups,
                                              curr_iter)
                self.tb_recorder.record_image("trmasks", train_masks,
                                              curr_iter)
                self.tb_recorder.record_image("trsodout",
                                              train_preds.sigmoid(), curr_iter)
                self.tb_recorder.record_image("trsodin", train_inputs,
                                              curr_iter)
            # Print and write the text log for this iteration.
            if (self.arg_dict["print_freq"] > 0
                    and (curr_iter + 1) % self.arg_dict["print_freq"] == 0):
                lr_str = ",".join([
                    f"{param_groups['lr']:.7f}"
                    for param_groups in self.opti.param_groups
                ])
                log = (
                    f"{curr_iter_in_epoch}:{num_iter_per_epoch}/"
                    f"{curr_iter}:{self.iter_num}/"
                    f"{curr_epoch}:{self.end_epoch} "
                    f"{self.exp_name}\n"
                    f"Lr:{lr_str} "
                    f"M:{train_loss_record.avg:.5f} C:{train_iter_loss:.5f} "
                    f"{loss_item_list}")
                print(log)
                write_data_to_file(log, self.path_dict["tr_log"])
Beispiel #4
0
def train_epoch_prefetch_generator(
    curr_epoch,
    end_epoch,
    loss_funcs,
    model,
    optimizer,
    scheduler,
    tr_loader,
    local_rank,
):
    """Train ``model`` for one epoch, reading batches via a background prefetcher.

    Args:
        curr_epoch: Index of the current epoch; used to derive the global
            iteration index and shown in the log line.
        end_epoch: Final epoch number; only used in the log line.
        loss_funcs: Forwarded to ``get_total_loss``.
        model: Network to train; ``model.train()`` is called on it.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        scheduler: Stepped once per batch, before the forward pass, when
            ``user_config["sche_usebatch"]`` is truthy.
        tr_loader: Training loader; wrapped in ``BackgroundGenerator`` with
            ``max_prefetch=2``. Each item is unpacked as
            ``(inputs, masks, names)``.
        local_rank: Only rank ``0`` prints, records to tensorboard and writes
            the training log.

    Returns:
        None.
    """
    model.train()
    train_loss_record = AvgMeter()

    # The loader length does not change while iterating; compute it once.
    num_iter_per_epoch = len(tr_loader)

    for train_batch_id, (train_inputs, train_masks, train_names) in enumerate(
            BackgroundGenerator(tr_loader, max_prefetch=2)):
        curr_iter = curr_epoch * num_iter_per_epoch + train_batch_id
        if user_config["sche_usebatch"]:
            # The global iteration index is passed as ``curr_epoch`` here.
            scheduler.step(optimizer, curr_epoch=curr_iter)

        train_inputs = train_inputs.cuda(non_blocking=True)
        train_masks = train_masks.cuda(non_blocking=True)
        train_preds = model(train_inputs)

        train_loss, loss_item_list = get_total_loss(train_preds, train_masks,
                                                    loss_funcs)

        optimizer.zero_grad()
        if user_config["use_amp"]:
            with amp.scale_loss(train_loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            train_loss.backward()
        optimizer.step()

        if user_config["is_distributed"]:
            reduced_loss = allreduce_tensor(train_loss)
        else:
            reduced_loss = train_loss
        train_iter_loss = reduced_loss.item()
        train_loss_record.update(train_iter_loss, train_inputs.size(0))

        # Only the rank-0 process produces any log output.
        if local_rank != 0:
            continue

        next_iter = curr_iter + 1
        should_print = (user_config["print_freq"] > 0
                        and next_iter % user_config["print_freq"] == 0)
        should_record = (user_config["record_freq"] > 0
                         and next_iter % user_config["record_freq"] == 0)
        # Skip building the log string when nothing will consume it.
        if not (should_print or should_record):
            continue

        lr_str = ",".join([
            f"{param_groups['lr']:.7f}"
            for param_groups in optimizer.param_groups
        ])
        log = (
            f"[I:{train_batch_id}/{num_iter_per_epoch}/{curr_iter}/{total_iter_num}][E:{curr_epoch}:{end_epoch}]>["
            f"{exp_name}]"
            f"[Lr:{lr_str}][Avg:{train_loss_record.avg:.5f}|Cur:{train_iter_loss:.5f}|"
            f"{loss_item_list}]\n"
            f"{train_names}")
        if should_print:
            print(log)
        if should_record:
            tb_recorder.record_curve("trloss_avg", train_loss_record.avg,
                                     curr_iter)
            # Per-iteration loss; the running average goes to "trloss_avg".
            tb_recorder.record_curve("trloss_iter", train_iter_loss,
                                     curr_iter)
            tb_recorder.record_curve("lr", optimizer.param_groups,
                                     curr_iter)
            tb_recorder.record_image("trmasks", train_masks, curr_iter)
            tb_recorder.record_image("trsodout", train_preds.sigmoid(),
                                     curr_iter)
            tb_recorder.record_image("trsodin", train_inputs, curr_iter)
            write_data_to_file(log, path_config["tr_log"])