Example #1
def test(model, test_loader, class_weights, class_encoding):
    print("\nTesting...\n")
    num_classes = len(class_encoding)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, device)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(iteration_loss=False)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    # if args.imshow_batch:
    if True:
        print("A batch of predictions from the test set...")
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)
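
All of these examples derive ignore_index from a class_encoding mapping. A minimal sketch of the assumed structure, with illustrative class names and RGB values (hypothetical, not taken from any particular dataset):

from collections import OrderedDict

# Hypothetical encoding: an ordered mapping from class name to RGB color,
# matching the list(class_encoding).index('unlabeled') lookups used above
class_encoding = OrderedDict([
    ('unlabeled', (0, 0, 0)),
    ('road', (128, 64, 128)),
    ('car', (0, 0, 142)),
])

num_classes = len(class_encoding)                       # 3
ignore_index = list(class_encoding).index('unlabeled')  # 0
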
Example #2
    def test(self):
        """
        Test the generator.
        """
        print("\nTesting...\n")

        num_classes = len(self.class_encoding)

        # We are going to use the CrossEntropyLoss loss function as it's the
        # one most frequently used in classification problems with multiple
        # classes, which fits this problem. This criterion combines LogSoftmax
        # and NLLLoss.
        criterion = nn.CrossEntropyLoss(weight=self.class_weights)

        # Evaluation metric
        ignore_index = list(self.class_encoding).index('unlabeled')
        metric = IoU(num_classes, ignore_index=ignore_index)

        # Test the trained model on the test set
        test = Test(self.generator, self.test_loader, criterion, metric,
                    self.device)

        print(">>>> Running test dataset")

        loss, (iou, miou) = test.run_epoch(iteration_loss=True)
        class_iou = dict(zip(self.class_encoding.keys(), iou))

        print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

        # Print per class IoU
        for key, class_iou in zip(self.class_encoding.keys(), iou):
            print("{0}: {1:.4f}".format(key, class_iou))
Example #3
def test(model, test_loader, class_weights, class_encoding):
    print("Testing...")
    num_classes = len(class_encoding)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    if use_cuda:
        criterion = criterion.cuda()

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, use_cuda)

    print(">>>> Running test dataset")
    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))
    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))
Example #4
def test(model, test_loader, class_weights, class_encoding):
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    if torch.cuda.is_available():
        if args.cuda:
            device = 'cuda'
            if torch.cuda.device_count() > 1:
                model = torch.nn.DataParallel(model)
            torch.cuda.empty_cache()
        else:
            device = 'cpu'
    else:
        device = 'cpu'

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes, which
    # fits the problem. This criterion combines LogSoftmax and NLLLoss.
    criterion_seg = nn.CrossEntropyLoss(weight=class_weights)
    #criterion_cls = nn.BCEWithLogitsLoss(weight=class_weights)
    criterion_cls = nn.KLDivLoss(reduction='sum')
    criterion = [criterion_seg, criterion_cls]

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, device)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    # class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("A batch of predictions from the test set...")
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding, device)
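
The device-selection block in Example #4 can be condensed. A sketch under the same assumptions (an args.cuda flag and optional DataParallel wrapping, with args and model coming from the surrounding example):

# Assumes args and model from the surrounding example
use_cuda = args.cuda and torch.cuda.is_available()
device = 'cuda' if use_cuda else 'cpu'
if use_cuda and torch.cuda.device_count() > 1:
    model = torch.nn.DataParallel(model)
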
Example #5
def test(model, test_loader, class_weights, class_encoding):
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, device,
                args.backbone.lower())

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Save arguments
    summary_filename_performance = os.path.join(args.save_dir,
                                                args.name + '_TEST_' + '.txt')
    with open(summary_filename_performance, 'w') as summary_file_2:
        summary_file_2.write("\nTEST\n")
        summary_file_2.write("Mean IoU: {0}\n".format(miou))
        for key, class_iou in zip(class_encoding.keys(), iou):
            summary_file_2.write("{0}: {1:.4f}\n".format(key, class_iou))
        # (the file is closed automatically when the with-block exits)

    # Show a batch of samples and labels
    if args.imshow_batch_test:
        print("A batch of predictions from the test set...")
        images, gt_labels, _, _ = next(iter(test_loader))
        predict(model, images, gt_labels, class_encoding)
Example #6
def test(model, test_loader, class_weights, class_encoding, step):
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes, which
    # fits the problem. This criterion combines LogSoftmax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    if use_cuda:
        criterion = criterion.cuda()

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, use_cuda, step)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("A batch of predictions from the test set...")
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)
Example #7
def test(model, test_loader, class_weights, class_encoding):
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    # Use the CrossEntropyLoss loss function
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    if use_cuda:
        criterion = criterion.cuda()

    # Evaluation metric
    # if not args.ignore_unlabeled:
    #     ignore_index = list(class_encoding).index('unlabeled')
    # else:
    #     ignore_index = None
    ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, use_cuda)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("A batch of predictions from the test set...")
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)
Example #8
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    if args.backbone.lower() == 'fcn':
        model = torchvision.models.segmentation.fcn_resnet50(
            num_classes=num_classes).to(device)
        # FCN plus resnet weights
        model_2 = models.resnet50(pretrained=True).to(device)

        model_dict = model.state_dict()
        pretrained_dict = model_2.state_dict()
        newpretrained_dict = collections.OrderedDict()

        for key, val in pretrained_dict.items():
            newpretrained_dict['backbone.' + key] = val

        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in newpretrained_dict.items() if k in model_dict
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)

    elif args.backbone.lower() == 'deeplab':
        model = torchvision.models.segmentation.deeplabv3_resnet50(
            num_classes=num_classes).to(device)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    if args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.learning_rate,
                               weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    start_epoch = 0
    best_miou = 0

    # Start Training
    train = Train(model, train_loader, optimizer, criterion, metric, device,
                  args.backbone.lower(), args.consistency)
    val = Test(model, val_loader, criterion, metric, device,
               args.backbone.lower())

    for epoch in range(start_epoch, args.epochs):

        print(">>>> [Epoch: {0:d}] Training".format(epoch))
        epoch_loss, (iou,
                     miou) = train.run_epoch(iteration_loss=args.print_step,
                                             epochnum=epoch)
        lr_updater.step()
        print('\n')
        print(
            ">>>> [Epoch: {0:d}] Training Avg. loss: {1:.4f} | Mean IoU: {2:.4f}"
            .format(epoch, epoch_loss, miou))

        # Show the validation results every VAl_num epochs
        VAl_num = args.save_val_every_epoch
        if (epoch + 1) % VAl_num == 0 or epoch + 1 == args.epochs:

            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(iteration_loss=args.print_step,
                                              epochnum=epoch)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # 			# Save arguments
            # 			summary_filename_performance = os.path.join(args.save_dir, args.name + 'summary_epoch_' + str(epoch) + '.txt')
            # 			with open(summary_filename_performance, 'w') as summary_file_2:
            #
            # 				summary_file_2.write("\nVALIDATION\n")
            # 				summary_file_2.write("Epoch: {0}\n". format(epoch))
            # 				summary_file_2.write("Mean IoU: {0}\n". format(miou))
            # 				for key, class_iou in zip(class_encoding.keys(), iou):
            # 				   summary_file_2.write("{0}: {1:.4f}\n".format(key, class_iou))

            # 			summary_file_2.close()
            utils.save_checkpoint_epoch(model, optimizer, epoch, best_miou,
                                        args)

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
Example #9
def train(rank, world_size, cfg):

    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # init distributed compute
    master_port = int(os.environ.get("MASTER_PORT", 8738))
    master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
    tcp_store = torch.distributed.TCPStore(master_addr, master_port,
                                           world_size, rank == 0)
    torch.distributed.init_process_group('nccl',
                                         store=tcp_store,
                                         rank=rank,
                                         world_size=world_size)

    # Setup device
    if torch.cuda.is_available():
        device = torch.device("cuda", rank)
        torch.cuda.set_device(device)
    else:
        assert world_size == 1
        device = torch.device("cpu")

    if rank == 0:
        writer = SummaryWriter(logdir=cfg["logdir"])
        logger = get_logger(cfg["logdir"])
        logger.info("Let SMNet training begin !!")

    # Setup Dataloader
    t_loader = SMNetLoader(cfg["data"], split=cfg['data']['train_split'])
    v_loader = SMNetLoader(cfg['data'], split=cfg["data"]["val_split"])
    t_sampler = DistributedSampler(t_loader)
    v_sampler = DistributedSampler(v_loader, shuffle=False)

    if rank == 0:
        print('#Envs in train: %d' % (len(t_loader.files)))
        print('#Envs in val: %d' % (len(v_loader.files)))

    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"] // world_size,
        num_workers=cfg["training"]["n_workers"],
        drop_last=True,
        pin_memory=True,
        sampler=t_sampler,
        multiprocessing_context='fork',
    )

    valloader = data.DataLoader(
        v_loader,
        batch_size=cfg["training"]["batch_size"] // world_size,
        num_workers=cfg["training"]["n_workers"],
        pin_memory=True,
        sampler=v_sampler,
        multiprocessing_context='fork',
    )

    # Setup Model
    model = SMNet(cfg['model'], device)
    model.apply(model.weights_init)
    model = model.to(device)

    if device.type == 'cuda':
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[rank])

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    if rank == 0:
        print('# trainable parameters = ', params)

    # Setup optimizer, lr_scheduler and loss function
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, model.parameters()),
        **optimizer_params)

    if rank == 0:
        logger.info("Using optimizer {}".format(optimizer))

    lr_decay_lambda = lambda epoch: cfg['training']['scheduler'][
        'lr_decay_rate']**(epoch // cfg['training']['scheduler'][
            'lr_epoch_per_decay'])
    scheduler = LambdaLR(optimizer, lr_lambda=lr_decay_lambda)

    # Setup Metrics
    obj_running_metrics = IoU(cfg['model']['n_obj_classes'])
    obj_running_metrics_val = IoU(cfg['model']['n_obj_classes'])
    obj_running_metrics.reset()
    obj_running_metrics_val.reset()
    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    # setup Loss
    loss_fn = SemmapLoss()
    loss_fn = loss_fn.to(device=device)

    if rank == 0:
        logger.info("Using loss {}".format(loss_fn))

    # init training
    start_iter = 0
    start_epoch = 0
    best_iou = -100.0
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            if rank == 0:
                logger.info(
                    "Loading model and optimizer from checkpoint '{}'".format(
                        cfg["training"]["resume"]))
                print(
                    "Loading model and optimizer from checkpoint '{}'".format(
                        cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"],
                                    map_location="cpu")
            model_state = checkpoint["model_state"]
            model.load_state_dict(model_state)
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_epoch = checkpoint["epoch"]
            start_iter = checkpoint["iter"]
            best_iou = checkpoint['best_iou']
            if rank == 0:
                logger.info("Loaded checkpoint '{}' (iter {})".format(
                    cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            if rank == 0:
                logger.info("No checkpoint found at '{}'".format(
                    cfg["training"]["resume"]))
                print("No checkpoint found at '{}'".format(
                    cfg["training"]["resume"]))

    elif cfg['training']['load_model'] is not None:
        checkpoint = torch.load(cfg["training"]["load_model"],
                                map_location="cpu")
        model_state = checkpoint['model_state']
        model.load_state_dict(model_state)
        if rank == 0:
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["load_model"]))
            print("Loading model and optimizer from checkpoint '{}'".format(
                cfg["training"]["load_model"]))

    # start training
    iter = start_iter
    for epoch in range(start_epoch, cfg["training"]["train_epoch"], 1):

        t_sampler.set_epoch(epoch)

        for batch in trainloader:

            iter += 1
            start_ts = time.time()

            features, masks_inliers, proj_indices, semmap_gt, _ = batch

            model.train()

            optimizer.zero_grad()
            semmap_pred, observed_masks = model(features, proj_indices,
                                                masks_inliers)

            if observed_masks.any():

                loss = loss_fn(semmap_gt.to(device), semmap_pred,
                               observed_masks)

                loss.backward()

                optimizer.step()

                semmap_pred = semmap_pred.permute(0, 2, 3, 1)

                masked_semmap_gt = semmap_gt[observed_masks]
                masked_semmap_pred = semmap_pred[observed_masks]

                obj_gt = masked_semmap_gt.detach()
                obj_pred = masked_semmap_pred.data.max(-1)[1].detach()
                obj_running_metrics.add(obj_pred, obj_gt)

            time_meter.update(time.time() - start_ts)

            if (iter % cfg["training"]["print_interval"] == 0):
                conf_metric = obj_running_metrics.conf_metric.conf
                conf_metric = torch.FloatTensor(conf_metric)
                conf_metric = conf_metric.to(device)
                distrib.all_reduce(conf_metric)
                distrib.all_reduce(loss)
                loss /= world_size

                if (rank == 0):
                    conf_metric = conf_metric.cpu().numpy()
                    conf_metric = conf_metric.astype(np.int32)
                    tmp_metrics = IoU(cfg['model']['n_obj_classes'])
                    tmp_metrics.reset()
                    tmp_metrics.conf_metric.conf = conf_metric
                    _, mIoU, acc, _, mRecall, _, mPrecision = tmp_metrics.value()
                    writer.add_scalar("train_metrics/mIoU", mIoU, iter)
                    writer.add_scalar("train_metrics/mRecall", mRecall, iter)
                    writer.add_scalar("train_metrics/mPrecision", mPrecision,
                                      iter)
                    writer.add_scalar("train_metrics/Overall_Acc", acc, iter)

                    fmt_str = "Iter: {:d} == Epoch [{:d}/{:d}] == Loss: {:.4f} == mIoU: {:.4f} == mRecall:{:.4f} == mPrecision:{:.4f} == Overall_Acc:{:.4f} == Time/Image: {:.4f}"

                    print_str = fmt_str.format(
                        iter,
                        epoch,
                        cfg["training"]["train_epoch"],
                        loss.item(),
                        mIoU,
                        mRecall,
                        mPrecision,
                        acc,
                        time_meter.avg / cfg["training"]["batch_size"],
                    )

                    print(print_str)
                    writer.add_scalar("loss/train_loss", loss.item(), iter)
                    time_meter.reset()

        model.eval()
        with torch.no_grad():
            for batch_val in valloader:

                features, masks_inliers, proj_indices, semmap_gt, _ = batch_val

                semmap_pred, observed_masks = model(features, proj_indices,
                                                    masks_inliers)

                if observed_masks.any():

                    loss_val = loss_fn(semmap_gt.to(device), semmap_pred,
                                       observed_masks)

                    semmap_pred = semmap_pred.permute(0, 2, 3, 1)

                    masked_semmap_gt = semmap_gt[observed_masks]
                    masked_semmap_pred = semmap_pred[observed_masks]

                    obj_gt_val = masked_semmap_gt
                    obj_pred_val = masked_semmap_pred.data.max(-1)[1]
                    obj_running_metrics_val.add(obj_pred_val, obj_gt_val)

                    val_loss_meter.update(loss_val.item())

        conf_metric = obj_running_metrics_val.conf_metric.conf
        conf_metric = torch.FloatTensor(conf_metric)
        conf_metric = conf_metric.to(device)
        distrib.all_reduce(conf_metric)

        val_loss_avg = val_loss_meter.avg
        val_loss_avg = torch.FloatTensor([val_loss_avg])
        val_loss_avg = val_loss_avg.to(device)
        distrib.all_reduce(val_loss_avg)
        val_loss_avg /= world_size

        if rank == 0:
            val_loss_avg = val_loss_avg.cpu().numpy()
            val_loss_avg = val_loss_avg[0]
            writer.add_scalar("loss/val_loss", val_loss_avg, iter)

            logger.info("Iter %d Loss: %.4f" % (iter, val_loss_avg))

            conf_metric = conf_metric.cpu().numpy()
            conf_metric = conf_metric.astype(np.int32)
            tmp_metrics = IoU(cfg['model']['n_obj_classes'])
            tmp_metrics.reset()
            tmp_metrics.conf_metric.conf = conf_metric
            _, mIoU, acc, _, mRecall, _, mPrecision = tmp_metrics.value()
            writer.add_scalar("val_metrics/mIoU", mIoU, iter)
            writer.add_scalar("val_metrics/mRecall", mRecall, iter)
            writer.add_scalar("val_metrics/mPrecision", mPrecision, iter)
            writer.add_scalar("val_metrics/Overall_Acc", acc, iter)

            logger.info("val -- mIoU: {}".format(mIoU))
            logger.info("val -- mRecall: {}".format(mRecall))
            logger.info("val -- mPrecision: {}".format(mPrecision))
            logger.info("val -- Overall_Acc: {}".format(acc))

            print("val -- mIoU: {}".format(mIoU))
            print("val -- mRecall: {}".format(mRecall))
            print("val -- mPrecision: {}".format(mPrecision))
            print("val -- Overall_Acc: {}".format(acc))

            if mIoU >= best_iou:
                best_iou = mIoU
                state = {
                    "epoch": epoch,
                    "iter": iter,
                    "model_state": model.state_dict(),
                    "optimizer_state": optimizer.state_dict(),
                    "scheduler_state": scheduler.state_dict(),
                    "best_iou": best_iou,
                }
                save_path = os.path.join(
                    writer.file_writer.get_logdir(),
                    "{}_mp3d_best_model.pkl".format(cfg["model"]["arch"]),
                )
                torch.save(state, save_path)

            # -- save checkpoint after every epoch
            state = {
                "epoch": epoch,
                "iter": iter,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "scheduler_state": scheduler.state_dict(),
                "best_iou": best_iou,
            }
            save_path = os.path.join(cfg['checkpoint_dir'], "ckpt_model.pkl")
            torch.save(state, save_path)

        val_loss_meter.reset()
        obj_running_metrics_val.reset()
        obj_running_metrics.reset()

        scheduler.step(epoch)
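
Example #9 averages metrics across ranks by all-reducing the raw confusion matrix (and the loss) rather than per-rank IoU values, which keeps the aggregate exact. The core pattern, as a minimal sketch assuming the same IoU metric object, loss tensor, device, and world_size as above:

import torch.distributed as dist

# Sum the integer confusion matrix over all ranks; IoU is then recomputed
# from the summed matrix on rank 0, so no per-rank averaging error creeps in
conf = torch.as_tensor(metric.conf_metric.conf, dtype=torch.float32,
                       device=device)
dist.all_reduce(conf)       # elementwise sum across ranks
dist.all_reduce(loss)       # sum the scalar loss ...
loss = loss / world_size    # ... then average it
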
Example #10
def main_worker(gpu, ngpus_per_node, args):
    global best_mIoU
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model 'DFANet'")
        model = DFANet(pretrained=True, pretrained_backbone=False)
    else:
        print("=> creating model 'DFANet'")
        model = DFANet(pretrained=False, pretrained_backbone=True)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss(ignore_index=19).cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    metric = IoU(20, ignore_index=19)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mIoU = checkpoint['best_mIoU']
            if args.gpu is not None:
                # best_mIoU may be from a checkpoint from a different GPU
                best_mIoU = best_mIoU.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = Cityscapes(args.data,
                               split='train',
                               mode='fine',
                               target_type='semantic',
                               transform=joint_transforms.Compose([
                                   joint_transforms.RandomHorizontalFlip(),
                                   joint_transforms.RandomSized(1024),
                                   joint_transforms.ToTensor(),
                                   joint_transforms.Normalize(
                                       mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                               ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(Cityscapes(
        args.data,
        split='val',
        mode='fine',
        target_type='semantic',
        transform=joint_transforms.Compose([
            joint_transforms.RandomHorizontalFlip(),
            joint_transforms.RandomSized(1024),
            joint_transforms.ToTensor(),
            joint_transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, metric, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # evaluate on training data
        train_mIoU, train_loss = validate(train_loader, model, criterion,
                                          metric, args)

        # evaluate on validation set
        val_mIoU, val_loss = validate(val_loader, model, criterion, metric,
                                      args)

        print("Train mIoU: {}".format(train_mIoU))
        print("Train Loss: {}".format(train_loss))
        print("Val mIoU: {}".format(val_mIoU))
        print("Val mIoU: {}".format(val_loss))
Example #11
def main():
    assert os.path.isdir(
        args.dataset_dir), "The directory \"{0}\" doesn't exist.".format(
            args.dataset_dir)

    # Fail fast if the saving directory doesn't exist
    assert os.path.isdir(
        args.save_dir), "The directory \"{0}\" doesn't exist.".format(
            args.save_dir)

    # Import the requested dataset
    if args.dataset.lower() == 'cityscapes':
        from data import Cityscapes as dataset
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))
    print("\nLoading dataset...\n")

    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        mode='train',
                        max_iters=args.max_iters,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    trainloader_iter = enumerate(train_loader)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      max_iters=args.max_iters,
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       max_iters=args.max_iters,
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding
    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get the parameters for the validation set
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique

    print("\nTraining...\n")

    num_classes = len(class_encoding)
    # Define the model with the encoder and decoder from the deeplabv2
    input_encoder = Encoder().to(device)
    decoder_t = Decoder(num_classes).to(device)

    # Define the entropy loss for the segmentation task
    criterion = CrossEntropy2d()

    # Set the optimizer function for model
    optimizer_g = optim.SGD(itertools.chain(input_encoder.parameters(),
                                            decoder_t.parameters()),
                            lr=args.learning_rate,
                            momentum=0.9,
                            weight_decay=1e-4)

    optimizer_g.zero_grad()

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:

        input_encoder, decoder_t, optimizer_g, start_epoch, best_miou = utils.load_checkpoint(
            input_encoder, decoder_t, optimizer_g, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()

    metric.reset()

    val = Test(input_encoder, decoder_t, val_loader, criterion, metric, device)

    for i_iter in range(args.max_iters):

        optimizer_g.zero_grad()
        adjust_learning_rate(optimizer_g, i_iter)

        _, batch_data = next(trainloader_iter)
        inputs = batch_data[0].to(device)
        labels = batch_data[1].to(device)

        f_i = input_encoder(inputs)

        outputs_i = decoder_t(f_i)
        loss_seg = criterion(outputs_i, labels)

        loss_g = loss_seg
        loss_g.backward()
        optimizer_g.step()

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}'.format(
                i_iter, args.max_iters, loss_g))
            print(">>>> [iter: {0:d}] Validation".format(i_iter))

            # Validate the trained model after the weights are saved
            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [iter: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(i_iter, loss, miou))

            if miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(input_encoder, decoder_t, optimizer_g,
                                      i_iter + 1, best_miou, args)
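
Example #11 calls adjust_learning_rate(optimizer_g, i_iter) without defining it; iteration-based segmentation scripts like this one usually use a poly schedule. A hypothetical definition consistent with the call site (the power value is an assumption):

def adjust_learning_rate(optimizer, i_iter, power=0.9):
    # Hypothetical poly decay: lr falls from args.learning_rate to 0
    # over args.max_iters iterations
    lr = args.learning_rate * (1 - i_iter / args.max_iters) ** power
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
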
Example #12
def train(train_loader, val_loader, class_weights, class_encoding):
    print("Training...")
    num_classes = len(class_encoding)
    model = ERFNet(num_classes)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)
    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None

    metric = IoU(num_classes, ignore_index=ignore_index)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou, val_miou, train_miou, val_loss, train_loss = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name, True)
        print(
            "Resuming from model: Start epoch = {0} | Best mean IoU = {1:.4f}".
            format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0
        val_miou = []
        train_miou = []
        val_loss = []
        train_loss = []

    # Start Training
    train = Train(model, train_loader, optimizer, criterion, metric, use_cuda)
    val = Test(model, val_loader, criterion, metric, use_cuda)

    for epoch in range(start_epoch, args.epochs):
        print(">> [Epoch: {0:d}] Training".format(epoch))
        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)
        print(
            ">> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".format(
                epoch, epoch_loss, miou))
        train_loss.append(epoch_loss)
        train_miou.append(miou)

        # Perform a validation pass
        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))
            loss, (iou, miou) = val.run_epoch(args.print_step)
            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))
            val_loss.append(loss)
            val_miou.append(miou)
            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))
            # Save the model if it's the best thus far
            if miou > best_miou:
                print("Best model thus far. Saving...")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      val_miou, train_miou, val_loss,
                                      train_loss, args)

    return model, train_loss, train_miou, val_loss, val_miou
Example #13
pred_dir = 'data/replica/OUTPUTS/fullrez/SMNet_gru_lastlayer_m256/'

if dataset == 'mp3d':
    paths = json.load(open('data/paths.json', 'r'))
    envs_splits = json.load(open('data/envs_splits.json', 'r'))
    envs = envs_splits['{}_envs'.format(split)]
    envs = [x for x in envs if x in paths]
    envs.sort()
elif dataset == 'replica':
    paths = json.load(open('../replica/paths.json', 'r'))
    envs = list(paths.keys())
    envs.sort()
    envs.remove('room_2')

if dataset == 'mp3d':
    metrics = IoU(13)
elif dataset == 'replica':
    metrics = IoU(13, ignore_index=5)
metrics.reset()

total = 0

filename = os.path.join(pred_dir, 'evaluation_metrics.h5')
with h5py.File(filename, 'w') as f:
    for env in tqdm(envs):

        file = env + '.h5'

        if not os.path.isfile(os.path.join(pred_dir, 'semmap', file)): continue

        total += 1
Example #14
def trainmal(model,
             train_loader,
             val_loader,
             class_weights,
             class_encoding,
             pretrained="./save/mal.pt"):
    """
    This function trains the attacker.
    @param pretrained : String
      None => don't initialize the attacker with pretrained weights
      filename => initialize the attacker with the pretrained weights in filename
    """
    print("\nTraining Attacker...\n")

    num_classes = len(class_encoding)

    model = Malicious_Autoencoder(model)

    if pretrained:
        model.load_state_dict(torch.load(pretrained)["state_dict"])

    model = model.to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    start_epoch = 0
    best_miou = 0
    best_loss = 99999999

    def new_loss(outputs, labels):
        transformx, bout = outputs
        origx, desireds = labels

        l1 = torch.dist(transformx, origx)
        l2 = criterion(bout, desireds)

        return l1, l2

    # Start Training
    train = Train(model, train_loader, optimizer, new_loss, metric, device)
    val = Test(model, val_loader, new_loss, metric, device)
    for epoch in tqdm(range(start_epoch, args.epochs)):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step,
                                                  trainmal=True)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if (epoch + 1) % 3 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step, trainmal=True)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            # if miou > best_miou:
            if loss < best_loss:
                print("\nBest model thus far. Saving...\n")
                # best_miou = miou
                best_loss = loss
                n = args.name
                args.name = args.malname
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)
                args.name = n

    return model
Example #15
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")
    vis_calling_times = 0

    num_classes = len(class_encoding)

    # Initialize ENet
    model = ENet(num_classes).to(device)
    # Check if the network architecture is correct
    if torch.cuda.device_count() > 1:
        print(">>>Use mult GPU for trainning>>>")
        gpu_num = torch.cuda.device_count()
        gpu_list = list(range(gpu_num))
        model = nn.DataParallel(model, device_ids=gpu_list)
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes, which
    # fits the problem. This criterion combines LogSoftmax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        print(">>>> [Epoch: {0:d}] Validation".format(epoch))

        loss, (iou, miou) = val.run_epoch(args.print_step)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, loss, miou))

        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

        if vis_calling_times == 0:
            # mark the plot window as created
            vis_calling_times = 1
            win = viz.line(X=np.column_stack(
                (np.array(epoch), np.array(epoch))),
                           Y=np.column_stack(
                               (np.array(epoch_loss), np.array(loss))),
                           opts=dict(legend=['training loss', 'eval loss'],
                                     title='loss'))
        else:
            viz.line(
                X=np.column_stack((np.array(epoch), np.array(epoch))),
                Y=np.column_stack((np.array(epoch_loss), np.array(loss))),
                win=win,  # must reuse the same win to update the plot
                update='append')

        # if vis_first_create:
        #     vis_first_create = false

        #     win = viz.line( X=np.column_stack((np.array(epoch),np.array(epoch))),
        #                     Y=np.column_stack((np.array(epoch_loss),np.array(loss))),
        #                     name=
        #                     opts=dict(title='loss'))
        # else:
        #     viz.line(   X=np.column_stack((np.array(epoch),np.array(epoch))),
        #                 Y=np.column_stack((np.array(epoch_loss),np.array(loss))),
        #                 win=win,  # must reuse the same win to update the plot
        #                 update='append')

    return model
Example #16
def compute_accuracy(outputs, labels, num_classes):
    metric = IoU(num_classes, ignore_index=None)
    metric.reset()
    metric.add(outputs.detach(), labels.detach())
    (iou, miou) = metric.value()
    return miou
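
compute_accuracy above shows the minimal protocol the IoU metric follows throughout these examples: reset, add a batch, read the value. A usage sketch, assuming logits of shape (N, C, H, W) and integer targets of shape (N, H, W) as in typical segmentation outputs:

metric = IoU(num_classes, ignore_index=None)
metric.reset()
metric.add(outputs.detach(), labels.detach())  # accumulate one batch
iou, miou = metric.value()                     # per-class IoU and its mean
print("mIoU: {0:.4f}".format(miou))
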
Example #17
if args.dataset.lower() == 'cityscapes':
    Ignoring_voidslabel_cityscapes(args.dataset_dir + 'leftImg8bit/train/',
                                   args.Divide_y, args.Divide_x,
                                   (args.width_train, args.height_train))
    Label_Majority_Dict = np.load('Cityscapes_MetaInfo.npy',
                                  allow_pickle=True).item()
    # path to RGB training data
    Dir_RGB = args.dataset_dir + 'leftImg8bit/train/'
    # path to annotation data
    Dir_Ann = args.dataset_dir + 'gtFine/train/'
    
    Dir_AL_RGB = args.dataset_dir + 'AL_Iter_RGB' + '/'
    Dir_AL_Ann = args.dataset_dir + 'AL_Iter_Ann' + '/'
    

class_weights = np.ones(num_classes)
class_weights = torch.from_numpy(class_weights).float().to(device)
ignore_index = list(class_encoding).index('unlabeled')
metric = IoU(num_classes, ignore_index=ignore_index)
class_weights[ignore_index] = 0
print('class_weights', class_weights)


# loss for each iteration
criterion_CE = nn.CrossEntropyLoss(weight=class_weights)
criterion_MSE = nn.MSELoss()

if not os.path.isdir(args.save_dir):
    os.makedirs(args.save_dir)


if args.dataset.lower() == 'cityscapes':
        void_label = 0
elif args.dataset.lower() == 'camvid':
Example #18
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Initialize the network (DeepLabV3)
    model = DeepLabV3(num_classes)
    # model.load_state_dict(torch.load("save/model_13_2_2_2_epoch_580.pth"))
    # model.aspp.conv_1x1_4 = torch.nn.Conv2d(256, num_classes, kernel_size=1)
    # Check that the network architecture is correct
    print(model)

    # Cross-entropy loss function
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay
    # lr_decay_epochs: period of the learning rate decay.
    # lr_decay: multiplicative factor of the learning rate decay, default: 0.1
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    # if not args.ignore_unlabeled:
    #     ignore_index = list(class_encoding).index('unlabeled')
    # else:
    #     ignore_index = None
    ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    if use_cuda:
        print("model使用GPU")
        model = model.cuda()
        criterion = criterion.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, use_cuda)
    val = Test(model, val_loader, criterion, metric, use_cuda)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()  # update the learning rate, then train
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        # Print epoch, loss, and mean IoU
        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Every 10 epochs, and on the last epoch, run a validation pass
        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
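
The StepLR scheduler used in most of these training loops decays the learning rate by a fixed factor every lr_decay_epochs epochs, i.e. lr(e) = learning_rate * lr_decay ** (e // lr_decay_epochs). A short sketch; note that PyTorch 1.1+ expects scheduler.step() after the epoch's optimizer updates (as in the first variant of Example #19), while several examples here call it first, pre-1.1 style:

from torch.optim import lr_scheduler

lr_updater = lr_scheduler.StepLR(optimizer,
                                 step_size=args.lr_decay_epochs,
                                 gamma=args.lr_decay)
for epoch in range(args.epochs):
    train_one_epoch()   # hypothetical placeholder for the epoch's updates
    lr_updater.step()   # decay the learning rate on schedule
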
Example #19
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Initialize ENet
    model = ENet(num_classes).to(device)
    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes, which
    # fits the problem. This criterion combines LogSoftmax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(
        model.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)
        lr_updater.step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
Example #20
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Initialize ENet
    model = ENet(num_classes).to(device)
    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes, which
    # fits the problem. This criterion combines LogSoftmax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        # Visualization by TensorBoardX
        writer.add_scalar('data/train/loss', epoch_loss, epoch)
        writer.add_scalar('data/train/mean_IoU', miou, epoch)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if (epoch + 1) % 1 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            # Visualization by TensorBoardX
            writer.add_scalar('data/val/loss', loss, epoch)
            writer.add_scalar('data/val/mean_IoU', miou, epoch)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

            # Visualization of the predicted batch in TensorBoard
            for i, batch in enumerate(val_loader):
                if i == 1:
                    break

                # Get the inputs and labels
                inputs = batch[0].to(device)
                labels = batch[1].to(device)

                # Forward propagation
                with torch.no_grad():
                    predictions = model(inputs)

                # Predictions is one-hot encoded with "num_classes" channels.
                # Convert it to a single int using the indices where the maximum (1) occurs
                _, predictions = torch.max(predictions.data, 1)

                label_to_rgb = transforms.Compose([
                    ext_transforms.LongTensorToRGBPIL(class_encoding),
                    transforms.ToTensor()
                ])
                color_predictions = utils.batch_transform(
                    predictions.cpu(), label_to_rgb)

                in_training_visualization(model, inputs, labels,
                                          class_encoding, writer, epoch, 'val')

    return model