def test(model, test_loader, class_weights, class_encoding):
    """Evaluate a trained segmentation model on the test set.

    Prints the average loss, the mean IoU and the per-class IoU, then shows
    predictions for one batch.

    Args:
        model: trained segmentation network.
        test_loader: DataLoader yielding (image, label) batches.
        class_weights: per-class weights for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color; its length
            defines the number of classes and its order matches ``iou``.
    """
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, device)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(iteration_loss=False)
    # BUG FIX: removed `class_iou = dict(zip(...))` — it was never used and
    # was immediately shadowed by the loop variable below.

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    # if args.imshow_batch:
    if True:  # NOTE(review): debug override of args.imshow_batch left as-is
        print("A batch of predictions from the test set...")
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)
def test(self):
    """Test the generator.

    Evaluates ``self.generator`` on ``self.test_loader`` and prints the
    average loss, the mean IoU and the per-class IoU.
    """
    print("\nTesting...\n")

    num_classes = len(self.class_encoding)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes
    # which fits the problem. This criterion combines LogSoftMax and
    # NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=self.class_weights)

    # Evaluation metric.
    # BUG FIX: the original referenced the undefined global `class_encoding`
    # here and below; the instance attribute is the intended source.
    ignore_index = list(self.class_encoding).index('unlabeled')
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(self.generator, self.test_loader, criterion, metric,
                self.device)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(iteration_loss=True)

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(self.class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))
def test(model, test_loader, class_weights, class_encoding):
    """Evaluate a trained segmentation model on the test set (CUDA-aware).

    Args:
        model: trained segmentation network.
        test_loader: DataLoader yielding (image, label) batches.
        class_weights: per-class weights for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color.
    """
    print("Testing...")

    num_classes = len(class_encoding)

    criterion = nn.CrossEntropyLoss(weight=class_weights)
    if use_cuda:
        criterion = criterion.cuda()

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, use_cuda)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    # BUG FIX: removed `class_iou = dict(zip(...))` — unused and immediately
    # shadowed by the loop variable below.

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))
def test(model, test_loader, class_weights, class_encoding):
    """Evaluate a multi-task (segmentation + classification) model.

    Selects the device from ``args.cuda`` and availability, builds a pair of
    criteria [segmentation CE, classification KLDiv], runs the test epoch and
    prints mean and per-class IoU.
    """
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    # Device selection: CUDA only when both available and requested.
    if torch.cuda.is_available():
        if args.cuda:
            device = 'cuda'
            if torch.cuda.device_count() > 1:
                model = torch.nn.DataParallel(model)
            torch.cuda.empty_cache()
        else:
            device = 'cpu'
    else:
        device = 'cpu'

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion_seg = nn.CrossEntropyLoss(weight=class_weights)
    #criterion_cls = nn.BCEWithLogitsLoss(weight=class_weights)
    criterion_cls = nn.KLDivLoss(reduction='sum')
    criterion = [criterion_seg, criterion_cls]

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, device)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    # class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("A batch of predictions from the test set...")
        # BUG FIX: `iter(x).next()` is Python-2 only; use the next() builtin.
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding, device)
def test(model, test_loader, class_weights, class_encoding):
    """Evaluate a trained model on the test set and persist the results.

    Writes mean and per-class IoU to ``<save_dir>/<name>_TEST_.txt`` and
    optionally displays predictions for one batch.
    """
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, device,
                args.backbone.lower())

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Save results summary to disk.
    summary_filename_performance = os.path.join(args.save_dir,
                                                args.name + '_TEST_' + '.txt')
    with open(summary_filename_performance, 'w') as summary_file_2:
        summary_file_2.write("\nTEST\n")
        summary_file_2.write("Mean IoU: {0}\n".format(miou))
        for key, class_iou in zip(class_encoding.keys(), iou):
            summary_file_2.write("{0}: {1:.4f}\n".format(key, class_iou))
        # BUG FIX: removed redundant explicit close(); the `with` block
        # already closes the file on exit.

    # Show a batch of samples and labels
    if args.imshow_batch_test:
        print("A batch of predictions from the test set...")
        # BUG FIX: `iter(x).next()` is Python-2 only; use the next() builtin.
        images, gt_labels, _, _ = next(iter(test_loader))
        predict(model, images, gt_labels, class_encoding)
def test(model, test_loader, class_weights, class_encoding, step):
    """Evaluate a trained model on the test set.

    Args:
        model: trained segmentation network.
        test_loader: DataLoader yielding (image, label) batches.
        class_weights: per-class weights for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color.
        step: forwarded to ``Test`` (e.g. logging step) — semantics defined
            by the Test class.
    """
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequently used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    if use_cuda:
        criterion = criterion.cuda()

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, use_cuda, step)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("A batch of predictions from the test set...")
        # BUG FIX: `iter(x).next()` is Python-2 only; use the next() builtin.
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)
def test(model, test_loader, class_weights, class_encoding):
    """Evaluate a trained model on the test set (no class ignored in IoU)."""
    print("\nTesting...\n")

    num_classes = len(class_encoding)

    # Use the CrossEntropyLoss loss function.
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    if use_cuda:
        criterion = criterion.cuda()

    # Evaluation metric
    # if not args.ignore_unlabeled:
    #     ignore_index = list(class_encoding).index('unlabeled')
    # else:
    #     ignore_index = None
    ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Test the trained model on the test set
    test = Test(model, test_loader, criterion, metric, use_cuda)

    print(">>>> Running test dataset")

    loss, (iou, miou) = test.run_epoch(args.print_step)
    class_iou = dict(zip(class_encoding.keys(), iou))

    print(">>>> Avg. loss: {0:.4f} | Mean IoU: {1:.4f}".format(loss, miou))

    # Print per class IoU
    for key, class_iou in zip(class_encoding.keys(), iou):
        print("{0}: {1:.4f}".format(key, class_iou))

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("A batch of predictions from the test set...")
        # BUG FIX: `iter(x).next()` is Python-2 only; use the next() builtin.
        images, _ = next(iter(test_loader))
        predict(model, images, class_encoding)
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train a torchvision segmentation model (FCN or DeepLabV3 ResNet-50).

    Builds the model from ``args.backbone``, trains for ``args.epochs``
    epochs, validates every ``args.save_val_every_epoch`` epochs, checkpoints
    after each validation, and keeps the best-mIoU weights.

    Returns:
        The trained model (possibly wrapped in ``nn.DataParallel``).

    Raises:
        ValueError: if ``args.backbone`` or ``args.optimizer`` is unsupported.
    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    backbone = args.backbone.lower()
    if backbone == 'fcn':
        model = torchvision.models.segmentation.fcn_resnet50(
            num_classes=num_classes).to(device)
        # Initialize the FCN backbone with ImageNet-pretrained ResNet-50
        # weights (prefix keys with 'backbone.' to match the FCN state dict).
        model_2 = models.resnet50(pretrained=True).to(device)
        model_dict = model.state_dict()
        pretrained_dict = model_2.state_dict()
        newpretrained_dict = collections.OrderedDict()
        for key, val in pretrained_dict.items():
            newpretrained_dict['backbone.' + key] = val
        # 1. filter out unnecessary keys
        pretrained_dict = {
            k: v
            for k, v in newpretrained_dict.items() if k in model_dict
        }
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # 3. load the new state dict
        model.load_state_dict(model_dict)
    elif backbone == 'deeplab':
        model = torchvision.models.segmentation.deeplabv3_resnet50(
            num_classes=num_classes).to(device)
    else:
        # BUG FIX: previously an unknown backbone fell through and `model`
        # was referenced while unbound (NameError); fail fast instead.
        raise ValueError(
            "Unsupported backbone: {0}".format(args.backbone))

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    if args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.learning_rate,
                               weight_decay=args.weight_decay)
    else:
        # BUG FIX: same unbound-name hazard for `optimizer`.
        raise ValueError(
            "Unsupported optimizer: {0}".format(args.optimizer))

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    start_epoch = 0
    best_miou = 0

    # Start Training
    train = Train(model, train_loader, optimizer, criterion, metric, device,
                  args.backbone.lower(), args.consistency)
    val = Test(model, val_loader, criterion, metric, device,
               args.backbone.lower())

    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = train.run_epoch(
            iteration_loss=args.print_step, epochnum=epoch)
        lr_updater.step()
        print('\n')
        print(
            ">>>> [Epoch: {0:d}] Training Avg. loss: {1:.4f} | Mean IoU: {2:.4f}"
            .format(epoch, epoch_loss, miou))

        # Validate (and checkpoint) every `save_val_every_epoch` epochs and
        # on the final epoch.
        val_every = args.save_val_every_epoch
        if (epoch + 1) % val_every == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))
            loss, (iou, miou) = val.run_epoch(iteration_loss=args.print_step,
                                              epochnum=epoch)
            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            utils.save_checkpoint_epoch(model, optimizer, epoch, best_miou,
                                        args)

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
def train(rank, world_size, cfg):
    """Distributed (one process per GPU, NCCL) training loop for SMNet.

    Args:
        rank: global rank of this process (0 does all logging/saving).
        world_size: total number of processes.
        cfg: nested config dict (keys used: seed, logdir, data, training,
            model, checkpoint_dir).

    Side effects: initializes torch.distributed, writes TensorBoard logs and
    checkpoints (rank 0 only). Never returns a value.
    """
    # Setup seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # init distributed compute — rank 0 hosts the TCPStore rendezvous.
    master_port = int(os.environ.get("MASTER_PORT", 8738))
    master_addr = os.environ.get("MASTER_ADDR", "127.0.0.1")
    tcp_store = torch.distributed.TCPStore(master_addr, master_port,
                                           world_size, rank == 0)
    torch.distributed.init_process_group('nccl',
                                         store=tcp_store,
                                         rank=rank,
                                         world_size=world_size)

    # Setup device — one CUDA device per rank; CPU only for world_size == 1.
    if torch.cuda.is_available():
        device = torch.device("cuda", rank)
        torch.cuda.set_device(device)
    else:
        assert world_size == 1
        device = torch.device("cpu")

    # Logging objects exist only on rank 0 (all writer/logger uses below are
    # guarded by `rank == 0`).
    if rank == 0:
        writer = SummaryWriter(logdir=cfg["logdir"])
        logger = get_logger(cfg["logdir"])
        logger.info("Let SMNet training begin !!")

    # Setup Dataloader — per-rank batch size is the global size / world_size.
    t_loader = SMNetLoader(cfg["data"], split=cfg['data']['train_split'])
    v_loader = SMNetLoader(cfg['data'], split=cfg["data"]["val_split"])

    t_sampler = DistributedSampler(t_loader)
    v_sampler = DistributedSampler(v_loader, shuffle=False)

    if rank == 0:
        print('#Envs in train: %d' % (len(t_loader.files)))
        print('#Envs in val: %d' % (len(v_loader.files)))

    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"] // world_size,
        num_workers=cfg["training"]["n_workers"],
        drop_last=True,
        pin_memory=True,
        sampler=t_sampler,
        multiprocessing_context='fork',
    )

    valloader = data.DataLoader(
        v_loader,
        batch_size=cfg["training"]["batch_size"] // world_size,
        num_workers=cfg["training"]["n_workers"],
        pin_memory=True,
        sampler=v_sampler,
        multiprocessing_context='fork',
    )

    # Setup Model
    model = SMNet(cfg['model'], device)
    model.apply(model.weights_init)
    model = model.to(device)
    if device.type == 'cuda':
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[rank])

    # Count trainable parameters (for the rank-0 report only).
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    if rank == 0:
        print('# trainable parameters = ', params)

    # Setup optimizer, lr_scheduler and loss function
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }
    optimizer = torch.optim.SGD(
        filter(lambda p: p.requires_grad, model.parameters()),
        **optimizer_params)
    if rank == 0:
        logger.info("Using optimizer {}".format(optimizer))

    # Step decay: multiply lr by lr_decay_rate every lr_epoch_per_decay epochs.
    lr_decay_lambda = lambda epoch: cfg['training']['scheduler'][
        'lr_decay_rate']**(epoch // cfg['training']['scheduler'][
            'lr_epoch_per_decay'])
    scheduler = LambdaLR(optimizer, lr_lambda=lr_decay_lambda)

    # Setup Metrics — running confusion matrices for train and val.
    obj_running_metrics = IoU(cfg['model']['n_obj_classes'])
    obj_running_metrics_val = IoU(cfg['model']['n_obj_classes'])
    obj_running_metrics.reset()
    obj_running_metrics_val.reset()
    val_loss_meter = averageMeter()
    time_meter = averageMeter()

    # setup Loss
    loss_fn = SemmapLoss()
    loss_fn = loss_fn.to(device=device)
    if rank == 0:
        logger.info("Using loss {}".format(loss_fn))

    # init training
    start_iter = 0
    start_epoch = 0
    best_iou = -100.0
    if cfg["training"]["resume"] is not None:
        # Full resume: model + optimizer + scheduler + counters.
        if os.path.isfile(cfg["training"]["resume"]):
            if rank == 0:
                logger.info(
                    "Loading model and optimizer from checkpoint '{}'".format(
                        cfg["training"]["resume"]))
                print(
                    "Loading model and optimizer from checkpoint '{}'".format(
                        cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"],
                                    map_location="cpu")
            model_state = checkpoint["model_state"]
            model.load_state_dict(model_state)
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_epoch = checkpoint["epoch"]
            start_iter = checkpoint["iter"]
            best_iou = checkpoint['best_iou']
            if rank == 0:
                logger.info("Loaded checkpoint '{}' (iter {})".format(
                    cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            if rank == 0:
                logger.info("No checkpoint found at '{}'".format(
                    cfg["training"]["resume"]))
                print("No checkpoint found at '{}'".format(
                    cfg["training"]["resume"]))
    elif cfg['training']['load_model'] is not None:
        # Weights-only initialization (counters and optimizer start fresh).
        checkpoint = torch.load(cfg["training"]["load_model"],
                                map_location="cpu")
        model_state = checkpoint['model_state']
        model.load_state_dict(model_state)
        if rank == 0:
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["load_model"]))
            print("Loading model and optimizer from checkpoint '{}'".format(
                cfg["training"]["load_model"]))

    # start training
    # NOTE(review): `iter` shadows the builtin for the rest of this function.
    iter = start_iter
    for epoch in range(start_epoch, cfg["training"]["train_epoch"], 1):
        t_sampler.set_epoch(epoch)
        for batch in trainloader:
            iter += 1
            start_ts = time.time()

            features, masks_inliers, proj_indices, semmap_gt, _ = batch

            model.train()
            optimizer.zero_grad()

            semmap_pred, observed_masks = model(features, proj_indices,
                                                masks_inliers)

            # Only batches with at least one observed map cell contribute a
            # gradient step and metric update.
            if observed_masks.any():
                loss = loss_fn(semmap_gt.to(device), semmap_pred,
                               observed_masks)
                loss.backward()
                optimizer.step()

                # (B, C, H, W) -> (B, H, W, C) so the boolean mask selects
                # per-cell class vectors.
                semmap_pred = semmap_pred.permute(0, 2, 3, 1)
                masked_semmap_gt = semmap_gt[observed_masks]
                masked_semmap_pred = semmap_pred[observed_masks]

                obj_gt = masked_semmap_gt.detach()
                obj_pred = masked_semmap_pred.data.max(-1)[1].detach()
                obj_running_metrics.add(obj_pred, obj_gt)

            time_meter.update(time.time() - start_ts)

            if (iter % cfg["training"]["print_interval"] == 0):
                # All-reduce the confusion matrix and the last loss across
                # ranks, then log aggregated metrics on rank 0.
                conf_metric = obj_running_metrics.conf_metric.conf
                conf_metric = torch.FloatTensor(conf_metric)
                conf_metric = conf_metric.to(device)
                distrib.all_reduce(conf_metric)
                distrib.all_reduce(loss)
                loss /= world_size
                if (rank == 0):
                    conf_metric = conf_metric.cpu().numpy()
                    conf_metric = conf_metric.astype(np.int32)
                    tmp_metrics = IoU(cfg['model']['n_obj_classes'])
                    tmp_metrics.reset()
                    tmp_metrics.conf_metric.conf = conf_metric
                    _, mIoU, acc, _, mRecall, _, mPrecision = tmp_metrics.value(
                    )
                    writer.add_scalar("train_metrics/mIoU", mIoU, iter)
                    writer.add_scalar("train_metrics/mRecall", mRecall, iter)
                    writer.add_scalar("train_metrics/mPrecision", mPrecision,
                                      iter)
                    writer.add_scalar("train_metrics/Overall_Acc", acc, iter)
                    fmt_str = "Iter: {:d} == Epoch [{:d}/{:d}] == Loss: {:.4f} == mIoU: {:.4f} == mRecall:{:.4f} == mPrecision:{:.4f} == Overall_Acc:{:.4f} == Time/Image: {:.4f}"
                    print_str = fmt_str.format(
                        iter,
                        epoch,
                        cfg["training"]["train_epoch"],
                        loss.item(),
                        mIoU,
                        mRecall,
                        mPrecision,
                        acc,
                        time_meter.avg / cfg["training"]["batch_size"],
                    )
                    print(print_str)
                    writer.add_scalar("loss/train_loss", loss.item(), iter)
                    time_meter.reset()

        # ---- validation at the end of every epoch ----
        model.eval()
        with torch.no_grad():
            for batch_val in valloader:
                features, masks_inliers, proj_indices, semmap_gt, _ = batch_val

                semmap_pred, observed_masks = model(features, proj_indices,
                                                    masks_inliers)

                if observed_masks.any():
                    loss_val = loss_fn(semmap_gt.to(device), semmap_pred,
                                       observed_masks)

                    semmap_pred = semmap_pred.permute(0, 2, 3, 1)
                    masked_semmap_gt = semmap_gt[observed_masks]
                    masked_semmap_pred = semmap_pred[observed_masks]

                    obj_gt_val = masked_semmap_gt
                    obj_pred_val = masked_semmap_pred.data.max(-1)[1]
                    obj_running_metrics_val.add(obj_pred_val, obj_gt_val)

                    val_loss_meter.update(loss_val.item())

        # Aggregate validation metrics/loss across ranks.
        conf_metric = obj_running_metrics_val.conf_metric.conf
        conf_metric = torch.FloatTensor(conf_metric)
        conf_metric = conf_metric.to(device)
        distrib.all_reduce(conf_metric)

        val_loss_avg = val_loss_meter.avg
        val_loss_avg = torch.FloatTensor([val_loss_avg])
        val_loss_avg = val_loss_avg.to(device)
        distrib.all_reduce(val_loss_avg)
        val_loss_avg /= world_size

        if rank == 0:
            val_loss_avg = val_loss_avg.cpu().numpy()
            val_loss_avg = val_loss_avg[0]
            writer.add_scalar("loss/val_loss", val_loss_avg, iter)
            logger.info("Iter %d Loss: %.4f" % (iter, val_loss_avg))

            conf_metric = conf_metric.cpu().numpy()
            conf_metric = conf_metric.astype(np.int32)
            tmp_metrics = IoU(cfg['model']['n_obj_classes'])
            tmp_metrics.reset()
            tmp_metrics.conf_metric.conf = conf_metric
            _, mIoU, acc, _, mRecall, _, mPrecision = tmp_metrics.value()
            writer.add_scalar("val_metrics/mIoU", mIoU, iter)
            writer.add_scalar("val_metrics/mRecall", mRecall, iter)
            writer.add_scalar("val_metrics/mPrecision", mPrecision, iter)
            writer.add_scalar("val_metrics/Overall_Acc", acc, iter)
            logger.info("val -- mIoU: {}".format(mIoU))
            logger.info("val -- mRecall: {}".format(mRecall))
            logger.info("val -- mPrecision: {}".format(mPrecision))
            logger.info("val -- Overall_Acc: {}".format(acc))
            print("val -- mIoU: {}".format(mIoU))
            print("val -- mRecall: {}".format(mRecall))
            print("val -- mPrecision: {}".format(mPrecision))
            print("val -- Overall_Acc: {}".format(acc))

            # Keep a separate copy of the best-mIoU weights.
            if mIoU >= best_iou:
                best_iou = mIoU
                state = {
                    "epoch": epoch,
                    "iter": iter,
                    "model_state": model.state_dict(),
                    "optimizer_state": optimizer.state_dict(),
                    "scheduler_state": scheduler.state_dict(),
                    "best_iou": best_iou,
                }
                save_path = os.path.join(
                    writer.file_writer.get_logdir(),
                    "{}_mp3d_best_model.pkl".format(cfg["model"]["arch"]),
                )
                torch.save(state, save_path)

            # -- save checkpoint after every epoch
            state = {
                "epoch": epoch,
                "iter": iter,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "scheduler_state": scheduler.state_dict(),
                "best_iou": best_iou,
            }
            save_path = os.path.join(cfg['checkpoint_dir'], "ckpt_model.pkl")
            torch.save(state, save_path)

        # Reset per-epoch meters/metrics and advance the LR schedule.
        val_loss_meter.reset()
        obj_running_metrics_val.reset()
        obj_running_metrics.reset()

        scheduler.step(epoch)
def main_worker(gpu, ngpus_per_node, args):
    """Per-process worker: build DFANet, optionally resume, and evaluate.

    Args:
        gpu: GPU index assigned to this process (or None).
        ngpus_per_node: number of GPUs on this node (used to split batch
            size/workers in distributed mode).
        args: parsed CLI namespace (distributed, resume, data paths, ...).

    NOTE(review): the epoch loop below only calls `validate` on the train
    and val loaders — no training step is visible here; confirm whether a
    `train(...)` call was intended.
    """
    global best_mIoU
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    if args.pretrained:
        print("=> using pre-trained model 'DFANet'")
        model = DFANet(pretrained=True, pretrained_backbone=False)
    else:
        print("=> creating model 'DFANet'")
        model = DFANet(pretrained=False, pretrained_backbone=True)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available
        # GPUs
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer; class 19 is the
    # Cityscapes 'ignore' label here.
    criterion = nn.CrossEntropyLoss(ignore_index=19).cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    metric = IoU(20, ignore_index=19)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mIoU = checkpoint['best_mIoU']
            if args.gpu is not None:
                # best_mIoU may be from a checkpoint from a different GPU
                best_mIoU = best_mIoU.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    train_dataset = Cityscapes(args.data,
                               split='train',
                               mode='fine',
                               target_type='semantic',
                               transform=joint_transforms.Compose([
                                   joint_transforms.RandomHorizontalFlip(),
                                   joint_transforms.RandomSized(1024),
                                   joint_transforms.ToTensor(),
                                   joint_transforms.Normalize(
                                       mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
                               ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(Cityscapes(
        args.data,
        split='val',
        mode='fine',
        target_type='semantic',
        transform=joint_transforms.Compose([
            joint_transforms.RandomHorizontalFlip(),
            joint_transforms.RandomSized(1024),
            joint_transforms.ToTensor(),
            joint_transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                       std=[0.229, 0.224, 0.225])
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        # BUG FIX: this call omitted `metric`, unlike every other
        # validate(...) call in this worker (signature mismatch).
        validate(val_loader, model, criterion, metric, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # evaluate on training data
        train_mIoU, train_loss = validate(train_loader, model, criterion,
                                          metric, args)

        # evaluate on validation set
        val_mIoU, val_loss = validate(val_loader, model, criterion, metric,
                                      args)

        print("Train mIoU: {}".format(train_mIoU))
        print("Train Loss: {}".format(train_loss))
        print("Val mIoU: {}".format(val_mIoU))
        # BUG FIX: this line printed `val_loss` under the label "Val mIoU".
        print("Val Loss: {}".format(val_loss))
def main():
    """Entry point: load Cityscapes, build encoder/decoder, train by iteration.

    Loads train/val/test loaders, trains an Encoder+Decoder pair with SGD for
    ``args.max_iters`` iterations, validates every ``args.save_pred_every``
    iterations, and checkpoints the best-mIoU model.
    """
    assert os.path.isdir(
        args.dataset_dir), "The directory \"{0}\" doesn't exist.".format(
            args.dataset_dir)
    # Fail fast if the saving directory doesn't exist
    assert os.path.isdir(
        args.save_dir), "The directory \"{0}\" doesn't exist.".format(
            args.save_dir)

    # Import the requested dataset
    if args.dataset.lower() == 'cityscapes':
        from data import Cityscapes as dataset
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))

    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])
    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        mode='train',
                        max_iters=args.max_iters,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)
    trainloader_iter = enumerate(train_loader)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      max_iters=args.max_iters,
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       max_iters=args.max_iters,
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel valus in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a sample batch to report tensor sizes.
    # BUG FIX: `iter(x).next()` is Python-2 only; use the next() builtin.
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Define the model with the encoder and decoder from the deeplabv2
    input_encoder = Encoder().to(device)
    decoder_t = Decoder(num_classes).to(device)

    # Define the entropy loss for the segmentation task
    criterion = CrossEntropy2d()

    # Set the optimizer function for model
    optimizer_g = optim.SGD(itertools.chain(input_encoder.parameters(),
                                            decoder_t.parameters()),
                            lr=args.learning_rate,
                            momentum=0.9,
                            weight_decay=1e-4)
    optimizer_g.zero_grad()

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        input_encoder, decoder_t, optimizer_g, start_epoch, best_miou = utils.load_checkpoint(
            input_encoder, decoder_t, optimizer_g, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    metric.reset()
    val = Test(input_encoder, decoder_t, val_loader, criterion, metric,
               device)
    for i_iter in range(args.max_iters):
        optimizer_g.zero_grad()
        adjust_learning_rate(optimizer_g, i_iter)

        # Idiom: next(it) instead of it.__next__().
        _, batch_data = next(trainloader_iter)
        inputs = batch_data[0].to(device)
        labels = batch_data[1].to(device)

        f_i = input_encoder(inputs)
        outputs_i = decoder_t(f_i)
        loss_seg = criterion(outputs_i, labels)
        loss_g = loss_seg
        loss_g.backward()
        optimizer_g.step()

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}'.format(
                i_iter, args.max_iters, loss_g))
            print(">>>> [iter: {0:d}] Validation".format(i_iter))

            # Validate the trained model after the weights are saved
            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [iter: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(i_iter, loss, miou))

            # Merged the two identical `miou > best_miou` checks into one.
            if miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

                # Save the model if it's the best thus far
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(input_encoder, decoder_t, optimizer_g,
                                      i_iter + 1, best_miou, args)
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train an ERFNet segmentation model.

    Runs ``args.epochs`` epochs of training, validating every 10th epoch and
    on the final one, and checkpoints whenever the validation mIoU improves.

    Returns:
        (model, train_loss, train_miou, val_loss, val_miou) — the trained
        model plus the per-epoch loss/mIoU histories.
    """
    print("Training...")

    num_classes = len(class_encoding)

    model = ERFNet(num_classes)

    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # IoU metric, optionally skipping the 'unlabeled' class.
    ignore_index = (list(class_encoding).index('unlabeled')
                    if args.ignore_unlabeled else None)
    metric = IoU(num_classes, ignore_index=ignore_index)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        (model, optimizer, start_epoch, best_miou, val_miou, train_miou,
         val_loss, train_loss) = utils.load_checkpoint(
             model, optimizer, args.save_dir, args.name, True)
        print(
            "Resuming from model: Start epoch = {0} | Best mean IoU = {1:.4f}".
            format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0
        val_miou, train_miou = [], []
        val_loss, train_loss = [], []

    # Start Training
    trainer = Train(model, train_loader, optimizer, criterion, metric,
                    use_cuda)
    evaluator = Test(model, val_loader, criterion, metric, use_cuda)

    for epoch in range(start_epoch, args.epochs):
        print(">> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = trainer.run_epoch(args.print_step)

        print(
            ">> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".format(
                epoch, epoch_loss, miou))
        train_loss.append(epoch_loss)
        train_miou.append(miou)

        # Run validation every 10 epochs and on the final epoch.
        if (epoch + 1) % 10 != 0 and epoch + 1 != args.epochs:
            continue

        print(">>>> [Epoch: {0:d}] Validation".format(epoch))
        loss, (iou, miou) = evaluator.run_epoch(args.print_step)
        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, loss, miou))
        val_loss.append(loss)
        val_miou.append(miou)

        # Per-class breakdown on the last epoch or when the score improved.
        if epoch + 1 == args.epochs or miou > best_miou:
            for name, score in zip(class_encoding.keys(), iou):
                print("{0}: {1:.4f}".format(name, score))

        # Checkpoint whenever validation mIoU improves.
        if miou > best_miou:
            print("Best model thus far. Saving...")
            best_miou = miou
            utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                  val_miou, train_miou, val_loss, train_loss,
                                  args)

    return model, train_loss, train_miou, val_loss, val_miou
# Evaluation driver: accumulate IoU over every environment of the chosen
# dataset split and record results into an HDF5 file under `pred_dir`.

# Root folder holding the predicted semantic maps to evaluate.
pred_dir = 'data/replica/OUTPUTS/fullrez/SMNet_gru_lastlayer_m256/'

# Build the ordered list of environments for the selected dataset/split.
if dataset == 'mp3d':
    paths = json.load(open('data/paths.json', 'r'))
    envs_splits = json.load(open('data/envs_splits.json', 'r'))
    envs = envs_splits['{}_envs'.format(split)]
    # Keep only environments that have a recorded path.
    envs = [x for x in envs if x in paths]
    envs.sort()
elif dataset == 'replica':
    paths = json.load(open('../replica/paths.json', 'r'))
    envs = list(paths.keys())
    envs.sort()
    # room_2 is excluded from Replica evaluation.
    envs.remove('room_2')

# 13-class IoU; for Replica, class index 5 is ignored.
if dataset == 'mp3d':
    metrics = IoU(13)
elif dataset == 'replica':
    metrics = IoU(13, ignore_index=5)
metrics.reset()

# `total` counts environments that actually have a prediction file.
total = 0
filename = os.path.join(pred_dir, 'evaluation_metrics.h5')
with h5py.File(filename, 'w') as f:
    for env in tqdm(envs):
        file = env + '.h5'
        # Skip environments with no predicted semantic map on disk.
        if not os.path.isfile(os.path.join(pred_dir, 'semmap', file)):
            continue
        total += 1
def trainmal(model, train_loader, val_loader, class_weights, class_encoding,
             pretrained="./save/mal.pt"):
    """Train the attacker (a malicious autoencoder wrapped around *model*).

    Args:
        model: the victim segmentation model to wrap.
        train_loader: DataLoader for attacker training batches.
        val_loader: DataLoader for attacker validation batches.
        class_weights: per-class weight tensor for the cross-entropy term.
        class_encoding: ordered mapping of class name -> color.
        pretrained: path to pretrained attacker weights, or a falsy value
            (e.g. None) to start from scratch.

    Returns:
        The trained Malicious_Autoencoder model.
    """
    print("\nTraining Attacker...\n")

    num_classes = len(class_encoding)

    # Wrap the victim model; 'model' is rebound to the attacker from here on.
    model = Malicious_Autoencoder(model)
    if pretrained:
        model.load_state_dict(torch.load(pretrained)["state_dict"])
    model = model.to(device)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    start_epoch = 0
    best_miou = 0
    best_loss = 99999999  # sentinel; model selection is by lowest loss below

    def new_loss(outputs, labels):
        # Two-term attacker loss:
        #   l1 — distance between the transformed input and the original
        #        (keeps the perturbed input close to the clean one);
        #   l2 — cross-entropy pushing the victim's output towards the
        #        desired (attack target) labels.
        transformx, bout = outputs
        origx, desireds = labels
        l1 = torch.dist(transformx, origx)
        l2 = criterion(bout, desireds)
        return l1, l2

    # Start Training
    train = Train(model, train_loader, optimizer, new_loss, metric, device)
    val = Test(model, val_loader, new_loss, metric, device)
    for epoch in tqdm(range(start_epoch, args.epochs)):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step,
                                                  trainmal=True)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validate every 3 epochs and on the final epoch.
        if (epoch + 1) % 3 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step, trainmal=True)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far.  Selection is by
            # lowest validation loss (the miou criterion was deliberately
            # disabled — best_miou therefore stays 0 in the checkpoint).
            # if miou > best_miou:
            if loss < best_loss:
                print("\nBest model thus far. Saving...\n")
                # best_miou = miou
                best_loss = loss
                # Temporarily swap the checkpoint name so the attacker is
                # saved under args.malname instead of the victim's name.
                n = args.name
                args.name = args.malname
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)
                args.name = n

    return model
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train an ENet model with per-epoch validation and visdom loss curves.

    Args:
        train_loader: DataLoader for training batches.
        val_loader: DataLoader for validation batches.
        class_weights: per-class weight tensor for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color.

    Returns:
        The trained model.
    """
    print("\nTraining...\n")

    # 0 until the first visdom plot is created; then 1 (acts as a bool).
    vis_calling_times = 0

    num_classes = len(class_encoding)

    # Intialize ENet
    model = ENet(num_classes).to(device)

    # Wrap in DataParallel when more than one GPU is available.
    if torch.cuda.device_count() > 1:
        print(">>>Use mult GPU for trainning>>>")
        gpu_num = torch.cuda.device_count()
        gpu_list = list(range(gpu_num))
        model = nn.DataParallel(model, device_ids=gpu_list)

    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequentely used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # NOTE: validation runs EVERY epoch here; only the per-class IoU
        # printing and checkpointing below are gated to every 10th epoch.
        print(">>>> [Epoch: {0:d}] Validation".format(epoch))

        loss, (iou, miou) = val.run_epoch(args.print_step)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, loss, miou))

        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

        # Plot train/eval loss in visdom: create the window on the first
        # epoch, append to the same window afterwards.
        if vis_calling_times == 0:
            # flip the flag so the window is only created once
            vis_calling_times = 1
            win = viz.line(X=np.column_stack(
                (np.array(epoch), np.array(epoch))),
                           Y=np.column_stack(
                               (np.array(epoch_loss), np.array(loss))),
                           opts=dict(legend=['training loss', 'eval loss'],
                                     title='loss'))
        else:
            viz.line(
                X=np.column_stack((np.array(epoch), np.array(epoch))),
                Y=np.column_stack((np.array(epoch_loss), np.array(loss))),
                win=win,  # win must stay the same to append to one plot
                update='append')

    return model
def compute_accuracy(outputs, labels, num_classes): metric = IoU(num_classes, ignore_index=None) metric.reset() metric.add(outputs.detach(), labels.detach()) (iou, miou) = metric.value() return miou
if args.dataset.lower() == 'cityscapes': Ignoring_voidslabel_cityscapes(args.dataset_dir + 'leftImg8bit/train/',args.Divide_y,args.Divide_x,(args.width_train,args.height_train)) Label_Majority_Dict = np.load('Cityscapes_MetaInfo.npy',allow_pickle='TRUE').item() # path for rgb training data Dir_RGB = args.dataset_dir + 'leftImg8bit/train/' # path to anootation data Dir_Ann = args.dataset_dir + 'gtFine/train/' Dir_AL_RGB = args.dataset_dir + 'AL_Iter_RGB' + '/' Dir_AL_Ann = args.dataset_dir + 'AL_Iter_Ann' + '/' class_weights = np.ones(num_classes) class_weights = torch.from_numpy(class_weights).float().to(device) ignore_index = list(class_encoding).index('unlabeled') metric = IoU(num_classes, ignore_index=ignore_index) class_weights[ignore_index] = 0 print('class_weights',class_weights) # loss for each iteration criterion_CE = nn.CrossEntropyLoss(weight=class_weights) criterion_MSE = nn.MSELoss() if not os.path.isdir(args.save_dir): os.makedirs(args.save_dir) if args.dataset.lower() == 'cityscapes': void_label = 0 elif args.dataset.lower() == 'camvid':
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train a DeepLabV3 segmentation model.

    Args:
        train_loader: DataLoader for training batches.
        val_loader: DataLoader for validation batches.
        class_weights: per-class weight tensor for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color.

    Returns:
        The trained model.
    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Initialize the DeepLabV3 model
    model = DeepLabV3(num_classes)
    # Optional warm-start / head replacement, kept disabled:
    # model.load_state_dict(torch.load("save/model_13_2_2_2_epoch_580.pth"))
    # model.aspp.conv_1x1_4 = torch.nn.Conv2d(256, num_classes, kernel_size=1)

    # Print the model to check the network architecture is correct
    print(model)

    # Cross-entropy loss (combines LogSoftmax and NLLLoss)
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay:
    #   lr_decay_epochs: period of the decay (in epochs)
    #   lr_decay: multiplicative decay factor
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric.  Ignoring 'unlabeled' was deliberately disabled
    # here — every class counts towards the IoU:
    # if not args.ignore_unlabeled:
    #     ignore_index = list(class_encoding).index('unlabeled')
    # else:
    #     ignore_index = None
    ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    if use_cuda:
        print("model使用GPU")
        model = model.cuda()
        criterion = criterion.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, use_cuda)
    val = Test(model, val_loader, criterion, metric, use_cuda)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        # Update the learning rate, then run one training epoch
        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        # Print epoch, average loss and mean IoU
        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validate every 10 epochs and on the final epoch
        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train an ENet segmentation model.

    Args:
        train_loader: DataLoader for training batches.
        val_loader: DataLoader for validation batches.
        class_weights: per-class weight tensor for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color.

    Returns:
        The trained model.
    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Build the network and print it as an architecture sanity check.
    model = ENet(num_classes).to(device)
    print(model)

    # Weighted cross-entropy — the standard multi-class criterion; it
    # combines LogSoftmax and NLLLoss internally.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # Adam (as used by the ENet authors) with a step learning-rate decay.
    optimizer = optim.Adam(
        model.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay)
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Mean-IoU metric, optionally ignoring the 'unlabeled' class.
    ignore_index = (list(class_encoding).index('unlabeled')
                    if args.ignore_unlabeled else None)
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Fresh run by default; a checkpoint overrides epoch and best score.
    start_epoch, best_miou = 0, 0
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))

    print()
    trainer = Train(model, train_loader, optimizer, criterion, metric, device)
    validator = Test(model, val_loader, criterion, metric, device)

    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = trainer.run_epoch(args.print_step)
        # Decay the learning rate only after the epoch's optimizer steps.
        lr_updater.step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        last_epoch = epoch + 1 == args.epochs
        # Validate every 10 epochs and on the final epoch.
        if (epoch + 1) % 10 == 0 or last_epoch:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = validator.run_epoch(args.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Per-class IoU on the last epoch, or whenever a new best occurs.
            if last_epoch or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Checkpoint only on improvement.
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train an ENet model, logging curves and predictions to TensorBoardX.

    Args:
        train_loader: DataLoader for training batches.
        val_loader: DataLoader for validation batches.
        class_weights: per-class weight tensor for the cross-entropy loss.
        class_encoding: ordered mapping of class name -> color.

    Returns:
        The trained model.
    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Intialize ENet
    model = ENet(num_classes).to(device)
    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequentely used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        # Visualization by TensorBoardX
        writer.add_scalar('data/train/loss', epoch_loss, epoch)
        writer.add_scalar('data/train/mean_IoU', miou, epoch)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validation every epoch ((epoch + 1) % 1 is always 0).
        if (epoch + 1) % 1 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            # Visualization by TensorBoardX
            writer.add_scalar('data/val/loss', loss, epoch)
            writer.add_scalar('data/val/mean_IoU', miou, epoch)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

                # Visualization of the predicted batch in TensorBoard.
                # NOTE(review): indentation was lost in this excerpt; this
                # loop appears to run only when a new best model is saved —
                # confirm against upstream.  Only the first val batch is used.
                for i, batch in enumerate(val_loader):
                    if i == 1:
                        break

                    # Get the inputs and labels
                    inputs = batch[0].to(device)
                    labels = batch[1].to(device)

                    # Forward propagation
                    with torch.no_grad():
                        predictions = model(inputs)

                    # Predictions is one-hot encoded with "num_classes" channels.
                    # Convert it to a single int using the indices where the
                    # maximum (1) occurs
                    _, predictions = torch.max(predictions.data, 1)

                    label_to_rgb = transforms.Compose([
                        ext_transforms.LongTensorToRGBPIL(class_encoding),
                        transforms.ToTensor()
                    ])
                    # NOTE(review): color_predictions is computed but never
                    # passed to in_training_visualization below.
                    color_predictions = utils.batch_transform(
                        predictions.cpu(), label_to_rgb)

                    in_training_visualization(model, inputs, labels,
                                              class_encoding, writer, epoch,
                                              'val')

    return model