def train(train_loader, val_loader, class_weights, num_classes):
    # `num_classes` is taken as a parameter here since both ENet and the IoU
    # metric need it (the flattened original left it undefined).
    model = ENet(num_classes)

    criterion = nn.CrossEntropyLoss(weight=class_weights)

    optimizer = optim.Adam(model.parameters(), lr=5e-4, weight_decay=2e-4)

    # Large dataset: decay the learning rate every 10 epochs
    lr_updater = lr_scheduler.StepLR(optimizer, 10, 1e-7)

    ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    model = model.cuda()
    criterion = criterion.cuda()

    # Optionally resume from a checkpoint:
    # model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
    #     model, optimizer, args.save_dir, args.name)
    # print("Resuming from model: Start epoch = {0} "
    #       "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    start_epoch = 0
    best_miou = 0

    train = Train(model, train_loader, optimizer, criterion, metric,
                  use_cuda=True)
    val = Test(model, val_loader, criterion, metric, use_cuda=True)

    n_epochs = 200
    for epoch in range(start_epoch, n_epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(iteration_loss=True)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if (epoch + 1) % 10 == 0 or epoch + 1 == n_epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(iteration_loss=True)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per-class IoU on the last epoch or if this is the best IoU
            if epoch + 1 == n_epochs or miou > best_miou:
                for class_iou in iou:
                    print(class_iou)

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                torch.save(
                    model.state_dict(),
                    '/mnt/disks/data/d4dl/snapshots/snapshot_' + str(epoch) +
                    '.pt')

    return model
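# The loop above assumes `Train.run_epoch`/`Test.run_epoch` return
# `(average_loss, (per_class_iou, mean_iou))` and that the metric exposes
# `reset`/`add`/`value`. Below is a minimal sketch of a training runner with
# that interface -- an illustration of the assumed contract, not the
# project's actual `Train` class.
class TrainRunnerSketch:
    def __init__(self, model, data_loader, optimizer, criterion, metric,
                 use_cuda=True):
        self.model = model
        self.data_loader = data_loader
        self.optimizer = optimizer
        self.criterion = criterion
        self.metric = metric
        self.use_cuda = use_cuda

    def run_epoch(self, iteration_loss=False):
        self.model.train()
        self.metric.reset()
        epoch_loss = 0.0
        for step, (inputs, labels) in enumerate(self.data_loader):
            if self.use_cuda:
                inputs, labels = inputs.cuda(), labels.cuda()

            outputs = self.model(inputs)  # (N, C, H, W) class logits
            loss = self.criterion(outputs, labels)

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            epoch_loss += loss.item()
            # Detach so the metric doesn't keep the autograd graph alive
            self.metric.add(outputs.detach(), labels.detach())

            if iteration_loss:
                print("[Step: {0}] Iteration loss: {1:.4f}".format(
                    step, loss.item()))

        return epoch_loss / len(self.data_loader), self.metric.value()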
def main_script(args):
    # Fail fast if the dataset directory doesn't exist
    assert os.path.isdir(
        args.dataset_dir), "The directory \"{0}\" doesn't exist.".format(
            args.dataset_dir)

    # Fail fast if the saving directory doesn't exist
    assert os.path.isdir(
        args.save_dir), "The directory \"{0}\" doesn't exist.".format(
            args.save_dir)

    # Import the requested dataset
    if args.dataset.lower() == 'camvid':
        from data import CamVid as dataset
    elif args.dataset.lower() == 'cityscapes':
        from data import Cityscapes as dataset
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))

    loaders, w_class, class_encoding = load_dataset(dataset, args.color_space,
                                                    args.hue_value)
    train_loader, val_loader, test_loader = loaders

    if args.mode.lower() in {'train', 'full'}:
        model = train(train_loader, val_loader, w_class, class_encoding)
        if args.mode.lower() == 'full':
            test(model, test_loader, w_class, class_encoding)
    elif args.mode.lower() == 'test':
        # Initialize a new ENet model
        num_classes = len(class_encoding)
        model = ENet(num_classes)
        if use_cuda:
            model = model.cuda()

        # Forward hooks for individual layers can be registered here, e.g.:
        # model.initial_block.register_forward_hook(save_activations)
        # model.downsample1_0.register_forward_hook(save_activations)
        # model.regular1_1.register_forward_hook(save_activations)
        # model.downsample2_0.register_forward_hook(save_activations)

        # Initialize an optimizer just so we can retrieve the model from the
        # checkpoint
        optimizer = optim.Adam(model.parameters())

        # Load the previously saved model state to the ENet model
        model = utils.load_checkpoint(model, optimizer, args.save_dir,
                                      args.name)[0]
        test(model, test_loader, w_class, class_encoding)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                args.mode))
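# The commented-out `register_forward_hook` calls above need a callable with
# the signature PyTorch passes to forward hooks: (module, input, output).
# A minimal sketch of such a `save_activations` (hypothetical -- the original
# implementation is not shown in this listing):
activations = {}

def save_activations(module, input, output):
    # Detach and move to CPU so stored activations don't hold on to the
    # autograd graph or GPU memory. Assumes `output` is a single tensor,
    # which holds for ENet's blocks.
    activations.setdefault(module.__class__.__name__, []).append(
        output.detach().cpu())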
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Initialize ENet
    model = ENet(num_classes)
    # Check that the network architecture is correct
    print(model)

    # We use CrossEntropyLoss as it's the most frequently used loss for
    # multi-class classification, which fits this problem. This criterion
    # combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    if use_cuda:
        model = model.cuda()
        criterion = criterion.cuda()

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, use_cuda)
    val = Test(model, val_loader, criterion, metric, use_cuda)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per-class IoU on the last epoch or if this is the best IoU
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
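# `class_weights` above is a per-class weight tensor for CrossEntropyLoss.
# The ENet paper proposes the weighing scheme w_class = 1 / ln(c + p_class),
# with c = 1.02 and p_class the class frequency over the training set. A
# sketch of that computation (the loader/label layout is an assumption):
import numpy as np
import torch

def enet_class_weights(data_loader, num_classes, c=1.02):
    # Assumes each batch yields (images, labels) with integer class maps
    # whose values lie in [0, num_classes).
    class_count = np.zeros(num_classes)
    total = 0
    for _, labels in data_loader:
        flat = labels.cpu().numpy().flatten()
        class_count += np.bincount(flat, minlength=num_classes)
        total += flat.size

    propensity = class_count / total
    return torch.from_numpy(1.0 / np.log(c + propensity)).float()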
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))

    loaders, w_class, class_encoding = load_dataset(dataset)
    train_loader, val_loader, test_loader = loaders

    if args.mode.lower() in {'train', 'full'}:
        model = train(train_loader, val_loader, w_class, class_encoding)
        if args.mode.lower() == 'full':
            test(model, test_loader, w_class, class_encoding)
    elif args.mode.lower() == 'test':
        # Initialize a new ENet model
        num_classes = len(class_encoding)
        model = ENet(num_classes)
        if use_cuda:
            model = model.cuda()

        # Initialize an optimizer just so we can retrieve the model from the
        # checkpoint
        optimizer = optim.Adam(model.parameters())

        # Load the previously saved model state to the ENet model
        model = utils.load_checkpoint(model, optimizer, args.save_dir,
                                      args.name)[0]
        print(model)

        test(model, test_loader, w_class, class_encoding)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                args.mode))
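# Both main variants above read their configuration from `args`. A minimal
# argparse sketch covering the attributes these listings access (flag names
# and defaults are illustrative, inferred from the attribute accesses, not
# the project's actual argument parser):
from argparse import ArgumentParser

def get_arguments():
    parser = ArgumentParser()
    parser.add_argument('--mode', choices=['train', 'test', 'full'],
                        default='train')
    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--dataset', choices=['camvid', 'cityscapes'],
                        default='camvid')
    parser.add_argument('--dataset-dir', default='data')
    parser.add_argument('--save-dir', default='save')
    parser.add_argument('--name', default='ENet')
    parser.add_argument('--epochs', type=int, default=300)
    parser.add_argument('--learning-rate', type=float, default=5e-4)
    parser.add_argument('--weight-decay', type=float, default=2e-4)
    parser.add_argument('--lr-decay', type=float, default=0.1)
    parser.add_argument('--lr-decay-epochs', type=int, default=100)
    parser.add_argument('--print-step', action='store_true')
    parser.add_argument('--ignore-unlabeled', action='store_true')
    parser.add_argument('--color-space', default='rgb')
    parser.add_argument('--hue-value', type=float, default=0.0)
    return parser.parse_args()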