def train(train_loader, val_loader, class_weights):
    """Train ENet from scratch and return the trained model.

    Args:
        train_loader: DataLoader yielding training batches.
        val_loader: DataLoader yielding validation batches.
        class_weights: per-class weight tensor for CrossEntropyLoss.

    Returns:
        The trained model; the best snapshots (by validation mean IoU)
        are also written to disk.
    """
    model = ENet(num_classes)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(model.parameters(), lr=5e-4, weight_decay=2e-4)
    # Large dataset, decaying every 10 epochs..
    lr_updater = lr_scheduler.StepLR(optimizer, 10, 1e-7)

    ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    model = model.cuda()
    criterion = criterion.cuda()

    # model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
    #     model, optimizer, args.save_dir, args.name)
    # print("Resuming from model: Start epoch = {0} "
    #       "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    start_epoch = 0
    best_miou = 0

    train = Train(model, train_loader, optimizer, criterion, metric,
                  use_cuda=True)
    val = Test(model, val_loader, criterion, metric, use_cuda=True)
    n_epochs = 200
    for epoch in range(start_epoch, n_epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = train.run_epoch(iteration_loss=True)
        # Fix: step the LR scheduler AFTER the epoch's optimizer updates.
        # Stepping before training (as the original did) skips the initial
        # learning rate — required ordering since PyTorch 1.1.
        lr_updater.step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validate every 10 epochs and on the final epoch.
        if (epoch + 1) % 10 == 0 or epoch + 1 == n_epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))
            loss, (iou, miou) = val.run_epoch(iteration_loss=True)
            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == n_epochs or miou > best_miou:
                for class_iou in iou:
                    print(class_iou)

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                torch.save(
                    model.state_dict(),
                    '/mnt/disks/data/d4dl/snapshots/snapshot_' + str(epoch) +
                    '.pt')
    return model
def main_script(args):
    """Dispatch dataset loading, training and/or testing per ``args``.

    Args:
        args: parsed command-line namespace; uses ``dataset_dir``,
            ``save_dir``, ``dataset``, ``mode``, ``color_space``,
            ``hue_value`` and ``name``.

    Raises:
        RuntimeError: missing dataset/save directory, unknown dataset,
            or unknown execution mode.
    """
    # Fail fast if the dataset directory doesn't exist.  These were
    # `assert` statements, which are stripped under `python -O`; explicit
    # raises keep the guard active in optimized runs too.
    if not os.path.isdir(args.dataset_dir):
        raise RuntimeError(
            "The directory \"{0}\" doesn't exist.".format(args.dataset_dir))

    # Fail fast if the saving directory doesn't exist
    if not os.path.isdir(args.save_dir):
        raise RuntimeError(
            "The directory \"{0}\" doesn't exist.".format(args.save_dir))

    # Import the requested dataset
    if args.dataset.lower() == 'camvid':
        from data import CamVid as dataset
    elif args.dataset.lower() == 'cityscapes':
        from data import Cityscapes as dataset
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))

    loaders, w_class, class_encoding = load_dataset(dataset, args.color_space,
                                                    args.hue_value)
    train_loader, val_loader, test_loader = loaders

    if args.mode.lower() in {'train', 'full'}:
        model = train(train_loader, val_loader, w_class, class_encoding)
        if args.mode.lower() == 'full':
            test(model, test_loader, w_class, class_encoding)
    elif args.mode.lower() == 'test':
        # Intialize a new ENet model
        num_classes = len(class_encoding)
        model = ENet(num_classes)
        if use_cuda:
            model = model.cuda()

        # Here we register forward hooks for each layer.
        # model.initial_block.register_forward_hook(save_activations)
        # model.downsample1_0.register_forward_hook(save_activations)
        # model.regular1_1.register_forward_hook(save_activations)
        # model.downsample2_0.register_forward_hook(save_activations)

        # Initialize a optimizer just so we can retrieve the model from the
        # checkpoint
        optimizer = optim.Adam(model.parameters())

        # Load the previoulsy saved model state to the ENet model
        model = utils.load_checkpoint(model, optimizer, args.save_dir,
                                      args.name)[0]
        test(model, test_loader, w_class, class_encoding)
    else:
        # Should never happen...but just in case it does
        raise RuntimeError(
            "\"{0}\" is not a valid choice for execution mode.".format(
                args.mode))
def predict():
    """Segment one Cityscapes image with a checkpointed ENet and show the
    color mask blended over the input with OpenCV.

    NOTE: paths (checkpoint and input image) are hard-coded.
    """
    image_transform = transforms.Compose(
        [transforms.Resize(target_size),
         transforms.ToTensor()])
    label_transform = transforms.Compose(
        [transforms.Resize(target_size),
         ext_transforms.PILToLongTensor()])

    # Get selected dataset
    # Load the training set as tensors — only used here to recover the
    # class/color encoding for the model head size.
    train_set = Cityscapes(data_dir,
                           mode='test',
                           transform=image_transform,
                           label_transform=label_transform)
    class_encoding = train_set.color_encoding
    num_classes = len(class_encoding)

    model = ENet(num_classes).to(device)

    # Initialize a optimizer just so we can retrieve the model from the
    # checkpoint
    optimizer = optim.Adam(model.parameters())

    # Load the previoulsy saved model state to the ENet model
    model = utils.load_checkpoint(model, optimizer, 'save',
                                  'ENet_cityscapes_mine.pth')[0]
    # print(model)

    # Fix: run inference in eval mode so BatchNorm uses running statistics
    # instead of the single-image batch statistics.
    model.eval()

    image = Image.open('images/mainz_000000_008001_leftImg8bit.png')
    # `Variable` has been a deprecated no-op since PyTorch 0.4; a plain
    # tensor works directly.
    images = image_transform(image).to(device).unsqueeze(0)
    image = np.array(image)

    # Make predictions!  no_grad avoids building an autograd graph.
    with torch.no_grad():
        predictions = model(images)

    # Argmax over the class channel gives per-pixel class indices.
    _, predictions = torch.max(predictions.data, 1)

    # 0~18
    prediction = predictions.cpu().numpy()[0] - 1

    mask_color = np.asarray(label_to_color_image(prediction, 'cityscapes'),
                            dtype=np.uint8)
    mask_color = cv2.resize(mask_color, (image.shape[1], image.shape[0]))
    print(image.shape)
    print(mask_color.shape)
    res = cv2.addWeighted(image, 0.3, mask_color, 0.7, 0.6)
    # cv2.imshow('rr', mask_color)
    cv2.imshow('combined', res)
    cv2.waitKey(0)
def create_model(args, n_classes):
    """
    Creates a model according to:
        - args.model_type: ENet or BiSeNet
        - args.dropout: if different from 0 or None, a dropout is done

    Dropout is done by adding a forward hook to the batch-norm layers.
    Dropped-out pixels are replaced by the batch norm bias for their
    channel.  It is designed for MC dropout (i.e. even at inference
    stage) and is always active even after calling model.eval() or
    model.train(False).

    Returns a Torch model
    """
    # A missing model_type attribute falls back to ENet.
    model_type = getattr(args, "model_type", "enet").lower()
    if model_type == "enet":
        model = ENet(n_classes)
    elif model_type == "bisenet":
        model = BiSeNetv2(3, n_classes, ratio=8)
    else:
        raise ValueError("Model type: expected ENet or BiSeNet")

    # Optional MC dropout; skipped when the attribute is absent, None or 0.
    dropout = getattr(args, "dropout", None)
    if dropout not in (0, None):
        utils.add_dropout(model, dropout)
    return model
def main():
    """Main function."""
    loaders, class_weights, class_encoding = load_dataset(dataset)
    train_loader, val_loader, test_loader = loaders
    num_classes = len(class_encoding)

    # Adversarial pair: ENet acts as the generator, the discriminator
    # supplies the WGAN-GP critic signal.
    critic = DiscriminativeNet()
    generator = ENet(num_classes)
    dataloader = load_real_data(real_dataset)

    # Both optimizers use the same Adam hyper-parameters.
    adam_kwargs = dict(lr=0.0001, betas=(0.5, 0.999))
    optimizer_D = optim.Adam(critic.parameters(), **adam_kwargs)
    optimizer_G = optim.Adam(generator.parameters(), **adam_kwargs)

    gan = WGANGP(generator,
                 critic,
                 dataloader,
                 train_loader,
                 test_loader,
                 class_weights,
                 class_encoding,
                 ngpu=ngpu,
                 device=device,
                 nr_epochs=500,
                 print_every=10,
                 save_every=400,
                 optimizer_D=optimizer_D,
                 optimizer_G=optimizer_G)
    gan.train()
    samples_l, D_losses, G_losses = gan.get_training_results()
def __init__(self, exp):
    """Set up data pipeline, ENet model, optimizer and losses for
    TuSimple lane-segmentation training.

    Args:
        exp: experiment name; names the TensorBoard run directory.
    """
    # IoU and pixAcc Metric calculator (7 classes — presumably background
    # + 6 lanes, matching the criterion weights below; TODO confirm)
    self.metric = SegmentationMetric(7)

    cfg_path = os.path.join(os.getcwd(), 'config/tusimple_config.yaml')
    self.exp_name = exp
    self.writer = SummaryWriter('tensorboard/' + self.exp_name)
    with open(cfg_path) as file:
        cfg = yaml.load(file, Loader=yaml.FullLoader)

    self.device = torch.device(cfg['DEVICE'])
    self.max_epochs = cfg['TRAIN']['MAX_EPOCHS']
    self.dataset_path = cfg['DATASET']['PATH']

    # TODO remove this and refactor PROPERLY
    self.input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(cfg['DATASET']['MEAN'], cfg['DATASET']['STD']),
    ])

    mean = cfg['DATASET']['MEAN']
    std = cfg['DATASET']['STD']
    # Training augmentation: slight over-resize, then random crop, flip
    # and small rotation; validation only resizes and normalizes.
    self.train_transform = Compose(Resize(size=(645, 373)),
                                   RandomCrop(size=(640, 368)),
                                   RandomFlip(0.5), Rotation(2), ToTensor(),
                                   Normalize(mean=mean, std=std))
    self.val_transform = Compose(Resize(size=(640, 368)), ToTensor(),
                                 Normalize(mean=mean, std=std))

    self.train_dataset = tuSimple(path=cfg['DATASET']['PATH'],
                                  image_set='train',
                                  transforms=self.train_transform)
    self.val_dataset = tuSimple(
        path=cfg['DATASET']['PATH'],
        image_set='val',
        transforms=self.val_transform,
    )

    self.train_loader = data.DataLoader(
        dataset=self.train_dataset,
        batch_size=cfg['TRAIN']['BATCH_SIZE'],
        shuffle=True,
        num_workers=0,
        pin_memory=True,
        drop_last=True,
    )
    self.val_loader = data.DataLoader(
        dataset=self.val_dataset,
        batch_size=cfg['TRAIN']['BATCH_SIZE'],
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        drop_last=False,
    )

    self.iters_per_epoch = len(
        self.train_dataset) // (cfg['TRAIN']['BATCH_SIZE'])
    self.max_iters = cfg['TRAIN']['MAX_EPOCHS'] * self.iters_per_epoch

    # -------- network --------
    # (The unused locals `data_kwargs` and `weight` of the original were
    # removed; the class weights live directly in the criterion below.)
    self.model = ENet(num_classes=7).to(self.device)
    self.optimizer = optim.SGD(
        self.model.parameters(),
        lr=cfg['OPTIM']['LR'],
        weight_decay=cfg['OPTIM']['DECAY'],
        momentum=0.9,
    )
    self.lr_scheduler = get_scheduler(self.optimizer,
                                      max_iters=self.max_iters,
                                      iters_per_epoch=self.iters_per_epoch)
    #self.optimizer = optim.Adam(
    #    self.model.parameters(),
    #    lr = cfg['OPTIM']['LR'],
    #    weight_decay=0,
    #    )

    # Background class is down-weighted to 0.4 in the CE loss.
    self.criterion = nn.CrossEntropyLoss(
        weight=torch.tensor([0.4, 1, 1, 1, 1, 1, 1])).cuda()
    self.bce = nn.BCELoss().cuda()
def initialize(self):
    """Build model, optimizer, criterion, evaluator and LR scheduler from
    ``self.args``, moving the model to GPU (DataParallel) when requested.

    Raises:
        NotImplementedError: unknown architecture or optimizer choice.
    """
    args = self.args

    if args.architecture == 'deeplab':
        print('Using Deeplab')
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        # Backbone trains at the base LR, the decoder head at 10x.
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': args.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': args.lr * 10
        }]
    elif args.architecture == 'enet':
        print('Using ENet')
        model = ENet(num_classes=self.nclass,
                     encoder_relu=True,
                     decoder_relu=True)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    elif args.architecture == 'fastscnn':
        print('Using FastSCNN')
        model = FastSCNN(3, self.nclass)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    else:
        # Fix: an unrecognized architecture used to fall through silently
        # and crash later with a NameError on `model`; fail fast instead.
        raise NotImplementedError(
            'Unknown architecture: {}'.format(args.architecture))

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(train_params,
                                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError

    # Optional per-class loss weights computed from label frequencies.
    if args.use_balanced_weights:
        weight = calculate_weights_labels(args.dataset, self.train_loader,
                                          self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer
    self.evaluator = Evaluator(self.nclass)

    if args.use_lr_scheduler:
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
    else:
        self.scheduler = None

    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()
    self.best_pred = 0.0
import torch
from models.enet import ENet
import os
from configs import config_factory

if __name__ == '__main__':
    # Export a trained ENet checkpoint as a TorchScript trace.
    save_pth = os.path.join(config_factory['resnet_cityscapes'].respth,
                            'model_final.pth')
    model = ENet(nb_classes=19)
    # Fix: map_location lets the export run on CPU-only machines even when
    # the checkpoint was saved from a CUDA model.
    model.load_state_dict(torch.load(save_pth, map_location='cpu'))
    model.eval()

    # Fixed example input: batch of 2 RGB images at 1024x1024.
    example = torch.rand(2, 3, 1024, 1024).cpu()
    # Trace under no_grad so no autograd bookkeeping leaks into the trace.
    with torch.no_grad():
        traced_script_module = torch.jit.trace(model, example)
    traced_script_module.save(
        os.path.join(config_factory['resnet_cityscapes'].respth,
                     "model_dfanet_1024.pt"))
def __init__(self, s_exp_name, t_exp_name):
    """Prepare student/teacher ENet pair, TuSimple data and losses for
    knowledge-distillation training.

    Args:
        s_exp_name: student experiment name (names the TensorBoard run).
        t_exp_name: teacher experiment name.
    """
    cfg_path = os.path.join(os.getcwd(), 'config/tusimple_config.yaml')
    self.s_exp_name = s_exp_name
    self.t_exp_name = t_exp_name
    self.writer = SummaryWriter('tensorboard/' + self.s_exp_name)
    # IoU / pixel-accuracy meter over the 7 classes.
    self.metric = SegmentationMetric(7)

    with open(cfg_path) as cfg:
        config = yaml.load(cfg, Loader=yaml.FullLoader)

    self.device = torch.device(config['DEVICE'])
    self.max_epochs = config['TRAIN']['MAX_EPOCHS']
    self.dataset_path = config['DATASET']['PATH']
    self.mean = config['DATASET']['MEAN']
    self.std = config['DATASET']['STD']
    '''
    self.input_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(self.mean, self.std),
    ])
    '''
    # Only the training split is augmented; validation just gets a
    # resize + normalize.
    self.train_transform = Compose(Resize(size=(645, 373)),
                                   RandomCrop(size=(640, 368)),
                                   RandomFlip(0.5), Rotation(2), ToTensor(),
                                   Normalize(mean=self.mean, std=self.std))
    self.val_transform = Compose(Resize(size=(640, 368)), ToTensor(),
                                 Normalize(mean=self.mean, std=self.std))

    self.train_dataset = tuSimple(path=config['DATASET']['PATH'],
                                  image_set='train',
                                  transforms=self.train_transform)
    self.val_dataset = tuSimple(path=config['DATASET']['PATH'],
                                image_set='val',
                                transforms=self.val_transform)

    # Shared DataLoader options; training additionally shuffles and drops
    # the final incomplete batch.
    loader_opts = dict(batch_size=config['TRAIN']['BATCH_SIZE'],
                       num_workers=0,
                       pin_memory=True)
    self.train_loader = data.DataLoader(dataset=self.train_dataset,
                                        shuffle=True,
                                        drop_last=True,
                                        **loader_opts)
    self.val_loader = data.DataLoader(dataset=self.val_dataset,
                                      shuffle=False,
                                      drop_last=False,
                                      **loader_opts)

    self.iters_per_epoch = len(
        self.train_dataset) // config['TRAIN']['BATCH_SIZE']
    self.max_iters = self.max_epochs * self.iters_per_epoch

    # ------------network------------
    # Student is optimized; the teacher only provides targets.
    self.s_model = ENet(num_classes=7).to(self.device)
    self.t_model = ENet(num_classes=7).to(self.device)
    self.optimizer = optim.SGD(
        self.s_model.parameters(),
        lr=config['OPTIM']['LR'],
        weight_decay=config['OPTIM']['DECAY'],
        momentum=0.9,
    )
    self.lr_scheduler = get_scheduler(
        self.optimizer,
        max_iters=self.max_iters,
        iters_per_epoch=self.iters_per_epoch,
    )

    # background weight 0.4
    self.ce = nn.CrossEntropyLoss(weight=torch.tensor(
        [0.4, 1, 1, 1, 1, 1, 1])).cuda()
    self.bce = nn.BCELoss().cuda()
    self.kl = nn.KLDivLoss().cuda()  # reduction='batchmean' gives NaN
    self.mse = nn.MSELoss().cuda()
def _init_model(self):
    """Instantiate ENet and restore its weights from ``self.model_path``."""
    network = ENet(self.num_classes).to(device)
    state = torch.load(self.model_path)
    network.load_state_dict(state['state_dict'])
    self.model = network
    print('Model loaded!')
def create_model(num_classes=20, device='cuda'):
    """Create an ENet segmentation model on the requested device.

    Args:
        num_classes: number of output classes (default 20).
        device: target device string/object. A 'cuda' request silently
            falls back to CPU when CUDA is unavailable, preserving the
            original default behavior.

    Returns:
        The ENet model moved to the resolved device.
    """
    # Fix: the passed ``device`` argument used to be discarded and always
    # recomputed; honor it, keeping the CPU fallback for cuda requests.
    if str(device).startswith('cuda') and not torch.cuda.is_available():
        device = 'cpu'
    model = ENet(num_classes).to(torch.device(device))
    return model
else: # Should never happen...but just in case it does raise RuntimeError("\"{0}\" is not a supported dataset.".format( args.dataset)) loaders, w_class, class_encoding = load_dataset(dataset) train_loader, val_loader, test_loader = loaders if args.mode.lower() in {'train', 'full'}: model = train(train_loader, val_loader, w_class, class_encoding) if args.mode.lower() == 'full': test(model, test_loader, w_class, class_encoding) elif args.mode.lower() == 'test': # Intialize a new ENet model num_classes = len(class_encoding) model = ENet(num_classes) if use_cuda: model = model.cuda() # Initialize a optimizer just so we can retrieve the model from the # checkpoint optimizer = optim.Adam(model.parameters()) # Load the previoulsy saved model state to the ENet model model = utils.load_checkpoint(model, optimizer, args.save_dir, args.name)[0] print(model) test(model, test_loader, w_class, class_encoding) else: # Should never happen...but just in case it does raise RuntimeError(
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train ENet (multi-GPU aware) and plot train/eval loss in visdom.

    Args:
        train_loader: training DataLoader.
        val_loader: validation DataLoader.
        class_weights: per-class weights for CrossEntropyLoss.
        class_encoding: ordered mapping of class name -> color; its length
            fixes the number of output classes.

    Returns:
        The trained model (best checkpoint saved via utils).
    """
    print("\nTraining...\n")

    vis_calling_times = 0
    num_classes = len(class_encoding)

    # Intialize ENet
    model = ENet(num_classes).to(device)

    # Wrap in DataParallel across all visible GPUs when more than one.
    if torch.cuda.device_count() > 1:
        print(">>>Use mult GPU for trainning>>>")
        gpu_num = torch.cuda.device_count()
        gpu_list = list(range(gpu_num))
        model = nn.DataParallel(model, device_ids=gpu_list)
    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequentely used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)
        # Fix: the scheduler now steps AFTER the epoch's optimizer updates;
        # stepping first skipped the initial LR (PyTorch >= 1.1 ordering).
        lr_updater.step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validation runs every epoch in this variant.
        print(">>>> [Epoch: {0:d}] Validation".format(epoch))
        loss, (iou, miou) = val.run_epoch(args.print_step)
        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, loss, miou))

        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

        # visdom loss curves: create the window on the first call, then
        # append the new points on later epochs.
        if vis_calling_times == 0:
            # set to false
            vis_calling_times = 1
            win = viz.line(X=np.column_stack(
                (np.array(epoch), np.array(epoch))),
                           Y=np.column_stack(
                               (np.array(epoch_loss), np.array(loss))),
                           opts=dict(legend=['training loss', 'eval loss'],
                                     title='loss'))
        else:
            viz.line(
                X=np.column_stack((np.array(epoch), np.array(epoch))),
                Y=np.column_stack((np.array(epoch_loss), np.array(loss))),
                win=win,  # window id must stay the same to append
                update='append')
    return model
from data import Cityscapes as dataset else: # Should never happen...but just in case it does raise RuntimeError("\"{0}\" is not a supported dataset.".format( args.dataset)) loaders, w_class, class_encoding = load_dataset(dataset) train_loader, val_loader, test_loader = loaders if args.mode.lower() in {'train', 'full'}: model = train(train_loader, val_loader, w_class, class_encoding) if args.mode.lower() in {'test', 'full'}: if args.mode.lower() == 'test': # Intialize a new ENet model num_classes = len(class_encoding) model = ENet(num_classes).to(device) # Initialize a optimizer just so we can retrieve the model from the # checkpoint optimizer = optim.Adam(model.parameters()) # Load the previoulsy saved model state to the ENet model model = utils.load_checkpoint(model, optimizer, args.save_dir, args.name)[0] if args.mode.lower() == 'test': print(model) test(model, test_loader, w_class, class_encoding)
def __init__(self, args):
    """Build the full training harness: saver, TensorBoard summary, data
    loaders, model, optimizer, criterion, evaluator, LR scheduler and
    optional checkpoint resume.

    Args:
        args: parsed experiment configuration namespace.

    Raises:
        NotImplementedError: unknown architecture or optimizer choice.
        RuntimeError: ``args.resume`` points at a missing checkpoint file.
    """
    self.args = args

    self.saver = PassiveSaver(args)
    self.saver.save_experiment_config()
    self.summary = TensorboardSummary(self.saver.experiment_dir)
    self.writer = self.summary.create_summary()

    kwargs = {'pin_memory': False, 'memory_hog': args.memory_hog}
    self.train_set, self.train_loader, self.val_loader, self.test_loader, self.nclass = make_dataloader(
        args.dataset, args.base_size, args.crop_size, args.batch_size,
        args.workers, args.overfit, **kwargs)
    self.train_set.make_dataset_multiple_of_batchsize(args.batch_size)

    if args.architecture == 'deeplab':
        print('Using Deeplab')
        model = DeepLab(num_classes=self.nclass,
                        backbone=args.backbone,
                        output_stride=args.out_stride,
                        sync_bn=args.sync_bn,
                        freeze_bn=args.freeze_bn)
        # Backbone trains at the base LR, the decoder head at 10x.
        train_params = [{
            'params': model.get_1x_lr_params(),
            'lr': args.lr
        }, {
            'params': model.get_10x_lr_params(),
            'lr': args.lr * 10
        }]
    elif args.architecture == 'enet':
        print('Using ENet')
        model = ENet(num_classes=self.nclass,
                     encoder_relu=True,
                     decoder_relu=True)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    elif args.architecture == 'fastscnn':
        print('Using FastSCNN')
        model = FastSCNN(3, self.nclass)
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    else:
        # Fix: an unrecognized architecture used to fall through silently
        # and crash later with a NameError on `model`; fail fast instead.
        raise NotImplementedError(
            'Unknown architecture: {}'.format(args.architecture))

    if args.optimizer == 'SGD':
        optimizer = torch.optim.SGD(train_params,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)
    elif args.optimizer == 'Adam':
        optimizer = torch.optim.Adam(train_params,
                                     weight_decay=args.weight_decay)
    else:
        raise NotImplementedError

    # Optional per-class loss weights computed from label frequencies.
    if args.use_balanced_weights:
        weight = calculate_weights_labels(args.dataset, self.train_loader,
                                          self.nclass)
        weight = torch.from_numpy(weight.astype(np.float32))
    else:
        weight = None
    self.criterion = SegmentationLosses(
        weight=weight, cuda=args.cuda).build_loss(mode=args.loss_type)
    self.model, self.optimizer = model, optimizer
    self.evaluator = Evaluator(self.nclass)

    if args.use_lr_scheduler:
        self.scheduler = LR_Scheduler(args.lr_scheduler, args.lr, args.epochs,
                                      len(self.train_loader))
    else:
        self.scheduler = None

    if args.cuda:
        self.model = torch.nn.DataParallel(self.model,
                                           device_ids=self.args.gpu_ids)
        patch_replication_callback(self.model)
        self.model = self.model.cuda()

    self.best_pred = 0.0
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError(f"=> no checkpoint found at {args.resume}")
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        # DataParallel wraps the real model in `.module`.
        if args.cuda:
            self.model.module.load_state_dict(checkpoint['state_dict'])
        else:
            self.model.load_state_dict(checkpoint['state_dict'])
        # Fine-tuning (`args.ft`) restarts the optimizer state; a plain
        # resume restores it.
        if not args.ft:
            self.optimizer.load_state_dict(checkpoint['optimizer'])
        self.best_pred = checkpoint['best_pred']
        print(
            f'=> loaded checkpoint {args.resume} (epoch {checkpoint["epoch"]})'
        )
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train ENet; validate every 10 epochs and keep the best checkpoint.

    Args:
        train_loader: training DataLoader.
        val_loader: validation DataLoader.
        class_weights: per-class weights for CrossEntropyLoss.
        class_encoding: ordered mapping of class name -> color; its length
            fixes the number of output classes.

    Returns:
        The trained model (best checkpoint also saved via
        utils.save_checkpoint).
    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Intialize ENet
    model = ENet(num_classes).to(device)
    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequentely used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(
        model.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric: optionally exclude the 'unlabeled' class from IoU.
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)
        # Scheduler steps after the epoch's optimizer updates (the
        # ordering required since PyTorch 1.1).
        lr_updater.step()

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validate every 10 epochs and on the final epoch.
        if (epoch + 1) % 10 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

    return model
def train(train_loader, val_loader, class_weights, class_encoding):
    """Train ENet with TensorBoardX scalar logging and per-epoch
    validation (the `% 1` guard makes validation run every epoch).

    Args:
        train_loader: training DataLoader.
        val_loader: validation DataLoader.
        class_weights: per-class weights for CrossEntropyLoss.
        class_encoding: ordered mapping of class name -> color; its length
            fixes the number of output classes.

    Returns:
        The trained model (best checkpoint also saved via utils).
    """
    print("\nTraining...\n")

    num_classes = len(class_encoding)

    # Intialize ENet
    model = ENet(num_classes).to(device)
    # Check if the network architecture is correct
    print(model)

    # We are going to use the CrossEntropyLoss loss function as it's most
    # frequentely used in classification problems with multiple classes which
    # fits the problem. This criterion combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)
        # Fix: the scheduler now steps AFTER the epoch's optimizer updates;
        # stepping first skipped the initial LR (PyTorch >= 1.1 ordering).
        lr_updater.step()

        # Visualization by TensorBoardX
        writer.add_scalar('data/train/loss', epoch_loss, epoch)
        writer.add_scalar('data/train/mean_IoU', miou, epoch)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        if (epoch + 1) % 1 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))
            loss, (iou, miou) = val.run_epoch(args.print_step)

            # Visualization by TensorBoardX
            writer.add_scalar('data/val/loss', loss, epoch)
            writer.add_scalar('data/val/mean_IoU', miou, epoch)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per class IoU on last epoch or if best iou
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

            # Visualization of the first predicted validation batch in
            # TensorBoard.  (The original also built `label_to_rgb` /
            # `color_predictions` here but never used them; that dead
            # computation was removed.)
            for i, batch in enumerate(val_loader):
                if i == 1:
                    break
                # Get the inputs and labels
                inputs = batch[0].to(device)
                labels = batch[1].to(device)

                # Forward propagation
                with torch.no_grad():
                    predictions = model(inputs)

                # Predictions is one-hot encoded with "num_classes" channels.
                # Convert it to a single int using the indices where the
                # maximum (1) occurs
                _, predictions = torch.max(predictions.data, 1)

                in_training_visualization(model, inputs, labels,
                                          class_encoding, writer, epoch,
                                          'val')

    return model