def get_lr_scheduler(scheduler='hybrid'):
    """Build and return the LR scheduler named by ``scheduler``.

    Reads hyper-parameters (lr, epochs, step_size, ...) from the
    module-level ``args`` namespace; unknown names abort the process.
    """
    # 'fixed' and 'clr' share the same milestone list: multiples of
    # args.step_size up to (but excluding) args.epochs.
    if scheduler in ('fixed', 'clr'):
        interval = args.step_size
        milestones = [
            interval * k
            for k in range(1, int(math.ceil(args.epochs / interval)))
        ]

    if scheduler == 'fixed':
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr,
                                        steps=milestones,
                                        gamma=args.lr_decay)
    elif scheduler == 'clr':
        from utilities.lr_scheduler import CyclicLR
        lr_scheduler = CyclicLR(min_lr=args.lr,
                                cycle_len=5,
                                steps=milestones,
                                gamma=args.lr_decay)
    elif scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr,
                              max_epochs=args.epochs,
                              power=args.power)
    elif scheduler == 'hybrid':
        # NOTE: 'HybirdLR' is the (misspelled) class name actually exported
        # by utilities.lr_scheduler; keep it as-is.
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr,
                                max_epochs=args.epochs,
                                clr_max=args.clr_max,
                                cycle_len=args.cycle_len)
    elif scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    else:
        print_error_message('{} scheduler Not supported'.format(scheduler))
        exit()

    print_info_message(lr_scheduler)
    return lr_scheduler
def main(args):
    """Train the ESPNetv2 depth-to-RGB autoencoder.

    Builds the dataset selected by ``args.dataset`` (depth images as input,
    RGB images as reconstruction target for 'greenhouse'), constructs the
    autoencoder, and runs an SGD training loop with MSE loss, logging to
    TensorBoard and checkpointing whenever the validation loss improves.

    Args:
        args: parsed command-line namespace (dataset paths, LR schedule,
            batch size, save directory, ...).
    """
    crop_size = args.crop_size
    assert isinstance(crop_size, tuple)
    print_info_message(
        'Running Model at image resolution {}x{} with batch size {}'.format(
            crop_size[0], crop_size[1], args.batch_size))
    if not os.path.isdir(args.savedir):
        os.makedirs(args.savedir)

    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus > 0 else 'cpu'

    # ------------------------------------------------------------------
    # Dataset selection
    # ------------------------------------------------------------------
    if args.dataset == 'pascal':
        from data_loader.segmentation.voc import VOCSegmentation, VOC_CLASS_LIST
        train_dataset = VOCSegmentation(root=args.data_path, train=True,
                                        crop_size=crop_size, scale=args.scale,
                                        coco_root_dir=args.coco_path)
        val_dataset = VOCSegmentation(root=args.data_path, train=False,
                                      crop_size=crop_size, scale=args.scale)
        seg_classes = len(VOC_CLASS_LIST)
        class_wts = torch.ones(seg_classes)
    elif args.dataset == 'city':
        from data_loader.segmentation.cityscapes import CityscapesSegmentation, CITYSCAPE_CLASS_LIST
        train_dataset = CityscapesSegmentation(root=args.data_path, train=True,
                                               size=crop_size, scale=args.scale,
                                               coarse=args.coarse)
        val_dataset = CityscapesSegmentation(root=args.data_path, train=False,
                                             size=crop_size, scale=args.scale,
                                             coarse=False)
        seg_classes = len(CITYSCAPE_CLASS_LIST)
        # Pre-computed per-class weights for Cityscapes (index 19 = ignore).
        class_wts = torch.ones(seg_classes)
        class_wts[0] = 2.8149201869965
        class_wts[1] = 6.9850029945374
        class_wts[2] = 3.7890393733978
        class_wts[3] = 9.9428062438965
        class_wts[4] = 9.7702074050903
        class_wts[5] = 9.5110931396484
        class_wts[6] = 10.311357498169
        class_wts[7] = 10.026463508606
        class_wts[8] = 4.6323022842407
        class_wts[9] = 9.5608062744141
        class_wts[10] = 7.8698215484619
        class_wts[11] = 9.5168733596802
        class_wts[12] = 10.373730659485
        class_wts[13] = 6.6616044044495
        class_wts[14] = 10.260489463806
        class_wts[15] = 10.287888526917
        class_wts[16] = 10.289801597595
        class_wts[17] = 10.405355453491
        class_wts[18] = 10.138095855713
        class_wts[19] = 0.0
    elif args.dataset == 'greenhouse':
        print(args.use_depth)
        from data_loader.segmentation.greenhouse import GreenhouseRGBDSegmentation, GreenhouseDepth, GREENHOUSE_CLASS_LIST
        # Train on filtered depth images; validate on the RGBD split.
        train_dataset = GreenhouseDepth(root=args.data_path,
                                        list_name='train_depth_ae.txt',
                                        train=True, size=crop_size,
                                        scale=args.scale, use_filter=True)
        val_dataset = GreenhouseRGBDSegmentation(root=args.data_path,
                                                 list_name='val_depth_ae.txt',
                                                 train=False, size=crop_size,
                                                 scale=args.scale,
                                                 use_depth=True)
        class_weights = np.load('class_weights.npy')[:4]
        print(class_weights)
        class_wts = torch.from_numpy(class_weights).float().to(device)
        seg_classes = len(GREENHOUSE_CLASS_LIST)
    else:
        print_error_message('Dataset: {} not yet supported'.format(
            args.dataset))
        exit(-1)

    print_info_message('Training samples: {}'.format(len(train_dataset)))
    print_info_message('Validation samples: {}'.format(len(val_dataset)))

    # ------------------------------------------------------------------
    # Model and optimizer
    # ------------------------------------------------------------------
    from model.autoencoder.depth_autoencoder import espnetv2_autoenc
    args.classes = 3  # the decoder reconstructs a 3-channel RGB image
    model = espnetv2_autoenc(args)

    train_params = [{
        'params': model.get_basenet_params(),
        'lr': args.lr * args.lr_mult
    }]
    optimizer = optim.SGD(train_params, momentum=args.momentum,
                          weight_decay=args.weight_decay)

    num_params = model_parameters(model)
    # FLOPs measured on a 1-channel (depth) input, matching the encoder.
    flops = compute_flops(model,
                          input=torch.Tensor(1, 1, crop_size[0], crop_size[1]))
    print_info_message(
        'FLOPs for an input of size {}x{}: {:.2f} million'.format(
            crop_size[0], crop_size[1], flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    writer = SummaryWriter(log_dir=args.savedir,
                           comment='Training and Validation logs')
    try:
        writer.add_graph(model,
                         input_to_model=torch.Tensor(1, 3, crop_size[0],
                                                     crop_size[1]))
    except:
        print_log_message(
            "Not able to generate the graph. Likely because your model is not supported by ONNX"
        )

    start_epoch = 0
    print('device : ' + device)

    # Plain reconstruction loss; segmentation-style criteria were tried and
    # intentionally dropped for the autoencoder.
    criterion = nn.MSELoss()

    if num_gpus >= 1:
        if num_gpus == 1:
            # for a single GPU, we do not need DataParallel wrapper for
            # Criteria. So, falling back to its internal wrapper
            from torch.nn.parallel import DataParallel
            model = DataParallel(model)
            model = model.cuda()
            criterion = criterion.cuda()
        else:
            from utilities.parallel_wrapper import DataParallelModel, DataParallelCriteria
            model = DataParallelModel(model)
            model = model.cuda()
            criterion = DataParallelCriteria(criterion)
            criterion = criterion.cuda()
        if torch.backends.cudnn.is_available():
            import torch.backends.cudnn as cudnn
            cudnn.benchmark = True
            cudnn.deterministic = True

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True, pin_memory=True,
                                               num_workers=args.workers)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False, pin_memory=True,
                                             num_workers=args.workers)

    # ------------------------------------------------------------------
    # LR scheduler
    # ------------------------------------------------------------------
    if args.scheduler == 'fixed':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr, steps=step_sizes,
                                        gamma=args.lr_decay)
    elif args.scheduler == 'clr':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import CyclicLR
        lr_scheduler = CyclicLR(min_lr=args.lr, cycle_len=5,
                                steps=step_sizes, gamma=args.lr_decay)
    elif args.scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr, max_epochs=args.epochs,
                              power=args.power)
    elif args.scheduler == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr, max_epochs=args.epochs,
                                clr_max=args.clr_max,
                                cycle_len=args.cycle_len)
    elif args.scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    else:
        print_error_message('{} scheduler Not supported'.format(
            args.scheduler))
        exit()
    print_info_message(lr_scheduler)

    # Persist the full run configuration next to the checkpoints.
    with open(args.savedir + os.sep + 'arguments.json', 'w') as outfile:
        import json
        arg_dict = vars(args)
        arg_dict['model_params'] = '{} '.format(num_params)
        arg_dict['flops'] = '{} '.format(flops)
        json.dump(arg_dict, outfile)

    extra_info_ckpt = '{}_{}_{}'.format(args.model, args.s, crop_size[0])

    # BUG FIX: best_loss tracks a running *minimum* of a non-negative MSE,
    # so it must start at +inf. Starting at 0.0 meant `is_best` was never
    # True and the best checkpoint was never flagged/saved.
    best_loss = float('inf')
    for epoch in range(start_epoch, args.epochs):
        lr_base = lr_scheduler.step(epoch)
        # set the optimizer with the learning rate
        # This can be done inside the MyLRScheduler
        lr_seg = lr_base * args.lr_mult
        optimizer.param_groups[0]['lr'] = lr_seg

        # ---------------- Train ----------------
        model.train()
        losses = AverageMeter()
        for i, batch in enumerate(train_loader):
            inputs = batch[1].to(device=device)  # Depth
            target = batch[0].to(device=device)  # RGB
            outputs = model(inputs)
            if device == 'cuda':
                loss = criterion(outputs, target).mean()
                if isinstance(outputs, (list, tuple)):
                    target_dev = outputs[0].device
                    outputs = gather(outputs, target_device=target_dev)
            else:
                loss = criterion(outputs, target)

            losses.update(loss.item(), inputs.size(0))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            writer.add_scalar('Autoencoder/Loss/train', loss.item(),
                              len(train_loader) * epoch + i)
            print_info_message('Running batch {}/{} of epoch {}'.format(
                i + 1, len(train_loader), epoch + 1))
        train_loss = losses.avg
        writer.add_scalar('Autoencoder/LR/seg', round(lr_seg, 6), epoch)

        # ---------------- Validation (every 5 epochs) ----------------
        if epoch % 5 == 0:
            losses = AverageMeter()
            with torch.no_grad():
                for i, batch in enumerate(val_loader):
                    # NOTE(review): validation batches come from the RGBD
                    # dataset, where depth is at index 2 (index 1 on the
                    # training Depth dataset) — confirm against the loaders.
                    inputs = batch[2].to(device=device)  # Depth
                    target = batch[0].to(device=device)  # RGB
                    outputs = model(inputs)
                    if device == 'cuda':
                        loss = criterion(outputs, target)  # .mean()
                        if isinstance(outputs, (list, tuple)):
                            target_dev = outputs[0].device
                            outputs = gather(outputs,
                                             target_device=target_dev)
                    else:
                        loss = criterion(outputs, target)

                    losses.update(loss.item(), inputs.size(0))

                    image_grid = torchvision.utils.make_grid(
                        outputs.data.cpu()).numpy()
                    writer.add_image('Autoencoder/results/val', image_grid,
                                     epoch)
                    image_grid = torchvision.utils.make_grid(
                        inputs.data.cpu()).numpy()
                    writer.add_image('Autoencoder/inputs/val', image_grid,
                                     epoch)
                    image_grid = torchvision.utils.make_grid(
                        target.data.cpu()).numpy()
                    writer.add_image('Autoencoder/target/val', image_grid,
                                     epoch)

            val_loss = losses.avg
            print_info_message(
                'Running epoch {} with learning rates: base_net {:.6f}, segment_net {:.6f}'
                .format(epoch, lr_base, lr_seg))

            # remember best loss and save checkpoint
            is_best = val_loss < best_loss
            best_loss = min(val_loss, best_loss)
            weights_dict = model.module.state_dict(
            ) if device == 'cuda' else model.state_dict()
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.model,
                    'state_dict': weights_dict,
                    'best_loss': best_loss,
                    'optimizer': optimizer.state_dict(),
                }, is_best, args.savedir, extra_info_ckpt)
            writer.add_scalar('Autoencoder/Loss/val', val_loss, epoch)

    writer.close()
def main(args):
    """Train an SSD object detector on VOC or COCO.

    Builds the SSD config for ``args.im_size``, the train/val datasets and
    loaders, the model (optionally finetuned / BN-frozen), then runs the
    epoch loop with MultiBox loss, TensorBoard logging, and checkpointing
    on the lowest validation loss.

    Args:
        args: parsed command-line namespace.
    """
    if args.im_size in [300, 512]:
        from model.detection.ssd_config import get_config
        cfg = get_config(args.im_size)
    else:
        print_error_message('{} image size not supported'.format(args.im_size))

    # -----------------------------------------------------------------------------
    # Dataset
    # -----------------------------------------------------------------------------
    train_transform = TrainTransform(cfg.image_size)
    # MatchPrior assigns ground-truth boxes to the fixed prior boxes.
    target_transform = MatchPrior(PriorBox(cfg)(), cfg.center_variance,
                                  cfg.size_variance, cfg.iou_threshold)
    val_transform = ValTransform(cfg.image_size)

    if args.dataset in ['voc', 'pascal']:
        from data_loader.detection.voc import VOCDataset, VOC_CLASS_LIST
        # Standard VOC 07+12 trainval combination; validate on VOC2007.
        train_dataset_2007 = VOCDataset(root_dir=args.data_path,
                                        transform=train_transform,
                                        target_transform=target_transform,
                                        is_training=True, split="VOC2007")
        train_dataset_2012 = VOCDataset(root_dir=args.data_path,
                                        transform=train_transform,
                                        target_transform=target_transform,
                                        is_training=True, split="VOC2012")
        train_dataset = torch.utils.data.ConcatDataset(
            [train_dataset_2007, train_dataset_2012])
        val_dataset = VOCDataset(root_dir=args.data_path,
                                 transform=val_transform,
                                 target_transform=target_transform,
                                 is_training=False, split="VOC2007")
        num_classes = len(VOC_CLASS_LIST)
    elif args.dataset == 'coco':
        from data_loader.detection.coco import COCOObjectDetection, COCO_CLASS_LIST
        train_dataset = COCOObjectDetection(root_dir=args.data_path,
                                            transform=train_transform,
                                            target_transform=target_transform,
                                            is_training=True)
        val_dataset = COCOObjectDetection(root_dir=args.data_path,
                                          transform=val_transform,
                                          target_transform=target_transform,
                                          is_training=False)
        num_classes = len(COCO_CLASS_LIST)
    else:
        print_error_message('{} dataset is not supported yet'.format(
            args.dataset))
        exit()
    cfg.NUM_CLASSES = num_classes

    # -----------------------------------------------------------------------------
    # Dataset loader
    # -----------------------------------------------------------------------------
    print_info_message('Training samples: {}'.format(len(train_dataset)))
    print_info_message('Validation samples: {}'.format(len(val_dataset)))

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.workers,
                              pin_memory=True)
    val_loader = DataLoader(val_dataset, batch_size=args.batch_size,
                            shuffle=False, num_workers=args.workers,
                            pin_memory=True)

    # -----------------------------------------------------------------------------
    # Model
    # -----------------------------------------------------------------------------
    model = ssd(args, cfg)
    if args.finetune:
        if os.path.isfile(args.finetune):
            print_info_message('Loading weights for finetuning from {}'.format(
                args.finetune))
            weight_dict = torch.load(args.finetune,
                                     map_location=torch.device(device='cpu'))
            model.load_state_dict(weight_dict)
            print_info_message('Done')
        else:
            print_warning_message('No file for finetuning. Please check.')

    if args.freeze_bn:
        print_info_message('Freezing batch normalization layers')
        for m in model.modules():
            if isinstance(m, torch.nn.BatchNorm2d):
                m.eval()
                m.weight.requires_grad = False
                m.bias.requires_grad = False

    # -----------------------------------------------------------------------------
    # Optimizer and Criterion
    # -----------------------------------------------------------------------------
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr,
                                momentum=args.momentum, weight_decay=args.wd)
    criterion = MultiBoxLoss(neg_pos_ratio=cfg.neg_pos_ratio)

    # writer for logs
    writer = SummaryWriter(log_dir=args.save,
                           comment='Training and Validation logs')
    try:
        writer.add_graph(model,
                         input_to_model=torch.Tensor(1, 3, cfg.image_size,
                                                     cfg.image_size))
    except:
        print_log_message(
            "Not able to generate the graph. Likely because your model is not supported by ONNX"
        )

    # model stats
    num_params = model_parameters(model)
    flops = compute_flops(model,
                          input=torch.Tensor(1, 3, cfg.image_size,
                                             cfg.image_size))
    print_info_message(
        'FLOPs for an input of size {}x{}: {:.2f} million'.format(
            cfg.image_size, cfg.image_size, flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus >= 1 else 'cpu'

    min_val_loss = float('inf')
    start_epoch = 0  # start from epoch 0 or last epoch
    if args.resume:
        if os.path.isfile(args.resume):
            print_info_message("=> loading checkpoint '{}'".format(
                args.resume))
            # BUG FIX: the existence check above is on args.resume, so the
            # checkpoint must be loaded from args.resume as well
            # (previously this read the unrelated args.checkpoint).
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            model.load_state_dict(checkpoint['state_dict'])
            min_val_loss = checkpoint['min_loss']
            start_epoch = checkpoint['epoch']
        else:
            print_warning_message("=> no checkpoint found at '{}'".format(
                args.resume))

    if num_gpus >= 1:
        model = torch.nn.DataParallel(model)
        model = model.to(device)
        if torch.backends.cudnn.is_available():
            import torch.backends.cudnn as cudnn
            cudnn.benchmark = True
            cudnn.deterministic = True

    # -----------------------------------------------------------------------------
    # Scheduler
    # -----------------------------------------------------------------------------
    if args.lr_type == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr, max_epochs=args.epochs,
                              power=args.power)
    elif args.lr_type == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr, max_epochs=args.epochs,
                                clr_max=args.clr_max,
                                cycle_len=args.cycle_len)
    elif args.lr_type == 'clr':
        from utilities.lr_scheduler import CyclicLR
        lr_scheduler = CyclicLR(min_lr=args.lr, cycle_len=args.cycle_len,
                                steps=args.steps, gamma=args.gamma, step=True)
    elif args.lr_type == 'cosine':
        from utilities.lr_scheduler import CosineLR
        lr_scheduler = CosineLR(base_lr=args.lr, max_epochs=args.epochs)
    else:
        print_error_message('{} scheduler not yet supported'.format(
            args.lr_type))
        exit()
    print_info_message(lr_scheduler)

    # -----------------------------------------------------------------------------
    # Training and validation loop
    # -----------------------------------------------------------------------------
    extra_info_ckpt = '{}_{}'.format(args.model, args.s)
    for epoch in range(start_epoch, args.epochs):
        curr_lr = lr_scheduler.step(epoch)
        optimizer.param_groups[0]['lr'] = curr_lr
        print_info_message('Running epoch {} at LR {}'.format(epoch, curr_lr))

        train_loss, train_cl_loss, train_loc_loss = train(
            train_loader, model, criterion, optimizer, device, epoch=epoch)
        val_loss, val_cl_loss, val_loc_loss = validate(
            val_loader, model, criterion, device, epoch=epoch)

        # Save checkpoint (best = lowest validation loss so far).
        is_best = val_loss < min_val_loss
        min_val_loss = min(val_loss, min_val_loss)

        weights_dict = model.module.state_dict(
        ) if device == 'cuda' else model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch,
                'model': args.model,
                'state_dict': weights_dict,
                'min_loss': min_val_loss
            }, is_best, args.save, extra_info_ckpt)

        writer.add_scalar('Detection/LR/learning_rate', round(curr_lr, 6),
                          epoch)
        writer.add_scalar('Detection/Loss/train', train_loss, epoch)
        writer.add_scalar('Detection/Loss/val', val_loss, epoch)
        writer.add_scalar('Detection/Loss/train_cls', train_cl_loss, epoch)
        writer.add_scalar('Detection/Loss/val_cls', val_cl_loss, epoch)
        writer.add_scalar('Detection/Loss/train_loc', train_loc_loss, epoch)
        writer.add_scalar('Detection/Loss/val_loc', val_loc_loss, epoch)
        # NOTE(review): value/step look swapped here (loss logged against a
        # flops/params "step"); kept as-is to preserve existing dashboards.
        writer.add_scalar('Detection/Complexity/Flops', min_val_loss,
                          math.ceil(flops))
        writer.add_scalar('Detection/Complexity/Params', min_val_loss,
                          math.ceil(num_params))

    writer.close()
def main(args):
    """Train an ESPNetv2 or DiCENet semantic-segmentation model.

    Selects the dataset ('pascal' or 'city'), builds the model, optimizer
    (two param groups: base net and segmentation head), criterion, LR
    scheduler, and runs the epoch loop with TensorBoard logging and
    best-mIoU checkpointing.
    """
    crop_size = args.crop_size
    assert isinstance(crop_size, tuple)
    print_info_message(
        'Running Model at image resolution {}x{} with batch size {}'.format(
            crop_size[0], crop_size[1], args.batch_size))
    if not os.path.isdir(args.savedir):
        os.makedirs(args.savedir)

    # ------------------------------------------------------------------
    # Dataset selection
    # ------------------------------------------------------------------
    if args.dataset == 'pascal':
        from data_loader.segmentation.voc import VOCSegmentation, VOC_CLASS_LIST
        train_dataset = VOCSegmentation(root=args.data_path, train=True,
                                        crop_size=crop_size, scale=args.scale,
                                        coco_root_dir=args.coco_path)
        val_dataset = VOCSegmentation(root=args.data_path, train=False,
                                      crop_size=crop_size, scale=args.scale)
        seg_classes = len(VOC_CLASS_LIST)
        class_wts = torch.ones(seg_classes)
    elif args.dataset == 'city':
        from data_loader.segmentation.cityscapes import CityscapesSegmentation, CITYSCAPE_CLASS_LIST
        train_dataset = CityscapesSegmentation(root=args.data_path, train=True,
                                               size=crop_size, scale=args.scale,
                                               coarse=args.coarse)
        val_dataset = CityscapesSegmentation(root=args.data_path, train=False,
                                             size=crop_size, scale=args.scale,
                                             coarse=False)
        seg_classes = len(CITYSCAPE_CLASS_LIST)
        # Pre-computed per-class weights for Cityscapes; index 19 (ignore
        # label) gets weight 0 so it does not contribute to the loss.
        class_wts = torch.ones(seg_classes)
        class_wts[0] = 2.8149201869965
        class_wts[1] = 6.9850029945374
        class_wts[2] = 3.7890393733978
        class_wts[3] = 9.9428062438965
        class_wts[4] = 9.7702074050903
        class_wts[5] = 9.5110931396484
        class_wts[6] = 10.311357498169
        class_wts[7] = 10.026463508606
        class_wts[8] = 4.6323022842407
        class_wts[9] = 9.5608062744141
        class_wts[10] = 7.8698215484619
        class_wts[11] = 9.5168733596802
        class_wts[12] = 10.373730659485
        class_wts[13] = 6.6616044044495
        class_wts[14] = 10.260489463806
        class_wts[15] = 10.287888526917
        class_wts[16] = 10.289801597595
        class_wts[17] = 10.405355453491
        class_wts[18] = 10.138095855713
        class_wts[19] = 0.0
    else:
        print_error_message('Dataset: {} not yet supported'.format(
            args.dataset))
        exit(-1)

    print_info_message('Training samples: {}'.format(len(train_dataset)))
    print_info_message('Validation samples: {}'.format(len(val_dataset)))

    # ------------------------------------------------------------------
    # Model selection
    # ------------------------------------------------------------------
    if args.model == 'espnetv2':
        from model.segmentation.espnetv2 import espnetv2_seg
        args.classes = seg_classes
        model = espnetv2_seg(args)
    elif args.model == 'dicenet':
        from model.segmentation.dicenet import dicenet_seg
        model = dicenet_seg(args, classes=seg_classes)
    else:
        print_error_message('Arch: {} not yet supported'.format(args.model))
        exit(-1)

    # Optionally start from pretrained weights before (re)training.
    if args.finetune:
        if os.path.isfile(args.finetune):
            print_info_message('Loading weights for finetuning from {}'.format(
                args.finetune))
            weight_dict = torch.load(args.finetune,
                                     map_location=torch.device(device='cpu'))
            model.load_state_dict(weight_dict)
            print_info_message('Done')
        else:
            print_warning_message('No file for finetuning. Please check.')

    # Freeze BN statistics and affine params (useful with small batches).
    if args.freeze_bn:
        print_info_message('Freezing batch normalization layers')
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
                m.weight.requires_grad = False
                m.bias.requires_grad = False

    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus > 0 else 'cpu'

    # Two parameter groups: backbone at base LR, segmentation head at
    # base LR * lr_mult.
    train_params = [{
        'params': model.get_basenet_params(),
        'lr': args.lr
    }, {
        'params': model.get_segment_params(),
        'lr': args.lr * args.lr_mult
    }]

    optimizer = optim.SGD(train_params, momentum=args.momentum,
                          weight_decay=args.weight_decay)

    num_params = model_parameters(model)
    flops = compute_flops(model,
                          input=torch.Tensor(1, 3, crop_size[0], crop_size[1]))
    print_info_message(
        'FLOPs for an input of size {}x{}: {:.2f} million'.format(
            crop_size[0], crop_size[1], flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    writer = SummaryWriter(log_dir=args.savedir,
                           comment='Training and Validation logs')
    try:
        writer.add_graph(model,
                         input_to_model=torch.Tensor(1, 3, crop_size[0],
                                                     crop_size[1]))
    except:
        print_log_message(
            "Not able to generate the graph. Likely because your model is not supported by ONNX"
        )

    start_epoch = 0
    best_miou = 0.0
    # Resume full training state (epoch, best mIoU, model, optimizer).
    if args.resume:
        if os.path.isfile(args.resume):
            print_info_message("=> loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            start_epoch = checkpoint['epoch']
            best_miou = checkpoint['best_miou']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_info_message("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print_warning_message("=> no checkpoint found at '{}'".format(
                args.resume))

    #criterion = nn.CrossEntropyLoss(weight=class_wts, reduction='none', ignore_index=args.ignore_idx)
    criterion = SegmentationLoss(n_classes=seg_classes,
                                 loss_type=args.loss_type,
                                 device=device, ignore_idx=args.ignore_idx,
                                 class_wts=class_wts.to(device))

    if num_gpus >= 1:
        if num_gpus == 1:
            # for a single GPU, we do not need DataParallel wrapper for Criteria.
            # So, falling back to its internal wrapper
            from torch.nn.parallel import DataParallel
            model = DataParallel(model)
            model = model.cuda()
            criterion = criterion.cuda()
        else:
            from utilities.parallel_wrapper import DataParallelModel, DataParallelCriteria
            model = DataParallelModel(model)
            model = model.cuda()
            criterion = DataParallelCriteria(criterion)
            criterion = criterion.cuda()
        if torch.backends.cudnn.is_available():
            import torch.backends.cudnn as cudnn
            cudnn.benchmark = True
            cudnn.deterministic = True

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True, pin_memory=True,
                                               num_workers=args.workers)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False, pin_memory=True,
                                             num_workers=args.workers)

    # ------------------------------------------------------------------
    # LR scheduler selection
    # ------------------------------------------------------------------
    if args.scheduler == 'fixed':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr, steps=step_sizes,
                                        gamma=args.lr_decay)
    elif args.scheduler == 'clr':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import CyclicLR
        lr_scheduler = CyclicLR(min_lr=args.lr, cycle_len=5,
                                steps=step_sizes, gamma=args.lr_decay)
    elif args.scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr, max_epochs=args.epochs,
                              power=args.power)
    elif args.scheduler == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr, max_epochs=args.epochs,
                                clr_max=args.clr_max,
                                cycle_len=args.cycle_len)
    elif args.scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    else:
        print_error_message('{} scheduler Not supported'.format(
            args.scheduler))
        exit()
    print_info_message(lr_scheduler)

    # Persist the full run configuration next to the checkpoints.
    with open(args.savedir + os.sep + 'arguments.json', 'w') as outfile:
        import json
        arg_dict = vars(args)
        arg_dict['model_params'] = '{} '.format(num_params)
        arg_dict['flops'] = '{} '.format(flops)
        json.dump(arg_dict, outfile)

    extra_info_ckpt = '{}_{}_{}'.format(args.model, args.s, crop_size[0])
    for epoch in range(start_epoch, args.epochs):
        lr_base = lr_scheduler.step(epoch)
        # set the optimizer with the learning rate
        # This can be done inside the MyLRScheduler
        lr_seg = lr_base * args.lr_mult
        optimizer.param_groups[0]['lr'] = lr_base
        optimizer.param_groups[1]['lr'] = lr_seg

        print_info_message(
            'Running epoch {} with learning rates: base_net {:.6f}, segment_net {:.6f}'
            .format(epoch, lr_base, lr_seg))
        miou_train, train_loss = train(model, train_loader, optimizer,
                                       criterion, seg_classes, epoch,
                                       device=device)
        miou_val, val_loss = val(model, val_loader, criterion, seg_classes,
                                 device=device)

        # remember best miou and save checkpoint
        is_best = miou_val > best_miou
        best_miou = max(miou_val, best_miou)

        weights_dict = model.module.state_dict(
        ) if device == 'cuda' else model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.model,
                'state_dict': weights_dict,
                'best_miou': best_miou,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.savedir, extra_info_ckpt)

        writer.add_scalar('Segmentation/LR/base', round(lr_base, 6), epoch)
        writer.add_scalar('Segmentation/LR/seg', round(lr_seg, 6), epoch)
        writer.add_scalar('Segmentation/Loss/train', train_loss, epoch)
        writer.add_scalar('Segmentation/Loss/val', val_loss, epoch)
        writer.add_scalar('Segmentation/mIOU/train', miou_train, epoch)
        writer.add_scalar('Segmentation/mIOU/val', miou_val, epoch)
        # NOTE(review): value/step appear swapped here (mIoU logged against a
        # flops/params "step") — confirm whether this is intentional.
        writer.add_scalar('Segmentation/Complexity/Flops', best_miou,
                          math.ceil(flops))
        writer.add_scalar('Segmentation/Complexity/Params', best_miou,
                          math.ceil(num_params))

    writer.close()
def main(args):
    """Train the ESPDNet multitask (segmentation + classification) model on
    the greenhouse RGB(-D) dataset.

    Builds the GreenhouseRGBDSegCls datasets, the multitask model, one SGD
    optimizer with 2 or 3 parameter groups (a depth-encoder group is added
    when ``args.use_depth``), separate segmentation and classification
    criteria, then runs the epoch loop with TensorBoard logging,
    visualization of a validation batch, and best-mIoU checkpointing.

    Args:
        args: parsed command-line namespace.
    """
    crop_size = args.crop_size
    assert isinstance(crop_size, tuple)
    print_info_message(
        'Running Model at image resolution {}x{} with batch size {}'.format(
            crop_size[0], crop_size[1], args.batch_size))
    if not os.path.isdir(args.savedir):
        os.makedirs(args.savedir)

    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus > 0 else 'cpu'

    # ------------------------------------------------------------------
    # Dataset
    # ------------------------------------------------------------------
    if args.dataset == 'greenhouse':
        print(args.use_depth)
        from data_loader.segmentation.greenhouse import GreenhouseRGBDSegCls, GREENHOUSE_CLASS_LIST
        train_dataset = GreenhouseRGBDSegCls(
            root=args.data_path, list_name='train_greenhouse_mult.txt',
            train=True, size=crop_size, scale=args.scale,
            use_depth=args.use_depth)
        val_dataset = GreenhouseRGBDSegCls(
            root=args.data_path, list_name='val_greenhouse_mult.txt',
            train=False, size=crop_size, scale=args.scale,
            use_depth=args.use_depth)
        class_weights = np.load('class_weights.npy')[:4]
        print(class_weights)
        class_wts = torch.from_numpy(class_weights).float().to(device)
        seg_classes = len(GREENHOUSE_CLASS_LIST)
        # Label -> RGB colour map used by the visualization helper.
        color_encoding = OrderedDict([('end_of_plant', (0, 255, 0)),
                                      ('other_part_of_plant', (0, 255, 255)),
                                      ('artificial_objects', (255, 0, 0)),
                                      ('ground', (255, 255, 0)),
                                      ('background', (0, 0, 0))])
    else:
        print_error_message('Dataset: {} not yet supported'.format(
            args.dataset))
        exit(-1)

    print_info_message('Training samples: {}'.format(len(train_dataset)))
    print_info_message('Validation samples: {}'.format(len(val_dataset)))

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    if args.model == 'espdnet':
        from model.segmentation.espdnet_mult import espdnet_mult
        args.classes = seg_classes
        args.cls_classes = 5  # number of image-level classification classes
        model = espdnet_mult(args)
    else:
        print_error_message('Arch: {} not yet supported'.format(args.model))
        exit(-1)

    if args.finetune:
        if os.path.isfile(args.finetune):
            print_info_message('Loading weights for finetuning from {}'.format(
                args.finetune))
            weight_dict = torch.load(args.finetune,
                                     map_location=torch.device(device='cpu'))
            model.load_state_dict(weight_dict)
            print_info_message('Done')
        else:
            print_warning_message('No file for finetuning. Please check.')

    if args.freeze_bn:
        print_info_message('Freezing batch normalization layers')
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
                m.weight.requires_grad = False
                m.bias.requires_grad = False

    # Parameter groups: backbone and (optionally) depth encoder at base LR,
    # segmentation head at base LR * lr_mult.
    if args.use_depth:
        train_params = [{
            'params': model.get_basenet_params(),
            'lr': args.lr
        }, {
            'params': model.get_segment_params(),
            'lr': args.lr * args.lr_mult
        }, {
            'params': model.get_depth_encoder_params(),
            'lr': args.lr
        }]
    else:
        train_params = [{
            'params': model.get_basenet_params(),
            'lr': args.lr
        }, {
            'params': model.get_segment_params(),
            'lr': args.lr * args.lr_mult
        }]

    optimizer = optim.SGD(train_params, lr=args.lr * args.lr_mult,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    num_params = model_parameters(model)
    flops = compute_flops(model,
                          input=torch.Tensor(1, 3, crop_size[0], crop_size[1]))
    print_info_message(
        'FLOPs for an input of size {}x{}: {:.2f} million'.format(
            crop_size[0], crop_size[1], flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    writer = SummaryWriter(log_dir=args.savedir,
                           comment='Training and Validation logs')
    try:
        writer.add_graph(model,
                         input_to_model=torch.Tensor(1, 3, crop_size[0],
                                                     crop_size[1]))
    except:
        print_log_message(
            "Not able to generate the graph. Likely because your model is not supported by ONNX"
        )

    start_epoch = 0
    best_miou = 0.0
    if args.resume:
        if os.path.isfile(args.resume):
            print_info_message("=> loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            start_epoch = checkpoint['epoch']
            best_miou = checkpoint['best_miou']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_info_message("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print_warning_message("=> no checkpoint found at '{}'".format(
                args.resume))

    print('device : ' + device)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True, pin_memory=True,
                                               num_workers=args.workers)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False, pin_memory=True,
                                             num_workers=args.workers)

    # Class weights for the image-level classification head, estimated from
    # the training loader.
    cls_class_weight = calc_cls_class_weight(train_loader, 5)
    print(cls_class_weight)

    #criterion = nn.CrossEntropyLoss(weight=class_wts, reduction='none', ignore_index=args.ignore_idx)
    criterion_seg = SegmentationLoss(n_classes=seg_classes,
                                     loss_type=args.loss_type,
                                     device=device,
                                     ignore_idx=args.ignore_idx,
                                     class_wts=class_wts.to(device))
    criterion_cls = nn.CrossEntropyLoss(
        weight=torch.from_numpy(cls_class_weight).float().to(device))

    if num_gpus >= 1:
        if num_gpus == 1:
            # for a single GPU, we do not need DataParallel wrapper for
            # Criteria. So, falling back to its internal wrapper
            from torch.nn.parallel import DataParallel
            model = DataParallel(model)
            model = model.cuda()
            criterion_seg = criterion_seg.cuda()
        else:
            from utilities.parallel_wrapper import DataParallelModel, DataParallelCriteria
            model = DataParallelModel(model)
            model = model.cuda()
            criterion_seg = DataParallelCriteria(criterion_seg)
            criterion_seg = criterion_seg.cuda()
        criterion_cls = criterion_cls.cuda()
        if torch.backends.cudnn.is_available():
            import torch.backends.cudnn as cudnn
            cudnn.benchmark = True
            cudnn.deterministic = True

    # ------------------------------------------------------------------
    # LR scheduler
    # ------------------------------------------------------------------
    if args.scheduler == 'fixed':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr, steps=step_sizes,
                                        gamma=args.lr_decay)
    elif args.scheduler == 'clr':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import CyclicLR
        lr_scheduler = CyclicLR(min_lr=args.lr, cycle_len=5,
                                steps=step_sizes, gamma=args.lr_decay)
    elif args.scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr, max_epochs=args.epochs,
                              power=args.power)
    elif args.scheduler == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr, max_epochs=args.epochs,
                                clr_max=args.clr_max,
                                cycle_len=args.cycle_len)
    elif args.scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    else:
        print_error_message('{} scheduler Not supported'.format(
            args.scheduler))
        exit()
    print_info_message(lr_scheduler)

    # Persist the full run configuration next to the checkpoints.
    with open(args.savedir + os.sep + 'arguments.json', 'w') as outfile:
        import json
        arg_dict = vars(args)
        arg_dict['model_params'] = '{} '.format(num_params)
        arg_dict['flops'] = '{} '.format(flops)
        json.dump(arg_dict, outfile)

    extra_info_ckpt = '{}_{}_{}'.format(args.model, args.s, crop_size[0])
    for epoch in range(start_epoch, args.epochs):
        lr_base = lr_scheduler.step(epoch)
        # set the optimizer with the learning rate
        # This can be done inside the MyLRScheduler
        lr_seg = lr_base * args.lr_mult
        optimizer.param_groups[0]['lr'] = lr_base
        optimizer.param_groups[1]['lr'] = lr_seg
        if args.use_depth:
            optimizer.param_groups[2]['lr'] = lr_base

        print_info_message(
            'Running epoch {} with learning rates: base_net {:.6f}, segment_net {:.6f}'
            .format(epoch, lr_base, lr_seg))
        miou_train, train_loss, train_seg_loss, train_cls_loss = train(
            model, train_loader, optimizer, criterion_seg, seg_classes,
            epoch, criterion_cls, device=device, use_depth=args.use_depth)
        miou_val, val_loss, val_seg_loss, val_cls_loss = val(
            model, val_loader, criterion_seg, criterion_cls, seg_classes,
            device=device, use_depth=args.use_depth)

        # BUG FIX: iterators have no .next() method in Python 3; use the
        # built-in next() to grab one validation batch for visualization.
        batch = next(iter(val_loader))
        if args.use_depth:
            in_training_visualization_2(model,
                                        images=batch[0].to(device=device),
                                        depths=batch[2].to(device=device),
                                        labels=batch[1].to(device=device),
                                        class_encoding=color_encoding,
                                        writer=writer, epoch=epoch,
                                        data='Segmentation', device=device)
        else:
            in_training_visualization_2(model,
                                        images=batch[0].to(device=device),
                                        labels=batch[1].to(device=device),
                                        class_encoding=color_encoding,
                                        writer=writer, epoch=epoch,
                                        data='Segmentation', device=device)

        # remember best miou and save checkpoint
        is_best = miou_val > best_miou
        best_miou = max(miou_val, best_miou)

        weights_dict = model.module.state_dict(
        ) if device == 'cuda' else model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.model,
                'state_dict': weights_dict,
                'best_miou': best_miou,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.savedir, extra_info_ckpt)

        writer.add_scalar('Segmentation/LR/base', round(lr_base, 6), epoch)
        writer.add_scalar('Segmentation/LR/seg', round(lr_seg, 6), epoch)
        writer.add_scalar('Segmentation/Loss/train', train_loss, epoch)
        writer.add_scalar('Segmentation/SegLoss/train', train_seg_loss, epoch)
        writer.add_scalar('Segmentation/ClsLoss/train', train_cls_loss, epoch)
        writer.add_scalar('Segmentation/Loss/val', val_loss, epoch)
        writer.add_scalar('Segmentation/SegLoss/val', val_seg_loss, epoch)
        writer.add_scalar('Segmentation/ClsLoss/val', val_cls_loss, epoch)
        writer.add_scalar('Segmentation/mIOU/train', miou_train, epoch)
        writer.add_scalar('Segmentation/mIOU/val', miou_val, epoch)
        # NOTE(review): value/step look swapped here (mIoU logged against a
        # flops/params "step"); kept as-is to preserve existing dashboards.
        writer.add_scalar('Segmentation/Complexity/Flops', best_miou,
                          math.ceil(flops))
        writer.add_scalar('Segmentation/Complexity/Params', best_miou,
                          math.ceil(num_params))

    writer.close()
def main(args):
    """Train and validate a classification model described by ``args``.

    Builds the network, optionally restores pretrained weights for
    finetuning, sets up the dataset loaders, loss, optimizer and LR
    scheduler, then runs the epoch loop, checkpointing the best model and
    logging scalars to TensorBoard.  All outputs (checkpoints, event files,
    ``arguments.json``) are written under ``args.savedir``.
    """
    # -----------------------------------------------------------------------------
    # Create model
    # -----------------------------------------------------------------------------
    if args.model == 'dicenet':
        from model.classification import dicenet as net
        model = net.CNNModel(args)
    elif args.model == 'espnetv2':
        from model.classification import espnetv2 as net
        model = net.EESPNet(args)
    elif args.model == 'shufflenetv2':
        from model.classification import shufflenetv2 as net
        model = net.CNNModel(args)
    else:
        print_error_message('Model {} not yet implemented'.format(args.model))
        exit()

    if args.finetune:
        # Load the weights for finetuning
        if os.path.isfile(args.weights_ft):
            pretrained_dict = torch.load(args.weights_ft,
                                         map_location=torch.device('cpu'))
            print_info_message('Loading pretrained basenet model weights')
            model_dict = model.state_dict()
            # Keep only entries whose parameter names exist in both dicts.
            overlap_dict = {
                k: v
                for k, v in model_dict.items() if k in pretrained_dict
            }
            # Element counts, used only to report how much of the pretrained
            # checkpoint is actually transferred.
            total_size_overlap = sum(v.numel() for v in overlap_dict.values())
            total_size_pretrain = sum(
                torch.numel(pretrained_dict[k]) for k in pretrained_dict)
            if len(overlap_dict) == 0:
                print_error_message(
                    'No overlaping weights between model file and pretrained weight file. Please check'
                )
            print_info_message('Overlap ratio of weights: {:.2f} %'.format(
                (total_size_overlap * 100.0) / total_size_pretrain))
            model_dict.update(overlap_dict)
            model.load_state_dict(model_dict, strict=False)
            print_info_message('Pretrained basenet model loaded!!')
        else:
            print_error_message('Unable to find the weights: {}'.format(
                args.weights_ft))

    # -----------------------------------------------------------------------------
    # Writer for logging
    # -----------------------------------------------------------------------------
    if not os.path.isdir(args.savedir):
        os.makedirs(args.savedir)
    writer = SummaryWriter(log_dir=args.savedir,
                           comment='Training and Validation logs')
    try:
        # NOTE(review): 70 input channels looks dataset-specific (Heart
        # signals?) -- confirm it matches the model's expected input.
        writer.add_graph(model,
                         input_to_model=torch.randn(1, 70, args.inpSize,
                                                    args.inpSize))
    except Exception:
        print_log_message(
            "Not able to generate the graph. Likely because your model is not supported by ONNX"
        )

    # network properties
    num_params = model_parameters(model)
    flops = compute_flops(model)
    print_info_message('FLOPs: {:.2f} million'.format(flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    # -----------------------------------------------------------------------------
    # Optimizer
    # -----------------------------------------------------------------------------
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    best_acc = 0.0
    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus >= 1 else 'cpu'
    if args.resume:
        if os.path.isfile(args.resume):
            print_info_message("=> loading checkpoint '{}'".format(
                args.resume))
            # BUG FIX: map_location belongs to torch.load(), not to
            # load_state_dict(); passing it to load_state_dict() raised a
            # TypeError whenever training was resumed.
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device(device))
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_info_message("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print_warning_message("=> no checkpoint found at '{}'".format(
                args.resume))

    # -----------------------------------------------------------------------------
    # Loss Fn
    # -----------------------------------------------------------------------------
    if args.dataset == 'imagenet':
        criterion = nn.CrossEntropyLoss()
        acc_metric = 'Top-1'
    elif args.dataset == 'coco':
        # multi-label classification
        criterion = nn.BCEWithLogitsLoss()
        acc_metric = 'F1'
    elif args.dataset == 'Heart':
        criterion = nn.L1Loss()
        acc_metric = 'Test'
    else:
        print_error_message('{} dataset not yet supported'.format(
            args.dataset))
        # BUG FIX: without exiting here, `criterion` stays undefined and the
        # code crashes later with a NameError (other error branches exit).
        exit()

    if num_gpus >= 1:
        model = torch.nn.DataParallel(model)
        model = model.cuda()
        criterion = criterion.cuda()

    if torch.backends.cudnn.is_available():
        import torch.backends.cudnn as cudnn
        cudnn.benchmark = True
        cudnn.deterministic = True

    # -----------------------------------------------------------------------------
    # Data Loaders
    # -----------------------------------------------------------------------------
    # Data loading code
    if args.dataset == 'imagenet':
        train_loader, val_loader = img_loader.data_loaders(args)
        # import the loaders too
        from utilities.train_eval_classification import train, validate
    elif args.dataset == 'coco':
        from data_loader.classification.coco import COCOClassification
        train_dataset = COCOClassification(root=args.data,
                                           split='train',
                                           year='2017',
                                           inp_size=args.inpSize,
                                           scale=args.scale,
                                           is_training=True)
        val_dataset = COCOClassification(root=args.data,
                                         split='val',
                                         year='2017',
                                         inp_size=args.inpSize,
                                         is_training=False)
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   pin_memory=True,
                                                   num_workers=args.workers)
        val_loader = torch.utils.data.DataLoader(val_dataset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=args.workers)
        # import the loaders too
        from utilities.train_eval_classification import train_multi as train
        from utilities.train_eval_classification import validate_multi as validate
    elif args.dataset == 'Heart':
        from utilities.train_eval_classification import train, validate

        def load_npy(npy_path):
            # .npy archives written as a pickled dict need .item(); fall back
            # to the raw array otherwise.
            try:
                data = np.load(npy_path).item()
            except Exception:
                data = np.load(npy_path)
            return data

        def loadData(data_path):
            # Unpack a dict-style .npy archive into (signals, labels).
            npy_data = load_npy(data_path)
            signals = npy_data['signals']
            gts = npy_data['gts']
            return signals, gts

        ht_batch_size = args.batch_size
        signals_train, gts_train = loadData(
            '../DiCENeT/CardioNet/data_train/fps7_sample10_2D_train.npy')
        signals_val, gts_val = loadData(
            '../DiCENeT/CardioNet/data_train/fps7_sample10_2D_val.npy')
        from data_loader.classification.heart import HeartDataGenerator
        heart_train_data = HeartDataGenerator(signals_train, gts_train,
                                              ht_batch_size)
        heart_val_data = HeartDataGenerator(signals_val, gts_val,
                                            ht_batch_size)
        train_loader = torch.utils.data.DataLoader(heart_train_data,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   pin_memory=True,
                                                   num_workers=args.workers)
        val_loader = torch.utils.data.DataLoader(heart_val_data,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 num_workers=args.workers)
    else:
        print_error_message('{} dataset not yet supported'.format(
            args.dataset))
        # BUG FIX: exit on unsupported dataset -- the loaders would be
        # undefined below otherwise.
        exit()

    # -----------------------------------------------------------------------------
    # LR schedulers
    # -----------------------------------------------------------------------------
    if args.scheduler == 'fixed':
        step_sizes = args.steps
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr,
                                        steps=step_sizes,
                                        gamma=args.lr_decay)
    elif args.scheduler == 'clr':
        from utilities.lr_scheduler import CyclicLR
        step_sizes = args.steps
        lr_scheduler = CyclicLR(min_lr=args.lr,
                                cycle_len=5,
                                steps=step_sizes,
                                gamma=args.lr_decay)
    elif args.scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr, max_epochs=args.epochs)
    elif args.scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    elif args.scheduler == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr,
                                max_epochs=args.epochs,
                                clr_max=args.clr_max)
    else:
        print_error_message('Scheduler ({}) not yet implemented'.format(
            args.scheduler))
        exit()
    print_info_message(lr_scheduler)

    # set up the epoch variable in case resuming training
    if args.start_epoch != 0:
        for epoch in range(args.start_epoch):
            lr_scheduler.step(epoch)

    # Persist the full run configuration alongside the checkpoints.
    with open(args.savedir + os.sep + 'arguments.json', 'w') as outfile:
        import json
        arg_dict = vars(args)
        arg_dict['model_params'] = '{} '.format(num_params)
        arg_dict['flops'] = '{} '.format(flops)
        json.dump(arg_dict, outfile)

    # -----------------------------------------------------------------------------
    # Training and Val Loop
    # -----------------------------------------------------------------------------
    extra_info_ckpt = args.model + '_' + str(args.s)
    for epoch in range(args.start_epoch, args.epochs):
        lr_log = lr_scheduler.step(epoch)
        # set the optimizer with the learning rate
        # This can be done inside the MyLRScheduler
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_log
        print_info_message("LR for epoch {} = {:.5f}".format(epoch, lr_log))
        train_acc, train_loss = train(data_loader=train_loader,
                                      model=model,
                                      criteria=criterion,
                                      optimizer=optimizer,
                                      epoch=epoch,
                                      device=device)
        # evaluate on validation set
        val_acc, val_loss = validate(data_loader=val_loader,
                                     model=model,
                                     criteria=criterion,
                                     device=device)

        # remember best prec@1 and save checkpoint
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        # Unwrap DataParallel before saving so the checkpoint loads on CPU too.
        weights_dict = model.module.state_dict(
        ) if device == 'cuda' else model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': weights_dict,
                'best_prec1': best_acc,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.savedir, extra_info_ckpt)

        writer.add_scalar('Classification/LR/learning_rate', lr_log, epoch)
        writer.add_scalar('Classification/Loss/Train', train_loss, epoch)
        writer.add_scalar('Classification/Loss/Val', val_loss, epoch)
        writer.add_scalar('Classification/{}/Train'.format(acc_metric),
                          train_acc, epoch)
        writer.add_scalar('Classification/{}/Val'.format(acc_metric), val_acc,
                          epoch)
        # accuracy-vs-complexity curves (the "step" axis is FLOPs / #params)
        writer.add_scalar('Classification/Complexity/Top1_vs_flops', best_acc,
                          round(flops, 2))
        writer.add_scalar('Classification/Complexity/Top1_vs_params',
                          best_acc, round(num_params, 2))

    writer.close()
def main(args):
    """Train and validate a semantic-segmentation model described by ``args``.

    Builds the dataset/loader pair (with per-dataset class lists, color
    encodings and class weights), the network, the weighted segmentation
    loss (optionally plus an NID auxiliary loss), the optimizer and LR
    scheduler, then runs the epoch loop with TensorBoard visualization and
    best-mIoU checkpointing.  Outputs go to ``args.savedir``.
    """
    crop_size = args.crop_size
    assert isinstance(crop_size, tuple)
    print_info_message(
        'Running Model at image resolution {}x{} with batch size {}'.format(
            crop_size[0], crop_size[1], args.batch_size))
    if not os.path.isdir(args.savedir):
        os.makedirs(args.savedir)

    num_gpus = torch.cuda.device_count()
    device = 'cuda' if num_gpus > 0 else 'cpu'

    # -----------------------------------------------------------------------------
    # Dataset, class list and class weights
    # -----------------------------------------------------------------------------
    if args.dataset == 'pascal':
        from data_loader.segmentation.voc import VOCSegmentation, VOC_CLASS_LIST
        train_dataset = VOCSegmentation(root=args.data_path,
                                        train=True,
                                        crop_size=crop_size,
                                        scale=args.scale,
                                        coco_root_dir=args.coco_path)
        val_dataset = VOCSegmentation(root=args.data_path,
                                      train=False,
                                      crop_size=crop_size,
                                      scale=args.scale)
        seg_classes = len(VOC_CLASS_LIST)
        class_wts = torch.ones(seg_classes)
    elif args.dataset == 'city':
        from data_loader.segmentation.cityscapes import CityscapesSegmentation, CITYSCAPE_CLASS_LIST, color_encoding
        train_dataset = CityscapesSegmentation(root=args.data_path,
                                               train=True,
                                               coarse=False)
        val_dataset = CityscapesSegmentation(root=args.data_path,
                                             train=False,
                                             coarse=False)
        seg_classes = len(CITYSCAPE_CLASS_LIST)
        # Precomputed Cityscapes per-class statistics; weight = 10 / stat,
        # and the last (void) class gets zero weight.
        class_wts = torch.ones(seg_classes)
        city_stats = [
            2.8149201869965, 6.9850029945374, 3.7890393733978,
            9.9428062438965, 9.7702074050903, 9.5110931396484,
            10.311357498169, 10.026463508606, 4.6323022842407,
            9.5608062744141, 7.8698215484619, 9.5168733596802,
            10.373730659485, 6.6616044044495, 10.260489463806,
            10.287888526917, 10.289801597595, 10.405355453491,
            10.138095855713
        ]
        for cls_idx, stat in enumerate(city_stats):
            class_wts[cls_idx] = 10 / stat
        class_wts[19] = 0.0
    elif args.dataset == 'greenhouse':
        print(args.use_depth)
        from data_loader.segmentation.greenhouse import GreenhouseRGBDSegmentation, GREENHOUSE_CLASS_LIST, color_encoding
        train_dataset = GreenhouseRGBDSegmentation(
            root=args.data_path,
            list_name=args.train_list,
            train=True,
            size=crop_size,
            scale=args.scale,
            use_depth=args.use_depth,
            use_traversable=args.greenhouse_use_trav)
        val_dataset = GreenhouseRGBDSegmentation(
            root=args.data_path,
            list_name=args.val_list,
            train=False,
            size=crop_size,
            scale=args.scale,
            use_depth=args.use_depth,
            use_traversable=args.greenhouse_use_trav)
        # Precomputed weights; assumes 'class_weights.npy' exists in the
        # working directory.
        class_weights = np.load('class_weights.npy')  # [:4]
        print(class_weights)
        class_wts = torch.from_numpy(class_weights).float().to(device)
        print(GREENHOUSE_CLASS_LIST)
        seg_classes = len(GREENHOUSE_CLASS_LIST)
    elif args.dataset == 'ishihara':
        print(args.use_depth)
        from data_loader.segmentation.ishihara_rgbd import IshiharaRGBDSegmentation, ISHIHARA_RGBD_CLASS_LIST
        train_dataset = IshiharaRGBDSegmentation(
            root=args.data_path,
            list_name='ishihara_rgbd_train.txt',
            train=True,
            size=crop_size,
            scale=args.scale,
            use_depth=args.use_depth)
        val_dataset = IshiharaRGBDSegmentation(
            root=args.data_path,
            list_name='ishihara_rgbd_val.txt',
            train=False,
            size=crop_size,
            scale=args.scale,
            use_depth=args.use_depth)
        seg_classes = len(ISHIHARA_RGBD_CLASS_LIST)
        class_wts = torch.ones(seg_classes)
        # Class-name -> RGB color used by the visualization helpers.
        color_encoding = OrderedDict([('Unlabeled', (0, 0, 0)),
                                      ('Building', (70, 70, 70)),
                                      ('Fence', (190, 153, 153)),
                                      ('Others', (72, 0, 90)),
                                      ('Pedestrian', (220, 20, 60)),
                                      ('Pole', (153, 153, 153)),
                                      ('Road ', (157, 234, 50)),
                                      ('Road', (128, 64, 128)),
                                      ('Sidewalk', (244, 35, 232)),
                                      ('Vegetation', (107, 142, 35)),
                                      ('Car', (0, 0, 255)),
                                      ('Wall', (102, 102, 156)),
                                      ('Traffic ', (220, 220, 0))])
    elif args.dataset == 'sun':
        print(args.use_depth)
        from data_loader.segmentation.sun_rgbd import SUNRGBDSegmentation, SUN_RGBD_CLASS_LIST
        train_dataset = SUNRGBDSegmentation(root=args.data_path,
                                            list_name='sun_rgbd_train.txt',
                                            train=True,
                                            size=crop_size,
                                            ignore_idx=args.ignore_idx,
                                            scale=args.scale,
                                            use_depth=args.use_depth)
        val_dataset = SUNRGBDSegmentation(root=args.data_path,
                                          list_name='sun_rgbd_val.txt',
                                          train=False,
                                          size=crop_size,
                                          ignore_idx=args.ignore_idx,
                                          scale=args.scale,
                                          use_depth=args.use_depth)
        seg_classes = len(SUN_RGBD_CLASS_LIST)
        class_wts = torch.ones(seg_classes)
        color_encoding = OrderedDict([('Background', (0, 0, 0)),
                                      ('Bed', (0, 255, 0)),
                                      ('Books', (70, 70, 70)),
                                      ('Ceiling', (190, 153, 153)),
                                      ('Chair', (72, 0, 90)),
                                      ('Floor', (220, 20, 60)),
                                      ('Furniture', (153, 153, 153)),
                                      ('Objects', (157, 234, 50)),
                                      ('Picture', (128, 64, 128)),
                                      ('Sofa', (244, 35, 232)),
                                      ('Table', (107, 142, 35)),
                                      ('TV', (0, 0, 255)),
                                      ('Wall', (102, 102, 156)),
                                      ('Window', (220, 220, 0))])
    elif args.dataset == 'camvid':
        print(args.use_depth)
        from data_loader.segmentation.camvid import CamVidSegmentation, CAMVID_CLASS_LIST, color_encoding
        train_dataset = CamVidSegmentation(
            root=args.data_path,
            list_name='train_camvid.txt',
            train=True,
            size=crop_size,
            scale=args.scale,
            label_conversion=args.label_conversion,
            normalize=args.normalize)
        val_dataset = CamVidSegmentation(
            root=args.data_path,
            list_name='val_camvid.txt',
            train=False,
            size=crop_size,
            scale=args.scale,
            label_conversion=args.label_conversion,
            normalize=args.normalize)
        if args.label_conversion:
            # Labels are remapped to the greenhouse class set.
            from data_loader.segmentation.greenhouse import GREENHOUSE_CLASS_LIST, color_encoding
            seg_classes = len(GREENHOUSE_CLASS_LIST)
            class_wts = torch.ones(seg_classes)
        else:
            seg_classes = len(CAMVID_CLASS_LIST)
            # Estimate inverse-frequency class weights from the training set.
            tmp_loader = torch.utils.data.DataLoader(train_dataset,
                                                     batch_size=1,
                                                     shuffle=False)
            class_wts = calc_cls_class_weight(tmp_loader,
                                              seg_classes,
                                              inverted=True)
            class_wts = torch.from_numpy(class_wts).float().to(device)
            # class_wts = torch.ones(seg_classes)
        print("class weights : {}".format(class_wts))
        args.use_depth = False
    elif args.dataset == 'forest':
        from data_loader.segmentation.freiburg_forest import FreiburgForestDataset, FOREST_CLASS_LIST, color_encoding
        train_dataset = FreiburgForestDataset(train=True,
                                              size=crop_size,
                                              scale=args.scale,
                                              normalize=args.normalize)
        val_dataset = FreiburgForestDataset(train=False,
                                            size=crop_size,
                                            scale=args.scale,
                                            normalize=args.normalize)
        seg_classes = len(FOREST_CLASS_LIST)
        # Estimate inverse-frequency class weights from the training set.
        tmp_loader = torch.utils.data.DataLoader(train_dataset,
                                                 batch_size=1,
                                                 shuffle=False)
        class_wts = calc_cls_class_weight(tmp_loader,
                                          seg_classes,
                                          inverted=True)
        class_wts = torch.from_numpy(class_wts).float().to(device)
        # class_wts = torch.ones(seg_classes)
        print("class weights : {}".format(class_wts))
        args.use_depth = False
    else:
        print_error_message('Dataset: {} not yet supported'.format(
            args.dataset))
        exit(-1)

    print_info_message('Training samples: {}'.format(len(train_dataset)))
    print_info_message('Validation samples: {}'.format(len(val_dataset)))

    # -----------------------------------------------------------------------------
    # Model
    # -----------------------------------------------------------------------------
    if args.model == 'espnetv2':
        from model.segmentation.espnetv2 import espnetv2_seg
        args.classes = seg_classes
        model = espnetv2_seg(args)
    elif args.model == 'espdnet':
        from model.segmentation.espdnet import espdnet_seg
        args.classes = seg_classes
        print("Trainable fusion : {}".format(args.trainable_fusion))
        print("Segmentation classes : {}".format(seg_classes))
        model = espdnet_seg(args)
    elif args.model == 'espdnetue':
        from model.segmentation.espdnet_ue import espdnetue_seg2
        args.classes = seg_classes
        print("Trainable fusion : {}".format(args.trainable_fusion))
        print("Segmentation classes : {}".format(seg_classes))
        model = espdnetue_seg2(args, fix_pyr_plane_proj=True)
    elif args.model == 'deeplabv3':
        # from model.segmentation.deeplabv3 import DeepLabV3
        from torchvision.models.segmentation.segmentation import deeplabv3_resnet101
        args.classes = seg_classes
        # model = DeepLabV3(seg_classes)
        model = deeplabv3_resnet101(num_classes=seg_classes, aux_loss=True)
        # NOTE(review): cuDNN is disabled for this model -- presumably a
        # workaround for a torchvision/cuDNN issue; confirm it is still needed.
        torch.backends.cudnn.enabled = False
    elif args.model == 'unet':
        from model.segmentation.unet import UNet
        model = UNet(in_channels=3, out_channels=seg_classes)
        # model = torch.hub.load('mateuszbuda/brain-segmentation-pytorch', 'unet',
        #                        in_channels=3, out_channels=seg_classes, init_features=32, pretrained=False)
    elif args.model == 'dicenet':
        from model.segmentation.dicenet import dicenet_seg
        model = dicenet_seg(args, classes=seg_classes)
    else:
        print_error_message('Arch: {} not yet supported'.format(args.model))
        exit(-1)

    if args.finetune:
        if os.path.isfile(args.finetune):
            print_info_message('Loading weights for finetuning from {}'.format(
                args.finetune))
            weight_dict = torch.load(args.finetune,
                                     map_location=torch.device(device='cpu'))
            model.load_state_dict(weight_dict)
            print_info_message('Done')
        else:
            print_warning_message('No file for finetuning. Please check.')

    if args.freeze_bn:
        # Keep BN running stats fixed and exclude its affine params from
        # training.
        print_info_message('Freezing batch normalization layers')
        for m in model.modules():
            if isinstance(m, nn.BatchNorm2d):
                m.eval()
                m.weight.requires_grad = False
                m.bias.requires_grad = False

    # Parameter groups: the base net and the segmentation head (and the
    # depth encoder, if any) may use different learning rates.
    if args.model == 'deeplabv3' or args.model == 'unet':
        train_params = [{'params': model.parameters(), 'lr': args.lr}]
    elif args.use_depth:
        train_params = [{
            'params': model.get_basenet_params(),
            'lr': args.lr
        }, {
            'params': model.get_segment_params(),
            'lr': args.lr * args.lr_mult
        }, {
            'params': model.get_depth_encoder_params(),
            'lr': args.lr * args.lr_mult
        }]
    else:
        train_params = [{
            'params': model.get_basenet_params(),
            'lr': args.lr
        }, {
            'params': model.get_segment_params(),
            'lr': args.lr * args.lr_mult
        }]
    optimizer = optim.SGD(train_params,
                          lr=args.lr * args.lr_mult,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)

    num_params = model_parameters(model)
    flops = compute_flops(model,
                          input=torch.Tensor(1, 3, crop_size[0], crop_size[1]))
    print_info_message(
        'FLOPs for an input of size {}x{}: {:.2f} million'.format(
            crop_size[0], crop_size[1], flops))
    print_info_message('Network Parameters: {:.2f} million'.format(num_params))

    writer = SummaryWriter(log_dir=args.savedir,
                           comment='Training and Validation logs')
    try:
        writer.add_graph(model, input_to_model=torch.Tensor(1, 3, 288, 480))
    except Exception:
        print_log_message(
            "Not able to generate the graph. Likely because your model is not supported by ONNX"
        )

    start_epoch = 0
    best_miou = 0.0
    if args.resume:
        if os.path.isfile(args.resume):
            print_info_message("=> loading checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume,
                                    map_location=torch.device('cpu'))
            start_epoch = checkpoint['epoch']
            best_miou = checkpoint['best_miou']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_info_message("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print_warning_message("=> no checkpoint found at '{}'".format(
                args.resume))

    print('device : ' + device)

    # Loss: class-weighted segmentation loss, optionally plus NID loss.
    #criterion = nn.CrossEntropyLoss(weight=class_wts, reduction='none', ignore_index=args.ignore_idx)
    criterion = SegmentationLoss(n_classes=seg_classes,
                                 loss_type=args.loss_type,
                                 device=device,
                                 ignore_idx=args.ignore_idx,
                                 class_wts=class_wts.to(device))
    nid_loss = NIDLoss(image_bin=32,
                       label_bin=seg_classes) if args.use_nid else None

    if num_gpus >= 1:
        if num_gpus == 1:
            # for a single GPU, we do not need DataParallel wrapper for Criteria.
            # So, falling back to its internal wrapper
            from torch.nn.parallel import DataParallel
            model = DataParallel(model)
            model = model.cuda()
            criterion = criterion.cuda()
            if args.use_nid:
                nid_loss.cuda()
        else:
            from utilities.parallel_wrapper import DataParallelModel, DataParallelCriteria
            model = DataParallelModel(model)
            model = model.cuda()
            criterion = DataParallelCriteria(criterion)
            criterion = criterion.cuda()
            if args.use_nid:
                nid_loss = DataParallelCriteria(nid_loss)
                nid_loss = nid_loss.cuda()

        if torch.backends.cudnn.is_available():
            import torch.backends.cudnn as cudnn
            cudnn.benchmark = True
            cudnn.deterministic = True

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=args.workers)
    # NOTE(review): validation batch size is hard-coded to 20.
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=20,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=args.workers)

    # -----------------------------------------------------------------------------
    # LR scheduler
    # -----------------------------------------------------------------------------
    if args.scheduler == 'fixed':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import FixedMultiStepLR
        lr_scheduler = FixedMultiStepLR(base_lr=args.lr,
                                        steps=step_sizes,
                                        gamma=args.lr_decay)
    elif args.scheduler == 'clr':
        step_size = args.step_size
        step_sizes = [
            step_size * i
            for i in range(1, int(math.ceil(args.epochs / step_size)))
        ]
        from utilities.lr_scheduler import CyclicLR
        lr_scheduler = CyclicLR(min_lr=args.lr,
                                cycle_len=5,
                                steps=step_sizes,
                                gamma=args.lr_decay)
    elif args.scheduler == 'poly':
        from utilities.lr_scheduler import PolyLR
        lr_scheduler = PolyLR(base_lr=args.lr,
                              max_epochs=args.epochs,
                              power=args.power)
    elif args.scheduler == 'hybrid':
        from utilities.lr_scheduler import HybirdLR
        lr_scheduler = HybirdLR(base_lr=args.lr,
                                max_epochs=args.epochs,
                                clr_max=args.clr_max,
                                cycle_len=args.cycle_len)
    elif args.scheduler == 'linear':
        from utilities.lr_scheduler import LinearLR
        lr_scheduler = LinearLR(base_lr=args.lr, max_epochs=args.epochs)
    else:
        print_error_message('{} scheduler Not supported'.format(
            args.scheduler))
        exit()
    print_info_message(lr_scheduler)

    # Persist the full run configuration alongside the checkpoints.
    with open(args.savedir + os.sep + 'arguments.json', 'w') as outfile:
        import json
        arg_dict = vars(args)
        arg_dict['model_params'] = '{} '.format(num_params)
        arg_dict['flops'] = '{} '.format(flops)
        json.dump(arg_dict, outfile)

    # -----------------------------------------------------------------------------
    # Training and Val Loop
    # -----------------------------------------------------------------------------
    extra_info_ckpt = '{}_{}_{}'.format(args.model, args.s, crop_size[0])
    for epoch in range(start_epoch, args.epochs):
        lr_base = lr_scheduler.step(epoch)
        # set the optimizer with the learning rate
        # This can be done inside the MyLRScheduler
        lr_seg = lr_base * args.lr_mult
        optimizer.param_groups[0]['lr'] = lr_base
        if len(optimizer.param_groups) > 1:
            optimizer.param_groups[1]['lr'] = lr_seg
        if args.use_depth:
            optimizer.param_groups[2]['lr'] = lr_base

        print_info_message(
            'Running epoch {} with learning rates: base_net {:.6f}, segment_net {:.6f}'
            .format(epoch, lr_base, lr_seg))

        # Models with an auxiliary/uncertainty branch use the *_ue loops.
        if args.model == 'espdnetue' or (
            (args.model == 'deeplabv3' or args.model == 'unet')
                and args.use_aux):
            from utilities.train_eval_seg import train_seg_ue as train
            from utilities.train_eval_seg import val_seg_ue as val
        else:
            from utilities.train_eval_seg import train_seg as train
            from utilities.train_eval_seg import val_seg as val

        iou_train, train_loss = train(model,
                                      train_loader,
                                      optimizer,
                                      criterion,
                                      seg_classes,
                                      epoch,
                                      device=device,
                                      use_depth=args.use_depth,
                                      add_criterion=nid_loss)
        iou_val, val_loss = val(model,
                                val_loader,
                                criterion,
                                seg_classes,
                                device=device,
                                use_depth=args.use_depth,
                                add_criterion=nid_loss)

        # BUG FIX: `iterator.next()` is Python-2 style and is not available
        # on modern DataLoader iterators; use the builtin next() instead.
        batch_train = next(iter(train_loader))
        batch = next(iter(val_loader))
        if args.use_depth:
            in_training_visualization_img(
                model,
                images=batch_train[0].to(device=device),
                depths=batch_train[2].to(device=device),
                labels=batch_train[1].to(device=device),
                class_encoding=color_encoding,
                writer=writer,
                epoch=epoch,
                data='Segmentation/train',
                device=device)
            in_training_visualization_img(
                model,
                images=batch[0].to(device=device),
                depths=batch[2].to(device=device),
                labels=batch[1].to(device=device),
                class_encoding=color_encoding,
                writer=writer,
                epoch=epoch,
                data='Segmentation/val',
                device=device)
            image_grid = torchvision.utils.make_grid(
                batch[2].to(device=device).data.cpu()).numpy()
            print(type(image_grid))
            writer.add_image('Segmentation/depths', image_grid, epoch)
        else:
            in_training_visualization_img(
                model,
                images=batch_train[0].to(device=device),
                labels=batch_train[1].to(device=device),
                class_encoding=color_encoding,
                writer=writer,
                epoch=epoch,
                data='Segmentation/train',
                device=device)
            in_training_visualization_img(
                model,
                images=batch[0].to(device=device),
                labels=batch[1].to(device=device),
                class_encoding=color_encoding,
                writer=writer,
                epoch=epoch,
                data='Segmentation/val',
                device=device)
        # image_grid = torchvision.utils.make_grid(outputs.data.cpu()).numpy()
        # writer.add_image('Segmentation/results/val', image_grid, epoch)

        # remember best miou and save checkpoint
        # NOTE(review): mIoU is averaged over class indices 1-3 only
        # (greenhouse: plant / artificial object / ground) -- confirm this is
        # intended for the other datasets as well.
        miou_val = iou_val[[1, 2, 3]].mean()
        is_best = miou_val > best_miou
        best_miou = max(miou_val, best_miou)
        # Unwrap DataParallel before saving so the checkpoint loads on CPU too.
        weights_dict = model.module.state_dict(
        ) if device == 'cuda' else model.state_dict()
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.model,
                'state_dict': weights_dict,
                'best_miou': best_miou,
                'optimizer': optimizer.state_dict(),
            }, is_best, args.savedir, extra_info_ckpt)

        writer.add_scalar('Segmentation/LR/base', round(lr_base, 6), epoch)
        writer.add_scalar('Segmentation/LR/seg', round(lr_seg, 6), epoch)
        writer.add_scalar('Segmentation/Loss/train', train_loss, epoch)
        writer.add_scalar('Segmentation/Loss/val', val_loss, epoch)
        writer.add_scalar('Segmentation/mIOU/train',
                          iou_train[[1, 2, 3]].mean(), epoch)
        writer.add_scalar('Segmentation/mIOU/val', miou_val, epoch)
        writer.add_scalar('Segmentation/plant_IOU/val', iou_val[1], epoch)
        writer.add_scalar('Segmentation/ao_IOU/val', iou_val[2], epoch)
        writer.add_scalar('Segmentation/ground_IOU/val', iou_val[3], epoch)
        # mIoU-vs-complexity curves (the "step" axis is FLOPs / #params)
        writer.add_scalar('Segmentation/Complexity/Flops', best_miou,
                          math.ceil(flops))
        writer.add_scalar('Segmentation/Complexity/Params', best_miou,
                          math.ceil(num_params))

    writer.close()