def __init__(self, mode=None, transform=None):
    '''
    Create a map-style dataset for training or testing with
    optional transforms.
    @parameters:
        mode: one of the following: 'train', 'test', or 'valid'.
        transform: a callable transform/composition of transforms
            applied to each sample.
    '''
    super().__init__()
    if mode not in ('train', 'test', 'valid'):
        raise ValueError('invalid mode {}'.format(mode))
    self.mode = mode
    self.transform = transform
    self.config = read_config()
    dataset_dir = self.config['DATASET']['dataset_dir']
    dataset_name = self.config['DATASET']['dataset_name']
    filename = dataset_name + '_' + self.mode + '_files.txt'
    files_path = os.path.join(dataset_dir, self.mode, filename)
    gt_dir = self.config['DATASET']['gt_dir']
    gt_dir = os.path.join(dataset_dir, self.mode, gt_dir)
    input_dir = self.config['DATASET']['input_dir']
    input_dir = os.path.join(dataset_dir, self.mode, input_dir)
    self.img_gt_paths = read_img_gt_path(
        files_path,
        input_dir,
        gt_dir,
    )
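# Hedged usage sketch: wiring the dataset above into a DataLoader.
# `KittiDataset` is a hypothetical name (the excerpt shows only
# __init__); batch size and worker count are illustrative.
from torch.utils.data import DataLoader

train_set = KittiDataset(mode='train', transform=None)
train_loader = DataLoader(train_set, batch_size=4, shuffle=True,
                          num_workers=2, pin_memory=True)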
class DORNNET():
    config = utils.read_config()

    def __init__(self):
        self.depth_net = DORN()
        path_model = DORNNET.config['MODEL']['kitti']
        print('kitti model is used!')
        model_dict = utils.get_model(path_model)
        # load the trained model's parameters
        self.depth_net.load_state_dict(model_dict)
        # move the network to cuda/gpu device if available
        self.depth_net.to(utils.device)
        # bn and dropout layers will work in evaluation mode
        self.depth_net.eval()

    def __call__(self, filename):
        img = Image.open(filename)
        img_tensor = utils.transform_img(img, self.config)
        if self.depth_net.training:
            raise ValueError('Model is in training mode!')
        # disable the autograd engine since no back-propagation is needed
        with torch.no_grad():
            pred_labels, ord_probs = self.depth_net(img_tensor)
        return pred_labels, ord_probs
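# Hedged usage sketch: single-image inference with the wrapper above.
# The image path is illustrative; presumably pred_labels holds the
# discrete SID bin indices and ord_probs the per-bin ordinal
# probabilities returned by the network.
dorn = DORNNET()
pred_labels, ord_probs = dorn('data/kitti/example.png')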
class Evaluate():
    config = read_config()
    K = config['INPUT'].getint('sid_bins')
    min_ = config['INPUT'].getfloat('min_depth')
    max_ = config['INPUT'].getfloat('max_depth')
    # depth bin edges according to SID, see eq. (1) in DORN:
    # t_i = min * (max / min) ** (i / (K + 1)), i = 0, ..., K + 1
    expo = np.arange(0, K + 2) / (K + 1)
    bins = min_ * (max_ / min_)**expo

    def __init__(self):
        self.delta1_sum = 0
        self.delta2_sum = 0
        self.delta3_sum = 0
        self.abs_rel_sum = 0
        self.sq_rel_sum = 0
        self.si_log_0 = 0
        self.si_log_1 = 0
        self.inv_sq_sum = 0
        self.count = 0

    @torch.no_grad()
    def compute(self, predict_depth, target_depth):
        '''
        accumulate unnormalized error metrics over one batch.
        '''
        # evaluate only pixels with valid ground-truth depths
        mask = torch.logical_and(target_depth >= self.min_,
                                 target_depth <= self.max_)
        predict_depth = predict_depth[mask]
        target_depth = target_depth[mask]
        thresh = torch.max(target_depth / predict_depth,
                           predict_depth / target_depth)
        self.delta1_sum += (thresh < 1.25).float().sum()
        self.delta2_sum += (thresh < 1.25**2).float().sum()
        self.delta3_sum += (thresh < 1.25**3).float().sum()
        bias = predict_depth - target_depth
        self.abs_rel_sum += (bias.abs() / target_depth).sum()
        self.sq_rel_sum += (bias**2 / target_depth).sum()
        inv_diff = 1 / predict_depth - 1 / target_depth
        self.inv_sq_sum += (inv_diff**2).sum()
        log_err = torch.log(predict_depth / target_depth)
        self.si_log_0 += (log_err**2).sum()
        self.si_log_1 += log_err.sum()
        # number of pixels with gt depths
        self.count += mask.int().sum()

    def get(self):
        '''
        return metrics by name and value; call results() first.
        '''
        return {
            'delta1': self.delta1.item(),
            'delta2': self.delta2.item(),
            'delta3': self.delta3.item(),
            'abs_rel': self.abs_rel.item(),
            'sq_rel': self.sq_rel.item(),
            'si_log': self.si_log.item(),
            'irmse': self.irmse.item()
        }

    @torch.no_grad()
    def results(self):
        '''
        compute average (final) results over all accumulated pixels
        '''
        self.delta1 = self.delta1_sum / self.count
        self.delta2 = self.delta2_sum / self.count
        self.delta3 = self.delta3_sum / self.count
        self.abs_rel = self.abs_rel_sum / self.count
        self.sq_rel = self.sq_rel_sum / self.count
        self.irmse = torch.sqrt(self.inv_sq_sum / self.count)
        # scale-invariant log error: variance of the log depth ratios
        self.si_log = self.si_log_0 / self.count - \
            (self.si_log_1 / self.count)**2

    def __str__(self):
        s = 'Error Metrics:\n'
        s += 'delta1={0:0.3f}\ndelta2={1:0.3f}\ndelta3={2:0.3f}\n'.format(
            self.delta1.item(), self.delta2.item(), self.delta3.item())
        s += 'absolute relative error={0:0.3f}\n'.format(self.abs_rel.item())
        s += 'squared relative error={0:0.3f}\n'.format(self.sq_rel.item())
        s += 'inverse root mean square error={0:0.3f}\n'.format(
            self.irmse.item())
        s += 'scale invariant logarithmic error={0:0.3f}\n'.format(
            self.si_log.item())
        return s
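# Hedged usage sketch: accumulating metrics over a validation set and
# printing the averages. `valid_pairs` is a hypothetical iterable of
# (predicted, ground-truth) depth tensor pairs; only compute(),
# results(), get(), and __str__ come from the class above.
evaluator = Evaluate()
for pred_depth, gt_depth in valid_pairs:
    evaluator.compute(pred_depth, gt_depth)
evaluator.results()
print(evaluator)            # formatted report via __str__
metrics = evaluator.get()   # e.g., metrics['delta1']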
import torch
import torch.nn as nn
import torch.nn.functional as F

from collections import OrderedDict

from dorn.model import utils
from dorn.model.network.backbone import resnet101

config = utils.read_config()


class FullImageEncoder(nn.Module):
    '''
    captures global texture information as a monocular depth cue.
    '''

    def __init__(self):
        super().__init__()
        drop_prob = config['TRAIN'].getfloat('dropout_prob')
        self.global_pooling = nn.AvgPool2d(16, 16, ceil_mode=True)
        self.global_dropout = nn.Dropout2d(p=drop_prob)
        self.global_fc = nn.Linear(2048 * 4 * 5, 512)
        self.conv_depth = nn.Conv2d(512, 512, 1)
        self.interp = nn.UpsamplingBilinear2d((49, 65))
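# Hedged sketch: a forward pass consistent with the layers declared
# above (the excerpt ends before forward()). It is meant to live inside
# FullImageEncoder. The 2048x49x65 input shape and the flatten size
# 2048*4*5 follow from global_fc and interp; treat this as an
# assumption, not the repository's actual implementation.
def forward(self, x):
    # x: (N, 2048, 49, 65) feature map from the ResNet-101 backbone
    out = self.global_pooling(x)        # (N, 2048, 4, 5) with ceil_mode
    out = self.global_dropout(out)
    out = out.view(out.size(0), -1)     # flatten to (N, 2048*4*5)
    out = F.relu(self.global_fc(out))   # (N, 512)
    out = out.view(-1, 512, 1, 1)
    out = self.conv_depth(out)          # 1x1 conv, (N, 512, 1, 1)
    out = self.interp(out)              # broadcast to (N, 512, 49, 65)
    return out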
import random
from itertools import product

import numpy as np
import torch
from PIL import Image
import torchvision.transforms as transforms
import torchvision.transforms.functional as F

from dorn.model.utils import read_config

# global config parameters
config = read_config()
# seed the random number generator from the OS entropy source
random.seed()


class Scale():
    def __init__(self):
        '''
        Scale a PIL image and an npy depth map by a random scale
        factor uniformly sampled from the interval [1.0, 1.2].
        The factor is drawn once, when the transform is constructed.
        '''
        # random scale factor
        self.scale = random.uniform(1.0, 1.2)

    def __call__(self, img_depth):
        img = img_depth['img']
        # new_size = (new_w, new_h)
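# Hedged sketch: the excerpt cuts off inside Scale.__call__, so below is
# one plausible completion, following the common scale augmentation of
# Eigen et al.: the image is resized by the factor and the depth values
# are divided by it so the scene geometry stays consistent. The dict
# keys mirror __call__ above; the resampling modes are assumptions.
def _scale_pair(img, depth, scale):
    new_size = (int(img.width * scale), int(img.height * scale))  # (new_w, new_h)
    img = img.resize(new_size, Image.BILINEAR)
    # nearest-neighbour keeps missing (zero) depth pixels from being
    # blended with valid ones; dividing rescales the metric depths
    depth_img = Image.fromarray(depth).resize(new_size, Image.NEAREST)
    depth = np.asarray(depth_img, dtype=np.float32) / scale
    return {'img': img, 'depth': depth}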
def main():
    '''
    Train the DORN, running a full validation every valid_rate epochs.
    The function also keeps track of the best-performing model (in
    terms of the delta1 accuracy metric) and saves its weights to the
    checkpoint path whenever validation improves.
    '''
    config = utils.read_config()
    train_loader, valid_loader = create_loader(config)
    resume_training = config['TRAIN'].getboolean('resume')
    ckpt_path = config['TRAIN']['checkpoint_path']
    # generate model
    depth_net = DORN()
    optimizer_state = None
    if resume_training and os.path.isfile(ckpt_path):
        # resume training by loading the saved checkpoint
        print("loading checkpoint {0}".format(ckpt_path))
        checkpoint = torch.load(ckpt_path)
        start_epoch = checkpoint['epoch'] + 1
        # previously saved model state and optimizer state; the
        # optimizer state is loaded after the optimizer is constructed
        optimizer_state = checkpoint['optimizer']
        model_dict = checkpoint['state_dict']
        # set the model state to the one in the checkpoint
        depth_net.load_state_dict(model_dict)
    else:
        path_model = config['MODEL']['kitti']
        model_dict = utils.get_model(path_model)
        dorn_dict = depth_net.state_dict()
        # overwrite existing parameter values
        dorn_dict.update(model_dict)
        # load the pretrained model
        depth_net.load_state_dict(dorn_dict)
        start_epoch = 0
    # transfer the network onto the GPU if available;
    # always do this before constructing the optimizer
    depth_net = depth_net.to(utils.device)
    # log the network graph
    tb = utils.graph_visualize(depth_net, config)
    # ordinal loss function
    criterion = OrdLoss(config)
    # Set requires_grad of all params in the model to False since we
    # are feature extracting: we only want to compute gradients for the
    # last layer(s) of the DORN, i.e., the conv layers of the scene
    # understanding module.
    for param in depth_net.parameters():
        param.requires_grad = False
    # trainable parameters
    train_params = []
    # scu block where layer(s) is/are fine-tuned
    my_block = depth_net.scu_module.concat_process
    # make the selected conv layers of the scu module trainable
    for key, layer in my_block.named_children():
        if key in {'conv_1', 'conv_2', 'conv_comp'}:
            for param in layer.parameters():
                train_params.append(param)
                param.requires_grad = True
    lr = config['OPTIMIZER'].getfloat('learning_rate')
    wd = config['OPTIMIZER'].getfloat('weight_decay')
    omega = config['OPTIMIZER'].getfloat('momentum')
    # dict of parameter blocks and their lr
    train_params = [{'params': train_params, 'lr': lr}]
    # construct the optimizer: only the selected conv layers of the
    # scu module are optimized
    optimizer = optim.SGD(train_params, lr=lr, momentum=omega,
                          weight_decay=wd, nesterov=True)
    if optimizer_state is not None:
        # restore the optimizer state saved in the checkpoint
        optimizer.load_state_dict(optimizer_state)
    # decay the lr by gamma once the epoch number reaches one of the
    # milestones
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[5, 20],
                                               gamma=0.1)
    num_epochs = config['TRAIN'].getint('num_epochs')
    valid_rate = config['TRAIN'].getint('valid_rate')
    best_acc = 0.0
    for epoch in range(start_epoch, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        train(depth_net, train_loader, criterion, optimizer, epoch,
              config, tb)
        # find the best model by running validation
        if (epoch + 1) % valid_rate == 0:
            metrics = validate(depth_net, valid_loader, criterion,
                               epoch, config, tb)
            epoch_acc = metrics['delta1']
            # checkpoint the best model so far, in the format that the
            # resume branch above expects
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = depth_net.state_dict()
                torch.save({'epoch': epoch,
                            'state_dict': best_model_wts,
                            'optimizer': optimizer.state_dict()},
                           ckpt_path)
        # decay the learning rate of the params after each epoch
        scheduler.step()
    # write all pending events
    tb.flush()
    tb.close()
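# Hedged addition: the excerpt shows no entry point; the standard guard
# below is how such a training script is typically invoked.
if __name__ == '__main__':
    main()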