def define_transforms(self, input, gt, img=None):
    """Crop (and optionally mirror) one sample, then decode its depth maps.

    When ``self.train`` is set, a single random crop window of size
    ``self.crop`` is drawn and applied to ``input``, ``gt`` and -- if
    ``self.use_rgb`` is set -- ``img``, followed by a horizontal flip when
    the flip draw succeeds and ``self.flip == 'hflip'``. Otherwise all
    images go through ``self.center_crop``. In both modes ``input`` and
    ``gt`` are finally passed through ``depth_read`` together with
    ``self.sparse_val``.

    Returns:
        The tuple ``(input, gt, img)``; ``img`` is returned untouched when
        ``self.use_rgb`` is false.
    """
    # The flip sample is drawn on every call, even in evaluation mode, so
    # the NumPy random stream advances identically in both modes.
    do_hflip = np.random.uniform(0.0, 1.0) > 0.5 and self.flip == 'hflip'

    if not self.train:
        input = self.center_crop(input)
        gt = self.center_crop(gt)
        if self.use_rgb:
            img = self.center_crop(img)
        return (depth_read(input, self.sparse_val),
                depth_read(gt, self.sparse_val),
                img)

    # One crop window shared by every image keeps them pixel-aligned.
    top, left, height, width = transforms.RandomCrop.get_params(
        input, output_size=self.crop)
    input = F.crop(input, top, left, height, width)
    gt = F.crop(gt, top, left, height, width)
    if do_hflip:
        input = F.hflip(input)
        gt = F.hflip(gt)

    if self.use_rgb:
        img = F.crop(img, top, left, height, width)
        if do_hflip:
            img = F.hflip(img)

    return (depth_read(input, self.sparse_val),
            depth_read(gt, self.sparse_val),
            img)
def define_transforms(self, input, gt, img=None):
    """Center-crop one sample and decode its depth maps.

    ``input`` and ``gt`` (and ``img`` when ``self.use_rgb`` is set) are
    passed through ``self.center_crop``. The two depth images are then
    decoded with ``depth_read_carla`` when ``self.carla`` is set, and with
    ``depth_read`` plus ``self.sparse_val`` otherwise.

    Returns:
        The tuple ``(input, gt, img)``.
    """
    input = self.center_crop(input)
    gt = self.center_crop(gt)
    if self.use_rgb:
        img = self.center_crop(img)

    if self.carla:
        return depth_read_carla(input), depth_read_carla(gt), img

    return (depth_read(input, self.sparse_val),
            depth_read(gt, self.sparse_val),
            img)
def define_transforms(self, input, gt, img=None):
    """Augment (training) or center-crop (evaluation) one sample.

    Training mode applies, with parameters shared by every image of the
    sample: a rotation, a random crop of size ``self.crop`` and -- depending
    on ``self.flip`` -- a horizontal or vertical flip. The decoded depth
    maps are then divided by a freshly drawn ``self.scale``. Evaluation
    mode only applies ``self.center_crop`` and decodes the depth maps.

    Side effects:
        ``self.scale`` is overwritten on every call (in both modes) with a
        sample from ``uniform(self.lowerbound, self.bound)``.

    Returns:
        The tuple ``(input, gt, img)``; ``img`` is returned untouched when
        ``self.use_rgb`` is false.
    """
    # Every random sample is drawn unconditionally and in a fixed order so
    # the NumPy random stream is consumed the same way whatever the flags.
    angle = np.random.uniform(-5.0, 5.0)
    if not self.rotate:
        # NOTE: `uniform(...) and self.rotate` would evaluate to
        # `self.rotate` itself (the right operand of a truthy `and`), which
        # pins the angle to 1 degree instead of using the sampled value.
        angle = 0
    hflip_input = np.random.uniform(0.0, 1.0) > 0.5 and self.flip == 'hflip'
    vflip_input = np.random.uniform(0.0, 1.0) > 0.5 and self.flip == 'vflip'
    self.scale = np.random.uniform(self.lowerbound, self.bound)

    if self.train:
        input, gt = F.rotate(input, angle), F.rotate(gt, angle)

        # One crop window shared by every image keeps them pixel-aligned.
        i, j, h, w = transforms.RandomCrop.get_params(
            input, output_size=self.crop)
        input = F.crop(input, i, j, h, w)
        gt = F.crop(gt, i, j, h, w)

        if hflip_input:
            input, gt = F.hflip(input), F.hflip(gt)
        elif vflip_input:
            input, gt = F.vflip(input), F.vflip(gt)

        if self.use_rgb:
            img = F.rotate(img, angle)
            img = F.crop(img, i, j, h, w)
            if hflip_input:
                img = F.hflip(img)
            elif vflip_input:
                img = F.vflip(img)

        input = depth_read(input, self.sparse_val) / self.scale
        gt = depth_read(gt, self.sparse_val) / self.scale
    else:
        input, gt = self.center_crop(input), self.center_crop(gt)
        if self.use_rgb:
            img = self.center_crop(img)
        input = depth_read(input, self.sparse_val)
        gt = depth_read(gt, self.sparse_val)

    return input, gt, img
def compute_mean_std(self):
    """Compute pooled depth statistics over ``self.train_paths['lidar_in']``.

    Each image is decoded with ``depth_read`` and only values ``>= 0`` are
    kept. Per-image count, mean and standard deviation are combined into a
    single mean and standard deviation over all kept values.

    Returns:
        ``(mean, std, max_lst)`` where ``max_lst`` is a float array holding
        the maximum kept value of each image that had at least one.
        ``mean`` and ``std`` are NaN when no image contributes any value.
    """
    counts, means, stds, maxima = [], [], [], []

    for raw_img_path in tqdm.tqdm(self.train_paths['lidar_in']):
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(raw_img_path) as raw_img:
            raw_np = depth_read(raw_img)
        vec = raw_np[raw_np >= 0]
        if len(vec) == 0:
            # No usable value: zero weight in the pooled statistics, and
            # np.max would raise on an empty selection.
            continue
        counts.append(len(vec))
        means.append(np.mean(vec))
        stds.append(np.std(vec))
        maxima.append(np.max(vec))

    # Convert once at the end instead of reallocating with np.append on
    # every iteration.
    nums = np.asarray(counts, dtype=float)
    means = np.asarray(means, dtype=float)
    stds = np.asarray(stds, dtype=float)
    max_lst = np.asarray(maxima, dtype=float)

    total = np.sum(nums)
    mean = np.dot(nums, means) / total
    # Pooled variance = weighted within-image variance + weighted variance
    # of the per-image means around the global mean.
    std = np.sqrt(
        (np.dot(nums, stds ** 2) + np.dot(nums, (means - mean) ** 2)) / total)
    return mean, std, max_lst
def __getitem__(self, idx):
    """Load the lidar/RGB pair stored at position ``idx`` as tensors.

    Both images are asserted to be 1216x352. The rows above
    ``352 - args.crop_h`` are cut away before conversion. ``crop_h`` and
    ``sparse_val`` are read from the module-level ``args``.

    Returns:
        ``(raw, rgb, name, crop)``: the lidar tensor produced from
        ``depth_read``, the RGB tensor multiplied by 255, the basename of
        the lidar file, and the number of rows removed from the top.
    """
    expected_size = (1216, 352)

    lidar_file = self.lidar_files[idx]
    lidar_image = Image.open(os.path.join(lidar_file))
    assert lidar_image.size == expected_size

    crop = 352 - args.crop_h
    window = (0, crop, 1216, 352)  # PIL box: (left, upper, right, lower)
    depth = depth_read(lidar_image.crop(window), args.sparse_val)
    depth = self.to_tensor(depth).float()

    rgb_file = self.rgb_files[idx]
    rgb_image = Image.open(os.path.join(rgb_file))
    assert rgb_image.size == expected_size
    colour = self.to_tensor(rgb_image.crop(window)).float() * 255.0

    return depth, colour, os.path.basename(lidar_file), crop
def main():
    """Run the best saved checkpoint over the selected split and save results.

    Parses the command line into the module-level ``args``, locates the
    first ``model_best*`` file under ``args.save_path``, restores the model
    from it and runs it on every (lidar, rgb) pair in
    ``dataset.selected_paths``. Each prediction is clamped to ``[0, 85]``,
    multiplied by 256, padded back to 1216x352 and saved under
    ``<checkpoint dir>/results`` using the lidar file's basename. The mean
    forward-pass time and the number of processed images are printed.

    Side effects:
        Sets the module-level ``args`` and ``dataset``, sets
        ``torch.backends.cudnn.benchmark``, creates the results directory
        and writes one image per sample.

    Returns:
        None. Returns early (after printing a message) when no checkpoint
        file can be found.
    """
    global args
    global dataset
    args = parser.parse_args()
    use_cuda = not args.no_cuda
    device = torch.device('cuda' if use_cuda else 'cpu')

    torch.backends.cudnn.benchmark = args.cudnn

    # glob returns an empty list when nothing matches; indexing it blindly
    # would raise before the "no checkpoint" message could be printed.
    checkpoint_pattern = os.path.join(args.save_path, 'model_best*')
    best_files = glob.glob(checkpoint_pattern)
    if not best_files:
        print("=> no checkpoint found at '{}'".format(checkpoint_pattern))
        return
    best_file_name = best_files[0]

    save_root = os.path.join(os.path.dirname(best_file_name), 'results')
    os.makedirs(save_root, exist_ok=True)

    print("==========\nArgs:{}\n==========".format(args))
    # INIT
    print("Init model: '{}'".format(args.mod))
    channels_in = 1 if args.input_type == 'depth' else 4
    model = Models.define_model(mod=args.mod, in_channels=channels_in)
    print("Number of parameters in model {} is {:.3f}M".format(
        args.mod.upper(),
        sum(tensor.numel() for tensor in model.parameters()) / 1e6))
    if use_cuda:
        # Move (and optionally wrap) the model before loading the weights so
        # the state-dict keys match the wrapped/unwrapped module.
        if not args.multi:
            model = model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    if not os.path.isfile(best_file_name):
        print("=> no checkpoint found at '{}'".format(best_file_name))
        return

    print("=> loading checkpoint '{}'".format(best_file_name))
    # NOTE(review): torch.load unpickles the file; only load checkpoints
    # from a trusted source. On CPU-only runs the tensors are remapped to
    # the CPU, otherwise the default placement is kept.
    checkpoint = torch.load(best_file_name,
                            map_location=None if use_cuda else 'cpu')
    model.load_state_dict(checkpoint['state_dict'])
    lowest_loss = checkpoint['loss']
    best_epoch = checkpoint['best epoch']
    print('Lowest RMSE for selection validation set was {:.4f} in epoch {}'.
          format(lowest_loss, best_epoch))

    print("Initializing dataset {}".format(args.dataset))
    dataset = Datasets.define_dataset(args.dataset, args.data_path,
                                      args.input_type)
    dataset.prepare_dataset()
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()

    # Rows removed from the top of every 1216x352 frame; loop-invariant.
    crop = 352 - args.crop_h
    crop_box = (0, crop, 1216, 352)

    model.eval()
    print("===> Start testing")
    total_time = []
    with torch.no_grad():
        # The ground-truth paths stay in the zip so the number of processed
        # samples is unchanged, but the files themselves are not needed to
        # produce predictions.
        for lidar_path, rgb_path, _gt_path in tqdm.tqdm(
                zip(dataset.selected_paths['lidar_in'],
                    dataset.selected_paths['img'],
                    dataset.selected_paths['gt'])):
            with Image.open(lidar_path) as raw_pil:
                assert raw_pil.size == (1216, 352)
                raw = depth_read(raw_pil.crop(crop_box), args.sparse_val)
                raw = to_tensor(raw).float()

            net_input = torch.unsqueeze(raw, 0).to(device)

            if args.normal:
                # Put in {0-1} range
                net_input = net_input / args.max_depth

            if args.input_type == 'rgb':
                with Image.open(rgb_path) as rgb_pil:
                    assert rgb_pil.size == (1216, 352)
                    rgb = to_tensor(rgb_pil.crop(crop_box)).float()
                rgb = torch.unsqueeze(rgb, 0).to(device)
                if not args.normal:
                    rgb = rgb * 255.0

                net_input = torch.cat((net_input, rgb), 1)

            # Synchronize around the forward pass so queued GPU work is
            # included in the measurement; not applicable on CPU.
            if use_cuda:
                torch.cuda.synchronize()
            start = time.perf_counter()
            output, _, _, _ = model(net_input)
            if use_cuda:
                torch.cuda.synchronize()
            total_time.append(time.perf_counter() - start)

            if args.normal:
                output = output * args.max_depth
            output = torch.clamp(output, min=0, max=85)

            output = output * 256.
            output = output[0][0:1].cpu()

            if crop != 0:
                # Restore the full height: pad the removed rows on top and
                # fill them with copies of the first predicted row.
                padding = (0, 0, crop, 0)
                output = torch.nn.functional.pad(output, padding,
                                                 "constant", 0)
                output[:, 0:crop] = output[:, crop].repeat(crop, 1)

            pil_img = to_pil(output.int())
            assert pil_img.size == (1216, 352)
            pil_img.save(os.path.join(save_root,
                                      os.path.basename(lidar_path)))

    # The first 100 timings are skipped as warm-up; with 100 samples or
    # fewer, fall back to all of them instead of dividing by zero.
    timed = total_time[100:] or total_time
    if timed:
        print('average_time: ', sum(timed) / len(timed))
    print('num imgs: ', len(total_time))