def _get_view3(self, filepath):
    """
    Load a three-frame sample, using the middle frame as target.

    Parameters
    ----------
    filepath : str
        Folder holding '0000.jpg', '0001.jpg', '0002.jpg', '0001.npy' and 'poses.txt'

    Returns
    -------
    image
        Target image loaded from '0001.jpg'
    depth : np.array
        Content of '0001.npy'
    rgb_contexts : list
        Context images loaded from '0000.jpg' and '0002.jpg'
    pose_context : list of np.array [4,4]
        float32 matrices pose_0 @ inv(pose_1) and pose_2 @ inv(pose_1)
    """
    def in_folder(name):
        return os.path.join(filepath, name)

    def homogeneous(mat34):
        # Embed a 3x4 pose into a 4x4 matrix whose last row is (0, 0, 0, 1)
        mat44 = np.eye(4)
        mat44[:3, :] = mat34
        return mat44

    image = load_image(in_folder('0001.jpg'))
    depth = np.load(in_folder('0001.npy'))
    rgb_contexts = [load_image(in_folder(name)) for name in ('0000.jpg', '0002.jpg')]

    # Every row of poses.txt is reshaped to a 3x4 matrix
    pose_rows = np.genfromtxt(in_folder('poses.txt')).astype(np.float64)
    matrices = []
    for row in pose_rows:
        matrices.append(row.reshape((3, 4)))

    first, middle, last = (homogeneous(matrices[i]) for i in range(3))

    # Both context poses are expressed relative to the middle (target) frame
    middle_inv = np.linalg.inv(middle)
    pose_context = [
        np.matmul(first, middle_inv).astype(np.float32),
        np.matmul(last, middle_inv).astype(np.float32),
    ]
    return image, depth, rgb_contexts, pose_context
def read_png_depth(file):
    """
    Reads a .png depth map.

    Parameters
    ----------
    file : str
        Path of the .png depth file

    Returns
    -------
    depth : np.array
        Pixel values divided by 256 with a trailing axis added (axis=2);
        pixels that are 0 in the file are set to -1

    Raises
    ------
    AssertionError
        If no pixel value exceeds 255
    """
    depth_png = np.array(load_image(file), dtype=int)
    assert (np.max(depth_png) > 255), 'Wrong .png depth file'
    # np.float was an alias of the builtin float (float64) and was removed in
    # NumPy 1.24; np.float64 is the equivalent explicit dtype
    depth = depth_png.astype(np.float64) / 256.
    # Zero pixels are flagged with -1 instead of being treated as a depth value
    depth[depth_png == 0] = -1.
    return np.expand_dims(depth, axis=2)
def _read_rgb_context_files(self, session, filename):
    """
    Load the context images associated with a target frame.

    Parameters
    ----------
    session : str
        Session folder, relative to self.root_dir
    filename : str
        Target frame file name, used to look up its context files

    Returns
    -------
    images : list
        One loaded image per path returned by self._get_context_file_paths
    """
    context_names = self._get_context_file_paths(filename, self.file_tree[session])
    images = []
    for context_name in context_names:
        images.append(load_image(os.path.join(self.root_dir, session, context_name)))
    return images
def infer_and_save_pose(input_file_refs, input_file, model_wrapper, image_shape, data_type,
                        save_depth_root, save_vis_root):
    """
    Run depth and pose inference on one target image and save the depth outputs

    Parameters
    ----------
    input_file_refs : list(str)
        Reference image file paths
    input_file : str
        Image file for pose estimation
    model_wrapper : nn.Module
        Model wrapper used for inference; it is called with a batch dict
        holding 'rgb', 'rgb_context' and 'intrinsics'
    image_shape : Image shape
        Input image shape
    data_type
        Forwarded unchanged to get_intrinsics
    save_depth_root : str
        Folder in which '<base_name>.npy' (depth resized to the raw image size) is saved
    save_vis_root : str
        Folder in which '<base_name>.jpg' (depth visualization resized to the raw image size) is saved

    Returns
    -------
    depth : np.array
        Predicted depth at network resolution
    pose21 : np.array
        First batch element of output['poses'][0].mat
    pose23 : np.array
        First batch element of output['poses'][1].mat
    intrinsics : np.array
        Intrinsics computed for the target image
    image : np.array
        Network input image, channels last, multiplied by 255
    """
    base_name = os.path.splitext(os.path.basename(input_file))[0]

    def process_image(filename):
        """Load one image; return its tensor, its intrinsics tensor and its raw size."""
        image = load_image(filename)
        raw_wh = image.size
        # Resize and to tensor
        intr = get_intrinsics(raw_wh, image_shape, data_type)  # (3, 3)
        image = resize_image(image, image_shape)
        image = to_tensor(image).unsqueeze(0)
        intr = torch.from_numpy(intr).unsqueeze(0)  # (1, 3, 3)
        # Send image to GPU if available
        if torch.cuda.is_available():
            image = image.to('cuda')
            intr = intr.to('cuda')
        return image, intr, raw_wh

    image_ref = [process_image(input_file_ref)[0] for input_file_ref in input_file_refs]
    # The raw size comes from the same load, so the target image is read from disk only once
    image, intrinsics, image_raw_wh = process_image(input_file)

    batch = {'rgb': image, 'rgb_context': image_ref, "intrinsics": intrinsics}
    output = model_wrapper(batch)

    inv_depth = output['inv_depths'][0]  # (1, 1, h, w)
    depth = inv2depth(inv_depth)[0, 0].detach().cpu().numpy()  # (h, w)
    # TODO check: presumably target -> ref[0] and target -> ref[1]
    pose21 = output['poses'][0].mat[0].detach().cpu().numpy()  # (4, 4)
    pose23 = output['poses'][1].mat[0].detach().cpu().numpy()  # (4, 4)

    # Save the visualization, upsampled to the raw image size
    vis_depth = viz_inv_depth(inv_depth[0]) * 255
    vis_depth_upsample = cv2.resize(vis_depth, image_raw_wh, interpolation=cv2.INTER_LINEAR)
    write_image(os.path.join(save_vis_root, f"{base_name}.jpg"), vis_depth_upsample)

    # Save the depth map; nearest-neighbour resizing avoids blending depth values
    depth_upsample = cv2.resize(depth, image_raw_wh, interpolation=cv2.INTER_NEAREST)
    np.save(os.path.join(save_depth_root, f"{base_name}.npy"), depth_upsample)

    return depth, pose21, pose23, \
        intrinsics[0].detach().cpu().numpy(), \
        image[0].permute(1, 2, 0).detach().cpu().numpy() * 255
def process_image(filename):
    """
    Load an image file and turn it into a batched tensor.

    `image_shape` and `dtype` are not parameters; they are resolved from the
    enclosing scope.

    Parameters
    ----------
    filename : str
        Image file to load

    Returns
    -------
    image : torch.Tensor
        Resized image with a leading batch axis, moved to 'cuda:<rank()>'
        with `dtype` when CUDA is available
    """
    resized = resize_image(load_image(filename), image_shape)
    batched = to_tensor(resized).unsqueeze(0)
    if not torch.cuda.is_available():
        return batched
    device = 'cuda:{}'.format(rank())
    return batched.to(device, dtype=dtype)
def _get_view2(self, filepath):
    """
    Load a two-frame sample, using the first frame as target.

    Parameters
    ----------
    filepath : str
        Folder holding '0000.jpg', '0001.jpg', '0000.npy' and 'poses.txt'

    Returns
    -------
    image
        Target image loaded from '0000.jpg'
    depth : np.array
        Content of '0000.npy'
    rgb_contexts : list
        Single context image loaded from '0001.jpg'
    pose_context : list of np.array [4,4]
        Single float32 matrix pose_1 @ inv(pose_0)
    """
    def in_folder(name):
        return os.path.join(filepath, name)

    def homogeneous(mat34):
        # 4x4 matrix with the 3x4 pose on top and (0, 0, 0, 1) as last row
        mat44 = np.eye(4)
        mat44[:3, :] = mat34
        return mat44

    image = load_image(in_folder('0000.jpg'))
    depth = np.load(in_folder('0000.npy'))
    rgb_contexts = [load_image(in_folder('0001.jpg'))]

    # Every row of poses.txt is reshaped to a 3x4 matrix
    pose_rows = np.genfromtxt(in_folder('poses.txt')).astype(np.float64)
    matrices = []
    for row in pose_rows:
        matrices.append(row.reshape((3, 4)))

    target_pose = homogeneous(matrices[0])
    context_pose = homogeneous(matrices[1])

    relative = np.matmul(context_pose, np.linalg.inv(target_pose))
    return image, depth, rgb_contexts, [relative.astype(np.float32)]
def read_png_depth(file):
    """
    Reads a .png depth map.

    Parameters
    ----------
    file : str
        Path of the .png depth file

    Returns
    -------
    depth : np.array
        Pixel values divided by 1000 with a trailing axis added (axis=2);
        pixels that are 0 in the file are set to -1
    """
    depth_png = np.array(load_image(file), dtype=int)
    # np.float was an alias of the builtin float (float64) and was removed in
    # NumPy 1.24; np.float64 is the equivalent explicit dtype.
    # NOTE(review): the 1/1000 scale presumably converts millimetres to metres - confirm.
    # No sanity check is made on the value range of the file.
    depth = depth_png.astype(np.float64) / 1000.
    # Zero pixels are flagged with -1 instead of being treated as a depth value
    depth[depth_png == 0] = -1.
    return np.expand_dims(depth, axis=2)
def process_image(filename):
    """
    Load an image file and build its image and intrinsics tensors.

    `image_shape` and `data_type` are not parameters; they are resolved from
    the enclosing scope.

    Parameters
    ----------
    filename : str
        Image file to load

    Returns
    -------
    image : torch.Tensor
        Resized image with a leading batch axis
    intr : torch.Tensor
        Result of get_intrinsics for the original image size, with a leading batch axis

    Both tensors are moved to 'cuda' when CUDA is available.
    """
    raw = load_image(filename)
    # Intrinsics are derived from the original size, before resizing
    intr_np = get_intrinsics(raw.size, image_shape, data_type)  # (3, 3)
    image_tensor = to_tensor(resize_image(raw, image_shape)).unsqueeze(0)
    intr_tensor = torch.from_numpy(intr_np).unsqueeze(0)  # (1, 3, 3)
    if not torch.cuda.is_available():
        return image_tensor, intr_tensor
    return image_tensor.to('cuda'), intr_tensor.to('cuda')
def __getitem__(self, idx):
    """
    Get a two-frame dataset sample given an index.

    The sample folder self.paths[idx] is expected to hold '0000.jpg',
    '0001.jpg', '0000.npy', 'poses.txt' and 'cam.txt'.

    Returns
    -------
    sample : dict
        Keys 'idx', 'filename', 'rgb', 'depth', 'pose_context', 'intrinsics',
        plus 'rgb_context' when self.has_context is set. The dict is passed
        through self.data_transform when one is defined.
    """
    folder = self.paths[idx]

    def in_folder(name):
        return os.path.join(folder, name)

    def homogeneous(mat34):
        # 4x4 matrix with the 3x4 pose on top and (0, 0, 0, 1) as last row
        mat44 = np.eye(4)
        mat44[:3, :] = mat34
        return mat44

    target_image = load_image(in_folder('0000.jpg'))
    target_depth = np.load(in_folder('0000.npy'))
    context_images = [load_image(in_folder('0001.jpg'))]

    # Every row of poses.txt is reshaped to a 3x4 matrix
    pose_rows = np.genfromtxt(in_folder('poses.txt')).astype(np.float64)
    matrices = []
    for row in pose_rows:
        matrices.append(row.reshape((3, 4)))

    target_pose = homogeneous(matrices[0])
    context_pose = homogeneous(matrices[1])

    # Relative pose: pose_1 @ inv(pose_0)
    relative = np.matmul(context_pose, np.linalg.inv(target_pose))

    intrinsics = np.genfromtxt(in_folder('cam.txt'))

    sample = {
        'idx': idx,
        'filename': '%s' % (folder.split('/')[-1]),
        'rgb': target_image,
        'depth': target_depth,
        'pose_context': [relative.astype(np.float32)],
        'intrinsics': intrinsics,
    }

    if self.has_context:
        sample['rgb_context'] = context_images

    if self.data_transform:
        sample = self.data_transform(sample)

    return sample
def __getitem__(self, idx):
    """
    Get a dataset sample given an index.

    Paths of the pose and intrinsics files are derived from the target image
    path by string substitution ('color' -> 'pose', '.jpg' -> '.txt', and
    everything after the first 'color' replaced by 'intrinsic/intrinsic_color.txt').

    Returns
    -------
    sample : dict
        Keys 'idx', 'filename', 'rgb', 'intrinsics', 'pose_context', plus
        'depth' when self.with_depth is set and 'rgb_context' when
        self.has_context is set. The dict is passed through
        self.data_transform when one is defined.
    """
    def to_pose_path(image_path):
        return image_path.replace('color', 'pose').replace('.jpg', '.txt')

    session, filename = self.files[idx]
    target_path = os.path.join(self.root_dir, session, filename)

    image = self._read_rgb_file(session, filename)

    if self.with_depth:
        raw_depth = self._read_depth(self._get_depth_file(target_path))
        # Depth is resized to the image size with nearest-neighbour interpolation
        resized_depth = cv2.resize(raw_depth, image.size,
                                   interpolation=cv2.INTER_NEAREST)

    # Only the top-left 3x3 part of the intrinsics file is kept
    intr = np.genfromtxt(
        target_path.split('color')[0] + 'intrinsic/intrinsic_color.txt')[:3, :3]

    context_names = self._get_context_file_paths(filename, self.file_tree[session])
    context_full_paths = [
        os.path.join(self.root_dir, session, name) for name in context_names
    ]
    context_images = []
    for path in context_full_paths:
        context_images.append(load_image(path))

    target_pose = np.genfromtxt(to_pose_path(target_path))
    context_poses = [np.genfromtxt(to_pose_path(path)) for path in context_full_paths]

    # Relative pose: inv(context_pose) @ target_pose
    rel_poses = []
    for context_pose in context_poses:
        relative = np.matmul(np.linalg.inv(context_pose), target_pose)
        rel_poses.append(relative.astype(np.float32))

    sample = {
        'idx': idx,
        'filename': '%s_%s' % (session.split('/')[0], os.path.splitext(filename)[0]),
        'rgb': image,
        'intrinsics': intr,
        'pose_context': rel_poses,
    }

    # Add depth information if requested
    if self.with_depth:
        sample['depth'] = resized_depth

    if self.has_context:
        sample['rgb_context'] = context_images

    if self.data_transform:
        sample = self.data_transform(sample)

    return sample
def load_depth(file):
    """
    Load a depth map from file

    Parameters
    ----------
    file : str
        Depth map filename (.npz or .png)

    Returns
    -------
    depth : np.array [H,W]
        Depth map (invalid pixels are 0)

    Raises
    ------
    AssertionError
        If a .png file holds no value above 255
    NotImplementedError
        If the file extension is neither npz nor png
    """
    if file.endswith('npz'):
        # Close the archive once the array has been read out of it
        with np.load(file) as archive:
            return archive['depth']
    elif file.endswith('png'):
        depth_png = np.array(load_image(file), dtype=int)
        assert (np.max(depth_png) > 255), 'Wrong .png depth file'
        # np.float was an alias of the builtin float (float64) and was removed
        # in NumPy 1.24; np.float64 is the equivalent explicit dtype
        return depth_png.astype(np.float64) / 256.
    else:
        raise NotImplementedError('Depth extension not supported.')
def __getitem__(self, idx):
    """
    Get a dataset sample (target frame plus pre-selected context frames) given an index.

    Context frames are looked up in self.bacontext{1..4}_tree at the position
    of the target file inside self.bafile_tree[session]. Which trees are used
    depends on (self.forward_context, self.backward_context):

    - (2, 2): trees 1, 2, 3 and 4
    - (1, 1): trees 1 and 2
    - (1, 0): tree 1
    - (1, -1): tree 1 twice with probability 0.5, otherwise trees 1 and 2

    Returns
    -------
    sample : dict
        Keys 'idx', 'filename', 'rgb', 'intrinsics', 'pose_context', plus
        'depth' when self.with_depth is set and 'rgb_context' when
        self.has_context is set. The dict is passed through
        self.data_transform when one is defined.

    Raises
    ------
    NotImplementedError
        For any other (forward_context, backward_context) combination
    """
    def to_pose_path(image_path):
        return image_path.replace('color', 'pose').replace('.jpg', '.txt')

    session, filename = self.files[idx]
    target_path = os.path.join(self.root_dir, session, filename)

    image = self._read_rgb_file(session, filename)

    if self.with_depth:
        depth = self._read_depth(self._get_depth_file(target_path))
        # Depth is resized to the image size with nearest-neighbour interpolation
        resized_depth = cv2.resize(depth, image.size,
                                   interpolation=cv2.INTER_NEAREST)

    # Only the top-left 3x3 part of the intrinsics file is kept
    intr_path = target_path.split('color')[0] + 'intrinsic/intrinsic_color.txt'
    intr = np.genfromtxt(intr_path)[:3, :3]

    ba_idx = self.bafile_tree[session].index(filename)

    context_setting = (self.forward_context, self.backward_context)
    if context_setting == (2, 2):
        context_trees = [self.bacontext1_tree, self.bacontext2_tree,
                         self.bacontext3_tree, self.bacontext4_tree]
    elif context_setting == (1, 1):
        context_trees = [self.bacontext1_tree, self.bacontext2_tree]
    elif context_setting == (1, 0):
        context_trees = [self.bacontext1_tree]
    elif context_setting == (1, -1):
        if np.random.random() < 0.5:
            # Deliberately repeat the first context frame
            context_trees = [self.bacontext1_tree, self.bacontext1_tree]
        else:
            context_trees = [self.bacontext1_tree, self.bacontext2_tree]
    else:
        raise NotImplementedError

    # Full paths are built exactly once. Joining them with root_dir/session a
    # second time is only a no-op when the path is absolute and would
    # duplicate the prefix for a relative root_dir.
    context_paths = [
        os.path.join(self.root_dir, session, tree[session][ba_idx])
        for tree in context_trees
    ]
    context_images = [load_image(path) for path in context_paths]

    pose = np.genfromtxt(to_pose_path(target_path))
    context_poses = [np.genfromtxt(to_pose_path(path)) for path in context_paths]

    # Relative pose: inv(context_pose) @ target_pose
    rel_poses = [
        np.matmul(np.linalg.inv(context_pose), pose).astype(np.float32)
        for context_pose in context_poses
    ]

    sample = {
        'idx': idx,
        'filename': '%s_%s' % (session.split('/')[0], os.path.splitext(filename)[0]),
        'rgb': image,
        'intrinsics': intr,
        'pose_context': rel_poses,
    }

    # Add depth information if requested
    if self.with_depth:
        sample['depth'] = resized_depth

    if self.has_context:
        sample['rgb_context'] = context_images

    if self.data_transform:
        sample = self.data_transform(sample)

    return sample
def _read_rgb_file(self, session, filename):
    """
    Load an RGB image from the 'train' folder next to self.root_dir.

    Parameters
    ----------
    session : str
        Session folder
    filename : str
        Image file name inside the session folder

    Returns
    -------
    Image loaded from <dirname(self.root_dir)>/train/<session>/<filename>
    """
    parent_dir = os.path.dirname(self.root_dir)
    image_path = os.path.join(os.path.join(parent_dir, 'train'), session, filename)
    return load_image(image_path)
def _read_rgb_file(self, session, filename):
    """
    Load an RGB image of a session.

    Parameters
    ----------
    session : str
        Session folder, relative to self.root_dir
    filename : str
        Image file name inside the session folder

    Returns
    -------
    Image loaded from <self.root_dir>/<session>/<filename>
    """
    return load_image(os.path.join(self.root_dir, session, filename))
def _read_rgb_file(self, session, filename):
    """
    Load an RGB image of a session.

    Parameters
    ----------
    session : str
        Session folder, relative to self.root_dir
    filename : str
        Image file name inside the session folder

    Returns
    -------
    Image loaded from <self.root_dir>/<session>/<filename>
    """
    image_path = os.path.join(self.root_dir, session, filename)
    rgb_image = load_image(image_path)
    return rgb_image
def __getitem__(self, idx):
    """
    Get dataset sample given an index.

    Returns
    -------
    sample : dict
        Always holds 'idx', 'filename', 'rgb' and 'intrinsics'. 'pose' is
        added when self.with_pose is set, 'depth' when self.with_depth is
        set, 'rgb_context' when self.with_context is set, and 'pose_context'
        when both context and pose are requested. The dict is passed through
        self.data_transform when one is defined.
    """
    target_path = self.paths[idx]

    # Image information
    sample = {
        'idx': idx,
        'filename': '%s_%010d' % (self.split, idx),
        'rgb': load_image(target_path),
    }

    # Intrinsics: calibration data is read once per parent folder and cached
    parent_folder = self._get_parent_folder(target_path)
    if parent_folder not in self.calibration_cache:
        self.calibration_cache[parent_folder] = \
            self._read_raw_calib_file(parent_folder)
    c_data = self.calibration_cache[parent_folder]
    sample['intrinsics'] = self._get_intrinsics(target_path, c_data)

    # Pose information, if requested
    if self.with_pose:
        sample['pose'] = self._get_pose(target_path)

    # Depth information, if requested
    if self.with_depth:
        sample['depth'] = self._read_depth(self._get_depth_file(target_path))

    # Context information, if requested
    if self.with_context:
        # Backward context indexes come first, then forward ones
        context_idxs = self.backward_context_paths[idx] + \
            self.forward_context_paths[idx]
        context_files, _ = self._get_context_files(target_path, context_idxs)
        sample['rgb_context'] = [load_image(path) for path in context_files]

        # Context poses: inv(context_pose) @ target_pose
        if self.with_pose:
            target_pose = sample['pose']
            context_poses = [self._get_pose(path) for path in context_files]
            relative_poses = []
            for context_pose in context_poses:
                relative_poses.append(invert_pose_numpy(context_pose) @ target_pose)
            sample['pose_context'] = relative_poses

    # Apply transformations
    if self.data_transform:
        sample = self.data_transform(sample)

    return sample
def infer_and_save_depth(input_file, output_file, model_wrapper, image_shape, half, save):
    """
    Run depth inference on a single image and save the result

    Parameters
    ----------
    input_file : str
        Image file
    output_file : str
        Output file, or folder where the output will be saved
    model_wrapper : nn.Module
        Model wrapper used for inference
    image_shape : Image shape
        Input image shape
    half: bool
        use half precision (fp16)
    save: str
        Save format (npz or png); any other value saves a visualization image
        and additionally writes a point cloud and an .npz dump
    """
    # A non-image output is treated as a folder; the input name is appended to it
    if not is_image(output_file):
        os.makedirs(output_file, exist_ok=True)
        output_file = os.path.join(output_file, os.path.basename(input_file))

    # Half precision for evaluation if requested
    dtype = None
    if half:
        dtype = torch.float16

    # Load, resize and convert the image to a batched tensor
    image = to_tensor(resize_image(load_image(input_file), image_shape)).unsqueeze(0)

    # Send image to GPU if available
    if torch.cuda.is_available():
        image = image.to('cuda:{}'.format(rank()), dtype=dtype)

    # Depth inference (returns predicted inverse depth)
    pred_inv_depth = model_wrapper.depth(image)[0]

    if save in ('npz', 'png'):
        # Convert to depth and save in the requested format
        filename = '{}.{}'.format(os.path.splitext(output_file)[0], save)
        source_label = pcolor(input_file, 'cyan', attrs=['bold'])
        target_label = pcolor(filename, 'magenta', attrs=['bold'])
        print('Saving {} to {}'.format(source_label, target_label))
        write_depth(filename, depth=inv2depth(pred_inv_depth))
        return

    # Prepare RGB image and depth as numpy arrays
    rgb = image[0].permute(1, 2, 0).detach().cpu().numpy() * 255
    depth = inv2depth(pred_inv_depth)[0].detach().cpu().numpy()
    print(depth.shape)

    # NOTE(review): the intrinsics below are guessed from the image size and
    # the two output paths are hard-coded, so they are overwritten on every
    # call - this looks like leftover debug output; confirm before relying on it.
    height, width = rgb.shape[:2]
    fx = fy = width * 1.2
    cx = width / 2.0
    cy = height / 2.0
    generate_pointcloud(rgb, depth, fx, fy, cx, cy, "./kitti_hr_test.ply")
    np.savez("./data.npz", rgb=rgb, depth=depth)

    # Prepare inverse depth visualization and stack it below the RGB image
    viz_pred_inv_depth = viz_inv_depth(pred_inv_depth[0]) * 255
    stacked = np.concatenate([rgb, viz_pred_inv_depth], 0)

    # Save visualization (channel order is reversed before writing)
    source_label = pcolor(input_file, 'cyan', attrs=['bold'])
    target_label = pcolor(output_file, 'magenta', attrs=['bold'])
    print('Saving {} to {}'.format(source_label, target_label))
    imwrite(output_file, stacked[:, :, ::-1])