def verify_features(self, I_a, d_a, K, I_b, se3_gt, x, y, title):
    """
    Visualize how well the features of frame A agree with the wrapped features of frame B.

    Extract feature pyramids f_a, f_b of I_a and I_b
    Wrap f_b to f_a using the ground-truth pose
    Compute the distance between the wrapped f_b feature at pixel (x, y) and the f_a
    features inside a window around that pixel, then show it next to both input images.
    One figure is shown for each pyramid level (2, 1, 0).

    Note: `requires_grad_()` is called in place on both I_a and I_b.

    :param I_a: Image of frame A, dim: (N, C, H, W)
    :param d_a: Depth of frame A, dim: (N, 1, H, W)
    :param K: intrinsic matrix at level 0: dim: (N, 3, 3)
    :param I_b: Image of frame B, dim: (N, C, H, W)
    :param se3_gt: Groundtruth of se3, dim: (N, 6)
    :param x: column (width axis) of the query pixel, in level-0 pixel coordinates
    :param y: row (height axis) of the query pixel, in level-0 pixel coordinates
    :param title: figure title forwarded to show_multiple_img
    :return: None
    """
    import banet_track.ba_debug as debug

    # Unpacking also asserts that the input is 4-D; only the batch size is needed.
    N, _, _, _ = I_a.shape
    I_a.requires_grad_()
    I_b.requires_grad_()

    # Concatenate I_a and I_b so the backbone runs once for both frames
    I = torch.cat([I_a, I_b], dim=0)

    # Aggregate pyramid features, then split the batch back into frame A / frame B
    aggr_pyramid = self.aggregate_pyramid_features(self.backbone_net.forward(I))
    aggr_pyramid_f_a = [f[:N, :, :, :] for f in aggr_pyramid]
    aggr_pyramid_f_b = [f[N:, :, :, :] for f in aggr_pyramid]

    # The ground-truth pose does not depend on the pyramid level: compute it once.
    R, t = se3_exp(se3_gt)
    window_offset = debug.similar_window_offset

    for level in [2, 1, 0]:
        (level_H, level_W) = self.level_dim_hw[level]

        # Resize and rescale the depth and the intrinsic matrix
        rescale_ratio = 1.0 / math.pow(2, level)
        level_K = rescale_ratio * K.detach()                                    # dim: (N, 3, 3)
        level_d_a = F.interpolate(d_a, scale_factor=rescale_ratio).detach()     # dim: (N, 1, level_H, level_W)

        # Project the pixels of frame A into frame B
        x_a_2d = self.x_valid_2d[level]                                         # dim: (N, H*W, 2)
        X_a_3d = batched_pi_inv(level_K, x_a_2d,
                                level_d_a.view((N, level_H * level_W, 1)))
        X_b_3d = batched_transpose(R, t, X_a_3d)
        x_b_2d, _ = batched_pi(level_K, X_b_3d)
        x_b_2d = module.batched_x_2d_normalize(float(level_H), float(level_W),
                                               x_b_2d).view(N, level_H, level_W, 2)

        # Wrap the feature of frame B to frame A
        level_aggr_pyramid_f_b_wrap = batched_interp2d(aggr_pyramid_f_b[level], x_b_2d)

        # Query pixel and its window at this level; the lower bounds are clamped at 0,
        # the upper bounds rely on slicing to clip at the feature-map border.
        level_x = int(x * rescale_ratio)
        level_y = int(y * rescale_ratio)
        left = max(level_x - window_offset, 0)
        right = level_x + window_offset
        up = max(level_y - window_offset, 0)
        down = level_y + window_offset

        # L2 distance over channels between every f_a feature in the window and the
        # single wrapped f_b feature at the query pixel (broadcast over the window).
        # dim: (N, down - up, right - left) before border clipping
        batch_distance = torch.norm(
            aggr_pyramid_f_a[level][:, :, up:down, left:right] -
            level_aggr_pyramid_f_b_wrap[:, :, level_y:level_y + 1, level_x:level_x + 1],
            2, 1)

        show_multiple_img([{'img': I_a[0].detach().cpu().numpy().transpose(1, 2, 0), 'title': 'I_a'},
                           {'img': I_b[0].detach().cpu().numpy().transpose(1, 2, 0), 'title': 'I_b'},
                           {'img': batch_distance[0].detach().cpu().numpy(), 'title': 'feature distance', 'cmap':'gray'}],
                          title=title, num_cols=3)
def face_rendering(mesh, camera_pose, light_poses, show=True):
    """
    Render face RGBD images with input camera pose and lighting

    One directional light is added to the scene for every entry of `light_poses`,
    placed at that pose.

    :param mesh: Trimesh object
    :param camera_pose: Twc, np.array 4x4
    :param light_poses: list of light poses, Twc, list[np.array 4x4]
    :param show: whether show rendered image
    :return: tuple (color, depth, K, Twc): the rendered color and depth images, the
             result of K_from_PerspectiveCamera(camera, 1280, 960), and the camera pose
             with its y and z axes flipped
    """
    mesh = pyrender.Mesh.from_trimesh(mesh)
    scene = pyrender.Scene()
    scene.add(mesh)

    # Set up the camera -- z-axis away from the scene, x-axis right, y-axis up
    camera = pyrender.PerspectiveCamera(yfov=np.pi / 10.0)
    scene.add(camera, pose=camera_pose)

    # Set up the lights: honor the caller-supplied pose of each light
    for light_pose in light_poses:
        light = pyrender.DirectionalLight(color=np.ones(3), intensity=10.0)
        scene.add(light, pose=light_pose)

    # Render the scene; the renderer is always released, even if rendering or
    # displaying raises.
    # NOTE(review): the renderer is created with (960, 1280) while the intrinsics helper
    # below receives (1280, 960) -- confirm the helper expects (height, width).
    r = pyrender.OffscreenRenderer(960, 1280)
    try:
        color, depth = r.render(scene)

        # Show the images
        if show:
            img_list = [{'img': color, 'title': 'RGB'}, {'img': depth, 'title': 'Depth'}]
            show_multiple_img(img_list, num_cols=2)
    finally:
        r.delete()

    # Compute camera pose Twc: conjugating with T flips the y and z axes
    # (presumably converting the renderer's camera convention to the z-forward,
    # y-down one -- confirm against the consumers of Twc).
    T = np.array([
        [1.0, 0.0, 0.0, 0.0],
        [0.0, -1.0, 0.0, 0.0],
        [0.0, 0.0, -1.0, 0.0],
        [0.0, 0.0, 0.0, 1.0],
    ])
    Twc = np.dot(T, np.dot(camera_pose, T))

    return color, depth, K_from_PerspectiveCamera(camera, 1280, 960), Twc
def simple_face_rendering(obj_file_path, show=True):
    """
    Render a face mesh loaded from an .obj file with a fixed camera and a single light

    The camera is translated by 1.0 along +z with no rotation; one directional light
    is added with a zero-angle rotation pose.

    :param obj_file_path: path forwarded to load_mesh_from_obj
    :param show: whether show rendered image
    :return: tuple (color, depth, K, Twc): the rendered color and depth images, the
             result of K_from_PerspectiveCamera(camera, 1280, 960), and the camera pose
             with its y and z axes flipped
    """
    mesh = load_mesh_from_obj(obj_file_path)
    mesh = pyrender.Mesh.from_trimesh(mesh)
    scene = pyrender.Scene()
    scene.add(mesh)

    # Set up the camera -- z-axis away from the scene, x-axis right, y-axis up
    camera = pyrender.PerspectiveCamera(yfov=np.pi / 10.0)
    camera_pose = np.array([
        [1.0, 0.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0],
        [0.0, 0.0, 1.0, 1.0],
        [0.0, 0.0, 0.0, 1.0],
    ])
    scene.add(camera, pose=camera_pose)

    # Set up the light -- a single directional light; its pose is a zero-angle
    # rotation (presumably the identity, i.e. NOT translated with the camera -- confirm).
    light = pyrender.DirectionalLight(color=np.ones(3), intensity=10.0)
    light_pose = rotation_matrix(angle=0.0, direction=[0.0, 1.0, 0.0])
    scene.add(light, pose=light_pose)

    # Render the scene; the renderer is always released, even if rendering or
    # displaying raises.
    r = pyrender.OffscreenRenderer(960, 1280)
    try:
        color, depth = r.render(scene)

        # Show the images
        if show:
            img_list = [{'img': color, 'title': 'RGB'}, {'img': depth, 'title': 'Depth'}]
            show_multiple_img(img_list, num_cols=2)
    finally:
        r.delete()

    # Compute camera pose Twc: conjugating with T flips the y and z axes
    T = np.array([
        [1.0, 0.0, 0.0, 0.0],
        [0.0, -1.0, 0.0, 0.0],
        [0.0, 0.0, -1.0, 0.0],
        [0.0, 0.0, 0.0, 1.0],
    ])
    Twc = np.dot(T, np.dot(camera_pose, T))

    return color, depth, K_from_PerspectiveCamera(camera, 1280, 960), Twc
# Move the numpy inputs onto the GPU (Tcw_a is used as-is below, so it is
# presumably converted earlier -- confirm against the preceding code).
Tcw_b, K, img_a, img_b = (
    torch.from_numpy(array).cuda() for array in (Tcw_b, K, img_a, img_b)
)
depth_a, depth_b = (
    torch.from_numpy(array).cuda().view(H, W) for array in (depth_a, depth_b)
)

# Wrap frame B into frame A and mark which pixels of A fall inside B
wrap_b2a, _ = cam_opt_gpu.wrapping(img_b, depth_a, K, Tcw_a, Tcw_b)
dense_a2b, _ = cam_opt_gpu.dense_corres_a2b(depth_a, K, Tcw_a, Tcw_b)
overlap_marks = cam_opt_gpu.mark_out_bound_pixels(dense_a2b, depth_a).float()

overlap_ratio = cam_opt_gpu.photometric_overlap(depth_a, K, Tcw_a, Tcw_b)
print(overlap_ratio)

# Side-by-side preview of both frames, the wrapped frame and the overlap mask
view_panels = [
    {'img': img_a.cpu().numpy(), 'title': 'a'},
    {'img': img_b.cpu().numpy(), 'title': 'b'},
    {'img': wrap_b2a.cpu().numpy(), 'title': 'a2b'},
    {'img': overlap_marks.cpu().numpy(), 'title': 'overlap', 'cmap': 'gray'},
]
show_multiple_img(view_panels, title='View', num_cols=4)
next_Tcw[:3, :3], next_Tcw[:3, 3]) # Translation Cb = cam_opt.camera_center_from_Tcw(rel_T[:3, :3], rel_T[:3, 3]) baseline = np.linalg.norm(Cb) # View angle q = trans.quaternion_from_matrix(rel_T) R = trans.quaternion_matrix(q) rel_rad, rel_axis, _ = trans.rotation_from_matrix(R) rel_deg = np.rad2deg(rel_rad) next2cur, _ = cam_opt.wrapping(cur_img, next_img, cur_depth, K, rel_T[:3, :3], rel_T[:3, 3]) show_multiple_img([{ 'img': cur_img, 'title': 'a' }, { 'img': next2cur, 'title': 'wrap_b2a' }, { 'img': next_img, 'title': 'b' }, { 'img': cur_depth.reshape((h, w)), 'title': 'depth', 'cmap': 'jet' }], title='rel_deg: %f, rel_trans: %f' % (rel_deg, baseline)) break
Tcw[i + 1, :3, :3], Tcw[i + 1, :3, 3]) next2cur, _ = wrapping(cur_img, next_img, depth[i], cur_K, rel_T[:3, :3], rel_T[:3, 3]) img_list.append({'img': cur_img, 'title': str(i)}) depth_list.append({'img': depth[i], 'title': str(i)}) wrap_list.append({ 'img': next2cur, 'title': str(i + 1) + ' to ' + str(i) }) if i == min(I.shape[0] - 1, 5) - 1: img_list.append({'img': next_img, 'title': str(i + 1)}) depth_list.append({'img': depth[i + 1], 'title': str(i + 1)}) wrap_list.append({'img': query_img, 'title': 'query image'}) show_multiple_img(img_list + wrap_list + depth_list, title='dataset debug', num_cols=max(2, min(I.shape[0], 5))) # cur_img = I[0]#.reshape((256, 256, 1)) # next_img = seq_dict['img'][0].numpy().transpose(1, 2, 0)#depth[0 + 1].reshape((256, 256, 1)) # cur_depth = depth[0] # cur_K = K[0] # Tcw_next = seq_dict['Tcw'][0].numpy() # rel_T = relateive_pose(Tcw[0, :3, :3], Tcw[0, :3, 3], Tcw_next[:3, :3], Tcw_next[:3, 3]) # next2cur, _ = wrapping(cur_img, next_img, cur_depth, cur_K, rel_T[:3, :3], rel_T[:3, 3]) # img_list.append({'img': cur_img, 'title': str(0)}) # wrap_list.append({'img': next2cur, 'title': str(0+1) + 'to' + str(0)}) # img_list.append({'img': next_img, 'title': str(1)}) # img_list.append({'img': cur_depth, 'title': 'depth' + str(0)}) # # show_multiple_img(img_list + wrap_list, title='dataset debug', num_cols=2)
def _slice0_channel_last(stack, idx):
    """Return stack[idx, 0] as a numpy array with its first axis moved last."""
    return stack[idx, 0, :, :, :].permute(1, 2, 0).cpu().numpy()


# Wrap the sequence with the reference, noisy ("n_") and predicted ("p_") relative poses
I_wrap, d_wrap = wrap(I, d, rel_T, K, pre_cached_x_2d=pre_x_2d)
n_I_wrap, n_d_wrap = wrap(I, d, n_rel_T, K, pre_cached_x_2d=pre_x_2d)
p_I_wrap, p_d_wrap = wrap(I, d, p_rel_T, K, pre_cached_x_2d=pre_x_2d)

# One row of five panels per consecutive frame pair
for img_idx in range(L - 1):
    next_idx = img_idx + 1
    img_list.extend([
        {'img': _slice0_channel_last(I, img_idx), 'title': 'F' + str(img_idx)},
        {'img': _slice0_channel_last(I_wrap, img_idx), 'title': 'B_to_A' + str(img_idx)},
        {'img': _slice0_channel_last(p_I_wrap, img_idx), 'title': 'Pred B_to_A' + str(img_idx)},
        {'img': _slice0_channel_last(n_I_wrap, img_idx), 'title': 'Noise B_to_A' + str(img_idx)},
        {'img': _slice0_channel_last(I, next_idx), 'title': 'F' + str(next_idx)},
    ])

# Build the figure without displaying it, then save it to the output directory
show_multiple_img(img_list, title='Preview', num_cols=5, figsize=(8, 26), show=False)
sample_png_path = os.path.join(out_dir, "%05d_sample.png" % sample_idx)
plt.savefig(sample_png_path)
# Unpack the remaining anchor / positive / negative entries of the sample
anchor_depth, anchor_Tcw = data_dict['anchor_depth'], data_dict['anchor_Tcw']

pos_img, pos_ori_img = data_dict['pos_img'], data_dict['pos_ori_img']
pos_depth, pos_Tcw = data_dict['pos_depth'], data_dict['pos_Tcw']

neg_img, neg_ori_img = data_dict['neg_img'], data_dict['neg_ori_img']
neg_depth, neg_Tcw = data_dict['neg_depth'], data_dict['neg_Tcw']

# Pick 3 distinct indices out of range(5) to preview
sel_idces = np.random.choice(5, 3, replace=False)


def _channel_last(tensor):
    """Convert a tensor to numpy and move its first axis to the end."""
    return tensor.numpy().transpose((1, 2, 0))


# First row: anchor views; second row: positives; third row: negatives
debug_panels = [
    {'img': _channel_last(anchor_ori_img[0]), 'title': 'anchor_ori_img'},
    {'img': anchor_depth[0].numpy()[0], 'title': 'anchor_depth', 'cmap': 'jet'},
    {'img': _channel_last(anchor_img[0]), 'title': 'anchor_img'},
]
for rank, panel_title in enumerate(('pos_ori_img0', 'pos_ori_img1', 'pos_ori_img2')):
    debug_panels.append({'img': _channel_last(pos_ori_img[0, sel_idces[rank]]),
                         'title': panel_title})
for rank, panel_title in enumerate(('neg_ori_img0', 'neg_ori_img1', 'neg_ori_img2')):
    debug_panels.append({'img': _channel_last(neg_ori_img[0, sel_idces[rank]]),
                         'title': panel_title})

show_multiple_img(debug_panels, title='Dataset Debug', num_cols=3)

# Block until the user presses enter
input('wait')
show_2d_path = True

# Load the original frame sequence, then randomly sample sub-sequences from it
ori_seq = frame_seq_data.FrameSeqData(ori_seq_json_path)
sub_seq_list = seq_data.random_sel_frames.rand_sel_subseq_sun3d(
    scene_frames=ori_seq,
    trans_thres_range=0.15,
    frames_per_subseq_num=10,
    frames_range=(0.00, 0.8),
    max_subseq_num=30,
    interval_thres=2)

''' Scripts ----------------------------------------------------------------------------------------------------------- '''


def _frame_panel(frame):
    """Load the image of one frame record, scaled by 1/255, as a titled panel."""
    name, frame_idx = frame['file_name'], frame['id']
    pixels = cv2.imread(os.path.join(base_dir, name)).astype(np.float32) / 255.0
    return {'img': pixels, "title": frame_idx}


if not show_2d_path:
    # Show the images of every sampled sub-sequence, one figure per sub-sequence
    for seq in sub_seq_list:
        img_list = [_frame_panel(frame) for frame in seq.frames]
        show_multiple_img(img_list)
else:
    # Draw the full trajectory and overlay every sampled sub-sequence on the same axes
    plt.figure()
    ax = plt.gca()
    plt_seq.plot_frames_seq_2d(ori_seq, ax, legend='all')
    for sub_seq in sub_seq_list:
        plt_seq.plot_frames_seq_2d(sub_seq, ax, point_style='x-')
    plt.show()
def batched_select_gradient_pixels(imgs, depths, I_b, K, R, t, grad_thres=0.1, depth_thres=1e-4,
                                   num_pyramid=3, num_gradient_pixels=2000, visualize=False):
    """
    batch version of select gradient pixels, all operate in CPU

    For every sample and pyramid level, pixels are kept when their gradient norm exceeds
    the gradient threshold, their depth exceeds the depth threshold and the norm of the
    wrapped paired image at that pixel is above 1e-5.  While fewer than
    `num_gradient_pixels` indices have been collected, the gradient threshold is lowered
    by 1/255 and the selection is repeated from scratch, so the returned indices of a
    level may contain duplicates.

    :param imgs: input mini-batch gray-scale images, torch.Tensor (N, 1, H, W)
    :param depths: mini-batch depth maps, torch.Tensor (N, 1, H, W)
    :param I_b: paired images, torch.Tensor(N, C, H, W)
    :param K: camera intrinsic matrix tensor (N, 3, 3)
    :param R: rotation matrix in dimension of (N, 3, 3)
    :param t: translation vector (N, 3)
    :param grad_thres: selecting the pixel if gradient norm > gradient threshold
    :param depth_thres: selecting the pixel if depth > depth threshold
    :param num_pyramid: number of feature map pyramids used in ba_tracknet
    :param num_gradient_pixels: the number of pixels we want to select in one feature map
    :param visualize: plot selected pixels
    :return: selected indices (flattened row-major within each level),
             torch.Tensor (N, num_pyramid, num_gradient_pixels)
    :raises RuntimeError: if no pixel of a level passes the depth / wrapped-image masks,
                          in which case lowering the gradient threshold can never help
    """
    N, C, H, W = imgs.shape
    depths_np = depths.view(N, H, W).numpy()                                    # (N, H, W)
    grad = batched_gradient(imgs)                                               # (N, 2, H, W)
    grad_np = np.transpose(grad.numpy(), [0, 2, 3, 1])                          # (N, H, W, 2)
    grad_norm = np.linalg.norm(grad_np, axis=-1)                                # (N, H, W)

    # Wrap the paired image into the frame of `imgs` to find pixels that project inside it
    x_a_2d = x_2d_coords_torch(N, H, W).cpu()                                   # (N, H*W, 2)
    X_a_3d = batched_pi_inv(K, x_a_2d.view(N, H * W, 2), depths.view(N, H * W, 1))
    X_b_3d = batched_transpose(R, t, X_a_3d)
    x_b_2d, _ = batched_pi(K, X_b_3d)
    x_b_2d = batched_x_2d_normalize(float(H), float(W), x_b_2d).view(N, H, W, 2)  # (N, H, W, 2)
    I_b_wrap = batched_interp2d(I_b, x_b_2d)
    I_b_norm_wrap_np = torch.norm(I_b_wrap, p=2, dim=1).numpy()                 # (N, H, W)

    sel_index = torch.empty((N, num_pyramid, num_gradient_pixels), device=torch.device('cpu')).long()
    for i in range(N):
        cur_H = H
        cur_W = W
        for j in range(num_pyramid):
            # Per-level maps do not depend on the gradient threshold: build them once
            # instead of on every relaxation step.
            cur_grad_norm = cv2.resize(grad_norm[i, :, :], dsize=(cur_W, cur_H))
            cur_depths_np = skimage.measure.block_reduce(
                depths_np[i, :, :], (2**j, 2**j), np.min)
            cur_I_b_norm_wrap_np = skimage.measure.block_reduce(
                I_b_norm_wrap_np[i, :, :], (2**j, 2**j), np.min)
            cur_valid_mask = np.logical_and(cur_depths_np > depth_thres,
                                            cur_I_b_norm_wrap_np > 1e-5)        # (cur_H, cur_W)

            pixel_count = 0
            cur_grad_thres = grad_thres
            while pixel_count < num_gradient_pixels:
                cur_mask = np.logical_and(cur_grad_norm > cur_grad_thres, cur_valid_mask)
                # Flat indices of the selected pixels.  (The removed `np.int` alias
                # used here previously raises AttributeError on NumPy >= 1.24.)
                cur_sel_index = np.flatnonzero(cur_mask.reshape(cur_H * cur_W))
                np.random.shuffle(cur_sel_index)
                num_indices = cur_sel_index.shape[0]

                # Once the threshold is negative it no longer rejects any pixel, so an
                # empty selection can never grow: fail instead of looping forever.
                if num_indices == 0 and cur_grad_thres < 0:
                    raise RuntimeError(
                        'No selectable pixel for sample %d at pyramid level %d' % (i, j))

                start = pixel_count
                last = min(pixel_count + num_indices, num_gradient_pixels)
                sel_index[i, j, start:last] = torch.from_numpy(
                    cur_sel_index[:last - start]).long()
                pixel_count += num_indices
                cur_grad_thres -= 1. / 255.

            cur_H //= 2
            cur_W //= 2

    # Visualize the inputs and the selection masks of the first sample
    if visualize:
        img_list = [{'img': I_b[0].numpy().transpose(1, 2, 0), 'title': 'I_b'},
                    {'img': I_b_wrap[0].numpy().transpose(1, 2, 0), 'title': 'I_b_wrap_to_a'},
                    {'img': I_b_norm_wrap_np[0], 'title': 'I_b_norm_wrap_to_a', 'cmap': 'gray'},
                    {'img': imgs[0, 0].numpy(), 'title': 'I_a', 'cmap': 'gray'},
                    {'img': depths_np[0], 'title': 'd_a', 'cmap': 'gray'}]
        cur_H = H
        cur_W = W
        for level in range(num_pyramid):
            selected_mask = np.zeros((cur_H * cur_W), dtype=np.float32)
            selected_mask[sel_index[0, level, :].numpy()] = 1.0
            img_list.append({'img': selected_mask.reshape(cur_H, cur_W),
                             'title': 'sel_index_' + str(level), 'cmap': 'gray'})
            cur_H //= 2
            cur_W //= 2

        show_multiple_img(img_list, title='select pixels visualization', num_cols=4)

    return sel_index