def test_optimization(scene, batch_size, print_interval=20, imsave_interval=20, max_iter=100,
                      out_dir='./proj_tmp/'):
    """First render using the full renderer to get the surfel positions and colors,
    then re-render through the projection layer and optimize the input image to match
    the target.
    """
    from torch import optim
    import os
    import matplotlib.pyplot as plt

    plt.ion()

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    pos_wc = res['pos'].reshape(-1, res['pos'].shape[-1]).repeat(batch_size, 1, 1)
    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)

    target_image = res['image'].repeat(batch_size, 1, 1, 1)
    input_image = target_image + 0.1 * torch.randn(target_image.size(), device=target_image.device)
    input_image.requires_grad = True

    criterion = torch.nn.MSELoss(reduction='mean').cuda()
    optimizer = optim.Adam([input_image], lr=1e-2)

    h1 = plt.figure()
    loss_per_iter = []
    for iter in range(max_iter):
        im_est, mask = projection_renderer(pos_wc, input_image, camera)

        optimizer.zero_grad()
        loss = criterion(im_est * 255, target_image * 255)

        loss_ = get_data(loss)
        loss_per_iter.append(loss_)

        if iter % print_interval == 0 or iter == max_iter - 1:
            print('{}. Loss: {}'.format(iter, loss_))

        if iter % imsave_interval == 0 or iter == max_iter - 1:
            im_out_ = get_data(input_image)
            im_out_ = np.uint8(255 * im_out_ / im_out_.max())

            plt.figure(h1.number)
            plt.imshow(im_out_[0].squeeze())
            plt.title('%d. loss= %f' % (iter, loss_))
            plt.savefig(out_dir + '/fig_%05d.png' % iter)

        loss.backward()
        optimizer.step()
def test_raster_coordinates(scene, batch_size):
    """Test that the projected raster coordinates recover the regular pixel grid.

    Args:
        scene: Path to the scene file.
        batch_size: Number of copies of the scene in the batch.

    Returns:
        None
    """
    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    pos_cc = res['pos'].reshape(1, -1, res['pos'].shape[-1])
    pos_cc = pos_cc.repeat(batch_size, 1, 1)
    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)
    viewport = make_list2np(camera['viewport'])
    W, H = float(viewport[2] - viewport[0]), float(viewport[3] - viewport[1])

    px_coord_idx, px_coord = project_image_coordinates(pos_cc, camera)

    xp, yp = np.meshgrid(np.linspace(0, W - 1, int(W)), np.linspace(0, H - 1, int(H)))
    xp = xp.ravel()[None, ...].repeat(batch_size, axis=0)
    yp = yp.ravel()[None, ...].repeat(batch_size, axis=0)

    px_coord = torch.round(px_coord - 0.5).long()
    np.testing.assert_array_almost_equal(xp, get_data(px_coord[..., 0]))
    np.testing.assert_array_almost_equal(yp, get_data(px_coord[..., 1]))
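# A minimal standalone sketch of the mapping exercised by test_raster_coordinates:
# camera-space points projected onto raster (pixel) coordinates with a simple pinhole
# model. The intrinsics (fx, fy, cx, cy) are illustrative assumptions and are not taken
# from project_image_coordinates or the scene's camera.
def _pinhole_project_sketch(pos_cc, fx, fy, cx, cy):
    """Project (N, 3) camera-space points (camera looking down -Z) to (N, 2) raster coordinates."""
    x, y, z = pos_cc[:, 0], pos_cc[:, 1], pos_cc[:, 2]
    u = fx * x / -z + cx  # perspective divide by the positive depth (-z)
    v = fy * y / -z + cy
    return np.stack([u, v], axis=-1)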
def test_render(scene, lfnet, num_samples=200):
    """Render the scene with the light field network and display the normalized result."""
    res = render_scene(scene)
    pos = get_data(res['pos'])
    normal = get_data(res['normal'])
    im = lf_renderer(pos, normal, lfnet, num_samples=num_samples)
    im_ = get_data(im)
    im_ = im_ / im_.max()
    plt.figure()
    plt.imshow(im_)
    plt.show()
def test_transformation_consistency(scene, batch_size):
    print('test_transformation_consistency')
    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    pos_cc = res['pos'].reshape(-1, res['pos'].shape[-1])
    normal_cc = res['normal'].reshape(-1, res['normal'].shape[-1])

    surfels = cam_to_world(pos_cc, normal_cc, scene['camera'])
    surfels_cc = world_to_cam(surfels['pos'], surfels['normal'], scene['camera'])

    np.testing.assert_array_almost_equal(get_data(pos_cc), get_data(surfels_cc['pos'][:, :3]))
    np.testing.assert_array_almost_equal(get_data(normal_cc), get_data(surfels_cc['normal'][:, :3]))
def test_depth_to_world_consistency(scene, batch_size):
    """Check that depth (camera-space Z) can be lifted back to full camera and world coordinates."""
    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    pos_wc1 = res['pos'].reshape(-1, res['pos'].shape[-1])
    pos_cc1 = world_to_cam(pos_wc1, None, scene['camera'])['pos']

    # First test the non-batched z_to_pcl_CC method:
    # NOTE: z_to_pcl_CC takes the Z dimension in camera coordinates as input
    # and recovers the full (X, Y, Z) in camera coordinates.
    pos_cc2 = z_to_pcl_CC(pos_cc1[:, 2], scene['camera'])
    pos_wc2 = cam_to_world(pos_cc2, None, scene['camera'])['pos']

    # Test Z -> (X, Y, Z)
    np.testing.assert_array_almost_equal(get_data(pos_cc1[..., :3]), get_data(pos_cc2[..., :3]))
    # Test world -> camera -> Z -> (X, Y, Z) in camera -> world
    np.testing.assert_array_almost_equal(get_data(pos_wc1[..., :3]), get_data(pos_wc2[..., :3]))

    # Then test the batched version:
    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)

    pos_wc1 = pos_wc1.repeat(batch_size, 1, 1)
    pos_cc1 = world_to_cam_batched(pos_wc1, None, scene['camera'])['pos']
    pos_cc2 = z_to_pcl_CC_batched(pos_cc1[..., 2], camera)  # NOTE: z = -depth
    pos_wc2 = cam_to_world_batched(pos_cc2, None, camera)['pos']

    # Test Z -> (X, Y, Z)
    np.testing.assert_array_almost_equal(get_data(pos_cc1[..., :3]), get_data(pos_cc2[..., :3]))
    # Test world -> camera -> Z -> (X, Y, Z) in camera -> world
    np.testing.assert_array_almost_equal(get_data(pos_wc1[..., :3]), get_data(pos_wc2[..., :3]))
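# A minimal sketch of the kind of lifting z_to_pcl_CC is expected to perform: recovering
# full camera-space (X, Y, Z) from Z alone, using the pixel grid the points project to.
# Assumes a pinhole camera looking down -Z; the intrinsics (fx, fy, cx, cy) are
# illustrative and not taken from this project's camera model.
def _pinhole_unproject_sketch(u, v, z, fx, fy, cx, cy):
    """Lift raster coordinates (u, v) and camera-space z (negative in front of the camera)
    back to (N, 3) camera-space points."""
    x = (u - cx) * -z / fx  # inverts u = fx * x / -z + cx
    y = (v - cy) * -z / fy
    return np.stack([x, y, z], axis=-1)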
def test_render_projection_consistency(scene, batch_size):
    """First render using the full renderer to get the surfel positions and colors,
    then re-render through the projection layer with the unchanged camera and check
    that the original image is reproduced exactly.
    """
    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    pos_cc = res['pos'].reshape(-1, res['pos'].shape[-1]).repeat(batch_size, 1, 1)
    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)
    image = res['image'].repeat(batch_size, 1, 1, 1)

    im, mask = projection_renderer(pos_cc, image, camera)

    diff = np.abs(get_data(image) - get_data(im))
    np.testing.assert_(diff.sum() < 1e-10, 'Non-zero difference.')
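# A minimal sketch of the property checked above, under the assumption that
# projection_renderer scatters each surfel's color into the pixel it projects to:
# re-projecting an image's own surfels with an unchanged camera is a one-to-one
# scatter, so the input image must be reproduced exactly.
def _nearest_pixel_splat_sketch(px_coord, colors, height, width):
    """Scatter (N, C) colors into an (H, W, C) image at integer (x, y) pixel coordinates (N, 2).
    Later points overwrite earlier ones on collisions; a real splatter would resolve by depth."""
    out = np.zeros((height, width, colors.shape[-1]), dtype=colors.dtype)
    out[px_coord[:, 1], px_coord[:, 0]] = colors  # index as [row, col] = [y, x]
    return out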
def optimize_lfnet(scene, lfnet, max_iter=2000, num_samples=120, lr=1e-3, print_interval=10,
                   imsave_interval=100, out_dir='./tmp_lf_opt'):
    """Optimize the Light Field Network parameters so its rendering matches the target image.

    Args:
        scene: Scene file.
        lfnet: Light Field Network.

    Returns:
        None
    """
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    res = render_scene(scene)
    pos = get_data(res['pos'])
    normal = get_data(res['normal'])

    opt_vars = lfnet.parameters()
    criterion = torch.nn.MSELoss(reduction='mean').cuda()
    optimizer = optim.Adam(opt_vars, lr=lr)
    lr_scheduler = StepLR(optimizer, step_size=500, gamma=0.8)

    loss_per_iter = []
    target_im = res['image']
    target_im_grad = grad_spatial2d(target_im.mean(dim=-1)[..., np.newaxis])

    h1 = plt.figure()
    plt.figure(h1.number)
    plt.imshow(get_data(target_im))
    plt.title('Target')
    plt.savefig(out_dir + '/Target.png')

    for iter in range(max_iter):
        im_est = lf_renderer(pos, normal, lfnet, num_samples=num_samples)
        im_est_grad = grad_spatial2d(im_est.mean(dim=-1)[..., np.newaxis])

        optimizer.zero_grad()
        loss = criterion(im_est * 255, target_im * 255) + criterion(target_im_grad * 100,
                                                                    im_est_grad * 100)

        loss_ = get_data(loss)
        loss_per_iter.append(loss_)

        if iter % print_interval == 0 or iter == max_iter - 1:
            print('{}. Loss: {}'.format(iter, loss_))

        if iter % imsave_interval == 0 or iter == max_iter - 1:
            im_out_ = get_data(im_est)
            im_out_ = np.uint8(255 * im_out_ / im_out_.max())

            plt.figure(h1.number)
            plt.imshow(im_out_)
            plt.title('%d. loss= %f' % (iter, loss_))
            plt.savefig(out_dir + '/fig_%05d.png' % iter)

        loss.backward()
        optimizer.step()
        lr_scheduler.step()  # step the scheduler after the optimizer (PyTorch >= 1.1 ordering)

    plt.figure()
    plt.plot(loss_per_iter, linewidth=2)
    plt.xlabel('Iteration', fontsize=14)
    plt.title('Loss', fontsize=12)
    plt.grid(True)
    plt.savefig(out_dir + '/loss.png')
def test_depth_optimization(scene, batch_size, print_interval=20, imsave_interval=20, max_iter=100,
                            out_dir='./proj_tmp_depth-fast/'):
    """First render using the full renderer to get the surfel positions and colors,
    then optimize a per-pixel depth map through the projection layer so that the
    re-projected image matches a target view.
    """
    from torch import optim
    import torchvision
    import os
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import imageio
    from PIL import Image

    plt.ion()

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    use_chair = True
    use_fast_projection = True
    use_masked_loss = True
    use_same_render_method_for_target = False
    lr = 1e-2

    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    # true_pos_wc = res['pos'].reshape(-1, res['pos'].shape[-1]).repeat(batch_size, 1, 1)
    true_input_img = res['image'].unsqueeze(0).repeat(batch_size, 1, 1, 1)

    if use_chair:
        camera = scene['camera']
        camera['eye'] = tch_var_f([0, 0, 4, 1]).repeat(batch_size, 1)
        camera['at'] = tch_var_f([0, 0, 0, 1]).repeat(batch_size, 1)
        camera['up'] = tch_var_f([0, 1, 0, 0]).repeat(batch_size, 1)
    else:
        camera = scene['camera']
        camera['eye'] = camera['eye'].repeat(batch_size, 1)
        camera['at'] = camera['at'].repeat(batch_size, 1)
        camera['up'] = camera['up'].repeat(batch_size, 1)

    if use_chair:
        chair_0 = Image.open('object-0-azimuth-000.006-rgb.png')
        true_input_img = torchvision.transforms.ToTensor()(chair_0).to(
            true_input_img.device).unsqueeze(0)
        true_input_img = true_input_img.permute(0, 2, 3, 1)
        camera['viewport'] = [0, 0, 128, 128]  # TODO: don't hardcode

    true_depth = res['depth'].repeat(batch_size, 1, 1).reshape(
        batch_size, -1)  # Not relevant if 'use_chair' is True
    # depth = true_depth.clone() + 0.1 * torch.randn_like(true_depth)
    depth = 0.1 * torch.randn(batch_size, true_input_img.size(-2) * true_input_img.size(-3),
                              device=true_input_img.device, dtype=torch.float)
    depth.requires_grad = True

    if use_chair:
        target_angle = np.deg2rad(20)
    else:
        target_angle = -np.pi / 12

    rotated_camera = copy.deepcopy(camera)
    # randomly_rotate_cameras(rotated_camera, theta_range=[-np.pi / 16, np.pi / 16],
    #                         phi_range=[-np.pi / 8, np.pi / 8])
    randomly_rotate_cameras(rotated_camera, theta_range=[0, 1e-10],
                            phi_range=[target_angle, target_angle + 1e-10])

    if use_chair:
        target_image = Image.open('object-0-azimuth-020.006-rgb.png')
        target_image = torchvision.transforms.ToTensor()(target_image).to(
            true_input_img.device).unsqueeze(0)
        target_image = target_image.permute(0, 2, 3, 1)
        target_mask = torch.ones(*target_image.size()[:-1], 1, device=target_image.device,
                                 dtype=torch.float)
    else:
        true_pos_cc = z_to_pcl_CC_batched(-true_depth, camera)  # NOTE: z = -depth
        true_pos_wc = cam_to_world_batched(true_pos_cc, None, camera)['pos']
        if use_same_render_method_for_target:
            if use_fast_projection:
                target_image, proj_out = projection_renderer_differentiable_fast(
                    true_pos_wc, true_input_img, rotated_camera)
                target_mask = proj_out['mask']
            else:
                target_image, target_mask = projection_renderer_differentiable(
                    true_pos_wc, true_input_img, rotated_camera)
            # target_image, _ = projection_renderer(true_pos_wc, true_input_img, rotated_camera)
        else:
            scene2 = copy.deepcopy(scene)
            scene['camera'] = copy.deepcopy(rotated_camera)
            scene['camera']['eye'] = scene['camera']['eye'][0]
            scene['camera']['at'] = scene['camera']['at'][0]
            scene['camera']['up'] = scene['camera']['up'][0]
            target_image = render(scene)['image'].unsqueeze(0).repeat(batch_size, 1, 1, 1)
            target_mask = torch.ones(*target_image.size()[:-1], 1, device=target_image.device,
                                     dtype=torch.float)

    input_image = true_input_img  # + 0.1 * torch.randn(target_image.size(), device=target_image.device)

    criterion = torch.nn.MSELoss(reduction='none').cuda()
    optimizer = optim.Adam([depth], lr=lr)

    h1 = plt.figure()
    # fig_imgs = []
    depth_imgs = []
    out_imgs = []

    imageio.imsave(out_dir + '/optimization_input_image.png', input_image[0].cpu().numpy())
    imageio.imsave(out_dir + '/optimization_target_image.png', target_image[0].cpu().numpy())
    if not use_chair:
        imageio.imsave(out_dir + '/optimization_target_depth.png',
                       true_depth.view(*input_image.size()[:-1], 1)[0].cpu().numpy())

    loss_per_iter = []
    for iter in range(max_iter):
        optimizer.zero_grad()

        # depth_in = torch.nn.functional.softplus(depth + 3)
        depth_in = depth + 4
        pos_cc = z_to_pcl_CC_batched(-depth_in, camera)  # NOTE: z = -depth
        pos_wc = cam_to_world_batched(pos_cc, None, camera)['pos']

        if use_fast_projection:
            im_est, proj_out = projection_renderer_differentiable_fast(pos_wc, input_image,
                                                                       rotated_camera)
            im_mask = proj_out['mask']
        else:
            im_est, im_mask = projection_renderer_differentiable(pos_wc, input_image,
                                                                 rotated_camera)
        # im_est, mask = projection_renderer(pos_wc, input_image, rotated_camera)

        if use_masked_loss:
            loss = (torch.sum(target_mask * im_mask * criterion(im_est * 255, target_image * 255))
                    / torch.sum(target_mask * im_mask))
        else:
            loss = criterion(im_est * 255, target_image * 255).mean()

        loss_ = get_data(loss)
        loss_per_iter.append(loss_)

        if iter % print_interval == 0 or iter == max_iter - 1:
            print('{}. Loss: {}'.format(iter, loss_))

        if iter % imsave_interval == 0 or iter == max_iter - 1:
            # Input image
            # im_out_ = get_data(input_image.detach())
            # im_out_ = np.uint8(255 * im_out_ / im_out_.max())
            # fig = plt.figure(h1.number)
            # plot = fig.add_subplot(111)
            # plot.imshow(im_out_[0].squeeze())
            # plot.set_title('%d. loss= %f' % (iter, loss_))
            # # plt.savefig(out_dir + '/fig_%05d.png' % iter)
            # fig_data = np.array(fig.canvas.renderer._renderer)
            # fig_imgs.append(fig_data)

            # Depth
            im_out_ = get_data(depth_in.view(*input_image.size()[:-1], 1).detach())
            im_out_ = np.uint8(255 * im_out_ / im_out_.max())
            fig = plt.figure(h1.number)
            plot = fig.add_subplot(111)
            plot.imshow(im_out_[0].squeeze())
            plot.set_title('%d. loss= %f' % (iter, loss_))
            # plt.savefig(out_dir + '/fig_%05d.png' % iter)
            fig.canvas.draw()  # render the canvas so the agg buffer is populated
            depth_data = np.array(fig.canvas.renderer._renderer)
            depth_imgs.append(depth_data)

            # Output image
            im_out_ = get_data(im_est.detach())
            im_out_ = np.uint8(255 * im_out_ / im_out_.max())
            fig = plt.figure(h1.number)
            plot = fig.add_subplot(111)
            plot.imshow(im_out_[0].squeeze())
            plot.set_title('%d. loss= %f' % (iter, loss_))
            # plt.savefig(out_dir + '/fig_%05d.png' % iter)
            fig.canvas.draw()
            out_data = np.array(fig.canvas.renderer._renderer)
            out_imgs.append(out_data)

        loss.backward()
        optimizer.step()

    # imageio.mimsave(out_dir + '/optimization_anim_in.gif', fig_imgs)
    imageio.mimsave(out_dir + '/optimization_anim_depth.gif', depth_imgs)
    imageio.mimsave(out_dir + '/optimization_anim_out.gif', out_imgs)
def test_visual_reverse_renderer(scene, batch_size):
    """Test that outputs visual images for the user to compare."""
    from torchvision.utils import save_image
    import time

    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)
    original_camera = copy.deepcopy(camera)

    pos_wc = res['pos'].reshape(-1, res['pos'].shape[-1]).repeat(batch_size, 1, 1)
    depth = res['depth'].repeat(batch_size, 1, 1).reshape(batch_size, -1)
    # pos_cc = z_to_pcl_CC_batched(-depth, scene['camera'])  # NOTE: z = -depth
    # pos_wc = cam_to_world_batched(pos_cc, None, scene['camera'])['pos']

    randomly_rotate_cameras(camera, theta_range=[-np.pi / 16, np.pi / 16],
                            phi_range=[-np.pi / 8, np.pi / 8])

    image = res['image'].repeat(batch_size, 1, 1, 1)
    save_image(image.clone().permute(0, 3, 1, 2), 'test-original.png', nrow=2)
    save_image(depth.view(*image.size()[:-1], 1).clone().permute(0, 3, 1, 2),
               'test-original-depth.png', nrow=2, normalize=True)

    # im, _ = projection_renderer(pos_wc, image, camera)
    # save_image(im.clone().permute(0, 3, 1, 2), 'test-rotated-reprojected-nonblurred.png', nrow=2)

    # If we want to merge with another already rotated image
    # NOTE: only works on batch 1 because `render` is not batched
    rotated_scene = copy.deepcopy(scene)
    rotated_scene['camera'] = copy.deepcopy(camera)
    rotated_scene['camera']['eye'] = rotated_scene['camera']['eye'][0]
    rotated_scene['camera']['at'] = rotated_scene['camera']['at'][0]
    rotated_scene['camera']['up'] = rotated_scene['camera']['up'][0]
    res_rotated = render(rotated_scene)
    rotated_image = res_rotated['image'].repeat(batch_size, 1, 1, 1)
    save_image(rotated_image.clone().permute(0, 3, 1, 2), 'test-original-rotated.png', nrow=2)

    out_pos_wc = res_rotated['pos'].reshape(-1, res['pos'].shape[-1]).repeat(batch_size, 1, 1)

    torch.cuda.synchronize()
    st = time.time()
    im, proj_out = projection_reverse_renderer(image, pos_wc, out_pos_wc, original_camera, camera,
                                               compute_new_depth=True)
    torch.cuda.synchronize()
    print(f"t1: {time.time() - st}")

    st = time.time()
    projection_renderer_differentiable_fast(pos_wc, image, camera, blur_size=1e-10,
                                            compute_new_depth=True)
    torch.cuda.synchronize()
    print(f"t2: {time.time() - st}")

    save_image(im.clone().permute(0, 3, 1, 2), 'test-fast-rotated-reprojected-unmerged.png', nrow=2)
    save_image(proj_out['mask'].clone().permute(0, 3, 1, 2), 'test-fast-soft-mask.png', nrow=2,
               normalize=True)
    save_image(proj_out['depth'].clone().permute(0, 3, 1, 2), 'test-fast-depth.png', nrow=2,
               normalize=True)

    for key in proj_out.keys():
        proj_out[key] = proj_out[key].cpu().numpy()
    np.savez('test-fast-rotation-reprojection.npz', **proj_out, image=im.cpu().numpy(),
             image_in=image.cpu().numpy(),
             depth_in=depth.view(*image.size()[:-1], 1).cpu().numpy())

    im, _ = projection_reverse_renderer(image, pos_wc, out_pos_wc, original_camera, camera,
                                        rotated_image)
    save_image(im.clone().permute(0, 3, 1, 2), 'test-fast-rotated-reprojected-merged.png', nrow=2)
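# A hypothetical entry point showing how these tests might be invoked from the command
# line. The default scene path is a placeholder assumption; substitute the scene file
# used elsewhere in this project.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Projection / render layer tests.')
    parser.add_argument('--scene', type=str, default='scene.json',
                        help='Path to the scene file (placeholder default).')
    parser.add_argument('--batch-size', type=int, default=2)
    args = parser.parse_args()

    test_raster_coordinates(args.scene, args.batch_size)
    test_transformation_consistency(args.scene, args.batch_size)
    test_depth_to_world_consistency(args.scene, args.batch_size)
    test_render_projection_consistency(args.scene, args.batch_size)
    test_visual_reverse_renderer(args.scene, args.batch_size)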