def batch_render_random_camera(filename, cam_dist, num_views, width, height,
                               fovy, focal_length, theta_range=None,
                               phi_range=None, axis=None, angle=None,
                               cam_pos=None, cam_lookat=None,
                               double_sided=False, use_quartic=False,
                               b_shadow=True, tile_size=None,
                               save_image_queue=None):
    rendering_time = []

    obj = load_model(filename)
    # normalize the vertices
    v = obj['v']
    axis_range = np.max(v, axis=0) - np.min(v, axis=0)
    v = (v - np.mean(v, axis=0)) / max(axis_range)  # make the largest spread 1
    obj['v'] = v

    scene = copy.deepcopy(SCENE_BASIC)

    scene['camera']['viewport'] = [0, 0, width, height]
    scene['camera']['fovy'] = np.deg2rad(fovy)
    scene['camera']['focal_length'] = focal_length

    mesh = obj_to_triangle_spec(obj)
    faces = mesh['face']
    normals = mesh['normal']
    num_tri = faces.shape[0]

    if 'disk' in scene['objects']:
        del scene['objects']['disk']
    scene['objects'].update({'triangle': {'face': None, 'normal': None,
                                          'material_idx': None}})
    scene['objects']['triangle']['face'] = tch_var_f(faces.tolist())
    scene['objects']['triangle']['normal'] = tch_var_f(normals.tolist())
    scene['objects']['triangle']['material_idx'] = tch_var_l(
        np.zeros(num_tri, dtype=int).tolist())
    scene['materials']['albedo'] = tch_var_f([[0.6, 0.6, 0.6]])
    scene['tonemap']['gamma'] = tch_var_f([1.0])  # Linear output

    # generate camera positions on a sphere
    if cam_pos is None:
        cam_pos = uniform_sample_sphere(radius=cam_dist, num_samples=num_views,
                                        axis=axis, angle=angle,
                                        theta_range=theta_range,
                                        phi_range=phi_range)
    lookat = cam_lookat if cam_lookat is not None else np.mean(v, axis=0)
    scene['camera']['at'] = tch_var_f(lookat)

    for idx in range(cam_pos.shape[0]):
        scene['camera']['eye'] = tch_var_f(cam_pos[idx])

        # main render run
        start_time = time()
        res = render(scene, tile_size=tile_size, tiled=tile_size is not None,
                     shadow=b_shadow, double_sided=double_sided,
                     use_quartic=use_quartic)
        res['suffix'] = '_{}'.format(idx)
        res['camera_far'] = scene['camera']['far']
        if save_image_queue is not None:  # guard: the default is None
            save_image_queue.put_nowait(get_data(res))
        rendering_time.append(time() - start_time)

    # Timing statistics
    print('Rendering time mean: {}s, std: {}s'.format(np.mean(rendering_time),
                                                      np.std(rendering_time)))
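# A minimal usage sketch (an assumption, not part of the original file):
# batch_render_random_camera calls put_nowait() on `save_image_queue`, so the
# caller supplies a queue-like object and drains it afterwards (or from a
# consumer thread). The .obj path below is hypothetical.
def _example_batch_render():
    import queue
    save_image_queue = queue.Queue()
    batch_render_random_camera('model.obj', cam_dist=2.0, num_views=4,
                               width=64, height=64, fovy=30.0,
                               focal_length=0.1,
                               save_image_queue=save_image_queue)
    while not save_image_queue.empty():
        # each entry is a render-result dict ('image', 'depth', 'suffix', ...)
        res = save_image_queue.get()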
def render_scene(scene, output_folder, norm_depth_image_only=False,
                 backface_culling=False, plot_res=True):
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    # main render run
    res = render(scene, norm_depth_image_only=norm_depth_image_only,
                 backface_culling=backface_culling)

    im = get_data(res['image'])
    im_nearest = get_data(res['nearest'])
    obj_pixel_count = get_data(
        res['obj_pixel_count']) if 'obj_pixel_count' in res else None

    if plot_res:
        plt.ion()
        plt.figure()
        plt.imshow(im)
        plt.title('Final Rendered Image')
        plt.savefig(output_folder + '/img_torch.png')

        plt.figure()
        plt.imshow(im_nearest)
        plt.title('Nearest Object Index')
        plt.colorbar()
        plt.savefig(output_folder + '/img_nearest.png')

        if obj_pixel_count is not None:  # count is not present for all renders
            plt.figure()
            plt.plot(obj_pixel_count, 'r-+')
            plt.xlabel('Object Index')
            plt.ylabel('Number of Pixels')

    depth = get_data(res['depth'])
    depth[depth >= scene['camera']['far']] = np.inf
    print(depth.min(), depth.max())
    if plot_res and depth.min() != np.inf:
        plt.figure()
        plt.imshow(depth)
        plt.title('Depth Image')
        plt.savefig(output_folder + '/img_depth_torch.png')

    if plot_res:
        plt.ioff()
        plt.show()

    return res
def __getitem__(self, idx):
    """Get item."""
    # Get object path
    synset, obj = self.samples[idx]
    obj_path = os.path.join(self.root_dir, synset, obj, 'models',
                            'model_normalized.obj')

    # Load obj model
    obj_model = load_model(obj_path)

    # Show loaded model
    animate_sample_generation(model_name=None, obj=obj_model,
                              num_samples=10, out_dir=None,
                              resample=False, rotate_angle=360)

    # Convert model to splats
    splats_model = obj_to_splat(obj_model, use_circum_circle=True)

    # Create a splat scene that can be rendered
    n_splats = splats_model['vn'].shape[0]
    splat_scene = SplatScene(n_lights=2, n_splats=n_splats)

    # Add the splats to the scene
    for i, splat in enumerate(
            np.column_stack((splats_model['vn'],
                             splats_model['v'],
                             np.asarray(splats_model['r'], dtype=np.float32),
                             np.ones((n_splats, 3), dtype=np.float32)))):
        splat_scene.set_splat_array(i, splat)

    # Camera
    splat_scene.set_camera(
        viewport=np.asarray([0, 0, 64, 64], dtype=np.float32),
        eye=np.asarray([0.0, 1.0, 10.0, 1.0], dtype=np.float32),
        up=np.asarray([0.0, 1.0, 0.0, 0.0], dtype=np.float32),
        at=np.asarray([0.0, 0.0, 0.0, 1.0], dtype=np.float32),
        fovy=90.0,
        focal_length=1.0,
        near=1.0,
        far=1000.0)

    # Tonemap
    splat_scene.set_tonemap(tonemap_type='gamma', gamma=0.8)

    # Lights
    splat_scene.set_light(
        id=0,
        pos=np.asarray([20., 20., 20.], dtype=np.float32),
        color=np.asarray([0.8, 0.1, 0.1], dtype=np.float32),
        attenuation=np.asarray([0.2, 0.2, 0.2], dtype=np.float32))
    splat_scene.set_light(
        id=1,
        pos=np.asarray([-15, 3., 15.], dtype=np.float32),
        color=np.asarray([0.8, 0.1, 0.1], dtype=np.float32),
        attenuation=np.asarray([0., 1., 0.], dtype=np.float32))

    # Render the splats model
    res = render(splat_scene.to_pytorch())
    print('Finished render')

    # .cpu() is a no-op on CPU tensors, so this works for both devices
    im = res['image'].cpu().data.numpy()
    plt.ion()
    plt.figure()
    plt.imshow(im)
    plt.title('Final Rendered Image')
    plt.show()
    print('Finished plot')
    exit()  # debug: stop after rendering the first sample

    # Add model and synset to the output dictionary
    sample = {'splats': splats_model, 'synset': synset}

    # Transform
    if self.transform:
        sample = self.transform(sample)

    return sample
def get_real_samples(self):
    """Get a batch of real samples."""
    # Define the camera poses
    if not self.opt.same_view:
        if self.opt.full_sphere_sampling:
            self.cam_pos = uniform_sample_sphere(
                radius=self.opt.cam_dist, num_samples=self.opt.batchSize,
                axis=self.opt.axis, angle=np.deg2rad(self.opt.angle),
                theta_range=self.opt.theta, phi_range=self.opt.phi)
        else:
            self.cam_pos = uniform_sample_sphere(
                radius=self.opt.cam_dist, num_samples=self.opt.batchSize,
                axis=self.opt.axis, angle=self.opt.angle,
                theta_range=np.deg2rad(self.opt.theta),
                phi_range=np.deg2rad(self.opt.phi))
    if self.opt.full_sphere_sampling_light:
        self.light_pos1 = uniform_sample_sphere(
            radius=self.opt.cam_dist, num_samples=self.opt.batchSize,
            axis=self.opt.axis, angle=np.deg2rad(44),
            theta_range=self.opt.theta, phi_range=self.opt.phi)
        # self.light_pos2 = uniform_sample_sphere(
        #     radius=self.opt.cam_dist, num_samples=self.opt.batchSize,
        #     axis=self.opt.axis, angle=np.deg2rad(40),
        #     theta_range=self.opt.theta, phi_range=self.opt.phi)
    else:
        # Sample light positions inside a box scaled by the camera distance
        light_eps = 0.15
        self.light_pos1 = np.random.rand(self.opt.batchSize, 3) * \
            self.opt.cam_dist + light_eps
        self.light_pos2 = np.random.rand(self.opt.batchSize, 3) * \
            self.opt.cam_dist + light_eps

    # TODO: deg2rad in all the angles????

    # Create a splats rendering scene
    large_scene = create_scene(self.opt.width, self.opt.height,
                               self.opt.fovy, self.opt.focal_length,
                               self.opt.n_splats)
    lookat = self.opt.at if self.opt.at is not None else [0.0, 0.0, 0.0, 1.0]
    large_scene['camera']['at'] = tch_var_f(lookat)

    # Render scenes
    data, data_depth, data_normal, data_cond = [], [], [], []
    inpath = self.opt.vis_images + '/'
    inpath2 = self.opt.vis_input + '/'
    for idx in range(self.opt.batchSize):
        # Fetch the generated samples once; both branches below use them.
        samples = self.get_samples()

        # Save the splats into the rendering scene
        if self.opt.use_mesh:
            if 'sphere' in large_scene['objects']:
                del large_scene['objects']['sphere']
            if 'disk' in large_scene['objects']:
                del large_scene['objects']['disk']
            if 'triangle' not in large_scene['objects']:
                large_scene['objects'] = {
                    'triangle': {'face': None, 'normal': None,
                                 'material_idx': None}}
            large_scene['objects']['triangle']['material_idx'] = tch_var_l(
                np.zeros(samples['mesh']['face'][0].shape[0],
                         dtype=int).tolist())
            large_scene['objects']['triangle']['face'] = Variable(
                samples['mesh']['face'][0].cuda(), requires_grad=False)
            large_scene['objects']['triangle']['normal'] = Variable(
                samples['mesh']['normal'][0].cuda(), requires_grad=False)
        else:
            if 'sphere' in large_scene['objects']:
                del large_scene['objects']['sphere']
            if 'triangle' in large_scene['objects']:
                del large_scene['objects']['triangle']
            if 'disk' not in large_scene['objects']:
                large_scene['objects'] = {
                    'disk': {'pos': None, 'normal': None,
                             'material_idx': None}}
            large_scene['objects']['disk']['radius'] = tch_var_f(
                np.ones(self.opt.n_splats) * self.opt.splats_radius)
            large_scene['objects']['disk']['material_idx'] = tch_var_l(
                np.zeros(self.opt.n_splats, dtype=int).tolist())
            large_scene['objects']['disk']['pos'] = Variable(
                samples['splats']['pos'][idx].cuda(), requires_grad=False)
            large_scene['objects']['disk']['normal'] = Variable(
                samples['splats']['normal'][idx].cuda(), requires_grad=False)

        # Set camera position
        if not self.opt.same_view:
            large_scene['camera']['eye'] = tch_var_f(self.cam_pos[idx])
        else:
            large_scene['camera']['eye'] = tch_var_f(self.cam_pos[0])

        large_scene['lights']['pos'][0, :3] = tch_var_f(self.light_pos1[idx])
        # large_scene['lights']['pos'][1, :3] = tch_var_f(self.light_pos2[idx])

        # Render scene
        res = render(large_scene,
                     norm_depth_image_only=self.opt.norm_depth_image_only,
                     double_sided=True,
                     use_quartic=self.opt.use_quartic)

        # Get rendered output; `im` is needed for both 1- and 3-channel output
        depth = res['depth']
        im_d = depth.unsqueeze(0)
        im = res['image'].permute(2, 0, 1)
        im_ = get_data(res['image'])
        # im_img_ = get_normalmap_image(im_)
        target_normal_ = get_data(res['normal'])
        target_normalmap_img_ = get_normalmap_image(target_normal_)
        im_n = tch_var_f(target_normalmap_img_).view(
            im.shape[1], im.shape[2], 3).permute(2, 0, 1)

        # Save the inputs (camera, light, image, depth, normal) for inspection
        file_name = inpath2 + str(self.iterationa_no) + "_" + str(
            self.critic_iter) + 'input_{:05d}.txt'.format(idx)
        with open(file_name, 'w') as text_file:
            text_file.write('%s\n' % str(large_scene['camera']['eye'].data))
        out_file_name = inpath2 + str(self.iterationa_no) + "_" + str(
            self.critic_iter) + 'input_{:05d}.npy'.format(idx)
        np.save(out_file_name, self.cam_pos[idx])
        out_file_name2 = inpath2 + str(self.iterationa_no) + "_" + str(
            self.critic_iter) + 'input_light{:05d}.npy'.format(idx)
        np.save(out_file_name2, self.light_pos1[idx])
        out_file_name3 = inpath2 + str(self.iterationa_no) + "_" + str(
            self.critic_iter) + 'input_im{:05d}.npy'.format(idx)
        np.save(out_file_name3, get_data(res['image']))
        out_file_name4 = inpath2 + str(self.iterationa_no) + "_" + str(
            self.critic_iter) + 'input_depth{:05d}.npy'.format(idx)
        np.save(out_file_name4, get_data(res['depth']))
        out_file_name5 = inpath2 + str(self.iterationa_no) + "_" + str(
            self.critic_iter) + 'input_normal{:05d}.npy'.format(idx)
        np.save(out_file_name5, get_data(res['normal']))

        if self.iterationa_no % (self.opt.save_image_interval * 5) == 0:
            imsave((inpath + str(self.iterationa_no) +
                    'real_normalmap_{:05d}.png'.format(idx)),
                   target_normalmap_img_)
            imsave((inpath + str(self.iterationa_no) +
                    'real_depth_{:05d}.png'.format(idx)), get_data(depth))
            # imsave(inpath + str(self.iterationa_no) +
            #        'real_depthmap_{:05d}.png'.format(idx), im_d)
            # imsave(inpath + str(self.iterationa_no) +
            #        'world_normalmap_{:05d}.png'.format(idx),
            #        target_worldnormalmap_img_)

        data.append(im)
        data_depth.append(im_d)
        data_normal.append(im_n)
        data_cond.append(large_scene['camera']['eye'])

    # Stack real samples
    real_samples = torch.stack(data)
    real_samples_depth = torch.stack(data_depth)
    real_samples_normal = torch.stack(data_normal)
    real_samples_cond = torch.stack(data_cond)
    self.batch_size = real_samples.size(0)
    if not self.opt.no_cuda:
        real_samples = real_samples.cuda()
        real_samples_depth = real_samples_depth.cuda()
        real_samples_normal = real_samples_normal.cuda()
        real_samples_cond = real_samples_cond.cuda()

    # Set input/output variables
    self.input.resize_as_(real_samples.data).copy_(real_samples.data)
    self.input_depth.resize_as_(real_samples_depth.data).copy_(
        real_samples_depth.data)
    self.input_normal.resize_as_(real_samples_normal.data).copy_(
        real_samples_normal.data)
    self.input_cond.resize_as_(real_samples_cond.data).copy_(
        real_samples_cond.data)
    self.label.resize_(self.batch_size).fill_(self.real_label)
    # TODO: Remove Variables
    self.inputv = Variable(self.input)
    self.inputv_depth = Variable(self.input_depth)
    self.inputv_normal = Variable(self.input_normal)
    self.inputv_cond = Variable(self.input_cond)
    self.labelv = Variable(self.label)
def render_sphere_world(out_dir, cam_pos, radius, width, height, fovy,
                        focal_length, b_display=False):
    """
    Generate z positions on a grid fixed inside the view frustum in the world
    coordinate system. Place the camera and choose the camera's field of view
    so that the side of the square touches the frustum.
    """
    import copy
    print('render sphere')
    sampling_time = []
    rendering_time = []

    num_samples = width * height
    r = np.ones(num_samples) * radius

    large_scene = copy.deepcopy(SCENE_TEST)

    large_scene['camera']['viewport'] = [0, 0, width, height]
    large_scene['camera']['fovy'] = np.deg2rad(fovy)
    large_scene['camera']['focal_length'] = focal_length
    large_scene['objects']['disk']['radius'] = tch_var_f(r)
    large_scene['objects']['disk']['material_idx'] = tch_var_l(
        np.zeros(num_samples, dtype=int).tolist())
    large_scene['materials']['albedo'] = tch_var_f([[0.6, 0.6, 0.6]])
    large_scene['tonemap']['gamma'] = tch_var_f([1.0])  # Linear output

    x, y = np.meshgrid(np.linspace(-1, 1, width), np.linspace(-1, 1, height))
    # z = np.sqrt(1 - np.min(np.stack((x ** 2 + y ** 2, np.ones_like(x)),
    #                                 axis=-1), axis=-1))
    unit_disk_mask = (x ** 2 + y ** 2) <= 1
    z = np.sqrt(1 - unit_disk_mask * (x ** 2 + y ** 2))

    # Make a hemisphere bulging out of the xy-plane scene
    z[~unit_disk_mask] = 0
    pos = np.stack((x.ravel(), y.ravel(), z.ravel()), axis=1)

    # Normals outside the sphere should be [0, 0, 1]
    x[~unit_disk_mask] = 0
    y[~unit_disk_mask] = 0
    z[~unit_disk_mask] = 1
    normals = np_normalize(np.stack((x.ravel(), y.ravel(), z.ravel()), axis=1))

    if b_display:
        plt.ion()
        plt.figure()
        plt.imshow(pos[..., 2].reshape((height, width)))
        plt.figure()
        plt.imshow(normals[..., 2].reshape((height, width)))

    large_scene['objects']['disk']['pos'] = tch_var_f(pos)
    large_scene['objects']['disk']['normal'] = tch_var_f(normals)
    large_scene['camera']['eye'] = tch_var_f(cam_pos)

    # main render run
    start_time = time()
    res = render(large_scene)
    rendering_time.append(time() - start_time)

    im = get_data(res['image'])
    im = np.uint8(255. * im)

    depth = get_data(res['depth'])
    depth[depth >= large_scene['camera']['far']] = depth.min()
    im_depth = np.uint8(255. * (depth - depth.min()) /
                        (depth.max() - depth.min()))

    if b_display:
        plt.figure()
        plt.imshow(im, interpolation='none')
        plt.title('Image')
        plt.savefig(out_dir + '/fig_img_orig.png')
        plt.figure()
        plt.imshow(im_depth, interpolation='none')
        plt.title('Depth Image')
        plt.savefig(out_dir + '/fig_depth_orig.png')
    imsave(out_dir + '/img_orig.png', im)
    imsave(out_dir + '/depth_orig.png', im_depth)

    # hold matplotlib figure
    plt.ioff()
    plt.show()
def optimize_scene(input_scene, target_scene, out_dir, max_iter=100, lr=1e-3,
                   print_interval=10, imsave_interval=10):
    """A demo function to check if the differentiable renderer can optimize.

    :param scene:
    :param out_dir:
    :return:
    """
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    target_res = render(target_scene)
    target_im = target_res['image'].detach()  # fixed optimization target

    criterion = nn.MSELoss()
    if CUDA:
        criterion = criterion.cuda()
    target_im_ = target_im.cpu()  # defined for both CPU and CUDA runs

    plt.ion()
    plt.figure()
    plt.imshow(target_im_.data.numpy())
    plt.title('Target Image')
    plt.savefig(out_dir + '/target.png')

    input_scene['materials']['albedo'].requires_grad = True
    # Only the albedo requires gradients, so optimize just that tensor.
    optimizer = optim.Adam([input_scene['materials']['albedo']], lr=lr)

    h0 = plt.figure()
    h1 = plt.figure()
    loss_per_iter = []
    for iter in range(max_iter):
        res = render(input_scene)
        im_out = res['image']

        optimizer.zero_grad()
        loss = criterion(im_out, target_im)

        im_out_ = get_data(im_out)
        loss_ = get_data(loss)
        loss_per_iter.append(loss_)

        if iter == 0:
            plt.figure(h0.number)
            plt.imshow(im_out_)
            plt.title('Initial')

        if iter % print_interval == 0:
            print('%d. loss= %f' % (iter, loss_))
            print(input_scene['materials'])
            plt.figure(h1.number)
            plt.imshow(im_out_)
            plt.title('%d. loss= %f' % (iter, loss_))
            plt.savefig(out_dir + '/fig_%05d.png' % iter)

        loss.backward()
        optimizer.step()

    plt.figure()
    plt.plot(loss_per_iter, linewidth=2)
    plt.xlabel('Iteration', fontsize=14)
    plt.title('MSE Loss', fontsize=12)
    plt.grid(True)
    plt.savefig(out_dir + '/loss.png')

    plt.ioff()
    plt.show()
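# A minimal driver sketch for optimize_scene (an assumption, not part of the
# original file): the target scene keeps a reference albedo while the input
# scene starts from a perturbed albedo that the optimizer should recover.
# This assumes SCENE_BASIC's 'albedo' is a single RGB row, as it is set up
# elsewhere in this file; the output directory name is hypothetical.
def _example_optimize_albedo():
    target_scene = copy.deepcopy(SCENE_BASIC)
    target_scene['materials']['albedo'] = tch_var_f([[0.6, 0.6, 0.6]])
    input_scene = copy.deepcopy(SCENE_BASIC)
    input_scene['materials']['albedo'] = tch_var_f([[0.1, 0.8, 0.3]])  # perturbed start
    optimize_scene(input_scene, target_scene, './albedo_opt_demo',
                   max_iter=200, lr=1e-2)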
def optimize_splats_along_ray_shadow_with_normalest_test(
        out_dir, width, height, max_iter=100, lr=1e-3, scale=10, shadow=True,
        vis_only=False, samples=1, est_normals=False,
        b_generate_normals=False, print_interval=10, imsave_interval=10,
        xyz_save_interval=100):
    """A demo function to check if the differentiable renderer can optimize
    splats rendered along rays.

    :param scene:
    :param out_dir:
    :return:
    """
    import torch
    import copy
    from diffrend.torch.params import SCENE_SPHERE_HALFBOX_0

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    scene = SCENE_SPHERE_HALFBOX_0
    scene['camera']['viewport'] = [0, 0, width, height]
    scene['camera']['fovy'] = np.deg2rad(45)
    scene['camera']['focal_length'] = 1
    scene['camera']['eye'] = tch_var_f([2, 1, 2, 1])
    # tch_var_f([1, 1, 1, 1])  # tch_var_f([2, 2, 2, 1])
    scene['camera']['at'] = tch_var_f([0, 0.8, 0, 1])
    # tch_var_f([0, 1, 0, 1])  # tch_var_f([2, 2, 0, 1])
    scene['lights']['attenuation'] = tch_var_f([
        [0., 0.0, 0.01],
        [0., 0.0, 0.01],
        [0., 0.0, 0.01],
    ])
    scene['materials']['coeffs'] = tch_var_f([
        [1.0, 0.0, 0.0],
        [1.0, 0.0, 0.0],
        [1.0, 0.0, 0.0],
        [0.5, 0.2, 8.0],
        [1.0, 0.0, 0.0],
        [1.0, 0.0, 0.0],
    ])

    target_res = render(scene, tiled=True, shadow=shadow)
    target_im = normalize_maxmin(target_res['image']).detach()  # fixed target
    target_im_ = get_data(target_im)
    target_pos_ = get_data(target_res['pos'])
    target_normal_ = get_data(target_res['normal'])
    target_normalmap_img_ = get_normalmap_image(target_normal_)
    target_depth_ = get_data(target_res['depth'])
    print('[z_min, z_max] = [%f, %f]' % (np.min(target_pos_[..., 2]),
                                         np.max(target_pos_[..., 2])))
    print('[depth_min, depth_max] = [%f, %f]' % (np.min(target_depth_),
                                                 np.max(target_depth_)))

    # world -> cam -> render_splats_along_ray
    cc_tform = world_to_cam(target_res['pos'].view((-1, 3)),
                            target_res['normal'].view((-1, 3)),
                            scene['camera'])
    wc_cc_tform = cam_to_world(cc_tform['pos'], cc_tform['normal'],
                               scene['camera'])

    # Check normal estimation in camera space
    pos_cc = cc_tform['pos'][:, :3].contiguous().view(target_im.shape)
    normal_cc = cc_tform['normal'][:, :3].contiguous().view(target_im.shape)
    plane_fit_est = estimate_surface_normals_plane_fit(pos_cc, None)
    normal_cc_normalmap = get_normalmap_image(get_data(normal_cc))
    plane_fit_est_normalmap = get_normalmap_image(get_data(plane_fit_est))

    pos_diff = torch.abs(wc_cc_tform['pos'][:, :3] -
                         target_res['pos'].view((-1, 3)))
    mean_pos_diff = torch.mean(pos_diff)
    normal_diff = torch.abs(wc_cc_tform['normal'][:, :3] -
                            target_res['normal'].view(-1, 3))
    mean_normal_diff = torch.mean(normal_diff)
    print('mean_pos_diff', mean_pos_diff, 'mean_normal_diff', mean_normal_diff)

    wc_cc_normal = wc_cc_tform['normal'].view(target_im_.shape)
    wc_cc_normal_img = get_normalmap_image(get_data(wc_cc_normal))

    material_idx = tch_var_l(np.ones(cc_tform['pos'].shape[0]) * 3)
    input_scene = copy.deepcopy(scene)
    del input_scene['objects']['sphere']
    del input_scene['objects']['triangle']
    light_vis = tch_var_f(
        np.ones((input_scene['lights']['pos'].shape[0],
                 cc_tform['pos'].shape[0])))
    input_scene['objects'] = {
        'disk': {
            'pos': cc_tform['pos'],
            'normal': cc_tform['normal'],
            'material_idx': material_idx,
            'light_vis': light_vis,
        }
    }
    target_res_noshadow = render(scene, tiled=True, shadow=False)
    res = render_splats_along_ray(input_scene)
    test_img_ = get_data(normalize_maxmin(res['image']))
    test_depth_ = get_data(res['depth'])
    test_normal_ = get_data(res['normal']).reshape(test_img_.shape)
    test_normalmap_ = get_normalmap_image(test_normal_)
    im_diff = np.abs(test_img_ -
                     get_data(normalize_maxmin(target_res_noshadow['image'])))
    print('mean image diff: {}'.format(np.mean(im_diff)))

    # Plot sanity-check renders
    plt.ion()
    plt.figure()
    plt.imshow(test_img_, interpolation='none')
    plt.title('Test Image')
    plt.savefig(out_dir + '/test_img.png')
    plt.figure()
    plt.imshow(test_depth_, interpolation='none')
    plt.title('Test Depth')
    plt.savefig(out_dir + '/test_depth.png')
    plt.figure()
    plt.imshow(test_normalmap_, interpolation='none')
    plt.title('Test Normals')
    plt.savefig(out_dir + '/test_normal.png')

    criterion = nn.L1Loss()  # nn.MSELoss()
    criterion = criterion.cuda()

    plt.figure()
    plt.imshow(target_im_, interpolation='none')
    plt.title('Target Image')
    plt.savefig(out_dir + '/target.png')
    plt.figure()
    plt.imshow(target_normalmap_img_, interpolation='none')
    plt.title('Normals')
    plt.savefig(out_dir + '/normal.png')
    plt.figure()
    plt.imshow(wc_cc_normal_img, interpolation='none')
    plt.title('WC_CC Normals')
    plt.savefig(out_dir + '/wc_cc_normal.png')
    plt.figure()
    plt.imshow(normal_cc_normalmap, interpolation='none')
    plt.title('Normal CC GT')
    plt.savefig(out_dir + '/normal_cc.png')
    plt.figure()
    plt.imshow(plane_fit_est_normalmap, interpolation='none')
    plt.title('Plane fit CC')
    plt.savefig(out_dir + '/est_normal_cc.png')
    plt.figure()
    plt.subplot(121)
    plt.imshow(normal_cc_normalmap, interpolation='none')
    plt.title('Normal CC GT')
    plt.subplot(122)
    plt.imshow(plane_fit_est_normalmap, interpolation='none')
    plt.title('Plane fit CC')
    plt.savefig(out_dir + '/normal_and_estnormal_cc_comparison.png')

    input_scene = copy.deepcopy(scene)
    del input_scene['objects']['sphere']
    del input_scene['objects']['triangle']
    input_scene['camera']['viewport'] = [0, 0, int(width / samples),
                                         int(height / samples)]
    num_splats = int(width * height / (samples * samples))
    # x, y = np.meshgrid(np.linspace(-1, 1, int(width / samples)),
    #                    np.linspace(-1, 1, int(height / samples)))
    z_min = scene['camera']['focal_length']
    z_max = 3
    z = -tch_var_f(np.ones(num_splats) * (z_min + z_max) / 2)
    # z = -torch.clamp(tch_var_f(2 * np.random.rand(num_splats)), z_min, z_max)
    z.requires_grad = True
    normal_angles = tch_var_f(np.random.rand(num_splats, 2))
    normal_angles.requires_grad = True
    material_idx = tch_var_l(np.ones(num_splats) * 3)
    light_vis = tch_var_f(
        np.ones((input_scene['lights']['pos'].shape[0], num_splats)))
    light_vis.requires_grad = True

    if vis_only:
        assert shadow is True
        opt_vars = [light_vis]
        z = cc_tform['pos'][:, 2]
        # FIXME: sph2cart
        # normals = cc_tform['normal']
    else:
        opt_vars = [z, normal_angles]
        if shadow:
            opt_vars += [light_vis]

    optimizer = optim.Adam(opt_vars, lr=lr)
    lr_scheduler = StepLR(optimizer, step_size=10000, gamma=0.8)

    h0 = plt.figure()
    h1 = plt.figure()
    h2 = plt.figure()
    h3 = plt.figure()
    h4 = plt.figure()
    gs1 = gridspec.GridSpec(3, 3)
    gs1.update(wspace=0.0025, hspace=0.02)

    # Three options for scheduling z_norm_consistency:
    # 1. start after N iterations
    # 2. start at the beginning and decay
    # 3. start after N iterations and decay to 0
    no_decay = lambda x: x
    exp_decay = lambda x, scale: torch.exp(-x / scale)
    linear_decay = lambda x, scale: scale / (x + 1e-6)

    spatial_var_loss_weight = 10.0  # 0.0
    normal_away_from_cam_loss_weight = 0.0
    grad_img_depth_loss_weight = 1.0
    spatial_loss_weight = 2

    z_norm_weight_init = 1  # 1e-5
    z_norm_activate_iter = 0  # 1000
    decay_fn = lambda x: linear_decay(x, 100)
    loss_per_iter = []

    if b_generate_normals:
        est_normals = False
        normal_est_network = NEstNetAffine(kernel_size=3, sph=False)
        print(normal_est_network)
        normal_est_network.cuda()

    for iter in range(max_iter):
        lr_scheduler.step()
        zz = -F.relu(-z) - z_min  # torch.clamp(z, -z_max, -z_min)
        if b_generate_normals:
            normals = generate_normals(zz, scene['camera'],
                                       normal_est_network)
            # if iter > 100 and iter % 10 == 0:
            #     print(normals)
        elif not est_normals:
            phi = F.sigmoid(normal_angles[:, 0]) * 2 * np.pi
            theta = F.sigmoid(normal_angles[:, 1]) * np.pi / 2
            # theta = F.tanh(normal_angles[:, 1]) * np.pi / 2
            normals = sph2cart_unit(torch.stack((phi, theta), dim=1))

        pos = zz
        # pos = torch.stack((tch_var_f(x.ravel()), tch_var_f(y.ravel()), zz),
        #                   dim=1)

        input_scene['objects'] = {
            'disk': {
                'pos': pos,
                'normal': normalize(normals) if not est_normals else None,
                'material_idx': material_idx,
                'light_vis': torch.sigmoid(light_vis),
            }
        }
        res = render_splats_along_ray(input_scene, samples=samples,
                                      normal_estimation_method='plane')

        res_pos = res['pos']
        res_normal = res['normal']
        spatial_loss = spatial_3x3(res_pos)
        depth_grad_loss = spatial_3x3(res['depth'][..., np.newaxis])
        grad_img = grad_spatial2d(
            torch.mean(res['image'], dim=-1)[..., np.newaxis])
        grad_depth_img = grad_spatial2d(res['depth'][..., np.newaxis])
        image_depth_consistency_loss = depth_rgb_gradient_consistency(
            res['image'], res['depth'])
        unit_normal_loss = unit_norm2_L2loss(res_normal, 10.0)
        normal_away_from_cam_loss = away_from_camera_penalty(
            res_pos, res_normal)
        z_pos = res_pos[..., 2]
        z_loss = torch.mean((10 * F.relu(z_min - torch.abs(z_pos)))**2 +
                            (10 * F.relu(torch.abs(z_pos) - z_max))**2)
        z_norm_loss = normal_consistency_cost(res_pos, res_normal, norm=1)
        spatial_var = torch.mean(res_pos[..., 0].var() +
                                 res_pos[..., 1].var() +
                                 res_pos[..., 2].var())
        spatial_var_loss = (1 / (spatial_var + 1e-4))

        im_out = normalize_maxmin(res['image'])
        res_depth_ = get_data(res['depth'])

        optimizer.zero_grad()
        z_norm_weight = z_norm_weight_init * float(
            iter > z_norm_activate_iter) * decay_fn(iter -
                                                    z_norm_activate_iter)
        loss = criterion(scale * im_out, scale * target_im) + z_loss + \
            unit_normal_loss + \
            z_norm_weight * z_norm_loss + \
            spatial_var_loss_weight * spatial_var_loss + \
            grad_img_depth_loss_weight * image_depth_consistency_loss
        # + normal_away_from_cam_loss_weight * normal_away_from_cam_loss
        # + spatial_loss_weight * spatial_loss

        im_out_ = get_data(im_out)
        im_out_normal_ = get_data(res['normal'])
        pos_out_ = get_data(res['pos'])

        loss_ = get_data(loss)
        z_loss_ = get_data(z_loss)
        z_norm_loss_ = get_data(z_norm_loss)
        spatial_loss_ = get_data(spatial_loss)
        spatial_var_loss_ = get_data(spatial_var_loss)
        unit_normal_loss_ = get_data(unit_normal_loss)
        normal_away_from_cam_loss_ = get_data(normal_away_from_cam_loss)
        normals_ = get_data(res_normal)
        image_depth_consistency_loss_ = get_data(image_depth_consistency_loss)
        loss_per_iter.append(loss_)

        if iter == 0:
            plt.figure(h0.number)
            plt.imshow(im_out_)
            plt.title('Initial')

        if iter % print_interval == 0 or iter == max_iter - 1:
            z_ = get_data(z)
            z__ = pos_out_[..., 2]
            print('%d. loss= %f nloss=%f z_loss=%f [%f, %f] [%f, %f],'
                  ' z_normal_loss: %f, spatial_var_loss: %f,'
                  ' normal_away_loss: %f, nz_range: [%f, %f],'
                  ' spatial_loss: %f, imd_loss: %f' %
                  (iter, loss_, unit_normal_loss_, z_loss_, np.min(z_),
                   np.max(z_), np.min(z__), np.max(z__), z_norm_loss_,
                   spatial_var_loss_, normal_away_from_cam_loss_,
                   normals_[..., 2].min(), normals_[..., 2].max(),
                   spatial_loss_, image_depth_consistency_loss_))

        if iter % xyz_save_interval == 0 or iter == max_iter - 1:
            save_xyz(out_dir + '/res_{:05d}.xyz'.format(iter),
                     get_data(res_pos), get_data(res_normal))

        if iter % imsave_interval == 0 or iter == max_iter - 1:
            z_ = get_data(z)
            plt.figure(h4.number)
            plt.clf()
            plt.suptitle('%d. loss= %f [%f, %f]' %
                         (iter, loss_, np.min(z_), np.max(z_)))
            plt.subplot(121)
            plt.imshow(im_out_, interpolation='none')
            plt.title('Output')
            plt.subplot(122)
            plt.imshow(target_im_, interpolation='none')
            plt.title('Ground truth')
            # plt.subplot(223)
            # plt.plot(loss_per_iter, linewidth=2)
            # plt.xlabel('Iteration', fontsize=14)
            # plt.title('Loss', fontsize=12)
            # plt.grid(True)
            plt.savefig(out_dir + '/fig_im_gt_loss_%05d.png' % iter)

            plt.figure(h1.number, figsize=(4, 4))
            plt.clf()
            plt.suptitle('%d. loss= %f [%f, %f]' %
                         (iter, loss_, np.min(z_), np.max(z_)))
            plt.subplot(gs1[0])
            plt.axis('off')
            plt.imshow(im_out_, interpolation='none')
            plt.subplot(gs1[1])
            plt.axis('off')
            plt.imshow(get_normalmap_image(im_out_normal_),
                       interpolation='none')
            ax = plt.subplot(gs1[2])
            plt.axis('off')
            im_tmp = ax.imshow(res_depth_, interpolation='none')
            # Create an axes on the right side of ax. The width of cax will be
            # 5% of ax and the padding between cax and ax will be fixed at
            # 0.05 inch.
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(im_tmp, cax=cax)

            plt.subplot(gs1[3])
            plt.axis('off')
            plt.imshow(target_im_, interpolation='none')
            plt.subplot(gs1[4])
            plt.axis('off')
            plt.imshow(test_normalmap_, interpolation='none')
            ax = plt.subplot(gs1[5])
            plt.axis('off')
            im_tmp = ax.imshow(test_depth_, interpolation='none')
            divider = make_axes_locatable(ax)
            cax = divider.append_axes("right", size="5%", pad=0.05)
            plt.colorbar(im_tmp, cax=cax)

            W, H = input_scene['camera']['viewport'][2:]
            light_vis_ = get_data(torch.sigmoid(light_vis))
            plt.subplot(gs1[6])
            plt.axis('off')
            plt.imshow(light_vis_[0].reshape((H, W)), interpolation='none')
            if light_vis_.shape[0] > 1:
                plt.subplot(gs1[7])
                plt.axis('off')
                plt.imshow(light_vis_[1].reshape((H, W)),
                           interpolation='none')
            if light_vis_.shape[0] > 2:
                plt.subplot(gs1[8])
                plt.axis('off')
                plt.imshow(light_vis_[2].reshape((H, W)),
                           interpolation='none')
            plt.savefig(out_dir + '/fig_%05d.png' % iter)

            plt.figure(h2.number)
            plt.clf()
            plt.imshow(res_depth_)
            plt.colorbar()
            plt.savefig(out_dir + '/fig_depth_%05d.png' % iter)

            plt.figure(h3.number)
            plt.clf()
            plt.imshow(z_.reshape(H, W))
            plt.colorbar()
            plt.savefig(out_dir + '/fig_z_%05d.png' % iter)

        loss.backward()
        optimizer.step()

    plt.figure()
    plt.plot(loss_per_iter, linewidth=2)
    plt.xlabel('Iteration', fontsize=14)
    plt.title('Loss', fontsize=12)
    plt.grid(True)
    plt.savefig(out_dir + '/loss.png')

    plt.ioff()
    plt.show()
def render_sphere_halfbox(out_dir, cam_pos, width, height, fovy, focal_length,
                          num_views, cam_dist, norm_depth_image_only,
                          theta_range=None, phi_range=None, axis=None,
                          angle=None, cam_lookat=None, tile_size=None,
                          use_quartic=False, b_shadow=True, b_display=False):
    # Example invocation:
    # python splat_render_demo.py --sphere-halfbox --fovy 30
    #     --out_dir ./sphere_halfbox_demo --cam_dist 4 --axis .8 .5 1
    #     --angle 5 --at 0 .4 0 --nv 10 --width=256 --height=256
    scene = SCENE_SPHERE_HALFBOX
    scene['camera']['viewport'] = [0, 0, width, height]
    scene['camera']['fovy'] = np.deg2rad(fovy)
    scene['camera']['focal_length'] = focal_length
    scene['lights']['pos'] = tch_var_f([[2., 2., 1.5, 1.0],
                                        [1., 4., 1.5, 1.0]])
    # scene['lights']['pos'] = tch_var_f([[4., 4., 3., 1.0]])
    scene['lights']['color_idx'] = tch_var_l([1, 3])
    scene['lights']['attenuation'] = tch_var_f([[1., 0., 0.],
                                                [1., 0., 0.]])

    # generate camera positions on a sphere
    if cam_pos is None:
        cam_pos = uniform_sample_sphere(radius=cam_dist, num_samples=num_views,
                                        axis=axis, angle=angle,
                                        theta_range=theta_range,
                                        phi_range=phi_range)
    lookat = cam_lookat if cam_lookat is not None else [0.0, 0.0, 0.0, 1.0]
    scene['camera']['at'] = tch_var_f(lookat)

    b_tiled = tile_size is not None
    res = render(scene, tile_size=tile_size, tiled=b_tiled, shadow=b_shadow)
    im = np.uint8(255. * get_data(res['image']))
    depth = get_data(res['depth'])
    depth[depth >= scene['camera']['far']] = depth.min()
    im_depth = np.uint8(255. * (depth - depth.min()) /
                        (depth.max() - depth.min()))

    if b_display:
        plt.figure()
        plt.imshow(im)
        plt.title('Image')
        plt.savefig(out_dir + '/fig_img_orig.png')
        plt.figure()
        plt.imshow(im_depth)
        plt.title('Depth Image')
        plt.savefig(out_dir + '/fig_depth_orig.png')
    imsave(out_dir + '/img_orig.png', im)
    imsave(out_dir + '/depth_orig.png', im_depth)

    if b_display:
        h1 = plt.figure()
        h2 = plt.figure()
    for idx in range(cam_pos.shape[0]):
        scene['camera']['eye'] = tch_var_f(cam_pos[idx])
        suffix = '_{}'.format(idx)

        # main render run
        res = render(scene, tiled=b_tiled, shadow=b_shadow,
                     norm_depth_image_only=norm_depth_image_only,
                     use_quartic=use_quartic)
        im = np.uint8(255. * get_data(res['image']))
        depth = get_data(res['depth'])
        depth[depth >= scene['camera']['far']] = depth.min()
        im_depth = np.uint8(255. * (depth - depth.min()) /
                            (depth.max() - depth.min()))

        if b_display:
            plt.figure(h1.number)
            plt.imshow(im)
            plt.title('Image')
            plt.savefig(out_dir + '/fig_img' + suffix + '.png')
            plt.figure(h2.number)
            plt.imshow(im_depth)
            plt.title('Depth Image')
            plt.savefig(out_dir + '/fig_depth' + suffix + '.png')
        imsave(out_dir + '/img' + suffix + '.png', im)
        imsave(out_dir + '/depth' + suffix + '.png', im_depth)
def optimize_NDC_test(out_dir, width=32, height=32, max_iter=100, lr=1e-3,
                      scale=10, print_interval=10, imsave_interval=10):
    """A demo function to check if the differentiable renderer can optimize
    splats in NDC.

    :param scene:
    :param out_dir:
    :return:
    """
    import torch
    import copy
    from diffrend.torch.params import SCENE_SPHERE_HALFBOX

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    scene = SCENE_SPHERE_HALFBOX
    scene['camera']['viewport'] = [0, 0, width, height]
    scene['camera']['fovy'] = np.deg2rad(45)
    scene['camera']['focal_length'] = 1
    scene['camera']['eye'] = tch_var_f([2, 1, 2, 1])
    scene['camera']['at'] = tch_var_f([0, 0.8, 0, 1])

    target_res = render(scene)  # note: 'scene' aliases SCENE_SPHERE_HALFBOX
    target_im = target_res['image'].detach()  # fixed optimization target
    target_im_ = get_data(target_res['image'])

    criterion = nn.L1Loss()  # nn.MSELoss()
    criterion = criterion.cuda()

    plt.ion()
    plt.figure()
    plt.imshow(target_im_)
    plt.title('Target Image')
    plt.savefig(out_dir + '/target.png')

    input_scene = copy.deepcopy(scene)
    del input_scene['objects']['sphere']
    del input_scene['objects']['triangle']

    num_splats = width * height
    x, y = np.meshgrid(np.linspace(-1, 1, width), np.linspace(-1, 1, height))
    z = tch_var_f(2 * np.random.rand(num_splats) - 1)
    z.requires_grad = True
    normals = tch_var_f(np.ones((num_splats, 4)) * np.array([0, 0, 1, 0]))
    normals.requires_grad = True
    material_idx = tch_var_l(np.ones(num_splats) * 3)
    input_scene['objects'] = {
        'disk': {
            'pos': None,  # rebuilt from z inside the loop below
            'normal': normals,
            'material_idx': material_idx
        }
    }
    optimizer = optim.Adam((z, normals), lr=lr)

    h0 = plt.figure()
    h1 = plt.figure()
    loss_per_iter = []
    for iter in range(max_iter):
        # Rebuild pos from the optimized z each iteration so that backward()
        # does not traverse a stale graph from a previous step.
        pos = torch.stack((tch_var_f(x.ravel()), tch_var_f(y.ravel()), z),
                          dim=1)
        input_scene['objects']['disk']['pos'] = pos

        res = render_splats_NDC(input_scene)
        im_out = res['image']

        optimizer.zero_grad()
        loss = criterion(scale * im_out, scale * target_im)

        im_out_ = get_data(im_out)
        loss_ = get_data(loss)
        loss_per_iter.append(loss_)

        if iter == 0:
            plt.figure(h0.number)
            plt.imshow(im_out_)
            plt.title('Initial')

        if iter % print_interval == 0 or iter == max_iter - 1:
            print('%d. loss= %f' % (iter, loss_))

        if iter % imsave_interval == 0 or iter == max_iter - 1:
            plt.figure(h1.number)
            plt.imshow(im_out_)
            plt.title('%d. loss= %f' % (iter, loss_))
            plt.savefig(out_dir + '/fig_%05d.png' % iter)

        loss.backward()
        optimizer.step()

    plt.figure()
    plt.plot(loss_per_iter, linewidth=2)
    plt.xlabel('Iteration', fontsize=14)
    plt.title('Loss', fontsize=12)
    plt.grid(True)
    plt.savefig(out_dir + '/loss.png')

    plt.ioff()
    plt.show()
def render_random_camera(filename, out_dir, num_samples, radius, cam_dist,
                         num_views, width, height, fovy, focal_length,
                         norm_depth_image_only, theta_range=None,
                         phi_range=None, axis=None, angle=None, cam_pos=None,
                         cam_lookat=None, use_mesh=False, double_sided=False,
                         use_quartic=False, b_shadow=True, b_display=False,
                         tile_size=None):
    """
    Randomly generate N samples on a surface and render them. The samples
    include position and normal; the radius is set to a constant.
    """
    sampling_time = []
    rendering_time = []

    obj = load_model(filename)
    # normalize the vertices
    v = obj['v']
    axis_range = np.max(v, axis=0) - np.min(v, axis=0)
    v = (v - np.mean(v, axis=0)) / max(axis_range)  # make the largest spread 1
    obj['v'] = v

    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    r = np.ones(num_samples) * radius

    scene = copy.deepcopy(SCENE_BASIC)

    scene['camera']['viewport'] = [0, 0, width, height]
    scene['camera']['fovy'] = np.deg2rad(fovy)
    scene['camera']['focal_length'] = focal_length
    if use_mesh:
        mesh = obj_to_triangle_spec(obj)
        faces = mesh['face']
        normals = mesh['normal']
        num_tri = faces.shape[0]
        if 'disk' in scene['objects']:
            del scene['objects']['disk']
        scene['objects'].update({'triangle': {'face': None, 'normal': None,
                                              'material_idx': None}})
        scene['objects']['triangle']['face'] = tch_var_f(faces.tolist())
        scene['objects']['triangle']['normal'] = tch_var_f(normals.tolist())
        scene['objects']['triangle']['material_idx'] = tch_var_l(
            np.zeros(num_tri, dtype=int).tolist())
    else:
        scene['objects']['disk']['radius'] = tch_var_f(r)
        scene['objects']['disk']['material_idx'] = tch_var_l(
            np.zeros(num_samples, dtype=int).tolist())
    scene['materials']['albedo'] = tch_var_f([[0.6, 0.6, 0.6]])
    scene['tonemap']['gamma'] = tch_var_f([1.0])  # Linear output

    # generate camera positions on a sphere
    if cam_pos is None:
        cam_pos = uniform_sample_sphere(radius=cam_dist, num_samples=num_views,
                                        axis=axis, angle=angle,
                                        theta_range=theta_range,
                                        phi_range=phi_range)
    lookat = cam_lookat if cam_lookat is not None else np.mean(v, axis=0)
    scene['camera']['at'] = tch_var_f(lookat)

    if b_display:
        h1 = plt.figure()
        h2 = plt.figure()
    for idx in range(cam_pos.shape[0]):
        if not use_mesh:
            start_time = time()
            v, vn = uniform_sample_mesh(obj, num_samples=num_samples)
            sampling_time.append(time() - start_time)

            scene['objects']['disk']['pos'] = tch_var_f(v)
            scene['objects']['disk']['normal'] = tch_var_f(vn)

        scene['camera']['eye'] = tch_var_f(cam_pos[idx])
        suffix = '_{}'.format(idx)

        # main render run
        start_time = time()
        res = render(scene, tile_size=tile_size, tiled=tile_size is not None,
                     shadow=b_shadow,
                     norm_depth_image_only=norm_depth_image_only,
                     double_sided=double_sided, use_quartic=use_quartic)
        rendering_time.append(time() - start_time)

        im = np.uint8(255. * get_data(res['image']))
        depth = get_data(res['depth'])
        depth[depth >= scene['camera']['far']] = depth.min()
        im_depth = np.uint8(255. * (depth - depth.min()) /
                            (depth.max() - depth.min()))

        if b_display:
            plt.figure(h1.number)
            plt.imshow(im)
            plt.title('Image')
            plt.savefig(out_dir + '/fig_img' + suffix + '.png')
            plt.figure(h2.number)
            plt.imshow(im_depth)
            plt.title('Depth Image')
            plt.savefig(out_dir + '/fig_depth' + suffix + '.png')
        imsave(out_dir + '/img' + suffix + '.png', im)
        imsave(out_dir + '/depth' + suffix + '.png', im_depth)

    # Timing statistics
    if not use_mesh:
        print('Sampling time mean: {}s, std: {}s'.format(
            np.mean(sampling_time), np.std(sampling_time)))
    print('Rendering time mean: {}s, std: {}s'.format(np.mean(rendering_time),
                                                      np.std(rendering_time)))
def render_sphere(out_dir, cam_pos, radius, width, height, fovy, focal_length,
                  num_views, std_z=0.01, std_normal=0.01, b_display=False):
    """
    Randomly generate N samples on a surface and render them. The samples
    include position and normal; the radius is set to a constant.
    """
    print('render sphere')
    sampling_time = []
    rendering_time = []

    num_samples = width * height
    r = np.ones(num_samples) * radius

    scene = copy.deepcopy(SCENE_BASIC)

    scene['camera']['viewport'] = [0, 0, width, height]
    scene['camera']['fovy'] = np.deg2rad(fovy)
    scene['camera']['focal_length'] = focal_length
    scene['objects']['disk']['radius'] = tch_var_f(r)
    scene['objects']['disk']['material_idx'] = tch_var_l(
        np.zeros(num_samples, dtype=int).tolist())
    scene['materials']['albedo'] = tch_var_f([[0.6, 0.6, 0.6]])
    scene['tonemap']['gamma'] = tch_var_f([1.0])  # Linear output

    x, y = np.meshgrid(np.linspace(-1, 1, width), np.linspace(-1, 1, height))
    # z = np.sqrt(1 - np.min(np.stack((x ** 2 + y ** 2, np.ones_like(x)),
    #                                 axis=-1), axis=-1))
    unit_disk_mask = (x ** 2 + y ** 2) <= 1
    z = np.sqrt(1 - unit_disk_mask * (x ** 2 + y ** 2))

    # Make a hemisphere bulging out of the xy-plane scene
    z[~unit_disk_mask] = 0
    pos = np.stack((x.ravel(), y.ravel(), z.ravel()), axis=1)

    # Normals outside the sphere should be [0, 0, 1]
    x[~unit_disk_mask] = 0
    y[~unit_disk_mask] = 0
    z[~unit_disk_mask] = 1
    normals = np_normalize(np.stack((x.ravel(), y.ravel(), z.ravel()), axis=1))

    if b_display:
        plt.ion()
        plt.figure()
        plt.imshow(pos[..., 2].reshape((height, width)))
        plt.figure()
        plt.imshow(normals[..., 2].reshape((height, width)))

    scene['objects']['disk']['pos'] = tch_var_f(pos)
    scene['objects']['disk']['normal'] = tch_var_f(normals)
    scene['camera']['eye'] = tch_var_f(cam_pos)

    # main render run
    start_time = time()
    res = render(scene)
    rendering_time.append(time() - start_time)

    im = get_data(res['image'])
    depth = get_data(res['depth'])
    depth[depth >= scene['camera']['far']] = depth.min()
    im_depth = np.uint8(255. * (depth - depth.min()) /
                        (depth.max() - depth.min()))

    if b_display:
        plt.figure()
        plt.imshow(im)
        plt.title('Image')
        plt.savefig(out_dir + '/fig_img_orig.png')
        plt.figure()
        plt.imshow(im_depth)
        plt.title('Depth Image')
        plt.savefig(out_dir + '/fig_depth_orig.png')
    imsave(out_dir + '/img_orig.png', im)
    imsave(out_dir + '/depth_orig.png', im_depth)

    # generate noisy data
    if b_display:
        h1 = plt.figure()
        h2 = plt.figure()
    noisy_pos = pos.copy()  # copy so noise does not accumulate across views
    for view_idx in range(num_views):
        noisy_pos[..., 2] = pos[..., 2] + std_z * np.random.randn(num_samples)
        noisy_normals = np_normalize(
            normals + std_normal * np.random.randn(num_samples, 3))

        scene['objects']['disk']['pos'] = tch_var_f(noisy_pos)
        scene['objects']['disk']['normal'] = tch_var_f(noisy_normals)
        scene['camera']['eye'] = tch_var_f(cam_pos)

        # main render run
        start_time = time()
        res = render(scene)
        rendering_time.append(time() - start_time)

        im = get_data(res['image'])
        depth = get_data(res['depth'])
        depth[depth >= scene['camera']['far']] = depth.min()
        im_depth = np.uint8(255. * (depth - depth.min()) /
                            (depth.max() - depth.min()))

        suffix_str = '{:05d}'.format(view_idx)
        if b_display:
            plt.figure(h1.number)
            plt.imshow(im)
            plt.title('Image')
            plt.savefig(out_dir + '/fig_img_' + suffix_str + '.png')
            plt.figure(h2.number)
            plt.imshow(im_depth)
            plt.title('Depth Image')
            plt.savefig(out_dir + '/fig_depth_' + suffix_str + '.png')
        imsave(out_dir + '/img_' + suffix_str + '.png', im)
        imsave(out_dir + '/depth_' + suffix_str + '.png', im_depth)

    # hold matplotlib figure
    plt.ioff()
    plt.show()
def test_depth_optimization(scene, batch_size, print_interval=20,
                            imsave_interval=20, max_iter=100,
                            out_dir='./proj_tmp_depth-fast/'):
    """
    First render using the full renderer to get the surfel position and color,
    then render using the projection layer for testing.

    Returns:
    """
    from torch import optim
    import torchvision
    import os
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import imageio
    from PIL import Image

    plt.ion()
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    use_chair = True
    use_fast_projection = True
    use_masked_loss = True
    use_same_render_method_for_target = False
    lr = 1e-2

    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    # true_pos_wc = res['pos'].reshape(-1, res['pos'].shape[-1]).repeat(
    #     batch_size, 1, 1)
    true_input_img = res['image'].unsqueeze(0).repeat(batch_size, 1, 1, 1)

    if use_chair:
        camera = scene['camera']
        camera['eye'] = tch_var_f([0, 0, 4, 1]).repeat(batch_size, 1)
        camera['at'] = tch_var_f([0, 0, 0, 1]).repeat(batch_size, 1)
        camera['up'] = tch_var_f([0, 1, 0, 0]).repeat(batch_size, 1)
    else:
        camera = scene['camera']
        camera['eye'] = camera['eye'].repeat(batch_size, 1)
        camera['at'] = camera['at'].repeat(batch_size, 1)
        camera['up'] = camera['up'].repeat(batch_size, 1)

    if use_chair:
        chair_0 = Image.open('object-0-azimuth-000.006-rgb.png')
        true_input_img = torchvision.transforms.ToTensor()(chair_0).to(
            true_input_img.device).unsqueeze(0)
        true_input_img = true_input_img.permute(0, 2, 3, 1)
        camera['viewport'] = [0, 0, 128, 128]  # TODO: don't hardcode

    # Not relevant if 'use_chair' is True
    true_depth = res['depth'].repeat(batch_size, 1, 1).reshape(batch_size, -1)

    # depth = true_depth.clone() + 0.1 * torch.randn_like(true_depth)
    depth = 0.1 * torch.randn(
        batch_size, true_input_img.size(-2) * true_input_img.size(-3),
        device=true_input_img.device, dtype=torch.float)
    depth.requires_grad = True

    if use_chair:
        target_angle = np.deg2rad(20)
    else:
        target_angle = -np.pi / 12

    rotated_camera = copy.deepcopy(camera)
    # randomly_rotate_cameras(rotated_camera,
    #                         theta_range=[-np.pi / 16, np.pi / 16],
    #                         phi_range=[-np.pi / 8, np.pi / 8])
    randomly_rotate_cameras(rotated_camera, theta_range=[0, 1e-10],
                            phi_range=[target_angle, target_angle + 1e-10])

    if use_chair:
        target_image = Image.open('object-0-azimuth-020.006-rgb.png')
        target_image = torchvision.transforms.ToTensor()(target_image).to(
            true_input_img.device).unsqueeze(0)
        target_image = target_image.permute(0, 2, 3, 1)
        target_mask = torch.ones(*target_image.size()[:-1], 1,
                                 device=target_image.device,
                                 dtype=torch.float)
    else:
        true_pos_cc = z_to_pcl_CC_batched(-true_depth, camera)  # NOTE: z = -depth
        true_pos_wc = cam_to_world_batched(true_pos_cc, None, camera)['pos']
        if use_same_render_method_for_target:
            if use_fast_projection:
                target_image, proj_out = \
                    projection_renderer_differentiable_fast(
                        true_pos_wc, true_input_img, rotated_camera)
                target_mask = proj_out['mask']
            else:
                target_image, target_mask = \
                    projection_renderer_differentiable(
                        true_pos_wc, true_input_img, rotated_camera)
            # target_image, _ = projection_renderer(
            #     true_pos_wc, true_input_img, rotated_camera)
        else:
            scene2 = copy.deepcopy(scene)
            scene['camera'] = copy.deepcopy(rotated_camera)
            scene['camera']['eye'] = scene['camera']['eye'][0]
            scene['camera']['at'] = scene['camera']['at'][0]
            scene['camera']['up'] = scene['camera']['up'][0]
            target_image = render(scene)['image'].unsqueeze(0).repeat(
                batch_size, 1, 1, 1)
            target_mask = torch.ones(*target_image.size()[:-1], 1,
                                     device=target_image.device,
                                     dtype=torch.float)

    input_image = true_input_img
    # + 0.1 * torch.randn(target_image.size(), device=target_image.device)

    criterion = torch.nn.MSELoss(reduction='none').cuda()
    optimizer = optim.Adam([depth], lr=lr)

    h1 = plt.figure()
    # fig_imgs = []
    depth_imgs = []
    out_imgs = []
    imageio.imsave(out_dir + '/optimization_input_image.png',
                   input_image[0].cpu().numpy())
    imageio.imsave(out_dir + '/optimization_target_image.png',
                   target_image[0].cpu().numpy())
    if not use_chair:
        imageio.imsave(
            out_dir + '/optimization_target_depth.png',
            true_depth.view(*input_image.size()[:-1], 1)[0].cpu().numpy())

    loss_per_iter = []
    # note: the iteration count is hard-coded to 500 here; `max_iter` only
    # affects the final print/imsave conditions below
    for iter in range(500):
        optimizer.zero_grad()

        # depth_in = torch.nn.functional.softplus(depth + 3)
        depth_in = depth + 4
        pos_cc = z_to_pcl_CC_batched(-depth_in, camera)  # NOTE: z = -depth
        pos_wc = cam_to_world_batched(pos_cc, None, camera)['pos']

        if use_fast_projection:
            im_est, proj_out = projection_renderer_differentiable_fast(
                pos_wc, input_image, rotated_camera)
            im_mask = proj_out['mask']
        else:
            im_est, im_mask = projection_renderer_differentiable(
                pos_wc, input_image, rotated_camera)
        # im_est, mask = projection_renderer(pos_wc, input_image,
        #                                    rotated_camera)

        if use_masked_loss:
            loss = torch.sum(target_mask * im_mask *
                             criterion(im_est * 255, target_image * 255)) / \
                torch.sum(target_mask * im_mask)
        else:
            loss = criterion(im_est * 255, target_image * 255).mean()

        loss_ = get_data(loss)
        loss_per_iter.append(loss_)

        if iter % print_interval == 0 or iter == max_iter - 1:
            print('{}. Loss: {}'.format(iter, loss_))

        if iter % imsave_interval == 0 or iter == max_iter - 1:
            # Input image
            # im_out_ = get_data(input_image.detach())
            # im_out_ = np.uint8(255 * im_out_ / im_out_.max())
            # fig = plt.figure(h1.number)
            # plot = fig.add_subplot(111)
            # plot.imshow(im_out_[0].squeeze())
            # plot.set_title('%d. loss= %f' % (iter, loss_))
            # fig_data = np.array(fig.canvas.renderer._renderer)
            # fig_imgs.append(fig_data)

            # Depth
            im_out_ = get_data(
                depth_in.view(*input_image.size()[:-1], 1).detach())
            im_out_ = np.uint8(255 * im_out_ / im_out_.max())
            fig = plt.figure(h1.number)
            plot = fig.add_subplot(111)
            plot.imshow(im_out_[0].squeeze())
            plot.set_title('%d. loss= %f' % (iter, loss_))
            depth_data = np.array(fig.canvas.renderer._renderer)
            depth_imgs.append(depth_data)

            # Output image
            im_out_ = get_data(im_est.detach())
            im_out_ = np.uint8(255 * im_out_ / im_out_.max())
            fig = plt.figure(h1.number)
            plot = fig.add_subplot(111)
            plot.imshow(im_out_[0].squeeze())
            plot.set_title('%d. loss= %f' % (iter, loss_))
            out_data = np.array(fig.canvas.renderer._renderer)
            out_imgs.append(out_data)

        loss.backward()
        optimizer.step()

    # imageio.mimsave(out_dir + '/optimization_anim_in.gif', fig_imgs)
    imageio.mimsave(out_dir + '/optimization_anim_depth.gif', depth_imgs)
    imageio.mimsave(out_dir + '/optimization_anim_out.gif', out_imgs)
def test_visual_reverse_renderer(scene, batch_size):
    """Test that outputs visual images for the user to compare."""
    from torchvision.utils import save_image
    import time

    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)
    original_camera = copy.deepcopy(camera)

    pos_wc = res['pos'].reshape(-1, res['pos'].shape[-1]).repeat(
        batch_size, 1, 1)
    depth = res['depth'].repeat(batch_size, 1, 1).reshape(batch_size, -1)
    # pos_cc = z_to_pcl_CC_batched(-depth, scene['camera'])  # NOTE: z = -depth
    # pos_wc = cam_to_world_batched(pos_cc, None, scene['camera'])['pos']

    randomly_rotate_cameras(camera, theta_range=[-np.pi / 16, np.pi / 16],
                            phi_range=[-np.pi / 8, np.pi / 8])

    image = res['image'].repeat(batch_size, 1, 1, 1)
    save_image(image.clone().permute(0, 3, 1, 2), 'test-original.png', nrow=2)
    save_image(depth.view(*image.size()[:-1], 1).clone().permute(0, 3, 1, 2),
               'test-original-depth.png', nrow=2, normalize=True)

    # im, _ = projection_renderer(pos_wc, image, camera)
    # save_image(im.clone().permute(0, 3, 1, 2),
    #            'test-rotated-reprojected-nonblurred.png', nrow=2)

    # If we want to merge with another already rotated image
    # NOTE: only works on batch 1 because `render` is not batched
    rotated_scene = copy.deepcopy(scene)
    rotated_scene['camera'] = copy.deepcopy(camera)
    rotated_scene['camera']['eye'] = rotated_scene['camera']['eye'][0]
    rotated_scene['camera']['at'] = rotated_scene['camera']['at'][0]
    rotated_scene['camera']['up'] = rotated_scene['camera']['up'][0]
    res_rotated = render(rotated_scene)
    rotated_image = res_rotated['image'].repeat(batch_size, 1, 1, 1)
    save_image(rotated_image.clone().permute(0, 3, 1, 2),
               'test-original-rotated.png', nrow=2)
    out_pos_wc = res_rotated['pos'].reshape(-1, res['pos'].shape[-1]).repeat(
        batch_size, 1, 1)

    # Time the reverse renderer against the fast forward projection
    torch.cuda.synchronize()
    st = time.time()
    im, proj_out = projection_reverse_renderer(
        image, pos_wc, out_pos_wc, original_camera, camera,
        compute_new_depth=True)
    torch.cuda.synchronize()
    print(f"t1: {time.time() - st}")

    st = time.time()
    projection_renderer_differentiable_fast(pos_wc, image, camera,
                                            blur_size=1e-10,
                                            compute_new_depth=True)
    torch.cuda.synchronize()
    print(f"t2: {time.time() - st}")

    save_image(im.clone().permute(0, 3, 1, 2),
               'test-fast-rotated-reprojected-unmerged.png', nrow=2)
    save_image(proj_out['mask'].clone().permute(0, 3, 1, 2),
               'test-fast-soft-mask.png', nrow=2, normalize=True)
    save_image(proj_out['depth'].clone().permute(0, 3, 1, 2),
               'test-fast-depth.png', nrow=2, normalize=True)

    for key in proj_out.keys():
        proj_out[key] = proj_out[key].cpu().numpy()
    np.savez('test-fast-rotation-reprojection.npz', **proj_out,
             image=im.cpu().numpy(), image_in=image.cpu().numpy(),
             depth_in=depth.view(*image.size()[:-1], 1).cpu().numpy())

    im, _ = projection_reverse_renderer(image, pos_wc, out_pos_wc,
                                        original_camera, camera,
                                        rotated_image)
    save_image(im.clone().permute(0, 3, 1, 2),
               'test-fast-rotated-reprojected-merged.png', nrow=2)
def render_scene(scene_file):
    scene = load_scene(scene_file)
    scene = make_torch_var(scene)
    return render(scene)
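# Usage sketch (the file name is hypothetical): render_scene chains the scene
# loader, the numpy-to-torch conversion, and the renderer, so a single scene
# description file yields the renderer's result dict.
#
#     res = render_scene('scenes/basic_scene.json')
#     img = get_data(res['image'])    # H x W x 3 image
#     depth = get_data(res['depth'])  # H x W depth map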