def load(self):
    """Instantiate the GL scene for this dataset and, on first use, build the
    point-perturbation sampler (only when perturbation is enabled)."""
    self.scene = NNScene()
    setup_scene(self.scene, self.scene_data, use_mesh=self.use_mesh)
    # Guard clause: nothing more to do unless perturbation is on and the
    # sampler has not been created yet.
    if not self.perturb_points or self.fastrand is not None:
        return
    print(f'SETTING PERTURB POINTS: {self.perturb_points}')
    # Map uniform [0, 1) samples to centered offsets scaled by perturb_points.
    tform = lambda p: self.perturb_points * (p - 0.5)
    num_points = self.scene_data['pointcloud']['xyz'].shape[0]
    self.fastrand = FastRand((num_points, 2), tform, 10)
def load_scene(config_path):
    """Load scene data from a config path and set up an NNScene for it.

    Returns a ``(scene, scene_data)`` tuple.
    """
    data = load_scene_data(config_path)
    built_scene = NNScene()
    setup_scene(built_scene, data)
    return built_scene, data
class DynamicDataset:
    """Dataset that renders network inputs on the fly with an OpenGL scene and
    pairs them with ground-truth target images.

    Fix over the original: the duplicated ``self.target_list = target_list``
    assignment in ``__init__`` has been removed (it was assigned twice).
    """

    # Near/far clipping planes used for the projection matrix.
    znear = 0.1
    zfar = 1000

    def __init__(self, scene_data, input_format, image_size,
                 view_list, target_list, mask_list, label_list,
                 keep_fov=False, gl_frame=False,
                 input_transform=None, target_transform=None,
                 num_samples=None,
                 random_zoom=None, random_shift=None,
                 drop_points=0., perturb_points=0.,
                 label_in_input=False,
                 crop_by_mask=False,
                 use_mesh=False,
                 supersampling=1):
        """Store configuration; GL resources are created lazily in load()/__getitem__.

        Args:
            scene_data: dict with 'intrinsic_matrix', 'config', 'pointcloud', ...
                (schema assumed from usage — confirm against load_scene_data).
            input_format: format spec forwarded to MultiscaleRender.
            image_size: int or (w, h) render size.
            view_list/target_list/mask_list/label_list: per-sample camera poses,
                target image paths, optional mask paths, optional label paths.
            keep_fov: keep field of view when rescaling intrinsics.
            gl_frame: flip images vertically to match the GL frame.
            input_transform/target_transform: tensor transforms (defaults used
                when None).
            num_samples: virtual dataset length (defaults to len(view_list)).
            random_zoom/random_shift: ranges for intrinsics augmentation.
            drop_points: fraction of points randomly discarded per sample.
            perturb_points: magnitude of random 2D point perturbation.
            label_in_input: replace rendered label channels with GT labels.
            crop_by_mask: center random shift on a mask-derived crop center.
            use_mesh: render the mesh instead of the point cloud.
            supersampling: supersampling factor for the renderer.
        """
        print('gl_frame', gl_frame)
        if isinstance(image_size, (int, float)):
            image_size = image_size, image_size

        # if render image size is different from camera image size, then shift principal point
        K_src = scene_data['intrinsic_matrix']
        old_size = scene_data['config']['viewport_size']
        sx = image_size[0] / old_size[0]
        sy = image_size[1] / old_size[1]
        K = rescale_K(K_src, sx, sy, keep_fov)

        assert len(view_list) == len(target_list)

        print('image_size', image_size)

        self.view_list = view_list
        self.target_list = target_list
        self.mask_list = mask_list
        self.label_list = label_list
        self.scene_data = scene_data
        self.input_format = input_format
        self.image_size = image_size
        # Renderer and scene are created lazily (renderer in __getitem__ so the
        # GL/CUDA contexts belong to the worker process; scene in load()).
        self.renderer = None
        self.scene = None
        self.K = K
        self.K_src = K_src
        self.random_zoom = random_zoom
        self.random_shift = random_shift
        self.sx = sx
        self.sy = sy
        self.keep_fov = keep_fov
        self.gl_frame = gl_frame
        # NOTE: the original assigned self.target_list twice; the duplicate
        # assignment was removed here.
        self.input_transform = default_input_transform if input_transform is None else input_transform
        self.target_transform = default_target_transform if target_transform is None else target_transform
        self.num_samples = num_samples if num_samples else len(view_list)
        self.id = None
        self.name = None
        self.drop_points = drop_points
        self.perturb_points = perturb_points
        self.label_in_input = label_in_input
        self.crop_by_mask = crop_by_mask
        self.use_mesh = use_mesh
        self.ss = supersampling
        self.fastrand = None
        self.timing = None
        self.count = 0

    def load(self):
        """Create the GL scene and (once) the point-perturbation sampler."""
        self.scene = NNScene()
        setup_scene(self.scene, self.scene_data, use_mesh=self.use_mesh)
        if self.perturb_points and self.fastrand is None:
            print(f'SETTING PERTURB POINTS: {self.perturb_points}')
            # Uniform [0, 1) samples mapped to centered offsets.
            tform = lambda p: self.perturb_points * (p - 0.5)
            self.fastrand = FastRand((self.scene_data['pointcloud']['xyz'].shape[0], 2), tform, 10)

    def unload(self):
        """Release the GL scene created by load()."""
        self.scene.delete()
        self.scene = None

    def __len__(self):
        return self.num_samples

    def __getitem__(self, idx):
        # assert -1 < idx < len(self)
        # num_samples may exceed the number of views; wrap around.
        idx = idx % len(self.view_list)

        # we want to make sure GL and CUDA Interop contexts are created in
        # the process calling __getitem__ method, otherwise rendering would not work
        if self.renderer is None:
            assert self.scene is not None, 'call load()'
            app.Window(visible=False)  # creates GL context
            self.renderer = MultiscaleRender(self.scene, self.input_format, self.image_size, supersampling=self.ss)

        if self.timing is None:
            self.timing = AccumDict()

        tt = TicToc()
        tt.tic()

        mask = None
        mask_crop = None
        if self.mask_list[idx]:
            mask = load_image(self.mask_list[idx])
            if self.crop_by_mask:
                # Crop center in normalized [-1, 1] coordinates.
                cnt = get_rnd_crop_center_v1(mask[..., 0])
                mask_crop = -1 + 2 * np.array(cnt) / mask.shape[:2]

        view_matrix = self.view_list[idx]
        K, proj_matrix = self._get_intrinsics(shift=mask_crop)

        target = load_image(self.target_list[idx])
        target = self._warp(target, K)

        if mask is None:
            mask = np.ones((target.shape[0], target.shape[1], 1), dtype=np.float32)
        else:
            mask = self._warp(mask, K)

        if self.label_list[idx]:
            label = load_image(self.label_list[idx])
            label = self._warp(label, K)
            label = label[..., :1]
        else:
            label = np.zeros((target.shape[0], target.shape[1], 1), dtype=np.uint8)

        self.timing.add('get_target', tt.toc())
        tt.tic()

        if self.drop_points:
            # Randomly discard a fraction of points for this sample.
            self.scene.set_point_discard(np.random.rand(self.scene_data['pointcloud']['xyz'].shape[0]) < self.drop_points)

        if self.perturb_points:
            self.scene.set_point_perturb(self.fastrand.toss())

        input_ = self.renderer.render(view_matrix=view_matrix, proj_matrix=proj_matrix)

        if self.label_in_input:
            # Replace rendered label channels with ground-truth labels, masked
            # by where something was actually rendered.
            for k in input_:
                if 'labels' in k:
                    m = input_[k].sum(2) > 1e-9
                    label_sz = cv2.resize(label, (input_[k].shape[1], input_[k].shape[0]), interpolation=cv2.INTER_NEAREST)
                    label_m = label_sz * m
                    input_[k] = label_m[..., None]

        self.timing.add('render', tt.toc())
        tt.tic()

        input_ = {k: self.input_transform(v) for k, v in input_.items()}
        target = self.target_transform(target)
        mask = ToTensor()(mask)
        label = ToTensor()(label)

        input_['id'] = self.id

        self.timing.add('transform', tt.toc())

        # if self.count and self.count % 100 == 0:
        #     print(self.timing)
        self.count += 1

        return {
            'input': input_,
            'view_matrix': view_matrix,
            'intrinsic_matrix': K,
            'target': target,
            'target_filename': self.target_list[idx],
            'mask': mask,
            'label': label
        }

    def _get_intrinsics(self, shift=None):
        """Return (K, proj_matrix) with optional random zoom/shift augmentation.

        ``shift`` overrides the random principal-point shift in normalized
        [-1, 1] coordinates (used with crop_by_mask).
        """
        K = self.K.copy()
        sx = 1. if self.keep_fov else self.sx
        sy = 1. if self.keep_fov else self.sy
        if self.random_zoom:
            z = rand_(*self.random_zoom)
            K[0, 0] *= z
            K[1, 1] *= z
            # Zooming changes the effective scale budget for shifting.
            sx /= z
            sy /= z
        if self.random_shift:
            if shift is None:
                x, y = rand_(*self.random_shift, 2)
            else:
                x, y = shift
            # Maximum principal-point shift that keeps the crop inside the image.
            w = self.image_size[0] * (1. - sx) / sx / 2.
            h = self.image_size[1] * (1. - sy) / sy / 2.
            K[0, 2] += x * w
            K[1, 2] += y * h
        return K, get_proj_matrix(K, self.image_size, self.znear, self.zfar)

    def _warp(self, image, K):
        """Warp an image from source intrinsics K_src into intrinsics K."""
        H = K @ np.linalg.inv(self.K_src)
        image = cv2.warpPerspective(image, H, tuple(self.image_size))
        if self.gl_frame:
            # GL has a bottom-left origin; flip vertically.
            image = image[::-1].copy()
        return image
def __init__(self, args):
    # NOTE(review): this def appears to be an orphaned duplicate of
    # MyApp.__init__ below — confirm whether it should exist at all.
    """Build the viewer from parsed CLI args: load scene data, set up the
    viewport/intrinsics, create the GL window, optional neural renderer,
    and register window event handlers."""
    with open(args.config) as f:
        # NOTE(review): yaml.load without Loader= is deprecated (PyYAML >= 5.1)
        # and a TypeError on PyYAML 6+ — confirm pinned PyYAML version.
        _config = yaml.load(f)
        # support two types of configs
        # 1 type - config with scene data
        # 2 type - config with model checkpoints and path to scene data config
        if 'scene' in _config:  # 1 type
            self.scene_data = load_scene_data(_config['scene'])
            net_ckpt = _config.get('net_ckpt')
            texture_ckpt = _config.get('texture_ckpt')
        else:
            self.scene_data = load_scene_data(args.config)
            net_ckpt = self.scene_data['config'].get('net_ckpt')
            texture_ckpt = self.scene_data['config'].get('texture_ckpt')

    self.viewport_size = args.viewport if args.viewport else self.scene_data[
        'config']['viewport_size']
    self.viewport_size = fix_viewport_size(self.viewport_size)
    print('new viewport size ', self.viewport_size)

    # crop/resize viewport
    if self.scene_data['intrinsic_matrix'] is not None:
        K_src = self.scene_data['intrinsic_matrix']
        old_size = self.scene_data['config']['viewport_size']
        sx = self.viewport_size[0] / old_size[0]
        sy = self.viewport_size[1] / old_size[1]
        K_crop = rescale_K(K_src, sx, sy, keep_fov=args.keep_fov)
        self.scene_data['proj_matrix'] = get_proj_matrix(
            K_crop, self.viewport_size)
    elif self.scene_data['proj_matrix'] is not None:
        new_proj_matrix = crop_proj_matrix(
            self.scene_data['proj_matrix'],
            *self.scene_data['config']['viewport_size'],
            *self.viewport_size)
        self.scene_data['proj_matrix'] = new_proj_matrix
    else:
        raise Exception('no intrinsics are provided')

    # Choose the initial camera pose: a named camera label, a pose file on
    # disk, or the first view matrix.
    if args.init_view:
        # NOTE(review): membership is tested against 'view_matrix' but the
        # index is taken from 'camera_labels'; also init_view may be left
        # unbound if neither branch matches — confirm intended behavior.
        if args.init_view in self.scene_data['view_matrix']:
            idx = self.scene_data['camera_labels'].index(args.init_view)
            init_view = self.scene_data['view_matrix'][idx]
        elif os.path.exists(args.init_view):
            init_view = np.loadtxt(args.init_view)
    else:
        init_view = self.scene_data['view_matrix'][0]

    if args.origin_view:
        top_view = np.eye(4)
        top_view[2, 3] = 20.
        init_view = top_view

        if np.allclose(self.scene_data['model3d_origin'], np.eye(4)):
            print('Setting origin as mass center')
            origin = np.eye(4)
            origin[:3, 3] = -np.percentile(
                self.scene_data['pointcloud']['xyz'], 90, 0)
            self.scene_data['model3d_origin'] = origin
    else:
        # force identity origin
        self.scene_data['model3d_origin'] = np.eye(4)

    self.trackball = Trackball(init_view,
                               self.viewport_size,
                               1,
                               rotation_mode=args.rmode)

    args.use_mesh = args.use_mesh or _config.get(
        'use_mesh') or args.use_texture

    # this also creates GL context necessary for setting up shaders
    self.window = app.Window(width=self.viewport_size[0],
                             height=self.viewport_size[1],
                             visible=True,
                             fullscreen=False)
    self.window.set_size(*self.viewport_size)

    if args.checkpoint:
        # Derive the net checkpoint path from the texture checkpoint name.
        assert 'Texture' in args.checkpoint, 'Set path to descriptors checkpoint'
        ep = re.search('epoch_[0-9]+', args.checkpoint).group().split('_')[-1]
        net_name = f'UNet_stage_0_epoch_{ep}_net.pth'
        net_ckpt = os.path.join(*args.checkpoint.split('/')[:-1], net_name)
        texture_ckpt = args.checkpoint

    need_neural_render = net_ckpt is not None

    # Torch output buffer is needed for CUDA interop with the neural renderer.
    self.out_buffer_location = 'torch' if need_neural_render else 'opengl'

    # setup screen image plane
    self.off_render = OffscreenRender(
        viewport_size=self.viewport_size,
        out_buffer_location=self.out_buffer_location,
        clear_color=args.clear_color)
    if self.out_buffer_location == 'torch':
        screen_tex, self.screen_tex_cuda = create_shared_texture(
            np.zeros((self.viewport_size[1], self.viewport_size[0], 4),
                     np.float32))
    else:
        screen_tex, self.screen_tex_cuda = self.off_render.color_buf, None
    self.screen_program = get_screen_program(screen_tex)

    self.scene = NNScene()

    if need_neural_render:
        print(f'Net checkpoint: {net_ckpt}')
        print(f'Texture checkpoint: {texture_ckpt}')
        self.model = OGL(self.scene,
                         self.scene_data,
                         self.viewport_size,
                         net_ckpt,
                         texture_ckpt,
                         out_buffer_location=self.out_buffer_location,
                         supersampling=args.supersampling,
                         temporal_average=args.temp_avg)
    else:
        self.model = None

    if args.pca:
        # Visualize texture descriptors via PCA, normalized by 10th/90th
        # percentiles and clipped to [0, 1].
        assert texture_ckpt
        tex = torch.load(texture_ckpt,
                         map_location='cpu')['state_dict']['texture_']
        print('PCA...')
        pca = pca_color(tex)
        pca = (pca - np.percentile(pca, 10)) / (np.percentile(pca, 90) -
                                                np.percentile(pca, 10))
        pca = np.clip(pca, 0, 1)
        self.scene_data['pointcloud']['rgb'] = np.clip(pca, 0, 1)

    setup_scene(self.scene, self.scene_data, args.use_mesh, args.use_texture)
    if args.light_position is not None:
        self.scene.set_light_position(args.light_position)

    if args.replay_camera:
        self.camera_trajectory = load_camera_trajectory(args.replay_camera)
    else:
        self.camera_trajectory = None

    # Register window event handlers.
    self.window.attach(self.screen_program['transform'])
    self.window.push_handlers(on_init=self.on_init)
    self.window.push_handlers(on_close=self.on_close)
    self.window.push_handlers(on_draw=self.on_draw)
    self.window.push_handlers(on_resize=self.on_resize)
    self.window.push_handlers(on_key_press=self.on_key_press)
    self.window.push_handlers(on_mouse_press=self.on_mouse_press)
    self.window.push_handlers(on_mouse_drag=self.on_mouse_drag)
    self.window.push_handlers(on_mouse_release=self.on_mouse_release)
    self.window.push_handlers(on_mouse_scroll=self.on_mouse_scroll)

    # Rendering-mode state toggled by keyboard shortcuts.
    self.mode0 = NNScene.MODE_COLOR
    self.mode1 = 0
    self.point_size = 1
    self.point_mode = False
    self.draw_points = not args.use_mesh
    self.flat_color = True
    self.neural_render = need_neural_render
    self.show_pca = False

    self.n_frame = 0
    self.t_elapsed = 0

    self.last_frame = None
    self.last_view_matrix = None
    self.last_gt_image = None

    self.mouse_pressed = False

    app.stopped = threading.Event()

    self.args = args
class MyApp():
    """Interactive viewer: renders a scene (raw OpenGL modes or neural
    rendering) into a window, with trackball camera control, keyboard mode
    switching, screenshots, and optional camera-trajectory replay.

    Fixes over the original:
    - ``yaml.load(f)`` now passes an explicit ``Loader`` (required by
      PyYAML >= 6, deprecated without it since 5.1).
    - ``get_next_view_matrix`` used the module-global ``args`` instead of
      ``self.args`` when computing the replay frame index.
    """

    def __init__(self, args):
        """Load scene data, configure viewport/intrinsics, create the GL
        window and (optionally) the neural renderer, register handlers."""
        with open(args.config) as f:
            # FullLoader keeps pre-5.1 behavior for trusted configs while
            # satisfying the explicit-Loader requirement of PyYAML 6+.
            _config = yaml.load(f, Loader=yaml.FullLoader)
            # support two types of configs
            # 1 type - config with scene data
            # 2 type - config with model checkpoints and path to scene data config
            if 'scene' in _config:  # 1 type
                self.scene_data = load_scene_data(_config['scene'])
                net_ckpt = _config.get('net_ckpt')
                texture_ckpt = _config.get('texture_ckpt')
            else:
                self.scene_data = load_scene_data(args.config)
                net_ckpt = self.scene_data['config'].get('net_ckpt')
                texture_ckpt = self.scene_data['config'].get('texture_ckpt')

        self.viewport_size = args.viewport if args.viewport else self.scene_data[
            'config']['viewport_size']
        self.viewport_size = fix_viewport_size(self.viewport_size)
        print('new viewport size ', self.viewport_size)

        # crop/resize viewport
        if self.scene_data['intrinsic_matrix'] is not None:
            K_src = self.scene_data['intrinsic_matrix']
            old_size = self.scene_data['config']['viewport_size']
            sx = self.viewport_size[0] / old_size[0]
            sy = self.viewport_size[1] / old_size[1]
            K_crop = rescale_K(K_src, sx, sy, keep_fov=args.keep_fov)
            self.scene_data['proj_matrix'] = get_proj_matrix(
                K_crop, self.viewport_size)
        elif self.scene_data['proj_matrix'] is not None:
            new_proj_matrix = crop_proj_matrix(
                self.scene_data['proj_matrix'],
                *self.scene_data['config']['viewport_size'],
                *self.viewport_size)
            self.scene_data['proj_matrix'] = new_proj_matrix
        else:
            raise Exception('no intrinsics are provided')

        # Initial camera pose: named camera, pose file, or first view.
        if args.init_view:
            # NOTE(review): membership is tested against 'view_matrix' but the
            # index comes from 'camera_labels'; init_view can also be left
            # unbound if neither branch matches — behavior kept as-is.
            if args.init_view in self.scene_data['view_matrix']:
                idx = self.scene_data['camera_labels'].index(args.init_view)
                init_view = self.scene_data['view_matrix'][idx]
            elif os.path.exists(args.init_view):
                init_view = np.loadtxt(args.init_view)
        else:
            init_view = self.scene_data['view_matrix'][0]

        if args.origin_view:
            top_view = np.eye(4)
            top_view[2, 3] = 20.
            init_view = top_view

            if np.allclose(self.scene_data['model3d_origin'], np.eye(4)):
                print('Setting origin as mass center')
                origin = np.eye(4)
                origin[:3, 3] = -np.percentile(
                    self.scene_data['pointcloud']['xyz'], 90, 0)
                self.scene_data['model3d_origin'] = origin
        else:
            # force identity origin
            self.scene_data['model3d_origin'] = np.eye(4)

        self.trackball = Trackball(init_view,
                                   self.viewport_size,
                                   1,
                                   rotation_mode=args.rmode)

        args.use_mesh = args.use_mesh or _config.get(
            'use_mesh') or args.use_texture

        # this also creates GL context necessary for setting up shaders
        self.window = app.Window(width=self.viewport_size[0],
                                 height=self.viewport_size[1],
                                 visible=True,
                                 fullscreen=False)
        self.window.set_size(*self.viewport_size)

        if args.checkpoint:
            # Derive the net checkpoint path next to the texture checkpoint.
            assert 'Texture' in args.checkpoint, 'Set path to descriptors checkpoint'
            ep = re.search('epoch_[0-9]+', args.checkpoint).group().split('_')[-1]
            net_name = f'UNet_stage_0_epoch_{ep}_net.pth'
            net_ckpt = os.path.join(*args.checkpoint.split('/')[:-1], net_name)
            texture_ckpt = args.checkpoint

        need_neural_render = net_ckpt is not None

        # Torch output buffer is required for CUDA interop with the net.
        self.out_buffer_location = 'torch' if need_neural_render else 'opengl'

        # setup screen image plane
        self.off_render = OffscreenRender(
            viewport_size=self.viewport_size,
            out_buffer_location=self.out_buffer_location,
            clear_color=args.clear_color)
        if self.out_buffer_location == 'torch':
            screen_tex, self.screen_tex_cuda = create_shared_texture(
                np.zeros((self.viewport_size[1], self.viewport_size[0], 4),
                         np.float32))
        else:
            screen_tex, self.screen_tex_cuda = self.off_render.color_buf, None
        self.screen_program = get_screen_program(screen_tex)

        self.scene = NNScene()

        if need_neural_render:
            print(f'Net checkpoint: {net_ckpt}')
            print(f'Texture checkpoint: {texture_ckpt}')
            self.model = OGL(self.scene,
                             self.scene_data,
                             self.viewport_size,
                             net_ckpt,
                             texture_ckpt,
                             out_buffer_location=self.out_buffer_location,
                             supersampling=args.supersampling,
                             temporal_average=args.temp_avg)
        else:
            self.model = None

        if args.pca:
            # Visualize texture descriptors via PCA, normalized by the
            # 10th/90th percentiles and clipped to [0, 1].
            assert texture_ckpt
            tex = torch.load(texture_ckpt,
                             map_location='cpu')['state_dict']['texture_']
            print('PCA...')
            pca = pca_color(tex)
            pca = (pca - np.percentile(pca, 10)) / (np.percentile(pca, 90) -
                                                    np.percentile(pca, 10))
            pca = np.clip(pca, 0, 1)
            self.scene_data['pointcloud']['rgb'] = np.clip(pca, 0, 1)

        setup_scene(self.scene, self.scene_data, args.use_mesh, args.use_texture)
        if args.light_position is not None:
            self.scene.set_light_position(args.light_position)

        if args.replay_camera:
            self.camera_trajectory = load_camera_trajectory(args.replay_camera)
        else:
            self.camera_trajectory = None

        # Register window event handlers.
        self.window.attach(self.screen_program['transform'])
        self.window.push_handlers(on_init=self.on_init)
        self.window.push_handlers(on_close=self.on_close)
        self.window.push_handlers(on_draw=self.on_draw)
        self.window.push_handlers(on_resize=self.on_resize)
        self.window.push_handlers(on_key_press=self.on_key_press)
        self.window.push_handlers(on_mouse_press=self.on_mouse_press)
        self.window.push_handlers(on_mouse_drag=self.on_mouse_drag)
        self.window.push_handlers(on_mouse_release=self.on_mouse_release)
        self.window.push_handlers(on_mouse_scroll=self.on_mouse_scroll)

        # Rendering-mode state toggled by keyboard shortcuts.
        self.mode0 = NNScene.MODE_COLOR
        self.mode1 = 0
        self.point_size = 1
        self.point_mode = False
        self.draw_points = not args.use_mesh
        self.flat_color = True
        self.neural_render = need_neural_render
        self.show_pca = False

        self.n_frame = 0
        self.t_elapsed = 0

        self.last_frame = None
        self.last_view_matrix = None
        self.last_gt_image = None

        self.mouse_pressed = False

        app.stopped = threading.Event()

        self.args = args

    def run(self):
        """Run the GL app loop; signal app.stopped when it exits."""
        if self.args.fps:
            start_fps_job()
        app.run()
        app.stopped.set()

    def render_frame(self, view_matrix):
        """Render one frame for the given camera view, either through the
        neural model or via one of the raw scene rendering modes."""
        self.scene.set_camera_view(view_matrix)

        if self.neural_render:
            frame = self.model.infer()['output'].flip([0])
        else:
            self.scene.set_mode(self.mode0, self.mode1)
            # point_mode cycles 0/1/2: no splats, GL splat mode, shader splats.
            if self.point_mode == 0:
                self.scene.set_splat_mode(False)
                self.scene.program['splat_mode'] = int(0)
            elif self.point_mode == 1:
                self.scene.set_splat_mode(True)
                self.scene.program['splat_mode'] = int(0)
            elif self.point_mode == 2:
                self.scene.set_splat_mode(False)
                self.scene.program['splat_mode'] = int(1)
            if not self.scene.use_point_sizes:
                self.scene.set_point_size(self.point_size)
            self.scene.set_draw_points(self.draw_points)
            self.scene.set_flat_color(self.flat_color)
            frame = self.off_render.render(self.scene)

        return frame

    def print_info(self):
        """Print the current render mode, normals mode and point settings."""
        print('-- start info')
        mode = [
            m[0] for m in NNScene.__dict__.items()
            if m[0].startswith('MODE_') and self.mode0 == m[1]
        ][0]
        print(mode)
        n_mode = [
            m[0] for m in NNScene.__dict__.items()
            if m[0].startswith('NORMALS_MODE_') and self.mode1 == m[1]
        ][0]
        print(n_mode)
        print(f'point size {self.point_size}')
        print(f'splat mode: {self.point_mode}')
        print('-- end info')

    def save_screen(self, out_dir='./data/screenshots'):
        """Save the last rendered frame (PNG) and its camera pose (txt),
        both timestamped."""
        os.makedirs(out_dir, exist_ok=True)
        get_name = lambda s: time.strftime(f"%m-%d_%H-%M-%S___{s}")

        # RGBA float frame -> BGR uint8-range image, flipped for image origin.
        # NOTE(review): assumes the torch buffer path (self.last_frame.cpu()).
        img = self.last_frame.cpu().numpy()[..., :3][::-1, :, ::-1] * 255
        cv2.imwrite(os.path.join(out_dir, get_name('screenshot') + '.png'), img)

        np.savetxt(os.path.join(out_dir, get_name('pose') + '.txt'),
                   self.last_view_matrix)

    def get_next_view_matrix(self, frame_num, elapsed_time):
        """Return the next camera pose: the trackball pose, or — during
        replay — the trajectory frame for the elapsed time."""
        if self.camera_trajectory is None:
            return self.trackball.pose

        # Fix: the original read the module-global ``args`` here.
        n = int(elapsed_time * self.args.replay_fps) % len(self.camera_trajectory)
        return self.camera_trajectory[n]

    # ===== Window events =====

    def on_init(self):
        pass

    def on_key_press(self, symbol, modifiers):
        """Keyboard shortcuts for render modes and viewer settings."""
        KEY_PLUS = 61
        if symbol == glfw.GLFW_KEY_X:
            self.mode0 = NNScene.MODE_XYZ
            self.neural_render = False
        elif symbol == glfw.GLFW_KEY_N:
            self.mode0 = NNScene.MODE_NORMALS
            self.neural_render = False
        elif symbol == glfw.GLFW_KEY_C:
            self.mode0 = NNScene.MODE_COLOR
            self.neural_render = False
        elif symbol == glfw.GLFW_KEY_U:
            self.mode0 = NNScene.MODE_UV
            self.neural_render = False
        elif symbol == glfw.GLFW_KEY_D:
            self.mode0 = NNScene.MODE_DEPTH
            self.neural_render = False
        elif symbol == glfw.GLFW_KEY_L:
            self.mode0 = NNScene.MODE_LABEL
            self.neural_render = False
        elif symbol == glfw.GLFW_KEY_Y:
            self.neural_render = True
            self.show_pca = False
        elif symbol == glfw.GLFW_KEY_T:
            self.neural_render = True
            self.show_pca = True
        elif symbol == glfw.GLFW_KEY_Z:
            self.mode1 = (self.mode1 + 1) % 5
        elif symbol == KEY_PLUS:
            self.point_size = self.point_size + 1
        elif symbol == glfw.GLFW_KEY_MINUS:
            self.point_size = max(0, self.point_size - 1)
        elif symbol == glfw.GLFW_KEY_P:
            self.point_mode = (self.point_mode + 1) % 3
        elif symbol == glfw.GLFW_KEY_Q:
            self.draw_points = not self.draw_points
        elif symbol == glfw.GLFW_KEY_F:
            self.flat_color = not self.flat_color
        elif symbol == glfw.GLFW_KEY_I:
            self.print_info()
        elif symbol == glfw.GLFW_KEY_S:
            self.save_screen()
        else:
            print(symbol, modifiers)

    def on_draw(self, dt):
        """Per-frame callback: render, blit to screen, optionally show the
        nearest ground-truth training image."""
        self.last_view_matrix = self.get_next_view_matrix(
            self.n_frame, self.t_elapsed)
        self.last_frame = self.render_frame(self.last_view_matrix)
        if self.out_buffer_location == 'torch':
            cpy_tensor_to_texture(self.last_frame, self.screen_tex_cuda)
        self.window.clear()
        gl.glDisable(gl.GL_CULL_FACE)
        # ensure viewport size is correct (offline renderer could change it)
        gl.glViewport(0, 0, self.viewport_size[0], self.viewport_size[1])
        self.screen_program.draw(gl.GL_TRIANGLE_STRIP)
        self.n_frame += 1
        self.t_elapsed += dt

        if self.args.nearest_train:
            ni = nearest_train(
                self.scene_data['view_matrix'],
                np.linalg.inv(self.scene_data['model3d_origin'])
                @ self.last_view_matrix)
            label = self.scene_data['camera_labels'][ni]
            assert self.args.gt, 'you must define path to gt images'
            path = self.args.gt.replace('*', str(label))
            if not os.path.exists(path):
                print(f'{path} NOT FOUND!')
            elif self.last_gt_image != path:
                # Only reload/show when the nearest image changed.
                self.last_gt_image = path
                img = cv2.imread(path)
                max_side = max(img.shape[:2])
                s = 1024 / max_side
                img = cv2.resize(img, None, None, s, s)
                cv2.imshow('nearest train', img)
                cv2.waitKey(1)

    def on_resize(self, w, h):
        print(f'on_resize {w}x{h}')
        self.trackball.resize((w, h))
        self.screen_program['position'] = [(0, 0), (0, h), (w, 0), (w, h)]

    def on_close(self):
        pass

    def on_mouse_press(self, x, y, buttons, modifiers):
        """Map mouse buttons (+ modifiers) to trackball interaction states."""
        # print(buttons, modifiers)
        self.trackball.set_state(Trackball.STATE_ROTATE)
        if (buttons == app.window.mouse.LEFT):
            ctrl = (modifiers & app.window.key.MOD_CTRL)
            shift = (modifiers & app.window.key.MOD_SHIFT)
            if (ctrl and shift):
                self.trackball.set_state(Trackball.STATE_ZOOM)
            elif ctrl:
                self.trackball.set_state(Trackball.STATE_ROLL)
            elif shift:
                self.trackball.set_state(Trackball.STATE_PAN)
        elif (buttons == app.window.mouse.MIDDLE):
            self.trackball.set_state(Trackball.STATE_PAN)
        elif (buttons == app.window.mouse.RIGHT):
            self.trackball.set_state(Trackball.STATE_ZOOM)

        self.trackball.down(np.array([x, y]))

        # Stop animating while using the mouse
        self.mouse_pressed = True

    def on_mouse_drag(self, x, y, dx, dy, buttons):
        self.trackball.drag(np.array([x, y]))

    def on_mouse_release(self, x, y, button, modifiers):
        self.mouse_pressed = False

    def on_mouse_scroll(self, x, y, dx, dy):
        self.trackball.scroll(dy)