def testConservativeVoxelization(self):
  cube = _create_cube_mesh(99 / 100.0)
  grid = voxelization.voxelize_mesh(
      cube, [12], (3, 3, 3), transformations.scale([1, 1, 1]),
      image_resolution_multiplier=1)
  # Without conservative rasterization, only the 6 face-center voxels of the
  # 3x3x3 grid are hit.
  e = t.zeros([3, 3, 3])
  e[1, 1, [0, 2]] = e[1, [0, 2], 1] = e[[0, 2], 1, 1] = 1
  tt.assert_equal(e[None].numpy(), grid.numpy())

  grid = voxelization.voxelize_mesh(
      cube, [12], (3, 3, 3), transformations.scale([1, 1, 1]),
      image_resolution_multiplier=1, conservative_rasterization=True)
  # With conservative rasterization, every voxel touched by the surface is
  # set, leaving only the interior center voxel empty.
  e = t.ones([3, 3, 3])
  e[1, 1, 1] = 0
  tt.assert_equal(e[None].numpy(), grid.numpy())
def testRendersSimpleVoxelGrid(self):
  voxel_grid = t.as_tensor(
      [
          # z = 0
          [
              [1, 0, 1],  # y = 0
              [0, 0, 0],  # y = 1
              [1, 0, 1],  # y = 2
          ],
          # z = 1
          [
              [0, 0, 0],  # y = 0
              [0, 1, 0],  # y = 1
              [0, 0, 0],  # y = 2
          ],
          # z = 2
          [
              [0, 1, 0],  # y = 0
              [1, 1, 1],  # y = 1
              [0, 1, 0],  # y = 2
          ],
      ],
      dtype=t.int32)

  # Create a camera that looks at the voxel grid center from the side. The
  # 0.5 offset is required, since the voxel grid occupies the unit cube,
  # and its center is at (0.5, 0.5, 0.5).
  look_at = transformations.look_at_rh(
      (-1.2 + 0.5, -1.5 + 0.5, -0.5 + 0.5), (0.5, 0.5, 0.5), (0, 1, 0))
  perspective = transformations.perspective_rh(
      70 * math.pi / 180, 1, 0.1, 10.0)
  model_view_matrix = np.matmul(perspective, look_at)
  image = voxel_renderer.render_voxel_grid(
      voxel_grid,
      model_view_matrix,
      (256, 256),
      # Scale down the voxel grid to fit in the unit cube.
      transformations.scale((1.0 / 3,) * 3),
      # Material 0 is transparent, 1 is red.
      ((-1, 0, 0), (1.0, 0, 0)),
      # Place the light source near the camera.
      light_position=(-1.2 + 0.5, -1.5 + 0.5, -1 + 0.5),
      ambient_light_color=(0.0, 0.0, 0.0),
  )
  image = image.numpy()

  with resources.open_binary(test_data,
                             "expected_image_voxels.png") as in_file:
    pil_image = PIL.Image.open(in_file)
    expected_image = np.array(pil_image)[..., :3]
  self.assertEqual(image.dtype, np.uint8)
  self.assertEqual(tuple(image.shape), tuple(expected_image.shape))
  # Allow a small total L1 difference to absorb platform-dependent
  # rasterization noise.
  difference_l1 = np.abs(
      image.astype(np.int64) - expected_image.astype(np.int64)).sum()
  self.assertAlmostEqual(difference_l1, 0, delta=1024)
def testScaleComputesCorrectMatrix(self):
  tt.assert_array_equal(
      transformations.scale((1, 2, 3)),
      t.tensor((
          (1, 0, 0, 0),
          (0, 2, 0, 0),
          (0, 0, 3, 0),
          (0, 0, 0, 1),
      ), dtype=t.float32))
def perspective_projection(aspect_ratio: InputTensor = 1.0,
                           znear: InputTensor = 0.0001,
                           zfar: InputTensor = 10,
                           fovy_degrees: InputTensor = 60) -> t.Tensor:
  """Returns a 4x4 perspective projection matrix."""
  result = transformations.perspective_lh(fovy_degrees * math.pi / 180,
                                          aspect_ratio, znear, zfar)
  # Invert the Y axis: OpenGL places the 2D origin at the bottom-left corner,
  # while image coordinates place it at the top-left.
  return t.matmul(transformations.scale((1, -1, 1)), result)
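# Sanity-check sketch (not part of the library): composing with
# scale((1, -1, 1)) should negate exactly the Y row of the perspective_lh
# matrix. The perspective_lh argument order is taken from the call above;
# the helper below is hypothetical.
def _check_perspective_projection_flips_y():
  matrix = perspective_projection(aspect_ratio=1.0, fovy_degrees=60)
  reference = transformations.perspective_lh(60 * math.pi / 180, 1.0,
                                             0.0001, 10)
  assert t.allclose(matrix[1], -reference[1])  # Y row is flipped.
  assert t.allclose(matrix[[0, 2, 3]], reference[[0, 2, 3]])  # Others match.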
def testVoxelizesSimpleExample(self):
  """Tests voxelization for a simple example with two triangles."""
  diagonal_quad = t.tensor(
      [
          [  # Triangle 1
              [0, 0, 0],
              [1, 0, 1],
              [0, 1, 0],
          ],
          [  # Triangle 2
              [1, 0, 1],
              [0, 1, 0],
              [1, 1, 1],
          ],
      ],
      dtype=t.float32)
  expected_grid = t.tensor(
      [
          [  # z = 0
              [1, 0, 0, 0],  # y = 0
              [1, 0, 0, 0],  # y = 1
              [1, 0, 0, 0],  # y = 2
              [1, 0, 0, 0],  # y = 3
          ],
          [  # z = 1
              [0, 1, 0, 0],  # y = 0
              [0, 1, 0, 0],  # y = 1
              [0, 1, 0, 0],  # y = 2
              [0, 1, 0, 0],  # y = 3
          ],
          [  # z = 2
              [0, 0, 1, 0],  # y = 0
              [0, 0, 1, 0],  # y = 1
              [0, 0, 1, 0],  # y = 2
              [0, 0, 1, 0],  # y = 3
          ],
          [  # z = 3
              [0, 0, 0, 1],  # y = 0
              [0, 0, 0, 1],  # y = 1
              [0, 0, 0, 1],  # y = 2
              [0, 0, 0, 1],  # y = 3
          ],
      ],
      dtype=t.float32)
  voxel_grid = voxelization.voxelize_mesh(
      diagonal_quad, [2], (4, 4, 4), transformations.scale([4, 4, 4]),
      image_resolution_multiplier=16)
  voxel_grid = voxel_grid.cuda()
  fill_voxels.fill_inside_voxels_gpu(voxel_grid, inplace=True)
  tt.assert_equal(voxel_grid.cpu().numpy(), expected_grid[np.newaxis])
def _apply_skip(self, src3d: t.Tensor, src2d: t.Tensor, stage: int,
                voxel_projection_matrix: t.Tensor,
                voxel_sample_locations: t.Tensor) -> t.Tensor:
  skip_fn = getattr(
      self, f"rt_skip_{stage}",
      None)  # type: ray_traced_skip_connection.SampleGrid2d
  if not skip_fn:
    return src3d
  # Append the voxel sample locations as extra channels of the 2D feature
  # map, broadcasting them over its spatial dimensions.
  o = voxel_sample_locations[:, :, None, None]
  o = o.expand(src2d.shape[0], o.shape[1], *src2d.shape[2:])
  src2d = t.cat([src2d, o], 1)
  # Rescale the projection matrix to account for the resolution of the
  # current stage relative to the full grid resolution.
  r1 = src3d.new_tensor(src3d.shape[2:], dtype=t.float32)
  r2 = src3d.new_tensor(self.config.resolution, dtype=t.float32)
  layer_scale = transformations.scale(r2 / r1)
  layer_matrix = voxel_projection_matrix.matmul(
      layer_scale.to(voxel_projection_matrix.device))
  skip_activations = skip_fn(src2d, layer_matrix, voxel_sample_locations)
  return t.cat([src3d, skip_activations], dim=1)
def __call__(self, input_image: t.Tensor, camera_transform: t.Tensor,
             view_to_voxel_transform: t.Tensor, grid_offsets: t.Tensor,
             output_resolution: Tuple[int, int, int]) -> t.Tensor:
  native_offsets = self.get_native_offsets(output_resolution, grid_offsets)
  resolution_multiplier = self.get_resolution_multiplier(output_resolution)
  batch_size = input_image.shape[0]
  md, mh, mw = [resolution_multiplier] * 3
  scale = transformations.scale([1 / md, 1 / mh, 1 / mw])
  view_to_voxel_transform = (
      view_to_voxel_transform @ scale.to(view_to_voxel_transform.device))
  pmfs = self.inference_fn(input_image, camera_transform,
                           view_to_voxel_transform, native_offsets)
  _, _, num_channels, d, h, w = pmfs.shape
  # Interleave the md * mh * mw shifted sub-grids into a single grid that is
  # (md, mh, mw) times larger along (depth, height, width).
  pmfs = pmfs.reshape([md, mh, mw, batch_size, num_channels, d, h, w])
  pmfs = pmfs.permute([3, 4, 5, 0, 6, 1, 7, 2])
  pmfs = pmfs.reshape([batch_size, num_channels, md * d, mh * h, mw * w])
  return pmfs
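# The reshape/permute/reshape above is a 3D analogue of pixel shuffle: the
# sub-grids predicted for the shifted sample offsets are interleaved so that
# neighboring output voxels come from different sub-grids. A minimal
# standalone sketch with made-up shapes:
def _interleave_sketch():
  import torch
  md = mh = mw = 2
  b, c, d, h, w = 1, 1, 2, 2, 2
  sub = torch.arange(md * mh * mw * b * c * d * h * w, dtype=torch.float32)
  sub = sub.reshape([md, mh, mw, b, c, d, h, w])
  merged = sub.permute([3, 4, 5, 0, 6, 1, 7, 2])
  merged = merged.reshape([b, c, md * d, mh * h, mw * w])
  # Output voxel (z, y, x) comes from sub-grid (z % md, y % mh, x % mw),
  # at local position (z // md, y // mh, x // mw).
  assert merged[0, 0, 3, 2, 1] == sub[1, 0, 1, 0, 0, 1, 1, 0]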
def compute_extra_views(artifacts: Iterable[VisualizationArtifact],
                        aspect_ratio: float,
                        world_to_view: t.Tensor) -> List[t.Tensor]:
  """Computes extra viewpoints for the given artifacts group."""
  device = world_to_view.device
  ltf = t.tensor([sys.float_info.max] * 3, dtype=t.float32, device=device)
  rbb = t.tensor([-sys.float_info.max] * 3, dtype=t.float32, device=device)
  view_to_world = t.inverse(world_to_view)
  # Compute the joint bounding box of all artifacts.
  for artifact in artifacts:
    bbox = artifact.get_3d_box(view_to_world)
    if bbox is None:
      continue
    ltf = t.min(ltf, bbox[0])
    rbb = t.max(rbb, bbox[1])
  if (ltf > rbb).any():
    # No artifact had a bounding box; fall back to the unit cube.
    ltf = t.zeros_like(ltf)
    rbb = t.ones_like(rbb)
  center = (ltf + rbb) / 2
  diagonal = (rbb - ltf).max()
  tetrahedron_cameras = camera_util.cameras_on_tetrahedron_vertices()
  projection_matrix = camera_util.perspective_projection(
      aspect_ratio, znear=0.01, zfar=10)
  # Center the geometry, scale it to unit size, look at it from each
  # tetrahedron vertex, and push it slightly away from the camera.
  result = [[
      projection_matrix,
      transformations.translate([0, 0, 0.3]),
      tetra_camera,
      transformations.scale([1 / diagonal] * 3),
      transformations.translate(-center),
      view_to_world
  ] for tetra_camera in tetrahedron_cameras]
  return [
      transformations.chain([v.to(device) for v in transf_chain])
      for transf_chain in result
  ]
def testSubGridVoxelizationWorks(self):
  """Tests high-precision sub grid voxelization by shifting the geometry."""
  cube = _create_cube_mesh(99 / 100.0)
  grid = voxelization.voxelize_mesh(
      cube, [12], (3, 3, 3), transformations.scale([1, 1, 1]),
      sub_grid_sampling=True, image_resolution_multiplier=9,
      conservative_rasterization=True)
  grid = fill_voxels.fill_inside_voxels_gpu(grid.cuda(), inplace=False).cpu()
  e = t.zeros(1, 7, 7, 7)
  e[0, 2:5, 2:5, 2:5] = 1
  tt.assert_equal(e.numpy(), grid.numpy())
  grid = voxelization.get_sub_grid_centers(grid)
  e = t.zeros(1, 3, 3, 3)
  e[0, 1, 1, 1] = 1
  tt.assert_equal(e.numpy(), grid.numpy())

  cubes = t.cat([cube, cube - 0.5])
  transf = t.stack([
      transformations.translate([-0.5, 0, 0]),
      transformations.translate([0.5, 1, 1])
  ])
  grid = voxelization.voxelize_mesh(
      cubes, [12, 12], (3, 3, 3), transf,
      sub_grid_sampling=True, image_resolution_multiplier=9,
      conservative_rasterization=True)
  grid = fill_voxels.fill_inside_voxels_gpu(grid.cuda(), inplace=False).cpu()
  grid = voxelization.get_sub_grid_centers(grid)
  e1 = t.zeros(3, 3, 3)
  e1[1, 1, [0, 1]] = 1
  tt.assert_equal(e1.numpy(), grid[0].numpy())
  e2 = t.zeros(3, 3, 3)
  e2[1, [1, 2], 1] = e2[2, [1, 2], 1] = 1
  tt.assert_equal(e2.numpy(), grid[1].numpy())
def voxelize(ex: BatchedExample,
             resolution: Tuple[int, int, int],
             voxel_content_fn: Callable[[int, int],
                                        int] = voxel_content_mesh_index,
             sub_grid_sampling: bool = False,
             conservative_rasterization: bool = False,
             image_resolution_multiplier=4,
             projection_depth_multiplier: int = 1,
             fill_inside: bool = True) -> BatchedExample:
  """Voxelizes the batch geometry.

  Args:
    ex: The batch to voxelize.
    resolution: The grid resolution, tuple (depth, height, width).
    voxel_content_fn: A function (batch_index, mesh_index) => voxel_content
      that returns the value to be stored in a voxel, given a batch_index and
      a mesh_index.
    sub_grid_sampling: Allows approximate voxelization with much higher
      virtual resolution. Useful for testing whether points are inside an
      object or not.
    conservative_rasterization: Whether to enable conservative rasterization.
    image_resolution_multiplier: Determines the image resolution used to
      render the triangles, as a function of the voxel grid resolution.
    projection_depth_multiplier: Should be 1. See the documentation of
      corenet.geometry.voxelization.voxelize_mesh.
    fill_inside: Whether to fill the inside of the object.

  Returns:
    The batch, with a replaced voxel grid.
  """
  with t.no_grad():
    d, h, w = resolution
    m = max(d, h, w)
    batch_size = ex.grid_sampling_offset.shape[0]
    # This is the world->voxel transform.
    batch_v2x = (transformations.scale([m, m, m]).expand(
        [batch_size, 4, 4]).to(ex.grid_sampling_offset.device))
    # Now compute a shifted world->voxel transform, to account for the fact
    # that we sample voxels at their centers in practice.
    grid_shift = transformations.translate(ex.grid_sampling_offset - 0.5)
    shifted_w2x = t.matmul(grid_shift, batch_v2x)
    # Replicate each example's transform once per mesh in that example.
    batch_num_meshes = [len(v) for v in ex.mesh_num_tri]
    mesh_v2x = []
    for num_meshes, w2x in zip(batch_num_meshes, shifted_w2x):
      mesh_v2x += [w2x] * num_meshes
    mesh_v2x = t.stack(mesh_v2x, 0)
    meshes_grid = voxelization.voxelize_mesh(
        triangles=ex.vertices,
        mesh_num_tri=t.cat(ex.mesh_num_tri, 0),
        resolution=resolution,
        view2voxel=mesh_v2x,
        cuda_device=None,
        sub_grid_sampling=sub_grid_sampling,
        image_resolution_multiplier=image_resolution_multiplier,
        conservative_rasterization=conservative_rasterization,
        projection_depth_multiplier=projection_depth_multiplier)

    # Allocate the output grid first, to reduce memory fragmentation.
    output_grid = t.zeros([batch_size, d, h, w], dtype=t.int32, device="cuda")
    meshes_grid = meshes_grid.cuda()
    if fill_inside:
      fill_voxels.fill_inside_voxels_gpu(meshes_grid, inplace=True)
    if sub_grid_sampling:
      meshes_grid = voxelization.get_sub_grid_centers(meshes_grid)
    # Merge the per-mesh grids of each example, keeping the largest label
    # where meshes overlap.
    offset = 0
    for batch_idx, num_meshes in enumerate(batch_num_meshes):
      labels = [
          voxel_content_fn(batch_idx, mesh_idx)
          for mesh_idx in range(num_meshes)
      ]
      labels = meshes_grid.new_tensor(labels, dtype=t.float32)
      labels = labels[:, None, None, None].expand(num_meshes, d, h, w)
      grid = labels * meshes_grid[offset:offset + num_meshes]
      offset += num_meshes
      grid = grid.max(dim=0)[0].to(t.int32)
      output_grid[batch_idx] = grid
    return dataclasses.replace(ex, v2x_transform=batch_v2x, grid=output_grid)
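# Sketch of the world->voxel mapping built above, with hypothetical values:
# scale(m) maps the unit cube onto the m^3 voxel grid, and
# translate(offset - 0.5) applies the per-example sampling shift. With
# offset = 0.5 (sampling at voxel centers) the shift cancels out.
def _world_to_voxel_sketch():
  m = 4
  offset = t.tensor([0.5, 0.5, 0.5])
  w2x = t.matmul(transformations.translate(offset - 0.5),
                 transformations.scale([m, m, m]))
  # World-space center of voxel (1, 1, 1) of a 4^3 grid over the unit cube
  # lands at voxel-space coordinate 1.5, i.e. the center of cell 1.
  p = t.tensor([0.375, 0.375, 0.375, 1.0])
  assert t.allclose(t.mv(w2x, p)[:3], t.tensor([1.5, 1.5, 1.5]))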
def render_voxel_grid(
    voxel_grid: InputTensor,
    view_projection_matrix: InputTensor = None,
    image_size: Tuple[int, int] = (256, 256),
    voxel_to_view_matrix: InputTensor = None,
    diffuse_coefficients: InputTensor = None,
    light_position: InputTensor = None,
    light_color: InputTensor = (1.0, 1.0, 1.0),
    ambient_light_color: InputTensor = (0.2, 0.2, 0.2),
    clear_color: Tuple[float, float, float] = (0, 0, 0),
    output_type=t.uint8,
    vertex_shader=None,
    geometry_shader=None,
    fragment_shader=None,
) -> t.Tensor:
  """Renders a voxel grid into an image.

  Args:
    voxel_grid: The voxel grid tensor, containing material IDs,
      int32[depth, height, width].
    view_projection_matrix: Transforms geometry from world to projected
      camera space. A float32[4, 4] row-major transformation matrix that is
      multiplied with 4 x 1 columns of homogeneous coordinates in the shader.
      If not specified, a new matrix will be calculated from the voxel grid.
    image_size: Output image shape, pair (height, width).
    voxel_to_view_matrix: Transforms from object to world space coordinates,
      float32[4, 4] transformation matrix. The voxel grid is assumed to be a
      grid of unit sized cubes placed at the origin. This matrix is
      responsible for transforming it into world space. If omitted, the voxel
      grid will be squeezed into the unit cube.
    diffuse_coefficients: The diffuse coefficients of the materials,
      float32[num_materials, 3].
    light_position: The light position, float32[3]. If None, the light will
      be placed at the camera origin.
    light_color: The light RGB color, float32[3].
    ambient_light_color: The ambient light RGB color, float32[3].
    clear_color: The RGB color to use when clearing the image, float32[3].
    output_type: The output type. Either t.uint8 or t.float32.
    vertex_shader: The vertex shader.
    geometry_shader: The geometry shader.
    fragment_shader: The fragment shader.

  Returns:
    The rendered image, either uint8[height, width, 3] or
    float32[height, width, 3], depending on the value of output_type.
  """
  height, width = image_size
  voxel_grid = util.to_tensor(voxel_grid, t.int32, "cpu")
  assert len(voxel_grid.shape) == 3 and voxel_grid.dtype == t.int32

  if voxel_to_view_matrix is None:
    d = 1.0 / np.max(voxel_grid.shape)
    voxel_to_view_matrix = transformations.scale([d, d, d])
  voxel_to_view_matrix = util.to_tensor(voxel_to_view_matrix, t.float32,
                                        "cpu")
  assert voxel_to_view_matrix.shape == (4, 4)

  if view_projection_matrix is None:
    mesh = t.tensor([[[0.0] * 3, [0.0] * 3, voxel_grid.shape[::-1]]])
    mesh = transformations.transform_mesh(mesh, voxel_to_view_matrix)
    view_projection_matrix = camera_util.get_default_camera_for_mesh(mesh)
  view_projection_matrix = util.to_tensor(view_projection_matrix, t.float32,
                                          "cpu")
  assert view_projection_matrix.shape == (4, 4)

  if diffuse_coefficients is None:
    diffuse_coefficients = util.to_tensor(DEFAULT_COLOR_PALETTE, t.float32,
                                          "cpu")
  diffuse_coefficients = util.to_tensor(diffuse_coefficients, t.float32,
                                        "cpu")
  assert (len(diffuse_coefficients.shape) == 2 and
          diffuse_coefficients.shape[-1] == 3)

  # By default, we use the same fragment shader as the scene renderer, which
  # needs diffuse_textures to be specified. We specify a 1x1 texture, which
  # is however not used, since we emit texture index -1 in the geometry
  # shader.
  diffuse_textures = t.ones([1, 1, 1, 3], dtype=t.uint8)

  # The eye position in camera space is (0, 0, -1). To compute its position
  # in world space, we multiply by the inverse view-projection matrix.
  camera_position = t.mv(t.inverse(view_projection_matrix),
                         t.tensor([0, 0, -1, 1], dtype=t.float32))
  camera_position = camera_position[:3] / camera_position[3]

  if light_position is None:
    light_position = camera_position
  light_position = util.to_tensor(light_position, t.float32, "cpu")
  assert light_position.shape == (3,)
  light_color = util.to_tensor(light_color, t.float32, "cpu")
  assert light_color.shape == (3,)
  ambient_light_color = util.to_tensor(ambient_light_color, t.float32, "cpu")
  assert ambient_light_color.shape == (3,)

  render_args = [
      gl.Uniform("voxel_to_view_matrix", voxel_to_view_matrix),
      gl.Uniform("view_projection_matrix", view_projection_matrix),
      gl.Buffer(0, voxel_grid.reshape([-1])),
      gl.Uniform("grid_resolution", voxel_grid.shape),
      gl.Buffer(1, diffuse_coefficients.reshape([-1])),
      gl.Uniform("light_position", light_position),
      gl.Uniform("camera_position", camera_position),
      gl.Texture("textures", diffuse_textures, bind_as_array=True),
      gl.Uniform("ambient_light_color", ambient_light_color),
      gl.Uniform("light_color", light_color),
  ]
  if not geometry_shader:
    geometry_shader = resources.read_text(shaders, "voxel_renderer.geom")
  if not vertex_shader:
    vertex_shader = resources.read_text(shaders, "noop.vert")
  if not fragment_shader:
    fragment_shader = resources.read_text(shaders,
                                          "point_light_illumination.frag")

  result = gl.gl_simple_render(
      gl.RenderInput(
          num_points=voxel_grid.numel(),
          arguments=render_args,
          output_resolution=(height, width),
          vertex_shader=vertex_shader,
          geometry_shader=geometry_shader,
          fragment_shader=fragment_shader,
          clear_color=clear_color,
          output_type=output_type))
  return result[..., :3]
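# Minimal usage sketch (hypothetical values, assuming the default color
# palette assigns a visible color to material 1): render a single solid
# voxel with the automatically derived camera and light.
def _render_voxel_grid_sketch():
  grid = t.zeros([1, 1, 1], dtype=t.int32)
  grid[0, 0, 0] = 1
  image = render_voxel_grid(grid, image_size=(128, 128))
  assert image.shape == (128, 128, 3) and image.dtype == t.uint8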