def batch(examples: List[dataset.DatasetElement]) -> BatchedExample:
  """Batches a list of examples."""
  with t.no_grad():
    all_vertices = []
    batch_mesh_num_tri = []
    for ex in examples:
      w2v = ex.view_transform
      batch_mesh_num_tri.append(ex.mesh_num_tri)
      offset = 0
      for num_tri, o2w in zip(ex.mesh_num_tri, ex.o2w_transforms):
        mesh = ex.mesh_vertices[offset:offset + num_tri]
        offset += num_tri
        # Compose object-to-world and world-to-view into a single
        # object-to-view transform and apply it to the mesh triangles.
        o2v = t.matmul(w2v, o2w)
        mesh = transformations.transform_mesh(mesh, o2v)
        all_vertices.append(mesh)
    all_vertices = t.cat(all_vertices, 0)
    return BatchedExample(
        vertices=all_vertices,
        view_transform=t.stack([e.view_transform for e in examples], 0),
        camera_transform=t.stack([e.camera_transform for e in examples], 0),
        mesh_num_tri=batch_mesh_num_tri,
        mesh_labels=[e.mesh_labels for e in examples],
        input_image=t.stack([e.input_image for e in examples], 0),
        scene_id=[e.scene_id for e in examples],
        grid_sampling_offset=all_vertices.new_ones(
            [len(batch_mesh_num_tri), 3]) * 0.5)
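# A minimal usage sketch for batch(). Assumption: `load_example` is a
# hypothetical loader returning dataset.DatasetElement instances; everything
# else follows the function above. Meshes come back pre-transformed into view
# space, while per-example tensors gain a leading batch dimension.
def _batch_usage_example():
  examples = [load_example(i) for i in range(2)]  # hypothetical loader
  batched = batch(examples)
  # All triangles from all meshes, concatenated: [total_num_tri, 3, 3].
  assert batched.vertices.shape[1:] == (3, 3)
  # Images stack along a new leading batch dimension.
  assert batched.input_image.shape[0] == len(examples)
  return batched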
def __init__(self,
             grid: t.Tensor,
             voxel_to_world: t.Tensor,
             palette: t.Tensor = None,
             filter_kernel: int = 1):
  """Initializes the artifact.

  Args:
    grid: The voxel grid, float32[num_objects, depth, height, width].
    voxel_to_world: Matrix that converts from voxel to world space,
      float32[4, 4].
    palette: The colors to use for the different meshes,
      float32[max_num_meshes, 3].
    filter_kernel: The size of the smoothing filter kernel to apply.
  """
  grid = util.to_tensor(grid, dtype=t.float32)
  assert len(grid.shape) == 4
  voxel_to_world = util.to_tensor(voxel_to_world, t.float32, grid.device)
  assert voxel_to_world.shape == (4, 4)

  if filter_kernel > 1:
    # Smooth the grid with a k x k x k box filter, padding the spatial
    # dimensions so the output resolution matches the input.
    k = filter_kernel
    grid = t.constant_pad_nd(grid, [(k - 1) // 2, k - 1 - (k - 1) // 2] * 3)
    kernel = grid.new_ones([1, 1, k, k, k], dtype=t.float32) / k**3
    # Treat the object dimension as the batch dimension, so each object is
    # filtered independently with the single-channel kernel.
    grid = F.conv3d(grid[:, np.newaxis], kernel).squeeze(1)

  # Skip the first object (the background) when extracting meshes.
  (vertices, normals,
   mesh_num_tri) = MarchingCubesArtifact.to_marching_cubes(grid[1:])
  vertices = transformations.transform_mesh(vertices, voxel_to_world, True)
  normals = transformations.transform_mesh(normals, voxel_to_world, False)
  if palette is not None:
    palette = palette[1:]
  self.mesh_artifact = MultiMeshArtifact(
      vertices=vertices,
      normals=normals,
      mesh_num_tri=mesh_num_tri,
      mesh_colors=palette)
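# A minimal construction sketch (assuming the enclosing class is
# MarchingCubesArtifact, as the to_marching_cubes call above suggests).
# Object 0 is treated as background and skipped, so a grid with two objects
# yields a single mesh.
def _marching_cubes_artifact_example():
  grid = t.rand([2, 32, 32, 32])  # [num_objects, depth, height, width]
  voxel_to_world = transformations.scale([1 / 32.0] * 3)
  artifact = MarchingCubesArtifact(
      grid, voxel_to_world, filter_kernel=3)  # 3x3x3 box smoothing
  return artifact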
def testTransformMeshCorrectlyTransformsMeshes(self):
  transform = (
      (1, 0, 0, 0),
      (0, 2, 0, 0),
      (0, 0, 3, 0),
      (0, 0, 0, 1),
  )
  mesh = (
      ((12, 34, 56), (34, 32, 30), (11, 11, 18)),
      ((1, 2, 3), (4, 5, 6), (6, 5, 4)),
  )
  expected_result = t.tensor((
      ((12, 68, 168), (34, 64, 90), (11, 22, 54)),
      ((1, 4, 9), (4, 10, 18), (6, 10, 12)),
  ), dtype=t.float32)
  transformed_mesh = transformations.transform_mesh(mesh, transform)
  self.assertTrue(t.equal(transformed_mesh, expected_result))
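# For reference, a minimal sketch of the point-transform semantics the test
# exercises (an assumption about transform_mesh's behavior, not the library
# code): each vertex of each triangle is lifted to homogeneous coordinates,
# multiplied by the 4x4 matrix, and divided by w.
def _transform_mesh_sketch(mesh: t.Tensor, transform: t.Tensor) -> t.Tensor:
  mesh = t.as_tensor(mesh, dtype=t.float32)  # [num_tri, 3, 3]
  transform = t.as_tensor(transform, dtype=t.float32)  # [4, 4]
  ones = mesh.new_ones(mesh.shape[:-1] + (1,))
  homogeneous = t.cat([mesh, ones], -1)  # [num_tri, 3, 4]
  # Row vectors times the transposed matrix equal matrix times column vectors.
  transformed = t.matmul(homogeneous, transform.t())
  return transformed[..., :3] / transformed[..., 3:]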
def render_voxel_grid(
    voxel_grid: InputTensor,
    view_projection_matrix: InputTensor = None,
    image_size: Tuple[int, int] = (256, 256),
    voxel_to_view_matrix: InputTensor = None,
    diffuse_coefficients: InputTensor = None,
    light_position: InputTensor = None,
    light_color: InputTensor = (1.0, 1.0, 1.0),
    ambient_light_color: InputTensor = (0.2, 0.2, 0.2),
    clear_color: Tuple[float, float, float] = (0, 0, 0),
    output_type=t.uint8,
    vertex_shader=None,
    geometry_shader=None,
    fragment_shader=None,
) -> t.Tensor:
  """Renders a voxel grid.

  Args:
    voxel_grid: The voxel grid tensor, containing material IDs,
      int32[depth, height, width].
    view_projection_matrix: Transforms geometry from world to projected
      camera space. A float32[4, 4] row-major transformation matrix that is
      multiplied with 4 x 1 columns of homogeneous coordinates in the shader.
      If not specified, a new matrix will be calculated from the voxel grid.
    image_size: Output image shape, pair (height, width).
    voxel_to_view_matrix: Transforms from object to world space coordinates,
      float32[4, 4] transformation matrix. The voxel grid is assumed to be a
      grid of unit sized cubes placed at the origin. This matrix is
      responsible for transforming it into world space. If omitted, the voxel
      grid will be squeezed into the unit cube.
    diffuse_coefficients: The diffuse coefficients of the materials,
      float32[num_materials, 3].
    light_position: The light position, float32[3]. If None, the light will
      be placed at the camera origin.
    light_color: The light RGB color, float32[3].
    ambient_light_color: The ambient light RGB color, float32[3].
    clear_color: The RGB color to use when clearing the image, float32[3].
    output_type: The output type. Either t.uint8 or t.float32.
    vertex_shader: The vertex shader.
    geometry_shader: The geometry shader.
    fragment_shader: The fragment shader.

  Returns:
    The rendered image, either uint8[height, width, 3] or
    float32[height, width, 3], depending on the value of output_type.
  """
  height, width = image_size
  voxel_grid = util.to_tensor(voxel_grid, t.int32, "cpu")
  assert len(voxel_grid.shape) == 3 and voxel_grid.dtype == t.int32

  if voxel_to_view_matrix is None:
    # Uniformly scale the grid so its largest dimension fits the unit cube.
    d = 1.0 / np.max(voxel_grid.shape)
    voxel_to_view_matrix = transformations.scale([d, d, d])
  voxel_to_view_matrix = util.to_tensor(voxel_to_view_matrix, t.float32, "cpu")
  assert voxel_to_view_matrix.shape == (4, 4)

  if view_projection_matrix is None:
    # Fit a default camera around the grid's world-space bounding box.
    mesh = t.tensor([[[0.0] * 3, [0.0] * 3, voxel_grid.shape[::-1]]])
    mesh = transformations.transform_mesh(mesh, voxel_to_view_matrix)
    view_projection_matrix = camera_util.get_default_camera_for_mesh(mesh)
  view_projection_matrix = util.to_tensor(view_projection_matrix, t.float32,
                                          "cpu")
  assert view_projection_matrix.shape == (4, 4)

  if diffuse_coefficients is None:
    diffuse_coefficients = util.to_tensor(DEFAULT_COLOR_PALETTE, t.float32,
                                          "cpu")
  diffuse_coefficients = util.to_tensor(diffuse_coefficients, t.float32, "cpu")
  assert (len(diffuse_coefficients.shape) == 2 and
          diffuse_coefficients.shape[-1] == 3)

  # By default, we use the same fragment shader as the scene renderer, which
  # needs diffuse_textures to be specified. We specify a 1x1 texture, which is
  # however not used, since we emit texture index -1 in the geometry shader.
  diffuse_textures = t.ones([1, 1, 1, 3], dtype=t.uint8)

  # The eye position in camera space is (0, 0, -1). To compute its position
  # in world space, we multiply by the inverse view-projection matrix.
  camera_position = t.mv(t.inverse(view_projection_matrix),
                         t.tensor([0, 0, -1, 1], dtype=t.float32))
  camera_position = camera_position[:3] / camera_position[3]

  if light_position is None:
    light_position = camera_position
  light_position = util.to_tensor(light_position, t.float32, "cpu")
  assert light_position.shape == (3,)

  light_color = util.to_tensor(light_color, t.float32, "cpu")
  assert light_color.shape == (3,)

  ambient_light_color = util.to_tensor(ambient_light_color, t.float32, "cpu")
  assert ambient_light_color.shape == (3,)

  render_args = [
      gl.Uniform("voxel_to_view_matrix", voxel_to_view_matrix),
      gl.Uniform("view_projection_matrix", view_projection_matrix),
      gl.Buffer(0, voxel_grid.reshape([-1])),
      gl.Uniform("grid_resolution", voxel_grid.shape),
      gl.Buffer(1, diffuse_coefficients.reshape([-1])),
      gl.Uniform("light_position", light_position),
      gl.Uniform("camera_position", camera_position),
      gl.Texture("textures", diffuse_textures, bind_as_array=True),
      gl.Uniform("ambient_light_color", ambient_light_color),
      gl.Uniform("light_color", light_color),
  ]

  if not geometry_shader:
    geometry_shader = resources.read_text(shaders, "voxel_renderer.geom")
  if not vertex_shader:
    vertex_shader = resources.read_text(shaders, "noop.vert")
  if not fragment_shader:
    fragment_shader = resources.read_text(shaders,
                                          "point_light_illumination.frag")

  result = gl.gl_simple_render(
      gl.RenderInput(
          num_points=voxel_grid.numel(),
          arguments=render_args,
          output_resolution=(height, width),
          vertex_shader=vertex_shader,
          geometry_shader=geometry_shader,
          fragment_shader=fragment_shader,
          clear_color=clear_color,
          output_type=output_type))
  return result[..., :3]
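# Example invocation (a minimal sketch): renders a small solid cube using the
# default camera, palette, and lighting derived inside render_voxel_grid.
def _render_voxel_grid_example():
  grid = t.zeros([32, 32, 32], dtype=t.int32)
  grid[8:24, 8:24, 8:24] = 1  # material ID 1 inside a centered cube
  image = render_voxel_grid(grid, image_size=(128, 128))
  assert image.shape == (128, 128, 3) and image.dtype == t.uint8
  return image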
def get_3d_box(self, transform) -> Optional[t.Tensor]:
  """Returns the axis-aligned box of the transformed vertices, float32[2, 3].

  Row 0 holds the per-axis minimum corner, row 1 the per-axis maximum.
  """
  transformed = transformations.transform_mesh(self.vertices, transform)
  ltf = transformed.reshape([-1, 3]).min(0)[0]
  rbb = transformed.reshape([-1, 3]).max(0)[0]
  return t.stack([ltf, rbb], 0)
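# Usage sketch (assumption: `artifact` is an instance of the class exposing
# get_3d_box above, e.g. a mesh artifact, and `view_transform` is any
# float32[4, 4] matrix). The box bounds all vertices after the transform.
def _get_3d_box_example(artifact, view_transform):
  box = artifact.get_3d_box(view_transform)  # float32[2, 3]
  min_corner, max_corner = box[0], box[1]
  return max_corner - min_corner  # box extents along each axis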