# results
camera = OpenGLPerspectiveCameras(device=device, R=R[None, 1, ...], T=T[None, 1, ...])

# Define the settings for rasterization and shading. Here we set the output
# image to be of size 128x128. As we are rendering images for visualization
# purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
# rasterize_meshes.py for explanations of these parameters. We also leave
# bin_size and max_faces_per_bin at their default values of None, which sets
# their values using heuristics and ensures that the faster coarse-to-fine
# rasterization method is used. Refer to docs/notes/renderer.md for an
# explanation of the difference between naive and coarse-to-fine rasterization.
raster_settings = RasterizationSettings(
    image_size=128,
    blur_radius=0.0,
    faces_per_pixel=1,
)

# Create a Phong renderer by composing a rasterizer and a shader. The textured
# Phong shader will interpolate the texture uv coordinates for each vertex,
# sample from a texture image and apply the Phong lighting model.
renderer = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=camera, raster_settings=raster_settings),
    shader=SoftPhongShader(device=device, cameras=camera, lights=lights))

# Create a batch of meshes by repeating the cow mesh and associated textures.
# Meshes has a useful `extend` method which allows us to do this very easily.
# This also extends the textures.
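# A minimal sketch of the batching step the comment above describes, assuming
# `mesh` is a textured Meshes object and `batch_size`, `R`, `T` come from
# earlier code (these names are illustrative, not from the original snippet):
meshes = mesh.extend(batch_size)  # repeats verts, faces and textures N times

# Render each copy from its own camera; R/T are expected to hold one
# rotation/translation per batch element.
batched_cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
images = renderer(meshes, cameras=batched_cameras, lights=lights)  # (N, H, W, 4)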
def test_compare_with_meshes_renderer(self,
                                      batch_size=11,
                                      image_size=100,
                                      sphere_diameter=0.6):
    """
    Generate a spherical RGB volumetric function and its corresponding mesh
    and check whether MeshRenderer returns the same images as the
    corresponding ImplicitRenderer.
    """
    # generate NDC camera extrinsics and intrinsics
    cameras = init_cameras(batch_size,
                           image_size=[image_size, image_size],
                           ndc=True)

    # get a random offset of the volume
    sphere_centroid = torch.randn(batch_size, 3, device=cameras.device) * 0.1
    sphere_centroid.requires_grad = True

    # init the grid raysampler with the ndc grid
    raysampler = NDCGridRaysampler(
        image_width=image_size,
        image_height=image_size,
        n_pts_per_ray=256,
        min_depth=0.1,
        max_depth=2.0,
    )

    # get the EA raymarcher
    raymarcher = EmissionAbsorptionRaymarcher()

    # jitter the camera intrinsics a bit for each render
    cameras_randomized = cameras.clone()
    cameras_randomized.principal_point = (
        torch.randn_like(cameras.principal_point) * 0.3)
    cameras_randomized.focal_length = (
        cameras.focal_length + torch.randn_like(cameras.focal_length) * 0.2)

    # the list of differentiable camera vars
    cam_vars = ("R", "T", "focal_length", "principal_point")
    # enable gradient caching for the camera variables
    for cam_var in cam_vars:
        getattr(cameras_randomized, cam_var).requires_grad = True

    # get the implicit renderer
    images_opacities = ImplicitRenderer(
        raysampler=raysampler, raymarcher=raymarcher)(
            cameras=cameras_randomized,
            volumetric_function=spherical_volumetric_function,
            sphere_centroid=sphere_centroid,
            sphere_diameter=sphere_diameter,
        )[0]

    # check that the renderer does not erase gradients
    loss = images_opacities.sum()
    loss.backward()
    for check_var in (
            *[getattr(cameras_randomized, cam_var) for cam_var in cam_vars],
            sphere_centroid,
    ):
        self.assertIsNotNone(check_var.grad)

    # instantiate the corresponding spherical mesh
    ico = ico_sphere(level=4, device=cameras.device).extend(batch_size)
    verts = (torch.nn.functional.normalize(ico.verts_padded(), dim=-1) *
             sphere_diameter + sphere_centroid[:, None])
    meshes = Meshes(
        verts=verts,
        faces=ico.faces_padded(),
        textures=TexturesVertex(verts_features=(
            torch.nn.functional.normalize(verts, dim=-1) * 0.5 + 0.5)),
    )

    # instantiate the corresponding mesh renderer
    lights = PointLights(device=cameras.device, location=[[0.0, 0.0, 0.0]])
    renderer_textured = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras_randomized,
            raster_settings=RasterizationSettings(image_size=image_size,
                                                  blur_radius=1e-3,
                                                  faces_per_pixel=10),
        ),
        shader=SoftPhongShader(
            device=cameras.device,
            cameras=cameras_randomized,
            lights=lights,
            materials=Materials(
                ambient_color=((2.0, 2.0, 2.0), ),
                diffuse_color=((0.0, 0.0, 0.0), ),
                specular_color=((0.0, 0.0, 0.0), ),
                shininess=64,
                device=cameras.device,
            ),
            blend_params=BlendParams(sigma=1e-3,
                                     gamma=1e-4,
                                     background_color=(0.0, 0.0, 0.0)),
        ),
    )

    # get the mesh render
    images_opacities_meshes = renderer_textured(meshes,
                                                cameras=cameras_randomized,
                                                lights=lights)

    if DEBUG:
        outdir = tempfile.gettempdir() + "/test_implicit_vs_mesh_renderer"
        os.makedirs(outdir, exist_ok=True)
        frames = []
        for (image_opacity, image_opacity_mesh) in zip(
                images_opacities, images_opacities_meshes):
            image, opacity = image_opacity.split([3, 1], dim=-1)
            image_mesh, opacity_mesh = image_opacity_mesh.split([3, 1], dim=-1)
            diff_image = (((image - image_mesh) * 0.5 + 0.5).mean(
                dim=2, keepdim=True).repeat(1, 1, 3))
            image_pil = Image.fromarray((torch.cat(
                (
                    image,
                    image_mesh,
                    diff_image,
                    opacity.repeat(1, 1, 3),
                    opacity_mesh.repeat(1, 1, 3),
                ),
                dim=1,
            ).detach().cpu().numpy() * 255.0).astype(np.uint8))
            frames.append(image_pil)

        # export gif
        outfile = os.path.join(outdir, "implicit_vs_mesh_render.gif")
        frames[0].save(
            outfile,
            save_all=True,
            append_images=frames[1:],
            duration=batch_size // 15,
            loop=0,
        )
        print(f"exported {outfile}")

        # export concatenated frames
        outfile_cat = os.path.join(outdir, "implicit_vs_mesh_render.png")
        Image.fromarray(
            np.concatenate([np.array(f) for f in frames],
                           axis=0)).save(outfile_cat)
        print(f"exported {outfile_cat}")

    # compare the renders
    diff = (images_opacities - images_opacities_meshes).abs().mean(dim=-1)
    mu_diff = diff.mean(dim=(1, 2))
    std_diff = diff.std(dim=(1, 2))
    self.assertClose(mu_diff, torch.zeros_like(mu_diff), atol=5e-2)
    self.assertClose(std_diff, torch.zeros_like(std_diff), atol=6e-2)
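# For reference, a minimal sketch of the kind of `volumetric_function` that
# ImplicitRenderer expects. The real `spherical_volumetric_function` lives in
# the test utilities; this is an illustrative stand-in. The renderer calls the
# function with a RayBundle plus any extra kwargs, and it must return
# per-point densities and features:
from pytorch3d.renderer import ray_bundle_to_ray_points

def toy_spherical_volumetric_function(ray_bundle, sphere_centroid,
                                      sphere_diameter, **kwargs):
    # convert the ray parametrization to world-space points: (B, H, W, n_pts, 3)
    rays_points = ray_bundle_to_ray_points(ray_bundle)
    # hard occupancy inside the sphere: (B, H, W, n_pts, 1)
    dists = (rays_points - sphere_centroid[:, None, None, None]).norm(dim=-1)
    rays_densities = (dists <= 0.5 * sphere_diameter).float()[..., None]
    # color each point by its normalized location, mapped into [0, 1]
    rays_features = torch.nn.functional.normalize(rays_points,
                                                  dim=-1) * 0.5 + 0.5
    return rays_densities, rays_features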
def test_dataset(self):
    # 1. rerender the input point clouds / meshes using the saved camera_mat
    #    and compare the mask image with the saved mask image
    # 2. backproject masked points to space with the dense depth map,
    #    fuse all views and save
    batch_size = 1
    device = torch.device('cuda:0')
    data_dir = 'data/synthetic/cube_mesh'
    output_dir = os.path.join('tests', 'outputs', 'test_data')
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # dataset
    dataset = MVRDataset(data_dir=data_dir,
                         load_dense_depth=True,
                         mode="train")
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              num_workers=0,
                                              shuffle=False)
    meshes = load_objs_as_meshes([os.path.join(data_dir,
                                               'mesh.obj')]).to(device)
    cams = dataset.get_cameras().to(device)
    image_size = imageio.imread(dataset.image_files[0]).shape[0]

    # initialize the rasterizer; we check mask pngs only, so there is no need
    # to create lights, shaders etc.
    raster_settings = RasterizationSettings(
        image_size=image_size,
        blur_radius=0.0,
        faces_per_pixel=5,
        # this setting controls whether naive or coarse-to-fine rasterization is used
        bin_size=None,
        max_faces_per_bin=None,  # this setting is for coarse rasterization
    )
    rasterizer = MeshRasterizer(cameras=None,
                                raster_settings=raster_settings)

    # render with the loaded camera positions and training transformation functions
    pixel_world_all = []
    for idx, data in enumerate(data_loader):
        # get data
        img = data.get('img.rgb').to(device)
        assert (img.min() >= 0 and img.max() <= 1
                ), "Image must be a floating point number between 0 and 1."
        mask_gt = data.get('img.mask').to(device).permute(0, 2, 3, 1)
        camera_mat = data['camera_mat'].to(device)

        cams.R, cams.T = decompose_to_R_and_t(camera_mat)
        cams._N = cams.R.shape[0]
        cams.to(device)
        self.assertTrue(
            torch.equal(cams.get_world_to_view_transform().get_matrix(),
                        camera_mat))

        # transform to view and rerender with a non-rotated camera
        verts_padded = transform_to_camera_space(meshes.verts_padded(), cams)
        meshes_in_view = meshes.offset_verts(
            -meshes.verts_packed() + padded_to_packed(
                verts_padded, meshes.mesh_to_verts_packed_first_idx(),
                meshes.verts_packed().shape[0]))
        fragments = rasterizer(meshes_in_view,
                               cameras=dataset.get_cameras().to(device))

        # compare the mask
        mask = fragments.pix_to_face[..., :1] >= 0
        imageio.imwrite(os.path.join(output_dir, "mask_%06d.png" % idx),
                        mask[0, ...].cpu().to(dtype=torch.uint8) * 255)
        # allow a 5 pixel difference
        self.assertTrue(torch.sum(mask_gt != mask) < 5)

        # check dense maps
        # backproject points to the world; pixel range (-1, 1)
        pixels = arange_pixels((image_size, image_size),
                               batch_size)[1].to(device)
        depth_img = data.get('img.depth').to(device)

        # get the depth and mask at the sampled pixel positions
        depth_gt = get_tensor_values(depth_img,
                                     pixels,
                                     squeeze_channel_dim=True)
        mask_gt = get_tensor_values(mask.permute(0, 3, 1, 2).float(),
                                    pixels,
                                    squeeze_channel_dim=True).bool()

        # get pixels and depth inside the masked area
        pixels_packed = pixels[mask_gt]
        depth_gt_packed = depth_gt[mask_gt]
        first_idx = torch.zeros((pixels.shape[0], ),
                                device=device,
                                dtype=torch.long)
        num_pts_in_mask = mask_gt.sum(dim=1)
        first_idx[1:] = num_pts_in_mask.cumsum(dim=0)[:-1]
        pixels_padded = packed_to_padded(pixels_packed, first_idx,
                                         num_pts_in_mask.max().item())
        depth_gt_padded = packed_to_padded(depth_gt_packed, first_idx,
                                           num_pts_in_mask.max().item())

        # backproject to world coordinates
        # contains nan and infinite values due to depth_gt_padded containing 0.0
        pixel_world_padded = transform_to_world(pixels_padded,
                                                depth_gt_padded[..., None],
                                                cams)

        # transform back to a list containing no padded values
        split_size = num_pts_in_mask[..., None].repeat(1, 2)
        split_size[:, 1] = 3
        pixel_world_list = padded_to_list(pixel_world_padded, split_size)
        pixel_world_all.extend(pixel_world_list)

        idx += 1
        if idx >= 10:
            break

    pixel_world_all = torch.cat(pixel_world_all, dim=0)
    mesh = trimesh.Trimesh(vertices=pixel_world_all.cpu(),
                           faces=None,
                           process=False)
    mesh.export(os.path.join(output_dir, 'pixel_to_world.ply'))
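# A self-contained illustration of the packed -> padded conversion used above.
# pytorch3d.ops.packed_to_padded takes the packed tensor, the first index of
# each element's slice, and the padded size; the values below are made up:
import torch
from pytorch3d.ops import packed_to_padded

points_packed = torch.arange(5, dtype=torch.float32)[:, None]  # (5, 1)
num_pts = torch.tensor([2, 3])             # two "meshes" with 2 and 3 points
first_idx = torch.zeros(2, dtype=torch.long)
first_idx[1:] = num_pts.cumsum(dim=0)[:-1]  # -> tensor([0, 2])
points_padded = packed_to_padded(points_packed, first_idx,
                                 num_pts.max().item())
# points_padded has shape (2, 3, 1); the first row is zero-padded at slot 2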
def forward(self, fragments, meshes, **kwargs) -> torch.Tensor:
    texels = meshes.sample_textures(fragments)
    return texels


cameras = FoVOrthographicCameras(device=device,
                                 max_x=80.0,
                                 max_y=93.0,
                                 min_x=-80.0,
                                 min_y=-93.0,
                                 scale_xyz=((1, 1, 1), ))
raster_settings = RasterizationSettings(
    image_size=128,
    blur_radius=0,
    faces_per_pixel=6,
)


def render(renderer, scene):
    distance, elevation, azimuth = 30, 0.0, 0
    R, T = look_at_view_transform(distance, elevation, azimuth, device=device)
    return renderer(meshes_world=scene.to(device), R=R, T=T)


def plot_channels(image):
    fig = plt.figure(figsize=(10, 10))
    for i in range(image.size(-2)):
        panel = fig.add_subplot(3, 3, i + 1)
        panel.imshow(image[..., i, :].squeeze().cpu())
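# Putting the pieces above together: with faces_per_pixel=6 the shader's
# output keeps a face dimension K, and plot_channels shows one panel per face
# layer. A hedged usage sketch, assuming `scene` is a textured Meshes object
# and `TexelShader` is a hypothetical name for a class wrapping the `forward`
# defined above:
texel_renderer = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=cameras,
                              raster_settings=raster_settings),
    shader=TexelShader(),  # hypothetical wrapper around the forward above
)
texels = render(texel_renderer, scene)  # (N, H, W, K=6, C) raw texels
plot_channels(texels)                   # one subplot per face layer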
vals = [float(v) for v in line.strip().split(" ")]
azimuth, elevation, yaw, dist_ratio, fov = vals
distance = 1.75 * dist_ratio
metadata.append((azimuth, elevation, distance))

batch_size = 6
plt.figure(figsize=(10, 10))
plt.title('R2N2 transformation settings')
for i, (azim, elev, dist) in enumerate(metadata):
    R, T = look_at_view_transform(dist=dist,
                                  elev=elev,
                                  azim=-azim,
                                  device=device)
    cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
    raster_settings = RasterizationSettings(
        image_size=512,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=0
    )
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=raster_settings
        ),
        # cameras must also be passed to the shader; the renderer call below
        # only supplies R and T as kwargs
        shader=HardPhongShader(device=device, cameras=cameras, lights=lights)
    )
    image = renderer(meshes_world=mesh, R=R, T=T)
    plt.subplot(5, 5, i + 1)
    plt.title(str(i).zfill(2) + '.png')
    plt.imshow(image[0, ..., :3].cpu().numpy())
                faces=[faces.to(device)],
                textures=textures)

# Initialize an OpenGL perspective camera.
cameras = OpenGLPerspectiveCameras(device=device)

# To blend faces we set a few parameters which control the opacity and the
# sharpness of the edges. Refer to blending.py for more details.
blend_params = BlendParams(sigma=0.001, gamma=1.0)

# Define the settings for rasterization and shading. Here we set the output
# image to be of size 256x256. To form the blended image we use the closest
# 80 faces for each pixel. Refer to rasterize_meshes.py for an explanation of
# this parameter.
raster_settings = RasterizationSettings(
    image_size=256,
    blur_radius=np.log(1. / 0.001 - 1.) * blend_params.sigma,
    faces_per_pixel=80,
    bin_size=0)

# Create a silhouette mesh renderer by composing a rasterizer and a shader.
lights = PointLights(device=device, location=((2.0, 2.0, -2.0), ))
silhouette_renderer = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=cameras,
                              raster_settings=raster_settings),
    shader=SoftPhongShader(blend_params=blend_params,
                           device=device,
                           lights=lights))

# We will also create a Phong renderer. This is simpler and only needs to
# render one face per pixel.
raster_settings = RasterizationSettings(image_size=256,
                                        blur_radius=0.0,
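# Aside on the blur_radius formula used above, which comes from the soft
# rasterization scheme: a face's influence on a pixel falls off roughly as
# sigmoid(-d2 / sigma), where d2 is the squared screen-space distance to the
# face. Solving sigmoid(-blur_radius / sigma) = delta gives
# blur_radius = log(1 / delta - 1) * sigma, i.e. faces farther than the blur
# radius contribute less than delta. A quick arithmetic check with the values
# used above (sigma = 0.001, delta = 0.001):
import numpy as np

sigma, delta = 1e-3, 1e-3
blur_radius = np.log(1.0 / delta - 1.0) * sigma  # ~= 0.0069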
def test_correspondence_mapping(self):
    device = torch.device('cuda:0')
    torch.cuda.set_device(device)

    # Set paths
    data_dir = Path(__file__).parent / 'data'
    data_dir.mkdir(exist_ok=True)
    obj_dir = Path(__file__).resolve().parent.parent / "docs/tutorials/data"
    obj_filename = obj_dir / "cow_mesh/cow.obj"

    # Load obj file
    mesh = load_objs_as_meshes([obj_filename], device=device)
    try:
        texture_image = mesh.textures.maps_padded()
    except AttributeError:
        pass

    R, T = look_at_view_transform(2.55, 10, 180)
    cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)

    # Define the settings for rasterization and shading. Here we set the
    # output image to be of size 512x512. As we are rendering images for
    # visualization purposes only we will set faces_per_pixel=1 and
    # blur_radius=0.0. We also leave bin_size and max_faces_per_bin at None,
    # which ensures that the faster coarse-to-fine rasterization method is
    # used. Refer to rasterize_meshes.py for explanations of these parameters.
    # Refer to docs/notes/renderer.md for an explanation of the difference
    # between naive and coarse-to-fine rasterization.
    raster_settings = RasterizationSettings(
        image_size=512,
        blur_radius=0.0,
        faces_per_pixel=1,
        perspective_correct=True,
    )

    # Create a colormap to be displayed on the object.
    # Generate some data:
    x, y = np.meshgrid(
        np.linspace(1, 0, 100),
        np.linspace(0, 1, 100),
    )
    directions = (np.sin(2 * np.pi * x) * np.cos(2 * np.pi * y) + 1) * np.pi
    magnitude = np.exp(-(x * x + y * y))

    # Normalize the data:
    def normalize(M):
        return (M - np.min(M)) / (np.max(M) - np.min(M))

    d_norm = normalize(directions)
    m_norm = normalize(magnitude)
    colors = np.dstack((x, y, np.zeros_like(x)))
    colors = torch.from_numpy(np.array(colors)).unsqueeze(0).float()

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings),
        shader=UVsCorrespondenceShader(device=device,
                                       cameras=cameras,
                                       colormap=colors))
    images = renderer(mesh)
    Image.fromarray(
        ((255 * images[0, ..., :3]).squeeze().cpu().numpy().astype(
            np.uint8))).save(
                str(data_dir / 'render_correspondence_texture.png'))
    self.assertTrue(
        (data_dir / 'render_correspondence_texture.png').exists())
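# `UVsCorrespondenceShader` is a custom shader, not a pytorch3d built-in.
# A minimal sketch of the idea consistent with how it is used above: look up
# the supplied colormap at each pixel's interpolated UV coordinate and skip
# lighting entirely. The function name is illustrative and the sketch ignores
# UV origin/V-flip conventions:
import torch.nn.functional as F
from pytorch3d.ops import interpolate_face_attributes

def shade_by_uv_lookup(fragments, meshes, colormap):
    # gather per-face UVs: (F, 3, 2)
    faces_uvs = meshes.textures.faces_uvs_padded()[0]   # (F, 3)
    verts_uvs = meshes.textures.verts_uvs_padded()[0]   # (V, 2)
    face_uv = verts_uvs[faces_uvs]
    # interpolate to pixel UVs with the rasterizer's barycentrics: (N, H, W, K, 2)
    pixel_uvs = interpolate_face_attributes(fragments.pix_to_face,
                                            fragments.bary_coords, face_uv)
    # sample the (1, H, W, 3) colormap at those UVs with grid_sample
    N, H, W, K, _ = pixel_uvs.shape
    grid = pixel_uvs.view(N, H, W * K, 2) * 2.0 - 1.0   # UVs to [-1, 1]
    cmap = colormap.permute(0, 3, 1, 2)                 # (1, 3, H, W)
    sampled = F.grid_sample(cmap, grid, align_corners=False)
    return sampled.permute(0, 2, 3, 1).view(N, H, W, K, 3)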
def batch_render(
    verts,
    faces,
    faces_per_pixel=10,
    K=None,
    rot=None,
    trans=None,
    colors=None,
    color=(0.53, 0.53, 0.8),  # light purple
    ambient_col=0.5,
    specular_col=0.2,
    diffuse_col=0.3,
    face_colors=None,
    # color = (0.74117647, 0.85882353, 0.65098039),  # light blue
    image_sizes=None,
    out_res=512,
    bin_size=0,
    shading="soft",
    mode="rgb",
    blend_gamma=1e-4,
    min_depth=None,
):
    device = torch.device("cuda:0")
    K = K.to(device)
    width, height = image_sizes[0]
    out_size = int(max(image_sizes[0]))
    raster_settings = RasterizationSettings(
        image_size=out_size,
        blur_radius=0.0,
        faces_per_pixel=faces_per_pixel,
        bin_size=bin_size,
    )

    fx = K[:, 0, 0]
    fy = K[:, 1, 1]
    focals = torch.stack([fx, fy], 1)
    px = K[:, 0, 2]
    py = K[:, 1, 2]
    principal_point = torch.stack([width - px, height - py], 1)
    if rot is None:
        rot = torch.eye(3).unsqueeze(0).to(device)
    if trans is None:
        trans = torch.zeros(3).unsqueeze(0).to(device)
    cameras = PerspectiveCameras(
        device=device,
        focal_length=focals,
        principal_point=principal_point,
        image_size=[(out_size, out_size) for _ in range(len(verts))],
        R=rot,
        T=trans,
    )
    if mode == "rgb" and shading == "soft":
        lights = DirectionalLights(
            device=device,
            direction=((0.6, -0.6, -0.6), ),
            ambient_color=((ambient_col, ambient_col, ambient_col), ),
            diffuse_color=((diffuse_col, diffuse_col, diffuse_col), ),
            specular_color=((specular_col, specular_col, specular_col), ),
        )
        shader = SoftPhongShader(device=device, cameras=cameras,
                                 lights=lights)
    elif mode == "silh":
        blend_params = BlendParams(sigma=1e-4, gamma=1e-4)
        shader = SoftSilhouetteShader(blend_params=blend_params)
    elif shading == "faceidx":
        shader = FaceIdxShader()
    elif (mode == "facecolor") and (shading == "hard"):
        shader = FaceColorShader(face_colors=face_colors)
    elif (mode == "facecolor") and (shading == "soft"):
        shader = SoftFaceColorShader(face_colors=face_colors,
                                     blend_gamma=blend_gamma)
    else:
        raise ValueError(
            f"Unhandled mode {mode} and shading {shading} combination")

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings),
        shader=shader,
    )

    if min_depth is not None:
        verts = torch.cat(
            [verts[:, :, :2], verts[:, :, 2:].clamp(min_depth)], 2)
    if mode == "rgb":
        if colors is None:
            colors = get_colors(verts, color)
        tex = textures.TexturesVertex(verts_features=colors)
        meshes = Meshes(verts=verts, faces=faces, textures=tex)
    elif mode in ["silh", "facecolor"]:
        meshes = Meshes(verts=verts, faces=faces)
    else:
        raise ValueError(f"Render mode {mode} not in [rgb|silh|facecolor]")

    square_images = renderer(meshes, cameras=cameras)
    height_off = int(width - height)
    images = torch.flip(square_images, (1, 2))[:, height_off:]
    return images
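# Hypothetical usage of batch_render above; the shapes and intrinsics are made
# up for illustration, and colors is passed explicitly to avoid the external
# get_colors helper:
B, V, Fc = 2, 100, 50
verts = torch.rand(B, V, 3).cuda() + torch.tensor([0.0, 0.0, 2.0]).cuda()
faces = torch.randint(0, V, (B, Fc, 3)).cuda()
K = torch.tensor([[[500.0, 0.0, 128.0],
                   [0.0, 500.0, 128.0],
                   [0.0, 0.0, 1.0]]]).repeat(B, 1, 1)
images = batch_render(verts, faces, K=K, image_sizes=[(256, 256)],
                      mode="rgb", colors=torch.ones_like(verts) * 0.7)
# images: (B, 256, 256, 4) RGBA renders, flipped back to the input convention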
def __init__(self):
    self.batch_size = 1
    self.epochs = 100
    self.d_lr = 1e-5
    self.g_lr = 1e-4
    self.beta = 0.9
    self.inp_feature = 512 * 512
    self.device = torch.device(
        "cuda:1" if torch.cuda.is_available() else "cpu")
    # self.device = torch.device("cpu")
    self.smpl_mesh_path = "Test/smpl_pytorch/human.obj"
    self.path = "NOMO_preprocess/data"
    self.model_path = "models"
    self.raster_settings = RasterizationSettings(
        image_size=512,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    self.n_males = 179
    self.n_females = 177
    self.lights = PointLights(device=self.device,
                              location=[[0.0, 0.0, -3.0]])
    self.measurements = {
        'height': {
            'points': [609, 3469],
            'ground_truth': {'male': 31, 'female': 13}
        },
        'waist': {
            'points': [
                679, 855, 920, 861, 858, 1769, 4344, 4345, 4404, 4341, 4167,
                4166, 4921, 4425, 4332, 4317, 4316, 4331, 4330, 4373, 6389,
                6388, 5244, 5246, 1784, 1781, 1780, 3122, 2928, 886, 845,
                844, 831, 830, 846, 939, 1449, 678, 679
            ],
            'ground_truth': {'male': 9, 'female': 4}
        },
        'shoulder': {
            'points': [
                5325, 4722, 4798, 5356, 5360, 5269, 4078, 4079, 4186, 4187,
                6333, 3078, 2812, 697, 696, 589, 590, 1808, 1535, 1316, 1318,
                1240, 1238, 1862
            ],
            'ground_truth': {'male': 11, 'female': 22}
        },
        'outseam': {
            'points': [1802, 3319],
            'ground_truth': {'male': 18, 'female': 1}
        },
        'inseam': {
            'points': [1225, 3319],
            'ground_truth': {'male': 19, 'female': 2}
        },
        # 'hip_height': {'points': [1477, 3469],
        #                'ground_truth': {'male': 31, 'female': 13}},
        'knee_height': {
            'points': [
                3474, 1178, 1175, 1102, 1103, 1077, 1076, 1116, 1121, 3191,
                3323, 3208, 3469
            ],
            'ground_truth': {'male': 20, 'female': 6}
        },
        'bust_circle': {
            'points': [
                1495, 1493, 2827, 1249, 3020, 3502, 6472, 4731, 4733, 4965,
                4966, 6285, 4749, 4747, 4960, 6300, 6308, 6878, 4103, 4103,
                4104, 4899, 6307, 4686, 4687, 1330, 1201, 1202, 2846, 1426,
                615, 614, 614, 3480, 2847, 2839, 2840, 2851, 2841, 2825, 1495
            ],
            'ground_truth': {'male': 6, 'female': 18}
        },
        'thigh_circle': {
            'points': [
                964, 909, 910, 1365, 907, 906, 957, 904, 848, 848, 849, 902,
                851, 852, 898, 898, 899, 934, 935, 1453, 964
            ],
            'ground_truth': {'male': 15, 'female': 32}
        },
        'calf': {
            'points': [
                1087, 1086, 1106, 1107, 1529, 1529, 1111, 1091, 1464, 1467,
                1469, 1096, 1097, 1100, 1100, 1099, 1103, 1371, 1155, 1087
            ],
            'ground_truth': {'male': 17, 'female': 23}
        },
        'hip_circle': {
            'points': [
                836, 838, 1230, 853, 854, 944, 850, 847, 1229, 1478, 1477,
                1477, 3475, 914, 912, 1497, 3142, 3147, 3148, 4693, 4365,
                4758, 4952, 6555, 6877, 4950, 4950, 4802, 4801, 6550, 6551,
                6517, 6518, 6526, 6519, 6512, 4325, 4322, 1540, 836
            ],
            'ground_truth': {'male': 0, 'female': 5}
        },
        'bicep': {
            'points': [
                629, 1678, 1716, 1679, 1679, 1314, 1379, 1378, 1394, 1393,
                1389, 1388, 1388, 1234, 1231, 1386, 1384, 1737, 1398, 1395,
                629
            ],
            'ground_truth': {'male': 23, 'female': 24}
        },
    }
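# The `points` lists above index vertices on the SMPL template mesh; the
# circumference-style entries trace a closed loop (the first index equals the
# last). A plausible way to turn such a loop into a measurement is the length
# of the polyline through those vertices. This is an assumption about how the
# indices are consumed, not code from the original:
def polyline_length(verts, point_ids):
    # verts: (V, 3) mesh vertices; point_ids: ordered vertex indices
    pts = verts[torch.tensor(point_ids, dtype=torch.long)]
    return (pts[1:] - pts[:-1]).norm(dim=-1).sum()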
def render_img(face_shape,
               face_color,
               facemodel,
               image_size=224,
               fx=1015.0,
               fy=1015.0,
               px=112.0,
               py=112.0,
               device='cuda:0'):
    '''
    ref: https://github.com/facebookresearch/pytorch3d/issues/184
    The rendering function (just for test)
    Input:
        face_shape: Tensor[1, 35709, 3]
        face_color: Tensor[1, 35709, 3] in [0, 1]
        facemodel: contains `tri` (triangles[70789, 3], indices start from 1)
    '''
    from pytorch3d.structures import Meshes
    from pytorch3d.renderer.mesh.textures import TexturesVertex
    from pytorch3d.renderer import (PerspectiveCameras, PointLights,
                                    RasterizationSettings, MeshRenderer,
                                    MeshRasterizer, SoftPhongShader,
                                    BlendParams)

    face_color = TexturesVertex(verts_features=face_color.to(device))
    face_buf = torch.from_numpy(facemodel.tri - 1)  # indices start from 1
    face_idx = face_buf.unsqueeze(0)

    mesh = Meshes(face_shape.to(device), face_idx.to(device), face_color)

    R = torch.eye(3).view(1, 3, 3).to(device)
    R[0, 0, 0] *= -1.0
    T = torch.zeros([1, 3]).to(device)

    half_size = (image_size - 1.0) / 2
    focal_length = torch.tensor([fx / half_size, fy / half_size],
                                dtype=torch.float32).reshape(1, 2).to(device)
    principal_point = torch.tensor(
        [(half_size - px) / half_size, (py - half_size) / half_size],
        dtype=torch.float32).reshape(1, 2).to(device)

    cameras = PerspectiveCameras(device=device,
                                 R=R,
                                 T=T,
                                 focal_length=focal_length,
                                 principal_point=principal_point)

    raster_settings = RasterizationSettings(image_size=image_size,
                                            blur_radius=0.0,
                                            faces_per_pixel=1)

    lights = PointLights(device=device,
                         ambient_color=((1.0, 1.0, 1.0), ),
                         diffuse_color=((0.0, 0.0, 0.0), ),
                         specular_color=((0.0, 0.0, 0.0), ),
                         location=((0.0, 0.0, 1e5), ))

    blend_params = BlendParams(background_color=(0.0, 0.0, 0.0))

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings),
        shader=SoftPhongShader(device=device,
                               cameras=cameras,
                               lights=lights,
                               blend_params=blend_params))
    images = renderer(mesh)
    images = torch.clamp(images, 0.0, 1.0)
    return images
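# Sanity check of the screen -> NDC intrinsics conversion used in render_img
# (values are the function defaults: a 224x224 image with fx = fy = 1015 and
# the principal point at the image center):
image_size, fx, px = 224, 1015.0, 112.0
half_size = (image_size - 1.0) / 2           # 111.5
focal_ndc = fx / half_size                   # ~9.10, in NDC units
ppoint_ndc = (half_size - px) / half_size    # ~-0.0045, almost centred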
shapenet_loader = DataLoader(shapenet_dataset,
                             batch_size=12,
                             collate_fn=collate_batched_meshes)
# it = iter(shapenet_loader)
# shapenet_batch = next(it)
# print(shapenet_batch.keys())
# batch_renderings = shapenet_batch["images"]  # (N, V, H, W, 3); here V is 1.
# image_grid(batch_renderings.squeeze().numpy(), rows=3, cols=4, rgb=True)

# Renderer and rendering settings.
R, T = look_at_view_transform(1.0, 1.0, 90)
cameras = OpenGLPerspectiveCameras(R=R, T=T, device=device)
raster_settings = RasterizationSettings(image_size=512)
lights = PointLights(location=torch.tensor([0.0, 1.0, -2.0],
                                           device=device)[None],
                     device=device)

images_by_idxs = shapenet_dataset.render(
    idxs=list(range(7, 8)),
    device=device,
    cameras=cameras,
    raster_settings=raster_settings,
    lights=lights,
)
image_grid(images_by_idxs.cpu().numpy(), rows=1, cols=3, rgb=True)
# plt.show()
def forward(self,
            verts,      # under the general camera coordinate rXdYfZ, N*V*3
            faces,      # indices into verts defining triangles, N*F*3
            verts_uvs,  # uv coordinates of the corresponding verts, N*V*2
            faces_uvs,  # indices into verts_uvs defining triangles, N*F*3
            tex_image,  # under GCcd, N*H*W*3
            R,          # under GCcd, N*3*3
            T,          # under GCcd, N*3
            f,          # in pixel/m, N*1
            C,          # camera center, N*2
            imgres,     # int
            lightLoc=None):

    assert verts.shape[0] == 1, \
        'due to issues in pytorch3d, render 1 mesh per forward pass'

    # only need to convert either R and T or verts; we choose R and T here
    if self.convertToPytorch3D:
        R = torch.matmul(self.GCcdToPytorch3D, R)
        T = torch.matmul(self.GCcdToPytorch3D, T.unsqueeze(-1)).squeeze(-1)

    # prepare textures and the mesh to render
    tex = TexturesUV(verts_uvs=verts_uvs, faces_uvs=faces_uvs, maps=tex_image)
    mesh = Meshes(verts=verts, faces=faces, textures=tex)

    # Initialize a camera. The world coordinate is +Y up, +X left and +Z in.
    cameras = PerspectiveCameras(
        focal_length=f,
        principal_point=C,
        R=R,
        T=T,
        image_size=((imgres, imgres), ),
        device=self.device
    )

    # Define the settings for rasterization and shading.
    raster_settings = RasterizationSettings(
        image_size=imgres,
        blur_radius=0.0,
        faces_per_pixel=1,
    )

    # Create a simple renderer by composing a rasterizer and a shader.
    # The simple textured shader will interpolate the texture uv coordinates
    # for each pixel and sample from a texture image. This renderer could
    # support lighting easily but we do not implement it.
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=raster_settings
        ),
        shader=SimpleShader(
            device=self.device
        )
    )

    # render the image(s)
    images = renderer(mesh)
    return images
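# `SimpleShader` is not a pytorch3d built-in. A minimal sketch consistent
# with how it is used above: sample the texture at each fragment and
# hard-blend the closest face, with no lighting. This is an assumed
# implementation following the pattern from the pytorch3d docs, not the
# original class:
import torch.nn as nn
from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend

class SimpleShader(nn.Module):
    def __init__(self, device="cpu", blend_params=None):
        super().__init__()
        self.blend_params = (blend_params
                             if blend_params is not None else BlendParams())

    def forward(self, fragments, meshes, **kwargs):
        texels = meshes.sample_textures(fragments)  # (N, H, W, K, 3)
        blend_params = kwargs.get("blend_params", self.blend_params)
        # keep the nearest face per pixel and add an alpha channel
        return hard_rgb_blend(texels, fragments, blend_params)  # (N, H, W, 4)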
def run_on_image(self, image, id_str, gt_verts, gt_faces):
    deprocess = imagenet_deprocess(rescale_image=False)

    with torch.no_grad():
        voxel_scores, meshes_pred = self.predictor(image.to(self.device))

    sid, mid, iid = id_str.split('-')
    iid = int(iid)

    # Transform vertex space
    metadata_path = os.path.join('./datasets/shapenet/ShapeNetV1processed',
                                 sid, mid, "metadata.pt")
    metadata = torch.load(metadata_path)
    K = metadata["intrinsic"]
    RTs = metadata["extrinsics"].to(self.device)
    rot_y_90 = torch.tensor([[0, 0, 1, 0], [0, 1, 0, 0], [-1, 0, 0, 0],
                             [0, 0, 0, 1]]).to(RTs)

    mesh = meshes_pred[-1][0]

    # For some strange reason all classes (except the vehicle class) require
    # a 90 degree rotation about the y-axis for the GT mesh
    invRT = torch.inverse(RTs[iid].mm(rot_y_90))
    invRT_no_rot = torch.inverse(RTs[iid])
    mesh._verts_list[0] = project_verts(mesh._verts_list[0], invRT)

    # Get look-at-view extrinsics
    render_metadata_path = os.path.join(
        'datasets/shapenet/ShapeNetRenderingExtrinsics', sid, mid,
        'rendering_metadata.pt')
    render_metadata = torch.load(render_metadata_path)
    render_RTs = render_metadata['extrinsics'].to(self.device)

    verts, faces = mesh.get_mesh_verts_faces(0)
    verts_rgb = torch.ones_like(verts)[None]
    textures = Textures(verts_rgb=verts_rgb.to(self.device))
    mesh.textures = textures
    plt.figure(figsize=(10, 10))

    # Silhouette renderer
    render_image_size = 256
    blend_params = BlendParams(sigma=1e-4, gamma=1e-4)
    raster_settings = RasterizationSettings(
        image_size=render_image_size,
        blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
        faces_per_pixel=50,
    )

    gt_verts = gt_verts.to(self.device)
    gt_faces = gt_faces.to(self.device)
    verts_rgb = torch.ones_like(gt_verts)[None]
    textures = Textures(verts_rgb=verts_rgb)

    # Invert without the rotation for the vehicle class
    if sid == '02958343':
        gt_verts = project_verts(gt_verts, invRT_no_rot.to(self.device))
    else:
        gt_verts = project_verts(gt_verts, invRT.to(self.device))
    gt_mesh = Meshes(verts=[gt_verts], faces=[gt_faces], textures=textures)

    probability_map = 0.01 * torch.ones((1, 24))
    viewgrid = torch.zeros(
        (1, 24, render_image_size, render_image_size)).to(self.device)
    fig = plt.figure(1)
    ax_pred = [fig.add_subplot(5, 5, i + 1) for i in range(24)]
    # fig = plt.figure(2)
    # ax_gt = [fig.add_subplot(5, 5, i + 1) for i in range(24)]

    for i in range(len(render_RTs)):
        if i == iid:  # Don't include the current view
            continue
        R = render_RTs[i][:3, :3].unsqueeze(0)
        T = render_RTs[i][:3, 3].unsqueeze(0)
        cameras = OpenGLPerspectiveCameras(device=self.device, R=R, T=T)
        silhouette_renderer = MeshRenderer(
            rasterizer=MeshRasterizer(cameras=cameras,
                                      raster_settings=raster_settings),
            shader=SoftSilhouetteShader(blend_params=blend_params))

        ref_image = (silhouette_renderer(meshes_world=gt_mesh, R=R, T=T) >
                     0).float()
        silhouette_image = (silhouette_renderer(
            meshes_world=mesh, R=R, T=T) > 0).float()

        # MSE loss between both silhouettes
        silh_loss = torch.sum(
            (silhouette_image[0, :, :, 3] - ref_image[0, :, :, 3])**2)
        probability_map[0, i] = silh_loss.detach()
        viewgrid[0, i] = silhouette_image[..., -1]

        # ax_gt[i].imshow(ref_image[0, :, :, 3].cpu().numpy())
        # ax_gt[i].set_title(i)
        ax_pred[i].imshow(silhouette_image[0, :, :, 3].cpu().numpy())
        ax_pred[i].set_title(i)

    img = image_to_numpy(deprocess(image[0]))
    # ax_gt[iid].imshow(img)
    ax_pred[iid].imshow(img)
    # fig = plt.figure(3)
    # ax = fig.add_subplot(111)
    # ax.imshow(img)
    pred_prob_map = self.loss_predictor(viewgrid)
    print('Highest actual loss: {}'.format(torch.argmax(probability_map)))
    print('Highest predicted loss: {}'.format(torch.argmax(pred_prob_map)))
    plt.show()
    # plt.savefig('./output_demo/figures/' + id_str + '.png')
    # vis_utils.visualize_prediction(id_str, img, mesh, self.output_dir)

    return torch.argmax(probability_map).item()
camera_sampler = CameraSampler(
    opt.num_cameras,
    batch_size,
    distance_range=torch.tensor(((opt.min_dist, opt.max_dist), )),
    # min distance should be larger than znear + obj_dim
    sort_distance=True,
    camera_type=FoVPerspectiveCameras,
    camera_params=camera_params)

# Define the settings for rasterization and shading.
# Refer to rasterize_meshes.py for explanations of these parameters.
raster_settings = RasterizationSettings(
    image_size=opt.image_size,
    blur_radius=0.0,
    faces_per_pixel=5,
    # this setting controls whether naive or coarse-to-fine rasterization is used
    bin_size=None,
    max_faces_per_bin=None  # this setting is for coarse rasterization
)
renderer = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=None,
                              raster_settings=raster_settings),
    shader=HardFlatShader(device=device))
renderer.to(device)

if opt.point_lights:
    template_lights = PointLights()
else:
    template_lights = DirectionalLights()

# pcl_dict = {'points': pointclouds.points_padded[0].cpu().numpy()}
def generate_cow_renders(num_views: int = 40,
                         data_dir: str = DATA_DIR,
                         azimuth_range: float = 180):
    """
    This function generates `num_views` renders of a cow mesh. The renders
    are generated from viewpoints sampled at uniformly distributed azimuth
    intervals. The elevation is kept constant so that the camera's vertical
    position coincides with the equator.

    For a more detailed explanation of this code, please refer to the
    docs/tutorials/fit_textured_mesh.ipynb notebook.

    Args:
        num_views: The number of generated renders.
        data_dir: The folder that contains the cow mesh files. If the cow
            mesh files do not exist in the folder, this function will
            automatically download them.
        azimuth_range: The span of camera azimuth angles, in degrees.

    Returns:
        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which
            the images are rendered.
        images: A tensor of shape `(num_views, height, width, 3)` containing
            the rendered images.
        silhouettes: A tensor of shape `(num_views, height, width)`
            containing the rendered silhouettes.
    """

    # set the paths
    # download the cow mesh if not done before
    cow_mesh_files = [
        os.path.join(data_dir, fl)
        for fl in ("cow.obj", "cow.mtl", "cow_texture.png")
    ]
    if any(not os.path.isfile(f) for f in cow_mesh_files):
        os.makedirs(data_dir, exist_ok=True)
        os.system(
            f"wget -P {data_dir} " +
            "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj")
        os.system(
            f"wget -P {data_dir} " +
            "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl")
        os.system(
            f"wget -P {data_dir} " +
            "https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
        )

    # Setup
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
        torch.cuda.set_device(device)
    else:
        device = torch.device("cpu")

    # Load obj file
    obj_filename = os.path.join(data_dir, "cow.obj")
    mesh = load_objs_as_meshes([obj_filename], device=device)

    # We scale-normalize and center the target mesh to fit in a sphere of
    # radius 1 centered at (0, 0, 0). (scale, center) will be used to bring
    # the predicted mesh to its original center and scale. Note that
    # normalizing the target mesh speeds up the optimization but is not
    # necessary!
    verts = mesh.verts_packed()
    N = verts.shape[0]
    center = verts.mean(0)
    scale = max((verts - center).abs().max(0)[0])
    mesh.offset_verts_(-(center.expand(N, 3)))
    mesh.scale_verts_((1.0 / float(scale)))

    # Get a batch of viewing angles.
    elev = torch.linspace(0, 0, num_views)  # keep constant
    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0

    # Place a point light in front of the object. As mentioned above, the
    # front of the cow is facing the -z direction.
    lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])

    # Initialize an OpenGL perspective camera that represents a batch of
    # different viewing angles. All the camera helper methods support mixed
    # type inputs and broadcasting. So we can view the camera from a distance
    # of dist=2.7, and then specify elevation and azimuth angles for each
    # viewpoint as tensors.
    R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)

    # Define the settings for rasterization and shading. Here we set the
    # output image to be of size 128x128. As we are rendering images for
    # visualization purposes only we will set faces_per_pixel=1 and
    # blur_radius=0.0. Refer to rasterize_meshes.py for explanations of these
    # parameters. We also leave bin_size and max_faces_per_bin at their
    # default values of None, which sets their values using heuristics and
    # ensures that the faster coarse-to-fine rasterization method is used.
    # Refer to docs/notes/renderer.md for an explanation of the difference
    # between naive and coarse-to-fine rasterization.
    raster_settings = RasterizationSettings(image_size=128,
                                            blur_radius=0.0,
                                            faces_per_pixel=1)

    # Create a Phong renderer by composing a rasterizer and a shader. The
    # textured Phong shader will interpolate the texture uv coordinates for
    # each vertex, sample from a texture image and apply the Phong lighting
    # model.
    blend_params = BlendParams(sigma=1e-4,
                               gamma=1e-4,
                               background_color=(0.0, 0.0, 0.0))
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings),
        shader=SoftPhongShader(device=device,
                               cameras=cameras,
                               lights=lights,
                               blend_params=blend_params),
    )

    # Create a batch of meshes by repeating the cow mesh and associated
    # textures. Meshes has a useful `extend` method which allows us to do
    # this very easily. This also extends the textures.
    meshes = mesh.extend(num_views)

    # Render the cow mesh from each viewing angle
    target_images = renderer(meshes, cameras=cameras, lights=lights)

    # Rasterization settings for silhouette rendering
    sigma = 1e-4
    raster_settings_silhouette = RasterizationSettings(
        image_size=128,
        blur_radius=np.log(1.0 / 1e-4 - 1.0) * sigma,
        faces_per_pixel=50)

    # Silhouette renderer
    renderer_silhouette = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras, raster_settings=raster_settings_silhouette),
        shader=SoftSilhouetteShader(),
    )

    # Render silhouette images. The 4th channel (index 3) of the rendering
    # output is the alpha/silhouette channel
    silhouette_images = renderer_silhouette(meshes,
                                            cameras=cameras,
                                            lights=lights)

    # binary silhouettes
    silhouette_binary = (silhouette_images[..., 3] > 1e-4).float()

    return cameras, target_images[..., :3], silhouette_binary
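# Example usage of the helper above; the shapes follow its docstring and the
# image_size=128 used internally:
cameras, images, silhouettes = generate_cow_renders(num_views=40)
print(len(cameras))       # 40 FoVPerspectiveCameras
print(images.shape)       # torch.Size([40, 128, 128, 3])
print(silhouettes.shape)  # torch.Size([40, 128, 128])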
def test_render_r2n2(self):
    """
    Test rendering objects from R2N2, selected both by indices and by
    model_ids.
    """
    # Set up the device and seed for random selections.
    device = torch.device("cuda:0")
    torch.manual_seed(39)

    # Load the dataset in the train split.
    r2n2_dataset = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH)

    # Render the first three models in the dataset.
    R, T = look_at_view_transform(1.0, 1.0, 90)
    cameras = OpenGLPerspectiveCameras(R=R, T=T, device=device)
    raster_settings = RasterizationSettings(image_size=512)
    lights = PointLights(
        location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],
        # TODO: debug the source of the discrepancy between the two images
        # when rendering on GPU.
        diffuse_color=((0, 0, 0), ),
        specular_color=((0, 0, 0), ),
        device=device,
    )
    r2n2_by_idxs = r2n2_dataset.render(
        idxs=list(range(3)),
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Check that there are three images in the batch.
    self.assertEqual(r2n2_by_idxs.shape[0], 3)

    # Compare the rendered models to the reference images.
    for idx in range(3):
        r2n2_by_idxs_rgb = r2n2_by_idxs[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (r2n2_by_idxs_rgb.numpy() * 255).astype(np.uint8)).save(
                    DATA_DIR / ("DEBUG_r2n2_render_by_idxs_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_r2n2_render_by_idxs_and_ids_%s.png" % idx, DATA_DIR)
        self.assertClose(r2n2_by_idxs_rgb, image_ref, atol=0.05)

    # Render the same models, but by model_ids this time.
    r2n2_by_model_ids = r2n2_dataset.render(
        model_ids=[
            "1a4a8592046253ab5ff61a3a2a0e2484",
            "1a04dcce7027357ab540cc4083acfa57",
            "1a9d0480b74d782698f5bccb3529a48d",
        ],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(3):
        r2n2_by_model_ids_rgb = r2n2_by_model_ids[idx,
                                                  ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray(
                (r2n2_by_model_ids_rgb.numpy() * 255).astype(np.uint8)).save(
                    DATA_DIR / ("DEBUG_r2n2_render_by_model_ids_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_r2n2_render_by_idxs_and_ids_%s.png" % idx, DATA_DIR)
        self.assertClose(r2n2_by_model_ids_rgb, image_ref, atol=0.05)

    ###############################
    # Test rendering by categories
    ###############################

    # Render a mixture of categories.
    categories = ["chair", "lamp"]
    mixed_objs = r2n2_dataset.render(
        categories=categories,
        sample_nums=[1, 2],
        device=device,
        cameras=cameras,
        raster_settings=raster_settings,
        lights=lights,
    )
    # Compare the rendered models to the reference images.
    for idx in range(3):
        mixed_rgb = mixed_objs[idx, ..., :3].squeeze().cpu()
        if DEBUG:
            Image.fromarray((mixed_rgb.numpy() * 255).astype(np.uint8)).save(
                DATA_DIR / ("DEBUG_r2n2_render_by_categories_%s.png" % idx))
        image_ref = load_rgb_image(
            "test_r2n2_render_by_categories_%s.png" % idx, DATA_DIR)
        self.assertClose(mixed_rgb, image_ref, atol=0.05)
def render(self,
           model_ids: Optional[List[str]] = None,
           categories: Optional[List[str]] = None,
           sample_nums: Optional[List[int]] = None,
           idxs: Optional[List[int]] = None,
           shader_type=HardPhongShader,
           device="cpu",
           **kwargs) -> torch.Tensor:
    """
    If a list of model_ids is supplied, render all the objects with the
    given model_ids. If no model_ids are supplied but categories and
    sample_nums are specified, randomly select a number of objects (the
    number specified in sample_nums) in the given categories and render
    these objects. If instead a list of idxs is specified, check that the
    idxs are all valid and render the models with the given idxs. Otherwise,
    randomly select a number (the first number in sample_nums, which
    defaults to 1) of models from the loaded dataset and render them.

    Args:
        model_ids: List[str] of model_ids of the models to be rendered.
        categories: List[str] of categories to be rendered. categories and
            sample_nums must be specified at the same time. categories can
            be given in the form of synset offsets or labels, or a
            combination of both.
        sample_nums: List[int] of the number of models to be randomly
            sampled from each category. Could also contain one single
            integer, in which case it will be broadcast for every category.
        idxs: List[int] of indices of the models to be rendered in the
            dataset.
        shader_type: Select shading. Valid options include HardPhongShader
            (default), SoftPhongShader, HardGouraudShader,
            SoftGouraudShader, HardFlatShader and SoftSilhouetteShader.
        device: torch.device on which the tensors should be located.
        **kwargs: Accepts any of the kwargs that the renderer supports.

    Returns:
        Batch of rendered images of shape (N, H, W, 3).
    """
    idxs = self._handle_render_inputs(model_ids, categories, sample_nums,
                                      idxs)
    # Use the getitem method, which loads the mesh + texture.
    models = [self[idx] for idx in idxs]
    meshes = collate_batched_meshes(models)["mesh"]
    if meshes.textures is None:
        meshes.textures = TexturesVertex(verts_features=torch.ones_like(
            meshes.verts_padded(), device=device))

    meshes = meshes.to(device)
    cameras = kwargs.get("cameras", FoVPerspectiveCameras()).to(device)
    if len(cameras) != 1 and len(cameras) % len(meshes) != 0:
        raise ValueError(
            "Mismatch between batch dims of cameras and meshes.")
    if len(cameras) > 1:
        # When rendering R2N2 models, if more than one view is provided,
        # broadcast the meshes so that each mesh can be rendered for each
        # of the views.
        meshes = meshes.extend(len(cameras) // len(meshes))

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=kwargs.get("raster_settings",
                                       RasterizationSettings()),
        ),
        shader=shader_type(
            device=device,
            cameras=cameras,
            lights=kwargs.get("lights", PointLights()).to(device),
        ),
    )
    return renderer(meshes)
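# Example call following the docstring above: render one random chair and two
# random lamps with the default HardPhongShader. The dataset, cameras,
# raster_settings and lights are assumed to be set up as in the earlier test:
images = r2n2_dataset.render(
    categories=["chair", "lamp"],
    sample_nums=[1, 2],
    device=device,
    cameras=cameras,
    raster_settings=raster_settings,
    lights=lights,
)
# images: a (3, H, W, 3) batch, one render per sampled model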
            '.mtl') or filename.endswith('.png'):
        continue
    mesh = load_objs_as_meshes([filepath_in], device=device)

    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    # 20.0 is the output frame rate (fps) of the cv2.VideoWriter
    out = cv2.VideoWriter(tmp_vid_filename, fourcc, 20.0,
                          (IMG_QUALITY, IMG_QUALITY))

    lights = PointLights(device=device, location=[[0.0, 0.0, 3.0]])
    R, T = look_at_view_transform(2.7, 0, 180, at=((-2, 55, -10), ))
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
    raster_settings = RasterizationSettings(
        image_size=IMG_QUALITY,
        blur_radius=0.0,
        faces_per_pixel=1,
    )
    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras,
                                  raster_settings=raster_settings),
        shader=SoftPhongShader(device=device,
                               cameras=cameras,
                               lights=lights))

    min_x, max_x, min_y, max_y = get_mins_maxs_from_obj(filepath_in)
    center = ((min_x + max_x) / 2, (min_y + max_y) / 2, 0)
    radius = -20
    camera_points = []
    for i in tqdm(range(180), position=0, leave=True):
        deg = i * 2
        camera_point = (center[0] + radius * math.sin(math.pi * deg / 180),
                        center[1],