def set_renderer():
    # Setup
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)

    # Initialize an OpenGL orthographic camera.
    R, T = look_at_view_transform(2.0, 0, 180)
    cameras = OpenGLOrthographicCameras(device=device, R=R, T=T)

    raster_settings = RasterizationSettings(
        image_size=512,
        blur_radius=0.0,
        faces_per_pixel=1,
        bin_size=None,
        max_faces_per_bin=None
    )

    lights = PointLights(device=device, location=((2.0, 2.0, 2.0),))

    renderer = MeshRenderer(
        rasterizer=MeshRasterizer(
            cameras=cameras,
            raster_settings=raster_settings
        ),
        shader=HardPhongShader(
            device=device,
            cameras=cameras,
            lights=lights
        )
    )
    return renderer
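# Usage sketch for set_renderer() above (not part of the original source): render a
# textured mesh loaded from an .obj file. "model.obj" is a placeholder path, and the
# calls assume the same legacy PyTorch3D API version as the snippet.
from pytorch3d.io import load_objs_as_meshes

renderer = set_renderer()
mesh = load_objs_as_meshes(["model.obj"], device=torch.device("cuda:0"))  # mesh must carry textures for HardPhongShader
images = renderer(mesh)                 # (N, H, W, 4) RGBA tensor
rgb = images[0, ..., :3].cpu().numpy()  # first image, RGB channels only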
def __init__(self, meshes: Meshes, image_size=256, device='cuda'):
    """
    Initialization of MaskRenderer. The renderer is initialized with a predefined
    rasterizer and shader. A soft silhouette shader is used to compute the
    projection mask.

    :param meshes: A batch of meshes (pytorch3d.structures.Meshes), i.e. meshes in R^N.
        See https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/structures/meshes.py
        for additional information. In our case it is usually a single batch, which is
        the template for a certain category.
    :param device: The device on which the computation is done.
    :param image_size: Image size for the rasterization. Default is 256.
    """
    super(MaskRenderer, self).__init__()
    self.device = device
    self._meshes = meshes

    # Parameter settings as in the PyTorch3D tutorial
    # (https://pytorch3d.org/tutorials/camera_position_optimization_with_differentiable_rendering)
    cameras = OpenGLOrthographicCameras(device=device)
    self._rasterizer = MeshRasterizer(
        cameras=cameras,
        raster_settings=RasterizationSettings(image_size=image_size))
    self._shader = SoftSilhouetteShader(
        blend_params=BlendParams(sigma=1e-4, gamma=1e-4))
def set_renderer(image_size=512, use_sfm=False):
    # Setup
    device = torch.device("cuda:0")
    torch.cuda.set_device(device)

    # Initialize the camera: an SfM perspective camera if use_sfm is set,
    # otherwise an OpenGL orthographic camera.
    R, T = look_at_view_transform(2.0, 0, 180)
    if use_sfm:
        cameras = SfMPerspectiveCameras(focal_length=580.0, device=device, R=R, T=T)
    else:
        cameras = OpenGLOrthographicCameras(device=device, R=R, T=T)

    raster_settings = RasterizationSettings(image_size=image_size,
                                            blur_radius=0.0,
                                            faces_per_pixel=1,
                                            bin_size=None,
                                            max_faces_per_bin=None)
    lights = PointLights(device=device, location=((2.0, 2.0, 2.0),))

    rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
    shader = HardPhongShader(device=device, cameras=cameras, lights=lights)

    if use_sfm:
        renderer = MeshRendererWithDepth(rasterizer=rasterizer, shader=shader)
    else:
        renderer = MeshRenderer(rasterizer=rasterizer, shader=shader)
    return renderer
def _get_projected_positions_of_sphere_points(self, sphere_points, rotation, translation):
    """
    For the given points on the unit sphere, calculates the 3D coordinates on the mesh
    template and projects them back onto the image plane.

    :param sphere_points: A (B x 3 x H x W) tensor containing the predicted points on the sphere
    :param rotation: A (B x CP x 3 x 3) camera rotation tensor
    :param translation: A (B x CP x 3) camera translation tensor
    :return: A tuple (xy, z, uv, uv_3d)
        - xy - (B x CP x 2 x H x W) x, y values of the 3D points after projection onto the image plane
        - z - (B x CP x 1 x H x W) z value of the projection
        - uv - (B x 2 x H x W) UV values of the sphere coordinates
        - uv_3d - (B x H x W x 3) tensor with the 3D coordinates on the mesh template
          for the given sphere coordinates
    """
    uv = convert_3d_to_uv_coordinates(sphere_points.permute(0, 2, 3, 1))
    batch_size = uv.size(0)
    height = uv.size(1)
    width = uv.size(2)
    num_poses = rotation.size(1)

    uv_flatten = uv.view(-1, 2)
    uv_3d = self.uv_to_3d(uv_flatten).view(batch_size, 1, -1, 3)
    uv_3d = uv_3d.repeat(1, num_poses, 1, 1).view(batch_size * num_poses, -1, 3)

    cameras = OpenGLOrthographicCameras(device=sphere_points.device,
                                        R=rotation.view(-1, 3, 3),
                                        T=translation.view(-1, 3))
    xyz_cam = cameras.get_world_to_view_transform().transform_points(uv_3d)
    z = xyz_cam[:, :, 2:].view(batch_size, num_poses, height, width, 1)
    xy = cameras.transform_points(uv_3d)[:, :, :2].view(batch_size, num_poses, height, width, 2)

    xy = xy.permute(0, 1, 4, 2, 3).flip(2)
    z = z.permute(0, 1, 4, 2, 3)
    uv = uv.permute(0, 3, 1, 2)
    uv_3d = uv_3d.view(batch_size, num_poses, height, width, 3)[:, 0, :, :, :].squeeze()

    return xy, z, uv, uv_3d
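# Minimal sketch (hypothetical tensors, not from the original source) of the two
# projection calls used above: NDC x,y come from cameras.transform_points, while
# camera-space depth comes from the world-to-view transform.
device = torch.device("cuda:0")
R, T = look_at_view_transform(2.0, 0, 180)
cameras = OpenGLOrthographicCameras(device=device, R=R, T=T)
points = torch.rand(1, 100, 3, device=device)                                  # (B, P, 3) world-space points
xy = cameras.transform_points(points)[:, :, :2]                                # (B, P, 2) NDC coordinates
z = cameras.get_world_to_view_transform().transform_points(points)[:, :, 2:]   # (B, P, 1) depth per point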
def __init__(self, meshes, image_size=256, device='cuda'):
    super(ColorRenderer, self).__init__()
    self.meshes = meshes

    cameras = OpenGLOrthographicCameras(device=device)
    raster_settings = RasterizationSettings(image_size=image_size,
                                            blur_radius=0.0,
                                            faces_per_pixel=1,
                                            bin_size=0)
    lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
    self.renderer = MeshRenderer(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=TexturedSoftPhongShader(device=device, lights=lights))
def __init__(self, meshes: Meshes, image_size=256):
    """
    Initialization of the Renderer class. Instances of the mask and depth renderer
    are created on the corresponding device (the device of the given meshes).

    :param meshes: A batch of meshes (pytorch3d.structures.Meshes).
    :param image_size: Image size for the rasterization. Default is 256.
    """
    super().__init__()
    self.meshes = meshes
    device = meshes.device

    # TODO: check how to implement weak perspective (scaled orthographic).
    cameras = OpenGLOrthographicCameras(device=device)
    self._rasterizer = MeshRasterizer(
        cameras=cameras,
        raster_settings=RasterizationSettings(image_size=image_size, faces_per_pixel=100))
    self._shader = SoftSilhouetteShader(
        blend_params=BlendParams(sigma=1e-4, gamma=1e-4))
def __init__(self, meshes: Meshes, device: str, image_size: int = 256):
    """
    Initialization of DepthRenderer. Initializes the default mesh rasterizer and a
    silhouette shader, which is used for simplicity.

    :param meshes: A batch of meshes (pytorch3d.structures.Meshes), i.e. meshes in R^N.
        See https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/structures/meshes.py
        for additional information.
    :param device: The device on which the computation is done, e.g. cpu or cuda.
    :param image_size: Image size for the rasterization. Default is 256.
    """
    super(DepthRenderer, self).__init__()
    self._meshes = meshes

    # TODO: check how to implement weak perspective (scaled orthographic).
    cameras = OpenGLOrthographicCameras(device=device)
    raster_settings = RasterizationSettings(image_size=image_size)
    self._rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
    self._shader = SoftSilhouetteShader(
        blend_params=BlendParams(sigma=1e-4, gamma=1e-4))
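# Sketch of how the rasterizer and silhouette shader above are typically chained in a
# forward pass (an assumed pattern, not the original class's forward): rasterize the
# meshes, shade them, and read the soft mask from the alpha channel and depth from the z-buffer.
def forward(self):
    fragments = self._rasterizer(self._meshes)
    silhouette = self._shader(fragments, self._meshes)  # (N, H, W, 4); alpha channel holds the soft mask
    mask = silhouette[..., 3]
    depth = fragments.zbuf[..., 0]                      # (N, H, W) z-buffer of the closest face per pixel
    return mask, depth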
def __init__(self, cfgs):
    super(Renderer, self).__init__()
    self.device = cfgs.get('device', 'cuda:0')
    self.image_size = cfgs.get('image_size', 64)
    self.min_depth = cfgs.get('min_depth', 0.9)
    self.max_depth = cfgs.get('max_depth', 1.1)
    self.rot_center_depth = cfgs.get('rot_center_depth',
                                     (self.min_depth + self.max_depth) / 2)
    # TODO: FoV (field of view) is fixed to 10 degrees (following the paper).
    self.fov = cfgs.get('fov', 10)
    self.tex_cube_size = cfgs.get('tex_cube_size', 2)
    self.renderer_min_depth = cfgs.get('renderer_min_depth', 0.1)
    self.renderer_max_depth = cfgs.get('renderer_max_depth', 10.)

    #### camera intrinsics
    #              (u)   (x)
    #    d * K^-1  (v) = (y)
    #              (1)   (z)

    ## renderer for visualization
    R = [[[1., 0., 0.],
          [0., 1., 0.],
          [0., 0., 1.]]]
    R = torch.FloatTensor(R).to(self.device)
    t = torch.zeros(1, 3, dtype=torch.float32).to(self.device)

    ## K is the camera intrinsic matrix.
    fx = (self.image_size - 1) / 2 / (math.tan(self.fov / 2 * math.pi / 180))
    fy = (self.image_size - 1) / 2 / (math.tan(self.fov / 2 * math.pi / 180))
    cx = (self.image_size - 1) / 2
    cy = (self.image_size - 1) / 2
    K = [[fx, 0., cx],
         [0., fy, cy],
         [0., 0., 1.]]
    K = torch.FloatTensor(K).to(self.device)
    self.inv_K = torch.inverse(K).unsqueeze(0)
    self.K = K.unsqueeze(0)

    ## Define the renderer, using PyTorch3D.
    # FIXME: znear and zfar are set to match the neural renderer default settings.
    cameras = OpenGLOrthographicCameras(device=self.device, R=R, T=t, znear=0.01, zfar=100)
    # cameras = OpenGLPerspectiveCameras(device=self.device, R=R, T=t,
    #                                    znear=self.renderer_min_depth,
    #                                    zfar=self.renderer_max_depth,
    #                                    fov=self.fov)
    raster_settings = PointsRasterizationSettings(
        image_size=self.image_size,
        radius=0.003,
        points_per_pixel=10,
        bin_size=None,
        max_points_per_bin=None)
    self.renderer = PointsRenderer(
        rasterizer=PointsRasterizer(cameras=cameras, raster_settings=raster_settings),
        compositor=AlphaCompositor(composite_params=None))
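# Worked example (illustrative, not part of the original source) of the intrinsics
# above for the default image_size=64 and fov=10 degrees.
import math
image_size, fov = 64, 10
fx = (image_size - 1) / 2 / math.tan(fov / 2 * math.pi / 180)  # ≈ 360.05
cx = (image_size - 1) / 2                                      # 31.5
# K ≈ [[360.05, 0, 31.5], [0, 360.05, 31.5], [0, 0, 1]]; a pixel (u, v) at depth d
# back-projects to d * K^-1 @ [u, v, 1]^T in camera coordinates, matching the
# comment block above.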
                                                       'car_mean_shape.mat'), device=device)
uv_to_3d = UVto3D(mean_shape, device)
key_point_colors = np.random.uniform(0, 1, (len(dataset.kp_names), 3))

for i, data in enumerate(data_loader):
    img = data['img'].to(device, dtype=torch.float)
    scale = data['scale'].to(device, dtype=torch.float)
    trans = data['trans'].to(device, dtype=torch.float)
    quat = data['quat'].to(device, dtype=torch.float)

    rotation, translation = get_scaled_orthographic_projection(
        scale, trans, quat, True)
    camera = OpenGLOrthographicCameras(device=device, R=rotation, T=translation)

    kps = (((data['kp'].to(device, dtype=torch.float) + 1) / 2) * 255).to(torch.int32)
    kp = draw_key_points(img, kps, key_point_colors)

    fig = plt.figure()
    plt.subplot(2, 2, 1)
    plt.imshow(kp[0].permute(1, 2, 0).cpu())

    # Plot the key points directly, using the 3D keypoints and projecting them onto the image plane.
    kp_3d = torch.from_numpy(dataset.kp_3d).to(device, dtype=torch.float32).unsqueeze(0)
    xyz = camera.transform_points(kp_3d)
    xy = (((xyz[:, :, :2] + 1) / 2) * 255).to(torch.int32)
    kp_xy = torch.cat((xy, kps[:, :, 2:]), dim=2)