Code Example #1
def test_raster_coordinates(scene, batch_size):
    """Test if the projected raster coordinates are correct

    Args:
        scene: Path to the scene file
        batch_size: Number of times the projected positions and camera parameters are
            repeated along the batch dimension

    Returns:
        None

    """
    res = render_scene(scene)
    scene = make_torch_var(load_scene(scene))
    pos_cc = res['pos'].reshape(1, -1, res['pos'].shape[-1])
    pos_cc = pos_cc.repeat(batch_size, 1, 1)

    camera = scene['camera']
    camera['eye'] = camera['eye'].repeat(batch_size, 1)
    camera['at'] = camera['at'].repeat(batch_size, 1)
    camera['up'] = camera['up'].repeat(batch_size, 1)

    viewport = make_list2np(camera['viewport'])
    W, H = float(viewport[2] - viewport[0]), float(viewport[3] - viewport[1])
    px_coord_idx, px_coord = project_image_coordinates(pos_cc, camera)
    xp, yp = np.meshgrid(np.linspace(0, W - 1, int(W)),
                         np.linspace(0, H - 1, int(H)))
    xp = xp.ravel()[None, ...].repeat(batch_size, axis=0)
    yp = yp.ravel()[None, ...].repeat(batch_size, axis=0)

    px_coord = torch.round(px_coord - 0.5).long()

    np.testing.assert_array_almost_equal(xp, get_data(px_coord[..., 0]))
    np.testing.assert_array_almost_equal(yp, get_data(px_coord[..., 1]))
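
The test above hinges on a half-pixel convention: a surfel that sits exactly on the center of pixel (i, j) projects to raster coordinates (i + 0.5, j + 0.5), which is why the assertions round `px_coord - 0.5` before comparing against the integer meshgrid. A minimal, self-contained sketch of that convention (plain NumPy, made-up W and H, not from the original repository):

# Standalone illustration of the half-pixel convention used by the test above.
import numpy as np

W, H = 4, 3
xc, yc = np.meshgrid(np.arange(W) + 0.5, np.arange(H) + 0.5)   # raster coords of pixel centers
ix = np.round(xc - 0.5).astype(int)
iy = np.round(yc - 0.5).astype(int)
# Rounding after the half-pixel shift recovers the integer pixel indices exactly
assert (ix == np.arange(W)[None, :].repeat(H, axis=0)).all()
assert (iy == np.arange(H)[:, None].repeat(W, axis=1)).all()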
Code Example #2
def project_image_coordinates(surfels, camera):
    """Project surfels given in world coordinate to the camera's projection plane.

    Args:
        surfels: [batch_size, num_surfels, pos]
        camera: [{'eye': [num_batches,...], 'lookat': [num_batches,...], 'up': [num_batches,...],
                    'viewport': [0, 0, W, H], 'fovy': <radians>}]

    Returns:
        px_idx: Flattened destination pixel indices of dimensions [batch_size, H*W].
            Note that the range of possible indices is restricted to be between 0
            and W*H (inclusive). It is inclusive because the last index is used as
            a "dump" for any surfel that falls outside of the camera's field of view.
        px_coord: Raster-space coordinates of dimensions [batch_size, H*W, 3], where
            the last channel carries the projected depth.
    """
    surfels_plane = project_surfels(surfels, camera)

    # Rasterize
    viewport = make_list2np(camera['viewport'])
    W, H = float(viewport[2] - viewport[0]), float(viewport[3] - viewport[1])
    aspect_ratio = float(W) / float(H)

    fovy = make_list2np(camera['fovy'])
    focal_length = make_list2np(camera['focal_length'])
    h = np.tan(fovy / 2) * 2 * focal_length
    w = h * aspect_ratio

    px_coord = torch.zeros_like(surfels_plane)
    # Make sure to also transmit the new depth
    px_coord[..., 2] = surfels_plane[..., 2]
    px_coord[..., :2] = (surfels_plane[..., :2] *
                         tch_var_f([-(W - 1) / w, (H - 1) / h]).unsqueeze(-2) +
                         tch_var_f([W / 2., H / 2.]).unsqueeze(-2))
    px_coord_idx = torch.round(px_coord - 0.5).long()

    # Use an integer stride so px_idx stays a LongTensor for torch.where below
    px_idx = px_coord_idx[..., 1] * int(W) + px_coord_idx[..., 0]

    max_idx = W * H  # Index used if the indices are out of bounds of the camera
    max_idx_tensor = tch_var_l([max_idx])

    # Map out of bounds pixels to the last (extra) index
    mask = (px_coord_idx[..., 1] < 0) | (px_coord_idx[..., 0] < 0) | (
        px_coord_idx[..., 1] >= H) | (px_coord_idx[..., 0] >= W)
    px_idx = torch.where(mask, max_idx_tensor, px_idx)

    return px_idx, px_coord
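
To make the "dump" index behaviour described in the docstring concrete, here is a self-contained sketch in plain torch (illustrative W, H and indices, not from the original repository): any pixel index outside the viewport is redirected to the extra slot W*H, which a later scatter can simply discard.

import torch

W, H = 4, 3
px_coord_idx = torch.tensor([[0, 0], [3, 2], [-1, 1], [4, 0], [2, 3]])   # (x, y) pixel indices
px_idx = px_coord_idx[..., 1] * W + px_coord_idx[..., 0]
out_of_bounds = ((px_coord_idx[..., 0] < 0) | (px_coord_idx[..., 0] >= W) |
                 (px_coord_idx[..., 1] < 0) | (px_coord_idx[..., 1] >= H))
px_idx = torch.where(out_of_bounds, torch.tensor(W * H), px_idx)
print(px_idx)   # tensor([ 0, 11, 12, 12, 12]) -- the last three fall on the dump index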
Code Example #3
def projection_renderer_differentiable(surfels,
                                       rgb,
                                       camera,
                                       rotated_image=None,
                                       blur_size=0.15):
    """Project surfels given in world coordinate to the camera's projection plane
       in a way that is differentiable w.r.t depth. This is achieved by interpolating
       the surfel values using a Gaussian filter.

    Args:
        surfels: [batch_size, num_surfels, pos]
        rgb: [batch_size, num_surfels, D-channel data] or [batch_size, H, W, D-channel data]
        camera: [{'eye': [num_batches,...], 'lookat': [num_batches,...], 'up': [num_batches,...],
                    'viewport': [0, 0, W, H], 'fovy': <radians>}]
        rotated_image: [batch_size, num_surfels, D-channel data] or [batch_size, H, W, D-channel data]
                        Image to mix in with the result of the rotation.
        blur_size: (between 0 and 1) Controls the standard deviation of the Gaussian used for
               filtering (sigma = blur_size * rgb.size(-2) / 6). As a rule of thumb, surfels within
               a radius of 3*sigma around a pixel contribute to that pixel in the final image.

    Returns:
        RGB image of dimensions [batch_size, H, W, 3] from the projected surfels, together
        with the accumulated Gaussian weight mask of dimensions [batch_size, H, W, 1]

    """
    px_idx, px_coord = project_image_coordinates(surfels, camera)
    viewport = make_list2np(camera['viewport'])
    W = int(viewport[2] - viewport[0])
    H = int(viewport[3] - viewport[1])
    rgb_reshaped = rgb.view(rgb.size(0), -1, rgb.size(-1))

    # Perform a weighted average of points surrounding a pixel using a Gaussian filter
    # Very similar to the idea in this paper: https://arxiv.org/pdf/1810.09381.pdf

    x, y = np.meshgrid(np.linspace(0, W - 1, W) + 0.5,
                       np.linspace(0, H - 1, H) + 0.5)
    x = tch_var_f(x.ravel()).repeat(surfels.size(0), 1).unsqueeze(-1)
    y = tch_var_f(y.ravel()).repeat(surfels.size(0), 1).unsqueeze(-1)

    xp, yp = px_coord[..., 0].unsqueeze(-2), px_coord[..., 1].unsqueeze(-2)
    sigma = blur_size * rgb.size(-2) / 6
    scale = torch.exp((-(xp - x)**2 - (yp - y)**2) / (2 * sigma**2))

    mask = scale.sum(-1)
    if rotated_image is not None:
        rotated_image = rotated_image.view(*rgb_reshaped.size())
        # out = (rotated_image_weight * rotated_image + torch.sum(scale.unsqueeze(-1) * rgb_reshaped.unsqueeze(-3), -2)) / (scale.sum(-1) + rotated_image_weight + 1e-10).unsqueeze(-1)
        out = torch.sum(scale.unsqueeze(-1) * rgb_reshaped.unsqueeze(-3),
                        -2) + rotated_image * (1 - mask)
    else:
        out = torch.sum(scale.unsqueeze(-1) * rgb_reshaped.unsqueeze(-3),
                        -2) / (mask + 1e-10).unsqueeze(-1)

    return out.view(*rgb.size()), mask.view(*rgb.size()[:-1], 1)
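
A condensed sketch of the Gaussian weighting performed above, using plain torch in place of the project helpers (`tch_var_f`) and made-up shapes: two surfels are splatted onto the four pixel centers of a 2x2 image, and each pixel takes a weighted average of the surfel colours with weights exp(-d^2 / (2 sigma^2)).

import torch

px = torch.tensor([[0.5, 0.5], [1.5, 1.5]])                      # raster coords of 2 surfels
rgb = torch.tensor([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])           # their colours (red, green)
centers = torch.tensor([[0.5, 0.5], [1.5, 0.5], [0.5, 1.5], [1.5, 1.5]])   # 2x2 pixel centers
sigma = 0.5
d2 = ((centers[:, None, :] - px[None, :, :]) ** 2).sum(-1)       # [4 pixels, 2 surfels]
scale = torch.exp(-d2 / (2 * sigma ** 2))
out = (scale[..., None] * rgb[None]).sum(-2) / (scale.sum(-1, keepdim=True) + 1e-10)
# out[0] is dominated by the red surfel, out[3] by the green one.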
Code Example #4
def projection_renderer(surfels, rgb, camera):
    """Project surfels given in world coordinate to the camera's projection plane.

    Args:
        surfels: [batch_size, num_surfels, pos]
        rgb: [batch_size, num_surfels, D-channel data] or [batch_size, H, W, D-channel data]
        camera: [{'eye': [num_batches,...], 'lookat': [num_batches,...], 'up': [num_batches,...],
                    'viewport': [0, 0, W, H], 'fovy': <radians>}]

    Returns:
        RGB image of dimensions [batch_size, H, W, 3] from the projected surfels, together
        with the mask returned by scatter_mean_dim0, reshaped to the same dimensions

    """
    px_idx, _ = project_image_coordinates(surfels, camera)
    viewport = make_list2np(camera['viewport'])
    W = int(viewport[2] - viewport[0])
    rgb_reshaped = rgb.view(rgb.size(0), -1, rgb.size(-1))
    rgb_out, mask = scatter_mean_dim0(rgb_reshaped, px_idx.long())
    return rgb_out.reshape(rgb.shape), mask.reshape(rgb.shape)
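
`scatter_mean_dim0` is a project helper; the sketch below (plain torch, a single batch element, made-up values) shows the idea it implements here: the colours of all surfels that land on the same flat pixel index are averaged, and the extra "dump" slot at index W*H collects out-of-bounds surfels so it can be dropped.

import torch

W, H = 2, 2
px_idx = torch.tensor([0, 0, 3, 4])                              # two surfels on pixel 0, one on the dump index W*H
rgb = torch.tensor([[1.0, 0.0], [0.0, 1.0], [0.5, 0.5], [9.0, 9.0]])
sums = torch.zeros(W * H + 1, rgb.size(-1)).index_add_(0, px_idx, rgb)
counts = torch.bincount(px_idx, minlength=W * H + 1)
rgb_out = (sums / counts.clamp(min=1).unsqueeze(-1).float())[:W * H]   # averaged colours, dump slot dropped
mask = (counts > 0)[:W * H]                                            # pixels that received at least one surfel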
Code Example #5
def projection_reverse_renderer(rgb,
                                in_pos_wc,
                                out_pos_wc,
                                camera1,
                                camera2,
                                rotated_image=None,
                                compute_new_depth=False,
                                depth_epsilon=1e-1,
                                mask_dropout=0):
    """
    Compute the rotated image in the reverse direction: take the surfel positions of the output image (out_pos_wc),
    use them to find the corresponding (u, v) positions in the input image (rgb), and sample those positions with
    bilinear interpolation.
    """
    viewport = make_list2np(camera1['viewport'])
    W = int(viewport[2] - viewport[0])
    H = int(viewport[3] - viewport[1])
    _, px_coord = project_image_coordinates(out_pos_wc, camera1)

    px_coord = px_coord.view(*rgb.size()[:-1], 3)
    normalized_px_coord = px_coord[..., :2] / torch.tensor(
        [W, H], dtype=torch.float, device=px_coord.device) * 2 - 1
    out = torch.nn.functional.grid_sample(rgb.permute(0, 3, 1, 2),
                                          normalized_px_coord).permute(
                                              0, 2, 3, 1)

    # TODO this is a hard mask, should we make it soft by using the bilinear weights (at the edges only or everywhere?)
    # |- An alternative would be to try Dropout on the mask
    # NOTE: use 0.5 to account for bilinear interpolation
    mask = (px_coord[..., 1] < 0.5) | (px_coord[..., 0] < 0.5) | (
        px_coord[..., 1] >= H - 0.5) | (px_coord[..., 0] >= W - 0.5)
    mask = 1 - mask.view(*rgb.size()[:-1], 1).float()

    # Use depth to mask out pixels that end up on the same location
    # This process could be done by averaging the depth of neighboring pixels, but instead uses grid_sample to interpolate (twice) which turns out to be more efficient
    # (1) Depth from cam1, ordered as cam2 pixels
    depth = px_coord[..., 2].unsqueeze(-1)
    # Project the points from the cam1 (top) image into the bottom image space
    _, px_coord_out = project_image_coordinates(in_pos_wc, camera2)
    px_coord_out = px_coord_out.view(*rgb.size()[:-1], 3)
    normalized_px_coord_out = px_coord_out[..., :2] / torch.tensor(
        [W, H], dtype=torch.float, device=px_coord_out.device) * 2 - 1
    # sample the depth from cam1 (ordered as cam2 pixels) using the points from the cam1 (top) image
    # we get depth from cam1 (ordered as cam1 pixels)
    depth_sampled_in = torch.nn.functional.grid_sample(
        depth.permute(0, 3, 1, 2), normalized_px_coord_out)
    # we then need to flip the whole thing to get a mask in cam2 space (ordered as cam2 pixels)
    # sample the depth from cam1 (ordered as cam1 pixels) using the points from the cam2 (bottom) image
    # we get depth from cam1 (ordered as cam2 pixels)
    depth_sampled_out = torch.nn.functional.grid_sample(
        depth_sampled_in, normalized_px_coord).permute(0, 2, 3, 1)  # (2)
    # comparing the two depths images from cam1 (ordered as cam2 pixels) ((1) and (2)), we can get a mask
    mask = mask * (depth <= depth_sampled_out + depth_epsilon).float()
    if mask_dropout > 0:
        mask = torch.nn.functional.dropout(mask, mask_dropout, training=True)

    proj_out = {'mask': mask, 'image1': out}

    if rotated_image is not None:
        # mask = mask.detach() if detach_mask else mask # NOTE: Right now, it doesnt even matter since mask is non-differentiable
        out = mask * out + (1 - mask) * rotated_image

    if compute_new_depth:
        depth2 = px_coord_out[..., 2].unsqueeze(-1)
        proj_out['depth'] = torch.nn.functional.grid_sample(
            depth2.permute(0, 3, 1, 2),
            normalized_px_coord).permute(0, 2, 3, 1)

    return out, proj_out
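
The reverse renderer leans on `torch.nn.functional.grid_sample`, so the key detail is the coordinate normalization `px_coord / [W, H] * 2 - 1`. A small self-contained check of that convention (made-up image and coordinates; `align_corners=False` is spelled out here because it matches the pixel-center normalization, whereas the calls above rely on the framework default):

import torch
import torch.nn.functional as F

W, H = 4, 2
img = torch.arange(H * W, dtype=torch.float).view(1, 1, H, W)   # [N, C, H, W]
px = torch.tensor([[[[0.5, 0.5], [3.5, 1.5]]]])                 # raster (x, y) of two pixel centers, [N, 1, 2, 2]
grid = px / torch.tensor([W, H], dtype=torch.float) * 2 - 1     # normalize to [-1, 1]
sampled = F.grid_sample(img, grid, align_corners=False)
print(sampled.view(-1))                                         # tensor([0., 7.]) -- exact pixel values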
Code Example #6
def projection_renderer_differentiable_fast(surfels,
                                            rgb,
                                            camera,
                                            rotated_image=None,
                                            blur_size=0.15,
                                            use_depth=True,
                                            use_center_dist=True,
                                            compute_new_depth=False,
                                            blur_rotated_image=True,
                                            detach_mask=False,
                                            detach_mask2=False,
                                            detach_depth_merge=False):
    """Project surfels given in world coordinate to the camera's projection plane
       in a way that is differentiable w.r.t depth. This is achieved by interpolating
       the surfel values using bilinear interpolation then blurring the output image using a Gaussian filter.

    Args:
        surfels: [batch_size, num_surfels, pos] - world coordinates
        rgb: [batch_size, num_surfels, D-channel data] or [batch_size, H, W, D-channel data]
        camera: [{'eye': [num_batches,...], 'lookat': [num_batches,...], 'up': [num_batches,...],
                    'viewport': [0, 0, W, H], 'fovy': <radians>}]
        rotated_image: [batch_size, num_surfels, D-channel data] or [batch_size, H, W, D-channel data]
                        Image to mix in with the result of the rotation.
        blur_size: (between 0 and 1). Determines the size of the gaussian kernel as a percentage of the width of the input image
                   The standard deviation of the Gaussian kernel is automatically calculated from this value
        use_depth: Whether to weight the surfels landing on the same output pixel by their depth relative to the camera
        use_center_dist: Whether to weight the surfels landing on the same output pixel by their distance to the nearest pixel center location
        compute_new_depth: Whether to compute and output the depth as seen by the new camera
        blur_rotated_image: Whether to blur the 'rotated_image' passed as argument before merging it with the output image.
                            Set to False if the rotated image is already blurred
        detach_mask: Whether to detach the mask m in I_top + (1 - m) * I_bottom
        detach_mask2: Alternative to `detach_mask`: whether to detach the mask m in m * (I_top / m') + (1 - m) * I_bottom
        detach_depth_merge: Whether to detach the per-surfel depth before it is used to weight the scattered contributions

    Returns:
        Output RGB image of dimensions [batch_size, H, W, 3] from the projected surfels, and a dict
        containing the soft coverage mask ('mask'), the normalized projected image before merging
        ('image1') and, if compute_new_depth is set, the depth as seen by the new camera ('depth')
    """
    _, px_coord = project_image_coordinates(surfels, camera)
    viewport = make_list2np(camera['viewport'])
    W = int(viewport[2] - viewport[0])
    H = int(viewport[3] - viewport[1])
    rgb_in = rgb.view(rgb.size(0), -1, rgb.size(-1))

    # First create a uniform grid through bilinear interpolation
    # Then, perform a convolution with a Gaussian kernel to blur the output image
    # Idea from this paper: https://arxiv.org/pdf/1810.09381.pdf
    # Tensorflow implementation: https://github.com/eldar/differentiable-point-clouds/blob/master/dpc/util/point_cloud.py#L60

    px_idx = torch.floor(px_coord[..., :2] - 0.5).long()

    # Difference to the nearest pixel center on the top left
    x = (px_coord[..., 0] - 0.5) - px_idx[..., 0].float()
    y = (px_coord[..., 1] - 0.5) - px_idx[..., 1].float()
    x, y = x.unsqueeze(-1), y.unsqueeze(-1)

    def flat_px(px):
        """Flatten the pixel locations and make sure everything is within bounds"""
        out = px[..., 1] * W + px[..., 0]
        max_idx = tch_var_l([W * H])
        mask = (px[..., 1] < 0) | (px[..., 0] < 0) | (px[..., 1] >=
                                                      H) | (px[..., 0] >= W)
        out = torch.where(mask, max_idx, out)
        return out

    depth = px_coord[..., 2].detach() if detach_depth_merge else px_coord[..., 2]
    # Squared distance to the nearest pixel center
    center_dist_2 = (x**2 + y**2).squeeze(-1)

    # Bilinear weights of the four pixels surrounding each projected surfel
    corner_weights = [([0, 0], (1 - x) * (1 - y)), ([0, 1], (1 - x) * y),
                      ([1, 0], x * (1 - y)), ([1, 1], x * y)]

    rgb_out, soft_mask = 0, 0
    for offset, weight in corner_weights:
        idx = flat_px(px_idx + tch_var_l(offset))
        rgb_out = rgb_out + scatter_weighted_blended_oit(
            rgb_in * weight, depth, center_dist_2, idx,
            use_depth=use_depth, use_center_dist=use_center_dist)
        soft_mask = soft_mask + scatter_weighted_blended_oit(
            weight, depth, center_dist_2, idx,
            use_depth=use_depth, use_center_dist=use_center_dist)

    if compute_new_depth:
        depth_in = depth.unsqueeze(-1)
        depth_out = 0
        for offset, weight in corner_weights:
            depth_out = depth_out + scatter_weighted_blended_oit(
                depth_in * weight, depth, center_dist_2,
                flat_px(px_idx + tch_var_l(offset)),
                use_depth=use_depth, use_center_dist=use_center_dist)
        depth_out = depth_out.view(*rgb.size()[:-1], 1)

    rgb_out = rgb_out.view(*rgb.size())
    soft_mask = soft_mask.view(*rgb.size()[:-1], 1)

    # Blur the rgb and mask images
    rgb_out = blur(rgb_out.permute(0, 3, 1, 2), blur_size).permute(0, 2, 3, 1)
    soft_mask = blur(soft_mask.permute(0, 3, 1, 2),
                     blur_size).permute(0, 2, 3, 1)

    # There seems to be a bug in PyTorch where if a single division by 0 occurs in a tensor, the whole thing becomes NaN?
    # Might be related to this issue: https://github.com/pytorch/pytorch/issues/4132
    # Because of this behavior, one can't simply do `out / out_mask` in `torch.where`
    soft_mask_nonzero = torch.where(soft_mask > 0, soft_mask,
                                    torch.ones_like(soft_mask)) + 1e-20

    # If an additional image is passed in, merge it using the soft mask:
    rgb_out_normalized = torch.where(soft_mask > 0,
                                     rgb_out / soft_mask_nonzero, rgb_out)
    if rotated_image is not None:
        if blur_rotated_image:
            rotated_image = blur(rotated_image.permute(0, 3, 1, 2),
                                 blur_size).permute(0, 2, 3, 1)
        if detach_mask:
            out = torch.where(
                soft_mask > 1, rgb_out / soft_mask_nonzero.detach(),
                rgb_out + rotated_image * (1 - soft_mask.detach()))
        elif detach_mask2:
            soft_mask_detached = soft_mask.detach()
            out = soft_mask_detached * rgb_out_normalized + (
                1 - soft_mask_detached) * rotated_image
        else:
            out = torch.where(soft_mask > 1, rgb_out / soft_mask_nonzero,
                              rgb_out + rotated_image * (1 - soft_mask))
    else:
        out = rgb_out_normalized

    # Other things to output:
    proj_out = {'mask': soft_mask, 'image1': rgb_out_normalized}

    if compute_new_depth:
        depth_out = torch.where(soft_mask > 0, depth_out / soft_mask_nonzero,
                                depth_out)
        proj_out['depth'] = depth_out

    return out, proj_out
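
The fast renderer splats each surfel onto its four neighbouring pixels with the bilinear weights (1-x)(1-y), (1-x)y, x(1-y) and xy before blurring. The standalone sketch below reproduces that weighting for a single surfel with plain torch and made-up values; `scatter_add_` stands in for the project's `scatter_weighted_blended_oit`, which additionally weights contributions by depth and by distance to the pixel center.

import torch

W, H = 3, 2
px_coord = torch.tensor([1.75, 0.25])                 # raster (x, y) of one surfel
rgb = torch.tensor([1.0, 2.0, 3.0])
px_idx = torch.floor(px_coord - 0.5).long()           # top-left neighbouring pixel, here (1, -1)
x, y = (px_coord - 0.5) - px_idx.float()              # fractional offsets in [0, 1)
weights = torch.stack([(1 - x) * (1 - y), (1 - x) * y, x * (1 - y), x * y])   # sums to 1
corners = px_idx + torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]])
flat = corners[:, 1] * W + corners[:, 0]
oob = (corners[:, 0] < 0) | (corners[:, 0] >= W) | (corners[:, 1] < 0) | (corners[:, 1] >= H)
flat = torch.where(oob, torch.tensor(W * H), flat)    # out-of-bounds corners go to the dump index
canvas = torch.zeros(W * H + 1, 3)
canvas.scatter_add_(0, flat.unsqueeze(-1).expand(-1, 3), weights.unsqueeze(-1) * rgb)
# canvas[:W * H] now holds the splatted colour; canvas[W * H] collected the two corners with y = -1.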