Example #1
def item_to_obs(item):
    height, width = item['color'].shape[-2:]
    # Add a batch dimension to each tensor (and a channel dimension for
    # depth and mask) as expected by Observation.
    return latentfusion.observation.Observation(item['color'].unsqueeze(0),
                                                item['depth'].unsqueeze(0).unsqueeze(0),
                                                item['mask'].unsqueeze(0).unsqueeze(0).float(),
                                                Camera(intrinsic=item['intrinsic'],
                                                       extrinsic=item['extrinsic'],
                                                       width=width,
                                                       height=height))
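For context, here is a possible call site. The dataset variable and its key layout (CHW color, HW depth and mask, 3x3 intrinsic, 4x4 extrinsic) are assumptions for illustration, not part of the original snippet.

# Hypothetical usage; 'dataset' and its tensor layout are assumed.
item = dataset[0]        # 'color': (C, H, W), 'depth'/'mask': (H, W),
                         # 'intrinsic': (3, 3), 'extrinsic': (4, 4)
obs = item_to_obs(item)  # Observation with a leading batch dimension of 1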
Example #2
def load_poserbpf_camera(mat_path, key='poses'):
    """Load a camera from a PoseRBPF result .mat file."""
    mat = loadmat(mat_path)  # scipy.io.loadmat
    intrinsic = torch.tensor(mat['intrinsic_matrix']).float()
    pose = torch.tensor(mat[key]).squeeze().float()
    # The stored pose packs a quaternion in the first four entries and a
    # translation in the last three.
    quat = pose[:4]
    translation = pose[4:]
    extrinsic = three.to_extrinsic_matrix(translation, quat)
    camera = Camera(intrinsic=intrinsic, extrinsic=extrinsic)
    return camera
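A sketch of how the loader might be called; the .mat path is illustrative only.

# Hypothetical usage; the path is an example, not a real file.
camera = load_poserbpf_camera('results/0001_poses.mat', key='poses')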
Example #3
def estimate_initial_pose(depth, mask, intrinsic, width, height) -> Camera:
    """Estimate the initial pose based on depth."""
    translation = torch.stack(estimate_translation(depth, mask, intrinsic), dim=-1)
    # Start with an identity rotation; only the translation is estimated.
    rotation = three.quaternion.identity(intrinsic.shape[0], intrinsic.device)
    extrinsic = three.to_extrinsic_matrix(translation, rotation)

    camera = Camera(intrinsic, extrinsic, height=height, width=width)

    return camera
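Seeding with an identity rotation and a depth-derived translation yields a coarse but metrically sensible starting pose. A hypothetical batched call, with shapes assumed rather than taken from the source:

# Hypothetical shapes for a batch of B views.
# depth, mask: (B, 1, H, W); intrinsic: (B, 3, 3)
camera_init = estimate_initial_pose(depth, mask, intrinsic,
                                    width=640, height=480)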
Example #4
@classmethod
def from_dict(cls, d):
    height, width = d['color'].shape[-2:]
    camera = Camera(d['intrinsic'],
                    d['extrinsic'],
                    width=width,
                    height=height)
    return cls(
        d['color'],
        d['depth'].unsqueeze(-3),  # Create channel dimension.
        d['mask'].unsqueeze(-3).float(),
        camera)
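Note that unsqueeze(-3) inserts the channel axis immediately before the two spatial dimensions, so the same conversion works whether or not d carries extra leading batch dimensions.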
Example #5
def render_observation(renderer, scene):
    color, depth, mask = renderer.render(scene)
    camera = Camera(scene.intrinsic,
                    scene.extrinsic,
                    width=renderer.width,
                    height=renderer.height)

    # Renderer output is HWC; convert to CHW and add a batch dimension.
    return Observation(color.permute(2, 0, 1).unsqueeze(0),
                       depth.unsqueeze(0).unsqueeze(0),
                       mask.unsqueeze(0).unsqueeze(0),
                       camera,
                       object_scale=scene.obj.scale)
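As in item_to_obs above, every tensor receives a leading batch dimension; the difference is that the renderer returns HWC color, hence the permute to CHW first.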
Example #6
@classmethod
def _params_to_camera(cls, params, camera_init, device='cpu'):
    # Promote a single parameter vector to a batch of one.
    if len(params.shape) == 1:
        params = params.unsqueeze(0)

    # Each row of `params` is (tx, ty, tz) followed by a log-quaternion.
    intrinsic = camera_init.intrinsic.expand(params.shape[0], -1, -1).to(device)
    translations = params[:, :3].to(device)
    log_quaternions = params[:, 3:].to(device)
    cameras = Camera(intrinsic=intrinsic,
                     extrinsic=None,
                     translation=translations,
                     log_quaternion=log_quaternions,
                     width=camera_init.width,
                     height=camera_init.height,
                     z_span=camera_init.z_span).to(device)
    return cameras
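A sketch of how an optimizer might consume this; the PoseOptimizer class name and the batch of 16 hypotheses are hypothetical:

# Hypothetical usage: one 7-D row (3 translation + 4 log-quaternion) per pose.
params = torch.zeros(16, 7, requires_grad=True)
cameras = PoseOptimizer._params_to_camera(params, camera_init, device='cpu')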
Example #7
@classmethod
def load(cls, path, frames=None) -> 'Observation':
    if isinstance(path, str):
        path = Path(path)

    with open(path / 'cameras.json', 'r') as f:
        camera_json = json.load(f)
    meta = camera_json.pop('meta', {})

    # Convert list-valued camera fields to float tensors; pass scalars through.
    cameras = Camera(
        **{
            k: torch.tensor(v, dtype=torch.float32) if isinstance(v, list) else v
            for k, v in camera_json.items()
        })

    color_ims = []
    depth_ims = []
    mask_ims = []
    if frames is None:
        inds = list(range(len(cameras)))
    elif isinstance(frames, int):
        inds = [frames]
    else:
        inds = frames

    cameras = cameras[inds]

    for i in inds:
        color_ims.append(
            imageio.imread(path / f"{i:04d}.color.png").astype(np.float32)
            / 255.0)
        depth_ims.append(
            imageio.imread(path / f"{i:04d}.depth.png").astype(np.float32)
            / 1000.0)  # Depth is stored in millimeters; convert to meters.
        # np.bool was removed from NumPy; the builtin bool works here.
        mask_ims.append(
            imageio.imread(path / f"{i:04d}.mask.png").astype(bool))

    color = torch.stack(
        [torch.tensor(x).permute(2, 0, 1) for x in color_ims], dim=0)
    depth = torch.stack([torch.tensor(x).unsqueeze(0) for x in depth_ims],
                        dim=0)
    mask = torch.stack(
        [torch.tensor(x).float().unsqueeze(0) for x in mask_ims], dim=0)

    return cls(color, depth, mask, cameras, **meta)
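A possible call, assuming a directory that follows the naming scheme used in the loop above (0000.color.png, 0000.depth.png, 0000.mask.png, plus cameras.json); the path itself is illustrative.

# Hypothetical usage; loads frames 0, 2 and 5 only.
obs = Observation.load('data/scene_01', frames=[0, 2, 5])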
Example #8
def _process_batch(batch, rotation, cube_size, camera_dist, input_size, device,
                   is_gt):
    # Collapse viewpoint dimension to batch dimension:
    #   (B, V, C, H, W) => (B*V, C, H, W)
    batch_size = batch['mask'].shape[0]
    extrinsic = bv2b(batch['extrinsic'].to(device))
    intrinsic = bv2b(batch['intrinsic'].to(device))
    mask = bv2b(batch['mask'].unsqueeze(2).float().to(device))
    image = bv2b(gan_normalize(batch['render'].to(device)))
    if 'depth' in batch:
        depth = bv2b(batch['depth'].unsqueeze(2).to(device))
    else:
        depth = None

    # Project image features onto canonical volume.
    camera = Camera(intrinsic,
                    extrinsic,
                    z_span=cube_size / 2.0,
                    height=image.size(2),
                    width=image.size(3)).to(device)
    if rotation is not None:
        camera.rotate(rotation.expand(camera.length, -1))
        # translation = three.uniform(3, -cube_size/16, cube_size/16).view(1, 3).expand(camera.length, -1).to(device)
        # camera.translate(translation)
    _zoom = functools.partial(camera.zoom,
                              target_size=input_size,
                              target_dist=camera_dist)

    out = dict()
    # Zoom camera to canonical distance and size.
    out['image'], out['camera'] = _zoom(image, scale_mode='bilinear')
    out['mask'] = _zoom(mask, scale_mode='nearest')[0]
    if depth is not None:
        out['depth'] = camera.normalize_depth(
            _zoom(depth, scale_mode='nearest')[0])

    if is_gt:
        out['image'] = out['image'] * out['mask']
        if 'depth' in out:
            out['depth'] = mask_normalized_depth(out['depth'], out['mask'])

    # Restore the viewpoint dimension: (B*V, C, H, W) => (B, V, C, H, W).
    for k in ('image', 'depth', 'mask'):
        if k in out:  # 'depth' is absent when the batch has no depth maps.
            out[k] = b2bv(out[k], batch_size=batch_size)

    return out
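The bv2b/b2bv helpers only merge and split the batch and viewpoint axes. A minimal sketch of the idea, not necessarily latentfusion's exact implementation:

# Sketch of the reshaping behind bv2b/b2bv; the real helpers may differ.
def bv2b(x):                       # (B, V, ...) -> (B*V, ...)
    return x.reshape(-1, *x.shape[2:])

def b2bv(x, batch_size):           # (B*V, ...) -> (B, V, ...)
    return x.reshape(batch_size, -1, *x.shape[1:])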
Example #9
def sample_cameras_with_estimate(n,
                                 camera_est,
                                 translation_std=0.0,
                                 hemisphere=False,
                                 upright=False) -> Camera:
    device = camera_est.device
    intrinsic = camera_est.intrinsic.expand(n, -1, -1)
    translation = camera_est.translation.expand(n, -1)
    # Perturb the estimated translation with isotropic Gaussian noise.
    translation = translation + torch.randn_like(translation) * translation_std
    # quaternion = three.orientation.disk_sample_quats(n, min_angle=min_angle)
    # quaternion = three.orientation.evenly_distributed_quats(n)
    quaternion = three.orientation.evenly_distributed_quats(
        n, hemisphere=hemisphere, upright=upright)
    extrinsic = three.to_extrinsic_matrix(translation.cpu(),
                                          quaternion).to(device)
    viewport = camera_est.viewport.expand(n, -1)

    return Camera(intrinsic,
                  extrinsic,
                  camera_est.z_span,
                  width=camera_est.width,
                  height=camera_est.height,
                  viewport=viewport)
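A hypothetical pairing with estimate_initial_pose from Example #3: jitter the translation estimate slightly and cover orientation space evenly; the hypothesis count and noise level are illustrative.

# Hypothetical usage: 64 pose hypotheses around the translation estimate.
cameras = sample_cameras_with_estimate(64, camera_est,
                                       translation_std=0.01,
                                       hemisphere=True,
                                       upright=True)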