Example #1
def load_scene(path_meta, scene, max_depth=3, vol_prcnt=.995, vol_margin=1.5,
               fuse_semseg=False, device=0, verbose=2):
    if verbose>0:
        print('preparing data', scene)
    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)  # gpu for this process

    # get the dataset
    transform = transforms.Compose([transforms.ResizeImage((640, 480)),
                                    transforms.ToTensor(),
                                    transforms.InstanceToSemseg('nyu40'),
                                    transforms.IntrinsicsPoseToProjection(),
                                    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=None,
                                              batch_sampler=None, num_workers=4)

    pts = []
    for i, frame in enumerate(dataloader1):
        if verbose>1 and i%50==0:
            print(scene, 'backprojecting depth maps to point clouds', i, len(dataset))
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth>max_depth]=0
        pts.append(depth_to_world(projection, depth).view(3,-1).T)
    pts = torch.cat(pts)
    pts = pts[torch.isfinite(pts[:,0])].cpu().numpy()
    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(np.quantile(pts, 1-vol_prcnt, axis=0)-vol_margin).float()
    vol_max = torch.as_tensor(np.quantile(pts, vol_prcnt, axis=0)+vol_margin).float()

    return info_file, device, dataset, origin, vol_max
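
A minimal call sketch for the function above (path_meta and the scene name are hypothetical placeholders; the module-level imports used by load_scene — os, numpy, torch, and the Atlas SceneDataset/transforms/depth_to_world helpers — are assumed to already be in scope):

path_meta = '/path/to/derived/data'  # hypothetical path to the derived-data tree
scene = 'scene0000_00'               # hypothetical scene name
info_file, device, dataset, origin, vol_max = load_scene(
    path_meta, scene, max_depth=3, fuse_semseg=False, device=0, verbose=1)
# origin/vol_max bound the scene and can be used to size a TSDF grid
print('volume bounds:', origin.tolist(), vol_max.tolist())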
Example #2
def process(info_file, save_path, total_scenes_index, total_scenes_count):
    # gt depth data loader
    width, height = 640, 480
    transform = transforms.Compose([
        transforms.ResizeImage((width,height)),
        transforms.ToTensor(),
    ])
    dataset = SceneDataset(info_file, transform, frame_types=['depth'])
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=2)
    scene = dataset.info['scene']

    # get info about tsdf
    file_tsdf_pred = os.path.join(save_path, '%s.npz'%scene)
    temp = TSDF.load(file_tsdf_pred)
    voxel_size = int(temp.voxel_size*100)
    
    # re-fuse to remove hole filling since filled holes are penalized in 
    # mesh metrics
    vol_dim = list(temp.tsdf_vol.shape)
    origin = temp.origin
    tsdf_fusion = TSDFFusion(vol_dim, float(voxel_size)/100, origin, color=False)
    device = tsdf_fusion.device

    # mesh renderer
    renderer = Renderer()
    mesh_file = os.path.join(save_path, '%s.ply'%scene)
    mesh = trimesh.load(mesh_file, process=False)
    mesh_opengl = renderer.mesh_opengl(mesh)

    for i, d in enumerate(dataloader):
        if i%25==0:
            print(total_scenes_index, total_scenes_count,scene, i, len(dataloader))

        depth_trgt = d['depth'].numpy()
        _, depth_pred = renderer(height, width, d['intrinsics'], d['pose'], mesh_opengl)

        temp = eval_depth(depth_pred, depth_trgt)
        if i==0:
            metrics_depth = temp
        else:
            metrics_depth = {key:value+temp[key] 
                             for key, value in metrics_depth.items()}

        # # play video visualizations of depth
        # viz1 = (np.clip((depth_trgt-.5)/5,0,1)*255).astype(np.uint8)
        # viz2 = (np.clip((depth_pred-.5)/5,0,1)*255).astype(np.uint8)
        # viz1 = cv2.applyColorMap(viz1, cv2.COLORMAP_JET)
        # viz2 = cv2.applyColorMap(viz2, cv2.COLORMAP_JET)
        # viz1[depth_trgt==0]=0
        # viz2[depth_pred==0]=0
        # viz = np.hstack((viz1,viz2))
        # cv2.imshow('test', viz)
        # cv2.waitKey(1)

        tsdf_fusion.integrate((d['intrinsics'] @ d['pose'].inverse()[:3,:]).to(device),
                              torch.as_tensor(depth_pred).to(device))


    metrics_depth = {key:value/len(dataloader) 
                     for key, value in metrics_depth.items()}

    # save trimmed mesh
    file_mesh_trim = os.path.join(save_path, '%s_trim.ply'%scene)
    tsdf_fusion.get_tsdf().get_mesh().export(file_mesh_trim)

    # eval tsdf
    file_tsdf_trgt = dataset.info['file_name_vol_%02d'%voxel_size]
    metrics_tsdf = eval_tsdf(file_tsdf_pred, file_tsdf_trgt)

    # eval trimmed mesh
    file_mesh_trgt = dataset.info['file_name_mesh_gt']
    metrics_mesh = eval_mesh(file_mesh_trim, file_mesh_trgt)

    # transfer labels from pred mesh to gt mesh using nearest neighbors
    file_attributes = os.path.join(save_path, '%s_attributes.npz'%scene)
    if os.path.exists(file_attributes):
        mesh.vertex_attributes = np.load(file_attributes)
        print(mesh.vertex_attributes)
        mesh_trgt = trimesh.load(file_mesh_trgt, process=False)
        mesh_transfer = project_to_mesh(mesh, mesh_trgt, 'semseg')
        semseg = mesh_transfer.vertex_attributes['semseg']
        # save as txt for benchmark evaluation
        np.savetxt(os.path.join(save_path, '%s.txt'%scene), semseg, fmt='%d')
        mesh_transfer.export(os.path.join(save_path, '%s_transfer.ply'%scene))

        # TODO: semseg val evaluation

    metrics = {**metrics_depth, **metrics_mesh, **metrics_tsdf}
    print(metrics)

    rslt_file = os.path.join(save_path, '%s_metrics.json'%scene)
    json.dump(metrics, open(rslt_file, 'w'))

    return scene, metrics
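
A sketch of a driver loop over scenes (the glob pattern and save_path are hypothetical; process and its helpers are assumed to be importable from the same module as above):

import glob

save_path = '/path/to/results'                               # hypothetical path
info_files = sorted(glob.glob('/path/to/meta/*/info.json'))  # hypothetical layout
all_metrics = {}
for idx, info_file in enumerate(info_files):
    # each call evaluates one scene and returns its metrics dict
    scene, metrics = process(info_file, save_path, idx, len(info_files))
    all_metrics[scene] = metrics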
Example #3
def fuse_scene(path_meta,
               scene,
               voxel_size,
               trunc_ratio=3,
               max_depth=3,
               vol_prcnt=.995,
               vol_margin=1.5,
               fuse_semseg=False,
               device=0,
               verbose=2):
    """ Use TSDF fusion with GT depth maps to generate GT TSDFs

    Args:
        path_meta: path to save the TSDFs 
            (we recommend creating a parallel directory structure to save 
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF
        trunc_ratio: truncation distance in voxel units
        max_depth: mask out large depth values since they are noisy
        vol_prcnt: percentile used when computing the bounding volume of the
            TSDF (ignores outliers)
        vol_margin: padding for computing bounding volume of the TSDF
        fuse_semseg: whether to accumulate semseg images for GT semseg
            (the preferred method is to not accumulate and instead transfer
            labels from the ground truth labeled mesh)
        device: index of the GPU to use for this worker
        verbose: how much logging to print

    Returns:
        writes a TSDF (.npz) file into path_meta/scene

    Notes: we use a conservative value of max_depth=3 to reduce noise in the
    ground truth. However, this means some distant data is missing, which can
    create artifacts. Nevertheless, we found we achieved the best 2D metrics
    with the less noisy ground truth.
    """

    if verbose > 0:
        print('fusing', scene, 'voxel size', voxel_size)

    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)  # gpu for this process

    # get the dataset
    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.InstanceToSemseg('nyu40'),
        transforms.IntrinsicsPoseToProjection(),
    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=None,
                                             batch_sampler=None,
                                             num_workers=4)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1,
                                              batch_size=None,
                                              batch_sampler=None,
                                              num_workers=4)

    pts = []
    for i, frame in enumerate(dataloader1):
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()
    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(
        np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(
        np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()
    vol_dim = ((vol_max - origin) / (float(voxel_size) / 100)).int().tolist()

    # initialize tsdf
    tsdf_fusion = TSDFFusion(vol_dim,
                             float(voxel_size) / 100,
                             origin,
                             trunc_ratio,
                             device,
                             label=fuse_semseg)

    # integrate frames
    for i, frame in enumerate(dataloader):
        if verbose > 1 and i % 25 == 0:
            print(scene, 'integrating voxel size', voxel_size, i, len(dataset))

        projection = frame['projection'].to(device)
        image = frame['image'].to(device)
        depth = frame['depth'].to(device)
        semseg = frame['semseg'].to(device) if fuse_semseg else None

        # only use reliable depth
        depth[depth > max_depth] = 0

        tsdf_fusion.integrate(projection, depth, image, semseg)

    # save mesh and tsdf
    file_name_vol = os.path.join(path_meta, scene,
                                 'tsdf_%02d.npz' % voxel_size)
    file_name_mesh = os.path.join(path_meta, scene,
                                  'mesh_%02d.ply' % voxel_size)
    tsdf = tsdf_fusion.get_tsdf()
    tsdf.save(file_name_vol)
    mesh = tsdf.get_mesh()
    mesh.export(file_name_mesh)
    if fuse_semseg:
        mesh = tsdf.get_mesh('instance')
        mesh.export(file_name_mesh.replace('.ply', '_semseg.ply'))

    # update info json
    data = load_info_json(info_file)
    data['file_name_vol_%02d' % voxel_size] = file_name_vol
    json.dump(data, open(info_file, 'w'))
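
A minimal sketch of fusing one scene at several voxel sizes (path_meta and the specific voxel sizes are assumptions for illustration; each call writes tsdf_XX.npz / mesh_XX.ply into path_meta/scene and updates the scene's info.json):

path_meta = '/path/to/derived/data'   # hypothetical path
scene = 'scene0000_00'                # hypothetical scene name
for voxel_size in [4, 8, 16]:         # voxel sizes in cm (illustrative choice)
    fuse_scene(path_meta, scene, voxel_size, device=0, verbose=1)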
Example #4
def process(info_file, model, num_frames, save_path, total_scenes_index,
            total_scenes_count):
    """ Run the netork on a scene and save output

    Args:
        info_file: path to info_json file for the scene
        model: pytorch model that implemets Atlas
        frames: number of frames to use in reconstruction (-1 for all)
        save_path: where to save outputs
        total_scenes_index: used to print which scene we are on
        total_scenes_count: used to print the total number of scenes to process
    """

    voxel_scale = model.voxel_sizes[0]
    dataset = SceneDataset(info_file,
                           voxel_sizes=[voxel_scale],
                           voxel_types=model.voxel_types,
                           num_frames=num_frames)

    # compute voxel origin
    if 'file_name_vol_%02d' % voxel_scale in dataset.info:
        # compute voxel origin from ground truth
        tsdf_trgt = dataset.get_tsdf()['vol_%02d' % voxel_scale]
        voxel_size = float(voxel_scale) / 100
        # shift by integer number of voxels for padding
        shift = torch.tensor([.5, .5, .5]) // voxel_size
        offset = tsdf_trgt.origin - shift * voxel_size

    else:
        # use default origin
        # assume the floor is at z=0, so pad the bottom a bit
        offset = torch.tensor([0, 0, -.5])
    T = torch.eye(4)
    T[:3, 3] = offset

    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.TransformSpace(T, model.voxel_dim_val, [0, 0, 0]),
        transforms.IntrinsicsPoseToProjection(),
    ])
    dataset.transform = transform
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=None,
                                             batch_sampler=None,
                                             num_workers=32)

    scene = dataset.info['scene']

    model.initialize_volume()
    torch.cuda.empty_cache()

    # trainer = pl.Trainer(
    #     distributed_backend='dp',
    #     benchmark=False,
    #     gpus=[5],
    #     precision=32)
    #     #num_sanity_val_steps=0)

    # print(total_scenes_index,
    #       total_scenes_count,
    #       dataset.info['dataset'],
    #       scene,
    #       len(dataloader)
    # )

    # model.test_offset = offset.cuda()
    # model.save_path = save_path
    # model.scene = scene
    # trainer.test(model, test_dataloaders=dataloader)
    anime = None
    model.scale = 1
    for j, d in tqdm(enumerate(dataloader)):
        # if j < 700:
        #     continue
        # only process every other frame
        if j % 2 != 0:
            continue

        # logging progress
        # if j%25==0:
        #     print(total_scenes_index,
        #           total_scenes_count,
        #           dataset.info['dataset'],
        #           scene,
        #           j,
        #           len(dataloader)
        #     )

        #print(d['projection'].unsqueeze(0).shape, d['image'].unsqueeze(0).shape)
        model.inference1(d['projection'].unsqueeze(0).cuda(),
                         image=d['image'].unsqueeze(0).cuda())

        if j == len(dataloader) - 1:  # or (j%100 == 0):
            volume = model.valid[0][0].cpu().numpy().astype(np.uint8)
            anime = mlab.pipeline.scalar_field(volume)
            mlab.pipeline.volume(anime)
            mlab.axes()
            mlab.show()

            volume = model.volume[0][0].cpu().numpy().astype(np.uint8)
            anime = mlab.pipeline.scalar_field(volume)
            mlab.pipeline.volume(anime)
            mlab.axes()
            mlab.show()

        # NOTE: this `continue` skips the remaining per-frame prediction and
        # visualization code below
        continue

        outputs, losses = model.inference2()

        tsdf_pred = model.postprocess(outputs)[0]
        #print(tsdf_pred.tsdf_vol.shape)

        # TODO: set origin in model... make consistent with offset above?
        tsdf_pred.origin = offset.view(1, 3).cuda()

        if j == len(dataloader) - 1:  # or (j%100 == 0):
            volume = tsdf_pred.tsdf_vol.cpu().numpy().astype(np.uint8)
            anime = mlab.pipeline.scalar_field(volume)
            mlab.pipeline.volume(anime)
            mlab.axes()
            mlab.show()

        if 'semseg' in tsdf_pred.attribute_vols:
            #mesh_pred = tsdf_pred.get_mesh('semseg')
            mesh_pred = tsdf_pred.get_mesh()

            # save vertex attributes separately since trimesh doesn't
            np.savez(os.path.join(save_path, '%s_attributes.npz' % scene),
                     **mesh_pred.vertex_attributes)
        else:
            mesh_pred = tsdf_pred.get_mesh()

        tsdf_pred.save(os.path.join(save_path, '%s_%d.npz' % (scene, j)))
        mesh_pred.export(os.path.join(save_path, '%s_%d.ply' % (scene, j)))

    outputs, losses = model.inference2()

    tsdf_pred = model.postprocess(outputs)[0]

    # TODO: set origin in model... make consistent with offset above?
    tsdf_pred.origin = offset.view(1, 3).cuda()

    if 'semseg' in tsdf_pred.attribute_vols:
        mesh_pred = tsdf_pred.get_mesh('semseg')

        # save vertex attributes separately since trimesh doesn't
        np.savez(os.path.join(save_path, '%s_attributes.npz' % scene),
                 **mesh_pred.vertex_attributes)
    else:
        mesh_pred = tsdf_pred.get_mesh()

    tsdf_pred.save(
        os.path.join(save_path, '%s_%.1f.npz' % (scene, model.scale)))
    mesh_pred.export(
        os.path.join(save_path, '%s_%.1f.ply' % (scene, model.scale)))
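
For reference, the vertex attributes saved above can be reattached to the exported mesh later, mirroring the reload in Example #2 (a sketch; the file names follow the '%s_%.1f' / '%s_attributes' patterns used in the exports above and are assumed to exist on disk):

import os
import numpy as np
import trimesh

mesh = trimesh.load(os.path.join(save_path, '%s_%.1f.ply' % (scene, model.scale)),
                    process=False)
attrs = np.load(os.path.join(save_path, '%s_attributes.npz' % scene))
mesh.vertex_attributes = dict(attrs)   # e.g. per-vertex 'semseg' labels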
Example #5
def process(info_file, model, num_frames, save_path, total_scenes_index,
            total_scenes_count, window_conf):
    """ Run the netork on a scene and save output

    Args:
        info_file: path to info_json file for the scene
        model: pytorch model that implemets Atlas
        frames: number of frames to use in reconstruction (-1 for all)
        save_path: where to save outputs
        total_scenes_index: used to print which scene we are on
        total_scenes_count: used to print the total number of scenes to process
    """
    global my_mesh
    voxel_scale = model.voxel_sizes[0]
    dataset = SceneDataset(info_file,
                           voxel_sizes=[voxel_scale],
                           voxel_types=model.voxel_types,
                           num_frames=num_frames)

    # compute voxel origin
    if 'file_name_vol_%02d' % voxel_scale in dataset.info:
        # compute voxel origin from ground truth
        tsdf_trgt = dataset.get_tsdf()['vol_%02d' % voxel_scale]
        voxel_size = float(voxel_scale) / 100
        # shift by integer number of voxels for padding
        shift = torch.tensor([.5, .5, .5]) // voxel_size
        offset = tsdf_trgt.origin - shift * voxel_size

    else:
        # use default origin
        # assume the floor is at z=0, so pad the bottom a bit
        offset = torch.tensor([0, 0, -.5])

    T = torch.eye(4)
    T[:3, 3] = offset

    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.TransformSpace(T, model.voxel_dim_val, [0, 0, 0]),
        transforms.IntrinsicsPoseToProjection(),
    ])
    dataset.transform = transform
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=None,
                                             batch_sampler=None,
                                             num_workers=2)

    scene = dataset.info['scene']

    model.initialize_volume()
    torch.cuda.empty_cache()

    start = time.time()

    for j, d in enumerate(dataloader):

        # logging progress
        if j % 25 == 0:
            print(time.time() - start)
            start = time.time()
            print(total_scenes_index, total_scenes_count,
                  dataset.info['dataset'], scene, j, len(dataloader))

        model.inference1(d['projection'].unsqueeze(0).cuda(),
                         image=d['image'].unsqueeze(0).cuda())

        outputs, losses = model.inference2()

        tsdf_pred = model.postprocess(outputs)[0]

        # TODO: set origin in model... make consistent with offset above?
        tsdf_pred.origin = offset.view(1, 3).cuda()

        if 'semseg' in tsdf_pred.attribute_vols:
            mesh_pred = tsdf_pred.get_mesh('semseg')
            # save vertex attributes separately since trimesh doesn't
            np.savez(os.path.join(save_path, '%s_attributes.npz' % scene),
                     **mesh_pred.vertex_attributes)
        else:
            mesh_pred = tsdf_pred.get_mesh()

        tsdf_pred.save(os.path.join(save_path, '%s.npz' % scene))
        my_mesh = mesh_pred
    #mesh_pred.show()
    tsdf_pred.save(os.path.join(save_path, '%s.npz' % scene))
    mesh_pred.export(os.path.join(save_path, '%s.ply' % scene))
    # export_glb expects a trimesh.Scene (not the scene name string) and returns
    # the binary glTF bytes; the .glb file name below is assumed for illustration
    glb_bytes = trimesh.exchange.gltf.export_glb(trimesh.Scene(mesh_pred))
    with open(os.path.join(save_path, '%s.glb' % scene), 'wb') as f:
        f.write(glb_bytes)
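
Because the function stores the last predicted mesh in the module-level my_mesh, it can also be inspected interactively after the call returns (a sketch; the arguments are illustrative and trimesh's viewer requires pyglet to be installed):

process(info_file, model, num_frames=-1, save_path=save_path,
        total_scenes_index=0, total_scenes_count=1, window_conf=None)
my_mesh.show()   # opens trimesh's interactive viewer on the last mesh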