def load_scene(path_meta, scene, max_depth=3, vol_prcnt=.995, vol_margin=1.5,
               fuse_semseg=False, device=0, verbose=2):
    if verbose > 0:
        print('preparing data', scene)
    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)  # gpu for this process

    # get the dataset
    transform = transforms.Compose([transforms.ResizeImage((640, 480)),
                                    transforms.ToTensor(),
                                    transforms.InstanceToSemseg('nyu40'),
                                    transforms.IntrinsicsPoseToProjection(),
                                    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=None,
                                              batch_sampler=None, num_workers=4)

    pts = []
    for i, frame in enumerate(dataloader1):
        if verbose > 1 and i % 50 == 0:
            print(scene, 'backprojecting depth maps to point clouds', i, len(dataset))
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()

    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()

    return info_file, device, dataset, origin, vol_max
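# Hypothetical usage sketch (not part of the original script): iterate a few
# scenes and print the volume bounds computed by load_scene. The path and
# scene names below are placeholders.
def _example_load_scenes():
    path_meta = '/path/to/derived/meta'   # parallel dir holding one info.json per scene
    for scene in ['scene0000_00', 'scene0001_00']:
        _, _, _, origin, vol_max = load_scene(path_meta, scene, max_depth=3,
                                              fuse_semseg=False, device=0)
        print(scene, 'origin', origin.tolist(), 'vol_max', vol_max.tolist())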
def process(info_file, save_path, total_scenes_index, total_scenes_count):
    # gt depth data loader
    width, height = 640, 480
    transform = transforms.Compose([
        transforms.ResizeImage((width, height)),
        transforms.ToTensor(),
    ])
    dataset = SceneDataset(info_file, transform, frame_types=['depth'])
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=2)
    scene = dataset.info['scene']

    # get info about tsdf
    file_tsdf_pred = os.path.join(save_path, '%s.npz' % scene)
    temp = TSDF.load(file_tsdf_pred)
    voxel_size = int(temp.voxel_size * 100)

    # re-fuse to remove hole filling since filled holes are penalized in
    # mesh metrics
    vol_dim = list(temp.tsdf_vol.shape)
    origin = temp.origin
    tsdf_fusion = TSDFFusion(vol_dim, float(voxel_size) / 100, origin, color=False)
    device = tsdf_fusion.device

    # mesh renderer
    renderer = Renderer()
    mesh_file = os.path.join(save_path, '%s.ply' % scene)
    mesh = trimesh.load(mesh_file, process=False)
    mesh_opengl = renderer.mesh_opengl(mesh)

    for i, d in enumerate(dataloader):
        if i % 25 == 0:
            print(total_scenes_index, total_scenes_count, scene, i, len(dataloader))

        depth_trgt = d['depth'].numpy()
        _, depth_pred = renderer(height, width, d['intrinsics'], d['pose'], mesh_opengl)

        temp = eval_depth(depth_pred, depth_trgt)
        if i == 0:
            metrics_depth = temp
        else:
            metrics_depth = {key: value + temp[key]
                             for key, value in metrics_depth.items()}

        # # play video visualizations of depth
        # viz1 = (np.clip((depth_trgt - .5) / 5, 0, 1) * 255).astype(np.uint8)
        # viz2 = (np.clip((depth_pred - .5) / 5, 0, 1) * 255).astype(np.uint8)
        # viz1 = cv2.applyColorMap(viz1, cv2.COLORMAP_JET)
        # viz2 = cv2.applyColorMap(viz2, cv2.COLORMAP_JET)
        # viz1[depth_trgt == 0] = 0
        # viz2[depth_pred == 0] = 0
        # viz = np.hstack((viz1, viz2))
        # cv2.imshow('test', viz)
        # cv2.waitKey(1)

        tsdf_fusion.integrate((d['intrinsics'] @ d['pose'].inverse()[:3, :]).to(device),
                              torch.as_tensor(depth_pred).to(device))

    metrics_depth = {key: value / len(dataloader)
                     for key, value in metrics_depth.items()}

    # save trimmed mesh
    file_mesh_trim = os.path.join(save_path, '%s_trim.ply' % scene)
    tsdf_fusion.get_tsdf().get_mesh().export(file_mesh_trim)

    # eval tsdf
    file_tsdf_trgt = dataset.info['file_name_vol_%02d' % voxel_size]
    metrics_tsdf = eval_tsdf(file_tsdf_pred, file_tsdf_trgt)

    # eval trimmed mesh
    file_mesh_trgt = dataset.info['file_name_mesh_gt']
    metrics_mesh = eval_mesh(file_mesh_trim, file_mesh_trgt)

    # transfer labels from pred mesh to gt mesh using nearest neighbors
    file_attributes = os.path.join(save_path, '%s_attributes.npz' % scene)
    if os.path.exists(file_attributes):
        mesh.vertex_attributes = np.load(file_attributes)
        print(mesh.vertex_attributes)
        mesh_trgt = trimesh.load(file_mesh_trgt, process=False)
        mesh_transfer = project_to_mesh(mesh, mesh_trgt, 'semseg')
        semseg = mesh_transfer.vertex_attributes['semseg']
        # save as txt for benchmark evaluation
        np.savetxt(os.path.join(save_path, '%s.txt' % scene), semseg, fmt='%d')
        mesh_transfer.export(os.path.join(save_path, '%s_transfer.ply' % scene))

        # TODO: semseg val evaluation

    metrics = {**metrics_depth, **metrics_mesh, **metrics_tsdf}
    print(metrics)

    rslt_file = os.path.join(save_path, '%s_metrics.json' % scene)
    json.dump(metrics, open(rslt_file, 'w'))

    return scene, metrics
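# Hypothetical evaluation driver (not part of the original script): run
# process over every scene and average the per-scene metrics. The info_files
# list and save_path are supplied by the caller; the output file name below
# is a placeholder.
def _example_evaluate_all(info_files, save_path):
    results = {}
    for i, info_file in enumerate(info_files):
        scene, metrics = process(info_file, save_path, i, len(info_files))
        results[scene] = metrics
    # average each metric across scenes
    keys = next(iter(results.values())).keys()
    summary = {k: sum(m[k] for m in results.values()) / len(results) for k in keys}
    json.dump(summary, open(os.path.join(save_path, 'metrics_avg.json'), 'w'))
    return summary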
def fuse_scene(path_meta, scene, voxel_size, trunc_ratio=3, max_depth=3,
               vol_prcnt=.995, vol_margin=1.5, fuse_semseg=False, device=0,
               verbose=2):
    """ Use TSDF fusion with GT depth maps to generate GT TSDFs

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF
        trunc_ratio: truncation distance in voxel units
        max_depth: mask out large depth values since they are noisy
        vol_prcnt: percentile used when computing the bounding volume of the
            TSDF so that outliers are ignored
        vol_margin: padding for computing bounding volume of the TSDF
        fuse_semseg: whether to accumulate semseg images for GT semseg
            (the preferred method is to not accumulate and instead transfer
            labels from the ground truth labeled mesh)
        device: cpu/ which gpu
        verbose: how much logging to print

    Returns:
        writes a TSDF (.npz) file into path_meta/scene

    Notes: we use a conservative value of max_depth=3 to reduce noise in the
        ground truth. However, this means some distant data is missing which
        can create artifacts. Nevertheless, we found we achieved the best 2d
        metrics with the less noisy ground truth.
    """

    if verbose > 0:
        print('fusing', scene, 'voxel size', voxel_size)

    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)  # gpu for this process

    # get the dataset
    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.InstanceToSemseg('nyu40'),
        transforms.IntrinsicsPoseToProjection(),
    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=4)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=None,
                                              batch_sampler=None, num_workers=4)

    pts = []
    for i, frame in enumerate(dataloader1):
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()

    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()
    vol_dim = ((vol_max - origin) / (float(voxel_size) / 100)).int().tolist()

    # initialize tsdf
    tsdf_fusion = TSDFFusion(vol_dim, float(voxel_size) / 100, origin,
                             trunc_ratio, device, label=fuse_semseg)

    # integrate frames
    for i, frame in enumerate(dataloader):
        if verbose > 1 and i % 25 == 0:
            print(scene, 'integrating voxel size', voxel_size, i, len(dataset))

        projection = frame['projection'].to(device)
        image = frame['image'].to(device)
        depth = frame['depth'].to(device)
        semseg = frame['semseg'].to(device) if fuse_semseg else None

        # only use reliable depth
        depth[depth > max_depth] = 0

        tsdf_fusion.integrate(projection, depth, image, semseg)

    # save mesh and tsdf
    file_name_vol = os.path.join(path_meta, scene, 'tsdf_%02d.npz' % voxel_size)
    file_name_mesh = os.path.join(path_meta, scene, 'mesh_%02d.ply' % voxel_size)
    tsdf = tsdf_fusion.get_tsdf()
    tsdf.save(file_name_vol)
    mesh = tsdf.get_mesh()
    mesh.export(file_name_mesh)
    if fuse_semseg:
        mesh = tsdf.get_mesh('instance')
        mesh.export(file_name_mesh.replace('.ply', '_semseg.ply'))

    # update info json
    data = load_info_json(info_file)
    data['file_name_vol_%02d' % voxel_size] = file_name_vol
    json.dump(data, open(info_file, 'w'))
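# Hypothetical usage sketch (not part of the original script): build ground
# truth TSDFs for one scene at several resolutions. Voxel sizes are in cm
# (fuse_scene divides by 100 internally) and the values below are examples.
def _example_fuse_all_resolutions(path_meta, scene):
    for voxel_size in [4, 8, 16]:
        fuse_scene(path_meta, scene, voxel_size, device=0, verbose=1)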
def process(info_file, model, num_frames, save_path, total_scenes_index,
            total_scenes_count):
    """ Run the network on a scene and save output

    Args:
        info_file: path to info_json file for the scene
        model: pytorch model that implements Atlas
        num_frames: number of frames to use in reconstruction (-1 for all)
        save_path: where to save outputs
        total_scenes_index: used to print which scene we are on
        total_scenes_count: used to print the total number of scenes to process
    """

    voxel_scale = model.voxel_sizes[0]
    dataset = SceneDataset(info_file, voxel_sizes=[voxel_scale],
                           voxel_types=model.voxel_types,
                           num_frames=num_frames)

    # compute voxel origin
    if 'file_name_vol_%02d' % voxel_scale in dataset.info:
        # compute voxel origin from ground truth
        tsdf_trgt = dataset.get_tsdf()['vol_%02d' % voxel_scale]
        voxel_size = float(voxel_scale) / 100
        # shift by integer number of voxels for padding
        shift = torch.tensor([.5, .5, .5]) // voxel_size
        offset = tsdf_trgt.origin - shift * voxel_size
    else:
        # use default origin
        # assume floor is at z=0 so pad the bottom a bit
        offset = torch.tensor([0, 0, -.5])
    T = torch.eye(4)
    T[:3, 3] = offset

    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.TransformSpace(T, model.voxel_dim_val, [0, 0, 0]),
        transforms.IntrinsicsPoseToProjection(),
    ])
    dataset.transform = transform
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=32)

    scene = dataset.info['scene']
    model.initialize_volume()
    torch.cuda.empty_cache()

    # trainer = pl.Trainer(
    #     distributed_backend='dp',
    #     benchmark=False,
    #     gpus=[5],
    #     precision=32)
    #     # num_sanity_val_steps=0)
    # print(total_scenes_index,
    #       total_scenes_count,
    #       dataset.info['dataset'],
    #       scene,
    #       len(dataloader))
    # model.test_offset = offset.cuda()
    # model.save_path = save_path
    # model.scene = scene
    # trainer.test(model, test_dataloaders=dataloader)

    anime = None
    model.scale = 1
    for j, d in tqdm(enumerate(dataloader)):
        # if j < 700:
        #     continue
        if j % 2 != 0:
            continue

        # logging progress
        # if j % 25 == 0:
        #     print(total_scenes_index,
        #           total_scenes_count,
        #           dataset.info['dataset'],
        #           scene,
        #           j,
        #           len(dataloader))

        # print(d['projection'].unsqueeze(0).shape, d['image'].unsqueeze(0).shape)
        model.inference1(d['projection'].unsqueeze(0).cuda(),
                         image=d['image'].unsqueeze(0).cuda())

        if j == len(dataloader) - 1:  # or (j % 100 == 0):
            volume = model.valid[0][0].cpu().numpy().astype(np.uint8)
            anime = mlab.pipeline.scalar_field(volume)
            mlab.pipeline.volume(anime)
            mlab.axes()
            mlab.show()

            volume = model.volume[0][0].cpu().numpy().astype(np.uint8)
            anime = mlab.pipeline.scalar_field(volume)
            mlab.pipeline.volume(anime)
            mlab.axes()
            mlab.show()
            continue

        outputs, losses = model.inference2()
        tsdf_pred = model.postprocess(outputs)[0]
        # print(tsdf_pred.tsdf_vol.shape)

        # TODO: set origin in model... make consistent with offset above?
        tsdf_pred.origin = offset.view(1, 3).cuda()

        if j == len(dataloader) - 1:  # or (j % 100 == 0):
            volume = tsdf_pred.tsdf_vol.cpu().numpy().astype(np.uint8)
            anime = mlab.pipeline.scalar_field(volume)
            mlab.pipeline.volume(anime)
            mlab.axes()
            mlab.show()

        if 'semseg' in tsdf_pred.attribute_vols:
            # mesh_pred = tsdf_pred.get_mesh('semseg')
            mesh_pred = tsdf_pred.get_mesh()

            # save vertex attributes separately since trimesh doesn't save them
            np.savez(os.path.join(save_path, '%s_attributes.npz' % scene),
                     **mesh_pred.vertex_attributes)
        else:
            mesh_pred = tsdf_pred.get_mesh()

        tsdf_pred.save(os.path.join(save_path, '%s_%d.npz' % (scene, j)))
        mesh_pred.export(os.path.join(save_path, '%s_%d.ply' % (scene, j)))

    outputs, losses = model.inference2()
    tsdf_pred = model.postprocess(outputs)[0]

    # TODO: set origin in model... make consistent with offset above?
    tsdf_pred.origin = offset.view(1, 3).cuda()

    if 'semseg' in tsdf_pred.attribute_vols:
        mesh_pred = tsdf_pred.get_mesh('semseg')

        # save vertex attributes separately since trimesh doesn't save them
        np.savez(os.path.join(save_path, '%s_attributes.npz' % scene),
                 **mesh_pred.vertex_attributes)
    else:
        mesh_pred = tsdf_pred.get_mesh()

    tsdf_pred.save(os.path.join(save_path, '%s_%.1f.npz' % (scene, model.scale)))
    mesh_pred.export(os.path.join(save_path, '%s_%.1f.ply' % (scene, model.scale)))
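# Standalone numeric check (added for illustration, with an assumed example
# origin) of the voxel-aligned padding offset computed above before building
# the TransformSpace matrix.
def _example_offset_check():
    voxel_scale = 4                                   # cm, i.e. model.voxel_sizes[0]
    voxel_size = float(voxel_scale) / 100             # 0.04 m
    origin_gt = torch.tensor([1.30, -0.70, 0.00])     # example ground-truth TSDF origin
    shift = torch.tensor([.5, .5, .5]) // voxel_size  # floor(0.5 / 0.04) = 12 voxels
    offset = origin_gt - shift * voxel_size           # pad roughly 0.48 m per axis
    print(shift)    # tensor([12., 12., 12.])
    print(offset)   # approximately tensor([ 0.8200, -1.1800, -0.4800])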
def process(info_file, model, num_frames, save_path, total_scenes_index,
            total_scenes_count, window_conf):
    """ Run the network on a scene and save output

    Args:
        info_file: path to info_json file for the scene
        model: pytorch model that implements Atlas
        num_frames: number of frames to use in reconstruction (-1 for all)
        save_path: where to save outputs
        total_scenes_index: used to print which scene we are on
        total_scenes_count: used to print the total number of scenes to process
    """
    global my_mesh

    voxel_scale = model.voxel_sizes[0]
    dataset = SceneDataset(info_file, voxel_sizes=[voxel_scale],
                           voxel_types=model.voxel_types,
                           num_frames=num_frames)

    # compute voxel origin
    if 'file_name_vol_%02d' % voxel_scale in dataset.info:
        # compute voxel origin from ground truth
        tsdf_trgt = dataset.get_tsdf()['vol_%02d' % voxel_scale]
        voxel_size = float(voxel_scale) / 100
        # shift by integer number of voxels for padding
        shift = torch.tensor([.5, .5, .5]) // voxel_size
        offset = tsdf_trgt.origin - shift * voxel_size
    else:
        # use default origin
        # assume floor is at z=0 so pad the bottom a bit
        offset = torch.tensor([0, 0, -.5])
    T = torch.eye(4)
    T[:3, 3] = offset

    transform = transforms.Compose([
        transforms.ResizeImage((640, 480)),
        transforms.ToTensor(),
        transforms.TransformSpace(T, model.voxel_dim_val, [0, 0, 0]),
        transforms.IntrinsicsPoseToProjection(),
    ])
    dataset.transform = transform
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=2)

    scene = dataset.info['scene']
    model.initialize_volume()
    torch.cuda.empty_cache()

    start = time.time()
    for j, d in enumerate(dataloader):
        # logging progress
        if j % 25 == 0:
            print(time.time() - start)
            start = time.time()
            print(total_scenes_index,
                  total_scenes_count,
                  dataset.info['dataset'],
                  scene,
                  j,
                  len(dataloader))

        model.inference1(d['projection'].unsqueeze(0).cuda(),
                         image=d['image'].unsqueeze(0).cuda())

    outputs, losses = model.inference2()
    tsdf_pred = model.postprocess(outputs)[0]

    # TODO: set origin in model... make consistent with offset above?
    tsdf_pred.origin = offset.view(1, 3).cuda()

    if 'semseg' in tsdf_pred.attribute_vols:
        mesh_pred = tsdf_pred.get_mesh('semseg')

        # save vertex attributes separately since trimesh doesn't save them
        np.savez(os.path.join(save_path, '%s_attributes.npz' % scene),
                 **mesh_pred.vertex_attributes)
    else:
        mesh_pred = tsdf_pred.get_mesh()

    my_mesh = mesh_pred
    # mesh_pred.show()

    tsdf_pred.save(os.path.join(save_path, '%s.npz' % scene))
    mesh_pred.export(os.path.join(save_path, '%s.ply' % scene))

    # also export a binary glTF: export_glb expects a trimesh.Scene and returns
    # bytes, so wrap the predicted mesh and write the result to disk
    # (the '%s.glb' output path is an assumption; the original did not save it)
    glb_bytes = trimesh.exchange.gltf.export_glb(trimesh.Scene(mesh_pred),
                                                 extras=None,
                                                 include_normals=None,
                                                 tree_postprocessor=None)
    with open(os.path.join(save_path, '%s.glb' % scene), 'wb') as f:
        f.write(glb_bytes)
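# Hypothetical sanity check (not part of the original script): reload the
# artifacts written by process above. TSDF and trimesh are assumed to be in
# scope as in the functions above; save_path and scene are placeholders.
def _example_reload_outputs(save_path, scene):
    tsdf = TSDF.load(os.path.join(save_path, '%s.npz' % scene))
    mesh = trimesh.load(os.path.join(save_path, '%s.ply' % scene), process=False)
    print('voxel size', tsdf.voxel_size, 'vertices', len(mesh.vertices))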