def get_transform(self, is_train):
    """ Gets a transform to preprocess the input data"""

    if is_train:
        voxel_dim = self.voxel_dim_train
        random_rotation = self.cfg.DATA.RANDOM_ROTATION_3D
        random_translation = self.cfg.DATA.RANDOM_TRANSLATION_3D
        paddingXY = self.cfg.DATA.PAD_XY_3D
        paddingZ = self.cfg.DATA.PAD_Z_3D
    else:
        # center volume
        voxel_dim = self.voxel_dim_val
        random_rotation = False
        random_translation = False
        paddingXY = 0
        paddingZ = 0

    transform = []
    transform += [transforms.ResizeImage((640, 480)),
                  transforms.ToTensor(),
                  transforms.InstanceToSemseg('nyu40'),
                  transforms.RandomTransformSpace(
                      voxel_dim, random_rotation, random_translation,
                      paddingXY, paddingZ),
                  transforms.FlattenTSDF(),
                  transforms.IntrinsicsPoseToProjection(),
                  ]

    return transforms.Compose(transform)
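# Example (a sketch, not part of the original file: assumes this method lives
# on a dataset/config wrapper exposing the yacs-style `cfg` used above, and
# that `sample` is a raw sample dict from that dataset):
#
#   transform = dataset.get_transform(is_train=False)  # deterministic val transform
#   sample = transform(sample)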
def load_scene(path_meta, scene, max_depth=3, vol_prcnt=.995, vol_margin=1.5,
               fuse_semseg=False, device=0, verbose=2):
    """ Load the dataset for a scene and estimate its volume bounds
    by backprojecting a subset of the depth maps to a point cloud.

    Returns:
        info_file, device, dataset, origin, vol_max
    """

    if verbose > 0:
        print('preparing data', scene)

    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)  # gpu for this process

    # get the dataset
    transform = transforms.Compose([transforms.ResizeImage((640, 480)),
                                    transforms.ToTensor(),
                                    transforms.InstanceToSemseg('nyu40'),
                                    transforms.IntrinsicsPoseToProjection(),
                                    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=None,
                                              batch_sampler=None, num_workers=4)

    pts = []
    for i, frame in enumerate(dataloader1):
        if verbose > 1 and i % 50 == 0:
            print(scene, 'backprojecting depth maps to point clouds', i, len(dataset))
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()

    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()

    return info_file, device, dataset, origin, vol_max
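# Example usage of load_scene (a minimal sketch; the path and scene name below
# are placeholders, and a CUDA device is assumed by the function):
#
#   info_file, device, dataset, origin, vol_max = load_scene(
#       'path/to/path_meta', 'scene0000_00', max_depth=3, device=0)
#   print('volume bounds:', origin.tolist(), vol_max.tolist())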
def fuse_scene(path_meta, scene, voxel_size, trunc_ratio=3, max_depth=3,
               vol_prcnt=.995, vol_margin=1.5, fuse_semseg=False, device=0,
               verbose=2):
    """ Use TSDF fusion with GT depth maps to generate GT TSDFs

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF
        trunc_ratio: truncation distance in voxel units
        max_depth: mask out large depth values since they are noisy
        vol_prcnt: for computing the bounding volume of the TSDF... ignore outliers
        vol_margin: padding for computing bounding volume of the TSDF
        fuse_semseg: whether to accumulate semseg images for GT semseg
            (preferred method is to not accumulate and instead transfer labels
            from the ground truth labeled mesh)
        device: which gpu to use
        verbose: how much logging to print

    Returns:
        writes a TSDF (.npz) file into path_meta/scene

    Notes: we use a conservative value of max_depth=3 to reduce noise in the
    ground truth. However, this means some distant data is missing which can
    create artifacts. Nevertheless, we found we achieved the best 2d metrics
    with the less noisy ground truth.
    """

    if verbose > 0:
        print('fusing', scene, 'voxel size', voxel_size)

    info_file = os.path.join(path_meta, scene, 'info.json')

    # get gpu device for this worker
    device = torch.device('cuda', device)  # gpu for this process

    # get the dataset
    transform = transforms.Compose([transforms.ResizeImage((640, 480)),
                                    transforms.ToTensor(),
                                    transforms.InstanceToSemseg('nyu40'),
                                    transforms.IntrinsicsPoseToProjection(),
                                    ])
    frame_types = ['depth', 'semseg'] if fuse_semseg else ['depth']
    dataset = SceneDataset(info_file, transform, frame_types)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=None,
                                             batch_sampler=None, num_workers=4)

    # find volume bounds and origin by backprojecting depth maps to point clouds
    # use a subset of the frames to save time
    if len(dataset) <= 200:
        dataset1 = dataset
    else:
        # note: np.int was removed in recent NumPy versions; use the builtin int
        inds = np.linspace(0, len(dataset) - 1, 200).astype(int)
        dataset1 = torch.utils.data.Subset(dataset, inds)
    dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=None,
                                              batch_sampler=None, num_workers=4)

    pts = []
    for i, frame in enumerate(dataloader1):
        projection = frame['projection'].to(device)
        depth = frame['depth'].to(device)
        depth[depth > max_depth] = 0
        pts.append(depth_to_world(projection, depth).view(3, -1).T)
    pts = torch.cat(pts)
    pts = pts[torch.isfinite(pts[:, 0])].cpu().numpy()

    # use top and bottom vol_prcnt of points plus vol_margin
    origin = torch.as_tensor(
        np.quantile(pts, 1 - vol_prcnt, axis=0) - vol_margin).float()
    vol_max = torch.as_tensor(
        np.quantile(pts, vol_prcnt, axis=0) + vol_margin).float()
    vol_dim = ((vol_max - origin) / (float(voxel_size) / 100)).int().tolist()

    # initialize tsdf
    tsdf_fusion = TSDFFusion(vol_dim, float(voxel_size) / 100, origin,
                             trunc_ratio, device, label=fuse_semseg)

    # integrate frames
    for i, frame in enumerate(dataloader):
        if verbose > 1 and i % 25 == 0:
            print(scene, 'integrating voxel size', voxel_size, i, len(dataset))

        projection = frame['projection'].to(device)
        image = frame['image'].to(device)
        depth = frame['depth'].to(device)
        semseg = frame['semseg'].to(device) if fuse_semseg else None

        # only use reliable depth
        depth[depth > max_depth] = 0

        tsdf_fusion.integrate(projection, depth, image, semseg)

    # save mesh and tsdf
    file_name_vol = os.path.join(path_meta, scene, 'tsdf_%02d.npz' % voxel_size)
    file_name_mesh = os.path.join(path_meta, scene, 'mesh_%02d.ply' % voxel_size)
    tsdf = tsdf_fusion.get_tsdf()
    tsdf.save(file_name_vol)
    mesh = tsdf.get_mesh()
    mesh.export(file_name_mesh)
    if fuse_semseg:
        mesh = tsdf.get_mesh('instance')
        mesh.export(file_name_mesh.replace('.ply', '_semseg.ply'))

    # update info json
    data = load_info_json(info_file)
    data['file_name_vol_%02d' % voxel_size] = file_name_vol
    json.dump(data, open(info_file, 'w'))
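# Example usage of fuse_scene (a sketch with placeholder paths; for
# voxel_size=4 this writes tsdf_04.npz and mesh_04.ply under path_meta/scene
# and records the TSDF path in info.json):
#
#   fuse_scene('path/to/path_meta', 'scene0000_00', voxel_size=4,
#              trunc_ratio=3, max_depth=3, fuse_semseg=False, device=0)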
def label_scene(path_meta, scene, voxel_size, dist_thresh=.05, verbose=2):
    """ Transfer instance labels from ground truth mesh to TSDF

    For each voxel find the nearest vertex and transfer the label if
    it is close enough to the voxel.

    Args:
        path_meta: path to save the TSDFs
            (we recommend creating a parallel directory structure to save
            derived data so that we don't modify the original dataset)
        scene: name of scene to process
        voxel_size: voxel size of TSDF to process
        dist_thresh: beyond this distance labels are not transferred
        verbose: how much logging to print

    Returns:
        Updates the TSDF (.npz) file with the instance volume
    """

    # dist_thresh: beyond this distance to nearest gt mesh vertex,
    # voxels are not labeled
    if verbose > 0:
        print('labeling', scene)

    info_file = os.path.join(path_meta, scene, 'info.json')
    data = load_info_json(info_file)

    # each vertex in the gt mesh indexes a seg group
    segIndices = json.load(open(data['file_name_seg_indices'], 'r'))['segIndices']

    # maps seg groups to instances
    segGroups = json.load(open(data['file_name_seg_groups'], 'r'))['segGroups']
    mapping = {ind: group['id'] + 1
               for group in segGroups
               for ind in group['segments']}

    # get per vertex instance ids (0 is unknown, [1,...] are objects)
    n = len(segIndices)
    instance_verts = torch.zeros(n, dtype=torch.long)
    for i in range(n):
        if segIndices[i] in mapping:
            instance_verts[i] = mapping[segIndices[i]]

    # load vertex locations
    mesh = trimesh.load(data['file_name_mesh_gt'], process=False)
    verts = mesh.vertices

    # construct kdtree of vertices for fast nn lookup
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(verts)
    kdtree = o3d.geometry.KDTreeFlann(pcd)

    # load tsdf volume
    tsdf = TSDF.load(data['file_name_vol_%02d' % voxel_size])
    coords = coordinates(tsdf.tsdf_vol.size(), device=torch.device('cpu'))
    coords = coords.type(torch.float) * tsdf.voxel_size + tsdf.origin.T
    mask = tsdf.tsdf_vol.abs().view(-1) < 1

    # transfer vertex instance ids to voxels near surface
    instance_vol = torch.zeros(len(mask), dtype=torch.long)
    for i in mask.nonzero():
        _, inds, dist = kdtree.search_knn_vector_3d(coords[:, i], 1)
        if dist[0] < dist_thresh:
            instance_vol[i] = instance_verts[inds[0]]

    tsdf.attribute_vols['instance'] = instance_vol.view(list(tsdf.tsdf_vol.size()))
    tsdf.save(data['file_name_vol_%02d' % voxel_size])

    key = 'vol_%02d' % voxel_size
    temp_data = {key: tsdf,
                 'instances': data['instances'],
                 'dataset': data['dataset']}
    tsdf = transforms.InstanceToSemseg('nyu40')(temp_data)[key]
    mesh = tsdf.get_mesh('semseg')
    fname = data['file_name_vol_%02d' % voxel_size]
    mesh.export(fname.replace('tsdf', 'mesh').replace('.npz', '_semseg.ply'))
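# Example usage of label_scene (a sketch with placeholder paths; assumes
# fuse_scene has already written the TSDF at this voxel size and that
# info.json references the GT mesh and segmentation files):
#
#   label_scene('path/to/path_meta', 'scene0000_00', voxel_size=4, dist_thresh=.05)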