def __init__(self,
             config,
             input_transform=None,
             target_transform=None,
             augment_data=True,
             elastic_distortion=False,
             cache=False,
             phase=DatasetPhase.Train):
  """Build the ScanNet voxelization dataset for one split.

  Reads the split's file list from ``config.scannet_path``, widens the
  clip bound for non-training phases, and delegates to the parent
  dataset constructor.
  """
  # Accept either a DatasetPhase enum or its string name.
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)
  # Use cropped rooms for train/val
  data_root = config.scannet_path
  # Evaluation phases use the test-time clip bound instead.
  if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
    self.CLIP_BOUND = self.TEST_CLIP_BOUND
  split_file = self.DATA_PATH_FILE[phase]
  file_list = read_txt(os.path.join(data_root, split_file))
  logging.info('Loading {}: {}'.format(self.__class__.__name__, split_file))
  super().__init__(
      file_list,
      data_root=data_root,
      input_transform=input_transform,
      target_transform=target_transform,
      ignore_label=config.ignore_label,
      return_transformation=config.return_transformation,
      augment_data=augment_data,
      elastic_distortion=elastic_distortion,
      config=config)
def load_extrinsics(extrinsics_file):
  """Load the camera extrinsics from a .txt file.

  The first line of the file holds 16 space-separated floats that form
  a 4x4 transformation matrix in row-major order.
  """
  first_line = read_txt(extrinsics_file)[0]
  values = [float(token) for token in first_line.split(' ')]
  return np.asarray(values).reshape([4, 4])
def __init__(self,
             config,
             prevoxel_transform=None,
             input_transform=None,
             target_transform=None,
             augment_data=True,
             elastic_distortion=False,
             cache=False,
             phase=DatasetPhase.Train):
  """Build the Synthia 4D voxelization dataset for one split.

  Reads the split's file list from ``./splits/synthia4d``, widens the
  clip bound for non-training phases, and delegates to the parent
  dataset constructor.
  """
  # Accept either a DatasetPhase enum or its string name.
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)
  # Evaluation phases use the test-time clip bound instead.
  if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
    self.CLIP_BOUND = self.TEST_CLIP_BOUND
  data_root = config.synthia_path
  data_paths = read_txt(
      osp.join('./splits/synthia4d', self.DATA_PATH_FILE[phase]))
  # Split lines may carry extra columns; the file path is the first token.
  data_paths = [d.split()[0] for d in data_paths]
  logging.info('Loading {}: {}'.format(self.__class__.__name__,
                                       self.DATA_PATH_FILE[phase]))
  super().__init__(
      data_paths,
      data_root=data_root,
      # BUG FIX: prevoxel_transform was accepted but never forwarded, so
      # pre-voxelization augmentations were silently dropped. Forward it
      # like the sibling Stanford dataset constructor does.
      prevoxel_transform=prevoxel_transform,
      input_transform=input_transform,
      target_transform=target_transform,
      ignore_label=config.ignore_label,
      return_transformation=config.return_transformation,
      augment_data=augment_data,
      elastic_distortion=elastic_distortion,
      config=config)
def __init__(self,
             config,
             prevoxel_transform=None,
             input_transform=None,
             target_transform=None,
             cache=False,
             augment_data=True,
             elastic_distortion=False,
             phase=DatasetPhase.Train):
  """Build the Stanford 3D voxelization dataset for one split.

  The split entry may be a single file name or a list/tuple of file
  names; all listed split files are concatenated into one path list.
  An explicit ``config.data.voxel_size`` overrides the class default.
  """
  # Accept either a DatasetPhase enum or its string name.
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)
  # Evaluation phases use the test-time clip bound instead.
  if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
    self.CLIP_BOUND = self.TEST_CLIP_BOUND
  data_root = config.data.stanford3d_path
  # Normalize the split spec to a list so one loop handles both cases.
  split_spec = self.DATA_PATH_FILE[phase]
  if not isinstance(split_spec, (list, tuple)):
    split_spec = [split_spec]
  data_paths = []
  for split in split_spec:
    data_paths += read_txt(os.path.join(data_root, 'splits', split))
  # A truthy config value overrides the class-level voxel size.
  if config.data.voxel_size:
    self.VOXEL_SIZE = config.data.voxel_size
    logging.info('voxel size: {}'.format(self.VOXEL_SIZE))
  logging.info('Loading {} {}: {}'.format(self.__class__.__name__, phase,
                                          self.DATA_PATH_FILE[phase]))
  VoxelizationDataset.__init__(
      self,
      data_paths,
      data_root=data_root,
      prevoxel_transform=prevoxel_transform,
      input_transform=input_transform,
      target_transform=target_transform,
      ignore_label=config.data.ignore_label,
      return_transformation=config.data.return_transformation,
      augment_data=augment_data,
      elastic_distortion=elastic_distortion,
      config=config)
def load_intrinsics(intrinsics_file):
  """Load the camera intrinsics from a intrinsics.txt file.

  intrinsics.txt: a text file containing 4 values that represent (in
  this order) {focal length, principal-point-x, principal-point-y,
  baseline (m) with the corresponding right camera}; values sit on
  alternating lines (0, 2, 4, 6) of a seven-line file.
  """
  lines = read_txt(intrinsics_file)
  assert len(lines) == 7
  # Only the even-numbered lines hold numeric values.
  keys = ('focal_length', 'pp_x', 'pp_y', 'baseline')
  values = (float(lines[i]) for i in (0, 2, 4, 6))
  return dict(zip(keys, values))
def load_datafile(self, index):
  """Load one point cloud and axis-align it when an alignment file exists.

  Returns ``(pointcloud, None, None)``; the trailing Nones keep the
  return shape expected by callers of the base loader.
  """
  rel_path = self.data_paths[index]
  ptc = read_plyfile(self.data_root / rel_path)
  scene_id = os.path.splitext(rel_path.split(os.sep)[-1])[0]
  alignment_file = self.config.scannet_alignment_path % (scene_id, scene_id)
  if os.path.isfile(alignment_file):
    prefix = 'axisAlignment = '
    matching = [l for l in read_txt(alignment_file) if l.startswith(prefix)]
    rot = np.array(
        [float(v) for v in matching[0][len(prefix):].split()]).reshape(4, 4)
    # Apply the rigid transform to xyz in homogeneous coordinates.
    homo = np.hstack((ptc[:, :3], np.ones((ptc.shape[0], 1))))
    ptc[:, :3] = (homo @ rot.T)[:, :3]
  return ptc, None, None
def __init__(self,
             config,
             prevoxel_transform=None,
             input_transform=None,
             target_transform=None,
             augment_data=True,
             elastic_distortion=False,
             cache=False,
             phase=DatasetPhase.Train):
  """Build the temporal Synthia dataset, grouping files by sequence.

  NOTE(review): prevoxel_transform is accepted but never forwarded to
  TemporalVoxelizationDataset — confirm whether that is intentional.
  """
  # Accept either a DatasetPhase enum or its string name.
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)
  # Evaluation phases use the test-time clip bound instead.
  if phase not in [DatasetPhase.Train, DatasetPhase.TrainVal]:
    self.CLIP_BOUND = self.TEST_CLIP_BOUND
  data_root = config.synthia_path
  split_lines = read_txt(osp.join(data_root, self.DATA_PATH_FILE[phase]))
  file_names = sorted(line.split()[0] for line in split_lines)
  # Bucket files by sequence name (the first path component).
  seq2files = defaultdict(list)
  for name in file_names:
    seq2files[name.split(os.sep)[0]].append(name)
  self.camera_path = config.synthia_camera_path
  self.camera_intrinsic_file = config.synthia_camera_intrinsic_file
  self.camera_extrinsics_file = config.synthia_camera_extrinsics_file
  # Force sort file sequence for easier debugging.
  file_seq_list = [sorted(seq2files[seq]) for seq in sorted(seq2files)]
  logging.info('Loading {}: {}'.format(self.__class__.__name__,
                                       self.DATA_PATH_FILE[phase]))
  TemporalVoxelizationDataset.__init__(
      self,
      file_seq_list,
      data_root=data_root,
      input_transform=input_transform,
      target_transform=target_transform,
      ignore_label=config.ignore_label,
      temporal_dilation=config.temporal_dilation,
      temporal_numseq=config.temporal_numseq,
      return_transformation=config.return_transformation,
      augment_data=augment_data,
      elastic_distortion=elastic_distortion,
      config=config)
def __init__(self,
             config,
             input_transform=None,
             target_transform=None,
             augment_data=True,
             cache=False,
             phase=DatasetPhase.Train):
  """Build the SUN-RGBD dataset for one split.

  Reads the split's file list from ``config.sunrgbd_path`` and
  delegates to the parent dataset constructor.
  """
  # Accept either a DatasetPhase enum or its string name.
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)
  data_root = config.sunrgbd_path
  split_file = self.DATA_PATH_FILE[phase]
  file_list = read_txt(os.path.join(data_root, split_file))
  logging.info('Loading {}: {}'.format(self.__class__.__name__, split_file))
  super().__init__(
      file_list,
      data_root=data_root,
      input_transform=input_transform,
      target_transform=target_transform,
      ignore_label=config.ignore_label,
      return_transformation=config.return_transformation,
      augment_data=augment_data,
      config=config)
import os

import numpy as np
import open3d as o3d

import lib.pc_utils as pc_utils
from config import get_config
from lib.utils import read_txt

SCANNET_RAW_PATH = '/cvgl2/u/jgwak/Datasets/scannet_raw'
SCANNET_ALIGNMENT_PATH = '/cvgl2/u/jgwak/Datasets/scannet_raw/scans/%s/%s.txt'
VOTENET_PRED_PATH = 'outputs/visualization/votenet_scannet'
OURS_PRED_PATH = 'outputs/visualization/ours_scannet'
SIS_PRED_PATH = 'outputs/visualization/3dsis_scannet'
NUM_BBOX_POINTS = 1000

config = get_config()

files = sorted(
    read_txt(
        '/scr/jgwak/Datasets/scannet_votenet_rgb/scannet_votenet_test.txt'))
for i, fn in enumerate(files):
  # Strip the directory and the 4-character extension to get the scene id.
  filename = fn.split(os.sep)[-1][:-4]
  # BUG FIX: these f-strings contained the literal text '(unknown)' instead
  # of interpolating the scene name; restore the {filename} placeholder
  # (ScanNet scans are stored as <scene_id>_vh_clean.ply).
  if not os.path.isfile(os.path.join(SIS_PRED_PATH, f'{filename}.npz')):
    continue
  file_path = os.path.join(SCANNET_RAW_PATH, 'scans', filename,
                           f'{filename}_vh_clean.ply')
  assert os.path.isfile(file_path)
  mesh = o3d.io.read_triangle_mesh(file_path)
  mesh.compute_vertex_normals()
  scene_f = SCANNET_ALIGNMENT_PATH % (filename, filename)
  alignment_txt = [
      l for l in read_txt(scene_f) if l.startswith('axisAlignment = ')
  ][0]
  rot = np.array([float(x) for x in alignment_txt[16:].split()]).reshape(4, 4)
import os

import numpy as np

import lib.pc_utils as pc_utils
from lib.utils import read_txt

OURS_PRED_PATH = 'outputs/visualization/ours_stanford'
STANFORD_PATH = '/cvgl/group/Stanford3dDataset_v1.2/Area_5'
NUM_BBOX_POINTS = 1000

files = sorted(read_txt('/scr/jgwak/Datasets/stanford3d/test.txt'))
for i, fn in enumerate(files):
  # Recover the room name, e.g. 'Area5_office_34_x' -> 'office_34'.
  stem = os.path.splitext(fn.split(os.sep)[-1])[0]
  area_name = '_'.join(stem.split('_')[1:])[:-2]
  # Skip room types that are not visualized.
  if area_name.startswith(('WC_', 'hallway_')):
    continue
  ptc_fn = os.path.join(STANFORD_PATH, area_name, f'{area_name}.txt')
  ptc = np.array([l.split() for l in read_txt(ptc_fn)]).astype(float)
  preds = np.load(os.path.join(OURS_PRED_PATH, 'out_%03d.npy.npz' % i))
  gt_bbox = pc_utils.visualize_bboxes(
      preds['gt'][:, :6], preds['gt'][:, 6], num_points=NUM_BBOX_POINTS)
  ours_bbox = pc_utils.visualize_bboxes(
      preds['pred'][:, :6], preds['pred'][:, 6], num_points=NUM_BBOX_POINTS)
  # Reuse the camera parameters of the first render for all three images.
  cam = pc_utils.visualize_pcd(
      gt_bbox, ptc, save_image=f'viz_Area5_{area_name}_gt.png')
  pc_utils.visualize_pcd(
      ptc, camera_params=cam, save_image=f'viz_Area5_{area_name}_input.png')
  pc_utils.visualize_pcd(
      ours_bbox,
      ptc,
      camera_params=cam,
      save_image=f'viz_Area5_{area_name}_ours.png')