def test(config): """Test point cloud data loader. """ from torch.utils.data import DataLoader from lib.utils import Timer timer = Timer() DatasetClass = StanfordVoxelization2cmDataset transformations = [ t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS, DatasetClass.IS_TEMPORAL), t.ChromaticAutoContrast(), t.ChromaticTranslation(config.data_aug_color_trans_ratio), t.ChromaticJitter(config.data_aug_color_jitter_std), t.HueSaturationTranslation(config.data_aug_hue_max, config.data_aug_saturation_max), ] dataset = DatasetClass(config, input_transform=t.Compose(transformations), augment_data=True, cache=True, elastic_distortion=True) data_loader = DataLoader( dataset=dataset, collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False), batch_size=4, shuffle=True) # Start from index 1 iter = data_loader.__iter__() for i in range(100): timer.tic() data = iter.next() print(timer.toc())
def test(config, intensity=False):
  """Test point cloud data loader."""
  from torch.utils.data import DataLoader
  from lib.utils import Timer
  import numpy as np
  import open3d as o3d

  def make_pcd(coords, feats):
    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(coords[:, :3].float().numpy())
    pcd.colors = o3d.utility.Vector3dVector(feats[:, :3].numpy() / 255)
    if intensity:
      # Open3D point clouds have no native intensity channel; visualize the
      # intensity column (assumed to be feats[:, 3]) as grayscale colors instead.
      gray = np.tile(feats[:, 3:4].numpy(), (1, 3))
      pcd.colors = o3d.utility.Vector3dVector(gray / max(gray.max(), 1e-6))
    return pcd

  timer = Timer()
  DatasetClass = FacilityArea5Dataset
  transformations = [
      t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS, DatasetClass.IS_TEMPORAL),
      t.ChromaticAutoContrast(),
      t.ChromaticTranslation(config.data_aug_color_trans_ratio),
      t.ChromaticJitter(config.data_aug_color_jitter_std),
  ]
  dataset = DatasetClass(
      config,
      prevoxel_transform=t.ElasticDistortion(DatasetClass.ELASTIC_DISTORT_PARAMS),
      input_transform=t.Compose(transformations),
      augment_data=True,
      cache=True,
      elastic_distortion=True)
  data_loader = DataLoader(
      dataset=dataset,
      collate_fn=t.cfl_collate_fn_factory(limit_numpoints=False),
      batch_size=1,
      shuffle=True)

  # Visualize and time 100 batches.
  data_iter = iter(data_loader)
  for i in range(100):
    timer.tic()
    coords, feats, labels = next(data_iter)
    pcd = make_pcd(coords, feats)
    o3d.visualization.draw_geometries([pcd])
    print(timer.toc())
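# For a quick sanity check of the tensor-to-Open3D conversion above without a
# real dataset, the snippet below pushes synthetic data through the same steps.
# The (N, 4) coords layout (xyz plus batch index) and the 0-255 RGB feature
# range are assumptions based on the collate function used above, not
# guarantees of the dataset classes.
def _test_make_pcd_synthetic():
  import numpy as np
  import torch
  import open3d as o3d

  # 1000 random points in a 5m cube, all assigned to batch index 0.
  coords = torch.cat([torch.rand(1000, 3) * 5.0, torch.zeros(1000, 1)], dim=1)
  feats = torch.randint(0, 256, (1000, 3)).float()  # 0-255 RGB, as in the loader

  pcd = o3d.geometry.PointCloud()
  pcd.points = o3d.utility.Vector3dVector(coords[:, :3].numpy().astype(np.float64))
  pcd.colors = o3d.utility.Vector3dVector((feats[:, :3] / 255).numpy())
  o3d.visualization.draw_geometries([pcd])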
def initialize_data_loader(DatasetClass,
                           config,
                           phase,
                           threads,
                           shuffle,
                           repeat,
                           augment_data,
                           batch_size,
                           limit_numpoints,
                           elastic_distortion=False,
                           input_transform=None,
                           target_transform=None):
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)

  if config.return_transformation:
    collate_fn = t.cflt_collate_fn_factory(limit_numpoints)
  else:
    collate_fn = t.cfl_collate_fn_factory(limit_numpoints)

  input_transforms = []
  if input_transform is not None:
    input_transforms += input_transform

  if augment_data:
    input_transforms += [
        t.RandomDropout(0.2),
        t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS, DatasetClass.IS_TEMPORAL),
        t.ChromaticAutoContrast(),
        t.ChromaticTranslation(config.data_aug_color_trans_ratio),
        t.ChromaticJitter(config.data_aug_color_jitter_std),
        # t.HueSaturationTranslation(config.data_aug_hue_max, config.data_aug_saturation_max),
    ]

  if len(input_transforms) > 0:
    input_transforms = t.Compose(input_transforms)
  else:
    input_transforms = None

  dataset = DatasetClass(
      config,
      input_transform=input_transforms,
      target_transform=target_transform,
      cache=config.cache_data,
      augment_data=augment_data,
      elastic_distortion=elastic_distortion,
      phase=phase)

  if repeat:
    # Use the inf random sampler
    data_loader = DataLoader(
        dataset=dataset,
        num_workers=threads,
        batch_size=batch_size,
        collate_fn=collate_fn,
        sampler=InfSampler(dataset, shuffle))
  else:
    # Default shuffle=False
    data_loader = DataLoader(
        dataset=dataset,
        num_workers=threads,
        batch_size=batch_size,
        collate_fn=collate_fn,
        shuffle=shuffle)

  return data_loader
def initialize_data_loader(DatasetClass,
                           config,
                           phase,
                           num_workers,
                           shuffle,
                           repeat,
                           augment_data,
                           batch_size,
                           limit_numpoints,
                           input_transform=None,
                           target_transform=None):
  if isinstance(phase, str):
    phase = str2datasetphase_type(phase)

  if config.return_transformation:
    collate_fn = t.cflt_collate_fn_factory(limit_numpoints)
  else:
    collate_fn = t.cfl_collate_fn_factory(limit_numpoints)

  prevoxel_transform_train = []
  if augment_data:
    prevoxel_transform_train.append(t.ElasticDistortion(DatasetClass.ELASTIC_DISTORT_PARAMS))

  if len(prevoxel_transform_train) > 0:
    prevoxel_transforms = t.Compose(prevoxel_transform_train)
  else:
    prevoxel_transforms = None

  input_transforms = []
  if input_transform is not None:
    input_transforms += input_transform

  if augment_data:
    input_transforms += [
        t.RandomDropout(0.2),
        t.RandomHorizontalFlip(DatasetClass.ROTATION_AXIS, DatasetClass.IS_TEMPORAL),
        t.ChromaticAutoContrast(),
        t.ChromaticTranslation(config.data_aug_color_trans_ratio),
        t.ChromaticJitter(config.data_aug_color_jitter_std),
        # t.HueSaturationTranslation(config.data_aug_hue_max, config.data_aug_saturation_max),
    ]

  if len(input_transforms) > 0:
    input_transforms = t.Compose(input_transforms)
  else:
    input_transforms = None

  dataset = DatasetClass(
      config,
      prevoxel_transform=prevoxel_transforms,
      input_transform=input_transforms,
      target_transform=target_transform,
      cache=config.cache_data,
      augment_data=augment_data,
      phase=phase)

  data_args = {
      'dataset': dataset,
      'num_workers': num_workers,
      'batch_size': batch_size,
      'collate_fn': collate_fn,
  }
  if repeat:
    data_args['sampler'] = InfSampler(dataset, shuffle)
  else:
    data_args['shuffle'] = shuffle

  data_loader = DataLoader(**data_args)
  return data_loader
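# Both initialize_data_loader variants delegate infinite iteration for
# repeat=True to InfSampler. The class below is a minimal illustrative sketch
# of such a sampler, assuming it reshuffles indices after every full pass when
# shuffle=True; the repository's actual InfSampler may differ in details.
import torch
from torch.utils.data import Sampler


class _InfSamplerSketch(Sampler):
  """Yields dataset indices forever so the DataLoader never exhausts."""

  def __init__(self, data_source, shuffle=False):
    self.data_source = data_source
    self.shuffle = shuffle

  def __iter__(self):
    while True:
      if self.shuffle:
        order = torch.randperm(len(self.data_source)).tolist()
      else:
        order = list(range(len(self.data_source)))
      for idx in order:
        yield idx

  def __len__(self):
    return len(self.data_source)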
def cifar10_dataset(dataset_base_path,
                    train_flag=True,
                    batch_size=1,
                    train_val_split=True,
                    num_val_per_cls=500,
                    num_workers=None,
                    prefetch_buffer_size=None,
                    progress_bar=True):
  x_data, y_data = load_data(dataset_base_path, train_flag)
  y_data = y_data.astype(np.int32)

  def train_gen(x, y):
    # Yield samples in a freshly shuffled order.
    indices = np.arange(x.shape[0])
    np.random.shuffle(indices)
    for i_ in indices:
      yield x[i_], y[i_]

  def val_gen(x, y):
    # Yield samples in order, optionally wrapped in a tqdm progress bar.
    indices = np.arange(x.shape[0])
    if progress_bar:
      for i_ in tqdm.tqdm(indices):
        yield x[i_], y[i_]
    else:
      for i_ in indices:
        yield x[i_], y[i_]

  def create_dataset(gen, trans):
    dataset_ = tf.data.Dataset.from_generator(
        gen, (tf.float32, tf.int32),
        (tf.TensorShape([32, 32, 3]), tf.TensorShape([])))
    dataset_ = (dataset_.map(
        lambda x_, y_: (trans(x_), y_),
        num_parallel_calls=num_workers).batch(batch_size).prefetch(prefetch_buffer_size))
    return dataset_

  if train_flag:
    transform = transforms.Compose([
        transforms.Pad(4, padding_mode="reflect"),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32),
        transforms.Standardize(),
        transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))
    ], tensor=True)
    if train_val_split:
      # Stratified split: hold out num_val_per_cls examples from each of the 10 classes.
      train_sample = []
      val_sample = []
      for i in range(10):
        cls_ind = np.where(y_data == i)[0]
        random_permed_cls_ind = np.random.permutation(cls_ind).tolist()
        train_sample.extend(random_permed_cls_ind[num_val_per_cls:])
        val_sample.extend(random_permed_cls_ind[:num_val_per_cls])
      dataset_val = create_dataset(
          lambda: val_gen(x_data[val_sample], y_data[val_sample]), transform)
      dataset_train = create_dataset(
          lambda: train_gen(x_data[train_sample], y_data[train_sample]), transform)
      return {
          "train": {"data": dataset_train, "size": len(train_sample)},
          "val": {"data": dataset_val, "size": len(val_sample)}
      }
    else:
      dataset_train = create_dataset(lambda: train_gen(x_data, y_data), transform)
      return {"train": {"data": dataset_train, "size": x_data.shape[0]}}
  else:
    transform = transforms.Compose([
        transforms.Standardize(),
        transforms.Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.2010))
    ], tensor=True)
    dataset_test = create_dataset(lambda: val_gen(x_data, y_data), transform)
    return {"test": {"data": dataset_test, "size": x_data.shape[0]}}
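# A usage sketch for cifar10_dataset. The dataset path below is a placeholder,
# load_data/transforms must come from the surrounding project, and the batch
# size and worker count are arbitrary example values.
def _example_cifar10_usage():
  splits = cifar10_dataset(
      "./data/cifar-10-batches-py",  # placeholder path
      train_flag=True,
      batch_size=128,
      train_val_split=True,
      num_val_per_cls=500,
      num_workers=4,
      prefetch_buffer_size=2)
  print("train size:", splits["train"]["size"], "val size:", splits["val"]["size"])
  # Pull one batch to confirm shapes; in TF2 eager mode this iterates directly.
  for images, labels in splits["train"]["data"].take(1):
    print(images.shape, labels.shape)  # e.g. (128, 32, 32, 3) and (128,)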