def kitti_kitti_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for depth validation
    from the kitti validation set.
    """

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_kitti_val_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_kitti'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(
        dataset='kitti',
        split='kitti_split',
        trainvaltest_split='validation',
        video_mode='mono',
        stereo_mode='mono',
        keys_to_load=('color', 'depth'),
        data_transforms=transforms,
        video_frames=(0, ),
        disable_const_items=True
    )

    loader = DataLoader(
        dataset, batch_size, False,
        num_workers=num_workers, pin_memory=True, drop_last=False
    )

    print(f" - Can use {len(dataset)} images from the kitti (kitti_split) validation set for depth validation",
          flush=True)

    return loader

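# A minimal consumption sketch for the loader above. Assumptions: batches follow the
# (key, frame, scale) tuple convention used elsewhere in this repository, and
# 'depth_model' stands in for an arbitrary callable network; both are illustrative
# and not part of this module.
def _run_depth_validation_sketch(depth_model, img_height=192, img_width=640):
    """Hypothetical helper: iterate the validation loader and collect prediction/ground-truth pairs."""
    loader = kitti_kitti_validation(img_height, img_width, batch_size=1, num_workers=0)
    results = []
    for batch in loader:
        prediction = depth_model(batch[('color', 0, 0)])
        results.append((prediction, batch[('depth', 0, 0)]))
    return results
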
def kitti_2015_train(resize_height, resize_width, batch_size, num_workers):
    """A loader that loads images and ground truth for segmentation
    from the kitti_2015 train set.
    """

    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((resize_height, resize_width), image_types=('color', )),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_2015_val_seg'),
        tf.AddKeyValue('purposes', ('segmentation', )),
        tf.AddKeyValue('num_classes', num_classes)
    ]

    dataset = StandardDataset(
        dataset='kitti_2015',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
        labels_mode='fromid',
        labels=labels,
        keys_to_load=['color', 'segmentation'],
        data_transforms=transforms,
        disable_const_items=True
    )

    loader = DataLoader(
        dataset, batch_size, False,
        num_workers=num_workers, pin_memory=True, drop_last=False
    )

    print(f" - Can use {len(dataset)} images from the kitti_2015 train set for segmentation validation",
          flush=True)

    return loader

                          fraction=1.0),   # values for visualization only
    mytransforms.ConvertDepth(),           # The convert transforms should come after the
    mytransforms.ConvertSegmentation(),    # Scaling/Rotating/Cropping
    mytransforms.ToTensor(),
]

# With the parameters specified above, a StandardDataset can now be created. You can iterate through
# it using the PyTorch DataLoader class.
# There are several optional arguments in the StandardDataset class that are not featured here for the
# sake of simplicity. Note that, for example, it is possible to include the previous and/or subsequent
# frames of a video sequence using the parameter video_frames.
my_dataset = StandardDataset(
    dataset,
    split=split,
    trainvaltest_split=trainvaltest_split,
    keys_to_load=keys_to_load,
    data_transforms=data_transforms,
)
my_loader = DataLoader(my_dataset, batch_size=1, shuffle=False,
                       num_workers=0, pin_memory=True, drop_last=True)

# Print the sizes of the first 3 elements to show how the elements are indexed. Each element
# of the dataset is a dictionary with 3-tuples as keys. The first entry corresponds to the image
# category. The second is a video frame index, which will always be zero for non-video datasets.
# The third entry is a resolution parameter indicating whether it is a scaled image.
for element, _ in zip(my_loader, range(3)):
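    # (sketch) The original loop body is not part of this excerpt; one plausible version
    # simply prints each dictionary key alongside the size of its tensor value:
    for key, value in element.items():
        if hasattr(value, 'shape'):
            print(key, tuple(value.shape))
    print()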
def cityscapes_sequence_train(resize_height, resize_width, crop_height, crop_width,
                              batch_size, num_workers):
    """A loader that loads images for adaptation from the cityscapes_sequence training set.

    This loader returns sequences from the left camera, as well as from the right camera.
    """

    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height * 568 // 512, resize_width * 1092 // 1024),
                  image_types=('color', )),
        # crop away the sides and bottom parts of the image
        tf.SidesCrop((resize_height * 320 // 512, resize_width * 1024 // 1024),
                     (resize_height * 32 // 512, resize_width * 33 // 1024)),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_sequence_adaptation'),
        tf.AddKeyValue('purposes', ('adaptation', )),
    ]

    dataset_name = 'cityscapes_sequence'

    cfg_common = {
        'dataset': dataset_name,
        'trainvaltest_split': 'train',
        'video_mode': 'mono',
        'stereo_mode': 'mono',
    }

    cfg_left = {'keys_to_load': ('color', ), 'keys_to_video': ('color', )}
    cfg_right = {'keys_to_load': ('color_right', ), 'keys_to_video': ('color_right', )}

    dataset_left = StandardDataset(data_transforms=transforms_common,
                                   **cfg_left, **cfg_common)
    dataset_right = StandardDataset(data_transforms=[tf.ExchangeStereo()] + transforms_common,
                                    **cfg_right, **cfg_common)

    dataset = ConcatDataset((dataset_left, dataset_right))

    loader = DataLoader(
        dataset, batch_size, True,
        num_workers=num_workers, pin_memory=True, drop_last=True
    )

    print(f" - Can use {len(dataset)} images from the cityscapes_sequence train set for adaptation",
          flush=True)

    return loader

def motsynth_seg_train(resize_height, resize_width, crop_height, crop_width,
                       batch_size, num_workers):
    """A loader that loads images and ground truth for segmentation
    from the MOTSynth training set.
    """

    labels = labels_motsynth_seg.getlabels()
    num_classes = len(labels_motsynth_seg.gettrainid2label())

    # transforms = [
    #     tf.RandomHorizontalFlip(),
    #     tf.CreateScaledImage(),
    #     tf.Resize((resize_height, resize_width)),
    #     tf.RandomRescale(1.5),
    #     tf.RandomCrop((crop_height, crop_width)),
    #     tf.ConvertSegmentation(),
    #     tf.CreateColoraug(new_element=True),
    #     tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0),
    #     tf.RemoveOriginals(),
    #     tf.ToTensor(),
    #     tf.NormalizeZeroMean(),
    #     tf.AddKeyValue('domain', 'motsynth_train_seg'),
    #     tf.AddKeyValue('purposes', ('segmentation', 'domain')),
    #     tf.AddKeyValue('num_classes', num_classes)
    # ]

    transforms = [
        tf.CreateScaledImage(),
        tf.Resize((crop_height, crop_width)),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(new_element=True),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'motsynth_train_seg'),
        tf.AddKeyValue('purposes', ('segmentation', 'domain')),
        tf.AddKeyValue('num_classes', num_classes)
    ]

    dataset_name = 'kek'

    dataset = StandardDataset(
        dataset=dataset_name,
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
        labels_mode='fromid',
        disable_const_items=True,
        labels=labels,
        keys_to_load=('color', 'segmentation'),
        data_transforms=transforms,
        simple_mode=True,
        video_frames=(0, ),
        seq_to_load=['000', '045']
    )

    loader = DataLoader(
        dataset, batch_size, True,
        num_workers=num_workers, pin_memory=True, drop_last=True
    )

    print(f" - Can use {len(dataset)} images from the motsynth_seg train set for segmentation training",
          flush=True)

    return loader

def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads image sequences for depth training from the kitti training set.

    This loader returns sequences from the left camera, as well as from the right camera.
    """

    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width),
                  image_types=('color', 'depth', 'camera_intrinsics', 'K')),
        tf.ConvertDepth(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1,
                       gamma=0.0, fraction=0.5),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_train_depth'),
        tf.AddKeyValue('purposes', ('depth', 'domain')),
    ]

    dataset_name = 'kitti'

    cfg_common = {
        'dataset': dataset_name,
        'trainvaltest_split': 'train',
        'video_mode': 'video',
        'stereo_mode': 'mono',
        'split': 'zhou_split',
        'video_frames': (0, -1, 1),
        'disable_const_items': False
    }

    cfg_left = {'keys_to_load': ('color', ), 'keys_to_video': ('color', )}
    cfg_right = {'keys_to_load': ('color_right', ), 'keys_to_video': ('color_right', )}

    dataset_left = StandardDataset(data_transforms=transforms_common,
                                   **cfg_left, **cfg_common)
    dataset_right = StandardDataset(data_transforms=[tf.ExchangeStereo()] + transforms_common,
                                    **cfg_right, **cfg_common)

    dataset = ConcatDataset((dataset_left, dataset_right))

    loader = DataLoader(
        dataset, batch_size, True,
        num_workers=num_workers, pin_memory=True, drop_last=True
    )

    print(f" - Can use {len(dataset)} images from the kitti (zhou_split) train split for depth training",
          flush=True)

    return loader

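# A minimal sketch of how a training step might unpack the three video frames requested
# above via video_frames=(0, -1, 1). Assumptions: batches use the (key, frame, scale)
# tuple convention, and the augmented colour images created by CreateColoraug(new_element=True)
# are stored under a 'color_aug' key; both are assumptions for illustration, not guarantees.
def _unpack_zhou_frames_sketch(batch):
    """Hypothetical helper: return the augmented target, previous and next frames of a batch."""
    target_frame = batch[('color_aug', 0, 0)]
    previous_frame = batch[('color_aug', -1, 0)]
    next_frame = batch[('color_aug', 1, 0)]
    return target_frame, previous_frame, next_frame
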
def check_scaled_dataset(dataset_name, scaled_dataset_name, trainvaltest_split,
                         keys_to_load, scaled_size, split=None):
    """Checks whether the images in a dataset generated by the dataset_scaler are identical
    to the images that are generated by loading the original dataset and scaling them afterwards.

    :param dataset_name: Name of the unscaled dataset
    :param scaled_dataset_name: Name of the scaled dataset
    :param trainvaltest_split: 'train', 'validation' or 'test'
    :param keys_to_load: keys that are supposed to be loaded, e.g. 'color', 'depth', 'segmentation', ...
    :param scaled_size: Size of the scaled image (h, w)
    :param split: Name of the dataset split, if one exists
    """
    dataset = dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize(output_size=scaled_size),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())

    print('\n Loading {} dataset'.format(dataset))
    my_dataset = StandardDataset(dataset,
                                 split=split,
                                 trainvaltest_split=trainvaltest_split,
                                 keys_to_load=keys_to_load,
                                 data_transforms=data_transforms,
                                 output_filenames=True)
    my_loader = DataLoader(my_dataset, batch_size=1, shuffle=False,
                           num_workers=0, pin_memory=True, drop_last=True)
    print_dataset(my_loader)

    dataset_s = scaled_dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())

    print('\n Loading {} dataset'.format(dataset_s))
    my_dataset_s = StandardDataset(dataset_s,
                                   split=split,
                                   trainvaltest_split=trainvaltest_split,
                                   keys_to_load=keys_to_load,
                                   data_transforms=data_transforms,
                                   output_filenames=True)
    my_loader_s = DataLoader(my_dataset_s, batch_size=1, shuffle=False,
                             num_workers=0, pin_memory=True, drop_last=True)
    print_dataset(my_loader_s)

    print("Testing dataset_scaler")
    samples = []
    samples_s = []
    iter_my_loader = iter(my_loader)
    iter_my_loader_s = iter(my_loader_s)
    for _ in range(2):
        samples.append(next(iter_my_loader).copy())
        samples_s.append(next(iter_my_loader_s).copy())
    for key in keys_to_load:
        print("Check if {} entries are equal:".format(key))
        print("  Should be False: {}".format(
            torch.equal(samples[1][(key, 0, 0)], samples_s[0][(key, 0, 0)])))
        print("  Should be True: {}".format(
            torch.equal(samples[0][(key, 0, 0)], samples_s[0][(key, 0, 0)])))
        print("  Should be True: {}".format(
            torch.equal(samples[1][(key, 0, 0)], samples_s[1][(key, 0, 0)])))

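# A hedged usage sketch for the check above. The dataset names and the target size are
# placeholders for illustration only; they are not guaranteed to match the dataset names
# registered in your local configuration.
if __name__ == '__main__':
    check_scaled_dataset(dataset_name='cityscapes',              # assumed example name
                         scaled_dataset_name='cityscapes_half',  # hypothetical scaled copy
                         trainvaltest_split='validation',
                         keys_to_load=('color', 'segmentation'),
                         scaled_size=(512, 1024))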