def cityscapes_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads images and ground truth for segmentation from the
    cityscapes training set.
    """
    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    # Training pipeline: flip/rescale/crop augmentation, color jitter,
    # then tensor conversion and zero-mean normalization.
    transforms = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width)),
        tf.RandomRescale(1.5),
        tf.RandomCrop((crop_height, crop_width)),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                       hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_train_seg'),
        tf.AddKeyValue('purposes', ('segmentation', 'domain')),
        tf.AddKeyValue('num_classes', num_classes),
    ]

    dataset = StandardDataset(
        dataset='cityscapes',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
        labels_mode='fromid',
        disable_const_items=True,
        labels=labels,
        keys_to_load=('color', 'segmentation'),
        data_transforms=transforms,
        video_frames=(0, ),
    )

    # Shuffle and drop the last incomplete batch during training.
    loader = DataLoader(dataset, batch_size, shuffle=True,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=True)

    print(f" - Can use {len(dataset)} images from the cityscapes train set "
          f"for segmentation training", flush=True)

    return loader
def cityscapes_validation(resize_height, resize_width, batch_size, num_workers):
    """A loader that loads images and ground truth for segmentation from the
    cityscapes validation set
    """
    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    # Validation pipeline: deterministic resize only, no augmentation.
    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((resize_height, resize_width), image_types=('color', )),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_val_seg'),
        tf.AddKeyValue('purposes', ('segmentation', )),
        tf.AddKeyValue('num_classes', num_classes),
    ]

    dataset = StandardDataset(
        dataset='cityscapes',
        trainvaltest_split='validation',
        video_mode='mono',
        stereo_mode='mono',
        labels_mode='fromid',
        labels=labels,
        keys_to_load=['color', 'segmentation'],
        data_transforms=transforms,
        disable_const_items=True,
    )

    # No shuffling and keep every sample for validation.
    loader = DataLoader(dataset, batch_size, shuffle=False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the cityscapes validation set "
          f"for segmentation validation", flush=True)

    return loader
def kitti_odom09_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and pose ground truth for pose validation
    from the kitti odom09 test split.

    Loads color image triplets (frames 0, -1, 1) together with poses, resizes
    the color images to (img_height, img_width), and returns an unshuffled
    DataLoader over the full split.
    """
    transforms = [
        tf.CreateScaledImage(True),
        # Only the color images are resized; poses are left untouched.
        tf.Resize(
            (img_height, img_width),
            image_types=('color', )
        ),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_odom09_val_pose'),
        # NOTE(review): 'purposes' says 'depth' although domain, loaded keys
        # and the print below all refer to pose validation — looks like a
        # copy-paste leftover; confirm against the consumers of 'purposes'.
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(
        dataset='kitti',
        split='odom09_split',
        trainvaltest_split='test',
        video_mode='video',
        stereo_mode='mono',
        keys_to_load=('color', 'poses'),
        keys_to_video=('color', ),
        data_transforms=transforms,
        # Current frame plus previous and next frame for pose estimation.
        video_frames=(0, -1, 1),
        disable_const_items=True
    )

    # Validation: no shuffling, keep the final partial batch.
    loader = DataLoader(
        dataset, batch_size, False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False
    )

    print(f" - Can use {len(dataset)} images from the kitti (odom09 split) validation set for pose validation",
          flush=True)

    return loader
def motsynth_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for depth validation
    from the motsynth validation set.

    Loads single color frames with depth maps from sequence '001', resizes the
    color images to (img_height, img_width), and returns an unshuffled
    DataLoader over the full split.
    """
    transforms = [
        tf.CreateScaledImage(True),
        # Only the color images are resized; depth maps keep their size here.
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        # NOTE(review): domain/mask keys reference kitti_zhou even though this
        # is the motsynth loader — possibly intentional reuse of the kitti
        # validation masking/clamping rules; confirm with the evaluation code.
        tf.AddKeyValue('domain', 'kitti_zhou_val_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_zhou'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    # NOTE(review): dataset name 'kek' looks like a placeholder for the
    # motsynth dataset registration — verify against the dataset registry.
    dataset = StandardDataset(dataset='kek',
                              trainvaltest_split='validation',
                              video_mode='mono',
                              stereo_mode='mono',
                              keys_to_load=('color', 'depth'),
                              data_transforms=transforms,
                              video_frames=(0, ),
                              simple_mode=True,
                              labels_mode='fromid',
                              # Only sequence '001' is used for validation.
                              seq_to_load=['001'])

    # Validation: no shuffling, keep the final partial batch.
    loader = DataLoader(dataset, batch_size, False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the motsynth validation set for depth validation",
          flush=True)

    return loader
def kitti_2015_train(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for depth evaluation
    from the kitti_2015 training set (but for evaluation).
    """
    # Deterministic evaluation pipeline; validation mask/clamp keys select
    # the kitti-specific masking rules downstream.
    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_2015_train_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_kitti'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(
        dataset='kitti_2015',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
        keys_to_load=('color', 'depth'),
        data_transforms=transforms,
        video_frames=(0, ),
        disable_const_items=True,
    )

    # Evaluation: no shuffling, keep the final partial batch.
    loader = DataLoader(dataset, batch_size, shuffle=False,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=False)

    print(f" - Can use {len(dataset)} images from the kitti_2015 test set "
          f"for depth evaluation", flush=True)

    return loader
def cityscapes_sequence_train(resize_height, resize_width, crop_height,
                              crop_width, batch_size, num_workers):
    """A loader that loads images for adaptation from the cityscapes_sequence
    training set. This loader returns sequences from the left camera, as well
    as from the right camera.
    """
    # NOTE(review): crop_height/crop_width are accepted but never used in
    # this function — kept for signature compatibility with the other
    # *_train loaders; confirm whether cropping was intended here.
    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height * 568 // 512, resize_width * 1092 // 1024),
                  image_types=('color', )),
        # crop away the sides and bottom parts of the image
        tf.SidesCrop((resize_height * 320 // 512, resize_width * 1024 // 1024),
                     (resize_height * 32 // 512, resize_width * 33 // 1024)),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                       hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_sequence_adaptation'),
        tf.AddKeyValue('purposes', ('adaptation', )),
    ]

    common_kwargs = {
        'dataset': 'cityscapes_sequence',
        'trainvaltest_split': 'train',
        'video_mode': 'mono',
        'stereo_mode': 'mono',
    }

    # Left-camera sequences use the common transforms unchanged; the
    # right-camera dataset first swaps the stereo roles.
    left = StandardDataset(
        data_transforms=transforms_common,
        keys_to_load=('color', ),
        keys_to_video=('color', ),
        **common_kwargs,
    )
    right = StandardDataset(
        data_transforms=[tf.ExchangeStereo()] + transforms_common,
        keys_to_load=('color_right', ),
        keys_to_video=('color_right', ),
        **common_kwargs,
    )
    dataset = ConcatDataset((left, right))

    loader = DataLoader(dataset, batch_size, shuffle=True,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=True)

    print(f" - Can use {len(dataset)} images from the cityscapes_sequence train set "
          f"for adaptation", flush=True)

    return loader
def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads image sequences for depth training from the kitti
    training set. This loader returns sequences from the left camera, as well
    as from the right camera.
    """
    # NOTE(review): crop_height/crop_width are accepted but never used in
    # this function — kept for signature compatibility with the other
    # *_train loaders; confirm whether cropping was intended here.
    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width),
                  image_types=('color', 'depth', 'camera_intrinsics', 'K')),
        tf.ConvertDepth(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2,
                       hue=0.1, gamma=0.0, fraction=0.5),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_train_depth'),
        tf.AddKeyValue('purposes', ('depth', 'domain')),
    ]

    common_kwargs = {
        'dataset': 'kitti',
        'trainvaltest_split': 'train',
        'video_mode': 'video',
        'stereo_mode': 'mono',
        'split': 'zhou_split',
        'video_frames': (0, -1, 1),
        'disable_const_items': False,
    }

    # Left-camera sequences use the common transforms unchanged; the
    # right-camera dataset first swaps the stereo roles.
    left = StandardDataset(
        data_transforms=transforms_common,
        keys_to_load=('color', ),
        keys_to_video=('color', ),
        **common_kwargs,
    )
    right = StandardDataset(
        data_transforms=[tf.ExchangeStereo()] + transforms_common,
        keys_to_load=('color_right', ),
        keys_to_video=('color_right', ),
        **common_kwargs,
    )
    dataset = ConcatDataset((left, right))

    loader = DataLoader(dataset, batch_size, shuffle=True,
                        num_workers=num_workers, pin_memory=True,
                        drop_last=True)

    print(f" - Can use {len(dataset)} images from the kitti (zhou_split) train split "
          f"for depth training", flush=True)

    return loader