def cityscapes_train(resize_height, resize_width, crop_height, crop_width, batch_size, num_workers):
    """Create the shuffled DataLoader for segmentation training on Cityscapes.

    The 'train' split of the 'cityscapes' dataset is opened with the keys
    'color' and 'segmentation'. The transform list ends with three
    ``tf.AddKeyValue`` entries: the domain 'cityscapes_train_seg', the
    purposes ('segmentation', 'domain') and the number of train-id classes.

    :param resize_height: height handed to ``tf.Resize``
    :param resize_width: width handed to ``tf.Resize``
    :param crop_height: height handed to ``tf.RandomCrop``
    :param crop_width: width handed to ``tf.RandomCrop``
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a shuffling ``DataLoader`` with ``pin_memory`` and ``drop_last`` enabled
    """
    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    # Image transforms; they are applied in list order.
    image_steps = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width)),
        tf.RandomRescale(1.5),
        tf.RandomCrop((crop_height, crop_width)),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
    ]
    # Constant key/value entries appended after the image transforms.
    constant_steps = [
        tf.AddKeyValue('domain', 'cityscapes_train_seg'),
        tf.AddKeyValue('purposes', ('segmentation', 'domain')),
        tf.AddKeyValue('num_classes', num_classes),
    ]

    dataset = StandardDataset(
        dataset='cityscapes',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
        labels_mode='fromid',
        disable_const_items=True,
        labels=labels,
        keys_to_load=('color', 'segmentation'),
        data_transforms=image_steps + constant_steps,
        video_frames=(0, ),
    )

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print(f" - Can use {len(dataset)} images from the cityscapes train set for segmentation training", flush=True)

    return loader
def cityscapes_validation(resize_height, resize_width, batch_size, num_workers):
    """Create the DataLoader for segmentation validation on Cityscapes.

    The 'validation' split of the 'cityscapes' dataset is opened with the
    keys 'color' and 'segmentation'. ``tf.Resize`` is restricted to the
    'color' image type. The loader neither shuffles nor drops the last batch.

    :param resize_height: height handed to ``tf.Resize``
    :param resize_width: width handed to ``tf.Resize``
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a non-shuffling ``DataLoader`` over the validation split
    """
    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    target_size = (resize_height, resize_width)
    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize(target_size, image_types=('color', )),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_val_seg'),
        tf.AddKeyValue('purposes', ('segmentation', )),
        tf.AddKeyValue('num_classes', num_classes),
    ]

    dataset_args = {
        'dataset': 'cityscapes',
        'trainvaltest_split': 'validation',
        'video_mode': 'mono',
        'stereo_mode': 'mono',
        'labels_mode': 'fromid',
        'labels': labels,
        'keys_to_load': ['color', 'segmentation'],
        'data_transforms': transforms,
        'disable_const_items': True,
    }
    dataset = StandardDataset(**dataset_args)

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False,
    )

    print(f" - Can use {len(dataset)} images from the cityscapes validation set for segmentation validation", flush=True)

    return loader
def kitti_odom09_validation(img_height, img_width, batch_size, num_workers):
    """Create the DataLoader for pose validation on the KITTI odometry 09 split.

    The 'test' part of the 'odom09_split' of the 'kitti' dataset is opened in
    video mode with the frames (0, -1, 1). The keys 'color' and 'poses' are
    loaded, and 'color' is additionally loaded for the video frames.

    NOTE(review): the samples are tagged with purposes ('depth', ) although
    the domain and the log message refer to pose validation — confirm this
    is intended.

    :param img_height: height handed to ``tf.Resize`` (colour images only)
    :param img_width: width handed to ``tf.Resize`` (colour images only)
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a non-shuffling ``DataLoader`` that keeps the last batch
    """
    image_steps = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
    ]
    constant_steps = [
        tf.AddKeyValue('domain', 'kitti_odom09_val_pose'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(
        dataset='kitti',
        split='odom09_split',
        trainvaltest_split='test',
        video_mode='video',
        stereo_mode='mono',
        keys_to_load=('color', 'poses'),
        keys_to_video=('color', ),
        data_transforms=image_steps + constant_steps,
        video_frames=(0, -1, 1),
        disable_const_items=True,
    )

    loader_args = dict(num_workers=num_workers, pin_memory=True, drop_last=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, **loader_args)

    print(f" - Can use {len(dataset)} images from the kitti (odom09 split) validation set for pose validation", flush=True)

    return loader
def motsynth_validation(img_height, img_width, batch_size, num_workers):
    """Create the DataLoader for depth validation on the motsynth validation set.

    The 'validation' split of the dataset named 'kek' is opened in simple
    mode, restricted to the sequence '001', with the keys 'color' and 'depth'.

    NOTE(review): the dataset name 'kek' and the kitti-flavoured domain, mask
    and clamp tags look like leftovers from the kitti loader — confirm that
    they are what the consumers of this loader expect.

    :param img_height: height handed to ``tf.Resize`` (colour images only)
    :param img_width: width handed to ``tf.Resize`` (colour images only)
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a non-shuffling ``DataLoader`` that keeps the last batch
    """
    size = (img_height, img_width)

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize(size, image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
    ]
    # Constant entries, appended one by one in the original order.
    for key, value in (
        ('domain', 'kitti_zhou_val_depth'),
        ('validation_mask', 'validation_mask_kitti_zhou'),
        ('validation_clamp', 'validation_clamp_kitti'),
        ('purposes', ('depth', )),
    ):
        transforms.append(tf.AddKeyValue(key, value))

    dataset = StandardDataset(
        dataset='kek',
        trainvaltest_split='validation',
        video_mode='mono',
        stereo_mode='mono',
        keys_to_load=('color', 'depth'),
        data_transforms=transforms,
        video_frames=(0, ),
        simple_mode=True,
        labels_mode='fromid',
        seq_to_load=['001'],
    )

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False,
    )

    print(f" - Can use {len(dataset)} images from the motsynth validation set for depth validation", flush=True)

    return loader
def kitti_2015_train(img_height, img_width, batch_size, num_workers):
    """Create the DataLoader for depth evaluation on the kitti_2015 training split.

    Although the 'train' split of 'kitti_2015' is opened, the loader is set
    up for evaluation: no shuffling, no dropped batches, and validation mask
    and clamp entries are attached to every sample.

    :param img_height: height handed to ``tf.Resize`` (colour images only)
    :param img_width: width handed to ``tf.Resize`` (colour images only)
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a non-shuffling ``DataLoader`` that keeps the last batch
    """
    image_steps = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
    ]
    constant_steps = [
        tf.AddKeyValue('domain', 'kitti_2015_train_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_kitti'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset_args = dict(
        dataset='kitti_2015',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
        keys_to_load=('color', 'depth'),
        data_transforms=image_steps + constant_steps,
        video_frames=(0, ),
        disable_const_items=True,
    )
    dataset = StandardDataset(**dataset_args)

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=False,
    )

    # NOTE(review): the message says "test set" while the 'train' split is loaded.
    print(f" - Can use {len(dataset)} images from the kitti_2015 test set for depth evaluation", flush=True)

    return loader
def __init__(self, options, model=None):
    """Prepare labels, the test loader, output paths, the model and metric bookkeeping.

    The constructor mutates ``options``: ``task_to_val`` is replaced by
    ``train_set`` when it is 0, and ``validate`` / ``model_name`` are
    overwritten when no model is passed.

    :param options: option namespace; read attributes include ``no_cuda``,
        ``train_set``, ``task_to_val``, ``height``, ``width``, ``scales``,
        ``hyperparameter``, ``dataset``, ``dataset_split``, ``video_frames``,
        ``batch_size``, ``num_workers``, ``load_model_name``, ``val_frequency``,
        ``save_pred_to_disk``, ``save_probs_to_disk``, ``weights_epoch``,
        ``pred_wout_blend``, ``pred_frequency`` and ``probs_frequency``
    :param model: an already constructed model to validate; when ``None``
        an ``ERFNet`` is created and ``self.load_model()`` is invoked
    """
    if __name__ == "__main__":
        print(" -> Executing script", os.path.basename(__file__))

    self.opt = options
    self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    #                           LABELS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    assert self.opt.train_set in {1, 2, 3, 12, 123}, "Invalid train_set!"
    assert self.opt.task_to_val in {0, 1, 2, 3, 12, 123}, "Invalid task!"
    keys_to_load = ['color', 'segmentation']

    # Labels
    labels = self._get_labels_cityscapes()

    # Train IDs: all distinct trainIds except 255, in ascending order.
    # remove() raises KeyError if no label carries trainId 255.
    self.train_ids = set([labels[i].trainId for i in range(len(labels))])
    self.train_ids.remove(255)
    self.train_ids = sorted(list(self.train_ids))

    # The model is always built with the full number of train ids.
    self.num_classes_model = len(self.train_ids)

    # Task handling: a non-zero task_to_val narrows labels, train_ids and the
    # [task_low, task_high) id range to the labels of that task.
    if self.opt.task_to_val != 0:
        labels_task = self._get_task_labels_cityscapes()
        train_ids_task = set(
            [labels_task[i].trainId for i in range(len(labels_task))])
        train_ids_task.remove(255)
        self.task_low = min(train_ids_task)
        self.task_high = max(train_ids_task) + 1
        labels = labels_task
        self.train_ids = sorted(list(train_ids_task))
    else:
        # task_to_val == 0: use the whole id range and record train_set as the task.
        self.task_low = 0
        self.task_high = self.num_classes_model
        self.opt.task_to_val = self.opt.train_set

    # Number of classes for the SegmentationRunningScore
    self.num_classes_score = self.task_high - self.task_low

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    #                           DATASET DEFINITIONS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Data augmentation
    test_data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize((self.opt.height, self.opt.width), image_types=['color']),
        mytransforms.ConvertSegmentation(),
        mytransforms.CreateColoraug(new_element=True, scales=self.opt.scales),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]

    # If hyperparameter search, only load the respective validation set. Else, load the full validation set.
    # In the hyperparameter case the folders of CitySet.get_city_set(-1) are taken from the 'train' split.
    if self.opt.hyperparameter:
        trainvaltest_split = 'train'
        folders_to_load = CitySet.get_city_set(-1)
    else:
        trainvaltest_split = 'validation'
        folders_to_load = None

    test_dataset = CityscapesDataset(dataset='cityscapes',
                                     split=self.opt.dataset_split,
                                     trainvaltest_split=trainvaltest_split,
                                     video_mode='mono',
                                     stereo_mode='mono',
                                     scales=self.opt.scales,
                                     labels_mode='fromid',
                                     labels=labels,
                                     keys_to_load=keys_to_load,
                                     data_transforms=test_data_transforms,
                                     video_frames=self.opt.video_frames,
                                     folders_to_load=folders_to_load)

    self.test_loader = DataLoader(dataset=test_dataset,
                                  batch_size=self.opt.batch_size,
                                  shuffle=False,
                                  num_workers=self.opt.num_workers,
                                  pin_memory=True,
                                  drop_last=False)

    print(
        "++++++++++++++++++++++ INIT VALIDATION ++++++++++++++++++++++++")
    print("Using dataset\n ", self.opt.dataset, "with split",
          self.opt.dataset_split)
    print("There are {:d} validation items\n ".format(len(test_dataset)))
    print("Validating classes up to train set\n ", self.opt.train_set)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    #                           LOGGING OPTIONS
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # If no model is passed, standalone validation is to be carried out. The log_path needs to be set before
    # self.load_model() is invoked.
    if model is None:
        self.opt.validate = False
        self.opt.model_name = self.opt.load_model_name

    path_getter = GetPath()
    log_path = path_getter.get_checkpoint_path()
    self.log_path = os.path.join(log_path, 'erfnet', self.opt.model_name)

    # All outputs will be saved to save_path
    self.save_path = self.log_path

    # Create output path for standalone validation
    if not self.opt.validate:
        save_dir = 'eval_{}'.format(self.opt.dataset)

        if self.opt.hyperparameter:
            save_dir = save_dir + '_hyper'

        save_dir = save_dir + '_task_to_val{}'.format(self.opt.task_to_val)

        self.save_path = os.path.join(self.log_path, save_dir)

        # NOTE(review): directory creation is assumed to belong to the
        # standalone branch — confirm against the original layout.
        if not os.path.exists(self.save_path):
            os.makedirs(self.save_path)

    # Copy this file to save_path
    # NOTE(review): assumed to run for both standalone and in-training validation — confirm.
    shutil.copy2(__file__, self.save_path)

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    #                           MODEL DEFINITION
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # Standalone validation
    if not self.opt.validate:
        # Create a conventional ERFNet
        self.model = ERFNet(self.num_classes_model, self.opt)
        self.load_model()
        self.model.to(self.device)

    # Validate while training
    else:
        self.model = model

    # NOTE(review): eval() is assumed to be applied in both cases — confirm.
    self.model.eval()

    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    #                           LOGGING OPTIONS II
    # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    # self.called is used to decide which file mode shall be used when writing metrics to disk.
    self.called = False

    self.metric_model = SegmentationRunningScore(self.num_classes_score)

    # Metrics are only saved if val_frequency > 0!
    if self.opt.val_frequency != 0:
        print("Saving metrics to\n ", self.save_path)

    # Set up colour output. Coloured images are only output if standalone validation is carried out!
    if not self.opt.validate and self.opt.save_pred_to_disk:
        # Output path
        self.img_path = os.path.join(
            self.save_path, 'output_{}'.format(self.opt.weights_epoch))

        if self.opt.pred_wout_blend:
            self.img_path += '_wout_blend'

        if not os.path.exists(self.img_path):
            os.makedirs(self.img_path)
        print("Saving prediction images to\n ", self.img_path)
        print("Save frequency\n ", self.opt.pred_frequency)

        # Get the colours from dataset.
        # Keys are trainIds shifted by task_low; only ids in self.train_ids are kept.
        colors = [
            (label.trainId - self.task_low, label.color) for label in labels
            if label.trainId != 255 and label.trainId in self.train_ids
        ]
        colors.append((255, (0, 0, 0)))  # void class
        self.id_color = dict(colors)
        self.id_color_keys = [key for key in self.id_color.keys()]
        self.id_color_vals = [val for val in self.id_color.values()]

        # Ongoing index to name the outputs
        self.img_idx = 0

    # Set up probability output. Probabilities are only output if standalone validation is carried out!
    if not self.opt.validate and self.opt.save_probs_to_disk:
        # Output path
        self.logit_path = os.path.join(
            self.save_path,
            'probabilities_{}'.format(self.opt.weights_epoch))

        if not os.path.exists(self.logit_path):
            os.makedirs(self.logit_path)
        print("Saving probabilities to\n ", self.logit_path)
        print("Save frequency\n ", self.opt.probs_frequency)

        # Ongoing index to name the probability outputs
        self.probs_idx = 0

    print(
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    # Save all options to disk and print them to stdout
    self._print_options()
    self._save_opts(len(test_dataset))
def cityscapes_sequence_train(resize_height, resize_width, crop_height, crop_width, batch_size, num_workers):
    """Create the shuffled adaptation DataLoader over cityscapes_sequence.

    Two datasets are opened on the 'train' split, one loading 'color' and
    one loading 'color_right'; the latter gets ``tf.ExchangeStereo`` in front
    of the shared transforms. Both are concatenated into one dataset.

    ``crop_height`` and ``crop_width`` are accepted but not used; resize and
    side-crop sizes are derived from ``resize_height`` and ``resize_width``.

    :param resize_height: reference height for the resize and side-crop sizes
    :param resize_width: reference width for the resize and side-crop sizes
    :param crop_height: unused
    :param crop_width: unused
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a shuffling ``DataLoader`` that drops the last incomplete batch
    """
    resized = (resize_height * 568 // 512, resize_width * 1092 // 1024)
    kept = (resize_height * 320 // 512, resize_width * 1024 // 1024)
    offset = (resize_height * 32 // 512, resize_width * 33 // 1024)

    # The same transform instances are used by both datasets.
    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize(resized, image_types=('color', )),
        # crop away the sides and bottom parts of the image
        tf.SidesCrop(kept, offset),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_sequence_adaptation'),
        tf.AddKeyValue('purposes', ('adaptation', )),
    ]

    shared_args = dict(
        dataset='cityscapes_sequence',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
    )

    left_part = StandardDataset(
        data_transforms=transforms_common,
        keys_to_load=('color', ),
        keys_to_video=('color', ),
        **shared_args
    )
    right_part = StandardDataset(
        data_transforms=[tf.ExchangeStereo(), *transforms_common],
        keys_to_load=('color_right', ),
        keys_to_video=('color_right', ),
        **shared_args
    )

    dataset = ConcatDataset((left_part, right_part))

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print(f" - Can use {len(dataset)} images from the cityscapes_sequence train set for adaptation", flush=True)

    return loader
def process(self, new_dataset_name, output_size=None, scale_factor=None, keys_to_convert=(), splits_to_adapt=None):
    """Scale every image of the dataset and write the result into a new dataset folder.

    Besides the images, new json files (basic_files.json, the split files and
    parameters.json) are written, with the camera intrinsics replaced by the
    values found in the resized samples. Exactly one of ``scale_factor`` and
    ``output_size`` has to be given.

    :param new_dataset_name: name of the desired output folder. It is forbidden to use an existing folder
    :param output_size: target output size as a 2-tuple (h, w)
    :param scale_factor: integer by which both image dimensions are divided
    :param keys_to_convert: A tuple of keys, only the images behind these keys will be converted (optional)
    :param splits_to_adapt: Splits in separate folders that will also be copied into a new folder and have their
        camera parameters adapted; a single name or a sequence of names (optional)
    :raises ValueError: if neither ``scale_factor`` nor ``output_size`` is given
    :raises AssertionError: if the target folder exists, equals the source dataset, both sizing
        arguments are given, or a sizing argument has the wrong type
    """
    assert self.dataset != new_dataset_name
    new_path = self._gen_dataset_path(new_dataset_name)
    assert not os.path.isdir(new_path), 'You are not allowed to write into an existing dataset folder!'

    if scale_factor is not None:
        assert output_size is None
        assert isinstance(scale_factor, int)
        scale_mode = 'relative'
        # In relative mode the resizer depends on each sample's size and is built per sample.
        resizer = None
    elif output_size is not None:
        assert isinstance(output_size, tuple)
        scale_mode = 'absolute'
        resizer = mytransforms.Resize(output_size=output_size)
    else:
        # Without this check the method failed later with a NameError on scale_mode.
        raise ValueError('Either scale_factor or output_size has to be specified!')

    if isinstance(splits_to_adapt, str):
        splits_to_adapt = (splits_to_adapt,)

    camera_intrinsics = {}
    pending_writes = []

    # Scale and save the images
    with Pool(processes=NUM_WORKERS) as pool_rd, Pool(processes=NUM_WORKERS) as pool_wr:
        for set_idx, sample, paths in self._print_progress(self._get_samples(pool_rd, keys_to_convert)):
            if scale_mode == 'relative':
                # .size is unpacked as (width, height); the resizer takes (height, width).
                width, height = sample[('color', 0, 0)].size
                new_size = (int(height / scale_factor), int(width / scale_factor))
                resizer = mytransforms.Resize(output_size=new_size)

            sample = resizer(sample)

            for key in sample:
                if key in paths:
                    # Entries with a file path are written back asynchronously.
                    new_filepath = os.path.join(new_path, paths[key])
                    os.makedirs(os.path.dirname(new_filepath), exist_ok=True)

                    args = (sample[key], key[0], self.dataset, new_filepath)
                    pending_writes.append(pool_wr.apply_async(save_image_file, args))

                elif key[0] in CAMERA_KEYS:
                    # Camera entries are collected per set and written to the json files below.
                    camera_intrinsics.setdefault(set_idx, {})[key[0]] = sample[key].tolist()

            # Limit the number of pending write operations
            # by waiting for old ones to complete
            while len(pending_writes) > NUM_WORKERS:
                pending_writes.pop(0).get()

        # Wait for the remaining writes before the pools are shut down.
        while pending_writes:
            pending_writes.pop(0).get()

    # Modify the json data and save the new json files
    with open(os.path.join(self.dataset_path, 'basic_files.json')) as fd:
        basic_json_data = json.load(fd)

    names = basic_json_data['names']
    positions = basic_json_data['positions']

    for set_idx, cameras in camera_intrinsics.items():
        for camera_key, intrinsics in cameras.items():
            camera_index = names.index(camera_key)
            basic_index = self._get_index_from_position(positions[camera_index], set_idx)
            basic_json_data['numerical_values'][camera_index][basic_index] = intrinsics

    with open(os.path.join(new_path, 'basic_files.json'), 'w') as fd:
        json.dump(basic_json_data, fd)

    # Modify the train, val, test.json, if present
    split_data_dict = self._load_split_data()
    split_data_dict = self._adapt_camera_intrinsics_in_split_file(split_data_dict, camera_intrinsics)

    for split, split_data in split_data_dict.items():
        with open(os.path.join(new_path, split + '.json'), 'w') as fd:
            json.dump(split_data, fd)

    # Copy the parameters.json into the new path, adapt the split list
    with open(os.path.join(self.dataset_path, 'parameters.json')) as fd:
        parameters = json.load(fd)

    parameters['splits'] = splits_to_adapt

    with open(os.path.join(new_path, 'parameters.json'), 'w') as fd:
        json.dump(parameters, fd)

    # If there are any separate split folders given, adapt them too.
    if splits_to_adapt is not None:
        self._adapt_splits(splits_to_adapt, new_path, camera_intrinsics)
def __init__(self, options):
    """Build data loaders, model, optimizer, loss and logging for ERFNet training.

    :param options: option namespace; read attributes include ``no_cuda``,
        ``train_set`` (1 or 123), ``city``, ``height``, ``width``,
        ``crop_height``, ``crop_width``, ``scales``, ``video_frames``,
        ``dataset``, ``dataset_split``, ``batch_size``, ``num_workers``,
        ``model_name``, ``learning_rate``, ``weight_decay``, ``num_epochs``
        and ``validate``
    """
    print(" -> Executing script", os.path.basename(__file__))

    self.opt = options
    opt = self.opt  # short alias, same object
    self.device = torch.device("cpu" if opt.no_cuda else "cuda")

    # ------------------------------------------------------------------
    # Labels and cities
    # ------------------------------------------------------------------
    assert opt.train_set in {123, 1}, "Invalid train_set!"
    keys_to_load = ['color', 'segmentation']

    if opt.train_set == 1:
        label_source = labels_cityscape_seg_train1
    else:
        label_source = labels_cityscape_seg_train3_eval
    labels = label_source.getlabels()

    # All distinct train ids except 255; remove() raises KeyError if 255 is absent.
    self.train_ids = {entry.trainId for entry in labels}
    self.train_ids.remove(255)
    self.num_classes = len(self.train_ids)

    # City set 0 by default, the set of the train_set when the city filter is on.
    folders_to_train = CitySet.get_city_set(0)
    if opt.city:
        folders_to_train = CitySet.get_city_set(opt.train_set)

    # ------------------------------------------------------------------
    # Datasets and loaders
    # ------------------------------------------------------------------
    # Arguments common to the training and the validation dataset / loader.
    shared_dataset_args = dict(
        video_mode='mono',
        stereo_mode='mono',
        scales=opt.scales,
        labels_mode='fromid',
        labels=labels,
        keys_to_load=keys_to_load,
        video_frames=opt.video_frames,
    )
    shared_loader_args = dict(
        batch_size=opt.batch_size,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True,
    )

    train_steps = [
        mytransforms.RandomHorizontalFlip(),
        mytransforms.CreateScaledImage(),
        mytransforms.Resize((opt.height, opt.width), image_types=keys_to_load),
        mytransforms.RandomRescale(1.5),
        mytransforms.RandomCrop((opt.crop_height, opt.crop_width)),
        mytransforms.ConvertSegmentation(),
        mytransforms.CreateColoraug(new_element=True, scales=opt.scales),
        mytransforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]
    train_dataset = CityscapesDataset(
        dataset="cityscapes",
        trainvaltest_split='train',
        data_transforms=train_steps,
        folders_to_load=folders_to_train,
        **shared_dataset_args
    )
    self.train_loader = DataLoader(dataset=train_dataset, shuffle=True, **shared_loader_args)

    val_steps = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize((opt.height, opt.width), image_types=keys_to_load),
        mytransforms.ConvertSegmentation(),
        mytransforms.CreateColoraug(new_element=True, scales=opt.scales),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]
    # The validation items are the folders of city set -1 inside the 'train' split.
    val_dataset = CityscapesDataset(
        dataset=opt.dataset,
        trainvaltest_split="train",
        data_transforms=val_steps,
        folders_to_load=CitySet.get_city_set(-1),
        **shared_dataset_args
    )
    self.val_loader = DataLoader(dataset=val_dataset, shuffle=False, **shared_loader_args)
    self.val_iter = iter(self.val_loader)

    # ------------------------------------------------------------------
    # Logging
    # ------------------------------------------------------------------
    print(
        "++++++++++++++++++++++ INIT TRAINING ++++++++++++++++++++++++++")
    print("Using dataset:\n ", opt.dataset, "with split", opt.dataset_split)
    print("There are {:d} training items and {:d} validation items\n".format(
        len(train_dataset), len(val_dataset)))

    checkpoint_root = GetPath().get_checkpoint_path()
    self.log_path = os.path.join(checkpoint_root, 'erfnet', opt.model_name)

    # One tensorboard writer per mode, each in its own sub-folder of log_path.
    self.writers = {
        mode: SummaryWriter(os.path.join(self.log_path, mode))
        for mode in ("train", "validation")
    }

    # Keep a copy of this file next to the logs.
    shutil.copy2(__file__, self.log_path)

    print("Training model named:\n ", opt.model_name)
    print("Models and tensorboard events files are saved to:\n ", self.log_path)
    print("Training is using:\n ", self.device)
    print("Training takes place on train set:\n ", opt.train_set)
    print(
        "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    self.model = ERFNet(self.num_classes, opt)
    self.model.to(self.device)
    self.parameters_to_train = self.model.parameters()

    # ------------------------------------------------------------------
    # Optimizer and learning-rate schedule
    # ------------------------------------------------------------------
    self.model_optimizer = optim.Adam(params=self.parameters_to_train,
                                      lr=opt.learning_rate,
                                      weight_decay=opt.weight_decay)

    def poly_decay(epoch):
        # Learning-rate factor (1 - (epoch - 1) / num_epochs) ** 0.9.
        return pow((1 - ((epoch - 1) / self.opt.num_epochs)), 0.9)

    self.model_lr_scheduler = optim.lr_scheduler.LambdaLR(
        self.model_optimizer, lr_lambda=poly_decay)

    # ------------------------------------------------------------------
    # Loss and metric
    # ------------------------------------------------------------------
    self.crossentropy = CrossEntropyLoss(ignore_background=True, device=self.device)
    self.crossentropy.to(self.device)

    self.metric_model = SegmentationRunningScore(self.num_classes)

    # Save all options to disk and print them to stdout
    self.save_opts(len(train_dataset), len(val_dataset))
    self._print_options()

    # ------------------------------------------------------------------
    # Evaluator (only when validating during training)
    # ------------------------------------------------------------------
    if opt.validate:
        self.evaluator = Evaluator(opt, self.model)
def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width, batch_size, num_workers):
    """Create the shuffled DataLoader for depth training on the KITTI zhou split.

    Two video-mode datasets (frames 0, -1, 1) are opened on the 'train' part
    of 'zhou_split', one loading 'color' and one loading 'color_right'; the
    latter gets ``tf.ExchangeStereo`` in front of the shared transforms. Both
    are concatenated into one dataset.

    ``crop_height`` and ``crop_width`` are accepted but not used.

    :param resize_height: height handed to ``tf.Resize``
    :param resize_width: width handed to ``tf.Resize``
    :param crop_height: unused
    :param crop_width: unused
    :param batch_size: batch size of the returned loader
    :param num_workers: number of worker processes of the returned loader
    :return: a shuffling ``DataLoader`` that drops the last incomplete batch
    """
    resize_types = ('color', 'depth', 'camera_intrinsics', 'K')

    # The same transform instances are used by both datasets.
    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width), image_types=resize_types),
        tf.ConvertDepth(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0, fraction=0.5),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_train_depth'),
        tf.AddKeyValue('purposes', ('depth', 'domain')),
    ]

    shared_args = dict(
        dataset='kitti',
        trainvaltest_split='train',
        video_mode='video',
        stereo_mode='mono',
        split='zhou_split',
        video_frames=(0, -1, 1),
        disable_const_items=False,
    )

    left_part = StandardDataset(
        data_transforms=transforms_common,
        keys_to_load=('color', ),
        keys_to_video=('color', ),
        **shared_args
    )
    right_part = StandardDataset(
        data_transforms=[tf.ExchangeStereo(), *transforms_common],
        keys_to_load=('color_right', ),
        keys_to_video=('color_right', ),
        **shared_args
    )

    dataset = ConcatDataset((left_part, right_part))

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print(f" - Can use {len(dataset)} images from the kitti (zhou_split) train split for depth training", flush=True)

    return loader
def _prepend_converters(data_transforms, keys_to_load):
    """Insert the depth / segmentation converters at the front of a transform list.

    :param data_transforms: list of transforms; modified in place
    :param keys_to_load: iterable of key names, or None for no converters
    :return: the same list object
    """
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        # Inserted second, so ConvertSegmentation ends up in front of ConvertDepth.
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())
    return data_transforms


def _open_loader_for_check(dataset_name, split, trainvaltest_split, keys_to_load, data_transforms):
    """Open one dataset for check_scaled_dataset, print it and return its loader."""
    print('\n Loading {} dataset'.format(dataset_name))
    opened = StandardDataset(dataset_name,
                             split=split,
                             trainvaltest_split=trainvaltest_split,
                             keys_to_load=keys_to_load,
                             data_transforms=data_transforms,
                             output_filenames=True)
    loader = DataLoader(opened,
                        batch_size=1,
                        shuffle=False,
                        num_workers=0,
                        pin_memory=True,
                        drop_last=True)
    print_dataset(loader)
    return loader


def check_scaled_dataset(dataset_name, scaled_dataset_name, trainvaltest_split, keys_to_load, scaled_size,
                         split=None):
    """Compare a pre-scaled dataset against the original dataset scaled at load time.

    The first two samples of both datasets are loaded. For every key in
    ``keys_to_load`` three comparisons of the ``(key, 0, 0)`` entries are
    printed together with their expected outcome.

    :param dataset_name: Name of the unscaled dataset
    :param scaled_dataset_name: Name of the scaled dataset
    :param trainvaltest_split: 'train', 'validation' or 'test'
    :param keys_to_load: keys that are supposed to be loaded, e.g. 'color', 'depth', 'segmentation', ...;
        with None both datasets are still loaded and printed, but no per-key comparison is made
    :param scaled_size: Size of the scaled image (h, w)
    :param split: Name of the dataset split, if one exists
    """
    # Original dataset: resized to scaled_size while loading.
    my_loader = _open_loader_for_check(
        dataset_name, split, trainvaltest_split, keys_to_load,
        _prepend_converters([
            mytransforms.CreateScaledImage(),
            mytransforms.Resize(output_size=scaled_size),
            mytransforms.CreateColoraug(),
            mytransforms.ToTensor(),
        ], keys_to_load))

    # Pre-scaled dataset: loaded as stored, without a Resize.
    my_loader_s = _open_loader_for_check(
        scaled_dataset_name, split, trainvaltest_split, keys_to_load,
        _prepend_converters([
            mytransforms.CreateScaledImage(),
            mytransforms.CreateColoraug(),
            mytransforms.ToTensor(),
        ], keys_to_load))

    print("Testing dataset_scaler")
    samples = []
    samples_s = []
    iter_my_loader = iter(my_loader)
    iter_my_loader_s = iter(my_loader_s)
    for _ in range(2):
        samples.append(next(iter_my_loader).copy())
        samples_s.append(next(iter_my_loader_s).copy())

    if keys_to_load is None:
        # Previously this case crashed with a TypeError in the loop below.
        print("No keys_to_load given, skipping the per-key comparison")
        return

    for key in keys_to_load:
        entry = (key, 0, 0)
        print("Check if {} entries are equal:".format(key))
        print(" Should be False: {}".format(
            torch.equal(samples[1][entry], samples_s[0][entry])))
        print(" Should be True: {}".format(
            torch.equal(samples[0][entry], samples_s[0][entry])))
        print(" Should be True: {}".format(
            torch.equal(samples[1][entry], samples_s[1][entry])))
keys_to_load = ['color', 'depth', 'segmentation', 'camera_intrinsics'] # Optional; standard is just 'color' # The following parameters and the data_transforms list are optional. Standard is just the transform ToTensor() width = 640 height = 192 scales = [0, 1, 2, 3] data_transforms = [ #mytransforms.RandomExchangeStereo(), # (color, 0, -1) mytransforms.RandomHorizontalFlip(), mytransforms.RandomVerticalFlip(), mytransforms.CreateScaledImage(), # (color, 0, 0) mytransforms.RandomRotate(0.0), mytransforms.RandomTranslate(0), mytransforms.RandomRescale(scale=1.1, fraction=0.5), mytransforms.RandomCrop((320, 1088)), mytransforms.Resize((height, width)), mytransforms.MultiResize(scales), mytransforms.CreateColoraug(new_element=True, scales=scales), # (color_aug, 0, 0) mytransforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0), mytransforms.GaussianBlurr(fraction=0.5), mytransforms.RemoveOriginals(), mytransforms.ToTensor(), mytransforms.NormalizeZeroMean(), ] print('Loading {} dataset, {} split'.format(dataset, trainvaltest_split))