Example #1
def cityscapes_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads images and ground truth for segmentation from the
    cityscapes training set.
    """

    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    transforms = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width)),
        tf.RandomRescale(1.5),
        tf.RandomCrop((crop_height, crop_width)),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2,
                       contrast=0.2,
                       saturation=0.2,
                       hue=0.1,
                       gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_train_seg'),
        tf.AddKeyValue('purposes', ('segmentation', 'domain')),
        tf.AddKeyValue('num_classes', num_classes)
    ]

    dataset_name = 'cityscapes'

    dataset = StandardDataset(dataset=dataset_name,
                              trainvaltest_split='train',
                              video_mode='mono',
                              stereo_mode='mono',
                              labels_mode='fromid',
                              disable_const_items=True,
                              labels=labels,
                              keys_to_load=('color', 'segmentation'),
                              data_transforms=transforms,
                              video_frames=(0, ))

    loader = DataLoader(dataset,
                        batch_size,
                        shuffle=True,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=True)

    print(
        f"  - Can use {len(dataset)} images from the cityscapes train set for segmentation training",
        flush=True)

    return loader
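
# A minimal usage sketch (not part of the original example). Batch dict keys
# follow the (name, frame, scale) convention used throughout these examples,
# e.g. ('color_aug', 0, 0) after CreateColoraug(); the sizes below are
# illustrative.
loader = cityscapes_train(resize_height=512, resize_width=1024,
                          crop_height=192, crop_width=640,
                          batch_size=4, num_workers=2)
for batch in loader:
    images = batch[('color_aug', 0, 0)]   # augmented input images
    gt = batch[('segmentation', 0, 0)]    # segmentation ground truth
    break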
Example #2
def cityscapes_validation(resize_height, resize_width, batch_size,
                          num_workers):
    """A loader that loads images and ground truth for segmentation from the
    cityscapes validation set.
    """

    labels = labels_cityscape_seg.getlabels()
    num_classes = len(labels_cityscape_seg.gettrainid2label())

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((resize_height, resize_width), image_types=('color', )),
        tf.ConvertSegmentation(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_val_seg'),
        tf.AddKeyValue('purposes', ('segmentation', )),
        tf.AddKeyValue('num_classes', num_classes)
    ]

    dataset = StandardDataset(dataset='cityscapes',
                              trainvaltest_split='validation',
                              video_mode='mono',
                              stereo_mode='mono',
                              labels_mode='fromid',
                              labels=labels,
                              keys_to_load=['color', 'segmentation'],
                              data_transforms=transforms,
                              disable_const_items=True)

    loader = DataLoader(dataset,
                        batch_size,
                        shuffle=False,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=False)

    print(
        f"  - Can use {len(dataset)} images from the cityscapes validation set for segmentation validation",
        flush=True)

    return loader
Example #3
def check_dataset(dataset_name,
                  split=None,
                  trainvaltest_split='train',
                  keys_to_load=None,
                  folders_to_load=None):
    """ Loads a dataset and prints name and shape of the first NUM_SAMPLES entries. Performs no transforms other than
    the necessary ones.

    :param dataset_name: Name of the dataset
    :param split: Name of the dataset split, if one exists
    :param trainvaltest_split: 'train', 'validation' or 'test'
    :param keys_to_load: keys that are supposed to be loaded, e.g. 'color', 'depth', 'segmentation', ...
    :param folders_to_load: list of folders from which data should be loaded; folders not mentioned are skipped
    """
    dataset = dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())
        if any('flow' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertFlow())
    print('\n Loading {} dataset'.format(dataset))
    my_dataset = StandardDataset(dataset,
                                 split=split,
                                 trainvaltest_split=trainvaltest_split,
                                 keys_to_load=keys_to_load,
                                 data_transforms=data_transforms,
                                 folders_to_load=folders_to_load)
    my_loader = DataLoader(my_dataset,
                           batch_size=1,
                           shuffle=False,
                           num_workers=1,
                           pin_memory=True,
                           drop_last=True)
    print('Number of elements: {}'.format(len(my_dataset)))
    print_dataset(my_loader)
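
# Example invocation (dataset name and keys are illustrative):
#
#   check_dataset('cityscapes', trainvaltest_split='validation',
#                 keys_to_load=['color', 'segmentation'])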
Example #4
def kitti_odom09_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for
    depth validation from the kitti validation set.
    """

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize(
            (img_height, img_width),
            image_types=('color', )
        ),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_odom09_val_pose'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(
        dataset='kitti',
        split='odom09_split',
        trainvaltest_split='test',
        video_mode='video',
        stereo_mode='mono',
        keys_to_load=('color', 'poses'),
        keys_to_video=('color', ),
        data_transforms=transforms,
        video_frames=(0, -1, 1),
        disable_const_items=True
    )

    loader = DataLoader(
        dataset, batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=True, drop_last=False
    )

    print(f"  - Can use {len(dataset)} images from the kitti (odom09 split) validation set for pose validation",
          flush=True)

    return loader
Example #5
def motsynth_validation(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for
    depth validation from the kitti validation set.
    """

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_val_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_zhou'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]
    dataset = StandardDataset(dataset='kek',
                              trainvaltest_split='validation',
                              video_mode='mono',
                              stereo_mode='mono',
                              keys_to_load=('color', 'depth'),
                              data_transforms=transforms,
                              video_frames=(0, ),
                              simple_mode=True,
                              labels_mode='fromid',
                              seq_to_load=['001'])
    loader = DataLoader(dataset,
                        batch_size,
                        shuffle=False,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=False)

    print(
        f"  - Can use {len(dataset)} images from the motsynth validation set for depth validation",
        flush=True)

    return loader
Example #6
def kitti_2015_train(img_height, img_width, batch_size, num_workers):
    """A loader that loads images and depth ground truth for
    depth evaluation from the kitti_2015 training set (but for evaluation).
    """

    transforms = [
        tf.CreateScaledImage(True),
        tf.Resize((img_height, img_width), image_types=('color', )),
        tf.ConvertDepth(),
        tf.CreateColoraug(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_2015_train_depth'),
        tf.AddKeyValue('validation_mask', 'validation_mask_kitti_kitti'),
        tf.AddKeyValue('validation_clamp', 'validation_clamp_kitti'),
        tf.AddKeyValue('purposes', ('depth', )),
    ]

    dataset = StandardDataset(dataset='kitti_2015',
                              trainvaltest_split='train',
                              video_mode='mono',
                              stereo_mode='mono',
                              keys_to_load=('color', 'depth'),
                              data_transforms=transforms,
                              video_frames=(0, ),
                              disable_const_items=True)

    loader = DataLoader(dataset,
                        batch_size,
                        shuffle=False,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=False)

    print(
        f"  - Can use {len(dataset)} images from the kitti_2015 test set for depth evaluation",
        flush=True)

    return loader
Example #7
# validation.json and test.json files. In this example, we use all available keys for cityscapes.
keys_to_load = [
    'color', 'color_right', 'depth', 'segmentation', 'camera_intrinsics',
    'camera_intrinsics_right', 'velocity'
]

# When loading an image, some data transforms are performed on it. These transforms will alter all
# image categories in the same way. At minimum, CreateScaledImage() and CreateColoraug() have to
# be included. For each image category like depth and segmentation, the corresponding
# Convert-transform is also necessary.
data_transforms = [
    mytransforms.CreateScaledImage(),
    mytransforms.RemoveOriginals(),
    mytransforms.RandomCrop((1024, 2048)),
    mytransforms.RandomHorizontalFlip(),
    mytransforms.CreateColoraug(new_element=True),
    mytransforms.ColorJitter(brightness=0.2,
                             contrast=0.5,
                             saturation=0.5,
                             hue=0.5,
                             gamma=0.5,
                             fraction=1.0),  # values for visualization only
    # The convert transforms should come after the scaling/rotating/cropping transforms
    mytransforms.ConvertDepth(),
    mytransforms.ConvertSegmentation(),
    mytransforms.ToTensor(),
]

# With the parameters specified above, a StandardDataset can now be created. You can iterate through
# it using the PyTorch DataLoader class, as sketched below. There are several optional arguments in
# the StandardDataset class that are not featured here for the sake of simplicity.
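
# A sketch of the dataset creation described above, assuming the same imports
# as in the other examples (StandardDataset, DataLoader, mytransforms); all
# argument values are illustrative:
my_dataset = StandardDataset('cityscapes',
                             trainvaltest_split='train',
                             keys_to_load=keys_to_load,
                             data_transforms=data_transforms)
my_loader = DataLoader(my_dataset, batch_size=1, shuffle=False, num_workers=1)
for sample in my_loader:
    print(list(sample.keys()))  # keys follow the (name, frame, scale) convention
    break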
Example #8
    def __init__(self, options, model=None):

        if __name__ == "__main__":
            print(" -> Executing script", os.path.basename(__file__))

        self.opt = options
        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LABELS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        assert self.opt.train_set in {1, 2, 3, 12, 123}, "Invalid train_set!"
        assert self.opt.task_to_val in {0, 1, 2, 3, 12, 123}, "Invalid task!"
        keys_to_load = ['color', 'segmentation']

        # Labels
        labels = self._get_labels_cityscapes()

        # Train IDs
        self.train_ids = set([labels[i].trainId for i in range(len(labels))])
        self.train_ids.remove(255)
        self.train_ids = sorted(list(self.train_ids))

        self.num_classes_model = len(self.train_ids)

        # Task handling
        if self.opt.task_to_val != 0:
            labels_task = self._get_task_labels_cityscapes()
            train_ids_task = set(
                [labels_task[i].trainId for i in range(len(labels_task))])
            train_ids_task.remove(255)
            self.task_low = min(train_ids_task)
            self.task_high = max(train_ids_task) + 1
            labels = labels_task
            self.train_ids = sorted(list(train_ids_task))
        else:
            self.task_low = 0
            self.task_high = self.num_classes_model
            self.opt.task_to_val = self.opt.train_set

        # Number of classes for the SegmentationRunningScore
        self.num_classes_score = self.task_high - self.task_low

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           DATASET DEFINITIONS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # Data augmentation
        test_data_transforms = [
            mytransforms.CreateScaledImage(),
            mytransforms.Resize((self.opt.height, self.opt.width),
                                image_types=['color']),
            mytransforms.ConvertSegmentation(),
            mytransforms.CreateColoraug(new_element=True,
                                        scales=self.opt.scales),
            mytransforms.RemoveOriginals(),
            mytransforms.ToTensor(),
            mytransforms.NormalizeZeroMean(),
        ]

        # If hyperparameter search, only load the respective validation set. Else, load the full validation set.
        if self.opt.hyperparameter:
            trainvaltest_split = 'train'
            folders_to_load = CitySet.get_city_set(-1)
        else:
            trainvaltest_split = 'validation'
            folders_to_load = None

        test_dataset = CityscapesDataset(dataset='cityscapes',
                                         split=self.opt.dataset_split,
                                         trainvaltest_split=trainvaltest_split,
                                         video_mode='mono',
                                         stereo_mode='mono',
                                         scales=self.opt.scales,
                                         labels_mode='fromid',
                                         labels=labels,
                                         keys_to_load=keys_to_load,
                                         data_transforms=test_data_transforms,
                                         video_frames=self.opt.video_frames,
                                         folders_to_load=folders_to_load)

        self.test_loader = DataLoader(dataset=test_dataset,
                                      batch_size=self.opt.batch_size,
                                      shuffle=False,
                                      num_workers=self.opt.num_workers,
                                      pin_memory=True,
                                      drop_last=False)

        print(
            "++++++++++++++++++++++ INIT VALIDATION ++++++++++++++++++++++++")
        print("Using dataset\n  ", self.opt.dataset, "with split",
              self.opt.dataset_split)
        print("There are {:d} validation items\n  ".format(len(test_dataset)))
        print("Validating classes up to train set\n  ", self.opt.train_set)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LOGGING OPTIONS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # If no model is passed, standalone validation is to be carried out. The log_path needs to be set before
        # self.load_model() is invoked.
        if model is None:
            self.opt.validate = False
            self.opt.model_name = self.opt.load_model_name

        path_getter = GetPath()
        log_path = path_getter.get_checkpoint_path()
        self.log_path = os.path.join(log_path, 'erfnet', self.opt.model_name)

        # All outputs will be saved to save_path
        self.save_path = self.log_path

        # Create output path for standalone validation
        if not self.opt.validate:
            save_dir = 'eval_{}'.format(self.opt.dataset)

            if self.opt.hyperparameter:
                save_dir = save_dir + '_hyper'

            save_dir = save_dir + '_task_to_val{}'.format(self.opt.task_to_val)

            self.save_path = os.path.join(self.log_path, save_dir)

            if not os.path.exists(self.save_path):
                os.makedirs(self.save_path)

        # Copy this file to save_path
        shutil.copy2(__file__, self.save_path)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           MODEL DEFINITION
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # Standalone validation
        if not self.opt.validate:
            # Create a conventional ERFNet
            self.model = ERFNet(self.num_classes_model, self.opt)
            self.load_model()
            self.model.to(self.device)

        # Validate while training
        else:
            self.model = model

        self.model.eval()

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LOGGING OPTIONS II
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # self.called is used to decide which file mode shall be used when writing metrics to disk.
        self.called = False

        self.metric_model = SegmentationRunningScore(self.num_classes_score)

        # Metrics are only saved if val_frequency > 0!
        if self.opt.val_frequency != 0:
            print("Saving metrics to\n  ", self.save_path)

        # Set up colour output. Coloured images are only output if standalone validation is carried out!
        if not self.opt.validate and self.opt.save_pred_to_disk:
            # Output path
            self.img_path = os.path.join(
                self.save_path, 'output_{}'.format(self.opt.weights_epoch))

            if self.opt.pred_wout_blend:
                self.img_path += '_wout_blend'

            if not os.path.exists(self.img_path):
                os.makedirs(self.img_path)
            print("Saving prediction images to\n  ", self.img_path)
            print("Save frequency\n  ", self.opt.pred_frequency)

            # Get the colours from dataset.
            colors = [
                (label.trainId - self.task_low, label.color)
                for label in labels
                if label.trainId != 255 and label.trainId in self.train_ids
            ]
            colors.append((255, (0, 0, 0)))  # void class
            self.id_color = dict(colors)
            self.id_color_keys = list(self.id_color.keys())
            self.id_color_vals = list(self.id_color.values())

            # Ongoing index to name the outputs
            self.img_idx = 0

        # Set up probability output. Probabilities are only output if standalone validation is carried out!
        if not self.opt.validate and self.opt.save_probs_to_disk:
            # Output path
            self.logit_path = os.path.join(
                self.save_path,
                'probabilities_{}'.format(self.opt.weights_epoch))
            if not os.path.exists(self.logit_path):
                os.makedirs(self.logit_path)
            print("Saving probabilities to\n  ", self.logit_path)
            print("Save frequency\n  ", self.opt.probs_frequency)

            # Ongoing index to name the probability outputs
            self.probs_idx = 0

        print(
            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

        # Save all options to disk and print them to stdout
        self._print_options()
        self._save_opts(len(test_dataset))
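
    # Hypothetical usage of this validator (class name assumed to be Evaluator,
    # as in the training example further below): pass an in-memory model to
    # validate during training, or no model for standalone validation, which
    # loads the weights from disk instead.
    #
    #   validator = Evaluator(options)               # standalone validation
    #   validator = Evaluator(options, model=model)  # validation while training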
Example #9
def cityscapes_sequence_train(resize_height, resize_width, crop_height,
                              crop_width, batch_size, num_workers):
    """A loader that loads images for adaptation from the cityscapes_sequence training set.
    This loader returns sequences from the left camera, as well as from the right camera.
    """

    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height * 568 // 512, resize_width * 1092 // 1024),
                  image_types=('color', )),
        # crop away the sides and bottom parts of the image
        tf.SidesCrop((resize_height * 320 // 512, resize_width * 1024 // 1024),
                     (resize_height * 32 // 512, resize_width * 33 // 1024)),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2,
                       contrast=0.2,
                       saturation=0.2,
                       hue=0.1,
                       gamma=0.0),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_sequence_adaptation'),
        tf.AddKeyValue('purposes', ('adaptation', )),
    ]

    dataset_name = 'cityscapes_sequence'

    cfg_common = {
        'dataset': dataset_name,
        'trainvaltest_split': 'train',
        'video_mode': 'mono',
        'stereo_mode': 'mono',
    }

    cfg_left = {'keys_to_load': ('color', ), 'keys_to_video': ('color', )}

    cfg_right = {
        'keys_to_load': ('color_right', ),
        'keys_to_video': ('color_right', )
    }

    dataset_left = StandardDataset(data_transforms=transforms_common,
                                   **cfg_left,
                                   **cfg_common)

    dataset_right = StandardDataset(data_transforms=[tf.ExchangeStereo()] +
                                    transforms_common,
                                    **cfg_right,
                                    **cfg_common)

    dataset = ConcatDataset((dataset_left, dataset_right))

    loader = DataLoader(dataset,
                        batch_size,
                        shuffle=True,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=True)

    print(
        f"  - Can use {len(dataset)} images from the cityscapes_sequence train set for adaptation",
        flush=True)

    return loader
Example #10
    def __init__(self, options):

        print(" -> Executing script", os.path.basename(__file__))

        self.opt = options
        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LABELS AND CITIES
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        assert self.opt.train_set in {123, 1}, "Invalid train_set!"
        keys_to_load = ['color', 'segmentation']

        # Labels
        if self.opt.train_set == 1:
            labels = labels_cityscape_seg_train1.getlabels()
        else:
            labels = labels_cityscape_seg_train3_eval.getlabels()

        # Train IDs
        self.train_ids = set([labels[i].trainId for i in range(len(labels))])
        self.train_ids.remove(255)

        self.num_classes = len(self.train_ids)

        # Apply city filter
        folders_to_train = CitySet.get_city_set(0)
        if self.opt.city:
            folders_to_train = CitySet.get_city_set(self.opt.train_set)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           DATASET DEFINITIONS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # Data augmentation
        train_data_transforms = [
            mytransforms.RandomHorizontalFlip(),
            mytransforms.CreateScaledImage(),
            mytransforms.Resize((self.opt.height, self.opt.width),
                                image_types=keys_to_load),
            mytransforms.RandomRescale(1.5),
            mytransforms.RandomCrop(
                (self.opt.crop_height, self.opt.crop_width)),
            mytransforms.ConvertSegmentation(),
            mytransforms.CreateColoraug(new_element=True,
                                        scales=self.opt.scales),
            mytransforms.ColorJitter(brightness=0.2,
                                     contrast=0.2,
                                     saturation=0.2,
                                     hue=0.1,
                                     gamma=0.0),
            mytransforms.RemoveOriginals(),
            mytransforms.ToTensor(),
            mytransforms.NormalizeZeroMean(),
        ]

        train_dataset = CityscapesDataset(
            dataset="cityscapes",
            trainvaltest_split='train',
            video_mode='mono',
            stereo_mode='mono',
            scales=self.opt.scales,
            labels_mode='fromid',
            labels=labels,
            keys_to_load=keys_to_load,
            data_transforms=train_data_transforms,
            video_frames=self.opt.video_frames,
            folders_to_load=folders_to_train,
        )

        self.train_loader = DataLoader(dataset=train_dataset,
                                       batch_size=self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)

        val_data_transforms = [
            mytransforms.CreateScaledImage(),
            mytransforms.Resize((self.opt.height, self.opt.width),
                                image_types=keys_to_load),
            mytransforms.ConvertSegmentation(),
            mytransforms.CreateColoraug(new_element=True,
                                        scales=self.opt.scales),
            mytransforms.RemoveOriginals(),
            mytransforms.ToTensor(),
            mytransforms.NormalizeZeroMean(),
        ]

        val_dataset = CityscapesDataset(
            dataset=self.opt.dataset,
            trainvaltest_split="train",
            video_mode='mono',
            stereo_mode='mono',
            scales=self.opt.scales,
            labels_mode='fromid',
            labels=labels,
            keys_to_load=keys_to_load,
            data_transforms=val_data_transforms,
            video_frames=self.opt.video_frames,
            folders_to_load=CitySet.get_city_set(-1))

        self.val_loader = DataLoader(dataset=val_dataset,
                                     batch_size=self.opt.batch_size,
                                     shuffle=False,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)

        self.val_iter = iter(self.val_loader)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LOGGING OPTIONS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        print(
            "++++++++++++++++++++++ INIT TRAINING ++++++++++++++++++++++++++")
        print("Using dataset:\n  ", self.opt.dataset, "with split",
              self.opt.dataset_split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        path_getter = GetPath()
        log_path = path_getter.get_checkpoint_path()
        self.log_path = os.path.join(log_path, 'erfnet', self.opt.model_name)

        self.writers = {}
        for mode in ["train", "validation"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        # Copy this file to log dir
        shutil.copy2(__file__, self.log_path)

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.log_path)
        print("Training is using:\n  ", self.device)
        print("Training takes place on train set:\n  ", self.opt.train_set)
        print(
            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           MODEL DEFINITION
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # Instantiate model
        self.model = ERFNet(self.num_classes, self.opt)
        self.model.to(self.device)
        self.parameters_to_train = self.model.parameters()

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           OPTIMIZER SET-UP
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.model_optimizer = optim.Adam(params=self.parameters_to_train,
                                          lr=self.opt.learning_rate,
                                          weight_decay=self.opt.weight_decay)
        lambda1 = lambda epoch: pow((1 -
                                     ((epoch - 1) / self.opt.num_epochs)), 0.9)
        self.model_lr_scheduler = optim.lr_scheduler.LambdaLR(
            self.model_optimizer, lr_lambda=lambda1)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LOSSES
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.crossentropy = CrossEntropyLoss(ignore_background=True,
                                             device=self.device)
        self.crossentropy.to(self.device)

        self.metric_model = SegmentationRunningScore(self.num_classes)

        # Save all options to disk and print them to stdout
        self.save_opts(len(train_dataset), len(val_dataset))
        self._print_options()

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           EVALUATOR DEFINITION
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        if self.opt.validate:
            self.evaluator = Evaluator(self.opt, self.model)
Example #11
    def __init__(self,
                 dataset,
                 trainvaltest_split,
                 video_mode='mono',
                 stereo_mode='mono',
                 cluster_mode=None,
                 simple_mode=False,
                 labels=None,
                 labels_mode=None,
                 data_transforms=None,
                 scales=None,
                 keys_to_load=None,
                 keys_to_video=None,
                 keys_to_stereo=None,
                 split=None,
                 video_frames=None,
                 disable_const_items=True,
                 folders_to_load=None,
                 files_to_load=None,
                 n_files=None,
                 output_filenames=False,
                 flow_validation_mode=True):
        """Initialises the dataset by loading the desired data from the json file

        :param dataset: name of the dataset
        :param trainvaltest_split: can be train, validation or test
        :param video_mode: can be mono or video and defines if only the images or image sequences are to be loaded
        :param stereo_mode: can be mono or stereo and defines if the stereo images are to be loaded
        :param simple_mode: if True, the data is read directly from a folder without using a .json file
        :param labels: gives the labels as defined in the named tuples style in Cityscapes. Get the labels from the
            definitions folder
        :param labels_mode: can be fromid or fromrgb and defines if the segmentation masks are given as id or color
        :param data_transforms: takes the transforms.compose list
        :param scales: list of all scales at which the images should be loaded (list of exponents for powers of 2)
        :param keys_to_load: defines all keys which should be loaded
        :param keys_to_video: defines for which keys the sequences are to be loaded
        :param keys_to_stereo: defines for which keys the stereo images are supposed to be loaded
        :param split: dataset split that is supposed to be loaded. default is the complete dataset itself
        :param video_frames: all frames of the sequence that are supposed to be loaded (list of frame numbers relative
            to the main frame, e.g. [0, -2, -1, 1, 2])
        :param disable_const_items: removes constant items like camera calibration from the loading procedure
        :param folders_to_load: list of folders from which data should be loaded; folders not mentioned are skipped in
            the respective set. Only the last folder in a path is considered; filter is case insensitive.
            Default: None -> all folders are loaded from dataset
        :param files_to_load: list of files that should be loaded; files not mentioned are skipped in the respective
            set. File names need not be complete; filter is case insensitive.
            Default: None -> all files are loaded from dataset
        :param n_files: number of files to load. If there are more files than n_files, they are selected randomly,
            seeded by numpy.random.seed()
        :param output_filenames: if True, the file names are included in the loaded samples
        :param flow_validation_mode: if True, only non-data-altering transforms are allowed when flow keys are loaded
        """
        super(BaseDataset, self).__init__()
        assert isinstance(dataset, str)
        assert trainvaltest_split in ('train', 'validation', 'test'), \
            'trainvaltest_split must be train, validation or test'
        assert video_mode in ('mono', 'video'), 'video_mode must be mono or video'
        assert stereo_mode in ('mono', 'stereo'), 'stereo_mode must be mono or stereo'
        assert isinstance(simple_mode, bool)
        if data_transforms is None:
            data_transforms = [
                mytransforms.CreateScaledImage(),
                mytransforms.CreateColoraug(),
                mytransforms.ToTensor()
            ]
        if scales is None:
            scales = [0]
        if keys_to_load is None:
            keys_to_load = ['color']
        if keys_to_stereo is None and stereo_mode == 'stereo':
            keys_to_stereo = ['color']
        if keys_to_video is None and video_mode == 'video':
            keys_to_video = ['color']
        if video_frames is None:
            video_frames = [0, -1, 1]

        self.dataset = dataset
        self.video_mode = video_mode
        self.stereo_mode = stereo_mode
        self.scales = scales
        self.disable_const_items = disable_const_items
        self.output_filenames = output_filenames
        self.parameters = dps.DatasetParameterset(dataset)
        if labels is not None:
            self.parameters.labels = labels
        if labels_mode is not None:
            self.parameters.labels_mode = labels_mode
        path_getter = gp.GetPath()
        dataset_folder = path_getter.get_data_path()
        datasetpath = os.path.join(dataset_folder, self.dataset)
        self.datasetpath = datasetpath
        if split is None:
            splitpath = None
        else:
            splitpath = os.path.join(dataset_folder,
                                     self.dataset + '_' + split)

        if simple_mode is False:
            self.data = self.read_json_file(datasetpath, splitpath,
                                            trainvaltest_split, keys_to_load,
                                            keys_to_stereo, keys_to_video,
                                            video_frames, folders_to_load,
                                            files_to_load, n_files)
        else:
            self.data = self.read_from_folder(datasetpath, keys_to_load,
                                              video_mode, video_frames)

        self.load_transforms = transforms.Compose([
            mytransforms.LoadRGB(),
            mytransforms.LoadSegmentation(),
            mytransforms.LoadDepth(),
            mytransforms.LoadFlow(validation_mode=flow_validation_mode),
            mytransforms.LoadNumerics()
        ])

        # IMPORTANT to create a new list if the same list is passed to multiple datasets. Otherwise, due to the
        # mutability of lists, ConvertSegmentation will only be added once. Hence, the labels may be wrong for the 2nd,
        # 3rd, ... dataset!
        self.data_transforms = list(data_transforms)

        # Error if CreateScaledImage or CreateColoraug is not in the transforms list.
        if mytransforms.CreateScaledImage not in data_transforms:
            raise Exception(
                'The transform CreateScaledImage() has to be part of the data_transforms list'
            )
        if mytransforms.CreateColoraug not in data_transforms:
            raise Exception(
                'The transform CreateColoraug() has to be part of the data_transforms list'
            )

        # Error if depth, segmentation or flow keys are given but not the corresponding Convert-Transform
        if any([key.startswith('segmentation') for key in keys_to_load]) and \
                mytransforms.ConvertSegmentation not in self.data_transforms:
            raise Exception(
                'When loading segmentation images, please add mytransforms.ConvertSegmentation() to '
                'the data_transforms')
        if any([key.startswith('depth') for key in keys_to_load]) and \
                mytransforms.ConvertDepth not in self.data_transforms:
            raise Exception(
                'When loading depth images, please add mytransforms.ConvertDepth() to the data_transforms'
            )
        if any([key.startswith('flow') for key in keys_to_load]) and \
                mytransforms.ConvertFlow not in self.data_transforms:
            raise Exception(
                'When loading flow images, please add mytransforms.ConvertFlow() to the data_transforms'
            )

        # In the flow validation mode, it is not allowed to use data-altering transforms
        if any([key.startswith('flow')
                for key in keys_to_load]) and flow_validation_mode:
            allowed_transforms = [
                mytransforms.CreateScaledImage, mytransforms.CreateColoraug,
                mytransforms.ConvertSegmentation, mytransforms.ConvertDepth,
                mytransforms.ConvertFlow, mytransforms.RemoveOriginals,
                mytransforms.ToTensor, mytransforms.Relabel,
                mytransforms.OneHotEncoding, mytransforms.NormalizeZeroMean,
                mytransforms.AdjustKeys, mytransforms.RemapKeys,
                mytransforms.AddKeyValue
            ]
            for transform in self.data_transforms:
                if transform not in allowed_transforms:
                    raise Exception(
                        'In flow validation mode, it is not allowed to use data-altering transforms'
                    )

        # Set the correct parameters to the ConvertDepth and ConvertSegmentation transforms
        for transform in self.data_transforms:
            if isinstance(transform, mytransforms.ConvertDepth):
                transform.set_mode(self.parameters.depth_mode)
            elif isinstance(transform, mytransforms.ConvertSegmentation):
                transform.set_mode(self.parameters.labels,
                                   self.parameters.labels_mode)
            elif isinstance(transform, mytransforms.ConvertFlow):
                transform.set_mode(self.parameters.flow_mode,
                                   flow_validation_mode)

        self.data_transforms = transforms.Compose(self.data_transforms)
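
    # A minimal instantiation sketch for a subclass such as StandardDataset
    # (dataset name and keys are illustrative). ConvertSegmentation() is
    # required here because a segmentation key is loaded:
    #
    #   dataset = StandardDataset('cityscapes',
    #                             trainvaltest_split='train',
    #                             keys_to_load=['color', 'segmentation'],
    #                             data_transforms=[
    #                                 mytransforms.CreateScaledImage(),
    #                                 mytransforms.ConvertSegmentation(),
    #                                 mytransforms.CreateColoraug(),
    #                                 mytransforms.ToTensor(),
    #                             ])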
Example #12
def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """A loader that loads image sequences for depth training from the
    kitti training set.
    This loader returns sequences from the left camera, as well as from the right camera.
    """

    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width),
                  image_types=('color', 'depth', 'camera_intrinsics', 'K')),
        tf.ConvertDepth(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(brightness=0.2,
                       contrast=0.2,
                       saturation=0.2,
                       hue=0.1,
                       gamma=0.0,
                       fraction=0.5),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_train_depth'),
        tf.AddKeyValue('purposes', ('depth', 'domain')),
    ]

    dataset_name = 'kitti'

    cfg_common = {
        'dataset': dataset_name,
        'trainvaltest_split': 'train',
        'video_mode': 'video',
        'stereo_mode': 'mono',
        'split': 'zhou_split',
        'video_frames': (0, -1, 1),
        'disable_const_items': False
    }

    cfg_left = {'keys_to_load': ('color', ), 'keys_to_video': ('color', )}

    cfg_right = {
        'keys_to_load': ('color_right', ),
        'keys_to_video': ('color_right', )
    }

    dataset_left = StandardDataset(data_transforms=transforms_common,
                                   **cfg_left,
                                   **cfg_common)

    dataset_right = StandardDataset(data_transforms=[tf.ExchangeStereo()] +
                                    transforms_common,
                                    **cfg_right,
                                    **cfg_common)

    dataset = ConcatDataset((dataset_left, dataset_right))

    loader = DataLoader(dataset,
                        batch_size,
                        shuffle=True,
                        num_workers=num_workers,
                        pin_memory=True,
                        drop_last=True)

    print(
        f"  - Can use {len(dataset)} images from the kitti (zhou_split) train split for depth training",
        flush=True)

    return loader
Example #13
def check_scaled_dataset(dataset_name,
                         scaled_dataset_name,
                         trainvaltest_split,
                         keys_to_load,
                         scaled_size,
                         split=None):
    """ Checks whether the images in a dataset generated by the dataset_scaler are identical to the images that are
    generated by loading the original dataset and scaling them afterwards

    :param dataset_name: Name of the unscaled dataset
    :param scaled_dataset_name: Name of the scaled dataset
    :param trainvaltest_split: 'train', 'validation' or 'test'
    :param keys_to_load: keys that are supposed to be loaded, e.g. 'color', 'depth', 'segmentation', ...
    :param scaled_size: Size of the scaled image (h, w)
    :param split: Name of the dataset split, if one exists
    """
    dataset = dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.Resize(output_size=scaled_size),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())
    print('\n Loading {} dataset'.format(dataset))
    my_dataset = StandardDataset(dataset,
                                 split=split,
                                 trainvaltest_split=trainvaltest_split,
                                 keys_to_load=keys_to_load,
                                 data_transforms=data_transforms,
                                 output_filenames=True)
    my_loader = DataLoader(my_dataset,
                           batch_size=1,
                           shuffle=False,
                           num_workers=0,
                           pin_memory=True,
                           drop_last=True)
    print_dataset(my_loader)

    dataset_s = scaled_dataset_name
    data_transforms = [
        mytransforms.CreateScaledImage(),
        mytransforms.CreateColoraug(),
        mytransforms.ToTensor(),
    ]
    if keys_to_load is not None:
        if any('depth' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertDepth())
        if any('segmentation' in key for key in keys_to_load):
            data_transforms.insert(0, mytransforms.ConvertSegmentation())
    print('\n Loading {} dataset'.format(dataset_s))
    my_dataset_s = StandardDataset(dataset_s,
                                   split=split,
                                   trainvaltest_split=trainvaltest_split,
                                   keys_to_load=keys_to_load,
                                   data_transforms=data_transforms,
                                   output_filenames=True)
    my_loader_s = DataLoader(my_dataset_s,
                             batch_size=1,
                             shuffle=False,
                             num_workers=0,
                             pin_memory=True,
                             drop_last=True)
    print_dataset(my_loader_s)
    print("Testing dataset_scaler")
    samples = []
    samples_s = []
    iter_my_loader = iter(my_loader)
    iter_my_loader_s = iter(my_loader_s)
    for _ in range(2):
        samples.append(next(iter_my_loader).copy())
        samples_s.append(next(iter_my_loader_s).copy())
    for key in keys_to_load:
        print("Check if {} entries are equal:".format(key))
        print("  Should be False: {}".format(
            torch.equal(samples[1][(key, 0, 0)], samples_s[0][(key, 0, 0)])))
        print("  Should be True: {}".format(
            torch.equal(samples[0][(key, 0, 0)], samples_s[0][(key, 0, 0)])))
        print("  Should be True: {}".format(
            torch.equal(samples[1][(key, 0, 0)], samples_s[1][(key, 0, 0)])))
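
# Example invocation (dataset names and size are illustrative): compare a
# dataset that was scaled offline by the dataset_scaler against the original
# dataset resized on the fly to (512, 1024):
#
#   check_scaled_dataset('cityscapes', 'cityscapes_scaled', 'validation',
#                        ['color'], (512, 1024))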
Example #14
    # The following parameters and the data_transforms list are optional. Standard is just the transform ToTensor()
    width = 640
    height = 192
    scales = [0, 1, 2, 3]
    data_transforms = [  #mytransforms.RandomExchangeStereo(),  # (color, 0, -1)
        mytransforms.RandomHorizontalFlip(),
        mytransforms.RandomVerticalFlip(),
        mytransforms.CreateScaledImage(),  # (color, 0, 0)
        mytransforms.RandomRotate(0.0),
        mytransforms.RandomTranslate(0),
        mytransforms.RandomRescale(scale=1.1, fraction=0.5),
        mytransforms.RandomCrop((320, 1088)),
        mytransforms.Resize((height, width)),
        mytransforms.MultiResize(scales),
        mytransforms.CreateColoraug(new_element=True,
                                    scales=scales),  # (color_aug, 0, 0)
        mytransforms.ColorJitter(brightness=0.2,
                                 contrast=0.2,
                                 saturation=0.2,
                                 hue=0.1,
                                 gamma=0.0),
        mytransforms.GaussianBlurr(fraction=0.5),
        mytransforms.RemoveOriginals(),
        mytransforms.ToTensor(),
        mytransforms.NormalizeZeroMean(),
    ]

    print('Loading {} dataset, {} split'.format(dataset, trainvaltest_split))
    traindataset = StandardDataset(
        dataset,
        trainvaltest_split,
        scales=scales,
        data_transforms=data_transforms,
    )