Beispiel #1
0
def cityscapes_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """Build the training loader for Cityscapes segmentation.

    The samples come from the ``train`` split of the ``cityscapes`` dataset
    with the ``color`` and ``segmentation`` keys loaded. Every sample is
    tagged with ``domain``, ``purposes`` and ``num_classes`` entries.

    Args:
        resize_height: Height handed to ``tf.Resize``.
        resize_width: Width handed to ``tf.Resize``.
        crop_height: Height handed to ``tf.RandomCrop``.
        crop_width: Width handed to ``tf.RandomCrop``.
        batch_size: Forwarded to the loader as its batch size.
        num_workers: Forwarded to the loader as ``num_workers``.

    Returns:
        The ``DataLoader`` wrapping the dataset (shuffling enabled,
        ``pin_memory`` and ``drop_last`` set).
    """
    seg_labels = labels_cityscape_seg.getlabels()
    class_count = len(labels_cityscape_seg.gettrainid2label())

    # Flip / scale / crop steps, followed by the segmentation conversion.
    spatial_steps = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize((resize_height, resize_width)),
        tf.RandomRescale(1.5),
        tf.RandomCrop((crop_height, crop_width)),
        tf.ConvertSegmentation(),
    ]

    # Colour augmentation steps.
    color_steps = [
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
            gamma=0.0,
        ),
    ]

    # Clean-up, tensor conversion and the constant per-sample tags.
    output_steps = [
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_train_seg'),
        tf.AddKeyValue('purposes', ('segmentation', 'domain')),
        tf.AddKeyValue('num_classes', class_count),
    ]

    dataset_cfg = {
        'dataset': 'cityscapes',
        'trainvaltest_split': 'train',
        'video_mode': 'mono',
        'stereo_mode': 'mono',
        'labels_mode': 'fromid',
        'disable_const_items': True,
        'labels': seg_labels,
        'keys_to_load': ('color', 'segmentation'),
        'data_transforms': spatial_steps + color_steps + output_steps,
        'video_frames': (0, ),
    }
    dataset = StandardDataset(**dataset_cfg)

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print(
        f"  - Can use {len(dataset)} images from the cityscapes train set for segmentation training",
        flush=True)

    return loader
Beispiel #2
0
# Names of the items to load. They correspond to the 'names' entries in the
# train.json, validation.json and test.json files. This example requests every
# key that is available for cityscapes.
keys_to_load = [
    'color',
    'color_right',
    'depth',
    'segmentation',
    'camera_intrinsics',
    'camera_intrinsics_right',
    'velocity',
]

# Data transforms are applied to every loaded item and alter all image
# categories in the same way. At minimum, CreateScaledImage() and
# CreateColoraug() have to be included. Each image category such as depth or
# segmentation additionally needs its corresponding Convert* transform.
data_transforms = [
    mytransforms.CreateScaledImage(),
    mytransforms.RemoveOriginals(),
    mytransforms.RandomCrop((1024, 2048)),
    mytransforms.RandomHorizontalFlip(),
    mytransforms.CreateColoraug(new_element=True),
    # The jitter values below are chosen for visualization only.
    mytransforms.ColorJitter(
        brightness=0.2,
        contrast=0.5,
        saturation=0.5,
        hue=0.5,
        gamma=0.5,
        fraction=1.0,
    ),
    # The Convert* transforms should come after scaling/rotating/cropping.
    mytransforms.ConvertDepth(),
    mytransforms.ConvertSegmentation(),
    mytransforms.ToTensor(),
]

# With the parameters specified above, a StandardDataset can now be created.
# You can iterate through it using the PyTorch DataLoader class.
Beispiel #3
0
def cityscapes_sequence_train(resize_height, resize_width, crop_height,
                              crop_width, batch_size, num_workers):
    """Build the adaptation loader for the cityscapes_sequence training set.

    Two datasets are created over the ``train`` split: one loading ``color``
    and one loading ``color_right``. The second one gets ``tf.ExchangeStereo``
    prepended to the otherwise shared transform list. Both are concatenated
    into a single dataset that backs the returned loader.

    Args:
        resize_height: Base height from which the resize and crop sizes
            are derived.
        resize_width: Base width from which the resize and crop sizes
            are derived.
        crop_height: Not used by this loader.
        crop_width: Not used by this loader.
        batch_size: Forwarded to the loader as its batch size.
        num_workers: Forwarded to the loader as ``num_workers``.

    Returns:
        The ``DataLoader`` over the concatenated dataset (shuffling enabled,
        ``pin_memory`` and ``drop_last`` set).
    """
    # All sizes are expressed relative to a 512 x 1024 reference resolution.
    scaled_size = (resize_height * 568 // 512, resize_width * 1092 // 1024)
    sides_crop_first = (resize_height * 320 // 512,
                        resize_width * 1024 // 1024)
    # Second SidesCrop argument; presumably an offset -- TODO confirm.
    sides_crop_second = (resize_height * 32 // 512, resize_width * 33 // 1024)

    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize(scaled_size, image_types=('color', )),
        # crop away the sides and bottom parts of the image
        tf.SidesCrop(sides_crop_first, sides_crop_second),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
            gamma=0.0,
        ),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'cityscapes_sequence_adaptation'),
        tf.AddKeyValue('purposes', ('adaptation', )),
    ]

    # Settings shared by the left and the right dataset.
    shared_cfg = dict(
        dataset='cityscapes_sequence',
        trainvaltest_split='train',
        video_mode='mono',
        stereo_mode='mono',
    )

    left_view = StandardDataset(
        data_transforms=transforms_common,
        keys_to_load=('color', ),
        keys_to_video=('color', ),
        **shared_cfg,
    )

    right_view = StandardDataset(
        data_transforms=[tf.ExchangeStereo(), *transforms_common],
        keys_to_load=('color_right', ),
        keys_to_video=('color_right', ),
        **shared_cfg,
    )

    dataset = ConcatDataset((left_view, right_view))

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print(
        f"  - Can use {len(dataset)} images from the cityscapes_sequence train set for adaptation",
        flush=True)

    return loader
    def __init__(self, options):
        """Set up data loaders, logging, model, optimizer, loss and metrics.

        Side effects: prints progress to stdout, creates one ``SummaryWriter``
        per mode below the log directory, copies this script file into the
        log directory and calls ``self.save_opts`` / ``self._print_options``.

        Args:
            options: Option object stored as ``self.opt``. The attributes
                read here are ``no_cuda``, ``train_set``, ``city``,
                ``height``, ``width``, ``crop_height``, ``crop_width``,
                ``scales``, ``video_frames``, ``batch_size``,
                ``num_workers``, ``dataset``, ``dataset_split``,
                ``model_name``, ``learning_rate``, ``weight_decay``,
                ``num_epochs`` and ``validate``. It is also handed on to
                ``ERFNet`` and ``Evaluator``.

        Raises:
            AssertionError: If ``options.train_set`` is neither 1 nor 123.
            KeyError: If no label carries the train id 255.
        """

        print(" -> Executing script", os.path.basename(__file__))

        self.opt = options
        self.device = torch.device("cpu" if self.opt.no_cuda else "cuda")

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LABELS AND CITIES
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        assert self.opt.train_set in {123, 1}, "Invalid train_set!"
        keys_to_load = ['color', 'segmentation']

        # Labels
        if self.opt.train_set == 1:
            labels = labels_cityscape_seg_train1.getlabels()
        else:
            labels = labels_cityscape_seg_train3_eval.getlabels()

        # Train IDs: the distinct trainId values of all labels, minus 255
        # (presumably the ignore/background id -- confirm against the label
        # definitions).
        self.train_ids = {label.trainId for label in labels}
        self.train_ids.remove(255)

        self.num_classes = len(self.train_ids)

        # Apply city filter: city set 0 by default, the set matching
        # train_set when the city option is enabled.
        folders_to_train = CitySet.get_city_set(0)
        if self.opt.city:
            folders_to_train = CitySet.get_city_set(self.opt.train_set)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           DATASET DEFINITIONS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # Data augmentation
        train_data_transforms = [
            mytransforms.RandomHorizontalFlip(),
            mytransforms.CreateScaledImage(),
            mytransforms.Resize((self.opt.height, self.opt.width),
                                image_types=keys_to_load),
            mytransforms.RandomRescale(1.5),
            mytransforms.RandomCrop(
                (self.opt.crop_height, self.opt.crop_width)),
            mytransforms.ConvertSegmentation(),
            mytransforms.CreateColoraug(new_element=True,
                                        scales=self.opt.scales),
            mytransforms.ColorJitter(brightness=0.2,
                                     contrast=0.2,
                                     saturation=0.2,
                                     hue=0.1,
                                     gamma=0.0),
            mytransforms.RemoveOriginals(),
            mytransforms.ToTensor(),
            mytransforms.NormalizeZeroMean(),
        ]

        train_dataset = CityscapesDataset(
            dataset="cityscapes",
            trainvaltest_split='train',
            video_mode='mono',
            stereo_mode='mono',
            scales=self.opt.scales,
            labels_mode='fromid',
            labels=labels,
            keys_to_load=keys_to_load,
            data_transforms=train_data_transforms,
            video_frames=self.opt.video_frames,
            folders_to_load=folders_to_train,
        )

        self.train_loader = DataLoader(dataset=train_dataset,
                                       batch_size=self.opt.batch_size,
                                       shuffle=True,
                                       num_workers=self.opt.num_workers,
                                       pin_memory=True,
                                       drop_last=True)

        # Validation uses the same pipeline without flip, rescale, crop and
        # colour jitter.
        val_data_transforms = [
            mytransforms.CreateScaledImage(),
            mytransforms.Resize((self.opt.height, self.opt.width),
                                image_types=keys_to_load),
            mytransforms.ConvertSegmentation(),
            mytransforms.CreateColoraug(new_element=True,
                                        scales=self.opt.scales),
            mytransforms.RemoveOriginals(),
            mytransforms.ToTensor(),
            mytransforms.NormalizeZeroMean(),
        ]

        # NOTE(review): the validation items are taken from the "train" split
        # restricted to city set -1, and the dataset name comes from the
        # options instead of the literal "cityscapes" used for training --
        # presumably a held-out subset of the training cities; confirm.
        val_dataset = CityscapesDataset(
            dataset=self.opt.dataset,
            trainvaltest_split="train",
            video_mode='mono',
            stereo_mode='mono',
            scales=self.opt.scales,
            labels_mode='fromid',
            labels=labels,
            keys_to_load=keys_to_load,
            data_transforms=val_data_transforms,
            video_frames=self.opt.video_frames,
            folders_to_load=CitySet.get_city_set(-1))

        self.val_loader = DataLoader(dataset=val_dataset,
                                     batch_size=self.opt.batch_size,
                                     shuffle=False,
                                     num_workers=self.opt.num_workers,
                                     pin_memory=True,
                                     drop_last=True)

        self.val_iter = iter(self.val_loader)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LOGGING OPTIONS
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        print(
            "++++++++++++++++++++++ INIT TRAINING ++++++++++++++++++++++++++")
        print("Using dataset:\n  ", self.opt.dataset, "with split",
              self.opt.dataset_split)
        print(
            "There are {:d} training items and {:d} validation items\n".format(
                len(train_dataset), len(val_dataset)))

        path_getter = GetPath()
        log_path = path_getter.get_checkpoint_path()
        self.log_path = os.path.join(log_path, 'erfnet', self.opt.model_name)

        self.writers = {}
        for mode in ["train", "validation"]:
            self.writers[mode] = SummaryWriter(
                os.path.join(self.log_path, mode))

        # Copy this file to log dir
        shutil.copy2(__file__, self.log_path)

        print("Training model named:\n  ", self.opt.model_name)
        print("Models and tensorboard events files are saved to:\n  ",
              self.log_path)
        print("Training is using:\n  ", self.device)
        print("Training takes place on train set:\n  ", self.opt.train_set)
        print(
            "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           MODEL DEFINITION
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        # Instantiate model
        self.model = ERFNet(self.num_classes, self.opt)
        self.model.to(self.device)
        # Materialise the parameters: model.parameters() is a one-shot
        # iterator that the optimizer below would exhaust, which would leave
        # this attribute empty for any later use.
        self.parameters_to_train = list(self.model.parameters())

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           OPTIMIZER SET-UP
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.model_optimizer = optim.Adam(params=self.parameters_to_train,
                                          lr=self.opt.learning_rate,
                                          weight_decay=self.opt.weight_decay)

        def poly_lr_factor(epoch):
            """Return the factor (1 - (epoch - 1) / num_epochs) ** 0.9."""
            # NOTE(review): for epoch == 0 the factor is slightly above 1.
            return pow((1 - ((epoch - 1) / self.opt.num_epochs)), 0.9)

        self.model_lr_scheduler = optim.lr_scheduler.LambdaLR(
            self.model_optimizer, lr_lambda=poly_lr_factor)

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           LOSSES
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        self.crossentropy = CrossEntropyLoss(ignore_background=True,
                                             device=self.device)
        self.crossentropy.to(self.device)

        self.metric_model = SegmentationRunningScore(self.num_classes)

        # Save all options to disk and print them to stdout
        self.save_opts(len(train_dataset), len(val_dataset))
        self._print_options()

        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        #                           EVALUATOR DEFINITION
        # +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
        if self.opt.validate:
            self.evaluator = Evaluator(self.opt, self.model)
Beispiel #5
0
def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width,
                     batch_size, num_workers):
    """Build the depth-training loader for the KITTI ``zhou_split``.

    Two video-mode datasets (frames ``0, -1, 1``) are created over the
    ``train`` split: one loading ``color`` and one loading ``color_right``.
    The second one gets ``tf.ExchangeStereo`` prepended to the otherwise
    shared transform list. Both are concatenated into a single dataset that
    backs the returned loader.

    Args:
        resize_height: Height handed to ``tf.Resize``.
        resize_width: Width handed to ``tf.Resize``.
        crop_height: Not used by this loader.
        crop_width: Not used by this loader.
        batch_size: Forwarded to the loader as its batch size.
        num_workers: Forwarded to the loader as ``num_workers``.

    Returns:
        The ``DataLoader`` over the concatenated dataset (shuffling enabled,
        ``pin_memory`` and ``drop_last`` set).
    """
    target_size = (resize_height, resize_width)
    resized_types = ('color', 'depth', 'camera_intrinsics', 'K')

    transforms_common = [
        tf.RandomHorizontalFlip(),
        tf.CreateScaledImage(),
        tf.Resize(target_size, image_types=resized_types),
        tf.ConvertDepth(),
        tf.CreateColoraug(new_element=True),
        tf.ColorJitter(
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
            gamma=0.0,
            fraction=0.5,
        ),
        tf.RemoveOriginals(),
        tf.ToTensor(),
        tf.NormalizeZeroMean(),
        tf.AddKeyValue('domain', 'kitti_zhou_train_depth'),
        tf.AddKeyValue('purposes', ('depth', 'domain')),
    ]

    # Settings shared by the left and the right dataset.
    shared_cfg = dict(
        dataset='kitti',
        trainvaltest_split='train',
        video_mode='video',
        stereo_mode='mono',
        split='zhou_split',
        video_frames=(0, -1, 1),
        disable_const_items=False,
    )

    left_view = StandardDataset(
        data_transforms=transforms_common,
        keys_to_load=('color', ),
        keys_to_video=('color', ),
        **shared_cfg,
    )

    right_view = StandardDataset(
        data_transforms=[tf.ExchangeStereo(), *transforms_common],
        keys_to_load=('color_right', ),
        keys_to_video=('color_right', ),
        **shared_cfg,
    )

    dataset = ConcatDataset((left_view, right_view))

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True,
        drop_last=True,
    )

    print(
        f"  - Can use {len(dataset)} images from the kitti (zhou_split) train split for depth training",
        flush=True)

    return loader