def get_test_dataloader(args):
    train_transform, val_transform = get_transform(args)
    data_set = CityscapesDataset(root_dir=args.cityscapes_data_path,
                                 type="test",
                                 choose_size=args.data_choose_size,
                                 transform=val_transform)
    data_loader = DataLoader(dataset=data_set,
                             batch_size=args.test_batch_size,
                             shuffle=False,
                             num_workers=args.prefetch,
                             pin_memory=False,
                             drop_last=False)
    return data_loader
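A minimal usage sketch for the loader above, assuming args is an argparse.Namespace that carries the attributes read here (cityscapes_data_path, data_choose_size, test_batch_size, prefetch); get_transform(args) may read further attributes that are not shown, and the per-batch layout is defined by CityscapesDataset, so both are only assumptions:

from argparse import Namespace

# Hypothetical argument values; only the attributes used above are filled in.
args = Namespace(cityscapes_data_path='/data/cityscapes',
                 data_choose_size=None,      # None = keep the full split
                 test_batch_size=4,
                 prefetch=4)                 # forwarded to num_workers

test_loader = get_test_dataloader(args)
for batch in test_loader:
    # Batch structure (image/target tuple, dict, ...) depends on CityscapesDataset.
    pass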
Example #2
def get_voc_instance_dataset(args, train, repeat):
    train_transform, val_transform = get_transform(args)

    if train:
        data_set = VOCInstanceDataset(data_dir=args.voc2012_data_path,
                                      transform=train_transform,
                                      train=True,
                                      choose_size=args.data_choose_size,
                                      repeat=repeat)
    else:
        data_set = VOCInstanceDataset(data_dir=args.voc2012_data_path,
                                      transform=val_transform,
                                      train=False)

    return data_set
Example #3
def get_voc_sbd_instance_dataset(args, train, repeat):
    train_transform, val_transform = get_transform(args)

    if train:
        voc_data_set = VOCInstanceDataset(data_dir=args.voc2012_data_path,
                                          transform=train_transform,
                                          train=True,
                                          choose_size=None,
                                          repeat=repeat)
        sbd_data_set = SBDInstanceDataset(data_dir=args.sbd_data_path,
                                          transform=train_transform,
                                          choose_size=None,
                                          repeat=args.sbd_repeat)
        return ConcatDataset(datasets=[voc_data_set, sbd_data_set])
    else:
        data_set = VOCInstanceDataset(data_dir=args.voc2012_data_path,
                                      transform=val_transform,
                                      train=False)

    return data_set
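In the training branch above, the VOC and SBD splits are merged with ConcatDataset, whose length is the sum of its parts and whose indexing walks the member datasets in order. A self-contained sketch of that behaviour with toy tensors (stand-ins, not the actual instance datasets):

import torch
from torch.utils.data import ConcatDataset, TensorDataset

voc_like = TensorDataset(torch.zeros(10, 3))   # stand-in for the VOC split
sbd_like = TensorDataset(torch.ones(25, 3))    # stand-in for the SBD split
merged = ConcatDataset([voc_like, sbd_like])

assert len(merged) == 35                            # 10 + 25
assert torch.equal(merged[0][0], torch.zeros(3))    # indices 0..9 come from voc_like
assert torch.equal(merged[10][0], torch.ones(3))    # indices 10..34 come from sbd_like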
Example #4
def main(args):
    """Main function."""

    # 1. Configurations
    torch.backends.cudnn.benchmark = True
    ENCODER_CONFIGS, DECODER_CONFIGS, Config, Encoder, Decoder = \
        AVAILABLE_MODELS[args.backbone_type]

    config = Config(args)
    config.save()

    logfile = os.path.join(config.checkpoint_dir, 'main.log')
    logger = get_logger(stream=False, logfile=logfile)

    # 2. Data
    input_transform = get_transform(config.data,
                                    size=config.input_size,
                                    mode=config.augmentation,
                                    noise=config.noise)
    target_transform = get_transform(config.data,
                                     size=config.input_size,
                                     mode='test')
    if config.data == 'wm811k':
        train_set = torch.utils.data.ConcatDataset([
            WM811KForDenoising('./data/wm811k/unlabeled/train/',
                               input_transform, target_transform),
            WM811KForDenoising('./data/wm811k/labeled/train/', input_transform,
                               target_transform),
        ])
        valid_set = torch.utils.data.ConcatDataset([
            WM811KForDenoising('./data/wm811k/unlabeled/valid/',
                               input_transform, target_transform),
            WM811KForDenoising('./data/wm811k/labeled/valid/', input_transform,
                               target_transform),
        ])
        test_set = torch.utils.data.ConcatDataset([
            WM811KForDenoising('./data/wm811k/unlabeled/test/',
                               input_transform, target_transform),
            WM811KForDenoising('./data/wm811k/labeled/test/', input_transform,
                               target_transform),
        ])
    else:
        raise ValueError(
            f"Denoising only supports 'wm811k' data. Received '{config.data}'."
        )

    # 3. Model
    encoder = Encoder(RESNET_ENCODER_CONFIGS[config.backbone_config],
                      in_channels=IN_CHANNELS[config.data])
    decoder = Decoder(RESNET_DECODER_CONFIGS[config.backbone_config],
                      input_shape=encoder.output_shape,
                      output_shape=(OUT_CHANNELS[config.data],
                                    config.input_size, config.input_size))

    # 4. Optimization
    params = [{
        'params': encoder.parameters()
    }, {
        'params': decoder.parameters()
    }]
    optimizer = get_optimizer(params=params,
                              name=config.optimizer,
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay,
                              momentum=config.momentum)
    scheduler = get_scheduler(optimizer=optimizer,
                              name=config.scheduler,
                              epochs=config.epochs,
                              milestone=config.milestone,
                              warmup_steps=config.warmup_steps)

    # 5. Experiment (Denoising)
    experiment_kwargs = {
        'encoder': encoder,
        'decoder': decoder,
        'optimizer': optimizer,
        'scheduler': scheduler,
        'loss_function': nn.CrossEntropyLoss(reduction='mean'),
        'metrics': None,
        'checkpoint_dir': config.checkpoint_dir,
        'write_summary': config.write_summary,
    }
    experiment = Denoising(**experiment_kwargs)

    # 6. Run (train, evaluate, and test model)
    run_kwargs = {
        'train_set': train_set,
        'valid_set': valid_set,
        'test_set': test_set,
        'epochs': config.epochs,
        'batch_size': config.batch_size,
        'num_workers': config.num_workers,
        'device': config.device,
        'logger': logger,
        'save_every': config.save_every,
    }

    logger.info(f"Data: {config.data}")
    logger.info(f"Augmentation: {config.augmentation}")
    logger.info(
        f"Train : Valid : Test = {len(train_set):,} : {len(valid_set):,} : {len(test_set):,}"
    )
    logger.info(
        f"Trainable parameters ({encoder.__class__.__name__}): {encoder.num_parameters:,}"
    )
    logger.info(
        f"Trainable parameters ({decoder.__class__.__name__}): {decoder.num_parameters:,}"
    )
    logger.info(f"Saving model checkpoints to: {experiment.checkpoint_dir}")
    logger.info(
        f"Epochs: {run_kwargs['epochs']}, Batch size: {run_kwargs['batch_size']}"
    )
    logger.info(
        f"Workers: {run_kwargs['num_workers']}, Device: {run_kwargs['device']}"
    )

    steps_per_epoch = len(train_set) // config.batch_size + 1
    logger.info(f"Training steps per epoch: {steps_per_epoch:,}")
    logger.info(
        f"Total number of training iterations: {steps_per_epoch * config.epochs:,}"
    )

    experiment.run(**run_kwargs)
    logger.handlers.clear()
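The params list handed to get_optimizer above follows PyTorch's per-parameter-group convention, so the same structure works unchanged with the stock optimizers. A minimal sketch with torch.optim.SGD and toy modules (the hyperparameter values are hypothetical):

import torch
import torch.nn as nn

toy_encoder = nn.Linear(8, 4)    # stand-ins for the real encoder/decoder
toy_decoder = nn.Linear(4, 8)

# Each dict is one parameter group; keys set inside a group override the
# defaults passed to the optimizer constructor (here only the decoder lr).
param_groups = [
    {'params': toy_encoder.parameters()},
    {'params': toy_decoder.parameters(), 'lr': 1e-2},
]
optimizer = torch.optim.SGD(param_groups, lr=1e-3, momentum=0.9, weight_decay=1e-4)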
Example #5
def main(args):
    """Main function."""

    # 1. Configurations
    torch.backends.cudnn.benchmark = True
    BACKBONE_CONFIGS, Config, Backbone = AVAILABLE_MODELS[args.backbone_type]
    Projector = PROJECTOR_TYPES[args.projector_type]

    config = Config(args)
    config.save()

    logfile = os.path.join(config.checkpoint_dir, 'main.log')
    logger = get_logger(stream=False, logfile=logfile)

    # 2. Data
    if config.data == 'wm811k':
        data_transforms = {
            'transform': get_transform(data=config.data,
                                       size=config.input_size,
                                       mode='test'),
            'positive_transform': get_transform(data=config.data,
                                                size=config.input_size,
                                                mode=config.augmentation),
        }
        train_set = torch.utils.data.ConcatDataset([
            WM811KForPIRL('./data/wm811k/unlabeled/train/', **data_transforms),
            WM811KForPIRL('./data/wm811k/labeled/train/', **data_transforms),
        ])
        valid_set = torch.utils.data.ConcatDataset([
            WM811KForPIRL('./data/wm811k/unlabeled/valid/', **data_transforms),
            WM811KForPIRL('./data/wm811k/labeled/valid/', **data_transforms),
        ])
        test_set = torch.utils.data.ConcatDataset([
            WM811KForPIRL('./data/wm811k/unlabeled/test/', **data_transforms),
            WM811KForPIRL('./data/wm811k/labeled/test/', **data_transforms),
        ])
    else:
        raise ValueError(
            f"PIRL only supports 'wm811k' data. Received '{config.data}'.")

    # 3. Model
    backbone = Backbone(BACKBONE_CONFIGS[config.backbone_config],
                        in_channels=IN_CHANNELS[config.data])
    projector = Projector(backbone.out_channels, config.projector_size)

    # 4. Optimization
    params = [{
        'params': backbone.parameters()
    }, {
        'params': projector.parameters()
    }]
    optimizer = get_optimizer(params=params,
                              name=config.optimizer,
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay,
                              momentum=config.momentum)
    scheduler = get_scheduler(optimizer=optimizer,
                              name=config.scheduler,
                              epochs=config.epochs,
                              milestone=config.milestone,
                              warmup_steps=config.warmup_steps)

    # 5. Experiment (PIRL)
    experiment_kwargs = {
        'backbone': backbone,
        'projector': projector,
        'memory': MemoryBank(size=(len(train_set), config.projector_size),
                             device=config.device),
        'optimizer': optimizer,
        'scheduler': scheduler,
        'loss_function': PIRLLoss(temperature=config.temperature),
        'loss_weight': config.loss_weight,
        'num_negatives': config.num_negatives,
        'metrics': {
            'top@1': TopKAccuracy(num_classes=1 + config.num_negatives, k=1),
            'top@5': TopKAccuracy(num_classes=1 + config.num_negatives, k=5)
        },
        'checkpoint_dir': config.checkpoint_dir,
        'write_summary': config.write_summary,
    }
    experiment = PIRL(**experiment_kwargs)

    # 6. Run (train, evaluate, and test model)
    run_kwargs = {
        'train_set': train_set,
        'valid_set': valid_set,
        'test_set': test_set,
        'epochs': config.epochs,
        'batch_size': config.batch_size,
        'num_workers': config.num_workers,
        'device': config.device,
        'logger': logger,
        'save_every': config.save_every,
    }

    logger.info(f"Data: {config.data}")
    logger.info(f"Augmentation: {config.augmentation}")
    logger.info(
        f"Train : Valid : Test = {len(train_set):,} : {len(valid_set):,} : {len(test_set):,}"
    )
    logger.info(
        f"Trainable parameters ({backbone.__class__.__name__}): {backbone.num_parameters:,}"
    )
    logger.info(
        f"Trainable parameters ({projector.__class__.__name__}): {projector.num_parameters:,}"
    )
    logger.info(f"Projector type: {config.projector_type}")
    logger.info(f"Projector dimension: {config.projector_size}")
    logger.info(f"Saving model checkpoints to: {experiment.checkpoint_dir}")
    logger.info(
        f"Epochs: {run_kwargs['epochs']}, Batch size: {run_kwargs['batch_size']}"
    )
    logger.info(
        f"Workers: {run_kwargs['num_workers']}, Device: {run_kwargs['device']}"
    )

    steps_per_epoch = len(train_set) // config.batch_size + 1
    logger.info(f"Training steps per epoch: {steps_per_epoch:,}")
    logger.info(
        f"Total number of training iterations: {steps_per_epoch * config.epochs:,}"
    )

    if config.resume_from_checkpoint is not None:
        logger.info(
            f"Resuming from a checkpoint: {config.resume_from_checkpoint}")
        model_ckpt = os.path.join(config.resume_from_checkpoint,
                                  'best_model.pt')
        memory_ckpt = os.path.join(config.resume_from_checkpoint,
                                   'best_memory.pt')
        experiment.load_model_from_checkpoint(
            model_ckpt)  # load model & optimizer
        experiment.memory.load(memory_ckpt)  # load memory bank

        # Assign optimizer variables to appropriate device
        for state in experiment.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(config.device)

    experiment.run(**run_kwargs)
    logger.handlers.clear()
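The device re-assignment loop in the resume branch above is needed because a checkpoint restored on a different device (typically CPU) leaves the optimizer's momentum buffers on that device. When the checkpoint format is under your control, the same effect can usually be had at load time with map_location; a sketch assuming the checkpoint is a plain dictionary of state_dicts (the actual keys written by the experiment class are not shown in this snippet):

import torch

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Hypothetical checkpoint layout; adapt the keys to whatever was saved.
ckpt = torch.load('best_model.pt', map_location=device)
# model.load_state_dict(ckpt['model'])
# optimizer.load_state_dict(ckpt['optimizer'])   # state tensors already on device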
Example #6
def get_sbd_instance_dataset(args):
    train_transform, val_transform = get_transform(args)
    data_set = SBDInstanceDataset(data_dir=args.sbd_data_path,
                                  transform=train_transform,
                                  choose_size=args.sbd_data_choose_size,
                                  repeat=args.sbd_repeat)

    return data_set

def get_cityscapes_dataset(args, train=True, smask_mapping=False):
    if 'balance' in args.aug and train:
        aug_save = args.aug
        temp = ''
        temp += '_color' if 'color' in args.aug else ''
        temp += '_bbox' if 'bbox' in args.aug else ''
        args.aug = temp
        train_transform, val_transform = get_transform(args)
        if smask_mapping:
            train_transform = transforms.Compose(
                [train_transform, CityScapesIDMapping()])
            val_transform = transforms.Compose(
                [val_transform, CityScapesIDMapping()])

        data_sets = [
            BalancedCityscapesDataset(crop_size=args.crop_size,
                                      fake_size=args.data_choose_size,
                                      label=i,
                                      root_dir=args.cityscapes_data_path,
                                      type="train",
                                      choose_size=args.data_choose_size,
                                      transform=train_transform,
                                      repeat=1) for i in range(1, 9)
        ]
        non_empty = []
        for i in range(len(data_sets)):
            if data_sets[i].real_size > 0:
                non_empty.append(i)
        data_sets = [data_sets[i] for i in non_empty]

        args.aug = aug_save
        # if args.data_choose_size is None:
        #     fake_size = 3000 // 8
        # else:
        #     fake_size = max([d.real_size for d in data_sets])

        fake_size = args.cityscapes_fake_size // 8
        for d in data_sets:
            d.fake_size = fake_size

        data_set = ConcatDataset(data_sets)
        print(f'===> ConcatDataset size: {len(data_set)}')
        return data_set
    else:
        train_transform, val_transform = get_transform(args)
        if smask_mapping:
            train_transform = transforms.Compose(
                [train_transform, CityScapesIDMapping()])
            val_transform = transforms.Compose(
                [val_transform, CityScapesIDMapping()])

        if train:
            data_set = CityscapesDataset(root_dir=args.cityscapes_data_path,
                                         type="train",
                                         choose_size=args.data_choose_size,
                                         transform=train_transform,
                                         repeat=args.dataset_repeat)
        else:
            data_set = CityscapesDataset(root_dir=args.cityscapes_data_path,
                                         type="val",
                                         choose_size=args.data_choose_size,
                                         transform=val_transform)

        return data_set
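In the balanced branch above, one BalancedCityscapesDataset is built per label 1..8 and each is later assigned a common fake_size of args.cityscapes_fake_size // 8, so the concatenated length lands close to args.cityscapes_fake_size when no per-label dataset is empty. A small worked check of that arithmetic (the value 3000 is hypothetical):

cityscapes_fake_size = 3000                       # hypothetical args.cityscapes_fake_size
num_labels = 8                                    # labels 1..8 built above
fake_size = cityscapes_fake_size // num_labels    # 375 samples per balanced dataset
total = fake_size * num_labels                    # 3000 = expected ConcatDataset length
print(fake_size, total)                           # -> 375 3000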
Example #8
def main(args):
    """Main function."""

    # 0. CONFIGURATIONS
    torch.backends.cudnn.benchmark = True
    BACKBONE_CONFIGS, Config, Backbone = AVAILABLE_MODELS[args.backbone_type]
    Projector = PROJECTOR_TYPES[args.projector_type]

    config = Config(args)
    config.save()

    logfile = os.path.join(config.checkpoint_dir, 'main.log')
    logger = get_logger(stream=False, logfile=logfile)

    # 1. DATA
    input_transform = get_transform(config.data,
                                    size=config.input_size,
                                    mode='train')
    if config.data == 'wm811k':
        in_channels = 2
        train_set = torch.utils.data.ConcatDataset([
            WM811KForSimCLR('./data/wm811k/unlabeled/train/',
                            transform=input_transform),
            WM811KForSimCLR('./data/wm811k/labeled/train/',
                            transform=input_transform),
        ])
        valid_set = torch.utils.data.ConcatDataset([
            WM811KForSimCLR('./data/wm811k/unlabeled/valid/',
                            transform=input_transform),
            WM811KForSimCLR('./data/wm811k/labeled/valid/',
                            transform=input_transform),
        ])
        test_set = torch.utils.data.ConcatDataset([
            WM811KForSimCLR('./data/wm811k/unlabeled/test/',
                            transform=input_transform),
            WM811KForSimCLR('./data/wm811k/labeled/test/',
                            transform=input_transform),
        ])
    elif config.data == 'cifar10':
        in_channels = 3
        train_set = CIFAR10ForSimCLR('./data/cifar10/',
                                     train=True,
                                     transform=input_transform)
        valid_set = CIFAR10ForSimCLR('./data/cifar10/',
                                     train=False,
                                     transform=input_transform)
        test_set = valid_set
    elif config.data == 'stl10':
        raise NotImplementedError
    elif config.data == 'imagenet':
        raise NotImplementedError
    else:
        raise ValueError(f"Unsupported data type: '{config.data}'.")
    logger.info(f"Data type: {config.data}")
    logger.info(
        f"Train : Valid : Test = {len(train_set):,} : {len(valid_set):,} : {len(test_set):,}"
    )
    steps_per_epoch = len(train_set) // config.batch_size + 1
    logger.info(f"Training steps per epoch: {steps_per_epoch:,}")
    logger.info(
        f"Total number of training iterations: {steps_per_epoch * config.epochs:,}"
    )

    # 2. MODEL
    backbone = Backbone(BACKBONE_CONFIGS[config.backbone_config], in_channels)
    projector = Projector(backbone.out_channels, config.projector_size)
    logger.info(
        f"Trainable parameters ({backbone.__class__.__name__}): {backbone.num_parameters:,}"
    )
    logger.info(
        f"Trainable parameters ({projector.__class__.__name__}): {projector.num_parameters:,}"
    )
    logger.info(f"Embedding dimension: {config.projector_size}")

    # 3. OPTIMIZATION (TODO: add LARS optimizer)
    params = [{
        'params': backbone.parameters()
    }, {
        'params': projector.parameters()
    }]
    optimizer = get_optimizer(params=params,
                              name=config.optimizer,
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay,
                              momentum=config.momentum)
    scheduler = get_scheduler(optimizer=optimizer,
                              name=config.scheduler,
                              epochs=config.epochs,
                              milestone=config.milestone,
                              warmup_steps=config.warmup_steps)

    # 4. EXPERIMENT (SimCLR)
    experiment_kwargs = {
        'backbone': backbone,
        'projector': projector,
        'optimizer': optimizer,
        'scheduler': scheduler,
        'loss_function': SimCLRLoss(temperature=config.temperature),
        'metrics': None,
        'checkpoint_dir': config.checkpoint_dir,
        'write_summary': config.write_summary,
    }
    experiment = SimCLR(**experiment_kwargs)
    logger.info(f"Saving model checkpoints to: {experiment.checkpoint_dir}")

    # 5. RUN (SimCLR)
    run_kwargs = {
        'train_set': train_set,
        'valid_set': valid_set,
        'test_set': test_set,
        'epochs': config.epochs,
        'batch_size': config.batch_size,
        'num_workers': config.num_workers,
        'device': config.device,
        'logger': logger,
        'save_every': config.save_every,
    }
    logger.info(
        f"Epochs: {run_kwargs['epochs']}, Batch size: {run_kwargs['batch_size']}"
    )
    logger.info(
        f"Workers: {run_kwargs['num_workers']}, Device: {run_kwargs['device']}"
    )

    if config.resume_from_checkpoint is not None:
        logger.info(
            f"Resuming from checkpoint: {config.resume_from_checkpoint}")
        model_ckpt = os.path.join(config.resume_from_checkpoint,
                                  'best_model.pt')
        experiment.load_model_from_checkpoint(model_ckpt)

        # Assign optimizer variables to appropriate device
        for state in experiment.optimizer.state.values():
            for k, v in state.items():
                if isinstance(v, torch.Tensor):
                    state[k] = v.to(config.device)

    experiment.run(**run_kwargs)
    logger.handlers.clear()
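The steps_per_epoch figure logged in these scripts, len(train_set) // batch_size + 1, overstates the batch count by one whenever the dataset size is an exact multiple of the batch size; with drop_last=False a DataLoader actually yields math.ceil(len(dataset) / batch_size) batches. A quick check of the two formulas:

import math

def steps_logged(n, batch_size):
    return n // batch_size + 1              # formula used in the scripts above

def steps_exact(n, batch_size):
    return math.ceil(n / batch_size)        # batches yielded with drop_last=False

print(steps_logged(1000, 100), steps_exact(1000, 100))   # 11 10  (divisible case)
print(steps_logged(1001, 100), steps_exact(1001, 100))   # 11 11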
Example #9
def main(args):
    """Main function."""

    # 0. Main configurations
    BACKBONE_CONFIGS, Config, Backbone = AVAILABLE_MODELS[args.backbone_type]
    Classifier = CLASSIFIER_TYPES['linear']
    config = Config(args)
    config.save()

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)  # For reproducibility
    torch.backends.cudnn.benchmark = True

    logfile = os.path.join(config.checkpoint_dir, 'main.log')
    logger = get_logger(stream=False, logfile=logfile)

    in_channels = IN_CHANNELS[config.data]
    num_classes = NUM_CLASSES[config.data]

    # 1. Dataset
    if config.data == 'wm811k':
        train_transform = get_transform(config.data,
                                        size=config.input_size,
                                        mode=config.augmentation)
        test_transform = get_transform(config.data,
                                       size=config.input_size,
                                       mode='test')
        train_set = WM811K('./data/wm811k/labeled/train/',
                           transform=train_transform,
                           proportion=config.label_proportion,
                           seed=config.seed)
        valid_set = WM811K('./data/wm811k/labeled/valid/',
                           transform=test_transform)
        test_set = WM811K('./data/wm811k/labeled/test/',
                          transform=test_transform)
    else:
        raise NotImplementedError

    steps_per_epoch = len(train_set) // config.batch_size + 1
    logger.info(f"Data type: {config.data}")
    logger.info(
        f"Train : Valid : Test = {len(train_set):,} : {len(valid_set):,} : {len(test_set):,}"
    )
    logger.info(f"Training steps per epoch: {steps_per_epoch:,}")
    logger.info(
        f"Total number of training iterations: {steps_per_epoch * config.epochs:,}"
    )

    # 2. Model
    backbone = Backbone(BACKBONE_CONFIGS[config.backbone_config], in_channels)
    classifier = Classifier(
        in_channels=backbone.out_channels,
        num_classes=num_classes,
        dropout=config.dropout,
    )

    # 3. Optimization (TODO: add LARS)
    params = [
        {
            'params': backbone.parameters(),
            'lr': config.learning_rate
        },
        {
            'params': classifier.parameters(),
            'lr': config.learning_rate
        },
    ]
    optimizer = get_optimizer(params=params,
                              name=config.optimizer,
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    scheduler = get_scheduler(optimizer=optimizer,
                              name=config.scheduler,
                              epochs=config.epochs,
                              milestone=config.milestone,
                              warmup_steps=config.warmup_steps)

    # 4. Experiment (Mixup)
    experiment_kwargs = {
        'backbone': backbone,
        'classifier': classifier,
        'optimizer': optimizer,
        'scheduler': scheduler,
        'loss_function': nn.CrossEntropyLoss(),
        'checkpoint_dir': config.checkpoint_dir,
        'write_summary': config.write_summary,
        'metrics': {
            'accuracy': MultiAccuracy(num_classes=num_classes),
            'f1': MultiF1Score(num_classes=num_classes, average='macro'),
        },
    }
    experiment = Mixup(**experiment_kwargs)
    logger.info(f"Saving model checkpoints to: {experiment.checkpoint_dir}")

    # 5. Run (Mixup)
    run_kwargs = {
        'train_set': train_set,
        'valid_set': valid_set,
        'test_set': test_set,
        'epochs': config.epochs,
        'batch_size': config.batch_size,
        'num_workers': config.num_workers,
        'device': config.device,
        'logger': logger,
        'eval_metric': config.eval_metric,
        'balance': config.balance,
        'disable_mixup': config.disable_mixup,
    }
    logger.info(
        f"Epochs: {run_kwargs['epochs']}, Batch size: {run_kwargs['batch_size']}"
    )
    logger.info(
        f"Workers: {run_kwargs['num_workers']}, Device: {run_kwargs['device']}"
    )
    logger.info(f"Mixup enabled: {not config.disable_mixup}")

    experiment.run(**run_kwargs)
    logger.handlers.clear()
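One caveat on the seeding in the example above: np.random.seed and torch.manual_seed fix the CPU-side generators, but torch.backends.cudnn.benchmark = True lets cuDNN pick convolution algorithms at run time, which can make GPU results differ between otherwise identical runs. If bit-exact repeatability matters more than throughput, the standard PyTorch switches are the ones below (not something these scripts set themselves):

import torch

torch.backends.cudnn.benchmark = False       # disable run-time algorithm search
torch.backends.cudnn.deterministic = True    # restrict cuDNN to deterministic kernels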