def _train_save_load(self, tmpdir, loss, val_metric, model='UNet3D', max_num_epochs=1, log_after_iters=2,
                     validate_after_iters=2, max_num_iterations=4, weight_map=False):
        """Run a short training session, then restore the trainer from its last checkpoint.

        A deep copy of ``CONFIG_BASE`` is customised with the requested model,
        loss and evaluation metric, a ``UNet3DTrainer`` is fitted on the datasets
        returned by ``TestUNet3DTrainer._create_random_dataset`` and a second
        trainer is created from ``<tmpdir>/last_checkpoint.pytorch``.

        Args:
            tmpdir: directory handed to the trainer; the checkpoint file
                ``last_checkpoint.pytorch`` is looked up inside it afterwards.
            loss: name of the loss, stored under ``config['loss']['name']``.
            val_metric: name of the metric, stored under
                ``config['eval_metric']['name']``.
            model: model name, stored under ``config['model']['name']``.
            max_num_epochs: forwarded to ``UNet3DTrainer``.
            log_after_iters: forwarded to ``UNet3DTrainer``.
            validate_after_iters: forwarded to ``UNet3DTrainer``.
            max_num_iterations: forwarded to ``UNet3DTrainer``.
            weight_map: when truthy, ``config['loaders']['weight_internal_path']``
                is set to ``'weight_map'``.

        Returns:
            The trainer produced by ``UNet3DTrainer.from_checkpoint``.
        """
        # These losses switch on ``final_sigmoid`` in the model config and are
        # also passed on to the dataset factory.
        binary_loss = loss in ('BCEWithLogitsLoss', 'DiceLoss', 'GeneralizedDiceLoss')
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        # Work on a deep copy so that CONFIG_BASE itself is never modified.
        test_config = copy.deepcopy(CONFIG_BASE)
        test_config['model']['name'] = model
        overrides = {
            'device': device,
            'loss': {
                'name': loss,
                'weight': np.random.rand(2).astype(np.float32),
                'pos_weight': 3.,
            },
            'eval_metric': {'name': val_metric},
        }
        test_config.update(overrides)
        test_config['model']['final_sigmoid'] = binary_loss

        if weight_map:
            test_config['loaders']['weight_internal_path'] = 'weight_map'

        loss_criterion = get_loss_criterion(test_config)
        eval_criterion = get_evaluation_metric(test_config)
        net = get_model(test_config).to(device)

        # Labels are configured as float32 for BCEWithLogitsLoss and as long otherwise.
        label_dtype = 'float32' if loss == 'BCEWithLogitsLoss' else 'long'
        for phase in ('train', 'val'):
            test_config['loaders'][phase]['transformer']['label'][0]['dtype'] = label_dtype

        train_file, val_file = TestUNet3DTrainer._create_random_dataset(
            (3, 128, 128, 128), (3, 64, 64, 64), binary_loss)
        test_config['loaders']['train']['file_paths'] = [train_file]
        test_config['loaders']['val']['file_paths'] = [val_file]

        loaders = get_train_loaders(test_config)
        optimizer = _create_optimizer(test_config, net)

        # The scheduler is always MultiStepLR here, whatever CONFIG_BASE specifies.
        test_config['lr_scheduler']['name'] = 'MultiStepLR'
        lr_scheduler = _create_lr_scheduler(test_config, optimizer)

        # The returned logger is not used below; the call is kept as in the
        # original (presumably for its configuration side effect - TODO confirm).
        logger = get_logger('UNet3DTrainer', logging.DEBUG)

        formatter = DefaultTensorboardFormatter()
        trainer_options = {
            'max_num_epochs': max_num_epochs,
            'log_after_iters': log_after_iters,
            'validate_after_iters': validate_after_iters,
            'max_num_iterations': max_num_iterations,
            'tensorboard_formatter': formatter,
        }
        trainer = UNet3DTrainer(net, optimizer, lr_scheduler,
                                loss_criterion, eval_criterion,
                                device, loaders, tmpdir,
                                **trainer_options)
        trainer.fit()

        # Rebuild the trainer from the checkpoint file expected in tmpdir.
        checkpoint_path = os.path.join(tmpdir, 'last_checkpoint.pytorch')
        return UNet3DTrainer.from_checkpoint(checkpoint_path,
                                             net, optimizer, lr_scheduler,
                                             loss_criterion, eval_criterion,
                                             loaders, tensorboard_formatter=formatter)
Beispiel #2
0
def _train_save_load(tmpdir, train_config, loss, val_metric, model, weight_map, shape):
    """Fit a trainer once, then build a second trainer that resumes from the checkpoint.

    ``train_config`` is modified in place: model name, device, loss, evaluation
    metric, ``final_sigmoid``, optional weight-map path, label dtype and dataset
    file paths are all written into it before the helpers consume it.

    Args:
        tmpdir: directory handed to ``UNet3DTrainer``; the resume path is
            ``<tmpdir>/last_checkpoint.pytorch``.
        train_config: training configuration mapping; mutated by this function.
        loss: name of the loss, stored under ``train_config['loss']['name']``.
        val_metric: name of the metric, stored under
            ``train_config['eval_metric']['name']``.
        model: model name, stored under ``train_config['model']['name']``.
        weight_map: when truthy, ``train_config['loaders']['weight_internal_path']``
            is set to ``'weight_map'``.
        shape: forwarded to ``_create_random_dataset`` for both datasets.

    Returns:
        The second ``UNet3DTrainer``, constructed with ``resume`` pointing at
        the last checkpoint.
    """
    # These losses switch on ``final_sigmoid`` and are passed to the dataset factory.
    binary_loss = loss in ('BCEWithLogitsLoss', 'DiceLoss', 'BCEDiceLoss', 'GeneralizedDiceLoss')
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    train_config['model']['name'] = model
    overrides = {
        'device': device,
        'loss': {
            'name': loss,
            'weight': np.random.rand(2).astype(np.float32),
            'pos_weight': 3.,
        },
        'eval_metric': {'name': val_metric},
    }
    train_config.update(overrides)
    train_config['model']['final_sigmoid'] = binary_loss

    if weight_map:
        train_config['loaders']['weight_internal_path'] = 'weight_map'

    loss_criterion = get_loss_criterion(train_config)
    eval_criterion = get_evaluation_metric(train_config)
    net = get_model(train_config['model']).to(device)

    # Only BCEWithLogitsLoss overrides the label dtype; other losses keep
    # whatever the incoming configuration already specifies.
    if loss == 'BCEWithLogitsLoss':
        for phase in ('train', 'val'):
            train_config['loaders'][phase]['transformer']['label'][0]['dtype'] = 'float32'

    train_file = _create_random_dataset(shape, binary_loss)
    val_file = _create_random_dataset(shape, binary_loss)
    train_config['loaders']['train']['file_paths'] = [train_file]
    train_config['loaders']['val']['file_paths'] = [val_file]

    loaders = get_train_loaders(train_config)

    optimizer = create_optimizer(train_config['optimizer'], net)
    lr_scheduler = create_lr_scheduler(train_config.get('lr_scheduler'), optimizer)

    formatter = DefaultTensorboardFormatter()

    def _new_trainer(**extra):
        # Both trainers share the same components and settings; the settings
        # are read from the configuration each time a trainer is built.
        settings = train_config['trainer']
        return UNet3DTrainer(net, optimizer, lr_scheduler,
                             loss_criterion, eval_criterion,
                             device, loaders, tmpdir,
                             max_num_epochs=settings['max_num_epochs'],
                             log_after_iters=settings['log_after_iters'],
                             # NOTE(review): validation interval is taken from
                             # 'log_after_iters' - confirm this is intentional.
                             validate_after_iters=settings['log_after_iters'],
                             max_num_iterations=settings['max_num_iterations'],
                             tensorboard_formatter=formatter,
                             **extra)

    trainer = _new_trainer()
    trainer.fit()

    # Build a fresh trainer that resumes from the checkpoint expected in tmpdir.
    return _new_trainer(resume=os.path.join(tmpdir, 'last_checkpoint.pytorch'))
Beispiel #3
0
def main():
    """Load the experiment configuration, assemble all training components and train.

    The configuration comes from ``load_config()``. When it contains a
    non-``None`` ``manual_seed`` the torch RNG is seeded and cuDNN is switched
    to deterministic, non-benchmark mode. The model is wrapped in
    ``nn.DataParallel`` when more than one CUDA device is reported and the
    configured device is not the CPU. Finally the trainer built by
    ``_create_trainer`` is fitted.
    """
    config = load_config()
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        # see https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    model = get_model(config)
    device = config['device']

    # Multi-GPU: wrap the model in DataParallel unless training on the CPU.
    if torch.cuda.device_count() > 1 and device.type != 'cpu':
        model = nn.DataParallel(model)
        gpu_count = torch.cuda.device_count()
        logger.info(f'Using {gpu_count} GPUs for training')

    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(device)

    param_count = get_number_of_learnable_parameters(model)
    logger.info(f'Number of learnable params {param_count}')

    # Build the remaining pieces in the same order as they are consumed:
    # loss, metric, data loaders, optimizer, learning-rate scheduler.
    loss_criterion = get_loss_criterion(config)
    eval_criterion = get_evaluation_metric(config)
    loaders = get_train_loaders(config)
    optimizer = _create_optimizer(config, model)
    lr_scheduler = _create_lr_scheduler(config, optimizer)

    components = {
        'model': model,
        'optimizer': optimizer,
        'lr_scheduler': lr_scheduler,
        'loss_criterion': loss_criterion,
        'eval_criterion': eval_criterion,
        'loaders': loaders,
    }
    trainer = _create_trainer(config, **components)
    trainer.fit()
Beispiel #4
0
def create_trainer(config):
    """Assemble a ``UNet3DTrainer`` from a configuration mapping.

    The model, loss criterion, evaluation metric, data loaders, optimizer and
    learning-rate scheduler are created from ``config`` and handed to
    ``UNet3DTrainer`` together with the entries of ``config['trainer']``.

    Args:
        config: configuration mapping. The keys read here are ``'model'``,
            ``'device'``, ``'optimizer'``, ``'trainer'`` and, optionally,
            ``'lr_scheduler'``; the whole mapping is also passed to the
            loss/metric/loader factories. ``config['trainer']`` may contain
            ``'tensorboard_formatter'``, ``'resume'`` and ``'pre_trained'``;
            every other entry is forwarded to ``UNet3DTrainer`` as a keyword
            argument.

    Returns:
        The constructed ``UNet3DTrainer``.
    """
    # Create the model
    model = get_model(config['model'])
    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and device.type != 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for training')

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(device)

    # Log the number of learnable parameters
    logger.info(
        f'Number of learnable params {get_number_of_learnable_parameters(model)}'
    )

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = create_optimizer(config['optimizer'], model)

    # Create learning rate adjustment strategy
    lr_scheduler = create_lr_scheduler(config.get('lr_scheduler', None),
                                       optimizer)

    # Pop from a shallow copy: popping directly from config['trainer'] would
    # strip 'tensorboard_formatter', 'resume' and 'pre_trained' from the
    # caller's configuration, so a second call with the same config would
    # silently lose those settings.
    trainer_config = dict(config['trainer'])
    # Create tensorboard formatter
    tensorboard_formatter = get_tensorboard_formatter(
        trainer_config.pop('tensorboard_formatter', None))
    # Create trainer
    resume = trainer_config.pop('resume', None)
    pre_trained = trainer_config.pop('pre_trained', None)

    return UNet3DTrainer(model=model,
                         optimizer=optimizer,
                         lr_scheduler=lr_scheduler,
                         loss_criterion=loss_criterion,
                         eval_criterion=eval_criterion,
                         tensorboard_formatter=tensorboard_formatter,
                         device=config['device'],
                         loaders=loaders,
                         resume=resume,
                         pre_trained=pre_trained,
                         **trainer_config)
Beispiel #5
0
    def build(config):
        """Create every training component from ``config`` and return the trainer.

        Args:
            config: configuration mapping. The keys read here are ``'model'``,
                ``'device'``, ``'optimizer'`` and, optionally,
                ``'lr_scheduler'``; the whole mapping is also passed to the
                loss/metric/loader factories and to ``_create_trainer``.

        Returns:
            The object returned by ``_create_trainer``.
        """
        net = get_model(config['model'])
        device = config['device']

        # Multi-GPU: wrap the model in DataParallel unless the target is the CPU.
        if torch.cuda.device_count() > 1 and device.type != 'cpu':
            net = nn.DataParallel(net)
            gpu_count = torch.cuda.device_count()
            logger.info(f'Using {gpu_count} GPUs for training')

        logger.info(f"Sending the model to '{config['device']}'")
        net = net.to(device)

        param_count = get_number_of_learnable_parameters(net)
        logger.info(f'Number of learnable params {param_count}')

        # Remaining components, created in the same order as before:
        # loss, metric, data loaders, optimizer, learning-rate scheduler.
        loss_criterion = get_loss_criterion(config)
        eval_criterion = get_evaluation_metric(config)
        loaders = get_train_loaders(config)
        optimizer = create_optimizer(config['optimizer'], net)
        lr_scheduler = create_lr_scheduler(config.get('lr_scheduler'), optimizer)

        components = {
            'model': net,
            'optimizer': optimizer,
            'lr_scheduler': lr_scheduler,
            'loss_criterion': loss_criterion,
            'eval_criterion': eval_criterion,
            'loaders': loaders,
        }
        return _create_trainer(config, **components)