def _train_save_load(self, tmpdir, loss, val_metric, model='UNet3D', max_num_epochs=1, log_after_iters=2,
                     validate_after_iters=2, max_num_iterations=4, weight_map=False):
    # losses for which the network should end with a sigmoid (rather than a softmax)
    binary_loss = loss in ['BCEWithLogitsLoss', 'DiceLoss', 'GeneralizedDiceLoss']

    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')

    # build the test configuration on top of the base config
    test_config = copy.deepcopy(CONFIG_BASE)
    test_config['model']['name'] = model
    test_config.update({
        # get device to train on
        'device': device,
        'loss': {'name': loss, 'weight': np.random.rand(2).astype(np.float32)},
        'eval_metric': {'name': val_metric}
    })
    test_config['model']['final_sigmoid'] = binary_loss

    if weight_map:
        test_config['loaders']['weight_internal_path'] = 'weight_map'

    loss_criterion = get_loss_criterion(test_config)
    eval_criterion = get_evaluation_metric(test_config)
    model = get_model(test_config)
    model = model.to(device)

    # BCEWithLogitsLoss expects float targets; the other criteria expect class indices
    if loss in ['BCEWithLogitsLoss']:
        label_dtype = 'float32'
    else:
        label_dtype = 'long'
    test_config['loaders']['transformer']['train']['label'][0]['dtype'] = label_dtype
    test_config['loaders']['transformer']['test']['label'][0]['dtype'] = label_dtype

    # create a small random dataset to train/validate on
    train, val = TestUNet3DTrainer._create_random_dataset((128, 128, 128), (64, 64, 64), binary_loss)
    test_config['loaders']['train_path'] = [train]
    test_config['loaders']['val_path'] = [val]

    loaders = get_train_loaders(test_config)

    optimizer = _create_optimizer(test_config, model)

    test_config['lr_scheduler']['name'] = 'MultiStepLR'
    lr_scheduler = _create_lr_scheduler(test_config, optimizer)

    logger = get_logger('UNet3DTrainer', logging.DEBUG)
    formatter = DefaultTensorboardFormatter()

    # run a short training session and save checkpoints to tmpdir
    trainer = UNet3DTrainer(model, optimizer, lr_scheduler,
                            loss_criterion, eval_criterion,
                            device, loaders, tmpdir,
                            max_num_epochs=max_num_epochs,
                            log_after_iters=log_after_iters,
                            validate_after_iters=validate_after_iters,
                            max_num_iterations=max_num_iterations,
                            logger=logger, tensorboard_formatter=formatter)
    trainer.fit()

    # test loading the trainer from the checkpoint
    trainer = UNet3DTrainer.from_checkpoint(os.path.join(tmpdir, 'last_checkpoint.pytorch'),
                                            model, optimizer, lr_scheduler,
                                            loss_criterion, eval_criterion,
                                            loaders, logger=logger,
                                            tensorboard_formatter=formatter)
    return trainer
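# A minimal usage sketch (not in the original excerpt): driving the helper above from a
# test case. The loss/metric names are assumed to be registered in the project's loss and
# eval-metric factories, and pytest's `tmpdir` fixture supplies the checkpoint directory.
def test_bce_loss(self, tmpdir):
    trainer = self._train_save_load(tmpdir, 'BCEWithLogitsLoss', 'DiceCoefficient')
    # fit() should have produced the checkpoint that from_checkpoint() reloaded
    assert os.path.exists(os.path.join(str(tmpdir), 'last_checkpoint.pytorch'))
    assert trainer is not None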
def main():
    # Load and log experiment configuration
    config = load_config()

    # Create main logger
    logfile = config.get('logfile', None)
    logger = get_logger('UNet3DTrainer', logfile=logfile)
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        # see https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Create the model
    model = get_model(config)

    # use DataParallel if multiple GPUs are available
    if torch.cuda.device_count() > 1:
        logger.info(f'There are {torch.cuda.device_count()} GPUs available')
        model = nn.DataParallel(model)

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(config['device'])

    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    logger.info(f"Created loss criterion: {config['loss']['name']}")

    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)
    logger.info(f"Created eval criterion: {config['eval_metric']['name']}")

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = _create_optimizer(config, model)

    # Create learning rate adjustment strategy
    lr_scheduler = _create_lr_scheduler(config, optimizer)

    # Create model trainer
    trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                              loss_criterion=loss_criterion, eval_criterion=eval_criterion,
                              loaders=loaders, logger=logger)

    # Start training
    trainer.fit()
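# Standard script entry point (assumed; the original excerpt ends at main()). For
# reference, the only config keys main() reads directly are 'logfile' and 'manual_seed'
# (both optional), 'device', 'loss.name' and 'eval_metric.name'; the nested 'model',
# 'loaders' and 'lr_scheduler' sections (and presumably an 'optimizer' section) are
# consumed by the respective factory helpers.
if __name__ == '__main__':
    main()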