def main():
    # Load configuration
    config = load_config()

    # Create the model
    model = get_model(config)

    # Load model state
    model_path = config['model_path']
    logger.info(f'Loading model from {model_path}...')
    utils.load_checkpoint(model_path, model)

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for prediction')

    logger.info(f"Sending the model to '{device}'")
    model = model.to(device)

    output_dir = config['loaders'].get('output_dir', None)
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f'Saving predictions to: {output_dir}')

    for test_loader in get_test_loaders(config):
        logger.info(f"Processing '{test_loader.dataset.file_path}'...")

        output_file = _get_output_file(dataset=test_loader.dataset, output_dir=output_dir)

        predictor = _get_predictor(model, test_loader, output_file, config)
        # run the model prediction on the entire dataset and save to the 'output_file' H5
        predictor.predict()
def _train_save_load(self, tmpdir, loss, val_metric, model='UNet3D', max_num_epochs=1, log_after_iters=2,
                     validate_after_iters=2, max_num_iterations=4, weight_map=False):
    binary_loss = loss in ['BCEWithLogitsLoss', 'DiceLoss', 'GeneralizedDiceLoss']

    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')

    test_config = copy.deepcopy(CONFIG_BASE)
    test_config['model']['name'] = model
    test_config.update({
        # get device to train on
        'device': device,
        'loss': {'name': loss, 'weight': np.random.rand(2).astype(np.float32), 'pos_weight': 3.},
        'eval_metric': {'name': val_metric}
    })
    test_config['model']['final_sigmoid'] = binary_loss

    if weight_map:
        test_config['loaders']['weight_internal_path'] = 'weight_map'

    loss_criterion = get_loss_criterion(test_config)
    eval_criterion = get_evaluation_metric(test_config)
    model = get_model(test_config)
    model = model.to(device)

    if loss in ['BCEWithLogitsLoss']:
        label_dtype = 'float32'
    else:
        label_dtype = 'long'
    test_config['loaders']['train']['transformer']['label'][0]['dtype'] = label_dtype
    test_config['loaders']['val']['transformer']['label'][0]['dtype'] = label_dtype

    train, val = TestUNet3DTrainer._create_random_dataset((3, 128, 128, 128), (3, 64, 64, 64), binary_loss)
    test_config['loaders']['train']['file_paths'] = [train]
    test_config['loaders']['val']['file_paths'] = [val]

    loaders = get_train_loaders(test_config)

    optimizer = _create_optimizer(test_config, model)

    test_config['lr_scheduler']['name'] = 'MultiStepLR'
    lr_scheduler = _create_lr_scheduler(test_config, optimizer)

    logger = get_logger('UNet3DTrainer', logging.DEBUG)

    formatter = DefaultTensorboardFormatter()
    trainer = UNet3DTrainer(model, optimizer, lr_scheduler, loss_criterion, eval_criterion, device, loaders,
                            tmpdir, max_num_epochs=max_num_epochs, log_after_iters=log_after_iters,
                            validate_after_iters=validate_after_iters, max_num_iterations=max_num_iterations,
                            tensorboard_formatter=formatter)
    trainer.fit()

    # test loading the trainer from the checkpoint
    trainer = UNet3DTrainer.from_checkpoint(os.path.join(tmpdir, 'last_checkpoint.pytorch'), model,
                                            optimizer, lr_scheduler, loss_criterion, eval_criterion,
                                            loaders, tensorboard_formatter=formatter)
    return trainer
def main():
    # Load configuration
    config = load_config()

    # Create the model
    model = get_model(config['model'])

    # Load model state
    model_path = config['model_path']
    logger.info(f'Loading model from {model_path}...')
    utils.load_checkpoint(model_path, model)

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for prediction')

    logger.info(f"Sending the model to '{device}'")
    model = model.to(device)

    output_dir = config['loaders'].get('output_dir', None)
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f'Saving predictions to: {output_dir}')

    # create predictor instance
    predictor = _get_predictor(model, output_dir, config)

    for test_loader in get_test_loaders(config):
        # run the model prediction on the test_loader and save the results in the output_dir
        predictor(test_loader)
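# A minimal, hypothetical sketch of the config dict that main() above reads;
# the real schema comes from the project's YAML config files, and the values
# below (paths, model name) are placeholders rather than library defaults.
import torch

config = {
    'model': {'name': 'UNet3D'},               # passed to get_model(config['model'])
    'model_path': 'best_checkpoint.pytorch',   # hypothetical checkpoint path
    'device': torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'),
    'loaders': {
        'output_dir': 'predictions',                 # optional; created if missing
        'test': {'file_paths': ['test_volume.h5']},  # hypothetical input file
    },
}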
def test_standard_predictor(self, tmpdir, test_config):
    # Add output dir
    test_config['loaders']['output_dir'] = tmpdir

    # create random dataset
    tmp = NamedTemporaryFile(delete=False)

    with h5py.File(tmp.name, 'w') as f:
        shape = (32, 64, 64)
        f.create_dataset('raw', data=np.random.rand(*shape))

    # Add input file
    test_config['loaders']['test']['file_paths'] = [tmp.name]

    # Create the model with random weights
    model = get_model(test_config)

    # Create device and update config
    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
    test_config['device'] = device
    model = model.to(device)

    for test_loader in get_test_loaders(test_config):
        output_file = _get_output_file(dataset=test_loader.dataset, output_dir=tmpdir)

        predictor = _get_predictor(model, test_loader, output_file, test_config)

        # run the model prediction on the entire dataset and save to the 'output_file' H5
        predictor.predict()
def _train_save_load(tmpdir, train_config, loss, val_metric, model, weight_map, shape):
    binary_loss = loss in ['BCEWithLogitsLoss', 'DiceLoss', 'BCEDiceLoss', 'GeneralizedDiceLoss']

    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')

    train_config['model']['name'] = model
    train_config.update({
        # get device to train on
        'device': device,
        'loss': {'name': loss, 'weight': np.random.rand(2).astype(np.float32), 'pos_weight': 3.},
        'eval_metric': {'name': val_metric}
    })
    train_config['model']['final_sigmoid'] = binary_loss

    if weight_map:
        train_config['loaders']['weight_internal_path'] = 'weight_map'

    loss_criterion = get_loss_criterion(train_config)
    eval_criterion = get_evaluation_metric(train_config)
    model = get_model(train_config['model'])
    model = model.to(device)

    if loss in ['BCEWithLogitsLoss']:
        label_dtype = 'float32'
        train_config['loaders']['train']['transformer']['label'][0]['dtype'] = label_dtype
        train_config['loaders']['val']['transformer']['label'][0]['dtype'] = label_dtype

    train = _create_random_dataset(shape, binary_loss)
    val = _create_random_dataset(shape, binary_loss)
    train_config['loaders']['train']['file_paths'] = [train]
    train_config['loaders']['val']['file_paths'] = [val]

    loaders = get_train_loaders(train_config)

    optimizer = create_optimizer(train_config['optimizer'], model)
    lr_scheduler = create_lr_scheduler(train_config.get('lr_scheduler', None), optimizer)

    formatter = DefaultTensorboardFormatter()
    trainer = UNet3DTrainer(model, optimizer, lr_scheduler, loss_criterion, eval_criterion, device, loaders,
                            tmpdir,
                            max_num_epochs=train_config['trainer']['max_num_epochs'],
                            log_after_iters=train_config['trainer']['log_after_iters'],
                            validate_after_iters=train_config['trainer']['log_after_iters'],
                            max_num_iterations=train_config['trainer']['max_num_iterations'],
                            tensorboard_formatter=formatter)
    trainer.fit()

    # test loading the trainer from the checkpoint
    trainer = UNet3DTrainer(model, optimizer, lr_scheduler, loss_criterion, eval_criterion, device, loaders,
                            tmpdir,
                            tensorboard_formatter=formatter,
                            max_num_epochs=train_config['trainer']['max_num_epochs'],
                            log_after_iters=train_config['trainer']['log_after_iters'],
                            validate_after_iters=train_config['trainer']['log_after_iters'],
                            max_num_iterations=train_config['trainer']['max_num_iterations'],
                            resume=os.path.join(tmpdir, 'last_checkpoint.pytorch'))
    return trainer
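# A minimal sketch of the _create_random_dataset helper the test above relies
# on; the real implementation lives in the test suite. It only illustrates the
# assumed contract: write a random 'raw'/'label' pair into a temporary HDF5
# file and return its path.
from tempfile import NamedTemporaryFile

import h5py
import numpy as np


def _create_random_dataset(shape, binary_loss):
    tmp = NamedTemporaryFile(delete=False)
    with h5py.File(tmp.name, 'w') as f:
        f.create_dataset('raw', data=np.random.rand(*shape))
        if binary_loss:
            # float targets for BCE/Dice-style losses
            f.create_dataset('label', data=(np.random.rand(*shape) > 0.5).astype(np.float32))
        else:
            # integer class labels for CrossEntropy-style losses
            f.create_dataset('label', data=np.random.randint(0, 2, size=shape))
    return tmp.name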
def __call__(self):
    logger = utils.get_logger('UNet3DPredictor')

    if not self.state:
        # skip network predictions and return input_paths
        gui_logger.info(f"Skipping '{self.__class__.__name__}'. Disabled by the user.")
        return self.paths
    else:
        # create config/download models only when cnn_prediction enabled
        config = create_predict_config(self.paths, self.cnn_config)

        # Create the model
        model = get_model(config)

        # Load model state
        model_path = config['model_path']
        model_name = config["model_name"]
        logger.info(f"Loading model '{model_name}' from {model_path}")
        utils.load_checkpoint(model_path, model)

        logger.info(f"Sending the model to '{config['device']}'")
        model = model.to(config['device'])

        logger.info('Loading HDF5 datasets...')

        # Run prediction
        output_paths = []
        for test_loader in get_test_loaders(config):
            gui_logger.info(f"Running network prediction on {test_loader.dataset.file_path}...")
            runtime = time.time()

            logger.info(f"Processing '{test_loader.dataset.file_path}'...")
            output_file = _get_output_file(test_loader.dataset, model_name)

            predictor = _get_predictor(model, test_loader, output_file, config)

            # run the model prediction on the entire dataset and save to the 'output_file' H5
            predictor.predict()

            # save resulting output path
            output_paths.append(output_file)

            runtime = time.time() - runtime
            gui_logger.info(f"Network prediction took {runtime:.2f} s")

        self._update_voxel_size(self.paths, output_paths)

        # free GPU memory after the inference is finished
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        return output_paths
def main():
    parser = ArgumentParser()
    parser.add_argument("-r", "--runconfig", dest='runconfig', type=str, required=True,
                        help="The run config yaml file")
    parser.add_argument("-n", "--numworkers", dest='numworkers', type=int, required=True,
                        help="Number of workers")
    parser.add_argument("-d", "--device", dest='device', type=str, required=False,
                        help="Device")
    args = parser.parse_args()
    runconfig = args.runconfig
    nworkers = int(args.numworkers)

    # Load configuration
    config = load_config(runconfig, nworkers, args.device)

    # Create the model
    model = get_model(config['model'])

    # Load model state
    model_path = config['model_path']
    logger.info(f'Loading model from {model_path}...')
    utils.load_checkpoint(model_path, model)

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for prediction')

    logger.info(f"Sending the model to '{device}'")
    model = model.to(device)

    output_dir = config['loaders'].get('output_dir', None)
    if output_dir is not None:
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f'Saving predictions to: {output_dir}')

    # create predictor instance
    predictor = _get_predictor(model, output_dir, config)

    for test_loader in get_test_loaders(config):
        # run the model prediction on the test_loader and save the results in the output_dir
        predictor(test_loader)
def main():
    # Load and log experiment configuration
    config = load_config()
    logger.info(config)

    manual_seed = config.get('manual_seed', None)
    if manual_seed is not None:
        logger.info(f'Seed the RNG for all devices with {manual_seed}')
        torch.manual_seed(manual_seed)
        # see https://pytorch.org/docs/stable/notes/randomness.html
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Create the model
    model = get_model(config)

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for training')

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(device)

    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = _create_optimizer(config, model)

    # Create learning rate adjustment strategy
    lr_scheduler = _create_lr_scheduler(config, optimizer)

    # Create model trainer
    trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                              loss_criterion=loss_criterion, eval_criterion=eval_criterion, loaders=loaders)
    # Start training
    trainer.fit()
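# A minimal sketch of what get_number_of_learnable_parameters is assumed to
# compute (the actual helper lives in the project's utils): count only the
# parameters that receive gradients.
def get_number_of_learnable_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)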
def create_trainer(config):
    # Create the model
    model = get_model(config['model'])

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for training')

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(device)

    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = create_optimizer(config['optimizer'], model)

    # Create learning rate adjustment strategy
    lr_scheduler = create_lr_scheduler(config.get('lr_scheduler', None), optimizer)

    trainer_config = config['trainer']
    # Create tensorboard formatter
    tensorboard_formatter = get_tensorboard_formatter(trainer_config.pop('tensorboard_formatter', None))
    # Create trainer
    resume = trainer_config.pop('resume', None)
    pre_trained = trainer_config.pop('pre_trained', None)

    return UNet3DTrainer(model=model,
                         optimizer=optimizer,
                         lr_scheduler=lr_scheduler,
                         loss_criterion=loss_criterion,
                         eval_criterion=eval_criterion,
                         tensorboard_formatter=tensorboard_formatter,
                         device=config['device'],
                         loaders=loaders,
                         resume=resume,
                         pre_trained=pre_trained,
                         **trainer_config)
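# Hypothetical usage of create_trainer() above: parse the config and start
# training, mirroring the main() entry points elsewhere in this section.
if __name__ == '__main__':
    config = load_config()
    trainer = create_trainer(config)
    trainer.fit()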
def load_model(config):
    # create the model
    model = get_model(config)

    # load model state
    model_path = config["model_path"]
    logger.info(f"Loading model from {model_path}...")
    utils.load_checkpoint(model_path, model)

    device = config["device"]
    logger.info(f"Sending the model to '{device}'")
    model = model.to(device)

    return model
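# Hypothetical usage of load_model() above for inference; 'config' is assumed
# to be the same dict consumed by the other entry points. eval() freezes
# dropout and batch-norm statistics; torch.no_grad() disables gradient tracking.
import torch

model = load_model(config)
model.eval()
with torch.no_grad():
    # a single random 3D input: (batch, channel, depth, height, width)
    x = torch.rand(1, 1, 32, 64, 64, device=config["device"])
    prediction = model(x)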
def build(config):
    # Create the model
    model = get_model(config['model'])

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for training')

    # put the model on GPUs
    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(device)

    # Log the number of learnable parameters
    logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

    # Create loss criterion
    loss_criterion = get_loss_criterion(config)
    # Create evaluation metric
    eval_criterion = get_evaluation_metric(config)

    # Create data loaders
    loaders = get_train_loaders(config)

    # Create the optimizer
    optimizer = create_optimizer(config['optimizer'], model)

    # Create learning rate adjustment strategy
    lr_scheduler = create_lr_scheduler(config.get('lr_scheduler', None), optimizer)

    # Create model trainer
    trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                              loss_criterion=loss_criterion, eval_criterion=eval_criterion, loaders=loaders)

    return trainer
def main():
    # Load configuration
    config = load_config()

    # Create the model
    model = get_model(config)

    # Load model state
    model_path = config['model_path']
    logger.info(f'Loading model from {model_path}...')
    utils.load_checkpoint(model_path, model)

    # use DataParallel if more than 1 GPU available
    device = config['device']
    if torch.cuda.device_count() > 1 and not device.type == 'cpu':
        model = nn.DataParallel(model)
        logger.info(f'Using {torch.cuda.device_count()} GPUs for prediction')

    logger.info(f"Sending the model to '{device}'")
    model = model.to(device)

    # Log the number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logger.info(f'Number of parameters: {param_count}')

    logger.info('Loading HDF5 datasets...')
    for test_loader in hdf5.get_test_loaders(config):
        logger.info(f"Processing '{test_loader.dataset.file_path}'...")

        output_file = _get_output_file(test_loader.dataset)

        predictor = _get_predictor(model, test_loader, output_file, config)
        # run the model prediction on the entire dataset and save to the 'output_file' H5
        predictor.predict()

    # convert the saved H5 predictions to softmax probabilities stored as NPZ
    path = './predict_h5/'
    output_path = './predict_npz/'
    datalist = os.listdir(path)
    for i in datalist:
        with h5py.File(path + i, 'r') as file:
            # softmax over the channel axis, keeping channel 1 (foreground)
            ar = file['predictions'][1, :, :, :]
            new_ar = np.exp(ar) / np.sum(np.exp(file['predictions']), axis=0)
            np.savez(output_path + i[0:-4] + '.npz', prediction=new_ar)
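# The NPZ conversion above is a softmax over the channel axis. A numerically
# stable sketch (subtracting the per-voxel max before np.exp avoids overflow
# for large logits); the function name is illustrative only.
import numpy as np


def channel_softmax(logits, channel=1, axis=0):
    shifted = logits - logits.max(axis=axis, keepdims=True)
    exp = np.exp(shifted)
    probs = exp / exp.sum(axis=axis, keepdims=True)
    # probability map of the requested channel
    return np.take(probs, channel, axis=axis)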
def main():
    # Load configuration
    config = load_config()

    # Create the model
    model = get_model(config)

    # Load model state
    model_path = config['model_path']
    logger.info(f'Loading model from {model_path}...')
    utils.load_checkpoint(model_path, model)

    logger.info(f"Sending the model to '{config['device']}'")
    model = model.to(config['device'])

    logger.info('Loading HDF5 datasets...')
    for test_loader in get_test_loaders(config):
        logger.info(f"Processing '{test_loader.dataset.file_path}'...")

        output_file = _get_output_file(test_loader.dataset)

        predictor = _get_predictor(model, test_loader, output_file, config)
        # run the model prediction on the entire dataset and save to the 'output_file' H5
        predictor.predict()