def run_process(
    self,
    target_spacing: Sequence[float],
    case_id: str,
    output_dir_stage: Path,
    cropped_data_dir: Path,
) -> None:
    """
    Preprocess a single cropped case and persist the result.

    The processed arrays, candidate boxes and case properties are all
    written into :param:`output_dir_stage`.

    Args:
        target_spacing: target spacing for processed case
        case_id: case identifier
        output_dir_stage: path to output directory
        cropped_data_dir: path to source directory
    """
    case_data, case_seg, props = load_case_cropped(cropped_data_dir, case_id)
    # add a leading channel axis to the segmentation before processing
    case_seg = case_seg[None]
    case_data, case_seg, props = self.apply_process(
        case_data, target_spacing, props, case_seg)
    props["use_nonzero_mask_for_norm"] = self.use_mask_for_norm

    # cast to the dtypes expected downstream before saving
    case_data = case_data.astype(np.float32)
    case_seg = case_seg.astype(np.int32)
    candidates = self.compute_candidates(
        data=case_data,
        seg=case_seg,
        properties=props,
    )

    logger.info(f"Saving: {case_id} into {output_dir_stage}.")
    np.savez_compressed(
        str(output_dir_stage / f"{case_id}.npz"),
        data=case_data,
        seg=case_seg,
    )
    save_pickle(candidates, output_dir_stage / f"{case_id}_boxes.pkl")
    save_pickle(props, output_dir_stage / f"{case_id}.pkl")
def create_splits(source, target): files = [] for p in source.glob('subset*'): path = Path(p) if not p.is_dir(): continue _files = [ str(i).rsplit('.', 1)[0] for i in path.iterdir() if i.suffix == ".mhd" ] files.append(_files) splits = [] for i in range(len(files)): train_ids = list(range(len(files))) test = files[i] train_ids.pop(i) val = files[(i + 1) % len(files)] train_ids.pop((i + 1) % len(files)) assert len(train_ids) == len(files) - 2 train = [tr for tri in train_ids for tr in files[tri]] splits.append({"train": train, "val": val, "test": test}) save_pickle(splits, target)
def run_test(
    self,
    data_files,
    target_spacing,
    target_dir: PathLike,
) -> None:
    """
    Preprocess and save a single test case.

    Args:
        data_files: path to data files (first entry determines the case id)
        target_spacing: spacing to resample to
        target_dir: directory to save data to
    """
    target_dir = Path(target_dir)
    # segmentation is not saved for test cases
    data, _seg, properties = self.preprocess_test_case(
        data_files=data_files,
        target_spacing=target_spacing,
    )
    case_id = get_case_id_from_path(str(data_files[0]), remove_modality=True)
    np.savez_compressed(str(target_dir / f"{case_id}.npz"), data=data)
    # BUGFIX: the properties pickle was written without the ".pkl" suffix,
    # inconsistent with run_process which saves f"{case_id}.pkl"
    save_pickle(properties, target_dir / f"{case_id}.pkl")
def _sweep(
    task: str,
    model: str,
    fold: int,
):
    """
    Determine best postprocessing parameters for a trained model.

    Runs the module's parameter sweep on the validation split of the given
    fold, saves the resulting inference plan, extracts predictions (raw and
    restored to original image space) and evaluates them.

    Args:
        task: current task
        model: full name of the model run to determine empirical parameters
            for, e.g. RetinaUNetV001_D3V001_3d
        fold: current fold
    """
    # locate the trained model via the det_models environment variable
    nndet_data_dir = Path(os.getenv("det_models"))
    task = get_task(task, name=True, models=True)
    train_dir = nndet_data_dir / task / model / f"fold{fold}"
    cfg = OmegaConf.load(str(train_dir / "config.yaml"))
    # side effect: subsequent relative paths resolve inside the train dir
    os.chdir(str(train_dir))

    # reconfigure logging to stdout and a sweep-specific log file
    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "sweep.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")

    plan = load_pickle(train_dir / "plan.pkl")
    data_dir = Path(cfg.host["preprocessed_output_dir"]
                    ) / plan["data_identifier"] / "imagesTr"

    # re-instantiate the training module from the saved configuration
    module = MODULE_REGISTRY[cfg["module"]](
        model_cfg=OmegaConf.to_container(cfg["model_cfg"], resolve=True),
        trainer_cfg=OmegaConf.to_container(cfg["trainer_cfg"], resolve=True),
        plan=plan,
    )
    # sweep runs on the validation cases of the trained fold
    splits = load_pickle(train_dir / "splits.pkl")
    case_ids = splits[cfg["exp"]["fold"]]["val"]
    inference_plan = module.sweep(
        cfg=OmegaConf.to_container(cfg, resolve=True),
        save_dir=train_dir,
        train_data_dir=data_dir,
        case_ids=case_ids,
        run_prediction=True,  # TODO: add commmand line arg
    )
    # persist the best-found parameters for later inference
    plan["inference_plan"] = inference_plan
    save_pickle(plan, train_dir / "plan_inference.pkl")

    ensembler_cls = module.get_ensembler_cls(
        key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
    # extract predictions twice: restored to original image space and in
    # preprocessed space
    for restore in [True, False]:
        target_dir = train_dir / "val_predictions" if restore else \
            train_dir / "val_predictions_preprocessed"
        extract_results(
            source_dir=train_dir / "sweep_predictions",
            target_dir=target_dir,
            ensembler_cls=ensembler_cls,
            restore=restore,
            **inference_plan,
        )

    _evaluate(
        task=cfg["task"],
        model=cfg["exp"]["id"],
        fold=cfg["exp"]["fold"],
        test=False,
        do_boxes_eval=True,  # TODO: make this configurable
        do_analyze_boxes=True,  # TODO: make this configurable
    )
def _train(
    task: str,
    ov: List[str],
    do_sweep: bool,
):
    """
    Run training for a single fold and optionally sweep for the best
    empirical postprocessing parameters afterwards.

    Args:
        task: task to run training for
        ov: overwrites for config manager
        do_sweep: determine best empirical parameters for run
    """
    print(f"Overwrites: {ov}")
    initialize_config_module(config_module="nndet.conf")
    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])
    assert cfg.host.parent_data is not None, 'Parent data can not be None'
    assert cfg.host.parent_results is not None, 'Output dir can not be None'
    train_dir = init_train_dir(cfg)

    # MLflow logger: tag the run with host/fold/task for later filtering
    pl_logger = MLFlowLogger(
        experiment_name=cfg["task"],
        tags={
            "host": socket.gethostname(),
            "fold": cfg["exp"]["fold"],
            "task": cfg["task"],
            "job_id": os.getenv('LSB_JOBID', 'no_id'),  # LSF scheduler job id if present
            "mlflow.runName": cfg["exp"]["id"],
        },
        save_dir=os.getenv("MLFLOW_TRACKING_URI", "./mlruns"),
    )
    # hyperparameters are flattened to scalar key/value pairs for MLflow
    pl_logger.log_hyperparams(
        flatten_mapping(
            {"model": OmegaConf.to_container(cfg["model_cfg"], resolve=True)}))
    pl_logger.log_hyperparams(
        flatten_mapping({
            "trainer": OmegaConf.to_container(cfg["trainer_cfg"], resolve=True)
        }))

    # reconfigure logging to stdout and a training log file in the train dir
    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "train.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")

    # record environment metadata for reproducibility
    meta_data = {}
    meta_data["torch_version"] = str(torch.__version__)
    meta_data["date"] = str(datetime.now())
    meta_data["git"] = log_git(nndet.__path__[0], repo_name="nndet")
    save_json(meta_data, "./meta.json")
    try:
        write_requirements_to_file("requirements.txt")
    except Exception as e:
        # best effort only: missing requirements must not abort training
        logger.error(f"Could not log req: {e}")

    plan_path = Path(str(cfg.host["plan_path"]))
    plan = load_pickle(plan_path)
    save_json(create_debug_plan(plan), "./plan_debug.json")
    data_dir = Path(cfg.host["preprocessed_output_dir"]
                    ) / plan["data_identifier"] / "imagesTr"

    datamodule = Datamodule(
        augment_cfg=OmegaConf.to_container(cfg["augment_cfg"], resolve=True),
        plan=plan,
        data_dir=data_dir,
        fold=cfg["exp"]["fold"],
    )
    module = MODULE_REGISTRY[cfg["module"]](
        model_cfg=OmegaConf.to_container(cfg["model_cfg"], resolve=True),
        trainer_cfg=OmegaConf.to_container(cfg["trainer_cfg"], resolve=True),
        plan=plan,
    )

    # keep the single best checkpoint plus the last one ("model_last")
    callbacks = []
    checkpoint_cb = ModelCheckpoint(
        dirpath=train_dir,
        filename='model_best',
        save_last=True,
        save_top_k=1,
        monitor=cfg["trainer_cfg"]["monitor_key"],
        mode=cfg["trainer_cfg"]["monitor_mode"],
    )
    checkpoint_cb.CHECKPOINT_NAME_LAST = 'model_last'
    callbacks.append(checkpoint_cb)
    callbacks.append(LearningRateMonitor(logging_interval="epoch"))

    # back up config (raw and resolved), plan and splits next to the run
    OmegaConf.save(cfg, str(Path(os.getcwd()) / "config.yaml"))
    OmegaConf.save(cfg,
                   str(Path(os.getcwd()) / "config_resolved.yaml"),
                   resolve=True)
    save_pickle(plan, train_dir / "plan.pkl")  # backup plan
    splits = load_pickle(
        Path(cfg.host.preprocessed_output_dir) / datamodule.splits_file)
    save_pickle(splits, train_dir / "splits.pkl")

    trainer_kwargs = {}
    if cfg["train"]["mode"].lower() == "resume":
        # continue from the last checkpoint of a previous run
        trainer_kwargs[
            "resume_from_checkpoint"] = train_dir / "model_last.ckpt"

    num_gpus = cfg["trainer_cfg"]["gpus"]
    logger.info(f"Using {num_gpus} GPUs for training")
    plugins = cfg["trainer_cfg"].get("plugins", None)
    logger.info(f"Using {plugins} plugins for training")

    trainer = pl.Trainer(
        # explicit device list for multi-GPU, plain count otherwise
        gpus=list(range(num_gpus)) if num_gpus > 1 else num_gpus,
        accelerator=cfg["trainer_cfg"]["accelerator"],
        precision=cfg["trainer_cfg"]["precision"],
        amp_backend=cfg["trainer_cfg"]["amp_backend"],
        amp_level=cfg["trainer_cfg"]["amp_level"],
        benchmark=cfg["trainer_cfg"]["benchmark"],
        deterministic=cfg["trainer_cfg"]["deterministic"],
        callbacks=callbacks,
        logger=pl_logger,
        max_epochs=module.max_epochs,
        # disable the progress bar when det_verbose is set to 0
        progress_bar_refresh_rate=None if bool(int(os.getenv("det_verbose", 1))) else 0,
        reload_dataloaders_every_epoch=False,
        num_sanity_val_steps=10,
        weights_summary='full',
        plugins=plugins,
        terminate_on_nan=True,  # TODO: make modular
        move_metrics_to_cpu=True,
        **trainer_kwargs)
    trainer.fit(module, datamodule=datamodule)

    if do_sweep:
        # sweep postprocessing parameters on the fold's validation cases
        case_ids = splits[cfg["exp"]["fold"]]["val"]
        if "debug" in cfg and "num_cases_val" in cfg["debug"]:
            # debug mode: limit the number of validation cases
            case_ids = case_ids[:cfg["debug"]["num_cases_val"]]

        inference_plan = module.sweep(
            cfg=OmegaConf.to_container(cfg, resolve=True),
            save_dir=train_dir,
            train_data_dir=data_dir,
            case_ids=case_ids,
            run_prediction=True,
        )
        # persist the best-found parameters for later inference
        plan["inference_plan"] = inference_plan
        save_pickle(plan, train_dir / "plan_inference.pkl")

        ensembler_cls = module.get_ensembler_cls(
            key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
        # extract predictions twice: restored to original image space and in
        # preprocessed space
        for restore in [True, False]:
            target_dir = train_dir / "val_predictions" if restore else \
                train_dir / "val_predictions_preprocessed"
            extract_results(
                source_dir=train_dir / "sweep_predictions",
                target_dir=target_dir,
                ensembler_cls=ensembler_cls,
                restore=restore,
                **inference_plan,
            )

        _evaluate(
            task=cfg["task"],
            model=cfg["exp"]["id"],
            fold=cfg["exp"]["fold"],
            test=False,
            do_boxes_eval=True,  # TODO: make this configurable
            do_analyze_boxes=True,  # TODO: make this configurable
        )
class Predictor:
    # NOTE(review): `tile_case` and `predict_tiles` are called below but not
    # defined in this visible span — presumably defined further down in the
    # class; confirm before refactoring.
    def __init__(self,
                 ensembler: Dict[str, Callable],
                 models: Sequence[AbstractModel],
                 crop_size: Sequence[int],
                 overlap: float = 0.5,
                 tile_keys: Sequence[str] = ('data',),
                 model_keys: Sequence[str] = ('data',),
                 tta_transforms: Sequence[AbstractTransform] = (NoOp(),),
                 tta_inverse_transforms: Sequence[AbstractTransform] = (NoOp(),),
                 pre_transform: AbstractTransform = None,
                 post_transform: AbstractTransform = None,
                 batch_size: int = 4,
                 model_weights: Sequence[float] = None,
                 device: torch_device = "cuda:0",
                 ensemble_on_device: bool = True,
                 ):
        """
        Predict entire cases with TTA and Model-Ensembling

        Workflow
        - Load whole patient -> create predictor from patient
        - tile patient
            * for each model:
                * for each batch (batches of tiles):
                    * for each tta transform:
                        - pre transform
                        - tta transform
                        - post transform
                        - predict batch
                        - inverse tta transform
                        - forward predictions and batch to ensembler classes
        <- return patient result

        Args:
            ensembler: Callable to instantiate ensembler from case and
                properties
            models: models to ensemble
            crop_size: size of each crop (for most cases this should be the
                same as in training)
            overlap: overlap of crops
            tile_keys: keys which are tiles
            model_keys: these keys are passed as positional arguments to
                the model
            tta_transforms: tta transformations
            tta_inverse_transforms: inverse tta transformation
            pre_transform: transform which is performed before every tta
                transform
            post_transform: transform which is performed after every tta
                transform
            batch_size: batch size to use for prediction
            model_weights: additional weighting of individual models
            device: device used for prediction
            ensemble_on_device: The results will be passed to the ensembler
                class with the current device. The ensembler needs to make
                sure to avoid memory leaks!
        """
        self.ensemble_on_device = ensemble_on_device
        self.device = device
        # factories to build per-case ensemblers; instances live in
        # self.ensembler, keyed by the same names
        self.ensembler_fns = ensembler
        self.ensembler = {}
        self.models = models
        # default: weight all models equally
        self.model_weights = [1.] * len(models) if model_weights is None else model_weights
        self.crop_size = crop_size
        self.overlap = overlap
        self.tile_keys = tile_keys
        self.model_keys = model_keys
        self.batch_size = batch_size
        # every tta transform must have a matching inverse to map
        # predictions back to the untransformed space
        if len(tta_transforms) != len(tta_inverse_transforms):
            raise ValueError("Every tta transform needs a reverse transform")
        self.tta_transforms = tta_transforms
        self.tta_inverse_transforms = tta_inverse_transforms
        self.post_transform = post_transform
        self.pre_transform = pre_transform
        self.grid_mode = 'symmetric'
        self.save_get_mode = 'shift'

    @classmethod
    def create(cls, *args, **kwargs):
        """
        Create predictor object with specific ensembler objects

        Raises:
            NotImplementedError: Need to be overwritten in subclasses
        """
        raise NotImplementedError

    @classmethod
    def get_ensembler(cls, key: Hashable, dim: int) -> Callable:
        """
        Return ensembler class for specific keys
        Typically: `boxes`, `seg`, `instances`

        Args:
            key: Key to return
            dim: number of spatial dimensions the network expects

        Raises:
            NotImplementedError: Need to be overwritten in subclasses

        Returns:
            Callable: Ensembler class
        """
        raise NotImplementedError

    def predict_case(self,
                     case: Dict,
                     properties: Optional[Dict] = None,
                     save_dir: Optional[Union[Path, str]] = None,
                     case_id: Optional[str] = None,
                     restore: bool = False,
                     ) -> dict:
        """
        Load and predict a single case.

        Args:
            case: data of a single case
            properties: additional properties of the case. E.g. to restore
                prediction in original image space
            save_dir: directory to save predictions
            case_id: used for saving
            restore: restore prediction in original image space ("revert"
                preprocessing)

        Returns:
            dict: result of each ensembler (converted to numpy)
        """
        tic = time.perf_counter()
        # fresh ensembler instances for this case
        for name, fn in self.ensembler_fns.items():
            self.ensembler[name] = fn(case, properties=properties)

        tiles = self.tile_case(case)
        self.predict_tiles(tiles)

        # collect the aggregated result from every ensembler
        result = {key: value.get_case_result(restore=restore)
                  for key, value in self.ensembler.items()}

        if save_dir is not None:
            save_dir = Path(save_dir)
            save_dir.mkdir(parents=True, exist_ok=True)
            for ensembler in self.ensembler.values():
                ensembler.save_state(save_dir, name=case_id)
            save_pickle(properties, save_dir / f"{case_id}_properties.pkl")

        toc = time.perf_counter()
        logger.info(f"Prediction took {toc - tic} s")
        return result
df, num_processes=num_processes) # generate split logger.info("Generating luna splits... ") saved_original_splits = load_json(task_data_dir / "splits.json") logger.info( f"Found {len(list(saved_original_splits.keys()))} ids in splits.json") original_fold_ids = defaultdict(list) for cid, fid in saved_original_splits.items(): original_fold_ids[fid].append(cid) splits = [] for test_fold in range(10): all_folds = list(range(10)) all_folds.pop(test_fold) train_ids = [] for af in all_folds: train_ids.extend(original_fold_ids[af]) splits.append({ "train": train_ids, "val": original_fold_ids[test_fold], }) save_pickle(splits, target_preprocessed_dir / "splits_final.pkl") save_json(splits, target_preprocessed_dir / "splits_final.json") if __name__ == '__main__': main()