def convert_raw(task, overwrite, ov):
    task_name_full = get_task(task, name=True)
    task_num, task_name = task_name_full[4:].split('_', 1)
    new_task_name_full = f"Task{task_num}FG_{task_name}"

    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])
    print(cfg.pretty())

    source_splitted_dir = Path(cfg["host"]["splitted_4d_output_dir"])
    target_splitted_dir = Path(str(source_splitted_dir).replace(task_name_full, new_task_name_full))
    if target_splitted_dir.is_dir() and overwrite:
        shutil.rmtree(target_splitted_dir)
    target_splitted_dir.mkdir(parents=True)

    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(target_splitted_dir.parent / "convert_cls2fg.log", level="DEBUG")

    # update dataset_info: collapse all labels into a single foreground class
    source_data_info = Path(cfg["host"]["data_dir"])
    data_info = load_dataset_info(source_data_info)
    data_info.pop("labels")
    data_info["labels"] = {"0": "fg"}
    data_info["task"] = new_task_name_full
    save_json(data_info, target_splitted_dir.parent / "dataset.json", indent=4)

    for postfix in ["Tr", "Ts"]:
        source_image_dir = source_splitted_dir / f"images{postfix}"
        source_label_dir = source_splitted_dir / f"labels{postfix}"
        if not source_image_dir.is_dir():
            logger.info(f"{source_image_dir} is not a dir. Skipping it.")
            continue

        # copy images and labels
        shutil.copytree(source_image_dir, target_splitted_dir / f"images{postfix}")
        shutil.copytree(source_label_dir, target_splitted_dir / f"labels{postfix}")

        # remap properties files to the single foreground class
        target_label_dir = target_splitted_dir / f"labels{postfix}"
        for f in target_label_dir.glob("*.json"):
            props = load_json(f)
            props["instances"] = {key: 0 for key in props["instances"].keys()}
            save_json(props, f)
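# Minimal usage sketch (assumption, not part of the original script): the task id and
# flag values below are illustrative only; the CLI wiring of the original file is not
# shown in this fragment.
if __name__ == '__main__':
    convert_raw(task="000", overwrite=True, ov=[])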
task_names = [n for n in PurePath(nnunet_dir).parts if "Task" in n]
if len(task_names) > 1:
    logger.error(
        f"Found multiple task names; trying to continue with {task_names[-1]}"
    )
logger.info(f"Found nnunet task {task_names[-1]} in nnunet path")
nnunet_task = task_names[-1]

if task is None:
    logger.info(f"Using nnunet task {nnunet_task} as detection task id")
    task = nnunet_task
else:
    task = get_task(task, name=True)
task_dir = Path(os.getenv("det_models")) / task

initialize_config_module(config_module="nndet.conf")
cfg = compose(task, "config.yaml", overrides=[])

logger.remove()
logger.add(sys.stdout, level="INFO")
log_file = task_dir / "nnUNet" / "import.log"
logger.add(log_file, level="INFO")

if simple:
    nndet_unet_dir = task_dir / "nnUNet_Simple" / "consolidated"
else:
    nndet_unet_dir = task_dir / "nnUNet" / "consolidated"

instance_classes = cfg["data"]["labels"]
stuff_classes = cfg.get("labels_stuff", {})
num_instance_classes = len(instance_classes)
stuff_classes = {
def _train(
    task: str,
    ov: List[str],
    do_sweep: bool,
):
    """
    Run training

    Args:
        task: task to run training for
        ov: overwrites for config manager
        do_sweep: determine best empirical parameters for run
    """
    print(f"Overwrites: {ov}")
    initialize_config_module(config_module="nndet.conf")
    cfg = compose(task, "config.yaml", overrides=ov if ov is not None else [])

    assert cfg.host.parent_data is not None, 'Parent data can not be None'
    assert cfg.host.parent_results is not None, 'Output dir can not be None'

    train_dir = init_train_dir(cfg)

    pl_logger = MLFlowLogger(
        experiment_name=cfg["task"],
        tags={
            "host": socket.gethostname(),
            "fold": cfg["exp"]["fold"],
            "task": cfg["task"],
            "job_id": os.getenv('LSB_JOBID', 'no_id'),
            "mlflow.runName": cfg["exp"]["id"],
        },
        save_dir=os.getenv("MLFLOW_TRACKING_URI", "./mlruns"),
    )
    pl_logger.log_hyperparams(
        flatten_mapping(
            {"model": OmegaConf.to_container(cfg["model_cfg"], resolve=True)}))
    pl_logger.log_hyperparams(
        flatten_mapping({
            "trainer": OmegaConf.to_container(cfg["trainer_cfg"], resolve=True)
        }))

    logger.remove()
    logger.add(sys.stdout, format="{level} {message}", level="INFO")
    log_file = Path(os.getcwd()) / "train.log"
    logger.add(log_file, level="INFO")
    logger.info(f"Log file at {log_file}")

    meta_data = {}
    meta_data["torch_version"] = str(torch.__version__)
    meta_data["date"] = str(datetime.now())
    meta_data["git"] = log_git(nndet.__path__[0], repo_name="nndet")
    save_json(meta_data, "./meta.json")
    try:
        write_requirements_to_file("requirements.txt")
    except Exception as e:
        logger.error(f"Could not log req: {e}")

    plan_path = Path(str(cfg.host["plan_path"]))
    plan = load_pickle(plan_path)
    save_json(create_debug_plan(plan), "./plan_debug.json")

    data_dir = Path(cfg.host["preprocessed_output_dir"]) / plan["data_identifier"] / "imagesTr"

    datamodule = Datamodule(
        augment_cfg=OmegaConf.to_container(cfg["augment_cfg"], resolve=True),
        plan=plan,
        data_dir=data_dir,
        fold=cfg["exp"]["fold"],
    )
    module = MODULE_REGISTRY[cfg["module"]](
        model_cfg=OmegaConf.to_container(cfg["model_cfg"], resolve=True),
        trainer_cfg=OmegaConf.to_container(cfg["trainer_cfg"], resolve=True),
        plan=plan,
    )

    callbacks = []
    checkpoint_cb = ModelCheckpoint(
        dirpath=train_dir,
        filename='model_best',
        save_last=True,
        save_top_k=1,
        monitor=cfg["trainer_cfg"]["monitor_key"],
        mode=cfg["trainer_cfg"]["monitor_mode"],
    )
    checkpoint_cb.CHECKPOINT_NAME_LAST = 'model_last'
    callbacks.append(checkpoint_cb)
    callbacks.append(LearningRateMonitor(logging_interval="epoch"))

    OmegaConf.save(cfg, str(Path(os.getcwd()) / "config.yaml"))
    OmegaConf.save(cfg, str(Path(os.getcwd()) / "config_resolved.yaml"), resolve=True)
    save_pickle(plan, train_dir / "plan.pkl")  # backup plan
    splits = load_pickle(
        Path(cfg.host.preprocessed_output_dir) / datamodule.splits_file)
    save_pickle(splits, train_dir / "splits.pkl")

    trainer_kwargs = {}
    if cfg["train"]["mode"].lower() == "resume":
        trainer_kwargs["resume_from_checkpoint"] = train_dir / "model_last.ckpt"

    num_gpus = cfg["trainer_cfg"]["gpus"]
    logger.info(f"Using {num_gpus} GPUs for training")
    plugins = cfg["trainer_cfg"].get("plugins", None)
    logger.info(f"Using {plugins} plugins for training")

    trainer = pl.Trainer(
        gpus=list(range(num_gpus)) if num_gpus > 1 else num_gpus,
        accelerator=cfg["trainer_cfg"]["accelerator"],
        precision=cfg["trainer_cfg"]["precision"],
        amp_backend=cfg["trainer_cfg"]["amp_backend"],
        amp_level=cfg["trainer_cfg"]["amp_level"],
        benchmark=cfg["trainer_cfg"]["benchmark"],
        deterministic=cfg["trainer_cfg"]["deterministic"],
        callbacks=callbacks,
        logger=pl_logger,
        max_epochs=module.max_epochs,
        progress_bar_refresh_rate=None if bool(int(os.getenv("det_verbose", 1))) else 0,
        reload_dataloaders_every_epoch=False,
        num_sanity_val_steps=10,
        weights_summary='full',
        plugins=plugins,
        terminate_on_nan=True,  # TODO: make modular
        move_metrics_to_cpu=True,
        **trainer_kwargs,
    )
    trainer.fit(module, datamodule=datamodule)

    if do_sweep:
        case_ids = splits[cfg["exp"]["fold"]]["val"]
        if "debug" in cfg and "num_cases_val" in cfg["debug"]:
            case_ids = case_ids[:cfg["debug"]["num_cases_val"]]

        inference_plan = module.sweep(
            cfg=OmegaConf.to_container(cfg, resolve=True),
            save_dir=train_dir,
            train_data_dir=data_dir,
            case_ids=case_ids,
            run_prediction=True,
        )

        plan["inference_plan"] = inference_plan
        save_pickle(plan, train_dir / "plan_inference.pkl")

        ensembler_cls = module.get_ensembler_cls(
            key="boxes", dim=plan["network_dim"])  # TODO: make this configurable
        for restore in [True, False]:
            target_dir = train_dir / "val_predictions" if restore else \
                train_dir / "val_predictions_preprocessed"
            extract_results(
                source_dir=train_dir / "sweep_predictions",
                target_dir=target_dir,
                ensembler_cls=ensembler_cls,
                restore=restore,
                **inference_plan,
            )

        _evaluate(
            task=cfg["task"],
            model=cfg["exp"]["id"],
            fold=cfg["exp"]["fold"],
            test=False,
            do_boxes_eval=True,  # TODO: make this configurable
            do_analyze_boxes=True,  # TODO: make this configurable
        )
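# Hypothetical direct invocation (assumption): the original project drives _train
# through a CLI wrapper that is not shown here; the task name and override below are
# illustrative values only.
if __name__ == '__main__':
    _train(task="Task000D3_Example", ov=["exp.fold=0"], do_sweep=True)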
help="overwrites for config file", required=False, ) args = parser.parse_args() tasks = args.tasks new_tasks = args.new_tasks ov = args.overwrites stuff = args.stuff print(f"Overwrites: {ov}") initialize_config_module(config_module="nndet.conf") if new_tasks is None: new_tasks = tasks for task, new_task in zip(tasks, new_tasks): task = get_task(task, name=True) if nnUNet_raw_data is None: raise RuntimeError(f"Please set `nnUNet_raw_data` for nnUNet!") target_dir = Path(nnUNet_raw_data) / new_task logger.remove() logger.add(sys.stdout, level="INFO") logger.add(target_dir / "nnunet_export.log", level="DEBUG") _ov = copy.deepcopy(ov) if ov is not None else [] cfg = compose(task, "config.yaml", overrides=ov if ov is not None else []) print(cfg.pretty) run(cfg, target_dir, stuff=stuff)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'tasks', type=str, nargs='+',
        help="Single or multiple task identifiers to process consecutively",
    )
    parser.add_argument(
        '-o', '--overwrites', type=str, nargs='+',
        help="overwrites for config file",
        default=[], required=False,
    )
    parser.add_argument(
        '--full_check',
        help="Run a full check of the data.",
        action='store_true',
    )
    parser.add_argument(
        '--no_check',
        help="Skip basic check.",
        action='store_true',
    )
    parser.add_argument(
        '-np', '--num_processes', type=int, default=4, required=False,
        help="Number of processes to use for cropping.",
    )
    parser.add_argument(
        '-npp', '--num_processes_preprocessing', type=int, default=3, required=False,
        help="Number of processes to use for resampling.",
    )
    args = parser.parse_args()

    tasks = args.tasks
    ov = args.overwrites
    full_check = args.full_check
    no_check = args.no_check
    num_processes = args.num_processes
    num_processes_preprocessing = args.num_processes_preprocessing

    initialize_config_module(config_module="nndet.conf")

    # perform preprocessing checks first
    if not no_check:
        for task in tasks:
            _ov = copy.deepcopy(ov) if ov is not None else []
            cfg = compose(task, "config.yaml", overrides=_ov)
            check_dataset_file(cfg["task"])
            check_data_and_label_splitted(
                cfg["task"],
                test=False,
                labels=True,
                full_check=full_check,
            )
            if cfg["data"]["test_labels"]:
                check_data_and_label_splitted(
                    cfg["task"],
                    test=True,
                    labels=True,
                    full_check=full_check,
                )

    # start preprocessing
    for task in tasks:
        _ov = copy.deepcopy(ov) if ov is not None else []
        cfg = compose(task, "config.yaml", overrides=_ov)
        run(
            OmegaConf.to_container(cfg, resolve=True),
            num_processes=num_processes,
            num_processes_preprocessing=num_processes_preprocessing,
        )
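# Standard entry-point guard (assumed to match how the original preprocessing script
# is invoked from the command line).
if __name__ == '__main__':
    main()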