def interpret(
    maps_dir,
    data_group,
    name,
    caps_directory,
    tsv_path,
    selection_metrics,
    multi_cohort,
    target_node,
    save_individual,
    batch_size,
    nproc,
    use_cpu,
    verbose=0,
):
    """
    Load a MAPS and run `MapsManager.interpret` on it.

    All arguments except `maps_dir` and `verbose` are forwarded unchanged to
    `MapsManager.interpret`.

    Args:
        maps_dir: path given to the `MapsManager` constructor.
        data_group: forwarded as `data_group`.
        name: forwarded as `name`.
        caps_directory: forwarded as `caps_directory`.
        tsv_path: forwarded as `tsv_path`.
        selection_metrics: forwarded as `selection_metrics`.
        multi_cohort: forwarded as `multi_cohort`.
        target_node: forwarded as `target_node`.
        save_individual: forwarded as `save_individual`.
        batch_size: forwarded as `batch_size`.
        nproc: forwarded as `num_workers`.
        use_cpu: forwarded as `use_cpu`.
        verbose: level of verbosity (0: warning, 1: info, 2: debug). Values
            greater than 2 are treated as debug.
    """
    verbose_list = ["warning", "info", "debug"]
    # Clamp instead of indexing blindly: a count above 2 (e.g. "-vvv" on a
    # command line) used to raise an IndexError.
    if verbose > 2:
        verbose_str = "debug"
    else:
        verbose_str = verbose_list[verbose]

    maps_manager = MapsManager(maps_dir, verbose=verbose_str)
    maps_manager.interpret(
        data_group=data_group,
        name=name,
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        target_node=target_node,
        save_individual=save_individual,
        batch_size=batch_size,
        num_workers=nproc,
        use_cpu=use_cpu,
    )
def train(
    maps_dir: str,
    train_dict: Dict[str, Any],
    split_list: List[int],
    erase_existing: bool = True,
):
    """
    Build a `MapsManager` from the training parameters and train it.

    Args:
        maps_dir: path given to the `MapsManager` constructor.
        train_dict: parameters given to the `MapsManager` constructor.
        split_list: forwarded to `MapsManager.train` as `split_list`.
        erase_existing: forwarded to `MapsManager.train` as `overwrite`.
    """
    MapsManager(maps_dir, train_dict, verbose="info").train(
        split_list=split_list,
        overwrite=erase_existing,
    )
def test_interpret(cli_commands):
    """
    Train a model through the `clinicadl` command line, then interpret it.

    The training writes to the local `results` folder, which is removed
    before the run (if present) and after a successful run.

    Args:
        cli_commands: command-line tokens appended to `clinicadl`.
    """
    if os.path.exists("results"):
        shutil.rmtree("results")

    # os.system returns 0 on success. Check it before touching the MAPS so a
    # failed training is reported as such instead of as a MapsManager error.
    exit_status = os.system("clinicadl " + " ".join(cli_commands))
    assert exit_status == 0, "clinicadl training command failed"

    maps_manager = MapsManager("results", verbose="debug")
    maps_manager.interpret("train", "test")
    # Called only to check that the interpretation can be read back.
    maps_manager.get_interpretation("train", "test")

    shutil.rmtree("results")
def predict(
    maps_dir,
    data_group,
    caps_directory,
    tsv_path,
    labels=True,
    gpu=True,
    num_workers=0,
    batch_size=1,
    prepare_dl=True,
    selection_metrics=None,
    diagnoses=None,
    multi_cohort=False,
    overwrite=False,
):
    """
    Load a MAPS and compute the global metrics and individual values of every
    model selected through one of the metrics in `selection_metrics`.

    Args:
        maps_dir (str): path given to the `MapsManager` constructor.
        data_group: prefix of all classification outputs.
        caps_directory (str): path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path (str): path to a TSV file containing the list of participants and sessions to interpret.
        labels: True by default. If False no metrics tsv files will be written.
        gpu: if true, the gpu is used (forwarded as `use_cpu=not gpu`).
        num_workers: num_workers used in DataLoader.
        batch_size: batch size of the DataLoader.
        prepare_dl: if true, uses extracted patches/slices, otherwise extracts them on-the-fly.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort (bool): If True caps_directory is the path to a TSV file linking cohort names and paths.
        overwrite (bool): If True former definition of data group is erased.
    """
    # The manager is always created at the lowest verbosity level.
    manager = MapsManager(maps_dir, verbose="warning")

    predict_options = dict(
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        diagnoses=diagnoses,
        use_labels=labels,
        prepare_dl=prepare_dl,
        batch_size=batch_size,
        num_workers=num_workers,
        use_cpu=not gpu,
        overwrite=overwrite,
    )
    manager.predict(data_group, **predict_options)
def test_resume(input_directory):
    """
    Resume a training through the command line and check that every split
    of the MAPS owns a `best-loss/train` folder afterwards.

    `input_directory` is deleted once all checks have passed.
    """
    exit_status = system(f"clinicadl -vv train resume {input_directory}")
    flag_error = not exit_status
    assert flag_error

    splits = MapsManager(input_directory)._init_split_manager().split_iterator()
    for split in splits:
        train_dir = (
            pathlib.Path(input_directory) / f"split-{split}" / "best-loss" / "train"
        )
        assert train_dir.exists()

    shutil.rmtree(input_directory)
def predict(
    maps_dir: str,
    data_group: str,
    caps_directory: str,
    tsv_path: str,
    use_labels: bool = True,
    label: str = None,
    gpu: bool = True,
    n_proc: int = 0,
    batch_size: int = 1,
    selection_metrics: List[str] = None,
    diagnoses: List[str] = None,
    multi_cohort: bool = False,
    overwrite: bool = False,
):
    """
    Load a MAPS and compute the global metrics and individual values of every
    model selected through one of the metrics in `selection_metrics`.

    Args:
        maps_dir: path to the MAPS.
        data_group: name of the data group tested.
        caps_directory: path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path: path to a TSV file containing the list of participants and sessions to interpret.
        use_labels: True by default. If False no metrics tsv files will be written.
        label: Name of the target value, if different from training.
        gpu: if true, the gpu is used.
        n_proc: num_workers used in DataLoader.
        batch_size: batch size of the DataLoader.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort: If True caps_directory is the path to a TSV file linking cohort names and paths.
        overwrite: If True former definition of data group is erased.
    """
    # The manager is always created at the lowest verbosity level.
    manager = MapsManager(maps_dir, verbose="warning")

    predict_options = dict(
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        diagnoses=diagnoses,
        label=label,
        use_labels=use_labels,
        batch_size=batch_size,
        n_proc=n_proc,
        gpu=gpu,
        overwrite=overwrite,
    )
    manager.predict(data_group, **predict_options)
def meta_maps_analysis(launch_dir, evaluation_metric="loss"):
    """
    Summarize the validation performance according to `evaluation_metric` of
    several MAPS stored in the folder `launch_dir`.

    One TSV file per selection metric is written in `launch_dir`, with one row
    per MAPS and one `fold-<fold>` column per fold. A MAPS that lacks a given
    fold or selection metric gets an empty (NaN) cell instead of making the
    whole analysis fail.

    Args:
        launch_dir (str): Path to the directory containing several MAPS.
        evaluation_metric (str): Name of the metric used for validation evaluation.

    Raises:
        ValueError: if `evaluation_metric` is absent from the validation
            metrics of one MAPS / fold / selection metric combination.
    """
    # A sub-folder is considered a MAPS when it contains a "maps.json" file.
    jobs_list = [
        job
        for job in os.listdir(launch_dir)
        if path.exists(path.join(launch_dir, job, "maps.json"))
    ]

    selection_set = set()  # Set of all selection metrics seen
    folds_set = set()  # Set of all folds seen
    performances_dict = dict()

    for job in jobs_list:
        performances_dict[job] = dict()
        maps_manager = MapsManager(path.join(launch_dir, job))
        folds = maps_manager._find_folds()
        folds_set = folds_set | set(folds)
        for fold in folds:
            performances_dict[job][fold] = dict()
            selection_metrics = maps_manager._find_selection_metrics(fold)
            selection_set = selection_set | set(selection_metrics)
            for metric in selection_metrics:
                validation_metrics = maps_manager.get_metrics(
                    "validation", fold, metric
                )
                if evaluation_metric not in validation_metrics:
                    raise ValueError(
                        f"Evaluation metric {evaluation_metric} not found in "
                        f"MAPS {job}, for fold {fold} and selection {metric}."
                    )
                performances_dict[job][fold][metric] = validation_metrics[
                    evaluation_metric
                ]

    missing_value = float("nan")

    # Produce one analysis for each selection metric
    for metric in selection_set:
        df = pd.DataFrame()
        filename = f"analysis_metric-{evaluation_metric}_selection-{metric}.tsv"
        for job in jobs_list:
            for fold in folds_set:
                # folds_set / selection_set are unions over all MAPS, so this
                # MAPS may not own this fold or this selection metric.
                df.loc[job, f"fold-{fold}"] = (
                    performances_dict[job].get(fold, {}).get(metric, missing_value)
                )
        df.to_csv(path.join(launch_dir, filename), sep="\t")
def test_predict(predict_commands):
    """
    Run a prediction on the `test-RANDOM` data group and read its outputs back.

    Args:
        predict_commands: tuple `(model_folder, use_labels, modes)`.
    """
    model_folder, use_labels, modes = predict_commands
    data_group = "test-RANDOM"

    # Start from a clean state: remove the outputs of a previous run.
    previous_outputs = join(model_folder, "split-0/best-loss/test-RANDOM")
    if exists(previous_outputs):
        shutil.rmtree(previous_outputs)

    # Correction of JSON file for ROI
    if "roi" in modes:
        json_path = join(model_folder, "maps.json")
        with open(json_path, "r") as json_file:
            parameters = json.load(json_file)
        parameters["roi_list"] = ["leftHippocampusBox", "rightHippocampusBox"]
        # Serialize before re-opening the file so a serialization failure
        # cannot leave a truncated maps.json behind.
        serialized = json.dumps(parameters, skipkeys=True, indent=4)
        with open(json_path, "w") as json_file:
            json_file.write(serialized)

    maps_manager = MapsManager(model_folder, verbose="debug")
    maps_manager.predict(
        data_group=data_group,
        caps_directory="data/dataset/OasisCaps_example",
        tsv_path="data/dataset/OasisCaps_example/data.tsv",
        gpu=False,
        use_labels=use_labels,
        overwrite=True,
    )

    for mode in modes:
        maps_manager.get_prediction(data_group=data_group, mode=mode)
        if not use_labels:
            continue
        maps_manager.get_metrics(data_group=data_group, mode=mode)
def save_tensor(
    maps_dir,
    data_group,
    caps_directory,
    tsv_path,
    gpu=True,
    selection_metrics=None,
    diagnoses=None,
    multi_cohort=False,
    nifti=False,
    overwrite=False,
):
    """
    Load a MAPS, compute the reconstruction outputs of all the selected
    models and save them in the MAPS.

    Args:
        maps_dir (str): path given to the `MapsManager` constructor.
        data_group: prefix of all classification outputs.
        caps_directory (str): path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path (str): path to a TSV file containing the list of participants and sessions to interpret.
        gpu: if true, the gpu is used.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort (bool): If True caps_directory is the path to a TSV file linking cohort names and paths.
        nifti (bool): If True will save the outputs as nifti files instead of Pytorch tensors.
        overwrite (bool): If True former definition of data group is erased.
    """
    # The manager is always created at the lowest verbosity level.
    manager = MapsManager(maps_dir, verbose="warning")

    save_options = dict(
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        multi_cohort=multi_cohort,
        diagnoses=diagnoses,
        gpu=gpu,
        nifti=nifti,
        overwrite=overwrite,
    )
    manager.save_tensors(data_group, **save_options)
def automatic_resume(model_path, user_split_list=None, verbose=0):
    """
    Resume the interrupted splits of a MAPS and train the ones never started.

    Splits found in the MAPS fall into two groups:
      - stopped: the split folder contains a "tmp" entry;
      - finished: not stopped, and the split folder contains at least one
        entry whose name includes "best-".
    Splits yielded by the split manager that are in neither group are absent.

    Args:
        model_path: path to the MAPS.
        user_split_list: forwarded to `_init_split_manager` as `split_list`.
        verbose: index in ["warning", "info", "debug"].
    """
    logger = getLogger("clinicadl")
    verbosity = ["warning", "info", "debug"][verbose]
    maps_manager = MapsManager(model_path, verbose=verbosity)

    def split_content(split):
        # Entries of the folder holding the given split.
        return os.listdir(path.join(model_path, f"{maps_manager.split_name}-{split}"))

    existing_split_list = maps_manager._find_splits()

    stopped_splits = [
        split for split in existing_split_list if "tmp" in split_content(split)
    ]

    # Find finished split
    finished_splits = [
        split
        for split in existing_split_list
        if split not in stopped_splits
        and any("best-" in entry for entry in split_content(split))
    ]

    split_manager = maps_manager._init_split_manager(split_list=user_split_list)
    absent_splits = list()
    for split in split_manager.split_iterator():
        if split in finished_splits or split in stopped_splits:
            continue
        absent_splits.append(split)

    # To ensure retro-compatibility with random search
    logger.info(
        f"Finished splits {finished_splits}\n"
        f"Stopped splits {stopped_splits}\n"
        f"Absent splits {absent_splits}"
    )

    if stopped_splits:
        maps_manager.resume(stopped_splits)
    if absent_splits:
        maps_manager.train(absent_splits, overwrite=True)
def test_predict(predict_commands):
    """
    Run a CPU prediction on the `test-RANDOM` data group and read its outputs back.

    Args:
        predict_commands: tuple `(model_folder, use_labels, modes)`.
    """
    model_folder, use_labels, modes = predict_commands
    data_group = "test-RANDOM"

    # Start from a clean state: remove the outputs of a previous run.
    previous_outputs = join(model_folder, "fold-0/best-loss/test-RANDOM")
    if os.path.exists(previous_outputs):
        shutil.rmtree(previous_outputs)

    maps_manager = MapsManager(model_folder, verbose="debug")
    maps_manager.predict(
        data_group=data_group,
        caps_directory="data/dataset/OasisCaps_example",
        tsv_path="data/dataset/OasisCaps_example/data.tsv",
        use_cpu=True,
        use_labels=use_labels,
        overwrite=True,
    )

    for mode in modes:
        maps_manager.get_prediction(data_group=data_group, mode=mode)
        if not use_labels:
            continue
        maps_manager.get_metrics(data_group=data_group, mode=mode)
def automatic_resume(model_path, verbose=0):
    """
    Resume the interrupted splits of a MAPS and train the ones never started.

    Split folders are the entries of `model_path` named `split-<index>`:
      - stopped: the split folder contains a "tmp" entry;
      - finished: every other existing split folder.
    Splits yielded by the split manager that are in neither group are absent.

    Args:
        model_path: path to the MAPS.
        verbose: index in ["warning", "info", "debug"].

    Raises:
        ValueError: if the MAPS still contains `fold-*` folders (former
            naming convention).
    """
    logger = getLogger("clinicadl")

    verbose_list = ["warning", "info", "debug"]
    maps_manager = MapsManager(model_path, verbose=verbose_list[verbose])

    if len(glob(os.path.join(model_path, "fold-*"))) > 0:
        raise ValueError(
            "This MAPS cannot be resumed with the current version of ClinicaDL. "
            "Please use the same version as for training or rename manually the folders "
            "'fold-*' in 'split-*' to respect the new MAPS convention."
        )

    # The former test `split[:4:] == "split"` compared a 4-character slice
    # with a 5-character string, so no split folder was ever detected.
    split_list = sorted(
        int(entry.split("-")[1])
        for entry in os.listdir(model_path)
        if entry.startswith("split-")
    )

    stopped_splits = [
        split
        for split in split_list
        if "tmp" in os.listdir(os.path.join(model_path, f"split-{split}"))
    ]
    finished_splits = [split for split in split_list if split not in stopped_splits]

    split_manager = maps_manager._init_split_manager()
    absent_splits = [
        split
        for split in split_manager.split_iterator()
        if split not in finished_splits and split not in stopped_splits
    ]

    # To ensure retro-compatibility with random search
    logger.info(
        f"Finished splits {finished_splits}\n"
        f"Stopped splits {stopped_splits}\n"
        f"Absent splits {absent_splits}"
    )

    # Only call the manager when there is actual work to do.
    if stopped_splits:
        maps_manager.resume(stopped_splits)
    if absent_splits:
        maps_manager.train(absent_splits)
def train(maps_dir, train_dict, folds, erase_existing=True):
    """
    Build a `MapsManager` from the training parameters and train it.

    Args:
        maps_dir: path given to the `MapsManager` constructor.
        train_dict: parameters given to the `MapsManager` constructor.
        folds: forwarded to `MapsManager.train` as `folds`.
        erase_existing: forwarded to `MapsManager.train` as `overwrite`.
    """
    MapsManager(maps_dir, train_dict, verbose="info").train(
        folds=folds,
        overwrite=erase_existing,
    )
def interpret(
    maps_dir: str,
    data_group: str,
    name: str,
    caps_directory: str,
    tsv_path: str,
    selection_metrics: List[str],
    diagnoses: List[str],
    multi_cohort: bool,
    target_node: int,
    save_individual: bool,
    batch_size: int,
    n_proc: int,
    gpu: bool,
    verbose=0,
    overwrite: bool = False,
    overwrite_name: bool = False,
):
    """
    Load a MAPS and interpret every model selected through one of the
    metrics in `selection_metrics`.

    Args:
        maps_dir: path to the MAPS.
        data_group: name of the data group interpreted.
        name: forwarded to `MapsManager.interpret` as `name`.
        caps_directory: path to the CAPS folder. For more information please refer to
            [clinica documentation](https://aramislab.paris.inria.fr/clinica/docs/public/latest/CAPS/Introduction/).
        tsv_path: path to a TSV file containing the list of participants and sessions to interpret.
        selection_metrics: list of metrics to find best models to be evaluated.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        multi_cohort: If True caps_directory is the path to a TSV file linking cohort names and paths.
        target_node: Node from which the interpretation is computed.
        save_individual: If True saves the individual map of each participant / session couple.
        batch_size: batch size of the DataLoader.
        n_proc: num_workers used in DataLoader.
        gpu: if true, the gpu is used.
        verbose: level of verbosity (0: warning, 1: info, 2: debug). Values above 2 mean debug.
        overwrite: If True former definition of data group is erased.
        overwrite_name: If True former interpretability map with the same name is erased.
    """
    levels = ["warning", "info", "debug"]
    # Anything above the highest known level falls back to "debug".
    verbose_str = "debug" if verbose > 2 else levels[verbose]

    manager = MapsManager(maps_dir, verbose=verbose_str)

    interpret_options = dict(
        data_group=data_group,
        name=name,
        caps_directory=caps_directory,
        tsv_path=tsv_path,
        selection_metrics=selection_metrics,
        diagnoses=diagnoses,
        multi_cohort=multi_cohort,
        target_node=target_node,
        save_individual=save_individual,
        batch_size=batch_size,
        n_proc=n_proc,
        gpu=gpu,
        overwrite=overwrite,
        overwrite_name=overwrite_name,
    )
    manager.interpret(**interpret_options)