def main(options):
    """Evaluate every trained fold of a multi-CNN model on a test set.

    The training options are read back from ``commandline_cnn.json`` inside
    ``options.model_path``. For each entry of ``<model_path>/best_model_dir``
    every CNN index is evaluated with ``test_cnn`` and the per-CNN results
    are then combined with ``soft_voting_to_tsvs`` for both the ``best_acc``
    and ``best_loss`` selections.

    Args:
        options: namespace of command line arguments. The attributes read
            here are ``model_path``, ``input_dir``, ``tsv_path``,
            ``diagnoses``, ``preprocessing``, ``batch_size``,
            ``num_workers``, ``dataset``, ``gpu`` and ``mode``.
            ``options.diagnoses`` is overwritten with the value stored at
            training time when it is None.

    Raises:
        ValueError: if an entry of ``best_model_dir`` does not end with an
            integer fold index.
    """
    import re

    # Read json
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)
    num_cnn = compute_num_cnn(options.input_dir, options.tsv_path,
                              model_options, data="test")

    # Load test data; fall back to the diagnoses used at training time
    if options.diagnoses is None:
        options.diagnoses = model_options.diagnoses

    test_df = load_data_test(options.tsv_path, options.diagnoses)
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = torch.nn.CrossEntropyLoss()

    # Loop on all folds trained
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    for fold_dir in folds_dir:
        # Parse the whole trailing number: taking only the last character
        # would map "fold-10" to fold 0.
        index_match = re.search(r"\d+$", fold_dir)
        if index_match is None:
            raise ValueError(
                "Cannot extract a fold index from directory name %r"
                % fold_dir)
        split = int(index_match.group())
        print("Fold %i" % split)

        for cnn_index in range(num_cnn):
            dataset = return_dataset(model_options.mode, options.input_dir,
                                     test_df, options.preprocessing,
                                     transformations, options,
                                     cnn_index=cnn_index)

            test_loader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     shuffle=False,
                                     num_workers=options.num_workers,
                                     pin_memory=True)

            test_cnn(options.model_path, test_loader, options.dataset, split,
                     criterion, cnn_index, model_options, options.gpu)

        # Aggregate the per-CNN outputs of this fold
        for selection in ['best_acc', 'best_loss']:
            soft_voting_to_tsvs(
                options.model_path,
                split,
                selection,
                mode=options.mode,
                dataset=options.dataset,
                num_cnn=num_cnn,
                selection_threshold=model_options.selection_threshold)
def test_cnn(output_dir, data_loader, subset_name, split, criterion,
             model_options, gpu=False, multiclass=False):
    """Evaluate the two best checkpoints of one fold on a data loader.

    For each of the ``best_balanced_accuracy`` and ``best_loss`` selections
    the checkpoint ``model_best.pth.tar`` found under
    ``<output_dir>/fold-<split>/models/<selection>`` is loaded and run
    through ``test``. Results are written with ``mode_level_to_tsvs`` and,
    for the ``patch``, ``roi`` and ``slice`` modes, with
    ``soft_voting_to_tsvs``.

    Args:
        output_dir: root folder of the trained model.
        data_loader: loader yielding the samples to evaluate.
        subset_name: label forwarded as ``dataset`` to the writers.
        split: integer index of the fold.
        criterion: loss forwarded to ``test``.
        model_options: namespace providing ``model``, ``dropout``, ``mode``
            and ``selection_threshold``.
        gpu: forwarded to ``create_model``, ``load_model`` and ``test``.
        multiclass: forwarded to ``test``.
    """
    for selection in ("best_balanced_accuracy", "best_loss"):
        # Rebuild the architecture, then restore the best weights into it.
        network = create_model(model_options.model, gpu,
                               dropout=model_options.dropout)
        checkpoint_dir = os.path.join(output_dir, 'fold-%i' % split,
                                      'models', selection)
        network, _ = load_model(network, checkpoint_dir, gpu=gpu,
                                filename='model_best.pth.tar')

        results_df, metrics = test(network, data_loader, gpu, criterion,
                                   model_options.mode, multiclass)
        print("%s level balanced accuracy is %f"
              % (model_options.mode, metrics['balanced_accuracy']))

        mode_level_to_tsvs(output_dir, results_df, metrics, split, selection,
                           model_options.mode, dataset=subset_name)

        # Soft voting only applies to the sub-image modes.
        if model_options.mode not in ("patch", "roi", "slice"):
            continue
        soft_voting_to_tsvs(
            output_dir,
            split,
            selection=selection,
            mode=model_options.mode,
            dataset=subset_name,
            selection_threshold=model_options.selection_threshold)
model_options.preprocessing, transformations, model_options, cnn_index=cnn_index) train_loader = DataLoader(data_train, batch_size=options.batch_size, shuffle=False, num_workers=options.num_workers, pin_memory=True) valid_loader = DataLoader(data_valid, batch_size=options.batch_size, shuffle=False, num_workers=options.num_workers, pin_memory=True) test_cnn(options.model_path, train_loader, "train", split, criterion, cnn_index, model_options, options.gpu) test_cnn(options.model_path, valid_loader, "validation", split, criterion, cnn_index, model_options, options.gpu) for selection in ['best_acc', 'best_loss']: soft_voting_to_tsvs( options.model_path, split, selection, mode=options.mode, dataset=options.dataset, num_cnn=num_cnn, selection_threshold=model_options.selection_threshold)
def inference_from_model(caps_dir,
                         tsv_path,
                         model_path=None,
                         json_file=None,
                         prefix=None,
                         no_labels=False,
                         gpu=True,
                         prepare_dl=False):
    """
    Inference from previously trained model.

    This function uses a previously trained model to classify the input(s).
    The model is stored in the folder model_path and the folder structure
    produced by the training stage is assumed. In particular, to produce a
    prediction at image level it assumes that the results of the validation
    set are stored in the model_path folder, in order to perform soft-voting
    at the slice/patch level and also for multicnn.

    Only the ``best_balanced_accuracy`` checkpoint of each ``fold-*`` folder
    is evaluated.

    Args:
        caps_dir: folder containing the tensor files (.pt version of MRI)
        tsv_path: file with the name of the MRIs to process (single or
            multiple)
        model_path: trained model folder; it is searched for ``fold-*``
            sub-folders.
        json_file: file containing the training parameters.
        prefix: label forwarded as ``dataset`` to the output writers. It is
            converted with ``str``, so the default None becomes "None".
        no_labels: accepted for interface compatibility; this function does
            not read it.
        gpu: if true, it uses gpu.
        prepare_dl: if true, uses extracted patches/slices otherwise extract
            them on-the-fly.

    Returns:
        None. Prediction results and metrics are written by
        ``mode_level_to_tsvs`` and ``soft_voting_to_tsvs``; the folder
        ``<fold>/cnn_classification/best_balanced_accuracy`` is created for
        each fold.

    Raises:
        FileNotFoundError: if ``model_best.pth.tar`` is missing for a fold
            (or for one of its CNNs in multicnn mode).
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("model_path",
                        type=str,
                        help="Path to the trained model folder.")
    options = parser.parse_args([model_path])
    options = read_json(options, json_path=json_file)
    num_cnn = compute_num_cnn(caps_dir, tsv_path, options, "classify")
    print("Load model with these options:")
    print(options)

    # Overwrite options with user input
    options.use_cpu = not gpu
    options.prepare_dl = prepare_dl

    # Define the path
    currentDirectory = pathlib.Path(model_path)
    # Search for 'fold-*' pattern
    currentPattern = "fold-*"

    best_model = {
        'best_acc': 'best_balanced_accuracy',
        'best_loss': 'best_loss'
    }
    selection = best_model['best_acc']

    # Soft voting threshold: fall back to 0.8 when it was not stored at
    # training time.
    selection_thresh = getattr(options, 'selection_threshold', 0.8)

    # loop depending the number of folds found in the model folder
    for fold_dir in currentDirectory.glob(currentPattern):
        fold = int(fold_dir.name.split("-")[-1])
        # glob() results already include the model folder, so they are used
        # as is: joining them to model_path again doubles a relative prefix.
        fold_path = str(fold_dir)
        # Kept in its own variable: rebinding model_path here would make the
        # next fold resolve its paths under this fold's 'models' folder.
        fold_models_path = join(fold_path, 'models')

        if options.mode_task == 'multicnn':
            checkpoint_dirs = [
                join(fold_models_path, cnn_dir, selection)
                for cnn_dir in listdir(fold_models_path)
            ]
        else:
            checkpoint_dirs = [join(fold_models_path, selection)]

        for checkpoint_dir in checkpoint_dirs:
            checkpoint = join(checkpoint_dir, 'model_best.pth.tar')
            if not exists(checkpoint):
                raise FileNotFoundError(errno.ENOENT,
                                        strerror(errno.ENOENT), checkpoint)

        performance_dir = join(fold_path, 'cnn_classification', selection)
        makedirs(performance_dir, exist_ok=True)

        # It launches the corresponding function, depending on the mode.
        infered_classes, metrics = inference_from_model_generic(
            caps_dir, tsv_path, fold_models_path, options, num_cnn=num_cnn)

        # Prepare outputs
        usr_prefix = str(prefix)

        # Write output files at %mode level
        print("Prediction results and metrics are written in the "
              "following folder: %s" % performance_dir)

        mode_level_to_tsvs(currentDirectory,
                           infered_classes,
                           metrics,
                           fold,
                           selection,
                           options.mode,
                           dataset=usr_prefix)

        # Write files at the image level (for patch, roi and slice).
        # It assumes the existence of validation files to perform soft-voting
        if options.mode in ["patch", "roi", "slice"]:
            soft_voting_to_tsvs(currentDirectory,
                                fold,
                                selection,
                                options.mode,
                                usr_prefix,
                                num_cnn=num_cnn,
                                selection_threshold=selection_thresh)
def inference_from_model(caps_dir,
                         tsv_path,
                         model_path=None,
                         json_file=None,
                         prefix=None,
                         labels=True,
                         gpu=True,
                         num_workers=0,
                         batch_size=1,
                         prepare_dl=False,
                         selection_metrics=None,
                         diagnoses=None,
                         logger=None):
    """
    Inference from previously trained model.

    This function uses a previously trained model to classify the input(s).
    The model is stored in the folder model_path and the folder structure
    produced by the training stage is assumed. In particular, to produce a
    prediction at image level it assumes that the results of the validation
    set are stored in the model_path folder, in order to perform soft-voting
    at the slice/patch level and also for multicnn.

    Args:
        caps_dir: folder containing the tensor files (.pt version of MRI)
        tsv_path: file with the name of the MRIs to process (single or
            multiple)
        model_path: trained model folder; it is searched for ``fold-*``
            sub-folders.
        json_file: file containing the training parameters.
        prefix: prefix of all classification outputs.
        labels: by default is True. It is forwarded to
            ``inference_from_model_generic`` and, as ``use_labels``, to
            ``soft_voting_to_tsvs``.
        gpu: if true, it uses gpu.
        num_workers: num_workers used in DataLoader
        batch_size: batch size of the DataLoader
        prepare_dl: if true, uses extracted patches/slices otherwise extract
            them on-the-fly.
        selection_metrics: list of metrics to find best models to be
            evaluated; each metric ``m`` selects the ``best_<m>`` checkpoint
            folder. When None, ``["balanced_accuracy"]`` is used.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        logger: Logger instance. When None, the ``logging`` module itself is
            used.

    Returns:
        None. Prediction results are written by
        ``inference_from_model_generic`` and ``soft_voting_to_tsvs``; the
        folder ``<fold>/cnn_classification/best_<metric>`` is created for
        each fold and metric.

    Raises:
        FileNotFoundError: if ``model_best.pth.tar`` is missing for a fold
            and metric (or for one of its CNNs in multicnn mode).
    """
    import argparse
    import logging

    if logger is None:
        logger = logging

    # Iterating over the None default would raise TypeError; fall back to
    # the balanced-accuracy checkpoint.
    if selection_metrics is None:
        selection_metrics = ["balanced_accuracy"]

    parser = argparse.ArgumentParser()
    parser.add_argument("model_path",
                        type=str,
                        help="Path to the trained model folder.")
    options = parser.parse_args([model_path])
    options = read_json(options, json_path=json_file)

    logger.debug("Load model with these options:")
    logger.debug(options)

    # Overwrite options with user input
    options.use_cpu = not gpu
    options.nproc = num_workers
    options.batch_size = batch_size
    options.prepare_dl = prepare_dl
    if diagnoses is not None:
        options.diagnoses = diagnoses

    options = translate_parameters(options)

    if options.mode_task == "multicnn":
        num_cnn = compute_num_cnn(caps_dir, tsv_path, options, "test")
    else:
        num_cnn = None

    # Define the path
    currentDirectory = pathlib.Path(model_path)
    # Search for 'fold-*' pattern
    currentPattern = "fold-*"

    # Soft voting threshold: fall back to 0.8 when it was not stored at
    # training time.
    selection_thresh = getattr(options, 'selection_threshold', 0.8)

    # loop depending the number of folds found in the model folder
    for fold_dir in currentDirectory.glob(currentPattern):
        fold = int(fold_dir.name.split("-")[-1])
        # glob() results already include the model folder, so they are used
        # as is: joining them to model_path again doubles a relative prefix.
        fold_path = str(fold_dir)
        # Kept in its own variable: rebinding model_path here would make the
        # next fold resolve its paths under this fold's 'models' folder.
        fold_models_path = join(fold_path, 'models')

        for selection_metric in selection_metrics:
            selection = "best_%s" % selection_metric

            if options.mode_task == 'multicnn':
                checkpoint_dirs = [
                    join(fold_models_path, cnn_dir, selection)
                    for cnn_dir in listdir(fold_models_path)
                ]
            else:
                checkpoint_dirs = [join(fold_models_path, selection)]

            for checkpoint_dir in checkpoint_dirs:
                checkpoint = join(checkpoint_dir, 'model_best.pth.tar')
                if not exists(checkpoint):
                    raise FileNotFoundError(errno.ENOENT,
                                            strerror(errno.ENOENT),
                                            checkpoint)

            performance_dir = join(fold_path, 'cnn_classification', selection)
            makedirs(performance_dir, exist_ok=True)

            # It launches the corresponding function, depending on the mode.
            inference_from_model_generic(caps_dir,
                                         tsv_path,
                                         fold_models_path,
                                         options,
                                         prefix,
                                         currentDirectory,
                                         fold,
                                         selection,
                                         labels=labels,
                                         num_cnn=num_cnn,
                                         logger=logger)

            # Write files at the image level (for patch, roi and slice).
            # It assumes the existence of validation files to perform
            # soft-voting
            if options.mode in ["patch", "roi", "slice"]:
                soft_voting_to_tsvs(currentDirectory,
                                    fold,
                                    selection,
                                    options.mode,
                                    prefix,
                                    num_cnn=num_cnn,
                                    selection_threshold=selection_thresh,
                                    use_labels=labels,
                                    logger=logger)

            logger.info(
                "Prediction results and metrics are written in the "
                "following folder: %s", performance_dir)
# Architectures that share the same U-Net style constructor arguments.
_UNET_MODELS = ('UNet3D', 'ResidualUNet3D', 'UNet3D_add_more_fc',
                'ResidualUNet3D_add_more_fc')
_UNET_FIELDS = ('in_channels', 'out_channels', 'f_maps', 'layer_order',
                'num_groups', 'num_levels')
# Options forwarded to every graph-network model.
_GNN_FIELDS = ('gnn_type', 'gnn_dropout', 'gnn_dropout_adj',
               'gnn_non_linear', 'gnn_undirected', 'gnn_self_loop',
               'gnn_threshold')
# Extra options forwarded to the ROI_GCN model only.
_ROI_GCN_FIELDS = ('nodel_vetor_layer', 'classify_layer',
                   'num_node_features', 'num_class', 'roi_size', 'num_nodes',
                   'gnn_pooling_layers', 'global_sort_pool_k', 'layers',
                   'shortcut_type', 'use_nl', 'dropout', 'device')
_SWIN_FIELDS = ('sw_patch_size', 'window_size', 'mlp_ratio', 'drop_rate',
                'attn_drop_rate', 'drop_path_rate', 'qk_scale', 'embed_dim',
                'depths', 'num_heads', 'qkv_bias', 'ape', 'patch_norm')
# Metrics reported for each evaluated checkpoint, in reporting order.
_REPORTED_METRICS = ('accuracy', 'balanced_accuracy', 'sensitivity',
                     'specificity', 'ppv', 'npv', 'total_loss')


def _options_subset(model_options, *names):
    """Return ``{name: model_options.<name>}`` for each name, in order."""
    return {name: getattr(model_options, name) for name in names}


def _create_model_for_test(model_options):
    """Build the architecture named by ``model_options.model`` for testing."""
    name = model_options.model
    banner = '********** init {} model for test! **********'

    if name in _UNET_MODELS:
        print(banner.format(name))
        return create_model(name,
                            gpu=model_options.gpu,
                            dropout=model_options.dropout,
                            device_index=model_options.device,
                            **_options_subset(model_options, *_UNET_FIELDS))

    if name in ('VoxCNN', 'ConvNet3D'):
        print(banner.format(name))
        return create_model(name,
                            gpu=model_options.gpu,
                            device_index=model_options.device)

    # Case-sensitive substring test: 'ROI_GCN' does not match it and is
    # handled by its own branch below.
    if 'gcn' in name:
        print('********** init {}-{} model for test! **********'.format(
            model_options.model, model_options.gnn_type))
        return create_model(name,
                            gpu=model_options.gpu,
                            device_index=model_options.device,
                            **_options_subset(model_options, *_GNN_FIELDS))

    if name == 'ROI_GCN':
        print(banner.format(name))
        # Both device_index and device receive model_options.device.
        return create_model(name,
                            gpu=model_options.gpu,
                            device_index=model_options.device,
                            **_options_subset(model_options, *_GNN_FIELDS,
                                              *_ROI_GCN_FIELDS))

    if name == 'SwinTransformer3d':
        print(banner.format(name))
        return create_model(name,
                            gpu=model_options.gpu,
                            dropout=model_options.dropout,
                            device_index=model_options.device,
                            **_options_subset(model_options, *_SWIN_FIELDS))

    print('********** init model for test! **********')
    return create_model(name,
                        gpu=model_options.gpu,
                        dropout=model_options.dropout,
                        device_index=model_options.device)


def _single_model_metrics(subset_name, selection, metrics):
    """Map the reported metrics to their per-subset, per-selection keys."""
    # The "singel" spelling is part of the emitted keys and is kept as is.
    return {
        '{}_{}_{}_singel_model'.format(subset_name, metric, selection):
        metrics[metric]
        for metric in _REPORTED_METRICS
    }


def test_cnn(output_dir,
             data_loader,
             subset_name,
             split,
             criterion,
             model_options,
             gpu=False,
             train_begin_time=None):
    """Evaluate the two best checkpoints of one fold and report the metrics.

    For each of the ``best_balanced_accuracy`` and ``best_loss`` selections
    the architecture named by ``model_options.model`` is rebuilt, the
    checkpoint ``model_best.pth.tar`` found under
    ``<output_dir>/fold-<split>/models/<selection>`` is loaded and run
    through ``test``. Metrics are printed, sent to ``wandb.log`` and written
    with ``mode_level_to_tsvs``; for the ``patch``, ``roi`` and ``slice``
    modes ``soft_voting_to_tsvs`` is called as well.

    Args:
        output_dir: root folder of the trained model.
        data_loader: loader yielding the samples to evaluate.
        subset_name: label used in the metric keys and forwarded as
            ``dataset`` to the writers.
        split: integer index of the fold.
        criterion: loss forwarded to ``test``.
        model_options: namespace holding the model name and its
            hyper-parameters, plus ``mode``, ``device`` and
            ``selection_threshold``.
        gpu: forwarded to ``load_model`` and ``test``; model creation uses
            ``model_options.gpu`` instead.
        train_begin_time: forwarded to ``timeSince`` and ``test``.

    Returns:
        dict mapping ``<subset>_<metric>_<selection>_singel_model`` keys to
        the metric values of both selections.
    """
    metric_dict = {}
    for selection in ["best_balanced_accuracy", "best_loss"]:
        # load the best trained model during the training
        model = _create_model_for_test(model_options)
        model, best_epoch = load_model(model,
                                       os.path.join(output_dir,
                                                    'fold-%i' % split,
                                                    'models', selection),
                                       gpu=gpu,
                                       filename='model_best.pth.tar',
                                       device_index=model_options.device)

        results_df, metrics = test(model,
                                   data_loader,
                                   gpu,
                                   criterion,
                                   model_options.mode,
                                   device_index=model_options.device,
                                   train_begin_time=train_begin_time,
                                   model_options=model_options,
                                   fi=split)
        print("[%s]: %s level balanced accuracy is %f" %
              (timeSince(train_begin_time), model_options.mode,
               metrics['balanced_accuracy']))
        print('[{}]: {}_{}_result_df:'.format(timeSince(train_begin_time),
                                              subset_name, selection))
        print(results_df)
        print('[{}]: {}_{}_metrics:\n{}'.format(timeSince(train_begin_time),
                                                subset_name, selection,
                                                metrics))
        wandb.log(_single_model_metrics(subset_name, selection, metrics))

        mode_level_to_tsvs(output_dir,
                           results_df,
                           metrics,
                           split,
                           selection,
                           model_options.mode,
                           dataset=subset_name)

        # Soft voting
        if model_options.mode in ["patch", "roi", "slice"]:
            soft_voting_to_tsvs(
                output_dir,
                split,
                selection=selection,
                mode=model_options.mode,
                dataset=subset_name,
                selection_threshold=model_options.selection_threshold)

        # Rebuilt after the writers ran, at the same point where the values
        # were originally read for the returned dictionary.
        metric_dict.update(
            _single_model_metrics(subset_name, selection, metrics))
    return metric_dict