def main(options):
    """Test every trained fold of a (multi-)CNN model on a test set.

    The training options are read from ``commandline_cnn.json`` inside
    ``options.model_path``. For each entry of
    ``<options.model_path>/best_model_dir`` the fold index is parsed from the
    entry name, ``test_cnn`` is called once per CNN index, and
    ``soft_voting_to_tsvs`` is then called for the ``best_acc`` and
    ``best_loss`` selections.

    Args:
        options: namespace of command-line options. Attributes read here:
            ``model_path``, ``input_dir``, ``tsv_path``, ``diagnoses``,
            ``preprocessing``, ``batch_size``, ``num_workers``, ``dataset``,
            ``gpu`` and ``mode``. ``options.diagnoses`` is overwritten with
            the value stored in the json file when it is ``None``.

    Raises:
        ValueError: if an entry of ``best_model_dir`` does not end with a
            fold number.
    """
    # Read json
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)
    num_cnn = compute_num_cnn(options.input_dir, options.tsv_path,
                              model_options, data="test")

    # Load test data
    if options.diagnoses is None:
        options.diagnoses = model_options.diagnoses

    test_df = load_data_test(options.tsv_path, options.diagnoses)
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = torch.nn.CrossEntropyLoss()

    # Loop on all folds trained
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    # Loop on folds
    for fold_dir in folds_dir:
        # Parse the whole trailing run of digits: reading only the last
        # character would turn "fold_10" into fold 0.
        fold_digits = fold_dir[len(fold_dir.rstrip('0123456789')):]
        split = int(fold_digits)
        print("Fold %i" % split)

        for cnn_index in range(num_cnn):
            dataset = return_dataset(model_options.mode, options.input_dir,
                                     test_df, options.preprocessing,
                                     transformations, options,
                                     cnn_index=cnn_index)
            # shuffle=False keeps the loading order fixed for testing.
            test_loader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     shuffle=False,
                                     num_workers=options.num_workers,
                                     pin_memory=True)

            test_cnn(options.model_path, test_loader, options.dataset, split,
                     criterion, cnn_index, model_options, options.gpu)

        for selection in ['best_acc', 'best_loss']:
            soft_voting_to_tsvs(
                options.model_path, split, selection,
                mode=options.mode,
                dataset=options.dataset,
                num_cnn=num_cnn,
                selection_threshold=model_options.selection_threshold)
parser.add_argument("--num_workers", '-w', default=8, type=int,
                    help='the number of batch being loaded in parallel')

if __name__ == "__main__":
    # parse_known_args() returns (namespace, leftover arguments); parse once
    # and reuse both parts instead of re-parsing just to print the leftovers.
    ret = parser.parse_known_args()
    options, unknown_args = ret
    if unknown_args:
        print("unknown arguments: %s" % unknown_args)

    # Read json
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)
    num_cnn = compute_num_cnn(model_options.input_dir, model_options.tsv_path,
                              model_options, data="train")

    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = nn.CrossEntropyLoss()

    # Loop on all folds trained
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    for fold_dir in folds_dir:
        # Parse the whole trailing run of digits: reading only the last
        # character would turn "fold_10" into fold 0.
        split = int(fold_dir[len(fold_dir.rstrip('0123456789')):])
def main(options):
    """Resume the training of a convolutional autoencoder from a checkpoint.

    The options are completed by ``read_json``, the train/validation loaders
    are rebuilt, the autoencoder and its optimizer are reloaded from
    ``options.model_path`` and ``train`` is called again. When
    ``options.visualization`` is set, the ``best_loss`` model is reloaded and
    ``visualize_image`` is called on both loaders.

    Args:
        options: namespace of command-line options; ``beginning_epoch`` is
            set on it to the checkpoint epoch plus one.

    Raises:
        Exception: if ``evaluation_steps`` is neither 1 nor a multiple of
            ``accumulation_steps``.
    """
    options = read_json(options)

    # Reject evaluation_steps that is neither 1 nor a multiple of
    # accumulation_steps.
    not_multiple = options.evaluation_steps % options.accumulation_steps != 0
    if not_multiple and options.evaluation_steps != 1:
        raise Exception(
            'Evaluation steps %d must be a multiple of accumulation steps %d' %
            (options.evaluation_steps, options.accumulation_steps))

    transformations = MinMaxNormalization() if options.minmaxnormalization else None

    total_time = time()

    # Get the data.
    training_tsv, valid_tsv = load_data(options.diagnosis_path,
                                        options.diagnoses,
                                        options.split,
                                        options.n_splits,
                                        options.baseline)

    dataset_kwargs = dict(transform=transformations,
                          preprocessing=options.preprocessing)
    data_train = MRIDataset(options.input_dir, training_tsv, **dataset_kwargs)
    data_valid = MRIDataset(options.input_dir, valid_tsv, **dataset_kwargs)

    # Both loaders share every setting except shuffling.
    loader_kwargs = dict(batch_size=options.batch_size,
                         num_workers=options.num_workers,
                         pin_memory=True,
                         drop_last=options.drop_last)
    train_loader = DataLoader(data_train, shuffle=True, **loader_kwargs)
    valid_loader = DataLoader(data_valid, shuffle=False, **loader_kwargs)

    # Initialize the model
    print('Initialization of the model')
    autoencoder = create_autoencoder(options.model)
    autoencoder, current_epoch = load_model(autoencoder, options.model_path,
                                            options.gpu, 'checkpoint.pth.tar',
                                            device_index=options.device)
    if options.gpu:
        device = torch.device('cuda:{}'.format(options.device))
        autoencoder = autoencoder.to(device)

    options.beginning_epoch = current_epoch + 1

    # Define criterion and optimizer
    criterion = torch.nn.MSELoss()
    optimizer = load_optimizer(
        path.join(options.model_path, 'optimizer.pth.tar'), autoencoder)

    # Define output directories
    output_root = options.output_dir
    log_dir = path.join(output_root, 'log_dir',
                        'fold_%i' % options.split, 'ConvAutoencoder')
    visualization_dir = path.join(output_root, 'visualize',
                                  'fold_%i' % options.split)
    model_dir = path.join(output_root, 'best_model_dir',
                          'fold_%i' % options.split, 'ConvAutoencoder')

    print('Resuming the training task')
    train(autoencoder, train_loader, valid_loader, criterion, optimizer,
          False, log_dir, model_dir, options)

    if options.visualization:
        print("Visualization of autoencoder reconstruction")
        best_decoder, _ = load_model(autoencoder,
                                     path.join(model_dir, "best_loss"),
                                     options.gpu,
                                     filename='model_best.pth.tar',
                                     device_index=options.device)
        for loader, subfolder in ((valid_loader, "validation"),
                                  (train_loader, "train")):
            visualize_image(best_decoder, loader,
                            path.join(visualization_dir, subfolder),
                            nb_images=3, device_index=options.device)

    # Drop the local reference to the model before emptying the CUDA cache.
    del autoencoder
    torch.cuda.empty_cache()

    total_time = time() - total_time
    print("Total time of computation: %d s" % total_time)
def inference_from_model(caps_dir, tsv_path, model_path=None, json_file=None,
                         prefix=None, no_labels=False, gpu=True,
                         prepare_dl=False):
    """
    Inference from previously trained model.

    This function uses a previously trained model to classify the input(s).
    The model is stored in the folder model_path and it assumes the folder
    structure given by the training stage. Particularly, to have a prediction
    at image level, it assumes that results of the validation set are stored
    in the model_path folder in order to perform soft-voting at the
    slice/patch level and also for multicnn.

    Args:
        caps_dir: folder containing the tensor files (.pt version of MRI)
        tsv_path: file with the name of the MRIs to process (single or
            multiple)
        model_path: folder of the trained model. It is searched for
            ``fold-*`` sub-folders, each expected to contain a ``models``
            folder.
        json_file: file containing the training parameters.
        prefix: passed as ``str(prefix)`` to the tsv writers as the dataset
            name (so ``None`` becomes the string ``'None'``).
        no_labels: accepted for interface compatibility; not read by this
            function.
        gpu: if true, it uses gpu (``options.use_cpu`` is set to ``not gpu``).
        prepare_dl: if true, uses extracted patches/slices otherwise extract
            them on-the-fly.

    Returns:
        None. Files are written in the output folder with prediction results
        and metrics. The output folder is named cnn_classification and it is
        inside each fold folder of the model folder.

    Raises:
        FileNotFoundError: if ``model_best.pth.tar`` is missing for the
            ``best_balanced_accuracy`` selection of a fold.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("model_path", type=str,
                        help="Path to the trained model folder.")
    options = parser.parse_args([model_path])
    options = read_json(options, json_path=json_file)
    num_cnn = compute_num_cnn(caps_dir, tsv_path, options, "classify")
    print("Load model with these options:")
    print(options)

    # Overwrite options with user input
    options.use_cpu = not gpu
    options.prepare_dl = prepare_dl

    # Define the path
    currentDirectory = pathlib.Path(model_path)
    # Search for 'fold-*' pattern
    currentPattern = "fold-*"

    best_model = {
        'best_acc': 'best_balanced_accuracy',
        'best_loss': 'best_loss'
    }
    selection = best_model['best_acc']

    # loop depending the number of folds found in the model folder
    for fold_dir in currentDirectory.glob(currentPattern):
        fold = int(fold_dir.name.split("-")[-1])
        # glob() already yields paths prefixed with model_path: joining them
        # with model_path again duplicates the prefix for relative paths.
        fold_path = str(fold_dir)
        # Use a per-fold variable so the `model_path` argument is not
        # overwritten and later folds resolve correctly.
        fold_models_path = join(fold_path, 'models')

        if options.mode_task == 'multicnn':
            for cnn_dir in listdir(fold_models_path):
                cnn_model_file = join(fold_models_path, cnn_dir, selection,
                                      'model_best.pth.tar')
                if not exists(cnn_model_file):
                    raise FileNotFoundError(
                        errno.ENOENT,
                        strerror(errno.ENOENT),
                        cnn_model_file)
        else:
            full_model_path = join(fold_models_path, selection)
            if not exists(join(full_model_path, 'model_best.pth.tar')):
                raise FileNotFoundError(
                    errno.ENOENT,
                    strerror(errno.ENOENT),
                    join(full_model_path, 'model_best.pth.tar'))

        performance_dir = join(fold_path, 'cnn_classification', selection)
        if not exists(performance_dir):
            makedirs(performance_dir)

        # It launches the corresponding function, depending on the mode.
        infered_classes, metrics = inference_from_model_generic(
            caps_dir, tsv_path, fold_models_path, options, num_cnn=num_cnn)

        # Prepare outputs
        usr_prefix = str(prefix)

        # Write output files at %mode level
        print("Prediction results and metrics are written in the "
              "following folder: %s" % performance_dir)

        mode_level_to_tsvs(currentDirectory, infered_classes, metrics, fold,
                           selection, options.mode, dataset=usr_prefix)

        # Soft voting: fall back to 0.8 when the json has no threshold.
        if hasattr(options, 'selection_threshold'):
            selection_thresh = options.selection_threshold
        else:
            selection_thresh = 0.8

        # Write files at the image level (for patch, roi and slice).
        # It assumes the existence of validation files to perform soft-voting
        if options.mode in ["patch", "roi", "slice"]:
            soft_voting_to_tsvs(currentDirectory, fold, selection,
                                options.mode, usr_prefix, num_cnn=num_cnn,
                                selection_threshold=selection_thresh)
def inference_from_model(caps_dir, tsv_path, model_path=None, json_file=None,
                         prefix=None, labels=True, gpu=True, num_workers=0,
                         batch_size=1, prepare_dl=False,
                         selection_metrics=None, diagnoses=None, logger=None):
    """
    Inference from previously trained model.

    This function uses a previously trained model to classify the input(s).
    The model is stored in the folder model_path and it assumes the folder
    structure given by the training stage. Particularly, to have a prediction
    at image level, it assumes that results of the validation set are stored
    in the model_path folder in order to perform soft-voting at the
    slice/patch level and also for multicnn.

    Args:
        caps_dir: folder containing the tensor files (.pt version of MRI)
        tsv_path: file with the name of the MRIs to process (single or
            multiple)
        model_path: folder of the trained model. It is searched for
            ``fold-*`` sub-folders, each expected to contain a ``models``
            folder.
        json_file: file containing the training parameters.
        prefix: prefix of all classification outputs.
        labels: by default is True. If False no metrics tsv files will be
            written.
        gpu: if true, it uses gpu (``options.use_cpu`` is set to ``not gpu``).
        num_workers: num_workers used in DataLoader
        batch_size: batch size of the DataLoader
        prepare_dl: if true, uses extracted patches/slices otherwise extract
            them on-the-fly.
        selection_metrics: list of metrics to find best models to be
            evaluated. ``None`` falls back to ``["balanced_accuracy"]``.
        diagnoses: list of diagnoses to be tested if tsv_path is a folder.
        logger: Logger instance. ``None`` falls back to the ``logging``
            module itself.

    Returns:
        None. Files are written in the output folder with prediction results
        and metrics. The output folder is named cnn_classification and it is
        inside each fold folder of the model folder.

    Raises:
        FileNotFoundError: if ``model_best.pth.tar`` is missing for one of
            the requested selection metrics of a fold.
    """
    import argparse
    import logging

    if logger is None:
        logger = logging

    # Iterating over the `None` default would raise TypeError; evaluate the
    # balanced-accuracy model when the caller does not choose a metric.
    if selection_metrics is None:
        selection_metrics = ["balanced_accuracy"]

    parser = argparse.ArgumentParser()
    parser.add_argument("model_path", type=str,
                        help="Path to the trained model folder.")
    options = parser.parse_args([model_path])
    options = read_json(options, json_path=json_file)

    logger.debug("Load model with these options:")
    logger.debug(options)

    # Overwrite options with user input
    options.use_cpu = not gpu
    options.nproc = num_workers
    options.batch_size = batch_size
    options.prepare_dl = prepare_dl
    if diagnoses is not None:
        options.diagnoses = diagnoses

    options = translate_parameters(options)

    if options.mode_task == "multicnn":
        num_cnn = compute_num_cnn(caps_dir, tsv_path, options, "test")
    else:
        num_cnn = None

    # Define the path
    currentDirectory = pathlib.Path(model_path)
    # Search for 'fold-*' pattern
    currentPattern = "fold-*"

    # loop depending the number of folds found in the model folder
    for fold_dir in currentDirectory.glob(currentPattern):
        fold = int(fold_dir.name.split("-")[-1])
        # glob() already yields paths prefixed with model_path: joining them
        # with model_path again duplicates the prefix for relative paths.
        fold_path = str(fold_dir)
        # Use a per-fold variable so the `model_path` argument is not
        # overwritten and later folds resolve correctly.
        fold_models_path = join(fold_path, 'models')

        for selection_metric in selection_metrics:
            selection = "best_%s" % selection_metric

            if options.mode_task == 'multicnn':
                for cnn_dir in listdir(fold_models_path):
                    cnn_model_file = join(fold_models_path, cnn_dir,
                                          selection, 'model_best.pth.tar')
                    if not exists(cnn_model_file):
                        raise FileNotFoundError(
                            errno.ENOENT,
                            strerror(errno.ENOENT),
                            cnn_model_file)
            else:
                full_model_path = join(fold_models_path, selection)
                if not exists(join(full_model_path, 'model_best.pth.tar')):
                    raise FileNotFoundError(
                        errno.ENOENT,
                        strerror(errno.ENOENT),
                        join(full_model_path, 'model_best.pth.tar'))

            performance_dir = join(fold_path, 'cnn_classification', selection)
            makedirs(performance_dir, exist_ok=True)

            # It launches the corresponding function, depending on the mode.
            inference_from_model_generic(caps_dir, tsv_path, fold_models_path,
                                         options, prefix, currentDirectory,
                                         fold, selection,
                                         labels=labels, num_cnn=num_cnn,
                                         logger=logger)

            # Soft voting: fall back to 0.8 when the json has no threshold.
            if hasattr(options, 'selection_threshold'):
                selection_thresh = options.selection_threshold
            else:
                selection_thresh = 0.8

            # Write files at the image level (for patch, roi and slice).
            # It assumes the existence of validation files to perform
            # soft-voting
            if options.mode in ["patch", "roi", "slice"]:
                soft_voting_to_tsvs(currentDirectory, fold, selection,
                                    options.mode, prefix, num_cnn=num_cnn,
                                    selection_threshold=selection_thresh,
                                    use_labels=labels, logger=logger)

            logger.info("Prediction results and metrics are written in the "
                        "following folder: %s" % performance_dir)
def _create_model_from_options(options):
    """Instantiate the network named by ``options.model`` through ``create_model``.

    Architectures that take the same keyword arguments share one branch; only
    the option attributes needed by the selected architecture are read.
    """
    name = options.model
    banner = '********** init %s model for test! **********'
    unet_like = ('UNet3D', 'ResidualUNet3D', 'UNet3D_add_more_fc',
                 'ResidualUNet3D_add_more_fc')

    if name in unet_like:
        print(banner % name)
        return create_model(name,
                            gpu=options.gpu,
                            dropout=options.dropout,
                            device_index=options.device,
                            in_channels=options.in_channels,
                            out_channels=options.out_channels,
                            f_maps=options.f_maps,
                            layer_order=options.layer_order,
                            num_groups=options.num_groups,
                            num_levels=options.num_levels)

    if name in ('VoxCNN', 'ConvNet3D'):
        print(banner % name)
        return create_model(name,
                            gpu=options.gpu,
                            device_index=options.device)

    # Case-sensitive substring test: it does not match 'ROI_GCN', which is
    # handled by its own branch below.
    if 'gcn' in name:
        print('********** init {}-{} model for test! **********'.format(
            name, options.gnn_type))
        return create_model(name,
                            gpu=options.gpu,
                            device_index=options.device,
                            gnn_type=options.gnn_type,
                            gnn_dropout=options.gnn_dropout,
                            gnn_dropout_adj=options.gnn_dropout_adj,
                            gnn_non_linear=options.gnn_non_linear,
                            gnn_undirected=options.gnn_undirected,
                            gnn_self_loop=options.gnn_self_loop,
                            gnn_threshold=options.gnn_threshold)

    if name == 'ROI_GCN':
        print(banner % name)
        return create_model(name,
                            gpu=options.gpu,
                            device_index=options.device,
                            gnn_type=options.gnn_type,
                            gnn_dropout=options.gnn_dropout,
                            gnn_dropout_adj=options.gnn_dropout_adj,
                            gnn_non_linear=options.gnn_non_linear,
                            gnn_undirected=options.gnn_undirected,
                            gnn_self_loop=options.gnn_self_loop,
                            gnn_threshold=options.gnn_threshold,
                            nodel_vetor_layer=options.nodel_vetor_layer,
                            classify_layer=options.classify_layer,
                            num_node_features=options.num_node_features,
                            num_class=options.num_class,
                            roi_size=options.roi_size,
                            num_nodes=options.num_nodes,
                            gnn_pooling_layers=options.gnn_pooling_layers,
                            global_sort_pool_k=options.global_sort_pool_k,
                            layers=options.layers,
                            shortcut_type=options.shortcut_type,
                            use_nl=options.use_nl,
                            dropout=options.dropout,
                            device=options.device)

    if name == 'SwinTransformer3d':
        print(banner % name)
        return create_model(name,
                            gpu=options.gpu,
                            dropout=options.dropout,
                            device_index=options.device,
                            sw_patch_size=options.sw_patch_size,
                            window_size=options.window_size,
                            mlp_ratio=options.mlp_ratio,
                            drop_rate=options.drop_rate,
                            attn_drop_rate=options.attn_drop_rate,
                            drop_path_rate=options.drop_path_rate,
                            qk_scale=options.qk_scale,
                            embed_dim=options.embed_dim,
                            depths=options.depths,
                            num_heads=options.num_heads,
                            qkv_bias=options.qkv_bias,
                            ape=options.ape,
                            patch_norm=options.patch_norm)

    # Any other architecture: no banner, generic keyword arguments only.
    return create_model(name,
                        gpu=options.gpu,
                        dropout=options.dropout,
                        device_index=options.device)


def main(options):
    """Resume the training of a CNN from its checkpoint, then test it.

    The options are completed by ``read_json``, the train/validation loaders
    are rebuilt, the network selected by ``options.model`` is created and
    reloaded from ``checkpoint.pth.tar``, ``train`` is called again and
    ``test_cnn`` is finally run on both loaders.

    Args:
        options: namespace of command-line options. ``beginning_epoch`` is
            set to the checkpoint epoch plus one and ``model_path`` is
            replaced by ``output_dir`` before testing.

    Raises:
        Exception: if ``evaluation_steps`` is neither 1 nor a multiple of
            ``accumulation_steps``.
    """
    options = read_json(options)

    if options.evaluation_steps % options.accumulation_steps != 0 and options.evaluation_steps != 1:
        raise Exception(
            'Evaluation steps %d must be a multiple of accumulation steps %d' %
            (options.evaluation_steps, options.accumulation_steps))

    if options.minmaxnormalization:
        transformations = MinMaxNormalization()
    else:
        transformations = None

    total_time = time()

    # Get the data.
    training_tsv, valid_tsv = load_data(options.diagnosis_path,
                                        options.diagnoses,
                                        options.split,
                                        options.n_splits,
                                        options.baseline)

    data_train = MRIDataset(options.input_dir, training_tsv,
                            transform=transformations,
                            preprocessing=options.preprocessing)
    data_valid = MRIDataset(options.input_dir, valid_tsv,
                            transform=transformations,
                            preprocessing=options.preprocessing)

    # Only the training loader shuffles its samples.
    train_loader = DataLoader(data_train,
                              batch_size=options.batch_size,
                              shuffle=True,
                              num_workers=options.num_workers,
                              pin_memory=True,
                              drop_last=options.drop_last)

    valid_loader = DataLoader(data_valid,
                              batch_size=options.batch_size,
                              shuffle=False,
                              num_workers=options.num_workers,
                              pin_memory=True,
                              drop_last=options.drop_last)

    # Initialize the model
    print('Initialization of the model')
    model = _create_model_from_options(options)

    model_dir = path.join(options.model_path, "best_model_dir", "CNN",
                          "fold_" + str(options.split))
    model, current_epoch = load_model(model, model_dir, options.gpu,
                                      'checkpoint.pth.tar',
                                      device_index=options.device)

    options.beginning_epoch = current_epoch + 1

    # Define criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer_path = path.join(options.model_path, 'optimizer.pth.tar')
    optimizer = load_optimizer(optimizer_path, model)

    # Define output directories
    # NOTE(review): the checkpoint is read from best_model_dir/CNN/fold_<n>
    # while results go to best_model_dir/fold_<n>/CNN - confirm the differing
    # layout is intended.
    log_dir = path.join(options.output_dir, 'log_dir',
                        'fold_%i' % options.split, 'CNN')
    model_dir = path.join(options.output_dir, 'best_model_dir',
                          'fold_%i' % options.split, 'CNN')

    print('Resuming the training task')
    train(model, train_loader, valid_loader, criterion, optimizer, True,
          log_dir, model_dir, options)

    # Testing reads the model location from options.model_path.
    options.model_path = options.output_dir
    test_cnn(train_loader, "train", options.split, criterion, options)
    test_cnn(valid_loader, "validation", options.split, criterion, options)

    total_time = time() - total_time
    print("Total time of computation: %d s" % total_time)
def main(options):
    """Resume the training of a single CNN from its checkpoint, then test it.

    The options are completed by ``read_json``, the train/validation loaders
    are rebuilt, the model and its optimizer are reloaded, ``train`` is
    called again and ``test_single_cnn`` is run on both loaders.

    Args:
        options: namespace of command-line options. ``beginning_epoch`` is
            set to the checkpoint epoch plus one and ``model_path`` is
            replaced by ``output_dir`` before testing.

    Raises:
        Exception: if ``evaluation_steps`` is neither 1 nor a multiple of
            ``accumulation_steps``.
    """
    options = read_json(options)

    # Reject evaluation_steps that is neither 1 nor a multiple of
    # accumulation_steps.
    not_multiple = options.evaluation_steps % options.accumulation_steps != 0
    if not_multiple and options.evaluation_steps != 1:
        raise Exception(
            'Evaluation steps %d must be a multiple of accumulation steps %d' %
            (options.evaluation_steps, options.accumulation_steps))

    transformations = MinMaxNormalization() if options.minmaxnormalization else None

    total_time = time()

    # Get the data.
    training_tsv, valid_tsv = load_data(options.diagnosis_path,
                                        options.diagnoses,
                                        options.split,
                                        options.n_splits,
                                        options.baseline)

    dataset_kwargs = dict(transform=transformations,
                          preprocessing=options.preprocessing)
    data_train = MRIDataset(options.input_dir, training_tsv, **dataset_kwargs)
    data_valid = MRIDataset(options.input_dir, valid_tsv, **dataset_kwargs)

    # Both loaders share every setting except shuffling.
    loader_kwargs = dict(batch_size=options.batch_size,
                         num_workers=options.num_workers,
                         pin_memory=True)
    train_loader = DataLoader(data_train, shuffle=True, **loader_kwargs)
    valid_loader = DataLoader(data_valid, shuffle=False, **loader_kwargs)

    # Initialize the model
    print('Initialization of the model')
    network = create_model(options.model, options.gpu, dropout=options.dropout)
    checkpoint_dir = path.join(options.model_path, "best_model_dir", "CNN",
                               "fold_" + str(options.split))
    network, current_epoch = load_model(network, checkpoint_dir, options.gpu,
                                        'checkpoint.pth.tar')

    options.beginning_epoch = current_epoch + 1

    # Define criterion and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = load_optimizer(
        path.join(options.model_path, 'optimizer.pth.tar'), network)

    # Define output directories
    output_root = options.output_dir
    log_dir = path.join(output_root, 'log_dir',
                        'fold_%i' % options.split, 'CNN')
    model_dir = path.join(output_root, 'best_model_dir',
                          'fold_%i' % options.split, 'CNN')

    print('Resuming the training task')
    train(network, train_loader, valid_loader, criterion, optimizer, True,
          log_dir, model_dir, options)

    # Testing reads the model location from options.model_path.
    options.model_path = options.output_dir
    for loader, subset in ((train_loader, "train"),
                           (valid_loader, "validation")):
        test_single_cnn(loader, subset, options.split, criterion, options)

    total_time = time() - total_time
    print("Total time of computation: %d s" % total_time)