def main(options):
    # Read the json describing the trained CNN
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)

    num_cnn = compute_num_cnn(options.input_dir, options.tsv_path, model_options,
                              data="test")

    # Load test data
    if options.diagnoses is None:
        options.diagnoses = model_options.diagnoses

    test_df = load_data_test(options.tsv_path, options.diagnoses)
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = torch.nn.CrossEntropyLoss()

    # Loop on all folds trained
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    for fold_dir in folds_dir:
        split = int(fold_dir[-1])
        print("Fold %i" % split)

        # Evaluate each sub-network of the multi-CNN on the test set
        for cnn_index in range(num_cnn):
            dataset = return_dataset(model_options.mode, options.input_dir, test_df,
                                     options.preprocessing, transformations, options,
                                     cnn_index=cnn_index)
            test_loader = DataLoader(dataset,
                                     batch_size=options.batch_size,
                                     shuffle=False,
                                     num_workers=options.num_workers,
                                     pin_memory=True)
            test_cnn(options.model_path, test_loader, options.dataset, split,
                     criterion, cnn_index, model_options, options.gpu)

        # Combine the sub-network outputs with soft voting for both selection criteria
        for selection in ['best_acc', 'best_loss']:
            soft_voting_to_tsvs(
                options.model_path, split, selection,
                mode=options.mode,
                dataset=options.dataset,
                num_cnn=num_cnn,
                selection_threshold=model_options.selection_threshold)
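# Usage sketch (illustrative only): the Namespace fields below mirror the
# attributes read by main() above; the concrete paths and values are hypothetical
# placeholders that would normally come from the project's argparse parser.
def _example_run_main():
    import argparse

    options = argparse.Namespace(
        model_path="results/exp1",    # folder with commandline_cnn.json and best_model_dir/
        input_dir="data/CAPS",        # preprocessed image directory
        tsv_path="data/labels/test",  # tsv file(s) listing the test subjects
        diagnoses=None,               # None -> fall back to the training diagnoses
        preprocessing="t1-linear",
        dataset="test",
        mode="image",
        batch_size=2,
        num_workers=2,
        gpu=False,
    )
    main(options)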
if __name__ == "__main__":
    ret = parser.parse_known_args()
    options = ret[0]
    if ret[1]:
        print("unknown arguments: %s" % ret[1])

    # Read json
    model_options = argparse.Namespace()
    json_path = path.join(options.model_path, "commandline_cnn.json")
    model_options = read_json(model_options, json_path=json_path)

    num_cnn = compute_num_cnn(model_options.input_dir, model_options.tsv_path,
                              model_options, data="train")
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)
    criterion = nn.CrossEntropyLoss()

    # Loop on all folds trained
    best_model_dir = os.path.join(options.model_path, 'best_model_dir')
    folds_dir = os.listdir(best_model_dir)

    for fold_dir in folds_dir:
        split = int(fold_dir[-1])
        print("Fold %i" % split)

        # Data management
        training_df, valid_df = load_data(model_options.tsv_path,
                                          model_options.diagnoses,
                                          split,
                                          model_options.n_splits,
                                          model_options.baseline)
def group_backprop(options):
    main_logger = return_logger(options.verbose, "main process")
    options = translate_parameters(options)

    fold_list = [
        fold for fold in os.listdir(options.model_path) if fold[:5:] == "fold-"
    ]
    if len(fold_list) == 0:
        raise ValueError("No folds were found at path %s" % options.model_path)

    for fold in fold_list:
        main_logger.info(fold)
        for selection in options.selection:
            results_path = path.join(options.model_path, fold, 'gradients',
                                     selection, options.name)

            model_options = argparse.Namespace()
            model_options = read_json(
                model_options, path.join(options.model_path, 'commandline.json'))
            model_options = translate_parameters(model_options)
            model_options.gpu = options.gpu

            if options.tsv_path is None:
                options.tsv_path = model_options.tsv_path
            if options.input_dir is None:
                options.input_dir = model_options.input_dir
            if options.target_diagnosis is None:
                options.target_diagnosis = options.diagnosis

            criterion = get_criterion(model_options.loss)

            # Data management (remove data not well predicted by the CNN)
            training_df = load_data_test(options.tsv_path, [options.diagnosis],
                                         baseline=options.baseline)
            training_df.reset_index(drop=True, inplace=True)

            # Model creation
            _, all_transforms = get_transforms(
                model_options.mode,
                minmaxnormalization=model_options.minmaxnormalization)

            data_example = return_dataset(model_options.mode, options.input_dir,
                                          training_df, model_options.preprocessing,
                                          train_transformations=None,
                                          all_transformations=all_transforms,
                                          params=options)

            model = create_model(model_options, data_example.size)
            model_dir = os.path.join(options.model_path, fold, 'models', selection)
            model, best_epoch = load_model(model, model_dir, gpu=options.gpu,
                                           filename='model_best.pth.tar')
            options.output_dir = results_path
            commandline_to_json(options, logger=main_logger)

            # Keep only subjects who were correctly / wrongly predicted by the network
            training_df = sort_predicted(model, training_df, options.input_dir,
                                         model_options, criterion, options.keep_true,
                                         batch_size=options.batch_size,
                                         num_workers=options.num_workers,
                                         gpu=options.gpu)

            if len(training_df) > 0:
                # Save the tsv file used for the saliency maps next to the results
                training_df.to_csv(path.join(results_path, 'data.tsv'), sep='\t',
                                   index=False)

                data_train = return_dataset(model_options.mode, options.input_dir,
                                            training_df, model_options.preprocessing,
                                            train_transformations=None,
                                            all_transformations=all_transforms,
                                            params=options)

                train_loader = DataLoader(data_train,
                                          batch_size=options.batch_size,
                                          shuffle=True,
                                          num_workers=options.num_workers,
                                          pin_memory=True)

                interpreter = VanillaBackProp(model, gpu=options.gpu)

                # Accumulate the gradients of the whole group, then average
                cum_map = 0
                for data in train_loader:
                    if options.gpu:
                        input_batch = data['image'].cuda()
                    else:
                        input_batch = data['image']

                    maps = interpreter.generate_gradients(
                        input_batch,
                        data_train.diagnosis_code[options.target_diagnosis])
                    cum_map += maps.sum(axis=0)

                mean_map = cum_map / len(data_train)

                if len(data_train.size) == 4:
                    if options.nifti_template_path is not None:
                        image_nii = nib.load(options.nifti_template_path)
                        affine = image_nii.affine
                    else:
                        affine = np.eye(4)

                    mean_map_nii = nib.Nifti1Image(mean_map[0], affine)
                    nib.save(mean_map_nii, path.join(results_path, "map.nii.gz"))
                    np.save(path.join(results_path, "map.npy"), mean_map[0])
                else:
                    jpg_path = path.join(results_path, "map.jpg")
                    plt.imshow(mean_map[0], cmap="coolwarm",
                               vmin=-options.vmax, vmax=options.vmax)
                    plt.colorbar()
                    plt.savefig(jpg_path)
                    plt.close()
                    numpy_path = path.join(results_path, "map.npy")
                    np.save(numpy_path, mean_map[0])
            else:
                main_logger.warning("There are no subjects for the given options")
def inference_from_model_generic(caps_dir, tsv_path, model_path, model_options,
                                 num_cnn=None, selection="best_balanced_accuracy"):
    '''
    Inference using an image/subject CNN model
    '''
    from os.path import join

    gpu = not model_options.use_cpu

    # Recreate the model with the network described in the json file
    # Initialize the model
    model = create_model(model_options.model, gpu, dropout=model_options.dropout)
    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)

    # Define loss
    criterion = nn.CrossEntropyLoss()

    if model_options.mode_task == 'multicnn':
        predictions_df = pd.DataFrame()
        metrics_df = pd.DataFrame()

        for n in range(num_cnn):
            dataset = return_dataset(model_options.mode, caps_dir, tsv_path,
                                     model_options.preprocessing, transformations,
                                     model_options, cnn_index=n)
            test_loader = DataLoader(dataset,
                                     batch_size=model_options.batch_size,
                                     shuffle=False,
                                     num_workers=model_options.nproc,
                                     pin_memory=True)

            # load the best trained model during the training
            model, best_epoch = load_model(model,
                                           join(model_path, 'cnn-%i' % n, selection),
                                           gpu, filename='model_best.pth.tar')

            cnn_df, cnn_metrics = test(model, test_loader, gpu, criterion,
                                       mode=model_options.mode)
            predictions_df = pd.concat([predictions_df, cnn_df])
            metrics_df = pd.concat(
                [metrics_df, pd.DataFrame(cnn_metrics, index=[0])])

        predictions_df.reset_index(drop=True, inplace=True)
        metrics_df.reset_index(drop=True, inplace=True)

    else:
        # Load model from path
        best_model, best_epoch = load_model(model, join(model_path, selection),
                                            gpu, filename='model_best.pth.tar')

        # Read/localize the data
        data_to_test = return_dataset(model_options.mode, caps_dir, tsv_path,
                                      model_options.preprocessing, transformations,
                                      model_options)

        # Load the data
        test_loader = DataLoader(data_to_test,
                                 batch_size=model_options.batch_size,
                                 shuffle=False,
                                 num_workers=model_options.nproc,
                                 pin_memory=True)

        # Run the model on the data
        predictions_df, metrics = test(best_model, test_loader, gpu, criterion,
                                       mode=model_options.mode)
        metrics_df = pd.DataFrame(metrics, index=[0])

    return predictions_df, metrics_df
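# Usage sketch (illustrative only), assuming the variant of
# inference_from_model_generic directly above (the one returning dataframes).
# model_options mimics the Namespace restored from the training json (e.g. via
# read_json); the model name, paths and values are hypothetical placeholders.
def _example_inference():
    import argparse

    model_options = argparse.Namespace(
        model="Conv5_FC3",
        mode="image",
        mode_task="cnn",          # anything other than 'multicnn' takes the single-model branch
        preprocessing="t1-linear",
        minmaxnormalization=True,
        dropout=0.5,
        batch_size=2,
        nproc=2,
        use_cpu=True,
    )
    predictions_df, metrics_df = inference_from_model_generic(
        caps_dir="data/CAPS",
        tsv_path="data/labels/test.tsv",
        model_path="results/exp1/fold-0/models",
        model_options=model_options,
        selection="best_balanced_accuracy",
    )
    print(metrics_df)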
def inference_from_model_generic(caps_dir, tsv_path, model_path, model_options,
                                 prefix, output_dir, fold, selection,
                                 labels=True, num_cnn=None, logger=None):
    from os.path import join
    import logging

    if logger is None:
        logger = logging

    gpu = not model_options.use_cpu

    _, all_transforms = get_transforms(model_options.mode,
                                       model_options.minmaxnormalization)

    test_df = load_data_test(tsv_path, model_options.diagnoses)

    # Define loss
    criterion = get_criterion(model_options.loss)

    if model_options.mode_task == 'multicnn':
        for n in range(num_cnn):
            test_dataset = return_dataset(model_options.mode, caps_dir, test_df,
                                          model_options.preprocessing,
                                          train_transformations=None,
                                          all_transformations=all_transforms,
                                          params=model_options,
                                          cnn_index=n,
                                          labels=labels)

            test_loader = DataLoader(test_dataset,
                                     batch_size=model_options.batch_size,
                                     shuffle=False,
                                     num_workers=model_options.nproc,
                                     pin_memory=True)

            # load the best trained model during the training
            model = create_model(model_options, test_dataset.size)
            model, best_epoch = load_model(model,
                                           join(model_path, 'cnn-%i' % n, selection),
                                           gpu, filename='model_best.pth.tar')

            cnn_df, cnn_metrics = test(model, test_loader, gpu, criterion,
                                       mode=model_options.mode, use_labels=labels)

            if labels:
                logger.info(
                    "%s balanced accuracy is %f for %s %i and model selected on %s"
                    % (prefix, cnn_metrics["balanced_accuracy"],
                       model_options.mode, n, selection))

            mode_level_to_tsvs(output_dir, cnn_df, cnn_metrics, fold, selection,
                               model_options.mode, dataset=prefix, cnn_index=n)

    else:
        # Read/localize the data
        test_dataset = return_dataset(model_options.mode, caps_dir, test_df,
                                      model_options.preprocessing,
                                      train_transformations=None,
                                      all_transformations=all_transforms,
                                      params=model_options,
                                      labels=labels)

        # Load the data
        test_loader = DataLoader(test_dataset,
                                 batch_size=model_options.batch_size,
                                 shuffle=False,
                                 num_workers=model_options.nproc,
                                 pin_memory=True)

        # Load model from path
        model = create_model(model_options, test_dataset.size)
        best_model, best_epoch = load_model(model, join(model_path, selection),
                                            gpu, filename='model_best.pth.tar')

        # Run the model on the data
        predictions_df, metrics = test(best_model, test_loader, gpu, criterion,
                                       mode=model_options.mode, use_labels=labels)

        if labels:
            logger.info(
                "%s level %s balanced accuracy is %f for model selected on %s"
                % (model_options.mode, prefix, metrics["balanced_accuracy"],
                   selection))

        mode_level_to_tsvs(output_dir, predictions_df, metrics, fold, selection,
                           model_options.mode, dataset=prefix)
def inference_from_model_generic(caps_dir, tsv_path, model_path, model_options,
                                 num_cnn=None, selection="best_balanced_accuracy"):
    '''
    Inference using an image/subject CNN model
    '''
    from os.path import join

    gpu = not model_options.use_cpu

    # Recreate the model with the network described in the json file
    # Initialize the model
    unet_models = ('UNet3D', 'ResidualUNet3D', 'UNet3D_add_more_fc',
                   'ResidualUNet3D_add_more_fc')
    if model_options.model in unet_models:
        # The four U-Net variants share the same constructor arguments
        print('********** init %s model for test! **********' % model_options.model)
        model = create_model(model_options.model,
                             gpu=model_options.gpu,
                             dropout=model_options.dropout,
                             device_index=model_options.device,
                             in_channels=model_options.in_channels,
                             out_channels=model_options.out_channels,
                             f_maps=model_options.f_maps,
                             layer_order=model_options.layer_order,
                             num_groups=model_options.num_groups,
                             num_levels=model_options.num_levels)
    elif model_options.model in ('VoxCNN', 'ConvNet3D'):
        print('********** init %s model for test! **********' % model_options.model)
        model = create_model(model_options.model,
                             gpu=model_options.gpu,
                             device_index=model_options.device)
    elif 'gcn' in model_options.model:
        print('********** init {}-{} model for test! **********'.format(
            model_options.model, model_options.gnn_type))
        model = create_model(
            model_options.model,
            gpu=model_options.gpu,
            device_index=model_options.device,
            gnn_type=model_options.gnn_type,
            gnn_dropout=model_options.gnn_dropout,
            gnn_dropout_adj=model_options.gnn_dropout_adj,
            gnn_non_linear=model_options.gnn_non_linear,
            gnn_undirected=model_options.gnn_undirected,
            gnn_self_loop=model_options.gnn_self_loop,
            gnn_threshold=model_options.gnn_threshold,
        )
    elif model_options.model == 'ROI_GCN':
        print('********** init ROI_GCN model for test! **********')
        model = create_model(
            model_options.model,
            gpu=model_options.gpu,
            device_index=model_options.device,
            gnn_type=model_options.gnn_type,
            gnn_dropout=model_options.gnn_dropout,
            gnn_dropout_adj=model_options.gnn_dropout_adj,
            gnn_non_linear=model_options.gnn_non_linear,
            gnn_undirected=model_options.gnn_undirected,
            gnn_self_loop=model_options.gnn_self_loop,
            gnn_threshold=model_options.gnn_threshold,
            nodel_vetor_layer=model_options.nodel_vetor_layer,
            classify_layer=model_options.classify_layer,
            num_node_features=model_options.num_node_features,
            num_class=model_options.num_class,
            roi_size=model_options.roi_size,
            num_nodes=model_options.num_nodes,
            gnn_pooling_layers=model_options.gnn_pooling_layers,
            global_sort_pool_k=model_options.global_sort_pool_k,
            layers=model_options.layers,
            shortcut_type=model_options.shortcut_type,
            use_nl=model_options.use_nl,
            dropout=model_options.dropout,
            device=model_options.device)
    elif model_options.model == 'SwinTransformer3d':
        print('********** init SwinTransformer3d model for test! **********')
        model = create_model(
            model_options.model,
            gpu=model_options.gpu,
            dropout=model_options.dropout,
            device_index=model_options.device,
            sw_patch_size=model_options.sw_patch_size,
            window_size=model_options.window_size,
            mlp_ratio=model_options.mlp_ratio,
            drop_rate=model_options.drop_rate,
            attn_drop_rate=model_options.attn_drop_rate,
            drop_path_rate=model_options.drop_path_rate,
            qk_scale=model_options.qk_scale,
            embed_dim=model_options.embed_dim,
            depths=model_options.depths,
            num_heads=model_options.num_heads,
            qkv_bias=model_options.qkv_bias,
            ape=model_options.ape,
            patch_norm=model_options.patch_norm,
        )
    else:
        model = create_model(
            model_options.model,
            gpu=model_options.gpu,
            dropout=model_options.dropout,
            device_index=model_options.device,
        )

    transformations = get_transforms(model_options.mode,
                                     model_options.minmaxnormalization)

    # Define loss
    criterion = nn.CrossEntropyLoss()

    if model_options.mode_task == 'multicnn':
        predictions_df = pd.DataFrame()
        metrics_df = pd.DataFrame()

        for n in range(num_cnn):
            dataset = return_dataset(model_options.mode, caps_dir, tsv_path,
                                     model_options.preprocessing, transformations,
                                     model_options, cnn_index=n)
            print('test data size:{}'.format(len(dataset)))
            test_loader = DataLoader(dataset,
                                     batch_size=model_options.batch_size,
                                     shuffle=False,
                                     num_workers=model_options.nproc,
                                     pin_memory=True,
                                     drop_last=model_options.drop_last)

            # load the best trained model during the training
            model, best_epoch = load_model(model,
                                           join(model_path, 'cnn-%i' % n, selection),
                                           gpu, filename='model_best.pth.tar',
                                           device_index=model_options.device)

            cnn_df, cnn_metrics = test(model, test_loader, gpu, criterion,
                                       mode=model_options.mode,
                                       device_index=model_options.device)
            predictions_df = pd.concat([predictions_df, cnn_df])
            metrics_df = pd.concat(
                [metrics_df, pd.DataFrame(cnn_metrics, index=[0])])

        predictions_df.reset_index(drop=True, inplace=True)
        metrics_df.reset_index(drop=True, inplace=True)

    else:
        # Load model from path
        best_model, best_epoch = load_model(model, join(model_path, selection),
                                            gpu, filename='model_best.pth.tar',
                                            device_index=model_options.device)

        # Read/localize the data
        data_to_test = return_dataset(model_options.mode, caps_dir, tsv_path,
                                      model_options.preprocessing, transformations,
                                      model_options)
        print('test data size:{}'.format(len(data_to_test)))

        # Load the data
        test_loader = DataLoader(data_to_test,
                                 batch_size=model_options.batch_size,
                                 shuffle=False,
                                 num_workers=model_options.nproc,
                                 pin_memory=True,
                                 drop_last=model_options.drop_last)

        # Run the model on the data
        predictions_df, metrics = test(best_model, test_loader, gpu, criterion,
                                       mode=model_options.mode,
                                       device_index=model_options.device,
                                       model_options=model_options)
        metrics_df = pd.DataFrame(metrics, index=[0])

    return predictions_df, metrics_df
def individual_backprop(options):
    main_logger = return_logger(options.verbose, "main process")
    options = translate_parameters(options)

    fold_list = [
        fold for fold in os.listdir(options.model_path) if fold[:5:] == "fold-"
    ]
    if len(fold_list) == 0:
        raise ValueError("No folds were found at path %s" % options.model_path)

    model_options = argparse.Namespace()
    model_options = read_json(
        model_options, path.join(options.model_path, 'commandline.json'))
    model_options = translate_parameters(model_options)
    model_options.gpu = options.gpu

    if model_options.network_type == "multicnn":
        raise NotImplementedError(
            "The interpretation of multi-CNN is not implemented.")

    if options.tsv_path is None and options.input_dir is None:
        options.multi_cohort = model_options.multi_cohort
    if options.tsv_path is None:
        options.tsv_path = model_options.tsv_path
    if options.input_dir is None:
        options.input_dir = model_options.input_dir
    if options.target_diagnosis is None:
        options.target_diagnosis = options.diagnosis

    for fold in fold_list:
        main_logger.info(fold)
        for selection in options.selection:
            results_path = path.join(options.model_path, fold, 'gradients',
                                     selection, options.name)

            criterion = get_criterion(model_options.loss)

            # Data management (remove data not well predicted by the CNN)
            training_df = load_data_test(options.tsv_path, [options.diagnosis],
                                         baseline=options.baseline,
                                         multi_cohort=options.multi_cohort)
            training_df.reset_index(drop=True, inplace=True)

            # Model creation
            _, all_transforms = get_transforms(
                model_options.mode,
                minmaxnormalization=model_options.minmaxnormalization)

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                data_example = return_dataset(
                    model_options.mode, options.input_dir, training_df,
                    model_options.preprocessing,
                    train_transformations=None,
                    all_transformations=all_transforms,
                    prepare_dl=options.prepare_dl,
                    multi_cohort=options.multi_cohort,
                    params=model_options)

            model = create_model(model_options, data_example.size)
            model_dir = os.path.join(options.model_path, fold, 'models', selection)
            model, best_epoch = load_model(model, model_dir, gpu=options.gpu,
                                           filename='model_best.pth.tar')
            options.output_dir = results_path
            commandline_to_json(options, logger=main_logger)

            # Keep only subjects who were correctly / wrongly predicted by the network
            training_df = sort_predicted(model, training_df, options.input_dir,
                                         model_options, criterion, options.keep_true,
                                         batch_size=options.batch_size,
                                         num_workers=options.num_workers,
                                         gpu=options.gpu)

            if len(training_df) > 0:
                # Save the tsv file used for the saliency maps next to the results
                training_df.to_csv(path.join(results_path, 'data.tsv'), sep='\t',
                                   index=False)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    data_train = return_dataset(
                        model_options.mode, options.input_dir, training_df,
                        model_options.preprocessing,
                        train_transformations=None,
                        all_transformations=all_transforms,
                        prepare_dl=options.prepare_dl,
                        multi_cohort=options.multi_cohort,
                        params=model_options)

                train_loader = DataLoader(data_train,
                                          batch_size=options.batch_size,
                                          shuffle=True,
                                          num_workers=options.num_workers,
                                          pin_memory=True)

                interpreter = VanillaBackProp(model, gpu=options.gpu)

                for data in train_loader:
                    if options.gpu:
                        input_batch = data['image'].cuda()
                    else:
                        input_batch = data['image']

                    map_np = interpreter.generate_gradients(
                        input_batch,
                        data_train.diagnosis_code[options.target_diagnosis])

                    # Iterate over the actual batch size (the last batch may be
                    # smaller than options.batch_size)
                    for i in range(len(data['participant_id'])):
                        single_path = path.join(results_path,
                                                data['participant_id'][i],
                                                data['session_id'][i])
                        os.makedirs(single_path, exist_ok=True)

                        if len(data_train.size) == 4:
                            if options.nifti_template_path is not None:
                                image_nii = nib.load(options.nifti_template_path)
                                affine = image_nii.affine
                            else:
                                affine = np.eye(4)
                            map_nii = nib.Nifti1Image(map_np[i, 0, :, :, :], affine)
                            nib.save(map_nii, path.join(single_path, "map.nii.gz"))
                        else:
                            jpg_path = path.join(single_path, "map.jpg")
                            plt.imshow(map_np[i, 0, :, :], cmap="coolwarm",
                                       vmin=-options.vmax, vmax=options.vmax)
                            plt.colorbar()
                            plt.savefig(jpg_path)
                            plt.close()

                        np.save(path.join(single_path, "map.npy"), map_np[i])
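# Usage sketch (illustrative only): after individual_backprop() has run, each
# subject/session gets its own folder under
# <model_path>/<fold>/gradients/<selection>/<name>/. The paths, fold name,
# selection and map name below are hypothetical placeholders.
def _example_load_individual_map():
    import numpy as np
    from os import path

    single_path = path.join("results/exp1", "fold-0", "gradients",
                            "best_balanced_accuracy", "individual-AD",
                            "sub-CLNC0001", "ses-M00")
    # map.npy holds the raw gradients; map.nii.gz (3D inputs) or map.jpg
    # (2D slice modes) is written alongside it.
    saliency = np.load(path.join(single_path, "map.npy"))
    print(saliency.shape)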