def test_split(): """Checks that: - split and kfold are working - the loading functions can find the output - no data leakage is introduced in split and kfold. """ n_splits = 5 flag_split = not os.system("clinicadl tsvtool split %s %s --age_name age" % (merged_tsv, reference_path)) flag_kfold = not os.system("clinicadl tsvtool kfold %s --n_splits %i" % (path.join(reference_path, "train"), n_splits)) assert flag_split assert flag_kfold flag_load = True try: _ = load_data_test(path.join(reference_path, "test"), diagnoses.split(" ")) for fold in range(n_splits): _, _ = load_data(path.join(reference_path, "train"), diagnoses.split(" "), fold, n_splits=n_splits) except FileNotFoundError: flag_load = False assert flag_load run_test_suite(reference_path, 0, "test") run_test_suite(path.join(reference_path, "train"), n_splits, "validation") shutil.rmtree(path.join(reference_path, "train")) shutil.rmtree(path.join(reference_path, "test"))
def main(options): # Read json model_options = argparse.Namespace() json_path = path.join(options.model_path, "commandline_cnn.json") model_options = read_json(model_options, json_path=json_path) num_cnn = compute_num_cnn(options.input_dir, options.tsv_path, model_options, data="test") # Load test data if options.diagnoses is None: options.diagnoses = model_options.diagnoses test_df = load_data_test(options.tsv_path, options.diagnoses) transformations = get_transforms(model_options.mode, model_options.minmaxnormalization) criterion = torch.nn.CrossEntropyLoss() # Loop on all folds trained best_model_dir = os.path.join(options.model_path, 'best_model_dir') folds_dir = os.listdir(best_model_dir) # Loop on folds for fold_dir in folds_dir: split = int(fold_dir[-1]) print("Fold %i" % split) for cnn_index in range(num_cnn): dataset = return_dataset(model_options.mode, options.input_dir, test_df, options.preprocessing, transformations, options, cnn_index=cnn_index) test_loader = DataLoader(dataset, batch_size=options.batch_size, shuffle=False, num_workers=options.num_workers, pin_memory=True) test_cnn(options.model_path, test_loader, options.dataset, split, criterion, cnn_index, model_options, options.gpu) for selection in ['best_acc', 'best_loss']: soft_voting_to_tsvs( options.model_path, split, selection, mode=options.mode, dataset=options.dataset, num_cnn=num_cnn, selection_threshold=model_options.selection_threshold)
def main(options): # Initialize the model print('Do transfer learning with existed model trained on ImageNet.') model = create_model(options.network, options.gpu) trg_size = (224, 224 ) # most of the imagenet pretrained model has this input size # All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB # images of shape (3 x H x W), where H and W are expected to be at least 224. The images have to be loaded in # to a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225]. transformations = transforms.Compose([ MinMaxNormalization(), transforms.ToPILImage(), transforms.Resize(trg_size), transforms.ToTensor() ]) # Define loss and optimizer loss = torch.nn.CrossEntropyLoss() if options.split is None: fold_iterator = range(options.n_splits) else: fold_iterator = [options.split] # Loop on folds for fi in fold_iterator: print("Fold %i" % fi) if options.dataset == 'validation': _, test_df = load_data(options.diagnosis_tsv_path, options.diagnoses, fi, n_splits=options.n_splits, baseline=True) else: test_df = load_data_test(options.diagnosis_tsv_path, options.diagnoses) data_test = MRIDataset_slice(options.caps_directory, test_df, transformations=transformations, mri_plane=options.mri_plane, prepare_dl=options.prepare_dl) test_loader = DataLoader(data_test, batch_size=options.batch_size, shuffle=False, num_workers=options.num_workers, pin_memory=True) # load the best trained model during the training model, best_epoch = load_model(model, os.path.join(options.output_dir, 'best_model_dir', "fold_%i" % fi, 'CNN', str(options.selection)), gpu=options.gpu, filename='model_best.pth.tar') results_df, metrics = test(model, test_loader, options.gpu, loss) print("Slice level balanced accuracy is %f" % (metrics['balanced_accuracy'])) slice_level_to_tsvs(options.output_dir, results_df, metrics, fi, options.selection, dataset=options.dataset) # Soft voting 
soft_voting_to_tsvs(options.output_dir, fi, selection=options.selection, dataset=options.dataset, selection_threshold=options.selection_threshold)
def group_backprop(options): main_logger = return_logger(options.verbose, "main process") options = translate_parameters(options) fold_list = [ fold for fold in os.listdir(options.model_path) if fold[:5:] == "fold-" ] if len(fold_list) == 0: raise ValueError("No folds were found at path %s" % options.model_path) for fold in fold_list: main_logger.info(fold) for selection in options.selection: results_path = path.join(options.model_path, fold, 'gradients', selection, options.name) model_options = argparse.Namespace() model_options = read_json( model_options, path.join(options.model_path, 'commandline.json')) model_options = translate_parameters(model_options) model_options.gpu = options.gpu if options.tsv_path is None: options.tsv_path = model_options.tsv_path if options.input_dir is None: options.input_dir = model_options.input_dir if options.target_diagnosis is None: options.target_diagnosis = options.diagnosis criterion = get_criterion(model_options.loss) # Data management (remove data not well predicted by the CNN) training_df = load_data_test(options.tsv_path, [options.diagnosis], baseline=options.baseline) training_df.reset_index(drop=True, inplace=True) # Model creation _, all_transforms = get_transforms( model_options.mode, minmaxnormalization=model_options.minmaxnormalization) data_example = return_dataset(model_options.mode, options.input_dir, training_df, model_options.preprocessing, train_transformations=None, all_transformations=all_transforms, params=options) model = create_model(model_options, data_example.size) model_dir = os.path.join(options.model_path, fold, 'models', selection) model, best_epoch = load_model(model, model_dir, gpu=options.gpu, filename='model_best.pth.tar') options.output_dir = results_path commandline_to_json(options, logger=main_logger) # Keep only subjects who were correctly / wrongly predicted by the network training_df = sort_predicted(model, training_df, options.input_dir, model_options, criterion, options.keep_true, 
batch_size=options.batch_size, num_workers=options.num_workers, gpu=options.gpu) if len(training_df) > 0: # Save the tsv files used for the saliency maps training_df.to_csv(path.join('data.tsv'), sep='\t', index=False) data_train = return_dataset(model_options.mode, options.input_dir, training_df, model_options.preprocessing, train_transformations=None, all_transformations=all_transforms, params=options) train_loader = DataLoader(data_train, batch_size=options.batch_size, shuffle=True, num_workers=options.num_workers, pin_memory=True) interpreter = VanillaBackProp(model, gpu=options.gpu) cum_map = 0 for data in train_loader: if options.gpu: input_batch = data['image'].cuda() else: input_batch = data['image'] maps = interpreter.generate_gradients( input_batch, data_train.diagnosis_code[options.target_diagnosis]) cum_map += maps.sum(axis=0) mean_map = cum_map / len(data_train) if len(data_train.size) == 4: if options.nifti_template_path is not None: image_nii = nib.load(options.nifti_template_path) affine = image_nii.affine else: affine = np.eye(4) mean_map_nii = nib.Nifti1Image(mean_map[0], affine) nib.save(mean_map_nii, path.join(results_path, "map.nii.gz")) np.save(path.join(results_path, "map.npy"), mean_map[0]) else: jpg_path = path.join(results_path, "map.jpg") plt.imshow(mean_map[0], cmap="coolwarm", vmin=-options.vmax, vmax=options.vmax) plt.colorbar() plt.savefig(jpg_path) plt.close() numpy_path = path.join(results_path, "map.npy") np.save(numpy_path, mean_map[0]) else: main_logger.warn("There are no subjects for the given options")
def inference_from_model_generic(caps_dir, tsv_path, model_path, model_options, prefix, output_dir, fold, selection, labels=True, num_cnn=None, logger=None): from os.path import join import logging if logger is None: logger = logging gpu = not model_options.use_cpu _, all_transforms = get_transforms(model_options.mode, model_options.minmaxnormalization) test_df = load_data_test(tsv_path, model_options.diagnoses) # Define loss and optimizer criterion = get_criterion(model_options.loss) if model_options.mode_task == 'multicnn': for n in range(num_cnn): test_dataset = return_dataset(model_options.mode, caps_dir, test_df, model_options.preprocessing, train_transformations=None, all_transformations=all_transforms, params=model_options, cnn_index=n, labels=labels) test_loader = DataLoader(test_dataset, batch_size=model_options.batch_size, shuffle=False, num_workers=model_options.nproc, pin_memory=True) # load the best trained model during the training model = create_model(model_options, test_dataset.size) model, best_epoch = load_model(model, join(model_path, 'cnn-%i' % n, selection), gpu, filename='model_best.pth.tar') cnn_df, cnn_metrics = test(model, test_loader, gpu, criterion, mode=model_options.mode, use_labels=labels) if labels: logger.info( "%s balanced accuracy is %f for %s %i and model selected on %s" % (prefix, cnn_metrics["balanced_accuracy"], model_options.mode, n, selection)) mode_level_to_tsvs(output_dir, cnn_df, cnn_metrics, fold, selection, model_options.mode, dataset=prefix, cnn_index=n) else: # Read/localize the data test_dataset = return_dataset(model_options.mode, caps_dir, test_df, model_options.preprocessing, train_transformations=None, all_transformations=all_transforms, params=model_options, labels=labels) # Load the data test_loader = DataLoader(test_dataset, batch_size=model_options.batch_size, shuffle=False, num_workers=model_options.nproc, pin_memory=True) # Load model from path model = create_model(model_options, test_dataset.size) 
best_model, best_epoch = load_model(model, join(model_path, selection), gpu, filename='model_best.pth.tar') # Run the model on the data predictions_df, metrics = test(best_model, test_loader, gpu, criterion, mode=model_options.mode, use_labels=labels) if labels: logger.info( "%s level %s balanced accuracy is %f for model selected on %s" % (model_options.mode, prefix, metrics["balanced_accuracy"], selection)) mode_level_to_tsvs(output_dir, predictions_df, metrics, fold, selection, model_options.mode, dataset=prefix)
def main(options): # Initialize the model model = create_model(options.network, options.gpu) transformations = transforms.Compose([MinMaxNormalization()]) # Define loss and optimizer loss = torch.nn.CrossEntropyLoss() if options.split is None: fold_iterator = range(options.n_splits) else: fold_iterator = [options.split] # Loop on folds for fi in fold_iterator: print("Fold %i" % fi) if options.dataset == 'validation': _, test_df = load_data(options.diagnosis_tsv_path, options.diagnoses, fi, n_splits=options.n_splits, baseline=True) else: test_df = load_data_test(options.diagnosis_tsv_path, options.diagnoses) for n in range(options.num_cnn): dataset = MRIDataset_patch(options.caps_directory, test_df, options.patch_size, options.patch_stride, transformations=transformations, patch_index=n, prepare_dl=options.prepare_dl) test_loader = DataLoader(dataset, batch_size=options.batch_size, shuffle=False, num_workers=options.num_workers, pin_memory=True) # load the best trained model during the training model, best_epoch = load_model( model, os.path.join(options.output_dir, 'best_model_dir', "fold_%i" % fi, 'cnn-%i' % n, options.selection), options.gpu, filename='model_best.pth.tar') results_df, metrics = test(model, test_loader, options.gpu, loss) print("Patch level balanced accuracy is %f" % metrics['balanced_accuracy']) # write the test results into the tsv files patch_level_to_tsvs(options.output_dir, results_df, metrics, fi, options.selection, dataset=options.dataset, cnn_index=n) print("Selection threshold: ", options.selection_threshold) soft_voting_to_tsvs(options.output_dir, fi, options.selection, dataset=options.dataset, num_cnn=options.num_cnn, selection_threshold=options.selection_threshold)
# NOTE(review): this fragment is truncated — it begins mid-function
# (no enclosing `def` is visible; `best_model_dir`, `fold_dir`,
# `options` and `model_options` come from an unseen outer scope) and the
# final DataLoader call is cut off mid-argument-list. Recover the full
# function before editing its logic.
model = create_model(model_options.network, options.gpu)
criterion = nn.CrossEntropyLoss()
# Load the best checkpoint of this fold's CNN for the given selection.
model_dir = os.path.join(best_model_dir, fold_dir, 'CNN', options.selection)
best_model, best_epoch = load_model(model, model_dir, options.gpu,
                                    filename='model_best.pth.tar')
# Load test data; fall back to the training diagnoses when none are given.
if options.diagnoses is None:
    options.diagnoses = model_options.diagnoses
test_tsv = load_data_test(options.tsv_path, options.diagnoses)
# Apply min-max normalization only if it was used at training time.
if model_options.minmaxnormalization:
    transformations = MinMaxNormalization()
else:
    transformations = None
data_test = MRIDataset(options.caps_dir, test_tsv,
                       model_options.preprocessing,
                       transform=transformations)
# Truncated call — the remaining DataLoader arguments are missing here.
test_loader = DataLoader(data_test,
                         batch_size=options.batch_size,
                         shuffle=False,
                         num_workers=options.num_workers,
def individual_backprop(options):
    """Compute and save an individual saliency map per subject/session.

    For every ``fold-*`` directory of ``options.model_path`` and every
    selection metric in ``options.selection``, reloads the best model,
    filters the subjects with :func:`sort_predicted`, computes
    vanilla-backprop gradient maps and saves one map per subject/session
    (NIfTI for 4D data, jpg otherwise, plus a raw npy array) under
    ``<model_path>/<fold>/gradients/<selection>/<name>/<participant>/<session>``.

    Args:
        options: argparse.Namespace with model_path, verbose, gpu, tsv_path,
            input_dir, multi_cohort, diagnosis, target_diagnosis, baseline,
            keep_true, batch_size, num_workers, prepare_dl, name,
            nifti_template_path and vmax attributes (inferred from usage
            below).

    Raises:
        ValueError: if no ``fold-*`` directory is found in
            ``options.model_path``.
        NotImplementedError: for multi-CNN models.
    """
    main_logger = return_logger(options.verbose, "main process")
    options = translate_parameters(options)
    fold_list = [
        fold for fold in os.listdir(options.model_path) if fold[:5:] == "fold-"
    ]
    if len(fold_list) == 0:
        raise ValueError("No folds were found at path %s" % options.model_path)

    # Restore the options the model was trained with.
    model_options = argparse.Namespace()
    model_options = read_json(
        model_options, path.join(options.model_path, 'commandline.json'))
    model_options = translate_parameters(model_options)
    model_options.gpu = options.gpu

    if model_options.network_type == "multicnn":
        raise NotImplementedError(
            "The interpretation of multi-CNN is not implemented.")

    # Fall back to the training configuration when not overridden.
    if options.tsv_path is None and options.input_dir is None:
        options.multi_cohort = model_options.multi_cohort
    if options.tsv_path is None:
        options.tsv_path = model_options.tsv_path
    if options.input_dir is None:
        options.input_dir = model_options.input_dir
    if options.target_diagnosis is None:
        options.target_diagnosis = options.diagnosis

    for fold in fold_list:
        main_logger.info(fold)
        for selection in options.selection:
            results_path = path.join(options.model_path, fold, 'gradients',
                                     selection, options.name)

            criterion = get_criterion(model_options.loss)

            # Data management (remove data not well predicted by the CNN)
            training_df = load_data_test(options.tsv_path, [options.diagnosis],
                                         baseline=options.baseline,
                                         multi_cohort=options.multi_cohort)
            training_df.reset_index(drop=True, inplace=True)

            # Model creation
            _, all_transforms = get_transforms(
                model_options.mode,
                minmaxnormalization=model_options.minmaxnormalization)

            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                data_example = return_dataset(
                    model_options.mode, options.input_dir, training_df,
                    model_options.preprocessing, train_transformations=None,
                    all_transformations=all_transforms,
                    prepare_dl=options.prepare_dl,
                    multi_cohort=options.multi_cohort, params=model_options)

            model = create_model(model_options, data_example.size)
            model_dir = os.path.join(options.model_path, fold, 'models', selection)
            model, best_epoch = load_model(model, model_dir, gpu=options.gpu,
                                           filename='model_best.pth.tar')
            options.output_dir = results_path
            commandline_to_json(options, logger=main_logger)

            # Keep only subjects who were correctly / wrongly predicted by the network
            training_df = sort_predicted(model, training_df, options.input_dir,
                                         model_options, criterion,
                                         options.keep_true,
                                         batch_size=options.batch_size,
                                         num_workers=options.num_workers,
                                         gpu=options.gpu)

            if len(training_df) > 0:
                # Save the tsv files used for the saliency maps.
                # BUGFIX: path.join('data.tsv') was a one-argument no-op that
                # wrote the file to the current working directory; save it
                # next to the maps instead.
                training_df.to_csv(path.join(results_path, 'data.tsv'),
                                   sep='\t', index=False)

                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    data_train = return_dataset(
                        model_options.mode, options.input_dir, training_df,
                        model_options.preprocessing,
                        train_transformations=None,
                        all_transformations=all_transforms,
                        prepare_dl=options.prepare_dl,
                        multi_cohort=options.multi_cohort,
                        params=model_options)

                train_loader = DataLoader(data_train,
                                          batch_size=options.batch_size,
                                          shuffle=True,
                                          num_workers=options.num_workers,
                                          pin_memory=True)

                interpreter = VanillaBackProp(model, gpu=options.gpu)

                for data in train_loader:
                    if options.gpu:
                        input_batch = data['image'].cuda()
                    else:
                        input_batch = data['image']
                    map_np = interpreter.generate_gradients(
                        input_batch,
                        data_train.diagnosis_code[options.target_diagnosis])
                    # BUGFIX: iterate over the actual batch size — the last
                    # batch of the DataLoader may hold fewer than
                    # options.batch_size samples, which raised an IndexError.
                    for i in range(len(data['participant_id'])):
                        single_path = path.join(results_path,
                                                data['participant_id'][i],
                                                data['session_id'][i])
                        os.makedirs(single_path, exist_ok=True)

                        if len(data_train.size) == 4:
                            # 3D image data: save as NIfTI with the template
                            # affine when one is provided.
                            if options.nifti_template_path is not None:
                                image_nii = nib.load(options.nifti_template_path)
                                affine = image_nii.affine
                            else:
                                affine = np.eye(4)
                            map_nii = nib.Nifti1Image(map_np[i, 0, :, :, :], affine)
                            nib.save(map_nii, path.join(single_path, "map.nii.gz"))
                        else:
                            # 2D data (slices): save a jpg preview.
                            jpg_path = path.join(single_path, "map.jpg")
                            plt.imshow(map_np[i, 0, :, :], cmap="coolwarm",
                                       vmin=-options.vmax, vmax=options.vmax)
                            plt.colorbar()
                            plt.savefig(jpg_path)
                            plt.close()
                        # Raw array saved for every subject, whatever the
                        # dimensionality (mirrors group_backprop's outputs).
                        np.save(path.join(single_path, "map.npy"), map_np[i])