def test_bids_df_anat(loader_parameters):
    """
    Test for MRI anat nii.gz file format
    Test for when no file extensions are provided
    Test for multiple target_suffix
    Test behavior when "roi_suffix" is not None
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test)))
    assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []}

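# NOTE (illustrative sketch, not from the source): the BidsDataframe tests above and below receive
# `loader_parameters` from a pytest fixture defined elsewhere in the test module. A minimal fixture
# consistent with the keys these tests read could look as follows; the suffixes and values are
# hypothetical placeholders, only `__data_testing_dir__` comes from the test module itself.
import pytest

@pytest.fixture
def loader_parameters():
    return {
        "path_data": [__data_testing_dir__],                        # dataset folder(s) indexed by BidsDataframe
        "target_suffix": ["_seg-manual"],                           # hypothetical ground-truth suffix
        "extensions": [".nii.gz"],
        "contrast_params": {"contrast_lst": [], "balance": {}},
        "roi_params": {"suffix": None, "slice_filter_roi": None},
        "multichannel": False,
    }
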
def test_bounding_box(download_data_testing_test_files, train_lst, target_lst, config):
    # Create mask
    mask_coord = [20, 40, 20, 90, 0, 25]
    mx1, mx2, my1, my2, mz1, mz2 = mask_coord
    mask = np.zeros((96, 96, 96))
    mask[mx1:mx2 + 1, my1:my2 + 1, mz1:mz2 + 1] = 1
    coord = imed_obj_detect.get_bounding_boxes(mask)
    assert coord[0] == mask_coord

    loader_params = {
        "data_list": train_lst,
        "dataset_type": "training",
        "requires_undo": False,
        "path_data": [__data_testing_dir__],
        "target_suffix": target_lst,
        "extensions": [".nii.gz"],
        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
        "slice_axis": "axial"
    }

    if "Modified3DUNet" in config:
        config['model_params']["name"] = "Modified3DUNet"
        config['model_params'].update(config["Modified3DUNet"])

    bounding_box_dict = {}
    bounding_box_path = os.path.join(PATH_OUTPUT, 'bounding_boxes.json')
    if not os.path.exists(PATH_OUTPUT):
        os.mkdir(PATH_OUTPUT)
    current_dir = os.getcwd()
    sub = train_lst[0]
    contrast = config['contrast_params']['contrast_lst'][0]
    bb_path = os.path.join(current_dir, __data_testing_dir__, sub, "anat", sub + "_" + contrast + ".nii.gz")
    bounding_box_dict[bb_path] = coord
    with open(bounding_box_path, 'w') as fp:
        json.dump(bounding_box_dict, fp, indent=4)

    # Update loader_params with config
    loader_params.update(config)

    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
    ds = imed_loader.load_dataset(bids_df, **loader_params)

    handler = ds.handlers if "Modified3DUNet" in config else ds.indexes
    for index in handler:
        seg_pair, _ = index
        if "Modified3DUNet" in config:
            assert seg_pair['input'][0].shape[-3:] == (mx2 - mx1, my2 - my1, mz2 - mz1)
        else:
            assert seg_pair['input'][0].shape[-2:] == (mx2 - mx1, my2 - my1)

    shutil.rmtree(PATH_OUTPUT)

def test_bids_df_microscopy_png(loader_parameters):
    """
    Test for microscopy png file format
    Test for _sessions.tsv and _scans.tsv files
    Test for target_suffix as a nested list
    Test for when no contrast_params are provided
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test)))
    assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []}

def test_config_sha256(download_data_testing_test_files, initial_config):
    file_lst = ["sub-unf01_T2w.nii.gz"]
    loader_params = {
        "transforms_params": {},
        "data_list": ['sub-unf01'],
        "dataset_type": "testing",
        "requires_undo": True,
        "contrast_params": {"contrast_lst": ['T2w'], "balance": {}},
        "path_data": [__data_testing_dir__],
        "target_suffix": ["_lesion-manual"],
        "extensions": [".nii.gz"],
        "roi_params": {"suffix": "_seg-manual", "slice_filter_roi": 10},
        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
        "slice_axis": "axial",
        "multichannel": False
    }
    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
    generate_sha_256(initial_config, bids_df.df, file_lst)
    assert (initial_config['training_sha256']['sub-unf01_T2w.nii.gz'] ==
            'f020b368fea15399fa112badd28b2df69e044dba5d23b3fe1646d12d7d3d39ac')

def test_sampler(download_data_testing_test_files, transforms_dict, train_lst, target_lst, roi_params):
    cuda_available, device = imed_utils.define_device(GPU_ID)

    loader_params = {
        "transforms_params": transforms_dict,
        "data_list": train_lst,
        "dataset_type": "training",
        "requires_undo": False,
        "contrast_params": {"contrast_lst": ['T2w'], "balance": {}},
        "path_data": [__data_testing_dir__],
        "target_suffix": target_lst,
        "extensions": [".nii.gz"],
        "roi_params": roi_params,
        "model_params": {"name": "Unet"},
        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
        "slice_axis": "axial",
        "multichannel": False
    }

    # Get Training dataset
    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
    ds_train = imed_loader.load_dataset(bids_df, **loader_params)

    print('\nLoading without sampling')
    train_loader = DataLoader(ds_train, batch_size=BATCH_SIZE,
                              shuffle=True, pin_memory=True,
                              collate_fn=imed_loader_utils.imed_collate,
                              num_workers=0)
    neg_percent, pos_percent = _cmpt_label(train_loader)
    assert abs(neg_percent - pos_percent) > 20

    print('\nLoading with sampling')
    train_loader_balanced = DataLoader(ds_train, batch_size=BATCH_SIZE,
                                       sampler=imed_loader_utils.BalancedSampler(ds_train),
                                       shuffle=False, pin_memory=True,
                                       collate_fn=imed_loader_utils.imed_collate,
                                       num_workers=0)
    neg_percent_bal, pos_percent_bal = _cmpt_label(train_loader_balanced)

    # Check that the balanced loader is closer to a 50/50 class split. The sampler is probabilistic,
    # so with only 16 slices and 87.5 % of one class (positive samples) it cannot be expected to reach
    # exactly 50 %; we only require an improvement over the unbalanced loader.
    assert abs(neg_percent_bal - pos_percent_bal) < abs(neg_percent - pos_percent)

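# NOTE (illustrative sketch, not from the source): `_cmpt_label` is a helper defined elsewhere in
# this test module. A version consistent with its use above would count, over all batches, the
# slices whose ground truth is empty vs. non-empty and return both counts as percentages:
def _cmpt_label(loader):
    cmpt_pos, cmpt_neg = 0, 0
    for batch in loader:
        for gt in batch['gt']:
            # a slice is "positive" if its ground truth contains at least one foreground voxel
            if np.any(gt.numpy()):
                cmpt_pos += 1
            else:
                cmpt_neg += 1
    total = cmpt_pos + cmpt_neg
    return 100. * cmpt_neg / total, 100. * cmpt_pos / total
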
def split_dataset(initial_config):
    """
    Args:
        initial_config (dict): The original config file, which we use as a basis from which
            to modify our hyperparameters.

            .. code-block:: JSON

                {
                    "training_parameters": {
                        "batch_size": 18,
                        "loss": {"name": "DiceLoss"}
                    },
                    "default_model": {
                        "name": "Unet",
                        "dropout_rate": 0.3,
                        "depth": 3
                    },
                    "model_name": "seg_tumor_t2",
                    "path_output": "./tmp/"
                }
    """
    loader_parameters = initial_config["loader_parameters"]
    path_output = initial_config["path_output"]
    if not os.path.isdir(path_output):
        print('Creating output path: {}'.format(path_output))
        os.makedirs(path_output)
    else:
        print('Output path already exists: {}'.format(path_output))

    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, path_output, derivatives=True)

    train_lst, valid_lst, test_lst = imed_loader_utils.get_new_subject_file_split(
        df=bids_df.df,
        data_testing=initial_config["split_dataset"]["data_testing"],
        split_method=initial_config["split_dataset"]["split_method"],
        random_seed=initial_config["split_dataset"]["random_seed"],
        train_frac=initial_config["split_dataset"]["train_fraction"],
        test_frac=initial_config["split_dataset"]["test_fraction"],
        path_output="./",
        balance=initial_config["split_dataset"]['balance']
        if 'balance' in initial_config["split_dataset"] else None
    )

    # Save the subject distribution
    split_dct = {'train': train_lst, 'valid': valid_lst, 'test': test_lst}
    split_path = "./" + "common_split_datasets.joblib"
    joblib.dump(split_dct, split_path)
    initial_config["split_dataset"]["fname_split"] = split_path
    return initial_config

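# NOTE (illustrative sketch, not from the source): split_dataset is meant to be called with a config
# dict loaded from JSON; the returned config carries "fname_split" pointing to the saved joblib file.
# The config file name below is a hypothetical placeholder, and `json` is assumed to be imported.
if __name__ == "__main__":
    with open("config.json") as fhandle:
        config = json.load(fhandle)
    config = split_dataset(config)
    print("Split saved to: {}".format(config["split_dataset"]["fname_split"]))
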
def test_load_dataset_2d_png(download_data_testing_test_files,
                             loader_parameters, model_parameters, transform_parameters):
    """
    Test to make sure load_dataset runs with 2D PNG data.
    """
    loader_parameters.update({"model_params": model_parameters})
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    data_lst = ['sub-rat3_ses-01_sample-data9_SEM.png']
    ds = imed_loader.load_dataset(bids_df,
                                  **{**loader_parameters,
                                     **{'data_list': data_lst,
                                        'transforms_params': transform_parameters,
                                        'dataset_type': 'training'}})
    assert ds[0]['input'].shape == (1, 756, 764)
    assert ds[0]['gt'].shape == (1, 756, 764)

def test_bids_df_ctscan(download_data_testing_test_files, loader_parameters):
    """
    Test for ct-scan nii.gz file format
    Test for when dataset_description.json is not present in derivatives folder
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test.csv")
    df_test.to_csv(csv_test, index=False)
    diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test)))
    assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []}

def test_bids_df_multi(download_data_testing_test_files, loader_parameters):
    """
    Test for multiple folders in path_data
    """
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    df_test = bids_df.df.drop(columns=['path'])
    df_test = df_test.sort_values(by=['filename']).reset_index(drop=True)
    csv_ref = os.path.join(loader_parameters["path_data"][0], "df_ref_multi.csv")
    csv_test = os.path.join(loader_parameters["path_data"][0], "df_test_multi.csv")
    df_test.to_csv(csv_test, index=False)
    diff = csv_diff.compare(csv_diff.load_csv(open(csv_ref)), csv_diff.load_csv(open(csv_test)))
    assert diff == {'added': [], 'removed': [], 'changed': [], 'columns_added': [], 'columns_removed': []}

def test_slice_filter(download_data_testing_test_files, transforms_dict, train_lst, target_lst,
                      roi_params, slice_filter_params):
    if "ROICrop" in transforms_dict and roi_params["suffix"] is None:
        return
    cuda_available, device = imed_utils.define_device(GPU_ID)

    loader_params = {
        "transforms_params": transforms_dict,
        "data_list": train_lst,
        "dataset_type": "training",
        "requires_undo": False,
        "contrast_params": {"contrast_lst": ['T2w'], "balance": {}},
        "path_data": [__data_testing_dir__],
        "target_suffix": target_lst,
        "extensions": [".nii.gz"],
        "roi_params": roi_params,
        "model_params": {"name": "Unet"},
        "slice_filter_params": slice_filter_params,
        "slice_axis": "axial",
        "multichannel": False
    }

    # Get Training dataset
    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
    ds_train = imed_loader.load_dataset(bids_df, **loader_params)

    print('\tNumber of loaded slices: {}'.format(len(ds_train)))

    train_loader = DataLoader(ds_train, batch_size=BATCH_SIZE,
                              shuffle=True, pin_memory=True,
                              collate_fn=imed_loader_utils.imed_collate,
                              num_workers=0)
    print('\tNumber of Neg/Pos slices in GT.')
    cmpt_neg, cmpt_pos = _cmpt_slice(train_loader)
    if slice_filter_params["filter_empty_mask"]:
        assert cmpt_neg == 0
        assert cmpt_pos != 0
    else:
        # Without the empty-mask filter, negative slices should still be present alongside positive ones
        assert cmpt_neg != 0 and cmpt_pos != 0

def test_get_target_filename_list(loader_parameters, model_parameters, transform_parameters):
    """
    Test that all target_suffix are considered for target filename when list
    """
    loader_parameters.update({"model_params": model_parameters})
    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)
    data_lst = ['sub-rat3_ses-01_sample-data9_SEM.png']
    test_ds = imed_loader.load_dataset(bids_df,
                                       **{**loader_parameters,
                                          **{'data_list': data_lst,
                                             'transforms_params': transform_parameters,
                                             'dataset_type': 'training'}})
    target_filename = test_ds.filename_pairs[0][1]
    assert len(target_filename) == len(loader_parameters["target_suffix"])

def test_HeMIS(download_data_testing_test_files, loader_parameters, p=0.0001):
    print('[INFO]: Starting test ... \n')

    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)

    contrast_params = loader_parameters["contrast_params"]
    target_suffix = loader_parameters["target_suffix"]
    roi_params = loader_parameters["roi_params"]

    training_transform_dict = {
        "Resample": {"wspace": 0.75, "hspace": 0.75},
        "CenterCrop": {"size": [48, 48]},
        "NumpyToTensor": {}
    }
    transform_lst, _ = imed_transforms.prepare_transforms(training_transform_dict)

    train_lst = ['sub-unf01']

    print('[INFO]: Creating dataset ...\n')
    model_params = {
        "name": "HeMISUnet",
        "dropout_rate": 0.3,
        "bn_momentum": 0.9,
        "depth": 2,
        "in_channel": 1,
        "out_channel": 1,
        "missing_probability": 0.00001,
        "missing_probability_growth": 0.9,
        "contrasts": ["T1w", "T2w"],
        "ram": False,
        "path_hdf5": __path_hdf5__,
        "csv_path": __path_csv__,
        "target_lst": ["T2w"],
        "roi_lst": ["T2w"]
    }
    dataset = imed_adaptative.HDF5Dataset(bids_df=bids_df,
                                          subject_file_lst=train_lst,
                                          model_params=model_params,
                                          contrast_params=contrast_params,
                                          target_suffix=target_suffix,
                                          slice_axis=2,
                                          transform=transform_lst,
                                          metadata_choice=False,
                                          dim=2,
                                          slice_filter_fn=imed_loader_utils.SliceFilter(
                                              filter_empty_input=True, filter_empty_mask=True),
                                          roi_params=roi_params)

    dataset.load_into_ram(['T1w', 'T2w', 'T2star'])
    print("[INFO]: Dataset RAM status:")
    print(dataset.status)
    print("[INFO]: In memory Dataframe:")
    print(dataset.dataframe)

    # TODO
    # ds_train.filter_roi(nb_nonzero_thr=10)

    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE,
                              shuffle=True, pin_memory=True,
                              collate_fn=imed_loader_utils.imed_collate,
                              num_workers=1)

    model = models.HeMISUnet(contrasts=contrast_params["contrast_lst"],
                             depth=3,
                             drop_rate=DROPOUT,
                             bn_momentum=BN)
    print(model)

    cuda_available = torch.cuda.is_available()
    if cuda_available:
        torch.cuda.set_device(GPU_ID)
        print("Using GPU ID {}".format(GPU_ID))
        model.cuda()

    # Initializing optimizer and scheduler
    step_scheduler_batch = False
    optimizer = optim.Adam(model.parameters(), lr=INIT_LR)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, N_EPOCHS)

    load_lst, reload_lst, pred_lst, opt_lst, schedul_lst, init_lst, gen_lst = [], [], [], [], [], [], []

    for epoch in tqdm(range(1, N_EPOCHS + 1), desc="Training"):
        start_time = time.time()

        start_init = time.time()
        lr = scheduler.get_last_lr()[0]
        model.train()
        tot_init = time.time() - start_init
        init_lst.append(tot_init)

        num_steps = 0
        start_gen = 0
        for i, batch in enumerate(train_loader):
            if i > 0:
                tot_gen = time.time() - start_gen
                gen_lst.append(tot_gen)

            start_load = time.time()
            input_samples, gt_samples = imed_utils.unstack_tensors(batch["input"]), batch["gt"]

            print(batch["input_metadata"][0][0]["missing_mod"])
            missing_mod = imed_training.get_metadata(batch["input_metadata"], model_params)

            print("Number of missing contrasts = {}.".format(
                len(input_samples) * len(input_samples[0]) - missing_mod.sum()))
            print("len input = {}".format(len(input_samples)))
            print("Batch = {}, {}".format(input_samples[0].shape, gt_samples[0].shape))

            if cuda_available:
                var_input = imed_utils.cuda(input_samples)
                var_gt = imed_utils.cuda(gt_samples, non_blocking=True)
            else:
                var_input = input_samples
                var_gt = gt_samples

            tot_load = time.time() - start_load
            load_lst.append(tot_load)

            start_pred = time.time()
            preds = model(var_input, missing_mod)
            tot_pred = time.time() - start_pred
            pred_lst.append(tot_pred)

            start_opt = time.time()
            loss = -losses.DiceLoss()(preds, var_gt)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step_scheduler_batch:
                scheduler.step()

            num_steps += 1
            tot_opt = time.time() - start_opt
            opt_lst.append(tot_opt)

            start_gen = time.time()

        start_schedul = time.time()
        if not step_scheduler_batch:
            scheduler.step()
        tot_schedul = time.time() - start_schedul
        schedul_lst.append(tot_schedul)

        start_reload = time.time()
        print("[INFO]: Updating Dataset")
        p = p ** (2 / 3)
        dataset.update(p=p)
        print("[INFO]: Reloading dataset")
        train_loader = DataLoader(dataset, batch_size=BATCH_SIZE,
                                  shuffle=True, pin_memory=True,
                                  collate_fn=imed_loader_utils.imed_collate,
                                  num_workers=1)
        tot_reload = time.time() - start_reload
        reload_lst.append(tot_reload)

        end_time = time.time()
        total_time = end_time - start_time
        tqdm.write("Epoch {} took {:.2f} seconds.".format(epoch, total_time))

    print('Mean SD init {} -- {}'.format(np.mean(init_lst), np.std(init_lst)))
    print('Mean SD load {} -- {}'.format(np.mean(load_lst), np.std(load_lst)))
    print('Mean SD reload {} -- {}'.format(np.mean(reload_lst), np.std(reload_lst)))
    print('Mean SD pred {} -- {}'.format(np.mean(pred_lst), np.std(pred_lst)))
    print('Mean SD opt {} -- {}'.format(np.mean(opt_lst), np.std(opt_lst)))
    print('Mean SD gen {} -- {}'.format(np.mean(gen_lst), np.std(gen_lst)))
    print('Mean SD scheduler {} -- {}'.format(np.mean(schedul_lst), np.std(schedul_lst)))

def test_hdf5(download_data_testing_test_files, loader_parameters):
    print('[INFO]: Starting test ... \n')

    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)

    contrast_params = loader_parameters["contrast_params"]
    target_suffix = loader_parameters["target_suffix"]
    roi_params = loader_parameters["roi_params"]

    train_lst = ['sub-unf01']

    training_transform_dict = {
        "Resample": {"wspace": 0.75, "hspace": 0.75},
        "CenterCrop": {"size": [48, 48]},
        "NumpyToTensor": {}
    }
    transform_lst, _ = imed_transforms.prepare_transforms(training_transform_dict)

    bids_to_hdf5 = imed_adaptative.BIDStoHDF5(bids_df=bids_df,
                                              subject_file_lst=train_lst,
                                              path_hdf5=os.path.join(__data_testing_dir__, 'mytestfile.hdf5'),
                                              target_suffix=target_suffix,
                                              roi_params=roi_params,
                                              contrast_lst=contrast_params["contrast_lst"],
                                              metadata_choice="contrast",
                                              transform=transform_lst,
                                              contrast_balance={},
                                              slice_axis=2,
                                              slice_filter_fn=imed_loader_utils.SliceFilter(
                                                  filter_empty_input=True, filter_empty_mask=True))

    # Checking architecture
    def print_attrs(name, obj):
        print("\nName of the object: {}".format(name))
        print("Type: {}".format(type(obj)))
        print("Including the following attributes:")
        for key, val in obj.attrs.items():
            print(" %s: %s" % (key, val))

    print('\n[INFO]: HDF5 architecture:')
    with h5py.File(bids_to_hdf5.path_hdf5, "a") as hdf5_file:
        hdf5_file.visititems(print_attrs)
        print('\n[INFO]: HDF5 file successfully generated.')
        print('[INFO]: Generating dataframe ...\n')

        df = imed_adaptative.Dataframe(hdf5_file=hdf5_file,
                                       contrasts=['T1w', 'T2w', 'T2star'],
                                       path=os.path.join(__data_testing_dir__, 'hdf5.csv'),
                                       target_suffix=['T1w', 'T2w', 'T2star'],
                                       roi_suffix=['T1w', 'T2w', 'T2star'],
                                       dim=2,
                                       filter_slices=True)
        print(df.df)

        print('\n[INFO]: Dataframe successfully generated. ')
        print('[INFO]: Creating dataset ...\n')
        model_params = {
            "name": "HeMISUnet",
            "dropout_rate": 0.3,
            "bn_momentum": 0.9,
            "depth": 2,
            "in_channel": 1,
            "out_channel": 1,
            "missing_probability": 0.00001,
            "missing_probability_growth": 0.9,
            "contrasts": ["T1w", "T2w"],
            "ram": False,
            "path_hdf5": os.path.join(__data_testing_dir__, 'mytestfile.hdf5'),
            "csv_path": os.path.join(__data_testing_dir__, 'hdf5.csv'),
            "target_lst": ["T2w"],
            "roi_lst": ["T2w"]
        }
        dataset = imed_adaptative.HDF5Dataset(bids_df=bids_df,
                                              subject_file_lst=train_lst,
                                              target_suffix=target_suffix,
                                              slice_axis=2,
                                              model_params=model_params,
                                              contrast_params=contrast_params,
                                              transform=transform_lst,
                                              metadata_choice=False,
                                              dim=2,
                                              slice_filter_fn=imed_loader_utils.SliceFilter(
                                                  filter_empty_input=True, filter_empty_mask=True),
                                              roi_params=roi_params)

        dataset.load_into_ram(['T1w', 'T2w', 'T2star'])
        print("Dataset RAM status:")
        print(dataset.status)
        print("In memory Dataframe:")
        print(dataset.dataframe)
        print('\n[INFO]: Test passed successfully. ')

        print("\n[INFO]: Starting loader test ...")

        device = torch.device("cuda:" + str(GPU_ID) if torch.cuda.is_available() else "cpu")
        cuda_available = torch.cuda.is_available()
        if cuda_available:
            torch.cuda.set_device(device)
            print("Using GPU ID {}".format(device))

        train_loader = DataLoader(dataset, batch_size=BATCH_SIZE,
                                  shuffle=False, pin_memory=True,
                                  collate_fn=imed_loader_utils.imed_collate,
                                  num_workers=1)

        for i, batch in enumerate(train_loader):
            input_samples, gt_samples = batch["input"], batch["gt"]
            print("len input = {}".format(len(input_samples)))
            print("Batch = {}, {}".format(input_samples[0].shape, gt_samples[0].shape))

            if cuda_available:
                var_input = imed_utils.cuda(input_samples)
                var_gt = imed_utils.cuda(gt_samples, non_blocking=True)
            else:
                var_input = input_samples
                var_gt = gt_samples

            break

        print("Congrats your dataloader works! You can go home now and get a beer.")
        return 0

def test_inference(download_data_testing_test_files, transforms_dict, test_lst, target_lst,
                   roi_params, testing_params):
    cuda_available, device = imed_utils.define_device(GPU_ID)

    model_params = {"name": "Unet", "is_2d": True}
    loader_params = {
        "transforms_params": transforms_dict,
        "data_list": test_lst,
        "dataset_type": "testing",
        "requires_undo": True,
        "contrast_params": {"contrast_lst": ['T2w'], "balance": {}},
        "path_data": [__data_testing_dir__],
        "target_suffix": target_lst,
        "extensions": [".nii.gz"],
        "roi_params": roi_params,
        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
        "slice_axis": SLICE_AXIS,
        "multichannel": False
    }
    loader_params.update({"model_params": model_params})

    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)

    # Get Testing dataset
    ds_test = imed_loader.load_dataset(bids_df, **loader_params)
    test_loader = DataLoader(ds_test, batch_size=BATCH_SIZE,
                             shuffle=False, pin_memory=True,
                             collate_fn=imed_loader_utils.imed_collate,
                             num_workers=0)

    # Undo transform
    val_undo_transform = imed_transforms.UndoCompose(imed_transforms.Compose(transforms_dict))

    # Update testing_params
    testing_params.update({
        "slice_axis": loader_params["slice_axis"],
        "target_suffix": loader_params["target_suffix"],
        "undo_transforms": val_undo_transform
    })

    # Model
    model = imed_models.Unet()
    if cuda_available:
        model.cuda()
    model.eval()

    metric_fns = [imed_metrics.dice_score,
                  imed_metrics.hausdorff_score,
                  imed_metrics.precision_score,
                  imed_metrics.recall_score,
                  imed_metrics.specificity_score,
                  imed_metrics.intersection_over_union,
                  imed_metrics.accuracy_score]

    metric_mgr = imed_metrics.MetricManager(metric_fns)

    if not os.path.isdir(__output_dir__):
        os.makedirs(__output_dir__)

    preds_npy, gt_npy = imed_testing.run_inference(test_loader=test_loader,
                                                   model=model,
                                                   model_params=model_params,
                                                   testing_params=testing_params,
                                                   ofolder=__output_dir__,
                                                   cuda_available=cuda_available)

    metric_mgr(preds_npy, gt_npy)
    metrics_dict = metric_mgr.get_results()
    metric_mgr.reset()
    print(metrics_dict)

loader_params["contrast_params"]["contrast_lst"] = loader_params[ "contrast_params"]["training_validation"] # CHOOSE TO INDEX DERIVATIVES OR NOT # As per pybids, the indexing of derivatives works only if a "dataset_description.json" file # is present in "derivatives" or "labels" folder with minimal content: # {"Name": "Example dataset", "BIDSVersion": "1.0.2", "PipelineDescription": {"Name": "Example pipeline"}} derivatives = True # CREATE OUTPUT PATH path_output = context["path_output"] if not os.path.isdir(path_output): print('Creating output path: {}'.format(path_output)) os.makedirs(path_output) else: print('Output path already exists: {}'.format(path_output)) # CREATE BIDSDataframe OBJECT bids_df = imed_loader_utils.BidsDataframe(loader_params, path_output, derivatives) df = bids_df.df # DROP "path" COLUMN AND SORT BY FILENAME FOR TESTING PURPOSES WITH data-testing df = df.drop(columns=['path']) df = df.sort_values(by=['filename']).reset_index(drop=True) # SAVE DATAFRAME TO CSV FILE FOR data-testing path_csv = "test_df_new_loader.csv" df.to_csv(path_csv, index=False) print(df)
def run_command(context, n_gif=0, thr_increment=None, resume_training=False):
    """Run main command.

    This function is central in the ivadomed project as training / testing / evaluation commands
    are run via this function. All the process parameters are defined in the config.

    Args:
        context (dict): Dictionary containing all parameters that are needed for a given process. See
            :doc:`configuration_file` for more details.
        n_gif (int): Generates a GIF during training if larger than zero, one frame per epoch for a given slice.
            The parameter indicates the number of 2D slices used to generate GIFs, one GIF per slice. A GIF
            shows predictions of a given slice from the validation sub-dataset. They are saved within the
            output path.
        thr_increment (float): A threshold analysis is performed at the end of the training using the trained
            model and the training + validation sub-dataset to find the optimal binarization threshold. The
            specified value indicates the increment between 0 and 1 used during the ROC analysis (e.g. 0.1).
        resume_training (bool): Load a saved model ("checkpoint.pth.tar" in the output directory specified with
            the flag "--path-output" or via the config file "output_path") and resume training from it. This
            training state is saved every time a new best model is saved in the output directory.

    Returns:
        float or pandas.DataFrame or None:
            * If "train" command: Returns floats: best loss score for both training and validation.
            * If "test" command: Returns a pandas DataFrame of metrics computed for each subject of the
              testing sub-dataset, along with the prediction metrics before evaluation.
            * If "segment" command: No return value.
    """
    command = copy.deepcopy(context["command"])
    path_output = set_output_path(context)

    # Create a log with the version of the Ivadomed software and the version of the Annexed dataset (if present)
    create_dataset_and_ivadomed_version_log(context)

    cuda_available, device = imed_utils.define_device(context['gpu_ids'][0])

    # BACKWARDS COMPATIBILITY: If bids_path is string, assign to list - Do this here so it propagates to all functions
    context['loader_parameters']['path_data'] = imed_utils.format_path_data(
        context['loader_parameters']['path_data'])

    # Loader params
    loader_params = set_loader_params(context, command == "train")

    # Get transforms for each subdataset
    transform_train_params, transform_valid_params, transform_test_params = \
        imed_transforms.get_subdatasets_transforms(context["transformation"])

    # MODEL PARAMETERS
    model_params, loader_params = set_model_params(context, loader_params)

    if command == 'segment':
        run_segment_command(context, model_params)
        return

    # BIDSDataframe of all image files
    # Indexing of derivatives is True for command train and test
    bids_df = imed_loader_utils.BidsDataframe(loader_params, path_output, derivatives=True)

    # Get subject filenames lists. "segment" command uses all participants of data path, hence no need to split
    train_lst, valid_lst, test_lst = imed_loader_utils.get_subdatasets_subject_files_list(
        context["split_dataset"], bids_df.df, path_output,
        context["loader_parameters"]['subject_selection'])

    # TESTING PARAMS
    # Aleatoric uncertainty
    if context['uncertainty']['aleatoric'] and context['uncertainty']['n_it'] > 0:
        transformation_dict = transform_train_params
    else:
        transformation_dict = transform_test_params

    undo_transforms = imed_transforms.UndoCompose(
        imed_transforms.Compose(transformation_dict, requires_undo=True))

    testing_params = copy.deepcopy(context["training_parameters"])
    testing_params.update({'uncertainty': context["uncertainty"]})
    testing_params.update({
        'target_suffix': loader_params["target_suffix"],
        'undo_transforms': undo_transforms,
        'slice_axis': loader_params['slice_axis']
    })

    if command == "train":
        imed_utils.display_selected_transfoms(transform_train_params, dataset_type=["training"])
        imed_utils.display_selected_transfoms(transform_valid_params, dataset_type=["validation"])
    elif command == "test":
        imed_utils.display_selected_transfoms(transformation_dict, dataset_type=["testing"])

    # Check if multiple raters
    check_multiple_raters(command != "train", loader_params)

    if command == 'train':
        # Get Validation dataset
        ds_valid = get_dataset(bids_df, loader_params, valid_lst, transform_valid_params,
                               cuda_available, device, 'validation')

        # Get Training dataset
        ds_train = get_dataset(bids_df, loader_params, train_lst, transform_train_params,
                               cuda_available, device, 'training')
        metric_fns = imed_metrics.get_metric_fns(ds_train.task)

        # If FiLM, normalize data
        if 'film_layers' in model_params and any(model_params['film_layers']):
            model_params, ds_train, ds_valid, train_onehotencoder = \
                film_normalize_data(context, model_params, ds_train, ds_valid, path_output)
        else:
            train_onehotencoder = None

        # Model directory
        create_path_model(context, model_params, ds_train, path_output, train_onehotencoder)

        save_config_file(context, path_output)

        # RUN TRAINING
        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = imed_training.train(
            model_params=model_params,
            dataset_train=ds_train,
            dataset_val=ds_valid,
            training_params=context["training_parameters"],
            path_output=path_output,
            device=device,
            cuda_available=cuda_available,
            metric_fns=metric_fns,
            n_gif=n_gif,
            resume_training=resume_training,
            debugging=context["debugging"])

    if thr_increment:
        # LOAD DATASET
        if command != 'train':  # If command == train, then ds_valid is already loaded
            # Get Validation dataset
            ds_valid = get_dataset(bids_df, loader_params, valid_lst, transform_valid_params,
                                   cuda_available, device, 'validation')
        # Get Training dataset with no Data Augmentation
        ds_train = get_dataset(bids_df, loader_params, train_lst, transform_valid_params,
                               cuda_available, device, 'training')

        # Choice of optimisation metric
        metric = "recall_specificity" if model_params["name"] in imed_utils.CLASSIFIER_LIST else "dice"
        # Model path
        model_path = os.path.join(path_output, "best_model.pt")
        # Run analysis
        thr = imed_testing.threshold_analysis(model_path=model_path,
                                              ds_lst=[ds_train, ds_valid],
                                              model_params=model_params,
                                              testing_params=testing_params,
                                              metric=metric,
                                              increment=thr_increment,
                                              fname_out=os.path.join(path_output, "roc.png"),
                                              cuda_available=cuda_available)

        # Update threshold in config file
        context["postprocessing"]["binarize_prediction"] = {"thr": thr}
        save_config_file(context, path_output)

    if command == 'train':
        return best_training_dice, best_training_loss, best_validation_dice, best_validation_loss

    if command == 'test':
        # LOAD DATASET
        # Warn user that the input-level dropout is set during inference
        if loader_params['is_input_dropout']:
            logger.warning("Input-level dropout is set during testing. To turn this option off, "
                           "set 'is_input_dropout' to 'false' in the configuration file.")
        ds_test = imed_loader.load_dataset(bids_df,
                                           **{**loader_params,
                                              **{'data_list': test_lst,
                                                 'transforms_params': transformation_dict,
                                                 'dataset_type': 'testing',
                                                 'requires_undo': True}},
                                           device=device,
                                           cuda_available=cuda_available)

        metric_fns = imed_metrics.get_metric_fns(ds_test.task)

        if 'film_layers' in model_params and any(model_params['film_layers']):
            ds_test, model_params = update_film_model_params(context, ds_test, model_params, path_output)

        # RUN INFERENCE
        pred_metrics = imed_testing.test(model_params=model_params,
                                         dataset_test=ds_test,
                                         testing_params=testing_params,
                                         path_output=path_output,
                                         device=device,
                                         cuda_available=cuda_available,
                                         metric_fns=metric_fns,
                                         postprocessing=context['postprocessing'])

        # RUN EVALUATION
        df_results = imed_evaluation.evaluate(bids_df,
                                              path_output=path_output,
                                              target_suffix=loader_params["target_suffix"],
                                              eval_params=context["evaluation_parameters"])
        return df_results, pred_metrics

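# NOTE (illustrative sketch, not from the source): run_command is normally reached through the
# ivadomed CLI, but it can also be driven directly from a config dict. The file name below is a
# hypothetical placeholder, and the four return values apply only to the "train" command.
if __name__ == "__main__":
    context = imed_config_manager.load_json("config.json")
    best_train_dice, best_train_loss, best_val_dice, best_val_loss = run_command(context)
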
def run_segment_command(context, model_params):
    # BIDSDataframe of all image files
    # Indexing of derivatives is False for command segment
    bids_df = imed_loader_utils.BidsDataframe(context['loader_parameters'], context['path_output'],
                                              derivatives=False)

    # Append subjects filenames into a list
    bids_subjects = sorted(bids_df.df['filename'].to_list())

    # Add postprocessing to packaged model
    path_model = os.path.join(context['path_output'], context['model_name'])
    path_model_config = os.path.join(path_model, context['model_name'] + ".json")
    model_config = imed_config_manager.load_json(path_model_config)
    model_config['postprocessing'] = context['postprocessing']
    with open(path_model_config, 'w') as fp:
        json.dump(model_config, fp, indent=4)

    options = None

    # Initialize a list of already seen subject ids for multichannel
    seen_subj_ids = []

    for subject in bids_subjects:
        if context['loader_parameters']['multichannel']:
            # Get subject_id for multichannel
            df_sub = bids_df.df.loc[bids_df.df['filename'] == subject]
            subj_id = re.sub(r'_' + df_sub['suffix'].values[0] + '.*', '', subject)
            if subj_id not in seen_subj_ids:
                # if subj_id has not been seen yet
                fname_img = []
                provided_contrasts = []
                contrasts = context['loader_parameters']['contrast_params']['testing']
                # Keep contrast order
                for c in contrasts:
                    df_tmp = bids_df.df[bids_df.df['filename'].str.contains(subj_id) &
                                        bids_df.df['suffix'].str.contains(c)]
                    if not df_tmp.empty:
                        provided_contrasts.append(c)
                        fname_img.append(df_tmp['path'].values[0])
                seen_subj_ids.append(subj_id)
                if len(fname_img) != len(contrasts):
                    logger.warning("Missing contrast for subject {}. {} were provided but {} are required. "
                                   "Skipping subject.".format(subj_id, provided_contrasts, contrasts))
                    continue
            else:
                # Returns an empty list for subj_id already seen
                fname_img = []
        else:
            fname_img = bids_df.df[bids_df.df['filename'] == subject]['path'].to_list()

        if 'film_layers' in model_params and any(model_params['film_layers']) and model_params['metadata']:
            metadata = bids_df.df[bids_df.df['filename'] == subject][model_params['metadata']].values[0]
            options = {'metadata': metadata}

        if fname_img:
            pred_list, target_list = imed_inference.segment_volume(path_model,
                                                                   fname_images=fname_img,
                                                                   gpu_id=context['gpu_ids'][0],
                                                                   options=options)
            pred_path = os.path.join(context['path_output'], "pred_masks")
            if not os.path.exists(pred_path):
                os.makedirs(pred_path)

            for pred, target in zip(pred_list, target_list):
                filename = subject.split('.')[0] + target + "_pred" + ".nii.gz"
                nib.save(pred, os.path.join(pred_path, filename))

def test_image_orientation(download_data_testing_test_files, loader_parameters):
    device = torch.device("cuda:" + str(GPU_ID) if torch.cuda.is_available() else "cpu")
    cuda_available = torch.cuda.is_available()
    if cuda_available:
        torch.cuda.set_device(device)
        print("Using GPU ID {}".format(device))

    bids_df = imed_loader_utils.BidsDataframe(loader_parameters, __tmp_dir__, derivatives=True)

    contrast_params = loader_parameters["contrast_params"]
    target_suffix = loader_parameters["target_suffix"]
    roi_params = loader_parameters["roi_params"]

    train_lst = ['sub-unf01']

    training_transform_dict = {
        "Resample": {"wspace": 1.5, "hspace": 1, "dspace": 3},
        "CenterCrop": {"size": [176, 128, 160]},
        "NumpyToTensor": {},
        "NormalizeInstance": {"applied_to": ['im']}
    }

    tranform_lst, training_undo_transform = imed_transforms.prepare_transforms(training_transform_dict)

    model_params = {
        "name": "Modified3DUNet",
        "dropout_rate": 0.3,
        "bn_momentum": 0.9,
        "depth": 2,
        "in_channel": 1,
        "out_channel": 1,
        "length_3D": [176, 128, 160],
        "stride_3D": [176, 128, 160],
        "attention": False,
        "n_filters": 8
    }

    for dim in ['2d', '3d']:
        for slice_axis in [0, 1, 2]:
            if dim == '2d':
                ds = imed_loader.BidsDataset(bids_df=bids_df,
                                             subject_file_lst=train_lst,
                                             target_suffix=target_suffix,
                                             contrast_params=contrast_params,
                                             model_params=model_params,
                                             metadata_choice=False,
                                             slice_axis=slice_axis,
                                             transform=tranform_lst,
                                             multichannel=False)
                ds.load_filenames()
            else:
                ds = imed_loader.Bids3DDataset(bids_df=bids_df,
                                               subject_file_lst=train_lst,
                                               target_suffix=target_suffix,
                                               model_params=model_params,
                                               contrast_params=contrast_params,
                                               metadata_choice=False,
                                               slice_axis=slice_axis,
                                               transform=tranform_lst,
                                               multichannel=False)

            loader = DataLoader(ds, batch_size=1,
                                shuffle=True, pin_memory=True,
                                collate_fn=imed_loader_utils.imed_collate,
                                num_workers=1)

            input_filename, gt_filename, roi_filename, metadata = ds.filename_pairs[0]
            segpair = imed_loader.SegmentationPair(input_filename, gt_filename, metadata=metadata,
                                                   slice_axis=slice_axis)
            nib_original = nib.load(gt_filename[0])
            # Get image with original, ras and hwd orientations
            input_init = nib_original.get_fdata()
            input_ras = nib.as_closest_canonical(nib_original).get_fdata()
            img, gt = segpair.get_pair_data()
            input_hwd = gt[0]

            pred_tmp_lst, z_tmp_lst = [], []
            for i, batch in enumerate(loader):
                # batch["input_metadata"] = batch["input_metadata"][0]  # Take only metadata from one input
                # batch["gt_metadata"] = batch["gt_metadata"][0]  # Take only metadata from one label

                for smp_idx in range(len(batch['gt'])):
                    # undo transformations
                    if dim == '2d':
                        preds_idx_undo, metadata_idx = training_undo_transform(batch["gt"][smp_idx],
                                                                               batch["gt_metadata"][smp_idx],
                                                                               data_type='gt')

                        # add new sample to pred_tmp_lst
                        pred_tmp_lst.append(preds_idx_undo[0])
                        z_tmp_lst.append(int(batch['input_metadata'][smp_idx][0]['slice_index']))

                    else:
                        preds_idx_undo, metadata_idx = training_undo_transform(batch["gt"][smp_idx],
                                                                               batch["gt_metadata"][smp_idx],
                                                                               data_type='gt')

                    fname_ref = metadata_idx[0]['gt_filenames'][0]

                    if (pred_tmp_lst and i == len(loader) - 1) or dim == '3d':
                        # save the completely processed file as a nii
                        nib_ref = nib.load(fname_ref)
                        nib_ref_can = nib.as_closest_canonical(nib_ref)

                        if dim == '2d':
                            tmp_lst = []
                            for z in range(nib_ref_can.header.get_data_shape()[slice_axis]):
                                tmp_lst.append(pred_tmp_lst[z_tmp_lst.index(z)])
                            arr = np.stack(tmp_lst, axis=-1)
                        else:
                            arr = np.array(preds_idx_undo[0])

                        # verify image after transform, undo transform and 3D reconstruction
                        input_hwd_2 = imed_postpro.threshold_predictions(arr)
                        # Some differences are generated by the transform and undo transform
                        # (e.g. Resample interpolation)
                        assert imed_metrics.dice_score(input_hwd_2, input_hwd) >= 0.8
                        input_ras_2 = imed_loader_utils.orient_img_ras(input_hwd_2, slice_axis)
                        assert imed_metrics.dice_score(input_ras_2, input_ras) >= 0.8
                        input_init_2 = imed_loader_utils.reorient_image(input_hwd_2, slice_axis,
                                                                        nib_ref, nib_ref_can)
                        assert imed_metrics.dice_score(input_init_2, input_init) >= 0.8

                        # re-init pred_stack_lst
                        pred_tmp_lst, z_tmp_lst = [], []

def test_unet_time(train_lst, target_lst, config):
    cuda_available, device = imed_utils.define_device(GPU_ID)

    loader_params = {
        "data_list": train_lst,
        "dataset_type": "training",
        "requires_undo": False,
        "path_data": [__data_testing_dir__],
        "target_suffix": target_lst,
        "extensions": [".nii.gz"],
        "slice_filter_params": {"filter_empty_mask": False, "filter_empty_input": True},
        "slice_axis": "axial"
    }
    # Update loader_params with config
    loader_params.update(config)

    # Get Training dataset
    bids_df = imed_loader_utils.BidsDataframe(loader_params, __tmp_dir__, derivatives=True)
    ds_train = imed_loader.load_dataset(bids_df, **loader_params)

    # Loader
    train_loader = DataLoader(ds_train,
                              batch_size=1 if config["model_params"]["name"] == "Modified3DUNet" else BATCH_SIZE,
                              shuffle=True, pin_memory=True,
                              collate_fn=imed_loader_utils.imed_collate,
                              num_workers=1)

    # MODEL
    model_params = loader_params["model_params"]
    model_params.update(MODEL_DEFAULT)
    # Get in_channel from contrast_lst
    if loader_params["multichannel"]:
        model_params["in_channel"] = len(loader_params["contrast_params"]["contrast_lst"])
    else:
        model_params["in_channel"] = 1
    # Get out_channel from target_suffix
    model_params["out_channel"] = len(loader_params["target_suffix"])
    model_class = getattr(imed_models, model_params["name"])
    model = model_class(**model_params)

    print("Training {}".format(model_params["name"]))
    if cuda_available:
        model.cuda()

    step_scheduler_batch = False
    # TODO: Add optim in pytest
    optimizer = optim.Adam(model.parameters(), lr=INIT_LR)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, N_EPOCHS)

    # TODO: add to pytest
    loss_fct = imed_losses.DiceLoss()

    load_lst, pred_lst, opt_lst, schedule_lst, init_lst, gen_lst = [], [], [], [], [], []

    for epoch in tqdm(range(1, N_EPOCHS + 1), desc="Training"):
        start_time = time.time()

        start_init = time.time()
        model.train()
        tot_init = time.time() - start_init
        init_lst.append(tot_init)

        num_steps = 0
        start_gen = 0
        for i, batch in enumerate(train_loader):
            if i > 0:
                tot_gen = time.time() - start_gen
                gen_lst.append(tot_gen)

            start_load = time.time()
            input_samples = imed_utils.cuda(batch["input"], cuda_available)
            gt_samples = imed_utils.cuda(batch["gt"], cuda_available, non_blocking=True)
            tot_load = time.time() - start_load
            load_lst.append(tot_load)

            start_pred = time.time()
            if 'film_layers' in model_params:
                preds = model(input_samples, [[0, 1]])
            else:
                preds = model(input_samples)
            tot_pred = time.time() - start_pred
            pred_lst.append(tot_pred)

            start_opt = time.time()
            loss = loss_fct(preds, gt_samples)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            if step_scheduler_batch:
                scheduler.step()

            num_steps += 1
            tot_opt = time.time() - start_opt
            opt_lst.append(tot_opt)

            start_gen = time.time()

        start_schedule = time.time()
        if not step_scheduler_batch:
            scheduler.step()
        tot_schedule = time.time() - start_schedule
        schedule_lst.append(tot_schedule)

        end_time = time.time()
        total_time = end_time - start_time
        tqdm.write("Epoch {} took {:.2f} seconds.".format(epoch, total_time))

    print('Mean SD init {} -- {}'.format(np.mean(init_lst), np.std(init_lst)))
    print('Mean SD load {} -- {}'.format(np.mean(load_lst), np.std(load_lst)))
    print('Mean SD pred {} -- {}'.format(np.mean(pred_lst), np.std(pred_lst)))
    print('Mean SD opt {} -- {}'.format(np.mean(opt_lst), np.std(opt_lst)))
    print('Mean SD gen {} -- {}'.format(np.mean(gen_lst), np.std(gen_lst)))
    print('Mean SD scheduler {} -- {}'.format(np.mean(schedule_lst), np.std(schedule_lst)))
