def test_single_subject_resampling(self):
    """Resample a single image with PHOTON and compare it against nilearn.

    ``ResampleImages`` is applied once as a standalone element and once inside
    a ``NeuroBranch`` built with ``output_img=True``; both outputs must equal
    the data produced by ``nilearn.image.resample_img``.
    """
    from nilearn.image import resample_img

    target_voxels = [3, 3, 3]
    subject = self.X[0]

    # reference: nilearn resampling with nearest-neighbour interpolation
    reference_img = resample_img(
        subject, interpolation="nearest", target_affine=np.diag(target_voxels)
    )
    reference_array = reference_img.dataobj

    # PHOTON: standalone element first, then the same element inside a branch
    resampler = PipelineElement(
        "ResampleImages",
        hyperparameters={},
        voxel_size=target_voxels,
        batch_size=1,
    )
    standalone_out, _, _ = resampler.transform(subject)

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += resampler
    branch_out, _, _ = branch.transform(subject)

    # the standalone element must yield an array, the branch Nifti images
    self.assertIsInstance(standalone_out, np.ndarray)
    self.assertIsInstance(branch_out[0], Nifti1Image)

    # all three routes have to agree voxel by voxel
    self.assertTrue(np.array_equal(reference_array, standalone_out))
    self.assertTrue(np.array_equal(standalone_out, branch_out[0].dataobj))
def create_instances_and_transform(neuro_class_str, param_dict, transformed_X):
    """Run one neuro element through three branch setups and verify the output.

    The element named ``neuro_class_str`` is wrapped in a ``NeuroBranch`` three
    times: single process, three processes, and single process with
    ``batch_size=5``. Each branch transforms ``self.X`` and every result is
    compared with the matching entry of ``transformed_X``.

    NOTE: ``self`` is not a parameter; it is taken from the enclosing scope.

    :param neuro_class_str: registered name of the element to instantiate
    :param param_dict: keyword arguments forwarded to ``PipelineElement``
    :param transformed_X: reference images; only their ``dataobj`` is read
    """
    # (branch name, nr_of_processes, additional element kwargs) for runs 1-3
    setups = (
        ("single core application", 1, {}),
        ("multi core application", 3, {}),
        ("single core application", 1, {"batch_size": 5}),
    )

    for branch_name, n_processes, extra_kwargs in setups:
        branch = NeuroBranch(name=branch_name, nr_of_processes=n_processes)
        branch += PipelineElement(neuro_class_str, **extra_kwargs, **param_dict)

        # transform the data with caching pointed at the test cache folder
        branch.base_element.cache_folder = self.cache_folder_path
        branch.base_element.current_config = {"test_suite": 1}
        produced_X, _, _ = branch.transform(self.X)
        branch.base_element.clear_cache()

        # compare every output with its reference image
        for position, reference_nifti in enumerate(transformed_X):
            produced = produced_X[position]
            if isinstance(produced, Nifti1Image):
                produced_data = produced.dataobj
            else:
                produced_data = np.asarray(produced)
            self.assertTrue(
                np.array_equal(produced_data, reference_nifti.dataobj)
            )

        print("finished testing object: all images are fine.")
def test_multi_subject_resampling(self):
    """Resample the first three images with PHOTON and compare with nilearn.

    The standalone ``ResampleImages`` element must return an array equal to
    the nilearn result with its last axis moved to the front; the branch
    (``output_img=True``) must return a list of Nifti images.
    """
    from nilearn.image import index_img, resample_img

    target_voxels = [3, 3, 3]

    # reference: nilearn result, once split into images and once as an array
    reference = resample_img(
        self.X[:3], interpolation="nearest", target_affine=np.diag(target_voxels)
    )
    n_volumes = reference.shape[-1]
    reference_imgs = [index_img(reference, volume) for volume in range(n_volumes)]
    reference_array = np.moveaxis(reference.dataobj, -1, 0)

    # PHOTON: standalone element, then the same element inside a branch
    resampler = PipelineElement(
        "ResampleImages", hyperparameters={}, voxel_size=target_voxels
    )
    standalone_out, _, _ = resampler.transform(self.X[:3])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += resampler
    branch_out, _, _ = branch.transform(self.X[:3])

    # type checks
    self.assertIsInstance(standalone_out, np.ndarray)
    self.assertIsInstance(branch_out, list)
    self.assertIsInstance(branch_out[0], Nifti1Image)

    # content checks (the branch output is spot-checked on the second image)
    self.assertTrue(np.array_equal(reference_array, standalone_out))
    self.assertTrue(
        np.array_equal(branch_out[1].dataobj, reference_imgs[1].dataobj)
    )
def test_single_subject_smoothing(self):
    """Smooth a single image with PHOTON and compare it against nilearn.

    ``SmoothImages`` (``fwhm=3``) is applied standalone and inside a
    ``NeuroBranch`` with ``output_img=True``; both results must match
    ``nilearn.image.smooth_img`` called with ``fwhm=[3, 3, 3]``.
    """
    from nilearn.image import smooth_img

    subject = self.X[0]

    # reference result computed directly with nilearn
    reference_img = smooth_img(subject, fwhm=[3, 3, 3])
    reference_array = reference_img.dataobj

    # PHOTON: standalone element, then the same element inside a branch
    smoother = PipelineElement(
        "SmoothImages", hyperparameters={}, fwhm=3, batch_size=1
    )
    standalone_out, _, _ = smoother.transform(subject)

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += smoother
    branch_out, _, _ = branch.transform(subject)

    # the element must yield an array, the branch a single Nifti image
    self.assertIsInstance(standalone_out, np.ndarray)
    self.assertIsInstance(branch_out, Nifti1Image)

    # both routes must reproduce the nilearn data exactly
    self.assertTrue(np.array_equal(standalone_out, reference_array))
    self.assertTrue(np.array_equal(branch_out.dataobj, reference_img.dataobj))
def test_neuro_module_branch(self):
    """Check that a three-element NeuroBranch caches one file per image and element.

    The branch (smoothing, resampling, atlas extraction) first transforms the
    first seven images, then a slice that overlaps the already processed
    images and covers all remaining ones. After each step the number of
    ``*.p`` files in the cache folder and the size of the cache index must
    equal ``3 * <number of distinct images transformed so far>``.
    """
    nmb = NeuroBranch('best_branch_ever')
    nmb += PipelineElement('SmoothImages', fwhm=10)
    nmb += PipelineElement('ResampleImages', voxel_size=5)
    nmb += PipelineElement('BrainAtlas', rois=['Hippocampus_L', 'Hippocampus_R'],
                           atlas_name="AAL", extract_mode='vec')

    nmb.base_element.cache_folder = self.cache_folder_path
    CacheManager.clear_cache_files(nmb.base_element.cache_folder, True)
    # set the config so that caching works
    nmb.set_params(**{'SmoothImages__fwhm': 10, 'ResampleImages__voxel_size': 5})

    def count_cache_files():
        # number of pickled cache entries currently on disk
        return len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))

    # the branch holds three elements, so every image yields three cache files
    nr_elements = 3

    # first pass: transform seven images -> 3 * 7 cache entries expected
    nr_niftis = 7
    nmb.transform(self.X[:nr_niftis])
    self.assertEqual(count_cache_files(), nr_elements * nr_niftis)
    self.assertEqual(len(nmb.base_element.cache_man.cache_index.items()),
                     nr_elements * nr_niftis)

    # second pass: the last two images of the first pass (already cached)
    # plus every image not processed yet
    nmb.transform(self.X[nr_niftis-2::])

    # now every image in self.X must be cached once per element
    self.assertEqual(count_cache_files(), nr_elements * len(self.X))
    self.assertEqual(len(nmb.base_element.cache_man.cache_index.items()),
                     nr_elements * len(self.X))
def setup_crazy_pipe(self):
    """Rebuild ``self.hyperpipe`` as a nested stack/switch construction.

    Five ``NeuroBranch`` objects (named "0" to "4", using 3 to 7 processes)
    are created, each holding a ``SmoothImages`` element. The first two go
    into a ``Switch``; the last three are each wrapped, behind a
    ``StandardScaler``, in a ``Branch`` that is added to a ``Stack``.

    :return: the list of the five created ``NeuroBranch`` objects
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    neuro_branches = []
    for number in range(5):
        neuro_branch = NeuroBranch(name=str(number), nr_of_processes=number + 3)
        neuro_branch += PipelineElement("SmoothImages")
        neuro_branches.append(neuro_branch)

    # the switch chooses between the first two neuro branches
    switch = Switch("disabling_test_switch")
    for candidate in neuro_branches[:2]:
        switch += candidate

    # the remaining three neuro branches are stacked, each behind a scaler
    stack = Stack("stack_of_branches")
    for number, neuro_branch in enumerate(neuro_branches[2:], start=2):
        wrapper = Branch(f"branch_{number}")
        wrapper += PipelineElement("StandardScaler")
        wrapper += neuro_branch
        stack += wrapper

    # final layout: stack -> scaler -> switch -> estimator
    for element in (
        stack,
        PipelineElement("StandardScaler"),
        switch,
        PipelineElement("SVC"),
    ):
        self.hyperpipe.add(element)

    return neuro_branches
def test_multi_subject_smoothing(self):
    """Smooth the first three images with PHOTON and compare with nilearn.

    The second image is used for the content comparison of both the
    standalone ``SmoothImages`` element and the ``NeuroBranch`` created with
    ``output_img=True``.
    """
    from nilearn.image import smooth_img

    # reference result computed directly with nilearn
    reference_imgs = smooth_img(self.X[0:3], fwhm=[3, 3, 3])
    reference_array = reference_imgs[1].dataobj

    # PHOTON: standalone element, then the same element inside a branch
    smoother = PipelineElement('SmoothImages', hyperparameters={}, fwhm=3)
    standalone_out, _, _ = smoother.transform(self.X[0:3])

    branch = NeuroBranch('NeuroBranch', output_img=True)
    branch += smoother
    branch_out, _, _ = branch.transform(self.X[0:3])

    # type checks
    self.assertIsInstance(standalone_out, np.ndarray)
    self.assertIsInstance(branch_out[0], Nifti1Image)

    # content checks on the second subject
    self.assertTrue(np.array_equal(standalone_out[1], reference_array))
    self.assertTrue(
        np.array_equal(branch_out[1].dataobj, reference_imgs[1].dataobj)
    )
def test_inverse_transform(self):
    """Check that manual back-mapping equals the image written during ``fit``.

    A hyperpipe with a ``BrainAtlas`` neuro branch and a ``LinearSVR`` is
    fitted. The feature importances of the best config are then pushed through
    ``optimum_pipe.inverse_transform`` and compared with the back-mapped
    Nifti file found in the project's results folder.
    """
    settings = OutputSettings(
        project_folder=self.tmp_folder_path, overwrite_results=True
    )

    # DESIGN YOUR PIPELINE
    pipe = Hyperpipe(
        "Limbic_System",
        optimizer="grid_search",
        metrics=["mean_absolute_error"],
        best_config_metric="mean_absolute_error",
        outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
        inner_cv=ShuffleSplit(n_splits=1, test_size=0.2),
        verbosity=2,
        cache_folder=self.cache_folder_path,
        eval_final_performance=True,
        output_settings=settings,
    )

    # PICK AN ATLAS
    atlas = PipelineElement(
        "BrainAtlas",
        rois=["Hippocampus_L", "Amygdala_L"],
        atlas_name="AAL",
        extract_mode="vec",
        batch_size=20,
    )

    # EITHER ADD A NEURO BRANCH OR THE ATLAS ITSELF
    neuro_branch = NeuroBranch("NeuroBranch")
    neuro_branch += atlas
    pipe += neuro_branch

    pipe += PipelineElement("LinearSVR")

    pipe.fit(self.X, self.y)

    # GET IMPORTANCE SCORES
    handler = ResultsHandler(pipe.results)
    importance_scores_optimum_pipe = handler.results.best_config_feature_importances

    # map the importance scores back into image space by hand
    manual_img, _, _ = pipe.optimum_pipe.inverse_transform(
        importance_scores_optimum_pipe, None
    )

    # load the back-mapped image from the results folder
    img = image.load_img(
        os.path.join(
            self.tmp_folder_path,
            "Limbic_System_results/optimum_pipe_feature_importances_backmapped.nii.gz",
        )
    )

    # ``get_data()`` is deprecated in nibabel; ``np.asanyarray(img.dataobj)``
    # is the documented drop-in replacement with the same return values
    self.assertTrue(
        np.array_equal(
            np.asanyarray(manual_img.dataobj), np.asanyarray(img.dataobj)
        )
    )
def test_single_subject_caching(self):
    """Check the number of per-subject cache files written by a NeuroBranch.

    A batched ``ResampleImages`` branch transforms the test images with two
    different voxel sizes. After each run the number of ``*.p`` files in the
    branch's cache folder is compared with the expected count; re-running a
    known config must not add files. The cache folder is cleared afterwards,
    even when an assertion fails.
    """
    nb = NeuroBranch("subject_caching_test")
    # increase complexity by adding batching
    nb += PipelineElement("ResampleImages", batch_size=4)

    test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../test_data/")
    X = AtlasLibrary().get_nii_files_from_folder(test_folder, extension=".nii")
    y = np.random.randn(len(X))

    cache_folder = os.path.join(self.cache_folder_path, "subject_caching_test")
    nb.base_element.cache_folder = cache_folder

    nr_of_expected_pickles_per_config = len(X)

    def transform_and_check_folder(config, expected_nr_of_files):
        # apply the config, transform, then count the pickles on disk
        nb.set_params(**config)
        nb.transform(X, y)
        nr_of_generated_cache_files = len(
            glob.glob(os.path.join(cache_folder, "*.p")))
        self.assertEqual(nr_of_generated_cache_files, expected_nr_of_files)

    try:
        # first config: expect one cache file per input file
        transform_and_check_folder({"ResampleImages__voxel_size": 5},
                                   nr_of_expected_pickles_per_config)

        # second config: expect two times the number of input files in the cache
        transform_and_check_folder({"ResampleImages__voxel_size": 10},
                                   2 * nr_of_expected_pickles_per_config)

        # first config again: no new cache files expected, because they exist
        transform_and_check_folder({"ResampleImages__voxel_size": 5},
                                   2 * nr_of_expected_pickles_per_config)
    finally:
        # clean up even on failure so stale files cannot affect other tests
        CacheManager.clear_cache_files(cache_folder)
def test_neuro_hyperpipe_parallelized_batched_caching(self):
    """Fit a hyperpipe with a batched neuro branch and expect an empty cache.

    The pipe combines a ``NeuroBranch`` (resampling with three candidate voxel
    sizes, brain mask) with scaler, PCA and SVR. After ``fit`` no ``*.p``
    file may remain in the hyperpipe's cache folder.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    test_folder = os.path.join(here, "../test_data/")
    X = AtlasLibrary().get_nii_files_from_folder(test_folder, extension=".nii")
    y = np.random.randn(len(X))

    self.hyperpipe = Hyperpipe(
        "complex_case",
        inner_cv=KFold(n_splits=5),
        outer_cv=KFold(n_splits=3),
        optimizer="grid_search",
        cache_folder=self.cache_folder_path,
        metrics=["mean_squared_error"],
        best_config_metric="mean_squared_error",
        output_settings=OutputSettings(project_folder="./tmp"),
    )

    # neuro part: both elements work in batches of four to add complexity
    neuro_branch = NeuroBranch("SubjectCaching", nr_of_processes=1)
    neuro_branch += PipelineElement(
        "ResampleImages", {"voxel_size": [3, 5, 10]}, batch_size=4
    )
    neuro_branch += PipelineElement("BrainMask", batch_size=4)
    self.hyperpipe += neuro_branch

    # classic part of the pipeline
    self.hyperpipe += PipelineElement("StandardScaler", {})
    self.hyperpipe += PipelineElement("PCA", {"n_components": [3, 4]})
    self.hyperpipe += PipelineElement("SVR", {"kernel": ["rbf", "linear"]})

    self.hyperpipe.fit(X, y)

    # the cache has to be empty again once fitting is done
    cache_pattern = os.path.join(self.hyperpipe.cache_folder, "*.p")
    leftover_files = len(glob.glob(cache_pattern))
    print(leftover_files)
    self.assertTrue(leftover_files == 0)
my_pipe = Hyperpipe( "Limbic_Pipeline", optimizer="grid_search", metrics=["mean_absolute_error"], best_config_metric="mean_absolute_error", outer_cv=ShuffleSplit(n_splits=2, test_size=0.2), inner_cv=ShuffleSplit(n_splits=2, test_size=0.2), verbosity=1, cache_folder="./cache", output_settings=settings, ) # CREATE NEURO BRANCH # specify the number of processes that should be used neuro_branch = NeuroBranch("NeuroBranch", nr_of_processes=1) # resample images to a desired voxel size - this also works with voxel_size as hyperparameter # it's also very reasonable to define a batch size for a large number of subjects neuro_branch += PipelineElement( "ResampleImages", hyperparameters={"voxel_size": Categorical([3, 5])}, batch_size=20 ) # additionally, you can smooth the entire image neuro_branch += PipelineElement( "SmoothImages", {"fwhm": Categorical([6, 8])}, batch_size=20 ) # now, apply a brain atlas and extract 4 ROIs # set "extract_mode" to "vec" so that all voxels within these ROIs are vectorized and concatenated neuro_branch += PipelineElement(
cache_folder="./cache", eval_final_performance=False, output_settings=settings, ) # CHOOSE BETWEEN MASKS # available masks # 'MNI_ICBM152_GrayMatter' # 'MNI_ICBM152_WhiteMatter' # 'MNI_ICBM152_WholeBrain' # 'Cerebellum' mask = PipelineElement("BrainMask", mask_image="MNI_ICBM152_GrayMatter", extract_mode="vec", batch_size=20) # EITHER ADD A NEURO BRANCH OR THE ATLAS ITSELF # we recommend to always use neuro elements within a branch neuro_branch = NeuroBranch("NeuroBranch") neuro_branch += mask pipe += neuro_branch # pipe += mask pipe += PipelineElement("PCA", n_components=5) pipe += PipelineElement("RandomForestRegressor") pipe.fit(X, y)
# DESIGN YOUR PIPELINE
# results go to ./tmp/ and existing results there are overwritten
settings = OutputSettings(project_folder='./tmp/', overwrite_results=True)

my_pipe = Hyperpipe(
    'Limbic_Pipeline',
    optimizer='grid_search',
    metrics=['mean_absolute_error'],
    best_config_metric='mean_absolute_error',
    outer_cv=ShuffleSplit(n_splits=2, test_size=0.2),
    inner_cv=ShuffleSplit(n_splits=2, test_size=0.2),
    verbosity=1,
    cache_folder="./cache",
    output_settings=settings,
)

# CREATE NEURO BRANCH
# nr_of_processes sets how many processes the branch uses
neuro_branch = NeuroBranch('NeuroBranch', nr_of_processes=1)

# step 1: resample to a desired voxel size; here voxel_size is a
# hyperparameter with two candidates. A batch size is reasonable when
# there is a large number of subjects.
neuro_branch += PipelineElement(
    'ResampleImages',
    hyperparameters={'voxel_size': Categorical([3, 5])},
    batch_size=20,
)

# step 2: additionally smooth the entire image
neuro_branch += PipelineElement(
    'SmoothImages',
    {'fwhm': Categorical([6, 8])},
    batch_size=20,
)

# step 3: apply a brain atlas and extract 4 ROIs; with extract_mode "vec"
# all voxels within these ROIs are vectorized and concatenated
neuro_branch += PipelineElement(
    'BrainAtlas',
    hyperparameters={},
    rois=['Hippocampus_L', 'Hippocampus_R', 'Amygdala_L', 'Amygdala_R'],
    atlas_name="AAL",
    extract_mode='vec',
    batch_size=20,
)

# finally, add your neuro branch to your hyperpipe
best_config_metric='mean_absolute_error', outer_cv=ShuffleSplit(n_splits=1, test_size=0.2), inner_cv=ShuffleSplit(n_splits=1, test_size=0.2), verbosity=2, cache_folder="./cache", eval_final_performance=False, output_settings=settings) # CHOOSE BETWEEN MASKS mask = PipelineElement('BrainMask', mask_image='MNI_ICBM152_GrayMatter', extract_mode='vec', batch_size=20) # EITHER ADD A NEURO BRANCH OR THE ATLAS ITSELF neuro_branch = NeuroBranch('NeuroBranch') neuro_branch += mask pipe += neuro_branch pipe += PipelineElement('LinearSVR') # since we're predicting age and age cannot be below 0 and some upper threshold like 90, we can restrict the SVR's # range of predictions pipe += PipelineElement('RangeRestrictor', {}, low=16, high=90) pipe.fit(X, y) dataset_files = fetch_oasis_vbm(n_subjects=100) X = np.array(dataset_files.gray_matter_maps) age = dataset_files.ext_vars['age'].astype(float) y = np.array(age)
def test_combi_from_single_and_group_caching(self):
    """Check group-level and subject-level caching of a combined pipeline.

    A ``PhotonPipeline`` made of a batched, parallel ``NeuroBranch`` plus
    scaler, PCA and SVR is fitted with different configs. After every fit
    the number of ``*.p`` files is counted in the pipeline's cache folder
    (group level) and in the neuro branch's cache folder (subject level).
    Fitting with an empty config is expected to raise ``ValueError``.
    """
    # 1. load data
    here = os.path.dirname(os.path.abspath(__file__))
    test_folder = os.path.join(here, "../test_data/")
    X = AtlasLibrary().get_nii_files_from_folder(test_folder, extension=".nii")
    n_subjects = len(X)
    y = np.random.randn(len(X))

    # 2. specify cache directories and start from empty folders
    group_cache = self.cache_folder_path
    subject_cache = os.path.join(group_cache, "subject_caching_test")
    CacheManager.clear_cache_files(group_cache)
    CacheManager.clear_cache_files(subject_cache)

    # 3. set up the neuro branch; batching increases the complexity
    nb = NeuroBranch("SubjectCaching", nr_of_processes=3)
    nb += PipelineElement("ResampleImages", batch_size=4)
    nb += PipelineElement("BrainMask", batch_size=4)
    nb.base_element.cache_folder = subject_cache

    # 4. set up the usual pipeline around it
    ss = PipelineElement("StandardScaler", {})
    pca = PipelineElement("PCA", {"n_components": [3, 10, 50]})
    svm = PipelineElement("SVR", {"kernel": ["rbf", "linear"]})
    pipe = PhotonPipeline(
        [
            ("NeuroBranch", nb),
            ("StandardScaler", ss),
            ("PCA", pca),
            ("SVR", svm),
        ]
    )
    pipe.caching = True
    pipe.fold_id = "12345643463434"
    pipe.cache_folder = group_cache

    def count_pickles(folder):
        # number of cache files currently stored in ``folder``
        return len(glob.glob(os.path.join(folder, "*.p")))

    def transform_and_check_folder(config, expected_nr_of_files_group,
                                   expected_nr_subject):
        # fit with ``config``, then check the group cache before the subject cache
        pipe.set_params(**config)
        pipe.fit(X, y)
        self.assertTrue(count_pickles(group_cache) == expected_nr_of_files_group)
        self.assertTrue(count_pickles(subject_cache) == expected_nr_subject)

    config1 = {
        "NeuroBranch__ResampleImages__voxel_size": 5,
        "PCA__n_components": 7,
        "SVR__C": 2,
    }
    config2 = {
        "NeuroBranch__ResampleImages__voxel_size": 3,
        "PCA__n_components": 4,
        "SVR__C": 5,
    }

    # first config: one group cache file each for the standard scaler and the
    # pca, and two subject files (one resampler, one brain mask) per input
    transform_and_check_folder(config1, 2, 2 * n_subjects)

    # second config: the scaler and the pca each get a second group cache file
    # (new voxel size, new pca config) and the subject cache doubles
    transform_and_check_folder(config2, 4, 4 * n_subjects)

    # fitting with the first config again must not add any cache file
    transform_and_check_folder(config1, 4, 4 * n_subjects)

    # an empty config would generate new entries for scaler and pca as well as
    # for every input in every neuro element; here it must raise ValueError
    with self.assertRaises(ValueError):
        transform_and_check_folder({}, 6, 6 * n_subjects)

    # leave both cache folders empty
    CacheManager.clear_cache_files(group_cache)
    CacheManager.clear_cache_files(subject_cache)
'Schaefer2018_*Parcels_*Networks' (replace first asterisk with 100, 200, ..., 1000 and second with 7 or 17) """ # to list all roi names of a specific atlas, you can do the following AtlasLibrary().list_rois('AAL') AtlasLibrary().list_rois('HarvardOxford_Cortical_Threshold_25') AtlasLibrary().list_rois('HarvardOxford_Subcortical_Threshold_25') AtlasLibrary().list_rois('Schaefer2018_100Parcels_7Networks') # PICK AN ATLAS # V1.1 ---------------------------------------------------------------- atlas = PipelineElement('BrainAtlas', rois=['Hippocampus_L', 'Hippocampus_R', 'Amygdala_L', 'Amygdala_R'], atlas_name="AAL", extract_mode='vec', batch_size=20) neuro_branch_v1 = NeuroBranch('NeuroBranch', nr_of_processes=3) neuro_branch_v1 += atlas # V1.2 ---------------------------------------------------------------- atlas = PipelineElement('BrainAtlas', rois=['all'], atlas_name="Schaefer2018_100Parcels_7Networks", extract_mode='vec', batch_size=20) neuro_branch_v2 = NeuroBranch('NeuroBranch', nr_of_processes=3) neuro_branch_v2 += atlas # V2 ------------------------------------------------------------- # it's also possible to combine ROIs from different atlases neuro_stack = Stack('HarvardOxford')
# list the roi names of three further atlases
for _atlas_id in (
    "HarvardOxford_Cortical_Threshold_25",
    "HarvardOxford_Subcortical_Threshold_25",
    "Schaefer2018_100Parcels_7Networks",
):
    AtlasLibrary().list_rois(_atlas_id)

# PICK AN ATLAS
# V1.1 ----------------------------------------------------------------
# four named ROIs of the AAL atlas, extracted in "vec" mode
atlas = PipelineElement(
    "BrainAtlas",
    rois=["Hippocampus_L", "Hippocampus_R", "Amygdala_L", "Amygdala_R"],
    atlas_name="AAL",
    extract_mode="vec",
    batch_size=20,
)
neuro_branch_v1 = NeuroBranch("NeuroBranch", nr_of_processes=3)
neuro_branch_v1 += atlas

# V1.2 ----------------------------------------------------------------
# rois=["all"] with the Schaefer atlas (100 parcels, 7 networks)
atlas = PipelineElement(
    "BrainAtlas",
    rois=["all"],
    atlas_name="Schaefer2018_100Parcels_7Networks",
    extract_mode="vec",
    batch_size=20,
)
neuro_branch_v2 = NeuroBranch("NeuroBranch", nr_of_processes=3)
neuro_branch_v2 += atlas