def test_inverse_transform(self):
    # simple pipe
    sk_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
    sk_pipe.fit(self.X, self.y)
    sk_transform = sk_pipe.transform(self.X)
    sk_inverse_transformed = sk_pipe.inverse_transform(sk_transform)

    photon_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
    photon_pipe.fit(self.X, self.y)
    p_transform, _, _ = photon_pipe.transform(self.X)
    p_inverse_transformed, _, _ = photon_pipe.inverse_transform(p_transform)

    self.assertTrue(np.array_equal(sk_inverse_transformed, p_inverse_transformed))

    # now including a stack
    stack = Stack('stack', [self.p_pca])
    stack_pipeline = PhotonPipeline([
        ("stack", stack),
        ('StandardScaler', PipelineElement('StandardScaler')),
        ('LinearSVC', PipelineElement('LinearSVC'))
    ])
    stack_pipeline.fit(self.X, self.y)
    feature_importances = stack_pipeline.feature_importances_
    inversed_data, _, _ = stack_pipeline.inverse_transform(feature_importances)
    self.assertEqual(inversed_data.shape[1], self.X.shape[1])
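# A minimal, self-contained sketch (plain scikit-learn, not part of the test suite)
# of the round-trip property the test above verifies: Pipeline.inverse_transform
# applies each step's inverse in reverse order, so scaling -> PCA -> inverse
# reproduces the input up to float error when no components are dropped.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import load_breast_cancer

X, _ = load_breast_cancer(return_X_y=True)
pipe = Pipeline([("scaler", StandardScaler()), ("pca", PCA())])  # PCA keeps all components
X_round_trip = pipe.inverse_transform(pipe.fit_transform(X))
assert np.allclose(X, X_round_trip)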
def test_neuro_module_branch(self):
    nmb = NeuroBranch('best_branch_ever')
    nmb += PipelineElement('SmoothImages', fwhm=10)
    nmb += PipelineElement('ResampleImages', voxel_size=5)
    nmb += PipelineElement('BrainAtlas', rois=['Hippocampus_L', 'Hippocampus_R'],
                           atlas_name="AAL", extract_mode='vec')

    nmb.base_element.cache_folder = self.cache_folder_path
    CacheManager.clear_cache_files(nmb.base_element.cache_folder, True)
    # set the config so that caching works
    nmb.set_params(**{'SmoothImages__fwhm': 10, 'ResampleImages__voxel_size': 5})

    # we transform 7 Niftis through 3 elements, so afterwards there should be 3 * 7 cache files
    nr_niftis = 7
    nmb.transform(self.X[:nr_niftis])
    nr_files_in_folder = len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))
    self.assertTrue(nr_files_in_folder == 3 * nr_niftis)
    self.assertTrue(len(nmb.base_element.cache_man.cache_index.items()) == (3 * nr_niftis))

    # transform the last two already-cached items plus the remaining ones that need new processing
    nmb.transform(self.X[nr_niftis - 2::])
    # now there should be 3 cache files for every item in X
    nr_files_in_folder = len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))
    self.assertTrue(nr_files_in_folder == (3 * len(self.X)))
    self.assertTrue(len(nmb.base_element.cache_man.cache_index.items()) == (3 * len(self.X)))
def objective_function_simple(self, cfg):
    cfg = {k: cfg[k] for k in cfg if cfg[k]}
    values = []

    train_indices = list(self.pipe.cross_validation.outer_folds.values())[0].train_indices
    self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
        self.X, self.y, kwargs=None, indices=train_indices)

    for inner_fold in list(list(self.pipe.cross_validation.inner_folds.values())[0].values()):
        sc = PipelineElement("StandardScaler", {})
        pca = PipelineElement("PCA", {}, random_state=42)
        svc = PipelineElement("SVC", {}, random_state=42, gamma='auto')
        my_pipe = PhotonPipeline([('StandardScaler', sc), ('PCA', pca), ('SVC', svc)])
        my_pipe.set_params(**cfg)
        my_pipe.fit(self._validation_X[inner_fold.train_indices, :],
                    self._validation_y[inner_fold.train_indices])
        values.append(accuracy_score(self._validation_y[inner_fold.test_indices],
                                     my_pipe.predict(self._validation_X[inner_fold.test_indices, :])))

    return 1 - np.mean(values)
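# Illustrative only: the objective above returns 1 minus the mean inner-fold
# accuracy, so an optimizer that *minimizes* effectively maximizes accuracy.
# A self-contained scikit-learn analogue (the names here are hypothetical,
# not the PHOTON API):
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

def objective(cfg, X, y):
    # drop entries whose value is falsy, mirroring the cfg filtering above
    cfg = {k: v for k, v in cfg.items() if v}
    model = make_pipeline(StandardScaler(), PCA(), SVC(random_state=42, gamma="auto"))
    model.set_params(**cfg)
    return 1 - np.mean(cross_val_score(model, X, y, cv=5, scoring="accuracy"))

X, y = load_breast_cancer(return_X_y=True)
print(objective({"pca__n_components": 10, "svc__C": 1.0}, X, y))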
def test_neuro_hyperpipe_parallelized_batched_caching(self):
    cache_path = self.cache_folder_path
    self.hyperpipe = Hyperpipe('complex_case',
                               inner_cv=KFold(n_splits=5),
                               outer_cv=KFold(n_splits=3),
                               optimizer='grid_search',
                               cache_folder=cache_path,
                               metrics=['mean_squared_error'],
                               best_config_metric='mean_squared_error',
                               output_settings=OutputSettings(project_folder=self.tmp_folder_path))

    nb = ParallelBranch("SubjectCaching", nr_of_processes=1)
    nb += PipelineElement.create("ResampleImages", StupidAdditionTransformer(),
                                 {'voxel_size': [3, 5, 10]}, batch_size=4)

    self.hyperpipe += nb
    self.hyperpipe += PipelineElement("StandardScaler", {})
    self.hyperpipe += PipelineElement("PCA", {'n_components': [3, 4]})
    self.hyperpipe += PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

    self.hyperpipe.fit(self.X, self.y)

    # assert cache is empty again
    nr_of_p_files = len(glob.glob(os.path.join(self.hyperpipe.cache_folder, "*.p")))
    print(nr_of_p_files)
    self.assertTrue(nr_of_p_files == 0)
def setup_crazy_pipe(self):
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = list()

    nmb_list = list()
    for i in range(5):
        nmb = ParallelBranch(name=str(i), nr_of_processes=i + 3)
        sp = PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(1, 50)})
        nmb += sp
        nmb_list.append(nmb)

    my_switch = Switch('disabling_test_switch')
    my_switch += nmb_list[0]
    my_switch += nmb_list[1]

    my_stack = Stack('stack_of_branches')
    for i in range(3):
        my_branch = Branch('branch_' + str(i + 2))
        my_branch += PipelineElement('StandardScaler')
        my_branch += nmb_list[i + 2]
        my_stack += my_branch

    self.hyperpipe.add(my_stack)
    self.hyperpipe.add(PipelineElement('StandardScaler'))
    self.hyperpipe.add(my_switch)
    self.hyperpipe.add(PipelineElement('SVC'))
    return nmb_list
def test_confounder_removal_statistically(self):
    cr = PipelineElement("ConfounderRemoval", {}, standardize_covariates=False)
    cr.fit(self.z[:, 1:3], self.z[:, 0], **{"confounder": self.z[:, 3]})
    # use transform to write data to cache
    z_transformed = cr.transform(self.z[:, 1:3], **{"confounder": self.z[:, 3]})

    corr = np.corrcoef(
        np.concatenate([self.z[:, 0].reshape(-1, 1),
                        z_transformed[0],
                        self.z[:, 3].reshape(-1, 1)], axis=1),
        rowvar=False)

    # correlation between target and feature should be lower than 0.25 in this case
    # correlation between covariate and feature should be near zero
    self.assertLess(corr[1, 0], 0.25)
    self.assertLess(corr[2, 0], 0.25)
    self.assertAlmostEqual(corr[3, 1], 0)
    self.assertAlmostEqual(corr[3, 2], 0)
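# A minimal numpy sketch of linear confound removal, the standard technique the
# ConfounderRemoval element is statistically checked against here: regress each
# feature on the confound and keep the residuals. Variable names are illustrative.
import numpy as np

rng = np.random.default_rng(42)
confound = rng.normal(size=(200, 1))
features = 0.8 * confound + rng.normal(size=(200, 2))  # features contaminated by the confound

design = np.hstack([np.ones((200, 1)), confound])      # intercept + confound
beta, *_ = np.linalg.lstsq(design, features, rcond=None)
residuals = features - design @ beta                   # confound-cleaned features

# OLS residuals are orthogonal to the design columns, so the cleaned features
# are (numerically) uncorrelated with the confound
assert abs(np.corrcoef(residuals[:, 0], confound[:, 0])[0, 1]) < 1e-10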
def create_instances_and_transform(neuro_class_str, param_dict, transformed_X):
    for i in range(1, 4):
        if i == 1 or i == 3:
            obj = NeuroBranch(name="single core application", nr_of_processes=1)
        else:
            obj = NeuroBranch(name="multi core application", nr_of_processes=3)

        if i < 3:
            obj += PipelineElement(neuro_class_str, **param_dict)
        if i >= 3:
            obj += PipelineElement(neuro_class_str, batch_size=5, **param_dict)

        # transform data
        obj.base_element.cache_folder = self.cache_folder_path
        obj.base_element.current_config = {"test_suite": 1}
        new_X, _, _ = obj.transform(self.X)
        obj.base_element.clear_cache()

        # compare output to nilearn version
        for index, nilearn_nifti in enumerate(transformed_X):
            photon_nifti = new_X[index]
            if isinstance(photon_nifti, Nifti1Image):
                self.assertTrue(np.array_equal(photon_nifti.dataobj, nilearn_nifti.dataobj))
            else:
                self.assertTrue(np.array_equal(np.asarray(photon_nifti), nilearn_nifti.dataobj))

        print("finished testing object: all images are fine.")
def test_huge_combinations(self):
    hp = Hyperpipe("huge_combinations",
                   metrics=["accuracy"],
                   best_config_metric="accuracy",
                   output_settings=OutputSettings(project_folder=self.tmp_folder_path))

    hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})
    stack = Stack("ensemble")
    for i in range(20):
        stack += PipelineElement("SVC",
                                 hyperparameters={"C": FloatRange(0.001, 5),
                                                  # "poly" is sklearn's name for the polynomial kernel
                                                  "kernel": ["linear", "rbf", "sigmoid", "poly"]})
    hp += stack
    hp += PipelineElement("SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})

    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def test_classification_9(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # crazy everything
        pipe += PipelineElement('StandardScaler')
        pipe += PipelineElement('SamplePairingClassification',
                                {'draw_limit': [100],
                                 'generator': Categorical(['nearest_pair', 'random_pair'])},
                                distance_metric='euclidean', test_disabled=True)

        # set up pipeline branches with half of the features each;
        # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
        source1_branch = Branch('source1_features')
        # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
        source1_branch += DataFilter(indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
        source1_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

        source2_branch = Branch('source2_features')
        # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
        source2_branch += DataFilter(indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)),
                                                       stop=self.X_shape[1]))
        source2_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

        # stack the source branches' output (i.e. horizontal concatenation); see the index sketch below
        pipe += Stack('source_stack', elements=[source1_branch, source2_branch])

        # final estimator with the stack output as features
        pipe += PipelineElement('RandomForestClassifier',
                                hyperparameters={'min_samples_split':
                                                 FloatRange(start=.05, step=.1, stop=.26, range_type='range')})

        self.run_hyperpipe(pipe, self.classification)
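# For reference: the two DataFilter index ranges above simply split the feature
# axis in half. A plain numpy illustration with 13 features, as in Boston Housing:
import numpy as np

n_features = 13
first_half = np.arange(0, n_features // 2)            # array([0, 1, 2, 3, 4, 5])
second_half = np.arange(n_features // 2, n_features)  # array([6, 7, ..., 12])
assert len(first_half) + len(second_half) == n_features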
def test_classification_11(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["linear"]),
                                                "C": Categorical([0.01, 1, 5])})
        SVC2 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["rbf"]),
                                                "C": Categorical([0.01, 1, 5])})
        RF = PipelineElement("RandomForestClassifier")

        # add to pipe
        pipe += Stack("estimator_stack", elements=[SVC1, SVC2, RF], use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_classification_12(self):
    # multiclass classification
    X, y = load_iris(return_X_y=True)

    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["linear"]),
                                                "C": Categorical([0.01, 1, 5])})
        SVC2 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["rbf"]),
                                                "C": Categorical([0.01, 1, 5])})
        RF = PipelineElement("RandomForestClassifier")

        # add to pipe
        pipe += Stack("estimator_stack", elements=[SVC1, SVC2, RF], use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        pipe.optimization.metrics = ["accuracy"]
        pipe.optimization.best_config_metric = "accuracy"

        pipe.fit(X, y)
def test_classification_6(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (use mean in the end)
        svc = PipelineElement("SVC",
                              hyperparameters={"kernel": Categorical(["linear", "rbf"]),
                                               "C": Categorical([0.01, 1, 5])})
        rf = PipelineElement("RandomForestClassifier",
                             hyperparameters={"min_samples_split":
                                              FloatRange(start=0.05, step=0.1, stop=0.26, range_type="range")})
        pipe += Stack("estimator_stack", elements=[svc, rf])
        pipe += PipelineElement("PhotonVotingClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_classification_2(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Switch
        switch = Switch("estimator_switch")
        switch += PipelineElement("SVC",
                                  hyperparameters={"kernel": Categorical(["linear", "rbf"]),
                                                   "C": Categorical([0.01, 1, 5])})
        switch += PipelineElement("RandomForestClassifier",
                                  hyperparameters={"min_samples_split":
                                                   FloatRange(start=0.05, step=0.1, stop=0.26,
                                                              range_type="range")})
        pipe += switch

        self.run_hyperpipe(pipe, self.classification)
def test_regression_9(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # sample pairing with confounder removal
        pipe += PipelineElement("StandardScaler")
        pipe += PipelineElement("PCA",
                                hyperparameters={"n_components": Categorical([None, 5])},
                                test_disabled=True)
        pipe += PipelineElement("SamplePairingRegression",
                                {"draw_limit": [100],
                                 "generator": Categorical(["nearest_pair", "random_pair"])},
                                distance_metric="euclidean", test_disabled=False)
        pipe += PipelineElement("SVR",
                                hyperparameters={"kernel": Categorical(["linear", "rbf"]),
                                                 "C": Categorical([0.01, 1, 5])})

        self.run_hyperpipe(pipe, self.regression)
def test_single_subject_resampling(self):
    voxel_size = [3, 3, 3]

    # nilearn
    from nilearn.image import resample_img
    nilearn_resampled_img = resample_img(self.X[0], interpolation="nearest",
                                         target_affine=np.diag(voxel_size))
    nilearn_resampled_array = nilearn_resampled_img.dataobj

    # photon
    resampler = PipelineElement("ResampleImages", hyperparameters={},
                                voxel_size=voxel_size, batch_size=1)
    single_resampled_img, _, _ = resampler.transform(self.X[0])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += resampler
    branch_resampled_img, _, _ = branch.transform(self.X[0])

    # assert
    self.assertIsInstance(single_resampled_img, np.ndarray)
    self.assertIsInstance(branch_resampled_img[0], Nifti1Image)
    self.assertTrue(np.array_equal(nilearn_resampled_array, single_resampled_img))
    self.assertTrue(np.array_equal(single_resampled_img, branch_resampled_img[0].dataobj))
def test_all_atlases(self):
    for atlas in AtlasLibrary().ATLAS_DICTIONARY.keys():
        print("Running tests for atlas {}".format(atlas))
        brain_atlas = PipelineElement("BrainAtlas", atlas_name=atlas, extract_mode="vec")
        brain_atlas.transform(self.X)
def test_single_subject_smoothing(self):
    # nilearn
    from nilearn.image import smooth_img
    nilearn_smoothed_img = smooth_img(self.X[0], fwhm=[3, 3, 3])
    nilearn_smoothed_array = nilearn_smoothed_img.dataobj

    # photon
    smoother = PipelineElement("SmoothImages", hyperparameters={}, fwhm=3, batch_size=1)
    photon_smoothed_array, _, _ = smoother.transform(self.X[0])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += smoother
    photon_smoothed_img, _, _ = branch.transform(self.X[0])

    # assert
    self.assertIsInstance(photon_smoothed_array, np.ndarray)
    self.assertIsInstance(photon_smoothed_img, Nifti1Image)
    self.assertTrue(np.array_equal(photon_smoothed_array, nilearn_smoothed_array))
    self.assertTrue(np.array_equal(photon_smoothed_img.dataobj, nilearn_smoothed_img.dataobj))
def setUp(self):
    super(CachedPhotonPipelineTests, self).setUp()
    # PHOTON version
    ss = PipelineElement("StandardScaler", {})
    pca = PipelineElement("PCA", {'n_components': [3, 10, 50]}, random_state=3)
    svm = PipelineElement("SVC", {'kernel': ['rbf', 'linear']}, random_state=3)
    self.pipe = PhotonPipeline([('StandardScaler', ss), ('PCA', pca), ('SVC', svm)])

    self.pipe.caching = True
    self.pipe.fold_id = "12345643463434"
    CacheManager.clear_cache_files(self.cache_folder_path)
    self.pipe.cache_folder = self.cache_folder_path

    self.config1 = {'PCA__n_components': 4, 'SVC__C': 3, 'SVC__kernel': 'rbf'}
    self.config2 = {'PCA__n_components': 7, 'SVC__C': 1, 'SVC__kernel': 'linear'}

    self.X, self.y = load_breast_cancer(return_X_y=True)
def setUp(self):
    super(InnerFoldTests, self).setUp()
    self.pipe = PhotonPipeline([
        ('StandardScaler', PipelineElement('StandardScaler')),
        ('PCA', PipelineElement('PCA')),
        ('RidgeClassifier', PipelineElement('RidgeClassifier'))
    ])
    self.config = {
        'PCA__n_components': 5,
        'RidgeClassifier__solver': 'svd',
        'RidgeClassifier__random_state': 42
    }
    self.outer_fold_id = 'TestID'
    self.inner_cv = KFold(n_splits=4)
    self.X, self.y = load_breast_cancer(return_X_y=True)
    self.cross_validation = Hyperpipe.CrossValidation(
        self.inner_cv, None, True, 0.2, True, False, False, None)
    self.cross_validation.inner_folds = {
        self.outer_fold_id: {
            i: FoldInfo(i, i + 1, train, test)
            for i, (train, test) in enumerate(self.inner_cv.split(self.X, self.y))
        }
    }
    self.optimization = Hyperpipe.Optimization(
        'grid_search', {}, ['accuracy', 'recall', 'specificity'], 'accuracy', None)
def setup_crazy_pipe(self):
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = list()

    nmb_list = list()
    for i in range(5):
        nmb = NeuroBranch(name=str(i), nr_of_processes=i + 3)
        nmb += PipelineElement("SmoothImages")
        nmb_list.append(nmb)

    my_switch = Switch("disabling_test_switch")
    my_switch += nmb_list[0]
    my_switch += nmb_list[1]

    my_stack = Stack("stack_of_branches")
    for i in range(3):
        my_branch = Branch("branch_" + str(i + 2))
        my_branch += PipelineElement("StandardScaler")
        my_branch += nmb_list[i + 2]
        my_stack += my_branch

    self.hyperpipe.add(my_stack)
    self.hyperpipe.add(PipelineElement("StandardScaler"))
    self.hyperpipe.add(my_switch)
    self.hyperpipe.add(PipelineElement("SVC"))
    return nmb_list
def test_three_levels_of_feature_importances(self):
    hyperpipe = Hyperpipe('fimps',
                          inner_cv=KFold(n_splits=4),
                          outer_cv=KFold(n_splits=3),
                          metrics=['mean_absolute_error', 'mean_squared_error'],
                          best_config_metric='mean_squared_error',
                          output_settings=OutputSettings(project_folder=self.tmp_folder_path))
    hyperpipe += PipelineElement('StandardScaler')
    hyperpipe += PipelineElement('DecisionTreeRegressor')
    X, y = load_boston(return_X_y=True)
    hyperpipe.fit(X, y)

    expected_nr_of_feature_importances = X.shape[1]
    self.assertTrue(len(hyperpipe.results.best_config_feature_importances)
                    == expected_nr_of_feature_importances)

    for outer_fold in hyperpipe.results.outer_folds:
        self.assertTrue(len(outer_fold.best_config.best_config_score.feature_importances)
                        == expected_nr_of_feature_importances)
        for inner_fold in outer_fold.best_config.inner_folds:
            self.assertTrue(len(inner_fold.feature_importances)
                            == expected_nr_of_feature_importances)
def setUp(self):
    self.batch_size = 10
    nr_features = 3
    origin_list = ["affe", "tiger", "schwein", "giraffe", "löwe"]
    self.data = None
    self.targets = None

    self.neuro_batch = PipelineElement("dummy_batch", batch_size=self.batch_size,
                                       base_element=DummyBatchTransformer())

    for element in origin_list:
        features = [element + str(i) for i in range(0, nr_features)]
        if self.data is None:
            self.data = np.array([features] * self.batch_size)
        else:
            self.data = np.vstack((self.data, [features] * self.batch_size))
        if self.targets is None:
            self.targets = np.array([element] * self.batch_size)
        else:
            self.targets = np.hstack((self.targets, [element] * self.batch_size))

    self.data = np.array(self.data)
    self.targets = np.array(self.targets)
    self.kwargs = {"animals": self.targets}
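# An illustrative sketch of what batch_size means conceptually: the element
# splits the sample axis into chunks of at most batch_size rows and transforms
# them one chunk at a time. Plain numpy, not the PHOTON batching implementation.
import numpy as np

def iterate_batches(data, batch_size):
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]

data = np.arange(25).reshape(25, 1)
sizes = [len(chunk) for chunk in iterate_batches(data, batch_size=10)]
assert sizes == [10, 10, 5]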
def test_multi_subject_resampling(self):
    voxel_size = [3, 3, 3]

    # nilearn
    from nilearn.image import resample_img, index_img
    nilearn_resampled = resample_img(self.X[:3], interpolation="nearest",
                                     target_affine=np.diag(voxel_size))
    nilearn_resampled_img = [index_img(nilearn_resampled, i)
                             for i in range(nilearn_resampled.shape[-1])]
    nilearn_resampled_array = np.moveaxis(nilearn_resampled.dataobj, -1, 0)

    # photon
    resampler = PipelineElement("ResampleImages", hyperparameters={}, voxel_size=voxel_size)
    resampled_img, _, _ = resampler.transform(self.X[:3])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += resampler
    branch_resampled_img, _, _ = branch.transform(self.X[:3])

    # assert
    self.assertIsInstance(resampled_img, np.ndarray)
    self.assertIsInstance(branch_resampled_img, list)
    self.assertIsInstance(branch_resampled_img[0], Nifti1Image)
    self.assertTrue(np.array_equal(nilearn_resampled_array, resampled_img))
    self.assertTrue(np.array_equal(branch_resampled_img[1].dataobj,
                                   nilearn_resampled_img[1].dataobj))
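# Note on the moveaxis call above: nilearn returns one 4D image with subjects on
# the last axis, while the PHOTON element returns a subject-first array. A tiny
# numpy illustration of that axis swap (the shape here is made up):
import numpy as np

vol_4d = np.zeros((61, 73, 61, 3))          # x, y, z, subjects
subject_first = np.moveaxis(vol_4d, -1, 0)  # subjects, x, y, z
assert subject_first.shape == (3, 61, 73, 61)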
def test_class_with_data_preproc(self):
    """Test for a simple pipeline with data preprocessing."""
    X, y = load_breast_cancer(return_X_y=True)

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe('basic_svm_pipe',
                        optimizer='grid_search',
                        metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                        best_config_metric='accuracy',
                        eval_final_performance=False,
                        outer_cv=KFold(n_splits=2),
                        inner_cv=KFold(n_splits=3),
                        verbosity=1,
                        random_seed=42)

    preprocessing = Preprocessing()
    preprocessing += PipelineElement("LabelEncoder")
    my_pipe += preprocessing

    # ADD ELEMENTS TO YOUR PIPELINE
    # first normalize all features
    my_pipe.add(PipelineElement('StandardScaler'))
    # then do feature selection using a PCA
    my_pipe += PipelineElement('PCA',
                               hyperparameters={'n_components': IntegerRange(10, 12)},
                               test_disabled=True)
    # engage and optimize the good old SVM for classification
    my_pipe += PipelineElement('SVC',
                               hyperparameters={'kernel': Categorical(['rbf', 'linear'])},
                               C=2, gamma='scale')

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # serialize the reloaded pipe again; both JSON representations must match
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)
    self.assertEqual(pipe_json, pipe_json_reload)

    my_pipe_reload.fit(X, y)
    self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)
    self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                         elements_to_dict(my_pipe_reload.copy_me()))
def setUp(self): """ Set up for GridSearchTest. """ self.pipeline_elements = [PipelineElement("StandardScaler"), PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 20)}), PipelineElement("SVC")] self.optimizer = GridSearchOptimizer() self.optimizer_name = 'grid_search'
def test_one_hyperpipe(learning_curves, learning_curves_cut):
    if learning_curves and learning_curves_cut is None:
        learning_curves_cut = FloatRange(0, 1, 'range', 0.2)

    output_settings = OutputSettings(project_folder=self.tmp_folder_path, save_output=False)
    test_hyperpipe = Hyperpipe('test_pipe',
                               learning_curves=learning_curves,
                               learning_curves_cut=learning_curves_cut,
                               metrics=['accuracy', 'recall', 'specificity'],
                               best_config_metric='accuracy',
                               inner_cv=self.inner_cv,
                               output_settings=output_settings)

    self.assertEqual(test_hyperpipe.cross_validation.learning_curves, learning_curves)
    if learning_curves:
        self.assertEqual(test_hyperpipe.cross_validation.learning_curves_cut, learning_curves_cut)
    else:
        self.assertIsNone(test_hyperpipe.cross_validation.learning_curves_cut)

    test_hyperpipe += PipelineElement('StandardScaler')
    test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]}, random_state=42)
    test_hyperpipe += PipelineElement('SVC', {'C': [0.1], 'kernel': ['linear']}, random_state=42)
    test_hyperpipe.fit(self.X, self.y)

    config_results = test_hyperpipe.results_handler.results.outer_folds[0].tested_config_list
    config_num = len(config_results)
    for config_nr in range(config_num):
        for inner_fold_nr in range(self.inner_cv.n_splits):
            curves = config_results[config_nr].inner_folds[inner_fold_nr].learning_curves
            if learning_curves:
                self.assertEqual(len(curves), len(learning_curves_cut.values))
                for learning_point_nr in range(len(learning_curves_cut.values)):
                    test_metrics = list(curves[learning_point_nr][1].keys())
                    train_metrics = list(curves[learning_point_nr][2].keys())
                    self.assertEqual(test_hyperpipe.optimization.metrics, test_metrics)
                    self.assertEqual(test_hyperpipe.optimization.metrics, train_metrics)
            else:
                self.assertEqual(curves, [])
def setUp(self): """ Set up for RandomGridSearchOptimizer. """ self.pipeline_elements = [PipelineElement("StandardScaler"), PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 20)}), PipelineElement("SVC")] self.optimizer = RandomSearchOptimizer(n_configurations=5) self.optimizer_name = 'random_search'
def test_false_collection_mode(self):
    custom_atlas = os.path.join(self.atlas_folder, 'AAL_SPM12/AAL.nii.gz')

    with self.assertRaises(ValueError):
        atlas = PipelineElement('BrainAtlas', atlas_name=custom_atlas,
                                extract_mode='vec', batch_size=20)
        atlas.base_element.collection_mode = "array"
        atlas.transform(self.X)
def test_class_with_data_01(self):
    """Test for a simple pipeline with data."""
    X, y = load_breast_cancer(return_X_y=True)

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe("basic_svm_pipe",
                        optimizer="grid_search",
                        metrics=["accuracy", "precision", "recall", "balanced_accuracy"],
                        best_config_metric="accuracy",
                        eval_final_performance=False,
                        outer_cv=KFold(n_splits=2),
                        inner_cv=KFold(n_splits=3),
                        verbosity=1,
                        random_seed=42)

    preprocessing = Preprocessing()
    preprocessing += PipelineElement("LabelEncoder")
    my_pipe += preprocessing

    # ADD ELEMENTS TO YOUR PIPELINE
    # first normalize all features
    my_pipe.add(PipelineElement("StandardScaler"))
    # then do feature selection using a PCA
    my_pipe += PipelineElement("PCA",
                               hyperparameters={"n_components": IntegerRange(10, 12)},
                               test_disabled=True)
    # engage and optimize the good old SVM for classification
    my_pipe += PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["rbf", "linear"])},
                               C=2, gamma="scale")

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # serialize the reloaded pipe again; both JSON representations must match
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)
    self.assertEqual(pipe_json, pipe_json_reload)

    my_pipe_reload.fit(X, y)
    self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)
def test_shall_continue(self):
    X, y = load_boston(return_X_y=True)
    inner_fold_length = 7

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe(name="performance_pipe",
                        optimizer="random_search",
                        optimizer_params={"limit_in_minutes": 2},
                        metrics=["mean_squared_error"],
                        best_config_metric="mean_squared_error",
                        # outer_cv=KFold(n_splits=2, shuffle=True),
                        inner_cv=KFold(n_splits=inner_fold_length),
                        eval_final_performance=True,
                        performance_constraints=[self.constraint_object])

    my_pipe += PipelineElement("StandardScaler")
    my_pipe += PipelineElement("RandomForestRegressor",
                               hyperparameters={"n_estimators": IntegerRange(5, 50)})

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    # clip config results: collect the inner-fold errors of every tested config
    results = my_pipe.results.outer_folds[0].tested_config_list
    configs = []
    for i in range(len(results) - 1):
        configs.append([x.validation.metrics["mean_squared_error"]
                        for x in results[i].inner_folds])

    # the first ten configs establish the baseline threshold (best mean error)
    threshold = np.inf
    for val in configs[:10]:
        challenger = np.mean(val)
        if threshold > challenger:
            threshold = challenger

    originals_for_std = configs[:10]
    for i, val in enumerate(configs[10:]):
        std = np.mean([np.std(x) for x in originals_for_std])
        for j, v in enumerate(val):
            # once the running mean exceeds threshold + std, evaluation must have
            # stopped, so this value has to be the last one recorded
            if np.mean(val[:j + 1]) > threshold + std:
                self.assertEqual(v, val[-1])
                continue
        if len(val) == inner_fold_length - 1 and np.mean(val) < threshold + std:
            threshold = np.mean(val)
        if len(val) > 1:
            originals_for_std.append(val)
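# A self-contained sketch of the early-stopping rule the test above re-implements
# in plain Python: keep evaluating a config only while its running mean inner-fold
# error stays within a std-based tolerance of the best mean seen so far. The
# helper name is hypothetical, not the PHOTON performance-constraint API.
import numpy as np

def should_continue(fold_errors, best_mean, tolerance):
    return np.mean(fold_errors) <= best_mean + tolerance

assert should_continue([1.0, 1.1], best_mean=1.2, tolerance=0.1)
assert not should_continue([2.0, 2.5], best_mean=1.2, tolerance=0.1)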