def objective_function_simple(self, cfg):
    """Score one configuration with a hand-built pipeline.

    Entries of ``cfg`` whose value is falsy are dropped before the
    configuration is applied. A StandardScaler -> PCA -> SVC pipeline is
    then trained and scored on every inner fold of the first outer fold.

    Parameters
    ----------
    cfg : mapping
        Parameter names and values forwarded to ``PhotonPipeline.set_params``.

    Returns
    -------
    float
        ``1 - mean(accuracy)`` over the inner folds.
    """
    active_cfg = {key: cfg[key] for key in cfg if cfg[key]}

    cross_validation = self.pipe.cross_validation
    first_outer_fold = list(cross_validation.outer_folds.values())[0]
    self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
        self.X, self.y, kwargs=None, indices=first_outer_fold.train_indices
    )

    first_inner_folds = list(cross_validation.inner_folds.values())[0]
    accuracies = []
    for fold in list(first_inner_folds.values()):
        # a fresh pipeline per fold so that no fitted state is carried over
        fold_pipe = PhotonPipeline([
            ('StandardScaler', PipelineElement("StandardScaler", {})),
            ('PCA', PipelineElement("PCA", {}, random_state=42)),
            ('SVC', PipelineElement("SVC", {}, random_state=42, gamma='auto')),
        ])
        fold_pipe.set_params(**active_cfg)

        train_idx = fold.train_indices
        fold_pipe.fit(self._validation_X[train_idx, :], self._validation_y[train_idx])

        test_idx = fold.test_indices
        predictions = fold_pipe.predict(self._validation_X[test_idx, :])
        accuracies.append(accuracy_score(self._validation_y[test_idx], predictions))

    return 1 - np.mean(accuracies)
def test_neuro_module_branch(self):
    """Check that a NeuroBranch caches one file per element and image.

    A three-element branch first transforms the first ``nr_niftis`` images
    and afterwards the slice that starts two images earlier, so the second
    call overlaps with the first one. After each call both the number of
    ``*.p`` files in the cache folder and the number of cache-index entries
    must equal three per image processed so far.
    """
    nmb = NeuroBranch('best_branch_ever')
    nmb += PipelineElement('SmoothImages', fwhm=10)
    nmb += PipelineElement('ResampleImages', voxel_size=5)
    nmb += PipelineElement('BrainAtlas', rois=['Hippocampus_L', 'Hippocampus_R'],
                           atlas_name="AAL", extract_mode='vec')

    nmb.base_element.cache_folder = self.cache_folder_path
    CacheManager.clear_cache_files(nmb.base_element.cache_folder, True)
    # set the config so that caching works
    nmb.set_params(**{'SmoothImages__fwhm': 10, 'ResampleImages__voxel_size': 5})

    # nr_niftis images pass through 3 elements -> 3 * nr_niftis cache entries
    nr_niftis = 7
    nmb.transform(self.X[:nr_niftis])
    nr_files_in_folder = len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))
    self.assertEqual(nr_files_in_folder, 3 * nr_niftis)
    self.assertEqual(len(nmb.base_element.cache_man.cache_index.items()), 3 * nr_niftis)

    # the last two images of the first call are transformed again (they should
    # come from the cache) together with every remaining image
    nmb.transform(self.X[nr_niftis - 2:])
    # now every image in self.X must be cached once per element
    nr_files_in_folder = len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))
    self.assertEqual(nr_files_in_folder, 3 * len(self.X))
    self.assertEqual(len(nmb.base_element.cache_man.cache_index.items()), 3 * len(self.X))
def test_neuro_hyperpipe_parallelized_batched_caching(self):
    """Fit a cached Hyperpipe with a batched ParallelBranch and check cleanup.

    After ``fit`` has finished, no ``*.p`` cache file may be left in the
    hyperpipe's cache folder.
    """
    cache_path = self.cache_folder_path

    self.hyperpipe = Hyperpipe('complex_case',
                               inner_cv=KFold(n_splits=5),
                               outer_cv=KFold(n_splits=3),
                               optimizer='grid_search',
                               cache_folder=cache_path,
                               metrics=['mean_squared_error'],
                               best_config_metric='mean_squared_error',
                               output_settings=OutputSettings(
                                   project_folder=self.tmp_folder_path))

    nb = ParallelBranch("SubjectCaching", nr_of_processes=1)
    nb += PipelineElement.create("ResampleImages", StupidAdditionTransformer(),
                                 {'voxel_size': [3, 5, 10]}, batch_size=4)
    self.hyperpipe += nb

    self.hyperpipe += PipelineElement("StandardScaler", {})
    self.hyperpipe += PipelineElement("PCA", {'n_components': [3, 4]})
    self.hyperpipe += PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

    self.hyperpipe.fit(self.X, self.y)

    # assert cache is empty again; assertEqual reports the actual count on
    # failure, so no separate debug print is needed
    nr_of_p_files = len(glob.glob(os.path.join(self.hyperpipe.cache_folder, "*.p")))
    self.assertEqual(nr_of_p_files, 0)
def setUp(self):
    """Create the data set plus the elements, branches and switches under test."""
    # return_X_y is passed by keyword: it is keyword-only in current
    # scikit-learn releases
    self.X, self.y = load_breast_cancer(return_X_y=True)

    # basic building blocks
    self.svc = PipelineElement('SVC', {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']})
    self.tree = PipelineElement('DecisionTreeClassifier', {'min_samples_split': [2, 3, 4]})
    self.gpc = PipelineElement('GaussianProcessClassifier')
    self.pca = PipelineElement('PCA')

    # branches wrap copies, so the originals above stay untouched
    self.estimator_branch = Branch('estimator_branch', [self.tree.copy_me()])
    self.transformer_branch = Branch('transformer_branch', [self.pca.copy_me()])

    self.estimator_switch = Switch(
        'estimator_switch',
        [self.svc.copy_me(), self.tree.copy_me(), self.gpc.copy_me()])
    self.estimator_switch_with_branch = Switch(
        'estimator_switch_with_branch',
        [self.tree.copy_me(), self.estimator_branch.copy_me()])
    self.transformer_switch_with_branch = Switch(
        'transformer_switch_with_branch',
        [self.pca.copy_me(), self.transformer_branch.copy_me()])
    self.switch_in_switch = Switch(
        'Switch_in_switch',
        [self.transformer_branch.copy_me(),
         self.transformer_switch_with_branch.copy_me()])
def test_huge_combinations(self):
    """Expect ``fit`` to raise a Warning for a very large search space.

    The pipeline combines a PCA, a stack of 20 SVCs that each have their own
    hyperparameters, and a final SVC.
    """
    hp = Hyperpipe(
        "huge_combinations",
        metrics=["accuracy"],
        best_config_metric="accuracy",
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )
    hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})

    stack = Stack("ensemble")
    for _ in range(20):
        stack += PipelineElement(
            "SVC",
            hyperparameters={
                "C": FloatRange(0.001, 5),
                "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
            },
        )
    hp += stack

    hp += PipelineElement(
        "SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})

    # return_X_y is passed by keyword: it is keyword-only in current
    # scikit-learn releases
    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def create_hyperpipe(self):
    """Build the grid-search Hyperpipe used for the permutation test.

    Returns
    -------
    Hyperpipe
        An unfitted pipe consisting of StandardScaler, PCA and SVC.
    """
    # this is needed here for the parallelisation
    from photonai.base import Hyperpipe, PipelineElement, OutputSettings
    from photonai.optimization import FloatRange, Categorical, IntegerRange
    from sklearn.model_selection import GroupKFold
    from sklearn.model_selection import KFold

    output_settings = OutputSettings(
        mongodb_connect_url='mongodb://localhost:27017/photon_results',
        project_folder=self.tmp_folder_path,
    )

    hyperpipe = Hyperpipe(
        'permutation_test_1',
        optimizer='grid_search',
        metrics=['accuracy', 'precision', 'recall'],
        best_config_metric='accuracy',
        outer_cv=GroupKFold(n_splits=2),
        inner_cv=KFold(n_splits=2),
        calculate_metrics_across_folds=True,
        eval_final_performance=True,
        verbosity=1,
        output_settings=output_settings,
    )

    # Add transformer elements
    hyperpipe += PipelineElement(
        "StandardScaler",
        hyperparameters={},
        test_disabled=False,
        with_mean=True,
        with_std=True,
    )
    hyperpipe += PipelineElement(
        "PCA",
        hyperparameters={'n_components': IntegerRange(3, 5)},
        test_disabled=False,
    )

    # Add estimator
    hyperpipe += PipelineElement(
        "SVC",
        # 'C': FloatRange(0.1, 5) is currently not part of the search space
        hyperparameters={'kernel': ['linear', 'rbf']},
        gamma='scale',
        max_iter=1000000,
    )

    return hyperpipe
def test_cv_config_and_dummy_nr(self):
    """Check fold counts, the number of tested configs and the dummy results.

    After fitting, the result tree must contain ``self.outer_fold_nr`` outer
    folds, ``self.inner_fold_nr`` inner folds per outer fold and per tested
    config, and ``expected_configs`` tested configs in every outer fold.
    """
    X, y = load_boston(return_X_y=True)

    self.hyperpipe += PipelineElement('StandardScaler')
    self.hyperpipe += PipelineElement('PCA', {'n_components': IntegerRange(3, 5)})
    self.hyperpipe += PipelineElement('SVR', {'C': FloatRange(0.001, 10, num=5),
                                              'kernel': Categorical(['linear', 'rbf'])})
    self.hyperpipe.fit(X, y)

    expected_configs = 2 * 5 * 2

    # check version is present
    self.assertIsNotNone(self.hyperpipe.results.version)

    # check nr of outer and inner folds
    self.assertEqual(len(self.hyperpipe.results.outer_folds), self.outer_fold_nr)
    self.assertEqual(len(self.hyperpipe.cross_validation.outer_folds), self.outer_fold_nr)

    for inner_folds in self.hyperpipe.cross_validation.inner_folds.values():
        self.assertEqual(len(inner_folds), self.inner_fold_nr)

    for outer_fold_result in self.hyperpipe.results.outer_folds:
        # check that we have the right amount of configs tested in each outer fold
        self.assertEqual(len(outer_fold_result.tested_config_list), expected_configs)

        for config_result in outer_fold_result.tested_config_list:
            # check that we have the right amount of inner-folds per config
            self.assertEqual(len(config_result.inner_folds), self.inner_fold_nr)

    self.check_for_dummy()
def setUp(self):
    """Prepare pipeline, config, data and CV/optimization helpers for the tests."""
    super(InnerFoldTests, self).setUp()

    self.pipe = PhotonPipeline([
        ("StandardScaler", PipelineElement("StandardScaler")),
        ("PCA", PipelineElement("PCA")),
        ("RidgeClassifier", PipelineElement("RidgeClassifier")),
    ])
    self.config = {
        "PCA__n_components": 5,
        "RidgeClassifier__solver": "svd",
        "RidgeClassifier__random_state": 42,
    }

    self.outer_fold_id = "TestID"
    self.inner_cv = KFold(n_splits=4)
    # return_X_y is passed by keyword: it is keyword-only in current
    # scikit-learn releases
    self.X, self.y = load_breast_cancer(return_X_y=True)

    self.cross_validation = Hyperpipe.CrossValidation(
        self.inner_cv, None, True, 0.2, True, False)
    # one FoldInfo per inner split, all registered under the single outer fold id
    self.cross_validation.inner_folds = {
        self.outer_fold_id: {
            i: FoldInfo(i, i + 1, train, test)
            for i, (train, test) in enumerate(self.inner_cv.split(self.X, self.y))
        }
    }
    self.optimization = Hyperpipe.Optimization(
        "grid_search", {}, ["accuracy", "recall", "specificity"], "accuracy", None)
def setUp(self):
    """Create a caching PhotonPipeline, two configs and the data set.

    The cache folder is emptied before it is assigned to the pipeline.
    """
    super(CachedPhotonPipelineTests, self).setUp()

    # Photon Version
    scaler = PipelineElement("StandardScaler", {})
    pca = PipelineElement("PCA", {'n_components': [3, 10, 50]}, random_state=3)
    svc = PipelineElement("SVC", {'kernel': ['rbf', 'linear']}, random_state=3)
    steps = [('StandardScaler', scaler), ('PCA', pca), ('SVC', svc)]

    self.pipe = PhotonPipeline(steps)
    self.pipe.caching = True
    self.pipe.fold_id = "12345643463434"

    CacheManager.clear_cache_files(self.cache_folder_path)
    self.pipe.cache_folder = self.cache_folder_path

    self.config1 = {
        'PCA__n_components': 4,
        'SVC__C': 3,
        'SVC__kernel': 'rbf',
    }
    self.config2 = {
        'PCA__n_components': 7,
        'SVC__C': 1,
        'SVC__kernel': 'linear',
    }

    self.X, self.y = load_breast_cancer(return_X_y=True)
def test_classification_9(self):
    """Run every hyperpipe with sample pairing and a two-branch feature stack.

    The feature columns are split in half; each half runs through its own
    branch with an optional PCA. The branch outputs are stacked and handed
    to a RandomForestClassifier.
    """
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # crazy everything
        pipe += PipelineElement('StandardScaler')
        pairing_params = {
            'draw_limit': [100],
            'generator': Categorical(['nearest_pair', 'random_pair']),
        }
        pipe += PipelineElement('SamplePairingClassification', pairing_params,
                                distance_metric='euclidean', test_disabled=True)

        # setup pipeline branches with half of the features each
        # if both PCAs are disabled, features are simply concatenated and
        # passed to the final estimator
        first_half = Branch('source1_features')
        # columns [0, floor(n_features / 2))
        first_half += DataFilter(
            indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
        first_half += PipelineElement(
            'PCA',
            hyperparameters={'n_components': Categorical([None, 5])},
            test_disabled=True)

        second_half = Branch('source2_features')
        # columns [floor(n_features / 2), n_features)
        second_half += DataFilter(
            indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)),
                              stop=self.X_shape[1]))
        second_half += PipelineElement(
            'PCA',
            hyperparameters={'n_components': Categorical([None, 5])},
            test_disabled=True)

        # setup source branches and stack their output (i.e. horizontal concatenation)
        pipe += Stack('source_stack', elements=[first_half, second_half])

        # final estimator with stack output as features
        forest_params = {
            'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                            range_type='range'),
        }
        pipe += PipelineElement('RandomForestClassifier', hyperparameters=forest_params)

        self.run_hyperpipe(pipe, self.classification)
def setUp(self):
    """Create a caching PhotonPipeline, two test configs and the data set."""
    super(CachedPhotonPipelineTests, self).setUp()

    # Photon Version
    ss = PipelineElement("StandardScaler", {})
    pca = PipelineElement("PCA", {"n_components": [3, 10, 50]}, random_state=3)
    svm = PipelineElement("SVC", {"kernel": ["rbf", "linear"]}, random_state=3)

    self.pipe = PhotonPipeline([("StandardScaler", ss), ("PCA", pca), ("SVC", svm)])
    self.pipe.caching = True
    self.pipe.fold_id = "12345643463434"
    self.pipe.cache_folder = self.cache_folder_path

    self.config1 = {
        "PCA__n_components": 4,
        "SVC__C": 3,
        "SVC__kernel": "rbf",
    }
    self.config2 = {
        "PCA__n_components": 7,
        "SVC__C": 1,
        "SVC__kernel": "linear",
    }

    # return_X_y is passed by keyword: it is keyword-only in current
    # scikit-learn releases
    self.X, self.y = load_breast_cancer(return_X_y=True)
def setup_crazy_pipe(self):
    """Replace the hyperpipe's elements with a nested ParallelBranch layout.

    Five ParallelBranches (each holding one PCA) are created. The first two
    go into a Switch, the other three are each wrapped in a Branch behind a
    StandardScaler and collected in a Stack.

    Returns
    -------
    list
        The five ParallelBranch objects in creation order.
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    parallel_branches = []
    for idx in range(5):
        parallel = ParallelBranch(name=str(idx), nr_of_processes=idx + 3)
        pca = PipelineElement(
            'PCA', hyperparameters={'n_components': IntegerRange(1, 50)})
        parallel += pca
        parallel_branches.append(parallel)

    switch = Switch('disabling_test_switch')
    for candidate in parallel_branches[:2]:
        switch += candidate

    stack = Stack('stack_of_branches')
    for idx in range(2, 5):
        wrapper = Branch('branch_' + str(idx))
        wrapper += PipelineElement('StandardScaler')
        wrapper += parallel_branches[idx]
        stack += wrapper

    self.hyperpipe.add(stack)
    self.hyperpipe.add(PipelineElement('StandardScaler'))
    self.hyperpipe.add(switch)
    self.hyperpipe.add(PipelineElement('SVC'))
    return parallel_branches
def create_instances_and_transform(neuro_class_str, param_dict, transformed_X):
    """Transform ``self.X`` with three NeuroBranch setups and compare results.

    ``self`` is taken from the enclosing scope. The element is run in a
    single-process branch, in a three-process branch, and in a
    single-process branch with ``batch_size=5``. Every output image must
    equal the corresponding entry of ``transformed_X``.

    Parameters
    ----------
    neuro_class_str : str
        Name of the element passed to ``PipelineElement``.
    param_dict : dict
        Keyword arguments forwarded to ``PipelineElement``.
    transformed_X : sequence
        Reference images; each is compared through its ``dataobj``.
    """
    # (branch name, number of processes, use batching)
    setups = (
        ("single core application", 1, False),
        ("multi core application", 3, False),
        ("single core application", 1, True),
    )

    for branch_name, n_processes, batched in setups:
        obj = NeuroBranch(name=branch_name, nr_of_processes=n_processes)
        if batched:
            obj += PipelineElement(neuro_class_str, batch_size=5, **param_dict)
        else:
            obj += PipelineElement(neuro_class_str, **param_dict)

        # transform data
        obj.base_element.cache_folder = self.cache_folder_path
        obj.base_element.current_config = {"test_suite": 1}
        new_X, _, _ = obj.transform(self.X)
        obj.base_element.clear_cache()

        # compare output to nilearn version
        for index, nilearn_nifti in enumerate(transformed_X):
            photon_nifti = new_X[index]
            if isinstance(photon_nifti, Nifti1Image):
                photon_data = photon_nifti.dataobj
            else:
                photon_data = np.asarray(photon_nifti)
            self.assertTrue(np.array_equal(photon_data, nilearn_nifti.dataobj))

        print("finished testing object: all images are fine.")
def setup_crazy_pipe(self):
    """Replace the hyperpipe's elements with a nested NeuroBranch layout.

    Five NeuroBranches (each holding one SmoothImages element) are created.
    The first two go into a Switch, the other three are each wrapped in a
    Branch behind a StandardScaler and collected in a Stack.

    Returns
    -------
    list
        The five NeuroBranch objects in creation order.
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    neuro_branches = []
    for idx in range(5):
        neuro = NeuroBranch(name=str(idx), nr_of_processes=idx + 3)
        neuro += PipelineElement("SmoothImages")
        neuro_branches.append(neuro)

    switch = Switch("disabling_test_switch")
    for candidate in neuro_branches[:2]:
        switch += candidate

    stack = Stack("stack_of_branches")
    for idx in range(2, 5):
        wrapper = Branch("branch_" + str(idx))
        wrapper += PipelineElement("StandardScaler")
        wrapper += neuro_branches[idx]
        stack += wrapper

    self.hyperpipe.add(stack)
    self.hyperpipe.add(PipelineElement("StandardScaler"))
    self.hyperpipe.add(switch)
    self.hyperpipe.add(PipelineElement("SVC"))
    return neuro_branches
def test_classification_11(self):
    """Run every hyperpipe with a probability-emitting estimator stack.

    A linear SVC, an RBF SVC and a random forest are stacked with
    ``use_probabilities=True``; a second random forest is the final
    estimator.
    """
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        linear_svc = PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["linear"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        rbf_svc = PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["rbf"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        forest = PipelineElement("RandomForestClassifier")

        # add to pipe
        stacked_estimators = [linear_svc, rbf_svc, forest]
        pipe += Stack("estimator_stack", elements=stacked_estimators,
                      use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_classification_12(self):
    """Fit every hyperpipe with a probability stack on the iris data.

    A linear SVC, an RBF SVC and a random forest are stacked with
    ``use_probabilities=True``; a second random forest is the final
    estimator. Only accuracy is used as metric.
    """
    # multiclass classification; return_X_y is passed by keyword because it
    # is keyword-only in current scikit-learn releases
    X, y = load_iris(return_X_y=True)

    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["linear"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        SVC2 = PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["rbf"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        RF = PipelineElement("RandomForestClassifier")

        # add to pipe
        pipe += Stack("estimator_stack", elements=[SVC1, SVC2, RF],
                      use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        pipe.optimization.metrics = ["accuracy"]
        pipe.optimization.best_config_metric = "accuracy"

        pipe.fit(X, y)
def test_classification_6(self):
    """Run every hyperpipe with an SVC/random-forest stack and a voting classifier."""
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (use mean in the end)
        svc_params = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        svc = PipelineElement("SVC", hyperparameters=svc_params)

        forest_params = {
            "min_samples_split": FloatRange(start=0.05, step=0.1, stop=0.26,
                                            range_type="range"),
        }
        forest = PipelineElement("RandomForestClassifier",
                                 hyperparameters=forest_params)

        pipe += Stack("estimator_stack", elements=[svc, forest])
        pipe += PipelineElement("PhotonVotingClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_classification_2(self):
    """Run every hyperpipe with a Switch between an SVC and a random forest."""
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Switch
        estimator_switch = Switch("estimator_switch")

        svc_params = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        estimator_switch += PipelineElement("SVC", hyperparameters=svc_params)

        forest_params = {
            "min_samples_split": FloatRange(start=0.05, step=0.1, stop=0.26,
                                            range_type="range"),
        }
        estimator_switch += PipelineElement("RandomForestClassifier",
                                            hyperparameters=forest_params)

        pipe += estimator_switch

        self.run_hyperpipe(pipe, self.classification)
def test_regression_9(self):
    """Run every hyperpipe with scaling, optional PCA, sample pairing and an SVR."""
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        pipe += PipelineElement("StandardScaler")

        pca_params = {"n_components": Categorical([None, 5])}
        pipe += PipelineElement("PCA", hyperparameters=pca_params,
                                test_disabled=True)

        pairing_params = {
            "draw_limit": [100],
            "generator": Categorical(["nearest_pair", "random_pair"]),
        }
        pipe += PipelineElement("SamplePairingRegression", pairing_params,
                                distance_metric="euclidean",
                                test_disabled=False)

        svr_params = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        pipe += PipelineElement("SVR", hyperparameters=svr_params)

        self.run_hyperpipe(pipe, self.regression)
def test_three_levels_of_feature_importances(self):
    """Check the length of the feature importances on three result levels.

    The overall best config, the best config of every outer fold, and each
    of that config's inner folds must all report one importance per input
    feature.
    """
    hyperpipe = Hyperpipe(
        "fimps",
        inner_cv=KFold(n_splits=4),
        outer_cv=KFold(n_splits=3),
        metrics=["mean_absolute_error", "mean_squared_error"],
        best_config_metric="mean_squared_error",
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )
    hyperpipe += PipelineElement("StandardScaler")
    hyperpipe += PipelineElement("DecisionTreeRegressor")

    # return_X_y is passed by keyword, matching the other load_boston call
    # in this test suite
    X, y = load_boston(return_X_y=True)
    hyperpipe.fit(X, y)

    expected_nr_of_feature_importances = X.shape[1]

    self.assertEqual(
        len(hyperpipe.results.best_config_feature_importances),
        expected_nr_of_feature_importances)

    for outer_fold in hyperpipe.results.outer_folds:
        self.assertEqual(
            len(outer_fold.best_config.best_config_score.feature_importances),
            expected_nr_of_feature_importances)
        for inner_fold in outer_fold.best_config.inner_folds:
            self.assertEqual(
                len(inner_fold.feature_importances),
                expected_nr_of_feature_importances)
def setUp(self):
    """Prepare pipeline, config, data and CV/optimization helpers for the tests."""
    super(InnerFoldTests, self).setUp()

    steps = [
        ('StandardScaler', PipelineElement('StandardScaler')),
        ('PCA', PipelineElement('PCA')),
        ('RidgeClassifier', PipelineElement('RidgeClassifier')),
    ]
    self.pipe = PhotonPipeline(steps)

    self.config = {
        'PCA__n_components': 5,
        'RidgeClassifier__solver': 'svd',
        'RidgeClassifier__random_state': 42,
    }

    self.outer_fold_id = 'TestID'
    self.inner_cv = KFold(n_splits=4)
    self.X, self.y = load_breast_cancer(return_X_y=True)

    self.cross_validation = Hyperpipe.CrossValidation(
        self.inner_cv, None, True, 0.2, True, False, False, None)

    # one FoldInfo per inner split, all registered under the single outer fold id
    fold_infos = {
        fold_nr: FoldInfo(fold_nr, fold_nr + 1, train_idx, test_idx)
        for fold_nr, (train_idx, test_idx)
        in enumerate(self.inner_cv.split(self.X, self.y))
    }
    self.cross_validation.inner_folds = {self.outer_fold_id: fold_infos}

    self.optimization = Hyperpipe.Optimization(
        'grid_search', {}, ['accuracy', 'recall', 'specificity'], 'accuracy', None)
def test_inverse_tansform(self):
    """Compare PhotonPipeline.inverse_transform with sklearn and with a Stack.

    First a scaler + PCA pipeline must reproduce sklearn's inverse transform
    exactly. Then the feature importances of a pipeline that starts with a
    Stack are inverse-transformed and must have as many columns as
    ``self.X``.
    """
    # simple pipe: sklearn reference
    reference_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
    reference_pipe.fit(self.X, self.y)
    reference_transformed = reference_pipe.transform(self.X)
    reference_restored = reference_pipe.inverse_transform(reference_transformed)

    # simple pipe: photon counterpart
    photon_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
    photon_pipe.fit(self.X, self.y)
    photon_transformed, _, _ = photon_pipe.transform(self.X)
    photon_restored, _, _ = photon_pipe.inverse_transform(photon_transformed)

    self.assertTrue(np.array_equal(reference_restored, photon_restored))

    # now including stack
    pca_stack = Stack("stack", [self.p_pca])
    stack_pipeline = PhotonPipeline([
        ("stack", pca_stack),
        ("StandardScaler", PipelineElement("StandardScaler")),
        ("LinearSVC", PipelineElement("LinearSVC")),
    ])
    stack_pipeline.fit(self.X, self.y)

    importances = stack_pipeline.feature_importances_
    restored_importances, _, _ = stack_pipeline.inverse_transform(importances)
    self.assertEqual(restored_importances.shape[1], self.X.shape[1])
def test_test_transform_single(self):
    """Check NeuroBranch.test_transform output and its ValueError case.

    With smoothing and resampling only, ``test_transform`` must write
    ``./neuro_branch_testcase_0_transformed.nii``. After a BrainAtlas
    element has been added, a ValueError is expected.
    """
    branch = NeuroBranch('neuro_branch')
    branch += PipelineElement('SmoothImages', fwhm=10)
    branch += PipelineElement('ResampleImages', voxel_size=5)

    branch.base_element.cache_folder = self.cache_folder_path
    CacheManager.clear_cache_files(branch.base_element.cache_folder, True)

    # set the config so that caching works
    caching_config = {
        'SmoothImages__fwhm': 10,
        'ResampleImages__voxel_size': 5,
    }
    branch.set_params(**caching_config)

    branch.test_transform(self.X)

    written_file = "./neuro_branch_testcase_0_transformed.nii"
    self.assertTrue(os.path.exists(written_file))
    os.remove(written_file)

    with self.assertRaises(ValueError):
        branch += PipelineElement('BrainAtlas',
                                  rois=['Hippocampus_L', 'Hippocampus_R'],
                                  atlas_name="AAL",
                                  extract_mode='vec')
        branch.test_transform(self.X)
def test_custom_mask(self):
    """Use a mask image given by path and expect an error for a bad one.

    Transforming with an existing mask file must succeed; transforming with
    the mask image ``'XXXXX'`` must raise FileNotFoundError.
    """
    mask_path = os.path.join(self.atlas_folder, 'Cerebellum/P_08_Cere.nii.gz')
    valid_mask = PipelineElement('BrainMask', mask_image=mask_path,
                                 extract_mode='vec', batch_size=20)
    # the result is not inspected; the call only has to succeed
    valid_mask.transform(self.X)

    with self.assertRaises(FileNotFoundError):
        missing_mask = PipelineElement('BrainMask', mask_image='XXXXX',
                                       extract_mode='vec', batch_size=20)
        missing_mask.transform(self.X)
def test_class_with_data_preproc(self):
    """
    Test for simple pipeline with data.

    The fitted pipe is serialized to JSON and reloaded. The reloaded pipe
    must serialize to the same JSON, find the same best config after
    fitting, and have the same element dictionary as the original.
    """
    X, y = load_breast_cancer(return_X_y=True)

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe(
        'basic_svm_pipe',
        optimizer='grid_search',
        metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
        best_config_metric='accuracy',
        eval_final_performance=False,
        outer_cv=KFold(n_splits=2),
        inner_cv=KFold(n_splits=3),
        verbosity=1,
        random_seed=42)

    preprocessing = Preprocessing()
    preprocessing += PipelineElement("LabelEncoder")
    my_pipe += preprocessing

    # ADD ELEMENTS TO YOUR PIPELINE
    # first normalize all features
    my_pipe.add(PipelineElement('StandardScaler'))

    # then do feature selection using a PCA,
    my_pipe += PipelineElement(
        'PCA',
        hyperparameters={'n_components': IntegerRange(10, 12)},
        test_disabled=True)

    # engage and optimize the good old SVM for Classification
    my_pipe += PipelineElement(
        'SVC',
        hyperparameters={'kernel': Categorical(['rbf', 'linear'])},
        C=2,
        gamma='scale')

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    json_transformer = JsonTransformer()

    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # keep pipe_json untouched here: rebinding it to the reloaded JSON would
    # make the comparison below compare an object with itself
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)

    self.assertEqual(pipe_json, pipe_json_reload)

    my_pipe_reload.fit(X, y)

    self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)
    self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                         elements_to_dict(my_pipe_reload.copy_me()))
def test_one_hyperpipe(learning_curves, learning_curves_cut):
    """Fit a small Hyperpipe and verify how learning curves are recorded.

    ``self`` is taken from the enclosing scope.

    Parameters
    ----------
    learning_curves : bool
        Passed to ``Hyperpipe``; decides whether curves are expected.
    learning_curves_cut : FloatRange or None
        Passed to ``Hyperpipe``. When curves are requested and this is
        ``None``, ``FloatRange(0, 1, 'range', 0.2)`` is used instead.
    """
    if learning_curves and learning_curves_cut is None:
        learning_curves_cut = FloatRange(0, 1, 'range', 0.2)

    output_settings = OutputSettings(project_folder=self.tmp_folder_path,
                                     save_output=False)
    test_hyperpipe = Hyperpipe(
        'test_pipe',
        learning_curves=learning_curves,
        learning_curves_cut=learning_curves_cut,
        metrics=['accuracy', 'recall', 'specificity'],
        best_config_metric='accuracy',
        inner_cv=self.inner_cv,
        output_settings=output_settings,
    )

    # the constructor arguments must show up on the cross-validation object
    self.assertEqual(test_hyperpipe.cross_validation.learning_curves,
                     learning_curves)
    if not learning_curves:
        self.assertIsNone(test_hyperpipe.cross_validation.learning_curves_cut)
    else:
        self.assertEqual(test_hyperpipe.cross_validation.learning_curves_cut,
                         learning_curves_cut)

    test_hyperpipe += PipelineElement('StandardScaler')
    test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]},
                                      random_state=42)
    test_hyperpipe += PipelineElement('SVC', {'C': [0.1], 'kernel': ['linear']},
                                      random_state=42)
    test_hyperpipe.fit(self.X, self.y)

    tested_configs = (
        test_hyperpipe.results_handler.results.outer_folds[0].tested_config_list
    )
    for config_result in tested_configs:
        for inner_fold_nr in range(self.inner_cv.n_splits):
            curves = config_result.inner_folds[inner_fold_nr].learning_curves

            if not learning_curves:
                self.assertEqual(curves, [])
                continue

            nr_of_cuts = len(learning_curves_cut.values)
            self.assertEqual(len(curves), nr_of_cuts)
            for learning_point_nr in range(nr_of_cuts):
                # entries 1 and 2 of a learning point are read as the test
                # and train metric dicts
                learning_point = curves[learning_point_nr]
                test_metrics = list(learning_point[1].keys())
                train_metrics = list(learning_point[2].keys())
                self.assertEqual(test_hyperpipe.optimization.metrics,
                                 test_metrics)
                self.assertEqual(test_hyperpipe.optimization.metrics,
                                 train_metrics)
def test_custom_atlas(self):
    """Use an atlas given by path and expect an error for a bad one.

    Transforming with an existing atlas file must succeed; transforming with
    the atlas name ``'XXXXX'`` must raise FileNotFoundError.
    """
    atlas_path = os.path.join(self.atlas_folder, 'AAL_SPM12/AAL.nii.gz')
    valid_atlas = PipelineElement('BrainAtlas', atlas_name=atlas_path,
                                  extract_mode='vec', batch_size=20)
    # the result is not inspected; the call only has to succeed
    valid_atlas.transform(self.X)

    with self.assertRaises(FileNotFoundError):
        missing_atlas = PipelineElement('BrainAtlas', atlas_name='XXXXX',
                                        extract_mode='vec', batch_size=20)
        missing_atlas.transform(self.X)
def setUp(self):
    """
    Prepare the pipeline elements and the grid-search optimizer under test.
    """
    scaler = PipelineElement("StandardScaler")
    pca = PipelineElement('PCA',
                          hyperparameters={'n_components': IntegerRange(5, 20)})
    svc = PipelineElement("SVC")

    self.pipeline_elements = [scaler, pca, svc]
    self.optimizer = GridSearchOptimizer()
    self.optimizer_name = 'grid_search'
def setUp(self):
    """
    Prepare the pipeline elements and the random-search optimizer under test.
    """
    scaler = PipelineElement("StandardScaler")
    pca = PipelineElement('PCA',
                          hyperparameters={'n_components': IntegerRange(5, 20)})
    svc = PipelineElement("SVC")

    self.pipeline_elements = [scaler, pca, svc]
    self.optimizer = RandomSearchOptimizer(n_configurations=5)
    self.optimizer_name = 'random_search'
def test_shall_continue(self):
    """Replay the performance constraint on the recorded inner-fold scores.

    A random-search pipe is fitted with ``self.constraint_object`` as
    performance constraint. The per-config inner-fold validation MSEs are
    then walked through again: the first ten configs set the threshold, and
    for every later config a running mean above ``threshold + std`` is only
    allowed at the last recorded inner fold.
    """
    # return_X_y is passed by keyword, matching the other load_boston call
    # in this test suite
    X, y = load_boston(return_X_y=True)

    inner_fold_length = 7

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe(
        name="performance_pipe",
        optimizer="random_search",
        optimizer_params={"limit_in_minutes": 2},
        metrics=["mean_squared_error"],
        best_config_metric="mean_squared_error",
        inner_cv=KFold(n_splits=inner_fold_length),
        eval_final_performance=True,
        performance_constraints=[self.constraint_object],
    )

    my_pipe += PipelineElement("StandardScaler")
    my_pipe += PipelineElement(
        "RandomForestRegressor",
        hyperparameters={"n_estimators": IntegerRange(5, 50)},
    )

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    # clip config results: one list of inner-fold MSEs per tested config.
    # The range is taken over `results`; ranging over the still-empty
    # `configs` list would leave it empty and skip every check below.
    # The last tested config stays excluded, as in the original bound.
    results = my_pipe.results.outer_folds[0].tested_config_list
    configs = [
        [x.validation.metrics["mean_squared_error"] for x in config.inner_folds]
        for config in results[:-1]
    ]

    # the threshold is the best (lowest) mean MSE among the first ten configs
    threshold = np.inf
    for val in configs[:10]:
        challenger = np.mean(val)
        if threshold > challenger:
            threshold = challenger

    originals_for_std = configs[:10]
    for val in configs[10:]:
        std = np.mean([np.std(x) for x in originals_for_std])
        for j, v in enumerate(val):
            if np.mean(val[:j + 1]) > threshold + std:
                # exceeding the limit is only allowed at the last recorded fold
                self.assertEqual(v, val[-1])
                continue
            if len(val) == inner_fold_length - 1 and np.mean(val) < threshold + std:
                threshold = np.mean(val)
        if len(val) > 1:
            originals_for_std.append(val)