def test_overwrite_result_folder(self):
    """
    Test correct handling of the output_settings.overwrite_results parameter.
    """
    def get_summary_file():
        return os.path.join(self.hyperpipe.output_settings.results_folder,
                            "photon_summary.txt")

    # Case 1: default
    output_settings1 = OutputSettings(project_folder=self.tmp_folder_path,
                                      save_output=True,
                                      overwrite_results=False)
    self.setup_hyperpipe(output_settings1)
    self.hyperpipe.fit(self.__X, self.__y)
    tmp_path = get_summary_file()

    time.sleep(2)

    # again with the same settings
    self.setup_hyperpipe(output_settings1)
    self.hyperpipe.fit(self.__X, self.__y)
    tmp_path2 = get_summary_file()

    # we expect a new, timestamped output folder each time
    self.assertNotEqual(tmp_path, tmp_path2)

    # Case 2, overwrite results: everything lands in the same folder
    output_settings2 = OutputSettings(project_folder=self.tmp_folder_path,
                                      save_output=True,
                                      overwrite_results=True)
    self.setup_hyperpipe(output_settings2)
    self.hyperpipe.fit(self.__X, self.__y)
    tmp_path = get_summary_file()
    tmp_date = os.path.getmtime(tmp_path)

    self.setup_hyperpipe(output_settings2)
    self.hyperpipe.fit(self.__X, self.__y)
    tmp_path2 = get_summary_file()
    tmp_date2 = os.path.getmtime(tmp_path2)

    # same folder, but the summary file is overwritten by the new analysis
    self.assertEqual(tmp_path, tmp_path2)
    self.assertNotEqual(tmp_date, tmp_date2)

    # Case 3: we have a cache folder
    self.hyperpipe.cache_folder = self.cache_folder_path
    shutil.rmtree(self.cache_folder_path, ignore_errors=True)
    self.hyperpipe.fit(self.__X, self.__y)
    self.assertTrue(os.path.exists(self.cache_folder_path))
def test_neuro_hyperpipe_parallelized_batched_caching(self):
    cache_path = self.cache_folder_path
    self.hyperpipe = Hyperpipe('complex_case',
                               inner_cv=KFold(n_splits=5),
                               outer_cv=KFold(n_splits=3),
                               optimizer='grid_search',
                               cache_folder=cache_path,
                               metrics=['mean_squared_error'],
                               best_config_metric='mean_squared_error',
                               output_settings=OutputSettings(
                                   project_folder=self.tmp_folder_path))

    nb = ParallelBranch("SubjectCaching", nr_of_processes=1)
    nb += PipelineElement.create("ResampleImages", StupidAdditionTransformer(),
                                 {'voxel_size': [3, 5, 10]}, batch_size=4)
    self.hyperpipe += nb
    self.hyperpipe += PipelineElement("StandardScaler", {})
    self.hyperpipe += PipelineElement("PCA", {'n_components': [3, 4]})
    self.hyperpipe += PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

    self.hyperpipe.fit(self.X, self.y)

    # assert cache is empty again
    nr_of_p_files = len(glob.glob(os.path.join(self.hyperpipe.cache_folder, "*.p")))
    print(nr_of_p_files)
    self.assertEqual(nr_of_p_files, 0)
def setUp(self):
    self.s_split = ShuffleSplit(n_splits=3, test_size=0.2, random_state=42)

    self.time_limit = 20

    settings = OutputSettings(project_folder='./tmp/')

    self.smac_helper = {"data": None, "initial_runs": None}

    # Scenario object
    scenario_dict = {"run_obj": "quality",
                     "deterministic": "true",
                     "wallclock_limit": self.time_limit}

    # DESIGN YOUR PIPELINE
    self.pipe = Hyperpipe('basic_svm_pipe',
                          optimizer='smac',
                          optimizer_params={'facade': SMAC4HPO,
                                            'scenario_dict': scenario_dict,
                                            'rng': 42,
                                            'smac_helper': self.smac_helper},
                          metrics=['accuracy'],
                          random_seed=42,
                          best_config_metric='accuracy',
                          inner_cv=self.s_split,
                          verbosity=0,
                          output_settings=settings)
def test_register_element(self):
    with self.assertRaises(ValueError):
        self.registry.register('MyCustomEstimator',
                               'custom_estimator.CustomEstimator',
                               'WrongType')

    self.registry.register('MyCustomEstimator',
                           'custom_estimator.CustomEstimator',
                           'Estimator')
    self.registry.activate()
    settings = OutputSettings(save_output=False, project_folder='./tmp/')

    # DESIGN YOUR PIPELINE
    pipe = Hyperpipe('custom_estimator_pipe',
                     optimizer='random_grid_search',
                     optimizer_params={'n_configurations': 2},
                     metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                     best_config_metric='accuracy',
                     outer_cv=KFold(n_splits=2),
                     inner_cv=KFold(n_splits=2),
                     verbosity=1,
                     output_settings=settings)

    pipe += PipelineElement('MyCustomEstimator')
    pipe.fit(np.random.randn(30, 30), np.random.randint(0, 2, 30))

    self.registry.delete('MyCustomEstimator')
    os.remove(os.path.join(self.custom_folder, 'CustomElements.json'))
def test_huge_combinations(self):
    hp = Hyperpipe(
        "huge_combinations",
        metrics=["accuracy"],
        best_config_metric="accuracy",
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )

    hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})
    stack = Stack("ensemble")
    for i in range(20):
        stack += PipelineElement(
            "SVC",
            hyperparameters={
                "C": FloatRange(0.001, 5),
                "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
            },
        )
    hp += stack
    hp += PipelineElement("SVC",
                          hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})
    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def test_three_levels_of_feature_importances(self):
    hyperpipe = Hyperpipe(
        "fimps",
        inner_cv=KFold(n_splits=4),
        outer_cv=KFold(n_splits=3),
        metrics=["mean_absolute_error", "mean_squared_error"],
        best_config_metric="mean_squared_error",
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )
    hyperpipe += PipelineElement("StandardScaler")
    hyperpipe += PipelineElement("DecisionTreeRegressor")
    X, y = load_boston(return_X_y=True)
    hyperpipe.fit(X, y)

    expected_nr_of_feature_importances = X.shape[1]
    self.assertTrue(
        len(hyperpipe.results.best_config_feature_importances)
        == expected_nr_of_feature_importances)

    for outer_fold in hyperpipe.results.outer_folds:
        self.assertTrue(
            len(outer_fold.best_config.best_config_score.feature_importances)
            == expected_nr_of_feature_importances)
        for inner_fold in outer_fold.best_config.inner_folds:
            self.assertTrue(
                len(inner_fold.feature_importances)
                == expected_nr_of_feature_importances)
def test_save_optimum_pipe_custom_element(self):
    tmp_path = os.path.join(self.tmp_folder_path, 'optimum_pipypipe')
    settings = OutputSettings(project_folder=tmp_path, overwrite_results=True)

    my_pipe = Hyperpipe('hyperpipe',
                        optimizer='random_grid_search',
                        optimizer_params={'n_configurations': 1},
                        metrics=['accuracy', 'precision', 'recall'],
                        best_config_metric='f1_score',
                        outer_cv=KFold(n_splits=2),
                        inner_cv=KFold(n_splits=2),
                        verbosity=1,
                        output_settings=settings)
    my_pipe += PipelineElement('KerasDnnClassifier', {}, epochs=1,
                               hidden_layer_sizes=[5])
    my_pipe.fit(self.__X, self.__y)

    model_path = os.path.join(my_pipe.output_settings.results_folder,
                              'photon_best_model.photon')
    self.assertTrue(os.path.exists(model_path))

    # check if load_optimum_pipe also works
    # check if we have the meta information recovered
    loaded_optimum_pipe = Hyperpipe.load_optimum_pipe(model_path)
    self.assertIsNotNone(loaded_optimum_pipe._meta_information)
def test_write_convenience_files(self):
    """
    Test output file creation. Files are only written if
    output_settings.save_output is True.
    """
    for file in self.files:
        self.assertTrue(os.path.isfile(
            os.path.join(self.output_settings.results_folder, file)))

    # correct number of rows (one prediction per test sample, plus header)
    with open(os.path.join(self.output_settings.results_folder,
                           'best_config_predictions.csv')) as f:
        self.assertEqual(
            sum([outer_fold.number_samples_test
                 for outer_fold in self.hyperpipe.results.outer_folds]),
            sum(1 for _ in f) - 1)

    shutil.rmtree(self.tmp_folder_path, ignore_errors=True)
    self.output_settings = OutputSettings(project_folder=self.tmp_folder_path,
                                          save_output=False)
    self.hyperpipe.fit(self.__X, self.__y)
    self.assertIsNone(self.output_settings.results_folder)
def create_hyperpipe(self):
    # these imports are needed here for the parallelisation
    from photonai.base import Hyperpipe, PipelineElement, OutputSettings
    from photonai.optimization import FloatRange, Categorical, IntegerRange
    from sklearn.model_selection import GroupKFold
    from sklearn.model_selection import KFold

    settings = OutputSettings(
        mongodb_connect_url='mongodb://localhost:27017/photon_results',
        project_folder=self.tmp_folder_path)

    my_pipe = Hyperpipe('permutation_test_1',
                        optimizer='grid_search',
                        metrics=['accuracy', 'precision', 'recall'],
                        best_config_metric='accuracy',
                        outer_cv=GroupKFold(n_splits=2),
                        inner_cv=KFold(n_splits=2),
                        calculate_metrics_across_folds=True,
                        eval_final_performance=True,
                        verbosity=1,
                        output_settings=settings)

    # Add transformer elements
    my_pipe += PipelineElement("StandardScaler", hyperparameters={},
                               test_disabled=False, with_mean=True, with_std=True)
    my_pipe += PipelineElement("PCA",
                               hyperparameters={'n_components': IntegerRange(3, 5)},
                               test_disabled=False)

    # Add estimator
    my_pipe += PipelineElement("SVC",
                               hyperparameters={'kernel': ['linear', 'rbf']},
                               # 'C': FloatRange(0.1, 5),
                               gamma='scale', max_iter=1000000)
    return my_pipe
def test_register_element(self): with self.assertRaises(ValueError): self.registry.register("MyCustomEstimator", "custom_estimator.CustomEstimator", "WrongType") self.registry.register("MyCustomEstimator", "custom_estimator.CustomEstimator", "Estimator") self.registry.activate() settings = OutputSettings(save_output=False, project_folder="./tmp/") # DESIGN YOUR PIPELINE pipe = Hyperpipe( "custom_estimator_pipe", optimizer="random_grid_search", optimizer_params={"n_configurations": 2}, metrics=["accuracy", "precision", "recall", "balanced_accuracy"], best_config_metric="accuracy", outer_cv=KFold(n_splits=2), inner_cv=KFold(n_splits=2), verbosity=1, output_settings=settings, ) pipe += PipelineElement("MyCustomEstimator") pipe.fit(np.random.randn(30, 30), np.random.randint(0, 2, 30)) self.registry.delete("MyCustomEstimator") os.remove(os.path.join(self.custom_folder, "CustomElements.json"))
def setUp(self):
    self.time_limit = 60 * 2

    settings = OutputSettings(project_folder="./tmp/")

    self.smac_helper = {"data": None, "initial_runs": None}

    # DESIGN YOUR PIPELINE
    self.pipe = Hyperpipe(
        "basic_svm_pipe",  # the name of your pipeline
        optimizer="smac",  # which optimizer PHOTON shall use
        optimizer_params={
            "wallclock_limit": self.time_limit,
            "smac_helper": self.smac_helper,
            "run_limit": 20,
        },
        metrics=["accuracy"],  # the performance metrics of your interest
        best_config_metric="accuracy",
        inner_cv=KFold(n_splits=3),  # test each configuration three times
        verbosity=0,
        output_settings=settings,
    )
def create_hyperpipes(
    metrics: list = None,
    inner_cv=KFold(n_splits=3, shuffle=True, random_state=42),
    outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
    plots: bool = False,
    optimizer: str = "random_grid_search",
    optimizer_params: dict = {"n_configurations": 10},
    eval_final_performance: bool = True,
    performance_constraints: list = None,
    cache_folder="./cache",
    tmp_folder="./tmp",
):
    pipe = Hyperpipe(
        name="architecture_test_pipe",
        output_settings=OutputSettings(project_folder=tmp_folder, plots=plots),
        optimizer=optimizer,
        optimizer_params=optimizer_params,
        best_config_metric="accuracy",
        metrics=metrics,
        inner_cv=inner_cv,
        outer_cv=outer_cv,
        eval_final_performance=eval_final_performance,
        performance_constraints=performance_constraints,
        cache_folder=cache_folder,
        verbosity=1,
    )
    return pipe
def run_parallelized_permutation(hyperpipe_constructor, X, perm_run, y_perm,
                                 permutation_id, verbosity=-1, **kwargs):
    # Create a new instance of the hyperpipe and set all parameters
    perm_pipe = hyperpipe_constructor()
    perm_pipe.verbosity = verbosity
    perm_pipe.name = perm_pipe.name + '_perm_' + str(perm_run)
    perm_pipe.permutation_id = permutation_id

    # print(y_perm)
    po = OutputSettings(
        mongodb_connect_url=perm_pipe.output_settings.mongodb_connect_url,
        save_output=False)
    perm_pipe.output_settings = po
    perm_pipe.calculate_metrics_across_folds = False

    try:
        # Fit hyperpipe
        # print instead of logging: the worker processes share no common logger
        print('Fitting permutation ' + str(perm_run) + ' ...')
        perm_pipe.fit(X, y_perm, **kwargs)
        perm_pipe.results.computation_completed = True
        perm_pipe.results.outer_folds = list()
        perm_pipe.results.best_config = None
        perm_pipe.results.save()
        print('Finished permutation ' + str(perm_run) + ' ...')
    except Exception as e:
        if perm_pipe.results is not None:
            perm_pipe.results.permutation_failed = str(e)
            perm_pipe.results.save()
        print('Failed permutation ' + str(perm_run) + ' ...')
    return perm_run
def create_hyperpipe(self):
    self.hyperpipe = Hyperpipe('optimizer_test',
                               output_settings=OutputSettings(project_folder='./tmp'),
                               metrics=['accuracy'],
                               best_config_metric='accuracy',
                               inner_cv=KFold(n_splits=3),
                               outer_cv=ShuffleSplit(n_splits=2),
                               optimizer=self.optimizer_name)
def test_one_hyperpipe(learning_curves, learning_curves_cut):
    if learning_curves and learning_curves_cut is None:
        learning_curves_cut = FloatRange(0, 1, 'range', 0.2)
    output_settings = OutputSettings(project_folder=self.tmp_folder_path,
                                     save_output=False)
    test_hyperpipe = Hyperpipe('test_pipe',
                               learning_curves=learning_curves,
                               learning_curves_cut=learning_curves_cut,
                               metrics=['accuracy', 'recall', 'specificity'],
                               best_config_metric='accuracy',
                               inner_cv=self.inner_cv,
                               output_settings=output_settings)

    self.assertEqual(test_hyperpipe.cross_validation.learning_curves,
                     learning_curves)
    if learning_curves:
        self.assertEqual(test_hyperpipe.cross_validation.learning_curves_cut,
                         learning_curves_cut)
    else:
        self.assertIsNone(test_hyperpipe.cross_validation.learning_curves_cut)

    test_hyperpipe += PipelineElement('StandardScaler')
    test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]},
                                      random_state=42)
    test_hyperpipe += PipelineElement('SVC', {'C': [0.1], 'kernel': ['linear']},
                                      random_state=42)
    test_hyperpipe.fit(self.X, self.y)

    config_results = test_hyperpipe.results_handler.results.outer_folds[0].tested_config_list
    config_num = len(config_results)
    for config_nr in range(config_num):
        for inner_fold_nr in range(self.inner_cv.n_splits):
            curves = config_results[config_nr].inner_folds[inner_fold_nr].learning_curves
            if learning_curves:
                self.assertEqual(len(curves), len(learning_curves_cut.values))
                for learning_point_nr in range(len(learning_curves_cut.values)):
                    test_metrics = list(curves[learning_point_nr][1].keys())
                    train_metrics = list(curves[learning_point_nr][2].keys())
                    self.assertEqual(test_hyperpipe.optimization.metrics,
                                     test_metrics)
                    self.assertEqual(test_hyperpipe.optimization.metrics,
                                     train_metrics)
            else:
                self.assertEqual(curves, [])
def test_metrics_and_aggregations_eval_performance_false(self):
    self.hyperpipe = Hyperpipe(
        'test_prediction_collection',
        inner_cv=KFold(n_splits=self.inner_fold_nr),
        metrics=['mean_absolute_error', 'mean_squared_error'],
        eval_final_performance=False,
        best_config_metric='mean_absolute_error',
        calculate_metrics_across_folds=True,
        output_settings=OutputSettings(project_folder=self.tmp_folder_path))

    self.test_metrics_and_aggregations()
def setUp(self): """ Set default start settings for all tests. """ super(ResultsHandlerTest, self).setUp() self.files = [ "best_config_predictions.csv", "time_monitor.csv", "time_monitor_pie.png", "photon_result_file.p", "photon_summary.txt", "photon_best_model.photon", "optimum_pipe_feature_importances_backmapped.npz", "photon_code.py", "optimizer_history.png", ] self.output_settings = OutputSettings( project_folder=self.tmp_folder_path, save_output=True) self.ss_pipe_element = PipelineElement("StandardScaler") self.pca_pipe_element = PipelineElement("PCA", {"n_components": [1, 2]}, random_state=42) self.svc_pipe_element = PipelineElement( "SVC", { "C": [0.1], "kernel": ["linear"] }, # 'rbf', 'sigmoid'] random_state=42, ) self.inner_cv_object = KFold(n_splits=3) self.metrics = ["accuracy", "recall", "precision"] self.best_config_metric = "accuracy" self.hyperpipe = Hyperpipe( "god", inner_cv=self.inner_cv_object, metrics=self.metrics, best_config_metric=self.best_config_metric, outer_cv=KFold(n_splits=2), output_settings=self.output_settings, verbosity=1, ) self.hyperpipe += self.ss_pipe_element self.hyperpipe += self.pca_pipe_element self.hyperpipe.add(self.svc_pipe_element) dataset = load_breast_cancer() self.__X = dataset.data self.__y = dataset.target self.hyperpipe.fit(self.__X, self.__y)
def test_branch_in_branch(self): """ Test for deep Pipeline. """ my_pipe = Hyperpipe( "basic_stacking", optimizer="grid_search", metrics=["accuracy", "precision", "recall"], best_config_metric="f1_score", outer_cv=KFold(n_splits=2), inner_cv=KFold(n_splits=3), verbosity=1, cache_folder="./cache/", output_settings=OutputSettings(project_folder="./tmp/"), ) # BRANCH WITH QUANTILTRANSFORMER AND DECISIONTREECLASSIFIER tree_qua_branch = Branch("tree_branch") tree_qua_branch += PipelineElement("QuantileTransformer") tree_qua_branch += PipelineElement( "DecisionTreeClassifier", {"min_samples_split": IntegerRange(2, 4)}, criterion="gini", ) # BRANCH WITH MinMaxScaler AND DecisionTreeClassifier svm_mima_branch = Branch("svm_branch") svm_mima_branch += PipelineElement("MinMaxScaler") svm_mima_branch += PipelineElement( "SVC", { "kernel": ["rbf", "linear"], # Categorical(['rbf', 'linear']), "C": IntegerRange(0.01, 2.0), }, gamma="auto", ) # BRANCH WITH StandardScaler AND KNeighborsClassifier knn_sta_branch = Branch("neighbour_branch") knn_sta_branch += PipelineElement("StandardScaler") knn_sta_branch += PipelineElement("KNeighborsClassifier") # voting = True to mean the result of every branch my_pipe += Stack("final_stack", [tree_qua_branch, svm_mima_branch, knn_sta_branch]) my_pipe += PipelineElement("LogisticRegression", solver="lbfgs") json_transformer = JsonTransformer() pipe_json = json_transformer.create_json(my_pipe) my_pipe_reload = json_transformer.from_json(pipe_json) pipe_json_reload = pipe_json = json_transformer.create_json( my_pipe_reload) self.assertEqual(pipe_json, pipe_json_reload)
def test_shall_continue(self):
    X, y = load_boston(return_X_y=True)

    inner_fold_length = 7
    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe(
        name='performance_pipe',
        optimizer='random_search',
        optimizer_params={'limit_in_minutes': 2},
        metrics=['mean_squared_error'],
        best_config_metric='mean_squared_error',
        inner_cv=KFold(n_splits=inner_fold_length),
        eval_final_performance=True,
        output_settings=OutputSettings(project_folder='./tmp'),
        performance_constraints=[self.constraint_object])

    my_pipe += PipelineElement('StandardScaler')
    my_pipe += PipelineElement(
        'RandomForestRegressor',
        hyperparameters={'n_estimators': IntegerRange(5, 50)})

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    # collect the per-fold mean_squared_error of every tested config
    results = my_pipe.results.outer_folds[0].tested_config_list
    configs = []
    for i in range(len(results)):
        configs.append([x.validation.metrics['mean_squared_error']
                        for x in results[i].inner_folds])

    # threshold: best mean performance among the first ten configs
    threshold = np.inf
    for val in configs[:10]:
        challenger = np.mean(val)
        if threshold > challenger:
            threshold = challenger

    originals_for_std = configs[:10]
    for i, val in enumerate(configs[10:]):
        std = np.mean([np.std(x) for x in originals_for_std])
        for j, v in enumerate(val):
            # once the running mean exceeds threshold + std, the config
            # must have been stopped, i.e. v is its last recorded fold
            if np.mean(val[:j + 1]) > threshold + std:
                self.assertEqual(v, val[-1])
                continue
        if len(val) == inner_fold_length - 1 and np.mean(val) < threshold + std:
            threshold = np.mean(val)
        if len(val) > 1:
            originals_for_std.append(val)
def setup_hyperpipe(self, output_settings=None):
    if output_settings is None:
        output_settings = OutputSettings(project_folder=self.tmp_folder_path)
    self.hyperpipe = Hyperpipe('god',
                               inner_cv=self.inner_cv_object,
                               metrics=self.metrics,
                               best_config_metric=self.best_config_metric,
                               output_settings=output_settings)
    self.hyperpipe += self.ss_pipe_element
    self.hyperpipe += self.pca_pipe_element
    self.hyperpipe.add(self.svc_pipe_element)
def test_branch_in_branch(self): """ Test for deep Pipeline. """ my_pipe = Hyperpipe( 'basic_stacking', optimizer='grid_search', metrics=['accuracy', 'precision', 'recall'], best_config_metric='f1_score', outer_cv=KFold(n_splits=2), inner_cv=KFold(n_splits=3), verbosity=1, cache_folder="./cache/", output_settings=OutputSettings(project_folder='./tmp/')) # BRANCH WITH QUANTILTRANSFORMER AND DECISIONTREECLASSIFIER tree_qua_branch = Branch('tree_branch') tree_qua_branch += PipelineElement('QuantileTransformer') tree_qua_branch += PipelineElement( 'DecisionTreeClassifier', {'min_samples_split': IntegerRange(2, 4)}, criterion='gini') # BRANCH WITH MinMaxScaler AND DecisionTreeClassifier svm_mima_branch = Branch('svm_branch') svm_mima_branch += PipelineElement('MinMaxScaler') svm_mima_branch += PipelineElement( 'SVC', { 'kernel': ['rbf', 'linear'], # Categorical(['rbf', 'linear']), 'C': IntegerRange(0.01, 2.0) }, gamma='auto') # BRANCH WITH StandardScaler AND KNeighborsClassifier knn_sta_branch = Branch('neighbour_branch') knn_sta_branch += PipelineElement('StandardScaler') knn_sta_branch += PipelineElement('KNeighborsClassifier') # voting = True to mean the result of every branch my_pipe += Stack('final_stack', [tree_qua_branch, svm_mima_branch, knn_sta_branch]) my_pipe += PipelineElement('LogisticRegression', solver='lbfgs') json_transformer = JsonTransformer() pipe_json = json_transformer.create_json(my_pipe) my_pipe_reload = json_transformer.from_json(pipe_json) pipe_json_reload = pipe_json = json_transformer.create_json( my_pipe_reload) self.assertEqual(pipe_json, pipe_json_reload)
def test_inverse_transform(self):
    settings = OutputSettings(project_folder=self.tmp_folder_path,
                              overwrite_results=True)

    # DESIGN YOUR PIPELINE
    pipe = Hyperpipe(
        "Limbic_System",
        optimizer="grid_search",
        metrics=["mean_absolute_error"],
        best_config_metric="mean_absolute_error",
        outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
        inner_cv=ShuffleSplit(n_splits=1, test_size=0.2),
        verbosity=2,
        cache_folder=self.cache_folder_path,
        eval_final_performance=True,
        output_settings=settings,
    )

    # PICK AN ATLAS
    atlas = PipelineElement(
        "BrainAtlas",
        rois=["Hippocampus_L", "Amygdala_L"],
        atlas_name="AAL",
        extract_mode="vec",
        batch_size=20,
    )

    # EITHER ADD A NEURO BRANCH OR THE ATLAS ITSELF
    neuro_branch = NeuroBranch("NeuroBranch")
    neuro_branch += atlas
    pipe += neuro_branch

    pipe += PipelineElement("LinearSVR")

    pipe.fit(self.X, self.y)

    # GET IMPORTANCE SCORES
    handler = ResultsHandler(pipe.results)
    importance_scores_optimum_pipe = handler.results.best_config_feature_importances

    manual_img, _, _ = pipe.optimum_pipe.inverse_transform(
        importance_scores_optimum_pipe, None)
    img = image.load_img(os.path.join(
        self.tmp_folder_path,
        "Limbic_System_results/optimum_pipe_feature_importances_backmapped.nii.gz"))
    self.assertTrue(np.array_equal(manual_img.get_data(), img.get_data()))
def test_huge_combinations(self):
    hp = Hyperpipe('huge_combinations',
                   inner_cv=KFold(n_splits=3),
                   metrics=['accuracy'],
                   best_config_metric='accuracy',
                   output_settings=OutputSettings(project_folder=self.tmp_folder_path))

    hp += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})
    stack = Stack('ensemble')
    for i in range(20):
        stack += PipelineElement('SVC',
                                 hyperparameters={'C': FloatRange(0.001, 5),
                                                  'kernel': ["linear", "rbf",
                                                             "sigmoid", "polynomial"]})
    hp += stack
    hp += PipelineElement("SVC",
                          hyperparameters={'kernel': ["linear", "rbf", "sigmoid"]})
    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def setUp(self):
    super(ResultHandlerAndHelperTests, self).setUp()

    self.inner_fold_nr = 10
    self.outer_fold_nr = 5

    self.y_true = np.linspace(1, 100, 100)
    self.X = self.y_true
    self.hyperpipe = Hyperpipe(
        'test_prediction_collection',
        inner_cv=KFold(n_splits=self.inner_fold_nr),
        outer_cv=KFold(n_splits=self.outer_fold_nr),
        metrics=['mean_absolute_error', 'mean_squared_error'],
        best_config_metric='mean_absolute_error',
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
        verbosity=0)
def create_hyperpipe():
    # these imports are needed here for the parallelisation
    from photonai.base import Hyperpipe, PipelineElement, OutputSettings
    from photonai.optimization import FloatRange, Categorical, IntegerRange
    from sklearn.model_selection import GroupKFold
    from sklearn.model_selection import KFold

    settings = OutputSettings(
        mongodb_connect_url="mongodb://trap-umbriel:27017/photon_results",
        project_folder="./tmp/",
    )
    my_pipe = Hyperpipe(
        "permutation_test_1",
        optimizer="grid_search",
        metrics=["accuracy", "precision", "recall"],
        best_config_metric="accuracy",
        outer_cv=GroupKFold(n_splits=2),
        inner_cv=KFold(n_splits=2),
        calculate_metrics_across_folds=True,
        eval_final_performance=True,
        verbosity=1,
        output_settings=settings,
    )

    # Add transformer elements
    my_pipe += PipelineElement(
        "StandardScaler",
        hyperparameters={},
        test_disabled=True,
        with_mean=True,
        with_std=True,
    )
    my_pipe += PipelineElement(
        "PCA",
        # hyperparameters={'n_components': IntegerRange(5, 15)},
        test_disabled=False,
    )

    # Add estimator
    my_pipe += PipelineElement(
        "SVC",
        hyperparameters={"kernel": ["linear", "rbf"]},  # 'C': FloatRange(0.1, 5),
        gamma="scale",
        max_iter=1000000,
    )
    return my_pipe
def setUp(self): """ Set default start settings for all tests. """ super(ResultsHandlerTest, self).setUp() self.files = [ 'best_config_predictions.csv', 'time_monitor.csv', 'time_monitor_pie.png', 'photon_result_file.p', 'photon_summary.txt', 'photon_best_model.photon', 'optimum_pipe_feature_importances_backmapped.npz', 'photon_code.py', 'optimizer_history.png' ] self.output_settings = OutputSettings( project_folder=self.tmp_folder_path, save_output=True) self.ss_pipe_element = PipelineElement('StandardScaler') self.pca_pipe_element = PipelineElement('PCA', {'n_components': [1, 2]}, random_state=42) self.svc_pipe_element = PipelineElement( 'SVC', { 'C': [0.1], 'kernel': ['linear'] }, # 'rbf', 'sigmoid'] random_state=42) self.inner_cv_object = KFold(n_splits=3) self.metrics = ["accuracy", 'recall', 'precision'] self.best_config_metric = "accuracy" self.hyperpipe = Hyperpipe('god', inner_cv=self.inner_cv_object, metrics=self.metrics, best_config_metric=self.best_config_metric, outer_cv=KFold(n_splits=2), output_settings=self.output_settings, verbosity=1) self.hyperpipe += self.ss_pipe_element self.hyperpipe += self.pca_pipe_element self.hyperpipe.add(self.svc_pipe_element) dataset = load_breast_cancer() self.__X = dataset.data self.__y = dataset.target self.hyperpipe.fit(self.__X, self.__y)
def test_load_from_file(self):
    X, y = load_breast_cancer(return_X_y=True)
    my_pipe = Hyperpipe(
        'load_results_file_test',
        metrics=['accuracy'],
        best_config_metric='accuracy',
        output_settings=OutputSettings(project_folder='./tmp'))
    my_pipe += PipelineElement("StandardScaler")
    my_pipe += PipelineElement("SVC")
    my_pipe.fit(X, y)

    results_file = os.path.join(my_pipe.output_settings.results_folder,
                                "photon_result_file.p")
    my_result_handler = ResultsHandler()
    my_result_handler.load_from_file(results_file)
    self.assertIsInstance(my_result_handler.results, MDBHyperpipe)
def create_hyperpipe_no_mongo(self):
    from photonai.base import Hyperpipe, OutputSettings
    from sklearn.model_selection import KFold

    settings = OutputSettings(project_folder=self.tmp_folder_path)
    my_pipe = Hyperpipe('permutation_test_1',
                        optimizer='grid_search',
                        metrics=['accuracy', 'precision', 'recall'],
                        best_config_metric='accuracy',
                        outer_cv=KFold(n_splits=2),
                        inner_cv=KFold(n_splits=2),
                        calculate_metrics_across_folds=True,
                        eval_final_performance=True,
                        verbosity=1,
                        output_settings=settings)
    return my_pipe
def test_metrics_and_aggregations_no_outer_cv_but_eval_performance_true(self):
    self.hyperpipe = Hyperpipe(
        "test_prediction_collection",
        outer_cv=KFold(n_splits=self.outer_fold_nr),
        inner_cv=KFold(n_splits=self.inner_fold_nr),
        metrics=["mean_absolute_error", "mean_squared_error"],
        eval_final_performance=False,
        best_config_metric="mean_absolute_error",
        calculate_metrics_per_fold=True,
        calculate_metrics_across_folds=True,
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )

    self.test_metrics_and_aggregations()
def test_class_switch(self):
    """
    Test JSON serialization round-trip for a pipeline containing Switch elements.
    """
    X, y = load_breast_cancer(return_X_y=True)

    my_pipe = Hyperpipe('basic_switch_pipe',
                        optimizer='random_grid_search',
                        optimizer_params={'n_configurations': 15},
                        metrics=['accuracy', 'precision', 'recall'],
                        best_config_metric='accuracy',
                        outer_cv=KFold(n_splits=3),
                        inner_cv=KFold(n_splits=5),
                        verbosity=1,
                        output_settings=OutputSettings(project_folder='./tmp/'))

    # Transformer Switch
    my_pipe += Switch('TransformerSwitch',
                      [PipelineElement('StandardScaler'),
                       PipelineElement('PCA', test_disabled=True)])

    # Estimator Switch
    svm = PipelineElement('SVC',
                          hyperparameters={'kernel': ['rbf', 'linear']})
    tree = PipelineElement('DecisionTreeClassifier',
                           hyperparameters={'min_samples_split': IntegerRange(2, 5),
                                            'min_samples_leaf': IntegerRange(1, 5),
                                            'criterion': ['gini', 'entropy']})
    my_pipe += Switch('EstimatorSwitch', [svm, tree])

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                         elements_to_dict(my_pipe_reload.copy_me()))