def test_NumberRange(self):
    """
    Test for class IntegerRange and FloatRange.

    Transforms one range per supported ``range_type`` ("linspace",
    "geomspace", "range", "logspace") and compares its ``values`` with the
    expected sequence, then checks the configurations that are expected to
    raise a ValueError. Reads ``self.start`` and ``self.end``, which are set
    outside this method.
    """
    expected_linspace = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    number_range_linspace = IntegerRange(start=1, stop=9, num=9, range_type="linspace")
    number_range_linspace.transform()
    self.assertListEqual(expected_linspace, number_range_linspace.values)

    expected_geomspace = [1, 10, 100, 1000, 10000]
    number_range_geomspace = IntegerRange(1, 10000, num=5, range_type="geomspace")
    number_range_geomspace.transform()
    self.assertListEqual(expected_geomspace, number_range_geomspace.values)

    number_range_range = IntegerRange(self.start, self.end, step=2, range_type="range")
    number_range_range.transform()
    self.assertListEqual(number_range_range.values,
                         list(np.arange(self.start, self.end, 2)))

    # Float values are compared approximately rather than for exact equality.
    number_range_logspace = FloatRange(-1, 1, num=50, range_type='logspace')
    number_range_logspace.transform()
    np.testing.assert_array_almost_equal(number_range_logspace.values,
                                         np.logspace(-1, 1, num=50).tolist())

    # error tests
    # geomspace with start=0: construction or transform must raise
    with self.assertRaises(ValueError):
        number_range = IntegerRange(start=0, stop=self.end, range_type="geomspace")
        number_range.transform()

    # logspace on an IntegerRange: construction or transform must raise
    with self.assertRaises(ValueError):
        number_range = IntegerRange(start=1, stop=15, range_type="logspace")
        number_range.transform()

    # unknown range type: the constructor itself must raise
    with self.assertRaises(ValueError):
        IntegerRange(start=self.start, stop=self.end, range_type="ownspace")
def setUp(self):
    """
    Create one hyperparameter object of each kind as fixtures for the tests.
    """
    letters = ["a", "b", "c", "d", "e", "f", "g", "h"]

    self.intger_range = IntegerRange(2, 6)
    self.float_range = FloatRange(0.1, 5.7)
    self.categorical = Categorical(letters)
    self.bool = BooleanSwitch()
def test_domain(self):
    """
    Compare the ``values`` of every fixture with the expected value list.
    """
    # The numeric ranges are transformed before their values are read.
    for numeric_range in (self.float_range, self.intger_range):
        numeric_range.transform()

    expected_integers = list(np.arange(2, 6))
    self.assertListEqual(self.intger_range.values, expected_integers)

    expected_floats = list(np.linspace(0.1, 5.7, dtype=np.float64))
    self.assertListEqual(self.float_range.values, expected_floats)

    # Same check on a wide interval with a negative, non-round start.
    lower, upper = -300.57, np.pi * 4000
    big_float_range = FloatRange(lower, upper)
    big_float_range.transform()
    self.assertListEqual(big_float_range.values, list(np.linspace(lower, upper)))

    expected_letters = ["a", "b", "c", "d", "e", "f", "g", "h"]
    self.assertListEqual(self.categorical.values, expected_letters)
    self.assertListEqual(self.bool.values, [True, False])
def test_classification_2(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with an estimator
    switch holding an SVC and a RandomForestClassifier.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # Simple estimator Switch
        estimator_choice = Switch("estimator_switch")

        svc_grid = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        estimator_choice += PipelineElement("SVC", hyperparameters=svc_grid)

        forest_grid = {
            "min_samples_split": FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
        }
        estimator_choice += PipelineElement(
            "RandomForestClassifier", hyperparameters=forest_grid
        )

        pipe += estimator_choice
        self.run_hyperpipe(pipe, self.classification)
def test_cv_config_and_dummy_nr(self):
    """
    Fit ``self.hyperpipe`` and check the bookkeeping of the results.

    Verifies that a version is stored, that the number of outer folds, inner
    folds and tested configurations matches the expectation, and finally
    calls ``self.check_for_dummy()``. Reads ``self.outer_fold_nr`` and
    ``self.inner_fold_nr``, which are set outside this method.
    """
    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases -- confirm the version this test suite is pinned to.
    X, y = load_boston(return_X_y=True)

    self.hyperpipe += PipelineElement('StandardScaler')
    self.hyperpipe += PipelineElement('PCA', {'n_components': IntegerRange(3, 5)})
    self.hyperpipe += PipelineElement('SVR', {'C': FloatRange(0.001, 10, num=5),
                                              'kernel': Categorical(['linear', 'rbf'])})
    self.hyperpipe.fit(X, y)

    # number of configurations: PCA n_components * SVR C * SVR kernel
    expected_configs = 2 * 5 * 2

    # check version is present
    self.assertIsNotNone(self.hyperpipe.results.version)

    # check nr of outer and inner folds
    # assertEqual reports both numbers on failure, unlike assertTrue(a == b)
    self.assertEqual(len(self.hyperpipe.results.outer_folds), self.outer_fold_nr)
    self.assertEqual(len(self.hyperpipe.cross_validation.outer_folds), self.outer_fold_nr)

    # only the fold collections are needed, the outer fold ids are not
    for inner_folds in self.hyperpipe.cross_validation.inner_folds.values():
        self.assertEqual(len(inner_folds), self.inner_fold_nr)

    for outer_fold_result in self.hyperpipe.results.outer_folds:
        # check that we have the right amount of configs tested in each outer fold
        self.assertEqual(len(outer_fold_result.tested_config_list), expected_configs)

        for config_result in outer_fold_result.tested_config_list:
            # check that we have the right amount of inner-folds per config
            self.assertEqual(len(config_result.inner_folds), self.inner_fold_nr)

    self.check_for_dummy()
def test_huge_combinations(self):
    """
    Fitting a hyperpipe with a very large configuration space must raise a
    Warning.

    The pipe consists of a PCA, a stack of 20 SVC elements (each with a
    FloatRange for ``C`` and four kernels) and a final SVC. Output is written
    below ``self.tmp_folder_path``.
    """
    hp = Hyperpipe(
        "huge_combinations",
        metrics=["accuracy"],
        best_config_metric="accuracy",
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )

    hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})

    stack = Stack("ensemble")
    # the loop index is not used; every stacked element is configured alike
    for _ in range(20):
        stack += PipelineElement(
            "SVC",
            hyperparameters={
                "C": FloatRange(0.001, 5),
                "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
            },
        )
    hp += stack

    hp += PipelineElement(
        "SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})

    # return_X_y is passed by keyword: current scikit-learn releases accept
    # it only as a keyword argument.
    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def test_classification_9(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with scaler, sample
    pairing, two feature branches stacked side by side and a random forest.
    """
    def feature_branch(name, indices):
        # One branch: keep the given feature columns, then an optional PCA.
        branch = Branch(name)
        branch += DataFilter(indices=indices)
        branch += PipelineElement('PCA',
                                  hyperparameters={'n_components': Categorical([None, 5])},
                                  test_disabled=True)
        return branch

    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # crazy everything
        pipe += PipelineElement('StandardScaler')
        pairing_grid = {'draw_limit': [100],
                        'generator': Categorical(['nearest_pair', 'random_pair'])}
        pipe += PipelineElement('SamplePairingClassification', pairing_grid,
                                distance_metric='euclidean', test_disabled=True)

        # Each branch receives one half of the feature columns. The PCA in a
        # branch can be disabled (test_disabled=True).
        n_features = self.X_shape[1]
        half = int(np.floor(n_features / 2))

        # first half of the features: columns 0 .. half - 1
        source1_branch = feature_branch('source1_features',
                                        np.arange(start=0, stop=half))
        # second half of the features: columns half .. n_features - 1
        source2_branch = feature_branch('source2_features',
                                        np.arange(start=half, stop=n_features))

        # setup source branches and stack their output (i.e. horizontal concatenation)
        pipe += Stack('source_stack', elements=[source1_branch, source2_branch])

        # final estimator with stack output as features
        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                       range_type='range')}
        pipe += PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)

        self.run_hyperpipe(pipe, self.classification)
def test_classification_6(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with a stack of an
    SVC and a RandomForestClassifier followed by a PhotonVotingClassifier.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # Simple estimator Stack; a voting classifier follows the stack
        svc_grid = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        svc_element = PipelineElement("SVC", hyperparameters=svc_grid)

        forest_grid = {
            "min_samples_split": FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
        }
        forest_element = PipelineElement(
            "RandomForestClassifier", hyperparameters=forest_grid
        )

        pipe += Stack("estimator_stack", elements=[svc_element, forest_element])
        pipe += PipelineElement("PhotonVotingClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_one_hyperpipe(learning_curves, learning_curves_cut):
    """
    Build, fit and check one hyperpipe for a learning-curve setting.

    ``learning_curves`` switches learning curves on or off and
    ``learning_curves_cut`` gives the cut points; when curves are requested
    without cut points, ``FloatRange(0, 1, 'range', 0.2)`` is used. ``self``
    is taken from the enclosing scope.
    """
    use_default_cut = learning_curves and learning_curves_cut is None
    if use_default_cut:
        learning_curves_cut = FloatRange(0, 1, 'range', 0.2)

    test_hyperpipe = Hyperpipe(
        'test_pipe',
        learning_curves=learning_curves,
        learning_curves_cut=learning_curves_cut,
        metrics=['accuracy', 'recall', 'specificity'],
        best_config_metric='accuracy',
        inner_cv=self.inner_cv,
        output_settings=OutputSettings(project_folder=self.tmp_folder_path,
                                       save_output=False))

    # The settings must be handed on to the cross-validation object.
    cross_validation = test_hyperpipe.cross_validation
    self.assertEqual(cross_validation.learning_curves, learning_curves)
    if not learning_curves:
        self.assertIsNone(test_hyperpipe.cross_validation.learning_curves_cut)
    else:
        self.assertEqual(test_hyperpipe.cross_validation.learning_curves_cut,
                         learning_curves_cut)

    test_hyperpipe += PipelineElement('StandardScaler')
    test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]},
                                      random_state=42)
    test_hyperpipe += PipelineElement('SVC', {'C': [0.1], 'kernel': ['linear']},
                                      random_state=42)
    test_hyperpipe.fit(self.X, self.y)

    first_outer_fold = test_hyperpipe.results_handler.results.outer_folds[0]
    config_results = first_outer_fold.tested_config_list

    for config_nr in range(len(config_results)):
        tested_config = config_results[config_nr]
        for inner_fold_nr in range(self.inner_cv.n_splits):
            curves = tested_config.inner_folds[inner_fold_nr].learning_curves

            if not learning_curves:
                # without learning curves nothing is recorded
                self.assertEqual(curves, [])
                continue

            # one entry per cut point; entries 1 and 2 of each point are
            # mappings whose keys must equal the optimisation metrics
            self.assertEqual(len(curves), len(learning_curves_cut.values))
            for learning_point_nr in range(len(learning_curves_cut.values)):
                point = curves[learning_point_nr]
                test_metrics = list(point[1].keys())
                train_metrics = list(point[2].keys())
                self.assertEqual(test_hyperpipe.optimization.metrics,
                                 test_metrics)
                self.assertEqual(test_hyperpipe.optimization.metrics,
                                 train_metrics)
def test_classification_9(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with scaler, sample
    pairing, two feature branches stacked side by side and a random forest.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # crazy everything
        pipe += PipelineElement("StandardScaler")
        pairing_grid = {
            "draw_limit": [100],
            "generator": Categorical(["nearest_pair", "random_pair"]),
        }
        pipe += PipelineElement("SamplePairingClassification", pairing_grid,
                                distance_metric="euclidean", test_disabled=True)

        # Each branch receives one half of the feature columns. The PCA in a
        # branch can be disabled (test_disabled=True).
        n_features = self.X_shape[1]
        half = int(np.floor(n_features / 2))

        # first half of the features: columns 0 .. half - 1
        source1_branch = Branch("source1_features")
        first_half = np.arange(start=0, stop=half)
        source1_branch += DataFilter(indices=first_half)
        source1_pca_grid = {"n_components": Categorical([None, 5])}
        source1_branch += PipelineElement("PCA", hyperparameters=source1_pca_grid,
                                          test_disabled=True)

        # second half of the features: columns half .. n_features - 1
        source2_branch = Branch("source2_features")
        second_half = np.arange(start=half, stop=n_features)
        source2_branch += DataFilter(indices=second_half)
        source2_pca_grid = {"n_components": Categorical([None, 5])}
        source2_branch += PipelineElement("PCA", hyperparameters=source2_pca_grid,
                                          test_disabled=True)

        # setup source branches and stack their output (i.e. horizontal concatenation)
        branches = [source1_branch, source2_branch]
        pipe += Stack("source_stack", elements=branches)

        # final estimator with stack output as features
        forest_grid = {
            "min_samples_split": FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
        }
        pipe += PipelineElement("RandomForestClassifier",
                                hyperparameters=forest_grid)

        self.run_hyperpipe(pipe, self.classification)
def test_classification_2(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with an estimator
    switch holding an SVC and a RandomForestClassifier.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # Simple estimator Switch
        estimator_choice = Switch('estimator_switch')

        svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                    'C': Categorical([.01, 1, 5])}
        estimator_choice += PipelineElement('SVC', hyperparameters=svc_grid)

        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                       range_type='range')}
        estimator_choice += PipelineElement('RandomForestClassifier',
                                            hyperparameters=forest_grid)

        pipe += estimator_choice
        self.run_hyperpipe(pipe, self.classification)
def test_classification_6(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with a stack of an
    SVC and a RandomForestClassifier followed by a PhotonVotingClassifier.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # Simple estimator Stack; a voting classifier follows the stack
        svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                    'C': Categorical([.01, 1, 5])}
        svc_element = PipelineElement('SVC', hyperparameters=svc_grid)

        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                       range_type='range')}
        forest_element = PipelineElement('RandomForestClassifier',
                                         hyperparameters=forest_grid)

        pipe += Stack('estimator_stack', elements=[svc_element, forest_element])
        pipe += PipelineElement('PhotonVotingClassifier')

        self.run_hyperpipe(pipe, self.classification)
def test_huge_combinations(self):
    """
    Fitting a hyperpipe with a very large configuration space must raise a
    Warning.

    The pipe consists of a PCA, a stack of 20 SVC elements and a final SVC;
    output is written below ``self.tmp_folder_path``.
    """
    settings = OutputSettings(project_folder=self.tmp_folder_path)
    hp = Hyperpipe('huge_combinations',
                   inner_cv=KFold(n_splits=3),
                   metrics=['accuracy'],
                   best_config_metric='accuracy',
                   output_settings=settings)

    hp += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})

    # 20 identically configured SVC elements next to each other
    stack = Stack('ensemble')
    for _ in range(20):
        svc_grid = {'C': FloatRange(0.001, 5),
                    'kernel': ["linear", "rbf", "sigmoid", "polynomial"]}
        stack += PipelineElement('SVC', hyperparameters=svc_grid)
    hp += stack

    final_grid = {'kernel': ["linear", "rbf", "sigmoid"]}
    hp += PipelineElement("SVC", hyperparameters=final_grid)

    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def test_cv_config_and_dummy_nr(self):
    """
    Fit ``self.hyperpipe`` and check the bookkeeping of the results.

    Verifies that a version is stored, that the number of outer folds, inner
    folds and tested configurations matches the expectation, and finally
    calls ``self.check_for_dummy()``. Reads ``self.outer_fold_nr`` and
    ``self.inner_fold_nr``, which are set outside this method.
    """
    # return_X_y is passed by keyword: current scikit-learn releases accept
    # it only as a keyword argument.
    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases -- confirm the version this test suite is pinned to.
    X, y = load_boston(return_X_y=True)

    self.hyperpipe += PipelineElement("StandardScaler")
    self.hyperpipe += PipelineElement("PCA", {"n_components": IntegerRange(3, 7)})
    self.hyperpipe += PipelineElement(
        "SVR",
        {
            "C": FloatRange(0.001, 10, num=10),
            "kernel": Categorical(["linear", "rbf"]),
        },
    )
    self.hyperpipe.fit(X, y)

    # number of configurations: PCA n_components * SVR C * SVR kernel
    expected_configs = 4 * 10 * 2

    # check version is present
    self.assertIsNotNone(self.hyperpipe.results.version)

    # check nr of outer and inner folds
    # assertEqual reports both numbers on failure, unlike assertTrue(a == b)
    self.assertEqual(len(self.hyperpipe.results.outer_folds), self.outer_fold_nr)
    self.assertEqual(len(self.hyperpipe.cross_validation.outer_folds),
                     self.outer_fold_nr)

    # only the fold collections are needed, the outer fold ids are not
    for inner_folds in self.hyperpipe.cross_validation.inner_folds.values():
        self.assertEqual(len(inner_folds), self.inner_fold_nr)

    for outer_fold_result in self.hyperpipe.results.outer_folds:
        # check that we have the right amount of configs tested in each outer fold
        self.assertEqual(len(outer_fold_result.tested_config_list),
                         expected_configs)

        for config_result in outer_fold_result.tested_config_list:
            # check that we have the right amount of inner-folds per config
            self.assertEqual(len(config_result.inner_folds), self.inner_fold_nr)

    self.check_for_dummy()
class BaseTest(unittest.TestCase):
    """Tests for the random-value interface of the hyperparameter classes."""

    def setUp(self):
        """
        Set default start setting for all tests.
        """
        self.intger_range = IntegerRange(2, 6)
        self.float_range = FloatRange(0.1, 5.7)
        self.categorical = Categorical(
            ["a", "b", "c", "d", "e", "f", "g", "h"])
        self.bool = BooleanSwitch()

    def test_rand_success(self):
        """
        Random values must lie inside the declared domain; after
        ``transform()`` values drawn with ``definite_list=True`` must be
        members of ``values``.
        """
        for _ in range(100):
            self.assertIn(self.intger_range.get_random_value(),
                          list(range(2, 6)))

            self.assertGreaterEqual(self.float_range.get_random_value(), 0.1)
            self.assertLess(self.float_range.get_random_value(), 5.7)

            self.assertIn(
                self.categorical.get_random_value(),
                ["a", "b", "c", "d", "e", "f", "g", "h"],
            )

            self.assertIn(self.bool.get_random_value(), [True, False])

        self.float_range.transform()
        self.intger_range.transform()

        for _ in range(100):
            self.assertIn(
                self.intger_range.get_random_value(definite_list=True),
                self.intger_range.values,
            )
            self.assertIn(
                self.float_range.get_random_value(definite_list=True),
                self.float_range.values,
            )

    def test_rand_error(self):
        """
        Drawing from a definite list before ``transform()`` must raise a
        ValueError for both numeric ranges.
        """
        # One context manager per call: assertRaises leaves its block at the
        # first exception, so any further call in the same block never runs.
        with self.assertRaises(ValueError):
            self.intger_range.get_random_value(definite_list=True)
        with self.assertRaises(ValueError):
            self.float_range.get_random_value(definite_list=True)
        # NOTE(review): the previous version also listed
        # self.bool.get_random_value(definite_list=True) and
        # self.categorical.get_random_value(definite_list=True) in the same
        # assertRaises block, where they were unreachable. Whether those
        # calls raise at all is not visible here -- add dedicated checks
        # once the expected behaviour is confirmed.
def test_classification_5(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with two switches:
    one over preprocessing elements and one over estimators.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # multi-switch
        # first switch: PCA or percentile-based feature selection
        pre_switch = Switch("preproc_switch")
        pca_grid = {"n_components": Categorical([None, 5])}
        pre_switch += PipelineElement("PCA", hyperparameters=pca_grid,
                                      test_disabled=True)
        selection_grid = {
            "percentile": IntegerRange(start=5, step=20, stop=66,
                                       range_type="range")
        }
        pre_switch += PipelineElement("FClassifSelectPercentile",
                                      hyperparameters=selection_grid,
                                      test_disabled=True)
        pipe += pre_switch

        # second switch: SVC or random forest
        estimator_switch = Switch("estimator_switch")
        svc_grid = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        estimator_switch += PipelineElement("SVC", hyperparameters=svc_grid)
        forest_grid = {
            "min_samples_split": FloatRange(start=0.05, step=0.1, stop=0.26,
                                            range_type="range")
        }
        estimator_switch += PipelineElement("RandomForestClassifier",
                                            hyperparameters=forest_grid)
        pipe += estimator_switch

        self.run_hyperpipe(pipe, self.classification)
def test_classification_5(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with two switches:
    one over preprocessing elements and one over estimators.
    """
    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        # multi-switch
        # first switch: PCA or percentile-based feature selection
        pre_switch = Switch('preproc_switch')
        pca_grid = {'n_components': Categorical([None, 5])}
        pre_switch += PipelineElement('PCA', hyperparameters=pca_grid,
                                      test_disabled=True)
        selection_grid = {'percentile': IntegerRange(start=5, step=20, stop=66,
                                                     range_type='range')}
        pre_switch += PipelineElement('FClassifSelectPercentile',
                                      hyperparameters=selection_grid,
                                      test_disabled=True)
        pipe += pre_switch

        # second switch: SVC or random forest
        estimator_switch = Switch('estimator_switch')
        svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                    'C': Categorical([.01, 1, 5])}
        estimator_switch += PipelineElement('SVC', hyperparameters=svc_grid)
        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                       range_type='range')}
        estimator_switch += PipelineElement('RandomForestClassifier',
                                            hyperparameters=forest_grid)
        pipe += estimator_switch

        self.run_hyperpipe(pipe, self.classification)
def test_classification_8(self):
    """
    Run a copy of every hyperpipe in ``self.hyperpipes`` with a scaler, two
    stacked feature branches (filter, confounder removal, PCA) and an
    estimator switch.
    """
    def feature_branch(name, indices):
        # One branch: keep the given feature columns, then confounder
        # removal and PCA, both created with test_disabled=True.
        branch = Branch(name)
        branch += DataFilter(indices=indices)
        branch += PipelineElement('ConfounderRemoval', {},
                                  standardize_covariates=True,
                                  test_disabled=True,
                                  confounder_names=['cov1', 'cov2'])
        branch += PipelineElement('PCA',
                                  hyperparameters={'n_components': Categorical([None, 5])},
                                  test_disabled=True)
        return branch

    for template_pipe in self.hyperpipes:
        pipe = template_pipe.copy_me()

        pipe += PipelineElement('StandardScaler')

        # Each branch receives one half of the feature columns.
        n_features = self.X_shape[1]
        half = int(np.floor(n_features / 2))

        # first half of the features: columns 0 .. half - 1
        source1_branch = feature_branch('source1_features',
                                        np.arange(start=0, stop=half))
        # second half of the features: columns half .. n_features - 1
        source2_branch = feature_branch('source2_features',
                                        np.arange(start=half, stop=n_features))

        # setup source branches and stack their output (i.e. horizontal concatenation)
        pipe += Stack('source_stack', elements=[source1_branch, source2_branch])

        # final estimator with stack output as features:
        # a switch between SVC and random forest
        switch = Switch('estimator_switch')
        svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                    'C': Categorical([.01, 1, 5])}
        switch += PipelineElement('SVC', hyperparameters=svc_grid)
        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                       range_type='range')}
        switch += PipelineElement('RandomForestClassifier',
                                  hyperparameters=forest_grid)
        pipe += switch

        self.run_hyperpipe(pipe, self.classification)
from photonai.optimization import FloatRange, IntegerRange

# Load the breast cancer data as feature matrix X and target vector y.
X, y = load_breast_cancer(return_X_y=True)

# Hyperpipe with 3-fold outer and inner cross-validation, optimised for
# accuracy; output goes to ./tmp/.
my_pipe = Hyperpipe(
    'basic_stack_pipe',
    optimizer='sk_opt',
    optimizer_params=dict(n_configurations=5),
    metrics=['accuracy', 'precision', 'recall'],
    best_config_metric='accuracy',
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=OutputSettings(project_folder='./tmp/'),
)

my_pipe += PipelineElement('StandardScaler')

# The two estimators that are combined in the stack.
tree = PipelineElement(
    'DecisionTreeClassifier',
    hyperparameters=dict(criterion=['gini'],
                         min_samples_split=IntegerRange(2, 4)),
)
svc = PipelineElement('LinearSVC', hyperparameters=dict(C=FloatRange(0.5, 25)))

# for a stack that includes estimators you can choose whether predict or predict_proba is called for all estimators
# in case only some implement predict_proba, predict is called for the remaining estimators
my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)

# A further LinearSVC is appended after the stack.
my_pipe += PipelineElement('LinearSVC')

my_pipe.fit(X, y)
'PCA', hyperparameters={'n_components': IntegerRange(5, 30)}) prepro_switch += PipelineElement('RandomTreesEmbedding', hyperparameters={ 'n_estimators': IntegerRange(10, 30), 'max_depth': IntegerRange(3, 6) }) prepro_switch += PipelineElement( 'SelectPercentile', hyperparameters={'percentile': IntegerRange(5, 15)}) #prepro_switch += PipelineElement('FastICA', hyperparameters={'algorithm': Categorical(['parallel', 'deflation'])}) estimator_switch = Switch("EstimatorSwitch") estimator_switch += PipelineElement( 'SVC', hyperparameters={ 'kernel': Categorical(["linear", "rbf", 'poly', 'sigmoid']), 'C': FloatRange(0.5, 100), 'decision_function_shape': Categorical(['ovo', 'ovr']), 'degree': IntegerRange(2, 5) }) estimator_switch += PipelineElement("RandomForestClassifier", hyperparameters={ 'n_estimators': IntegerRange(10, 100), "min_samples_split": IntegerRange(2, 4) }) estimator_switch += PipelineElement( "ExtraTreesClassifier", hyperparameters={'n_estimators': IntegerRange(5, 50)}) estimator_switch += PipelineElement( "SGDClassifier", hyperparameters={'penalty': Categorical(['l2', 'l1', 'elasticnet'])})
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold

from photonai.base import Hyperpipe, PipelineElement, OutputSettings
from photonai.optimization import FloatRange, Categorical

# loading the iris dataset
X, y = load_iris(return_X_y=True)

# DESIGN YOUR PIPELINE
# random grid search with 10 configurations, shuffled 3-fold outer and inner
# cross-validation, accuracy as the only metric
my_pipe = Hyperpipe(
    'multi_class_svm_pipe',
    optimizer='random_grid_search',
    optimizer_params=dict(n_configurations=10),
    metrics=['accuracy'],
    best_config_metric='accuracy',
    outer_cv=KFold(n_splits=3, shuffle=True),
    inner_cv=KFold(n_splits=3, shuffle=True),
    verbosity=1,
    output_settings=OutputSettings(project_folder='./tmp/'),
)

my_pipe.add(PipelineElement('StandardScaler'))

# SVC with the kernel and C as hyperparameters; gamma is fixed to 'scale'
my_pipe += PipelineElement(
    'SVC',
    hyperparameters=dict(kernel=Categorical(['rbf', 'linear']),
                         C=FloatRange(0.5, 2)),
    gamma='scale',
)

my_pipe.fit(X, y)
# DESIGN YOUR PIPELINE
# grid search; both cross-validation levels use group-based splitters
my_pipe = Hyperpipe("group_split_pipe",
                    optimizer="grid_search",
                    metrics=["accuracy", "precision", "recall"],
                    best_config_metric="accuracy",
                    outer_cv=GroupKFold(n_splits=4),
                    inner_cv=GroupShuffleSplit(n_splits=10),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder="./tmp/"))

# ADD ELEMENTS TO YOUR PIPELINE
# first normalize all features
my_pipe += PipelineElement("StandardScaler")

# then a PCA; the listed n_components values are tried in the hyperparameter search
my_pipe += PipelineElement("PCA",
                           hyperparameters=dict(n_components=[5, 10, None]),
                           test_disabled=True)

# finally an SVC with kernel and C as hyperparameters
my_pipe += PipelineElement("SVC",
                           hyperparameters=dict(
                               kernel=Categorical(["rbf", "linear"]),
                               C=FloatRange(0.5, 2, "linspace", num=5)))

# NOW TRAIN YOUR PIPELINE
# X, y and groups are defined outside this snippet
my_pipe.fit(X, y, groups=groups)
def test_false_range_type(self):
    """
    Creating and transforming a FloatRange with the range type
    'normal_distributed' must raise a ValueError.
    """
    with self.assertRaises(ValueError):
        FloatRange(1.0, 5.2, range_type='normal_distributed').transform()
def test_against_smac(self):
    """
    Compare the PHOTON pipeline run with a direct SMAC4BO run.

    Fits ``self.pipe`` (scaler, PCA, SVC), then runs SMAC4BO on a
    configuration space with the same three hyperparameters and
    ``self.objective_function`` as target. The per-configuration costs of
    both run histories are plotted, smoothed over neighbouring entries and
    compared: the share of points that differ by more than 0.05 (after
    subtracting a bias term from the PHOTON curve) must not exceed 0.15.
    """
    # PHOTON implementation
    self.pipe.add(PipelineElement("StandardScaler"))
    # then do feature selection using a PCA, specify which values to try in the hyperparameter search
    self.pipe += PipelineElement(
        "PCA", hyperparameters={"n_components": IntegerRange(5, 30)}
    )
    # engage and optimize the good old SVM for Classification
    self.pipe += PipelineElement(
        "SVC",
        hyperparameters={
            "kernel": Categorical(["linear", "rbf", "poly", "sigmoid"]),
            "C": FloatRange(0.5, 200),
        },
        gamma="auto",
    )

    self.X, self.y = self.simple_classification()
    self.pipe.fit(self.X, self.y)

    # AUTO ML direct
    # Build Configuration Space which defines all parameters and their ranges
    cs = ConfigurationSpace()

    # The three hyperparameters use the same names and bounds as the
    # PHOTON pipeline elements above.
    n_components = UniformIntegerHyperparameter(
        "PCA__n_components", 5, 30
    )  # , default_value=5)
    cs.add_hyperparameter(n_components)

    kernel = CategoricalHyperparameter(
        "SVC__kernel", ["linear", "rbf", "poly", "sigmoid"]
    )  # , default_value="linear")
    cs.add_hyperparameter(kernel)

    c = UniformFloatHyperparameter("SVC__C", 0.5, 200)  # , default_value=1)
    cs.add_hyperparameter(c)

    # Scenario object
    scenario = Scenario(
        {
            "run_obj": "quality",  # we optimize quality (alternatively runtime)
            "runcount-limit": 800,  # maximum function evaluations
            "cs": cs,  # configuration space
            "deterministic": "true",
            "shared_model": "false",  # !!!!
            "wallclock_limit": self.time_limit,
        }
    )

    # Optimize, using a SMAC-object
    print("Optimizing! Depending on your machine, this might take a few minutes.")
    smac = SMAC4BO(
        scenario=scenario,
        rng=np.random.RandomState(42),
        tae_runner=self.objective_function,
    )

    # keep a reference to the optimizer on the test instance
    self.traurig = smac

    incumbent = smac.optimize()

    inc_value = self.objective_function(incumbent)

    print(incumbent)
    print(inc_value)

    # NOTE(review): self.smac_helper["data"] is presumably filled while the
    # PHOTON pipe is fitted -- confirm where it is set.
    runhistory_photon = self.smac_helper["data"].solver.runhistory
    runhistory_original = smac.solver.runhistory

    # Keys 1..n, where n is the size of the shorter of the two histories.
    x_ax = range(
        1,
        min(
            len(runhistory_original.cost_per_config.keys()),
            len(runhistory_photon.cost_per_config.keys()),
        )
        + 1,
    )
    y_ax_original = [runhistory_original.cost_per_config[tmp] for tmp in x_ax]
    y_ax_photon = [runhistory_photon.cost_per_config[tmp] for tmp in x_ax]

    # Running minimum of the cost lists.
    # NOTE(review): tmp starts at 1, so the first slice [: tmp + 1] already
    # covers two entries -- confirm that this shift by one is intended.
    y_ax_original_inc = [min(y_ax_original[: tmp + 1]) for tmp in x_ax]
    y_ax_photon_inc = [min(y_ax_photon[: tmp + 1]) for tmp in x_ax]

    plt.figure(figsize=(10, 7))
    plt.plot(x_ax, y_ax_original, "g", label="Original")
    plt.plot(x_ax, y_ax_photon, "b", label="PHOTON")
    plt.plot(x_ax, y_ax_photon_inc, "r", label="PHOTON Incumbent")
    plt.plot(x_ax, y_ax_original_inc, "k", label="Original Incumbent")
    plt.title("Photon Prove")
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.legend(loc="best")
    plt.show()

    def neighbours(items, fill=None):
        # Yield, for every item, a list of its predecessor, the item itself
        # and its successor. Entries identical to `fill` are dropped, so the
        # first and last list hold two elements. `items` is iterated more
        # than once and therefore has to be re-iterable (e.g. a list).
        before = itertools.chain([fill], items)
        after = itertools.chain(
            items, [fill]
        )  # You could use itertools.zip_longest() later instead.
        next(after)
        for a, b, c in zip(before, items, after):
            yield [value for value in (a, b, c) if value is not fill]

    print("---------------")
    # Mean over each neighbourhood smooths the cost curves.
    original_pairing = [
        sum(values) / len(values) for values in neighbours(y_ax_original)
    ]
    # Mean absolute distance between the two running-minimum curves.
    bias_term = np.mean(
        [
            abs(y_ax_original_inc[t] - y_ax_photon_inc[t])
            for t in range(len(y_ax_photon_inc))
        ]
    )
    photon_pairing = [
        sum(values) / len(values) - bias_term for values in neighbours(y_ax_photon)
    ]
    # Count the positions where the smoothed curves differ by more than 0.05.
    counter = 0
    for i, x in enumerate(x_ax):
        if abs(original_pairing[i] - photon_pairing[i]) > 0.05:
            counter += 1

    self.assertLessEqual(counter / len(x_ax), 0.15)
optimizer='smac', # which optimizer PHOTON shall use, in this case smac optimizer_params={'scenario_dict': scenario_dict}, metrics=['mean_squared_error', 'pearson_correlation'], best_config_metric='mean_squared_error', outer_cv=ShuffleSplit(n_splits=1, test_size=0.2), inner_cv=KFold(n_splits=3), verbosity=1, output_settings=settings) # ADD ELEMENTS TO YOUR PIPELINE # first normalize all features my_pipe.add(PipelineElement('StandardScaler')) # then do feature selection using a PCA, specify which values to try in the hyperparameter search my_pipe += PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 10)}, test_disabled=True) switch = Switch("Test_Switch") # engage and optimize SVR # linspace and logspace is converted to uniform and log-uniform priors in skopt switch += PipelineElement('SVR', hyperparameters={'C': FloatRange(0, 10, range_type='linspace'), 'epsilon': FloatRange(0, 0.0001, range_type='linspace'), 'tol': FloatRange(1e-4, 1e-2, range_type='linspace'), 'kernel': Categorical(['linear', 'rbf', 'poly'])}) switch += PipelineElement('RandomForestRegressor', hyperparameters={'n_estimators': Categorical([10, 20])}) my_pipe += switch # NOW TRAIN YOUR PIPELINE my_pipe.fit(X, y)
class HyperparameterBaseTest(unittest.TestCase):
    """Tests for IntegerRange, FloatRange, Categorical and BooleanSwitch."""

    def setUp(self):
        """
        Create one hyperparameter object of each kind as fixtures.
        """
        self.intger_range = IntegerRange(2, 6)
        self.float_range = FloatRange(0.1, 5.7)
        self.cateogrical_truth = ["a", "b", "c", "d", "e", "f", "g", "h"]
        self.categorical = Categorical(self.cateogrical_truth)
        self.bool = BooleanSwitch()

    def test_rand_success(self):
        """
        Random values must lie inside the declared domain; after
        ``transform()`` values drawn with ``definite_list=True`` must be
        members of ``values``.
        """
        valid_integers = list(range(2, 6))
        for _ in range(100):
            drawn_integer = self.intger_range.get_random_value()
            self.assertIn(drawn_integer, valid_integers)

            # lower and upper bound are checked on two separate draws
            self.assertGreaterEqual(self.float_range.get_random_value(), 0.1)
            self.assertLess(self.float_range.get_random_value(), 5.7)

            drawn_category = self.categorical.get_random_value()
            self.assertIn(drawn_category, self.cateogrical_truth)

            drawn_flag = self.bool.get_random_value()
            self.assertIn(drawn_flag, [True, False])

        self.float_range.transform()
        self.intger_range.transform()

        for _ in range(100):
            for numeric_range in (self.intger_range, self.float_range):
                drawn = numeric_range.get_random_value(definite_list=True)
                self.assertIn(drawn, numeric_range.values)

    def test_domain(self):
        """
        Compare the ``values`` of every fixture with the expected value list.
        """
        for numeric_range in (self.float_range, self.intger_range):
            numeric_range.transform()

        expected_integers = list(np.arange(2, 6))
        self.assertListEqual(self.intger_range.values, expected_integers)

        expected_floats = list(np.linspace(0.1, 5.7, dtype=np.float64))
        self.assertListEqual(self.float_range.values, expected_floats)

        # Same check on a wide interval with a negative, non-round start.
        lower, upper = -300.57, np.pi * 4000
        big_float_range = FloatRange(lower, upper)
        big_float_range.transform()
        self.assertListEqual(big_float_range.values,
                             list(np.linspace(lower, upper)))

        expected_letters = ["a", "b", "c", "d", "e", "f", "g", "h"]
        self.assertListEqual(self.categorical.values, expected_letters)
        self.assertListEqual(self.bool.values, [True, False])

    def test_rand_error(self):
        """
        Check the error cases of ``get_random_value``: ValueError for the
        untransformed numeric ranges with ``definite_list=True`` and
        NotImplementedError for the categorical with ``definite_list=False``.
        """
        for numeric_range in (self.intger_range, self.float_range):
            with self.assertRaises(ValueError):
                numeric_range.get_random_value(definite_list=True)

        # NOTE(review): the categorical check runs twice; the second run
        # possibly was meant to cover self.bool -- confirm.
        for _ in range(2):
            with self.assertRaises(NotImplementedError):
                self.categorical.get_random_value(definite_list=False)

    def test_categorical(self):
        """
        Indexing the Categorical returns the element at that position of the
        list it was built from.
        """
        position = 2
        self.assertEqual(self.categorical[position],
                         self.cateogrical_truth[position])
"acq_func_kwargs": { "kappa": 1.96 }, }, metrics=["mean_squared_error", "pearson_correlation"], best_config_metric="mean_squared_error", outer_cv=ShuffleSplit(n_splits=1, test_size=0.2), inner_cv=KFold(n_splits=3), verbosity=1, output_settings=settings, ) # ADD ELEMENTS TO YOUR PIPELINE # first normalize all features my_pipe += PipelineElement("StandardScaler") # engage and optimize SVR # linspace and logspace is converted to uniform and log-uniform priors in skopt my_pipe += PipelineElement( "SVR", hyperparameters={ "C": FloatRange(1e-3, 100, range_type="logspace"), "epsilon": FloatRange(1e-3, 10, range_type="logspace"), "tol": FloatRange(1e-4, 1e-2, range_type="linspace"), "kernel": Categorical(["linear", "rbf", "poly"]), }, ) # NOW TRAIN YOUR PIPELINE my_pipe.fit(X, y)
X, y = load_breast_cancer(return_X_y=True) my_pipe = Hyperpipe('example_project', optimizer='sk_opt', optimizer_params={'n_configurations': 25}, metrics=['accuracy', 'precision', 'recall'], best_config_metric='accuracy', outer_cv=KFold(n_splits=3), inner_cv=KFold(n_splits=3)) my_pipe += PipelineElement('StandardScaler') my_pipe += PipelineElement('PCA', hyperparameters={'n_components': FloatRange(0.5, 0.8, step=0.1)}, test_disabled=True) my_pipe += PipelineElement('ImbalancedDataTransformer', hyperparameters={'method_name': ['RandomUnderSampler','SMOTE']}, test_disabled=True) # set up two learning algorithms in an ensemble ensemble_learner = Stack('estimators', use_probabilities=True) ensemble_learner += PipelineElement('DecisionTreeClassifier', criterion='gini', hyperparameters={'min_samples_split': IntegerRange(2, 4)}) ensemble_learner += PipelineElement('LinearSVC', hyperparameters={'C':
# Hyperpipe with 3-fold outer and inner cross-validation, optimised for
# accuracy; output goes to ./tmp/.
my_pipe = Hyperpipe("basic_stack_pipe",
                    optimizer="sk_opt",
                    optimizer_params=dict(n_configurations=5),
                    metrics=["accuracy", "precision", "recall"],
                    best_config_metric="accuracy",
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder="./tmp/"))

my_pipe += PipelineElement("StandardScaler")

# The two estimators that are combined in the stack.
tree = PipelineElement("DecisionTreeClassifier",
                       hyperparameters=dict(criterion=["gini"],
                                            min_samples_split=IntegerRange(2, 4)))
svc = PipelineElement("LinearSVC", hyperparameters=dict(C=FloatRange(0.5, 25)))

# for a stack that includes estimators you can choose whether predict or predict_proba is called for all estimators
# in case only some implement predict_proba, predict is called for the remaining estimators
my_pipe += Stack("final_stack", [tree, svc], use_probabilities=True)

# A further LinearSVC is appended after the stack.
my_pipe += PipelineElement("LinearSVC")

# X and y are defined outside this snippet
my_pipe.fit(X, y)
from photonai.base import Hyperpipe, PipelineElement
from photonai.optimization import FloatRange, Categorical, IntegerRange

# WE USE THE BREAST CANCER SET FROM SKLEARN
X, y = load_breast_cancer(return_X_y=True)

# DESIGN YOUR PIPELINE
# 5-fold inner and 3-fold outer cross-validation, 25 configurations,
# accuracy as the metric that selects the best configuration
my_pipe = Hyperpipe('basic_svm_pipe',
                    inner_cv=KFold(n_splits=5),
                    outer_cv=KFold(n_splits=3),
                    optimizer='sk_opt',
                    optimizer_params=dict(n_configurations=25),
                    metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                    best_config_metric='accuracy')

my_pipe.add(PipelineElement('StandardScaler'))

# PCA with the number of components as hyperparameter
my_pipe += PipelineElement('PCA',
                           hyperparameters=dict(n_components=IntegerRange(10, 30)),
                           test_disabled=True)

# SVC with kernel and C as hyperparameters; gamma is fixed to 'scale'
my_pipe += PipelineElement('SVC',
                           hyperparameters=dict(kernel=Categorical(['rbf', 'linear']),
                                                C=FloatRange(1, 6)),
                           gamma='scale')

my_pipe.fit(X, y)