def optimize():
    """Tune the example SVM search space with SMAC and print both costs.

    Builds a conditional configuration space (kernel, C, shrinking, degree,
    coef0, gamma / gamma_value), evaluates its default configuration with
    ``svm_from_cfg``, runs SMAC on the scenario read from
    ``test/test_files/svm_scenario.txt`` and prints the value obtained for
    the default and for the incumbent configuration.
    """
    # Load the iris dataset (a widely used benchmark).
    # NOTE(review): this local is not referenced again inside the function.
    iris = datasets.load_iris()

    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

    space = ConfigurationSpace()

    # The kernel decides which of the remaining parameters are active.
    kernel_hp = CategoricalHyperparameter(
        "kernel", ["linear", "rbf", "poly", "sigmoid"], default="poly")
    space.add_hyperparameter(kernel_hp)

    # Parameters shared by every kernel.
    c_hp = UniformFloatHyperparameter("C", 0.001, 1000.0, default=1.0)
    shrinking_hp = CategoricalHyperparameter(
        "shrinking", ["true", "false"], default="true")
    space.add_hyperparameters([c_hp, shrinking_hp])

    # Kernel-specific parameters, restricted through conditions.
    degree_hp = UniformIntegerHyperparameter("degree", 1, 5, default=3)
    coef0_hp = UniformFloatHyperparameter("coef0", 0.0, 10.0, default=0.0)
    space.add_hyperparameters([degree_hp, coef0_hp])
    space.add_conditions([
        # degree: poly only
        InCondition(child=degree_hp, parent=kernel_hp, values=["poly"]),
        # coef0: poly and sigmoid
        InCondition(child=coef0_hp, parent=kernel_hp,
                    values=["poly", "sigmoid"]),
    ])

    # gamma is either the literal "auto" or a float; the float lives in a
    # separate hyperparameter that is active only when gamma == "value".
    gamma_hp = CategoricalHyperparameter(
        "gamma", ["auto", "value"], default="auto")
    gamma_value_hp = UniformFloatHyperparameter(
        "gamma_value", 0.0001, 8, default=1)
    space.add_hyperparameters([gamma_hp, gamma_value_hp])
    gamma_value_active = InCondition(
        child=gamma_value_hp, parent=gamma_hp, values=["value"])
    space.add_condition(gamma_value_active)
    # gamma itself is only offered for rbf, poly and sigmoid kernels.
    gamma_active = InCondition(
        child=gamma_hp, parent=kernel_hp, values=["rbf", "poly", "sigmoid"])
    space.add_condition(gamma_active)

    # Scenario object
    scenario = Scenario("test/test_files/svm_scenario.txt")

    # Baseline: value of the default configuration.
    default_cfg = space.get_default_configuration()
    def_value = svm_from_cfg(default_cfg)
    print("Default Value: %.2f" % (def_value))

    # Optimize, using a SMAC-object
    print("Optimizing! Depending on your machine, this might take a few minutes.")
    optimizer = SMAC(scenario=scenario,
                     rng=np.random.RandomState(42),
                     tae_runner=svm_from_cfg)
    incumbent = optimizer.optimize()

    inc_value = svm_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))
def _get_acm_cs(self):
    """Build the configuration space over surrogate model and acquisition function.

    The space is designed for querying
    ~smac.optimizer.smbo._component_builder. It is seeded from ``self.rng``.
    The random-forest settings are only active when ``model == "RF"`` and
    each ``par_*`` parameter only for its own acquisition function.

    Returns
    -------
    ConfigurationSpace
    """
    space = ConfigurationSpace()
    space.seed(self.rng.randint(0, 2**20))

    # Surrogate model and the settings that only apply to the random forest.
    model = CategoricalHyperparameter("model", choices=("RF", "GP"))
    rf_only = [
        Constant("num_trees", value=10),
        CategoricalHyperparameter("do_bootstrapping", choices=(True, False),
                                  default_value=True),
        CategoricalHyperparameter("ratio_features",
                                  choices=(3 / 6, 4 / 6, 5 / 6, 1),
                                  default_value=1),
        UniformIntegerHyperparameter("min_samples_to_split", lower=1,
                                     upper=10, default_value=2),
        UniformIntegerHyperparameter("min_samples_in_leaf", lower=1,
                                     upper=10, default_value=1),
    ]
    space.add_hyperparameters([model] + rf_only)
    space.add_conditions([InCondition(hp, model, ["RF"]) for hp in rf_only])

    # Acquisition function and its per-function parameter.
    acq = CategoricalHyperparameter("acq_func",
                                    choices=("EI", "LCB", "PI", "LogEI"))
    acq_params = [
        ("EI", UniformFloatHyperparameter("par_ei", lower=-10, upper=10)),
        ("PI", UniformFloatHyperparameter("par_pi", lower=-10, upper=10)),
        ("LogEI", UniformFloatHyperparameter("par_logei", lower=0.001,
                                             upper=100, log=True)),
        ("LCB", UniformFloatHyperparameter("par_lcb", lower=0.0001,
                                           upper=0.9999)),
    ]
    space.add_hyperparameters([acq] + [hp for _, hp in acq_params])
    space.add_conditions(
        [InCondition(hp, acq, [choice]) for choice, hp in acq_params])

    return space
def config_space(self):
    """Return the SVC hyperparameter space.

    The space is seeded with ``self.random_state`` and contains ``C``,
    ``shrinking``, ``kernel``, ``degree`` (active for the poly kernel) and
    ``coef0`` (active for the poly and sigmoid kernels).
    """
    space = ConfigurationSpace()
    space.seed(self.random_state)

    regularization = UniformFloatHyperparameter(
        'C', lower=1e-8, upper=100.0, default_value=1.0)
    use_shrinking = CategoricalHyperparameter(
        'shrinking', [True, False], default_value=True)
    kernel_choice = CategoricalHyperparameter(
        'kernel', ['linear', 'rbf', 'poly', 'sigmoid'])
    poly_degree = UniformIntegerHyperparameter(
        'degree', lower=1, upper=5, default_value=2)
    independent_term = UniformFloatHyperparameter(
        'coef0', lower=0.0, upper=10.0, default_value=0.0)

    space.add_hyperparameters((
        regularization,
        use_shrinking,
        kernel_choice,
        poly_degree,
        independent_term,
    ))

    # Kernel-specific parameters are only sampled for the kernels that use them.
    degree_for_poly = InCondition(
        child=poly_degree, parent=kernel_choice, values=['poly'])
    coef0_for_poly_sigmoid = InCondition(
        child=independent_term, parent=kernel_choice,
        values=['poly', 'sigmoid'])
    space.add_conditions((degree_for_poly, coef0_for_poly_sigmoid))

    return space
# Adding conditions to restrict the hyperparameter space # Since learning rate is used when solver is 'sgd' use_lr = CS.conditions.EqualsCondition(child=learning_rate, parent=solver, value='sgd') # Since learning rate initialization will only be accounted for when using 'sgd' or 'adam' use_lr_init = CS.conditions.InCondition(child=learning_rate_init, parent=solver, values=['sgd', 'adam']) # Since batch size will not be considered when optimizer is 'lbfgs' use_batch_size = CS.conditions.InCondition(child=batch_size, parent=solver, values=['sgd', 'adam']) # We can also add multiple conditions on hyperparameters at once: cs.add_conditions([use_lr, use_batch_size, use_lr_init]) # SMAC scenario object scenario = Scenario({ 'run_obj': 'quality', # we optimize quality (alternative to runtime) 'wallclock-limit': 100, # max duration to run the optimization (in seconds) 'cs': cs, # configuration space 'deterministic': 'true', 'limit_resources': True, # Uses pynisher to limit memory and runtime # Alternatively, you can also disable this. # Then you should handle runtime and memory yourself in the TA 'cutoff': 30, # runtime limit for target algorithm 'memory_limit': 3072, # adapt this to reasonable value for your hardware })
# There are some hyperparameters shared by all kernels C = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0) shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default_value="true") cs.add_hyperparameters([C, shrinking]) # Others are kernel-specific, so we can add conditions to limit the searchspace degree = UniformIntegerHyperparameter( "degree", 1, 5, default_value=3) # Only used by kernel poly coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default_value=0.0) # poly, sigmoid cs.add_hyperparameters([degree, coef0]) use_degree = InCondition(child=degree, parent=kernel, values=["poly"]) use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"]) cs.add_conditions([use_degree, use_coef0]) # This also works for parameters that are a mix of categorical and values from a range of numbers # For example, gamma can be either "auto" or a fixed float gamma = CategoricalHyperparameter( "gamma", ["auto", "value"], default_value="auto") # only rbf, poly, sigmoid gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default_value=1) cs.add_hyperparameters([gamma, gamma_value]) # We only activate gamma_value if gamma is set to "value" cs.add_condition(InCondition(child=gamma_value, parent=gamma, values=["value"])) # And again we can restrict the use of gamma in general to the choice of the kernel
def main_loop(problem):
    """Tune a random-forest style configuration space with SMAC.

    Loads the pickled feature matrix, builds the configuration space (its
    ``max_features_int`` range depends on the number of features), evaluates
    the default configuration with ``svm_from_cfg``, runs SMAC and prints the
    value of the default and of the incumbent configuration.

    Parameters
    ----------
    problem
        Label of the problem being tuned; it is only used to build the name
        of the SMAC output directory.

    Returns
    -------
    The incumbent configuration returned by ``smac.optimize()``.
    """
    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

    # The data must be loaded BEFORE the configuration space is built: the
    # upper bound of "max_features_int" is len(X[0]). Loading it afterwards
    # made X an unbound local at that point (UnboundLocalError).
    # NOTE(review): pickle.load can execute arbitrary code; only load this
    # file if it comes from a trusted source.
    with open("/home/naamah/Documents/CatES/result_All/X1.p", "rb") as fp:
        X = pickle.load(fp)
    n_features = len(X[0])

    cs = ConfigurationSpace()

    n_estimators = UniformIntegerHyperparameter("n_estimators", 5, 50, default_value=10)
    min_samples_split = UniformIntegerHyperparameter("min_samples_split", 2, 20, default_value=2)
    min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf", 1, 20, default_value=1)
    min_weight_fraction_leaf = UniformFloatHyperparameter(
        "min_weight_fraction_leaf", 0.0, 0.5, default_value=0.0)
    max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes", 10, 1000, default_value=100)
    min_impurity_decrease = UniformFloatHyperparameter(
        "min_impurity_decrease", 0.0, 0.5, default_value=0.0)
    warm_start = CategoricalHyperparameter("warm_start", ["true", "false"], default_value="false")

    cs.add_hyperparameters([
        n_estimators, min_weight_fraction_leaf, min_samples_split,
        min_samples_leaf, max_leaf_nodes, warm_start, min_impurity_decrease
    ])

    # max_features is either a named strategy or a number; the number lives
    # in a separate hyperparameter that is active only for "int" / "float".
    max_features = CategoricalHyperparameter(
        "max_features", ["auto", "log2", "sqrt", "int", "None", "float"],
        default_value="auto")
    # Clamp the default so it stays inside [2, n_features] for small datasets.
    max_features_int = UniformIntegerHyperparameter(
        "max_features_int", 2, n_features, default_value=min(5, n_features))
    max_features_float = UniformFloatHyperparameter(
        "max_features_float", 0.0, 0.9, default_value=0.0)
    cs.add_hyperparameters([max_features, max_features_int, max_features_float])
    use_max_features_int = InCondition(
        child=max_features_int, parent=max_features, values=["int"])
    use_max_features_float = InCondition(
        child=max_features_float, parent=max_features, values=["float"])
    cs.add_conditions([use_max_features_int, use_max_features_float])

    # max_depth: "None" or an explicit integer value.
    max_depth = CategoricalHyperparameter("max_depth", ["None", "value"], default_value="None")
    max_depth_value = UniformIntegerHyperparameter("max_depth_value", 2, 20, default_value=5)
    cs.add_hyperparameters([max_depth, max_depth_value])
    cs.add_condition(
        InCondition(child=max_depth_value, parent=max_depth, values=["value"]))

    # random_state: "None" or an explicit integer value.
    random_state = CategoricalHyperparameter("random_state", ["None", "value"], default_value="None")
    random_state_value = UniformIntegerHyperparameter("random_state_value", 1, 20, default_value=1)
    cs.add_hyperparameters([random_state, random_state_value])
    cs.add_condition(
        InCondition(child=random_state_value, parent=random_state, values=["value"]))

    # Scenario object
    max_eval = 100000
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": max_eval,  # maximum function evaluations
        "cs": cs,  # configuration space
        "shared_model": True,
        "output_dir": "/home/naamah/Documents/CatES/result_All/smac/RF/run_{}_{}_{}".format(
            max_eval,
            datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d_%H:%M:%S'),
            problem),
        "input_psmac_dirs": "/home/naamah/Documents/CatES/result_All/smac/psmac",
        "deterministic": "False"
    })

    def_value = svm_from_cfg(cs.get_default_configuration())
    print("Default Value: %.2f" % (def_value))

    # Optimize, using a SMAC-object
    print("Optimizing! Depending on your machine, this might take a few minutes.")
    smac = SMAC(scenario=scenario, tae_runner=svm_from_cfg)
    incumbent = smac.optimize()

    inc_value = svm_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))

    return incumbent
# Others are kernel-specific, so we can add conditions to limit the searchspace use_eta0 = InCondition(child=eta0, parent=learning_rate, values=["constant", "invscaling", "adaptive"]) use_power_t = InCondition(child=power_t, parent=learning_rate, values=["invscaling"]) use_l1_ratio = InCondition(child=l1_ratio, parent=penalty, values=["elasticnet"]) cs.add_conditions([use_eta0, use_power_t, use_l1_ratio]) # Scenario object scenario = Scenario({ "run_obj": "quality", # we optimize quality (alternatively runtime) "runcount-limit": 500, # max. number of function evaluations; for this example set to a low number "cs": cs, # configuration space "deterministic": "true" }) # Example call of the function # It returns: Status, Cost, Runtime, Additional Infos def_value = SGD_from_cfg(cs.get_default_configuration()) print("Default Value: %.2f" % (def_value))
def run_hpo(args, tae_runner):
    """Run BOHB4HPO over the training hyperparameters and print the result.

    Parameters
    ----------
    args
        Object providing ``SHARE_PATH`` (used for the SMAC output directory
        and, with a trailing ``*``, as the pSMAC input pattern) and
        ``WORKERS`` (passed as ``n_jobs``).
    tae_runner
        Target-algorithm runner handed to ``BOHB4HPO``.
    """
    space = ConfigurationSpace()

    # Epoch counts
    local_epochs = UniformIntegerHyperparameter(
        "local_epochs", 1, 20, default_value=10)
    distill_epochs = UniformIntegerHyperparameter(
        "distill_epochs", 1, 20, default_value=1)

    # Fallback switch and regularisation weights
    fallback = CategoricalHyperparameter(
        "fallback", [True, False], default_value=True)
    lambda_outlier = UniformFloatHyperparameter(
        "lambda_outlier", 0.0, 10.0, default_value=1.0)
    lambda_fedprox = UniformFloatHyperparameter(
        "lambda_fedprox", 0.000001, 10.0, default_value=0.01, log=True)

    mixture_coefficients_base = UniformFloatHyperparameter(
        "mixture_coefficients_base", 0.0, 1.0, default_value=0.5)

    # Optimizer choice with one learning rate per optimizer
    local_optimizer = CategoricalHyperparameter(
        "local_optimizer", ["Adam", "SGD"], default_value="Adam")
    adam_lr = UniformFloatHyperparameter(
        "adam_lr", 0.00001, 1.0, default_value=0.001, log=True)
    sgd_lr = UniformFloatHyperparameter(
        "sgd_lr", 0.00001, 1.0, default_value=0.1, log=True)

    all_hyperparameters = [
        local_epochs,
        distill_epochs,
        fallback,
        lambda_outlier,
        lambda_fedprox,
        mixture_coefficients_base,
        local_optimizer,
        adam_lr,
        sgd_lr,
    ]
    space.add_hyperparameters(all_hyperparameters)

    # Each learning rate is only active for its own optimizer.
    lr_conditions = [
        EqualsCondition(child=adam_lr, parent=local_optimizer, value='Adam'),
        EqualsCondition(child=sgd_lr, parent=local_optimizer, value='SGD'),
    ]
    space.add_conditions(lr_conditions)

    # Scenario object
    scenario_options = {
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 100,  # max. number of function evaluations
        "cs": space,  # configuration space
        "deterministic": True,
        "shared_model": True,
        "input_psmac_dirs": args.SHARE_PATH + '*',
        "output_dir": args.SHARE_PATH,
        "limit_resources": False,
    }
    scenario = Scenario(scenario_options)

    # Intensifier parameters
    max_iters = 50
    intensifier_kwargs = {
        'initial_budget': 5,
        'max_budget': max_iters,
        'eta': 3,
    }

    # Optimize, using a SMAC-object
    print("Optimizing!")
    optimizer = BOHB4HPO(
        scenario=scenario,
        tae_runner=tae_runner,
        intensifier_kwargs=intensifier_kwargs,
        n_jobs=args.WORKERS,
    )

    # Start optimization; the incumbent is always re-read from the solver,
    # also when optimize() raises (the exception still propagates).
    try:
        incumbent = optimizer.optimize()
    finally:
        incumbent = optimizer.solver.incumbent

    run_result = optimizer.get_tae_runner().run(
        config=incumbent, instance='2', budget=1, seed=0)
    inc_value = run_result[1]
    print("Optimized Value: %.4f" % inc_value)
def generate_data(smac_class,
                  n_runs=1,
                  output_dir: Union[str, Path] = ".",
                  dataset=None,
                  runcount_limit=50):
    """Run ``smac_class`` on the SVM example ``n_runs`` times and validate each run.

    Parameters
    ----------
    smac_class
        Optimizer class; instantiated with ``scenario``, ``rng`` and
        ``tae_runner`` and expected to offer ``optimize()`` and ``validate()``.
    n_runs
        Number of independent optimization runs.
    output_dir
        Base directory; run ``i`` writes to
        ``<output_dir>/<smac_class.__name__>/<i as two digits>``.
    dataset
        Object with ``data`` and ``target`` attributes; the iris dataset is
        loaded when ``None``.
    runcount_limit
        Maximum number of function evaluations per run.
    """
    output_dir = Path(output_dir)

    if dataset is None:
        dataset = datasets.load_iris()

    def svm_from_cfg(cfg):
        """Create an SVM from a configuration and cross-validate it on ``dataset``.

        Parameters:
        -----------
        cfg: Configuration (ConfigSpace.ConfigurationSpace.Configuration)
            Configuration containing the parameters.
            Configurations are indexable!

        Returns:
        --------
        One minus the mean 5-fold cross-validation score (to be minimized).
        """
        # Deactivated parameters are stored as None, which the SVM does not
        # accept; keep only the entries whose value is truthy.
        params = {}
        for key in cfg:
            if cfg[key]:
                params[key] = cfg[key]

        # Translate the "true"/"false" strings into a boolean.
        params["shrinking"] = params["shrinking"] == "true"

        # gamma is either a fixed value (taken from gamma_value) or "auto".
        if "gamma" in params:
            if params["gamma"] == "value":
                params["gamma"] = params["gamma_value"]
            else:
                params["gamma"] = "auto"
            params.pop("gamma_value", None)  # Remove "gamma_value"

        classifier = svm.SVC(**params, random_state=None)
        fold_scores = cross_val_score(
            classifier, dataset.data, dataset.target, cv=5)
        return 1 - np.mean(fold_scores)  # Minimize!

    logging.basicConfig(level=logging.INFO)  # logging.DEBUG for debug output

    # Build Configuration Space which defines all parameters and their ranges
    space = ConfigurationSpace()

    # Possible SVM kernels
    kernel_hp = CategoricalHyperparameter(
        "kernel", ["linear", "rbf", "poly", "sigmoid"], default_value="poly")
    space.add_hyperparameter(kernel_hp)

    # Hyperparameters shared by all kernels
    c_hp = UniformFloatHyperparameter("C", 0.001, 1000.0, default_value=1.0)
    shrinking_hp = CategoricalHyperparameter(
        "shrinking", ["true", "false"], default_value="true")
    space.add_hyperparameters([c_hp, shrinking_hp])

    # Kernel-specific hyperparameters, limited through conditions
    degree_hp = UniformIntegerHyperparameter(
        "degree", 1, 5, default_value=3)  # Only used by kernel poly
    coef0_hp = UniformFloatHyperparameter(
        "coef0", 0.0, 10.0, default_value=0.0)  # poly, sigmoid
    space.add_hyperparameters([degree_hp, coef0_hp])
    space.add_conditions([
        InCondition(child=degree_hp, parent=kernel_hp, values=["poly"]),
        InCondition(child=coef0_hp, parent=kernel_hp,
                    values=["poly", "sigmoid"]),
    ])

    # gamma can be either "auto" or a fixed float
    gamma_hp = CategoricalHyperparameter(
        "gamma", ["auto", "value"],
        default_value="auto")  # only rbf, poly, sigmoid
    gamma_value_hp = UniformFloatHyperparameter(
        "gamma_value", 0.0001, 8, default_value=1)
    space.add_hyperparameters([gamma_hp, gamma_value_hp])
    # gamma_value is only active if gamma is set to "value"
    gamma_value_active = InCondition(
        child=gamma_value_hp, parent=gamma_hp, values=["value"])
    space.add_condition(gamma_value_active)
    # gamma in general is restricted by the choice of the kernel
    gamma_active = InCondition(
        child=gamma_hp, parent=kernel_hp, values=["rbf", "poly", "sigmoid"])
    space.add_condition(gamma_active)

    for i in range(n_runs):
        run_dir = output_dir / smac_class.__name__ / f"{i:02d}"

        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",  # we optimize quality (alternatively runtime)
            "runcount-limit": runcount_limit,  # max. number of function evaluations
            "cs": space,  # configuration space
            "deterministic": "true",
            "limit_resources": "false",
            "output_dir": str(run_dir.absolute())
        })

        # Optimize, using a SMAC-object
        smac = smac_class(scenario=scenario, rng=None, tae_runner=svm_from_cfg)
        incumbent = smac.optimize()

        # Validate the result (this makes a lot more sense with instances)
        smac.validate(
            config_mode='inc',  # which configurations to evaluate
            repetitions=100,  # ignored unless "deterministic" is "false" in the scenario
            n_jobs=1)  # how many cores to use in parallel
# tensorboard_freq=10, training_epochs=optimization_epochs, batch_size=50 ) acc, loss = network.validate(val_feats, val_labels, show_partial=False) return 1/acc # logger = logging.getLogger("Hyperparameter optimization") # logging.basicConfig(level=logging.INFO) config_space = ConfigurationSpace() config_space.add_hyperparameters(list(space.values())) config_space.add_conditions(hyper_space_conditions) scenario_dict = {"run_obj": "quality", "runcount-limit": space_optimization_evals, "cs": config_space, "deterministic": "true", "output-dir": project_data.OUT_DIR + '/smac/' } scenario = Scenario(scenario_dict) runhistory = RunHistory(aggregate_func=None) stats = Stats(scenario) smac = SMAC(scenario=scenario, runhistory=runhistory,
def get_config_space():
    """Build the joint configuration space for algorithm selection and tuning.

    The top-level ``algorithm`` choice ("nn", "svm" or "rf") gates everything
    else:

    * the random-forest hyperparameters are active only for ``"rf"``;
    * ``kernel``, ``C`` and ``shrinking`` are active only for ``"svm"``;
    * ``degree``, ``coef0`` and ``gamma`` hang off ``kernel`` and
      ``gamma_value`` off ``gamma``, so they are only reachable through an
      active ``kernel``, i.e. under ``"svm"``.

    Returns
    -------
    ConfigurationSpace
    """
    # Build Configuration Space which defines all parameters and their ranges
    cs = ConfigurationSpace()

    algorithm = CategoricalHyperparameter("algorithm", ["nn", "svm", "rf"], default="rf")
    # The parent has to be part of the space before conditions can refer to
    # it; the original never added it.
    cs.add_hyperparameter(algorithm)

    # --- Random forest -----------------------------------------------------
    do_bootstrapping = CategoricalHyperparameter(
        "do_bootstrapping", ["true", "false"], default="true")
    num_trees = UniformIntegerHyperparameter("num_trees", 10, 50, default=10)
    max_features = UniformIntegerHyperparameter(
        "max_features", 1, boston.data.shape[1], default=1)
    min_weight_frac_leaf = UniformFloatHyperparameter(
        "min_weight_frac_leaf", 0.0, 0.5, default=0.0)
    criterion = CategoricalHyperparameter("criterion", ["mse", "mae"], default="mse")
    min_samples_to_split = UniformIntegerHyperparameter(
        "min_samples_to_split", 2, 20, default=2)
    min_samples_in_leaf = UniformIntegerHyperparameter(
        "min_samples_in_leaf", 1, 20, default=1)
    max_leaf_nodes = UniformIntegerHyperparameter(
        "max_leaf_nodes", 10, 1000, default=100)

    rf_params = [
        do_bootstrapping, num_trees, min_weight_frac_leaf, criterion,
        max_features, min_samples_to_split, min_samples_in_leaf,
        max_leaf_nodes
    ]
    cs.add_hyperparameters(rf_params)
    cs.add_conditions([
        InCondition(child=hp, parent=algorithm, values=["rf"])
        for hp in rf_params
    ])

    # --- SVM ---------------------------------------------------------------
    # We define a few possible types of SVM-kernels and add them as "kernel" to our cs
    kernel = CategoricalHyperparameter(
        "kernel", ["linear", "rbf", "poly", "sigmoid"], default="poly")
    # There are some hyperparameters shared by all kernels
    C = UniformFloatHyperparameter("C", 0.001, 1000.0, default=1.0)
    shrinking = CategoricalHyperparameter("shrinking", ["true", "false"], default="true")

    svm_params = [kernel, C, shrinking]
    cs.add_hyperparameters(svm_params)
    # Conditioning the kernel itself on algorithm == "svm" replaces the
    # original attempt to stack conditions on top of condition objects (which
    # are not valid children and referenced an undefined name).
    cs.add_conditions([
        InCondition(child=hp, parent=algorithm, values=["svm"])
        for hp in svm_params
    ])

    # Others are kernel-specific, so we can add conditions to limit the searchspace
    degree = UniformIntegerHyperparameter(
        "degree", 1, 5, default=3)  # Only used by kernel poly
    coef0 = UniformFloatHyperparameter("coef0", 0.0, 10.0, default=0.0)  # poly, sigmoid
    cs.add_hyperparameters([degree, coef0])
    use_degree = InCondition(child=degree, parent=kernel, values=["poly"])
    use_coef0 = InCondition(child=coef0, parent=kernel, values=["poly", "sigmoid"])
    cs.add_conditions([use_degree, use_coef0])

    # This also works for parameters that are a mix of categorical and values from a range of numbers
    # For example, gamma can be either "auto" or a fixed float
    gamma = CategoricalHyperparameter(
        "gamma", ["auto", "value"], default="auto")  # only rbf, poly, sigmoid
    gamma_value = UniformFloatHyperparameter("gamma_value", 0.0001, 8, default=1)
    cs.add_hyperparameters([gamma, gamma_value])
    # We only activate gamma_value if gamma is set to "value"
    use_gamma_value = InCondition(child=gamma_value, parent=gamma, values=["value"])
    # And again we can restrict the use of gamma in general to the choice of the kernel
    use_gamma = InCondition(child=gamma, parent=kernel, values=["rbf", "poly", "sigmoid"])
    cs.add_conditions([use_gamma_value, use_gamma])

    return cs
def bayesianmcmc(self):
    """Search the image-classification pipeline space with SMAC.

    A pipeline is feature extraction (haralick / VGG / inception), then
    dimensionality reduction (PCA / ISOMAP), then a classifier (SVM / RF).
    Each configuration is scored by the mean log-loss over a 3-fold
    stratified split of the 80% training part of the dataset found under
    ``self.data_loc + 'datasets/' + self.data_name``.

    Side effects: sets ``self.potential``, ``self.best_pipelines``,
    ``self.times``, ``self.error_curves``, ``self.incumbents`` and
    ``self.all_incumbents``, may write CNN feature caches below
    ``self.data_loc + 'features/bayesian1/'`` and pickles ``self`` to
    ``self.results_loc + 'intermediate/SMAC/...'``.

    NOTE(review): ``smac.optimize()`` is unpacked into five values here, so
    this presumably relies on a modified SMAC — confirm.
    """
    from sklearn.decomposition import PCA
    from sklearn.manifold import Isomap
    from sklearn import svm
    from sklearn.ensemble import RandomForestClassifier
    from mahotas.features import haralick
    import os
    from sklearn.model_selection import StratifiedKFold, train_test_split
    from sklearn import metrics
    from sklearn.preprocessing import StandardScaler
    import cv2
    from sklearn.neighbors import KNeighborsClassifier

    def naive_all_features(names):
        # Flattened raw pixels of every image, stacked row-wise.
        # NOTE(review): I.shape is read before the `I is None` check, so an
        # unreadable image raises AttributeError — confirm intended handling.
        # NOTE(review): the if/else nesting was reconstructed from a
        # flattened source; verify against the original layout.
        f = []
        for i in range(len(names)):
            I = cv2.imread(names[i])
            l = I.shape
            f1 = []
            if I is None or I.size == 0 or np.sum(
                    I[:]) == 0 or I.shape[0] == 0 or I.shape[1] == 0:
                if len(l) == 3:
                    f1 = np.zeros((1, l[0] * l[1] * l[2]))
            else:
                f1 = I.flatten()
                f1 = np.expand_dims(f1, 0)
            if i == 0:
                f = f1
            else:
                f = np.vstack((f, f1))
        return f

    def haralick_all_features(X, distance=1):
        # Mean Haralick features (one row of 13 values per image); empty or
        # unreadable images contribute a row of zeros.
        f = []
        for i in range(len(X)):
            I = cv2.imread(X[i])
            if I is None or I.size == 0 or np.sum(
                    I[:]) == 0 or I.shape[0] == 0 or I.shape[1] == 0:
                h = np.zeros((1, 13))
            else:
                I = cv2.cvtColor(I, cv2.COLOR_BGR2GRAY)
                h = haralick(I,
                             distance=distance,
                             return_mean=True,
                             ignore_zeros=False)
                h = np.expand_dims(h, 0)
            if i == 0:
                f = h
            else:
                f = np.vstack((f, h))
        return f

    def CNN_all_features(names, cnn):
        # ImageNet-pretrained network outputs for every image, stacked
        # row-wise. Any `cnn` other than 'VGG' selects InceptionV3.
        # NOTE(review): the VGG19 preprocess_input is applied for both
        # networks — confirm this is intended for InceptionV3.
        from keras.applications.vgg19 import VGG19
        from keras.applications.inception_v3 import InceptionV3
        from keras.applications.vgg19 import preprocess_input
        f = []
        if cnn == 'VGG':
            model = VGG19(weights='imagenet')
            dsize = (224, 224)
        else:
            model = InceptionV3(weights='imagenet')
            dsize = (299, 299)
        for i in range(len(names)):
            img = cv2.imread(names[i])
            img = cv2.resize(img, dsize=dsize)
            img = img.astype('float32')
            x = np.expand_dims(img, axis=0)
            x = preprocess_input(x)
            features = model.predict(x)
            if i == 0:
                f = features
            else:
                f = np.vstack((f, features))
        return f

    def cached_cnn_features(names, X, cnn, prefix):
        # Rows X of the CNN features of ALL images; the full matrix is
        # computed once and cached as an .npz file. Files are opened through
        # context managers so no handle is leaked.
        cache = (self.data_loc + 'features/bayesian1/' + prefix + '_' +
                 self.data_name + '.npz')
        if os.path.exists(cache):
            with np.load(cache) as npz:
                return npz['arr_0'][X, :]
        f = CNN_all_features(names, cnn)
        with open(cache, 'wb') as fh:
            np.savez(fh, f)
        return f[X, :]

    def VGG_all_features(names, X):
        # VGG19 features for the rows listed in X.
        return cached_cnn_features(names, X, 'VGG', 'VGG')

    def inception_all_features(names, X):
        # InceptionV3 features for the rows listed in X.
        return cached_cnn_features(names, X, 'inception', 'inception')

    def principal_components(X, whiten=True):
        # Fit a PCA that keeps the smallest number of leading components
        # whose cumulative explained-variance ratio reaches 95%.
        # NOTE(review): `whiten` is only used for the first (probing) fit;
        # the returned PCA is built without it.
        pca = PCA(whiten=whiten)
        maxvar = 0.95
        X = np.asarray(X)
        if len(X.shape) == 1:
            X = X.reshape(-1, 1)
        data = X
        pca.fit(X)
        var = pca.explained_variance_ratio_
        s1 = 0
        for i in range(len(var)):
            s1 += var[i]
        s = 0
        for i in range(len(var)):
            s += var[i]
            if (s * 1.0 / s1) >= maxvar:
                break
        pca = PCA(n_components=i + 1)
        pca.fit(data)
        return pca

    def isomap(X, n_neighbors=5, n_components=2):
        # Fit an Isomap embedding on X (1-D input becomes a single column).
        iso = Isomap(n_components=n_components, n_neighbors=n_neighbors)
        X = np.asarray(X)
        if len(X.shape) == 1:
            X = X.reshape(-1, 1)
        iso.fit(X)
        return iso

    def random_forests(X, y, n_estimators, max_features):
        # Fit a class-balanced random forest.
        clf = RandomForestClassifier(n_estimators=n_estimators,
                                     max_features=max_features,
                                     class_weight='balanced')
        clf.fit(X, y)
        return clf

    def support_vector_machines(X, y, C, gamma):
        # Fit a class-balanced SVC with probability estimates enabled
        # (predict_proba is needed for the log-loss below).
        clf = svm.SVC(C=C,
                      gamma=gamma,
                      class_weight='balanced',
                      probability=True)
        clf.fit(X, y)
        return clf

    def knn(X, y, neighbors=1):
        # Fit a k-nearest-neighbours classifier.
        clf = KNeighborsClassifier(n_neighbors=neighbors)
        clf.fit(X, y)
        return clf

    def pipeline_from_cfg(cfg):
        # Target function for SMAC: mean validation log-loss of the pipeline
        # described by `cfg` over the cross-validation folds.
        # Inactive parameters are stored as None; drop every falsy entry.
        cfg = {k: cfg[k] for k in cfg if cfg[k]}
        # Load the data: one sub-directory per class, hidden entries skipped.
        data_home = self.data_loc + 'datasets/' + self.data_name + '/'
        l1 = os.listdir(data_home)
        y = []
        names = []
        cnt = 0
        for z in range(len(l1)):
            if l1[z][0] == '.':
                continue
            l = os.listdir(data_home + l1[z] + '/')
            y += [cnt] * len(l)
            cnt += 1
            for i in range(len(l)):
                names.append(data_home + l1[z] + '/' + l[i])
        # Train val split; X is only a placeholder, the split is tracked
        # through the index array.
        X = np.empty((len(y), 1))
        indices = np.arange(len(y))
        X1, _, y1, y_val, id1, _ = train_test_split(X,
                                                    y,
                                                    indices,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    shuffle=True)
        s = []
        val_splits = 3
        kf = StratifiedKFold(n_splits=val_splits,
                             random_state=42,
                             shuffle=True)
        names1 = []
        for i in range(len(id1)):
            names1.append((names[id1[i]]))
        f11 = []
        for idx1, idx2 in kf.split(X1, y1):
            # File names, labels and global indices of this fold
            ids1 = []
            X_train = []
            y_train = []
            for i in idx1:
                X_train.append(names1[i])
                y_train.append(y1[i])
                ids1.append(id1[i])
            X_val = []
            y_val = []
            ids2 = []
            for i in idx2:
                X_val.append(names1[i])
                y_val.append(y1[i])
                ids2.append(id1[i])
            # Feature extraction
            f_train = []
            f_val = []
            if cfg['feature_extraction'] == "haralick":
                f_val = haralick_all_features(X_val, cfg["haralick_distance"])
                f_train = haralick_all_features(X_train,
                                                cfg["haralick_distance"])
            elif cfg['feature_extraction'] == "VGG":
                f_val = VGG_all_features(names, ids2)
                f_train = VGG_all_features(names, ids1)
            elif cfg['feature_extraction'] == "inception":
                f_val = inception_all_features(names, ids2)
                f_train = inception_all_features(names, ids1)
            elif cfg['feature_extraction'] == "naive_feature_extraction":
                f_val = naive_all_features(X_val)
                f_train = naive_all_features(X_train)
            # Dimensionality reduction (fitted on the training fold only)
            if cfg['dimensionality_reduction'] == "PCA":
                cfg["pca_whiten"] = True if cfg[
                    "pca_whiten"] == "true" else False
                dr = principal_components(f_train, cfg["pca_whiten"])
                f_train = dr.transform(f_train)
                f_val = dr.transform(f_val)
            elif cfg['dimensionality_reduction'] == "ISOMAP":
                dr = isomap(f_train, cfg["n_neighbors"], cfg["n_components"])
                f_train = dr.transform(f_train)
                f_val = dr.transform(f_val)
            elif cfg[
                    'dimensionality_reduction'] == 'naive_dimensionality_reduction':
                f_train = f_train
                f_val = f_val
            # Pre-processing
            normalizer = StandardScaler().fit(f_train)
            f_train = normalizer.transform(f_train)
            f_val = normalizer.transform(f_val)
            # Learning algorithms
            if cfg['learning_algorithm'] == "RF":
                clf = random_forests(f_train, y_train, cfg["n_estimators"],
                                     cfg["max_features"])
            elif cfg['learning_algorithm'] == "SVM":
                clf = support_vector_machines(f_train, y_train, cfg["svm_C"],
                                              cfg["svm_gamma"])
            elif cfg['learning_algorithm'] == 'naive_learning_algorithm':
                clf = knn(f_train, y_train)
            p_pred = clf.predict_proba(f_val)
            f11.append(metrics.log_loss(y_val, p_pred))
            s.append(clf.score(f_val, y_val))
        return np.mean(f11)

    self.potential = []
    self.best_pipelines = []
    self.times = []
    self.error_curves = []

    # Configuration space: three categorical pipeline steps plus the
    # parameters of each choice, activated through conditions.
    cs = ConfigurationSpace()
    feature_extraction = CategoricalHyperparameter(
        "feature_extraction", ["haralick", "VGG", "inception"],
        default="haralick")
    cs.add_hyperparameter(feature_extraction)
    dimensionality_reduction = CategoricalHyperparameter(
        "dimensionality_reduction", ["PCA", "ISOMAP"], default="PCA")
    cs.add_hyperparameter(dimensionality_reduction)
    learning_algorithm = CategoricalHyperparameter("learning_algorithm",
                                                   ["SVM", "RF"],
                                                   default="RF")
    cs.add_hyperparameter(learning_algorithm)

    haralick_distance = UniformIntegerHyperparameter("haralick_distance",
                                                     1,
                                                     3,
                                                     default=1)
    cs.add_hyperparameter(haralick_distance)
    cond1 = InCondition(child=haralick_distance,
                        parent=feature_extraction,
                        values=["haralick"])
    cs.add_condition(cond1)

    pca_whiten = CategoricalHyperparameter("pca_whiten", ["true", "false"],
                                           default="true")
    cs.add_hyperparameter(pca_whiten)
    cs.add_condition(
        InCondition(child=pca_whiten,
                    parent=dimensionality_reduction,
                    values=["PCA"]))

    n_neighbors = UniformIntegerHyperparameter("n_neighbors", 3, 7, default=5)
    n_components = UniformIntegerHyperparameter("n_components",
                                                2,
                                                4,
                                                default=2)
    cs.add_hyperparameters([n_neighbors, n_components])
    cs.add_condition(
        InCondition(child=n_components,
                    parent=dimensionality_reduction,
                    values=["ISOMAP"]))
    cs.add_condition(
        InCondition(child=n_neighbors,
                    parent=dimensionality_reduction,
                    values=["ISOMAP"]))

    svm_C = UniformFloatHyperparameter("svm_C", 0.1, 100.0, default=1.0)
    cs.add_hyperparameter(svm_C)
    svm_gamma = UniformFloatHyperparameter("svm_gamma", 0.01, 8, default=1)
    cs.add_hyperparameter(svm_gamma)
    cond1 = InCondition(child=svm_C,
                        parent=learning_algorithm,
                        values=["SVM"])
    cond2 = InCondition(child=svm_gamma,
                        parent=learning_algorithm,
                        values=["SVM"])
    cs.add_conditions([cond1, cond2])

    n_estimators = UniformIntegerHyperparameter("n_estimators",
                                                8,
                                                300,
                                                default=10)
    max_features = UniformFloatHyperparameter("max_features",
                                              0.3,
                                              0.8,
                                              default=0.5)
    cs.add_hyperparameters([max_features, n_estimators])
    cond1 = InCondition(child=n_estimators,
                        parent=learning_algorithm,
                        values=["RF"])
    cond2 = InCondition(child=max_features,
                        parent=learning_algorithm,
                        values=["RF"])
    cs.add_conditions([cond1, cond2])

    scenario = Scenario({
        "run_obj": "quality",
        "cutoff_time": 100000,
        "runcount_limit": 10000 * 10,
        "cs": cs,
        "maxR": 100000,
        "wallclock_limit": 1000000,
        "deterministic": "true"
    })
    smac = SMAC(scenario=scenario,
                rng=np.random.RandomState(42),
                tae_runner=pipeline_from_cfg)
    incumbent, incs, incumbents, incumbents1, times = smac.optimize()
    inc_value = pipeline_from_cfg(incumbent)

    self.best_pipelines.append(incumbent)
    self.potential.append(inc_value)
    self.incumbents = incumbents
    self.all_incumbents = incumbents1
    self.error_curves.append(incs)
    self.times = times

    # Persist the whole object; the context manager guarantees the file is
    # flushed and closed even if pickling fails.
    result_path = (self.results_loc + 'intermediate/SMAC/SMAC_' +
                   self.data_name + '_run_' + str(self.run) + '_full.pkl')
    with open(result_path, 'wb') as fh:
        pickle.dump(self, fh)
step2_child_anova = InCondition(child=anova_perc, parent=step2, values=["cpoFilterAnova(perc=perc_val)"]) step2_child_kruskal = InCondition(child=kruskal_perc, parent=step2, values=["cpoFilterKruskal(perc=perc_val)"]) step2_child_univar = InCondition(child=univar_perc, parent=step2, values=["cpoFilterUnivariate(perc=perc_val)"]) step2_child_pca = InCondition( child=pca_perc, parent=step2, values=["cpoPca(center = FALSE, rank = rank_val)"]) cs.add_conditions([ step2_child_anova, step2_child_kruskal, step2_child_univar, step2_child_pca ]) step3 = CategoricalHyperparameter( "Model", ['kknn', 'ksvm', 'ranger', 'xgboost', 'naiveBayes']) cs.add_hyperparameter(step3) hyper_kknn = UniformIntegerHyperparameter("lrn_kknn_k", 1, 19, default_value=1) hyper_ksvm_C = UniformFloatHyperparameter("lrn_svm_C", 2**(-15), 2**(15), default_value=1) hyper_ksvm_sigma = UniformFloatHyperparameter("lrn_svm_sigma", 2**(-15), 2**(15),
def main():
    """Run SMAC over the ILS / SR hyperparameter space and print the result.

    Command-line arguments:
        --dataset: name of the dataset to run SMAC on (default ``labelme``).
        --m: number of codebooks (default ``8``).

    ``dataset`` and ``m`` are passed through the configuration space as
    single-choice categoricals, so every sampled configuration carries them
    unchanged. SMAC is limited to 200 evaluations of ``recall_from_cfg``;
    afterwards the incumbent is evaluated once more and its value printed.
    """
    parser = argparse.ArgumentParser(description='Dump data of a log.')
    parser.add_argument('--dataset', type=str, default='labelme',
                        help='dataset to run smac on')
    parser.add_argument('--m', type=int, default=8,
                        help=' number of codebooks')
    args = parser.parse_args()

    # Fixed parameters: one-element categoricals that never vary.
    dataset = CategoricalHyperparameter(
        "dataset", [args.dataset], default_value=args.dataset)
    m = CategoricalHyperparameter(
        "m", [str(args.m)], default_value=str(args.m))

    # Build Configuration Space which defines all parameters and their ranges
    ilsiter = UniformIntegerHyperparameter("ilsiter", 1, 16, default_value=8)
    # The upper bound of npert depends on --m. Clamp the default so that it
    # stays inside [0, m - 1]; a fixed default of 4 is rejected for m <= 4.
    npert_upper = args.m - 1
    npert = UniformIntegerHyperparameter(
        "npert", 0, npert_upper, default_value=min(4, npert_upper))
    randord = CategoricalHyperparameter(
        "randord", ["true", "false"], default_value="true")

    # SR parameters
    sr_method = CategoricalHyperparameter(
        "SR_method", ["LSQ", "SR_C", "SR_D"], default_value="SR_D")
    schedule = CategoricalHyperparameter(
        "schedule", ["1", "2", "3"], default_value="1")
    p = UniformFloatHyperparameter("p", 0.1, 1., default_value=0.5)

    # Schedule and p only make sense in SR, so they are inactive for "LSQ".
    use_schedule = InCondition(
        child=schedule, parent=sr_method, values=["SR_C", "SR_D"])
    use_p = InCondition(child=p, parent=sr_method, values=["SR_C", "SR_D"])

    cs = ConfigurationSpace()
    cs.add_hyperparameters(
        [dataset, m, ilsiter, npert, randord, sr_method, schedule, p])
    cs.add_conditions([use_schedule, use_p])

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 200,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "false"
    })

    # Optimize, using a SMAC-object. The target function is wrapped
    # explicitly so that pynisher is switched off.
    thing_to_call = AbstractTAFunc(recall_from_cfg, use_pynisher=False)
    smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
                tae_runner=thing_to_call)
    print("Optimizing!")
    incumbent = smac.optimize()

    # Re-evaluate the incumbent to report its value.
    inc_value = recall_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))
def test_photon_implementation_switch(self):
    """Check PHOTON's SMAC run on a Switch against a direct SMAC run.

    The PHOTON pipeline (StandardScaler -> PCA -> Switch of SVC /
    RandomForestClassifier) is fitted first. The same search space is then
    rebuilt by hand as a ConfigSpace and optimized with SMAC4HPO directly.
    For the configuration ids present in both run histories, the per-config
    costs may differ by at most 0.01.
    """
    # --- PHOTON implementation -------------------------------------------
    self.pipe.add(PipelineElement('StandardScaler'))
    self.pipe += PipelineElement(
        'PCA', hyperparameters={'n_components': IntegerRange(5, 30)})

    estimator_switch = Switch("Estimator")
    svc_element = PipelineElement(
        'SVC',
        hyperparameters={
            'kernel': Categorical(["rbf", 'poly']),
            'C': FloatRange(0.5, 200),
        },
        gamma='auto')
    estimator_switch += svc_element
    forest_element = PipelineElement(
        'RandomForestClassifier',
        hyperparameters={
            'criterion': Categorical(['gini', 'entropy']),
            'min_samples_split': IntegerRange(2, 4),
        })
    estimator_switch += forest_element
    self.pipe += estimator_switch

    self.X, self.y = self.simple_classification()
    self.pipe.fit(self.X, self.y)

    # --- direct AUTO ML implementation -----------------------------------
    # Build Configuration Space which defines all parameters and their ranges
    cs = ConfigurationSpace()

    pca_components = UniformIntegerHyperparameter("PCA__n_components", 5, 30)
    cs.add_hyperparameter(pca_components)

    branch = CategoricalHyperparameter("Estimator_switch", ['svc', 'rf'])
    cs.add_hyperparameter(branch)

    svc_kernel = CategoricalHyperparameter("SVC__kernel", ["rbf", 'poly'])
    cs.add_hyperparameter(svc_kernel)
    svc_c = UniformFloatHyperparameter("SVC__C", 0.5, 200)
    cs.add_hyperparameter(svc_c)

    rf_criterion = CategoricalHyperparameter(
        "RandomForestClassifier__criterion", ['gini', 'entropy'])
    cs.add_hyperparameter(rf_criterion)
    rf_min_split = UniformIntegerHyperparameter(
        "RandomForestClassifier__min_samples_split", 2, 4)
    cs.add_hyperparameter(rf_min_split)

    # Each estimator-specific hyperparameter is only active while the
    # switch selects the estimator it belongs to.
    activation = (
        (svc_kernel, "svc"),
        (svc_c, "svc"),
        (rf_criterion, "rf"),
        (rf_min_split, "rf"),
    )
    cs.add_conditions([
        InCondition(child=hyperparameter, parent=branch, values=[choice])
        for hyperparameter, choice in activation
    ])

    # Scenario object
    scenario_options = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "wallclock_limit": self.time_limit,
        "limit_resources": False,
        'abort_on_first_run_crash': False
    }
    scenario = Scenario(scenario_options)

    # Optimize, using a SMAC directly
    smac = SMAC4HPO(scenario=scenario, rng=42,
                    tae_runner=self.objective_function_switch)
    smac.optimize()

    # Per-config cost tables of both runs. self.smac_helper["data"] is
    # presumably filled in by the PHOTON fit above - verify against the
    # test fixture.
    photon_costs = self.smac_helper["data"].solver.runhistory._cost_per_config
    direct_costs = smac.solver.runhistory._cost_per_config

    # Only the ids 1..k that exist in both histories are compared.
    shared_count = min(len(direct_costs), len(photon_costs))
    config_ids = range(1, shared_count + 1)
    direct_curve = np.array([direct_costs[cid] for cid in config_ids])
    photon_curve = np.array([photon_costs[cid] for cid in config_ids])

    largest_gap = np.max(np.abs(direct_curve - photon_curve))
    self.assertLessEqual(largest_gap, 0.01)