def initialize(self, stamp, acq_func, double_intensification, cache_directory,
               wallclock_limit, runcount_limit, cutoff, memory_limit,
               downsampling, intensification_fold_size,
               random_splitting_number, random_splitting_enabled):
    """Load the data and assemble statistics, pipeline runner and SMBO object.

    Parameters
    ----------
    stamp : str
        Run identifier; used for the statistics object, the logging
        directory name and the SMAC ``Stats`` object.
    acq_func : str
        Name of the acquisition function. Names starting with ``"pc"``
        enable the caching pipeline runner.
    double_intensification
        Forwarded unchanged to ``SMBOBuilder.build_pc_smbo``.
    cache_directory : str or None
        Directory for the pipeline cache. When ``None``, a ``cache``
        directory two levels above this file is used.
    wallclock_limit, runcount_limit, cutoff, memory_limit
        Limits placed in the scenario dictionary (``cutoff`` is stored
        under ``'cutoff_time'``).
    downsampling
        Forwarded to the pipeline runner and recorded in the statistics info.
    intensification_fold_size : int or None
        Number of cross-validation folds; also determines the instance ids
        ``0 .. n-1``. When ``None`` the single instance id ``1`` is used.
    random_splitting_number, random_splitting_enabled
        Forwarded unchanged to ``SMBOBuilder.build_pc_smbo``.

    Raises
    ------
    ValueError
        If ``acq_func`` is not one of the supported acquisition functions.
    """
    # Check if caching is enabled
    caching = acq_func[:2] == "pc"

    # Resolve and create the cache directory. ``exist_ok`` makes the creation
    # safe when another process creates the directory at the same time.
    if cache_directory is None:
        current_directory = dirname(dirname(os.path.abspath(__file__)))
        self.cache_directory = os.path.join(current_directory, 'cache')
    else:
        self.cache_directory = cache_directory
    os.makedirs(self.cache_directory, exist_ok=True)

    # Load data
    self.data = self.data_loader.get_data()

    # Build runhistory
    # TODO Does this work correctly for non-caching?
    runhistory = PCRunHistory(average_cost)

    # Setup statistics
    info = {
        'stamp': stamp,
        'caching': caching,
        'acquisition_function': acq_func,
        'cache_directory': self.cache_directory,
        'wallclock_limit': wallclock_limit,
        'downsampling': downsampling,
    }
    self.statistics = Statistics(stamp,
                                 self.output_dir,
                                 information=info,
                                 total_runtime=wallclock_limit)

    # The pipeline parts that get marginalized
    constant_pipeline_steps = [
        "one_hot_encoder", "imputation", "rescaling", "balancing",
        "feature_preprocessor"
    ]
    variable_pipeline_steps = ["classifier"]

    # The pipeline parts that can get cached
    cached_pipeline_steps = [
        ["one_hot_encoder", "imputation"],
        ["one_hot_encoder", "imputation", "rescaling", "balancing",
         "feature_preprocessor"],
    ]

    # Pick the pipeline runner; only the cached variant needs cache settings.
    if caching:
        pr = CachedPipelineRunner(
            self.data,
            self.data_loader.info,
            self.pipeline_space,
            runhistory,
            self.statistics,
            cached_pipeline_steps=cached_pipeline_steps,
            cache_directory=self.cache_directory,
            downsampling=downsampling,
            num_cross_validation_folds=intensification_fold_size)
    else:
        pr = PipelineRunner(
            self.data,
            self.data_loader.info,
            self.pipeline_space,
            runhistory,
            self.statistics,
            downsampling=downsampling,
            num_cross_validation_folds=intensification_fold_size)

    # Choose the model targets that belong to the acquisition function
    if acq_func in ["eips", "pc-eips", "m-eips", "pc-m-eips", "pceips",
                    "pc-m-pceips"]:
        model_target_names = ['cost', 'time']
    elif acq_func in ["ei", "pc-ei", "m-ei", "pc-m-ei"]:
        model_target_names = ['cost']
    elif acq_func in ["roar", "pc-roar-mrs", "pc-roar-sigmoid-rs"]:
        model_target_names = []
    else:
        # Not a valid acquisition function
        raise ValueError("The provided acquisition function is not valid")

    # Trajectory output location
    trajectory_path = self.output_dir + "/logging/" + stamp
    os.makedirs(trajectory_path, exist_ok=True)
    self.trajectory_path_json = trajectory_path + "/traj_aclib2.json"
    self.trajectory_path_csv = trajectory_path + "/traj_old.csv"

    # Instance ids: one per fold, or the single id 1 without folds.
    if intensification_fold_size is None:
        intensification_instances = [1]
    else:
        intensification_instances = list(range(intensification_fold_size))

    # Build scenario (it expects every instance wrapped in its own list)
    args = {
        'cs': self.config_space,
        'run_obj': "quality",
        'runcount_limit': runcount_limit,
        'wallclock_limit': wallclock_limit,
        'memory_limit': memory_limit,
        'cutoff_time': cutoff,
        'deterministic': "true",
        'abort_on_first_run_crash': "false",
        'instances': [[i] for i in intensification_instances],
    }
    scenario = Scenario(args)

    # Build stats
    stats = Stats(scenario,
                  output_dir=self.output_dir + "/smac/",
                  stamp=stamp)

    # Build tae runner
    tae_runner = ExecuteTAFuncDict(ta=pr.run,
                                   stats=stats,
                                   runhistory=runhistory,
                                   run_obj=scenario.run_obj,
                                   memory_limit=scenario.memory_limit)

    # Build SMBO object (it takes the flat list of instance ids)
    smbo_builder = SMBOBuilder()
    self.smbo = smbo_builder.build_pc_smbo(
        tae_runner=tae_runner,
        stats=stats,
        scenario=scenario,
        runhistory=runhistory,
        aggregate_func=average_cost,
        acq_func_name=acq_func,
        model_target_names=model_target_names,
        logging_directory=trajectory_path,
        double_intensification=double_intensification,
        constant_pipeline_steps=constant_pipeline_steps,
        variable_pipeline_steps=variable_pipeline_steps,
        cached_pipeline_steps=cached_pipeline_steps,
        intensification_instances=intensification_instances,
        num_marginalized_configurations_by_random_search=20,
        num_configs_for_marginalization=40,
        random_splitting_number=random_splitting_number,
        random_splitting_enabled=random_splitting_enabled)
def setUp(self):
    """Create a quality scenario on the Branin config space, without output dir."""
    scenario_options = {
        'cs': test_helpers.get_branin_config_space(),
        'run_obj': 'quality',
        'output_dir': '',
    }
    self.scenario = Scenario(scenario_options)
def smac():
    """Tune the hyperparameters with SMAC and return the best ones as a dict.

    The value ranges are read from the module-level
    ``hyperparameter_values_dic``; every hyperparameter uses the lower bound
    of its range as its default value. The learning rate is only tuned when
    the module-level ``optimizer`` is not ``"cocob"``.

    Returns
    -------
    dict
        ``incumbent.get_dictionary()`` of the configuration found by SMAC.
    """
    # Build Configuration Space which defines all parameters and their ranges
    configuration_space = ConfigurationSpace()

    def ranged(hyperparameter_type, name):
        # The lower bound doubles as the default value.
        bounds = hyperparameter_values_dic[name]
        return hyperparameter_type(name,
                                   bounds[0],
                                   bounds[1],
                                   default_value=bounds[0])

    rate_of_learning = ranged(UniformFloatHyperparameter, "rate_of_learning")
    shared_hyperparameters = [
        ranged(UniformIntegerHyperparameter, "cell_dimension"),
        ranged(UniformIntegerHyperparameter, "num_hidden_layers"),
        ranged(UniformIntegerHyperparameter, "minibatch_size"),
        ranged(UniformIntegerHyperparameter, "max_epoch_size"),
        ranged(UniformIntegerHyperparameter, "max_num_epochs"),
        ranged(UniformFloatHyperparameter, "l2_regularization"),
        ranged(UniformFloatHyperparameter, "gaussian_noise_stdev"),
        ranged(UniformFloatHyperparameter, "random_normal_initializer_stdev"),
    ]

    # add the hyperparameter for learning rate only if the optimization is not cocob
    if optimizer == "cocob":
        tuned_hyperparameters = shared_hyperparameters
    else:
        tuned_hyperparameters = [rate_of_learning] + shared_hyperparameters
    configuration_space.add_hyperparameters(tuned_hyperparameters)

    # creating the scenario object
    scenario_options = {
        "run_obj": "quality",
        "runcount-limit": hyperparameter_tuning_configs.SMAC_RUNCOUNT_LIMIT,
        "cs": configuration_space,
        "deterministic": "true",
        "abort_on_first_run_crash": "false",
    }
    scenario = Scenario(scenario_options)

    # optimize using an SMAC object
    smac_facade = SMAC(scenario=scenario,
                       rng=np.random.RandomState(seed),
                       tae_runner=train_model_smac)
    incumbent = smac_facade.optimize()

    # Re-evaluate the incumbent once to report its error.
    smape_error = train_model_smac(incumbent)

    print("Optimized configuration: {}".format(incumbent))
    print("Optimized Value: {}\n".format(smape_error))
    return incumbent.get_dictionary()
beta1 = UniformFloatHyperparameter("beta1", 0.5, 0.99, default_value=0.9)
cs.add_hyperparameters([lr, beta1])


def kmnist_from_cfg(cfg):
    """Train a fresh CNN with the given configuration and return its error.

    Entries of ``cfg`` whose value is falsy are dropped before ``lr`` and
    ``beta1`` are looked up.

    Returns ``1 - val_accuracy`` so that a lower value is better.
    """
    cfg = {name: cfg[name] for name in cfg if cfg[name]}
    lr, beta1 = cfg["lr"], cfg["beta1"]
    val_accuracy = train(CNN(), lr, beta1, trainDataloader, valDataloader,
                         epochs)
    # Minimize
    return 1 - val_accuracy


# Scenario object
scenario = Scenario({
    "run_obj": "quality",    # we optimize quality (alternatively runtime)
    "runcount-limit": 200,   # maximum function evaluations
    "cs": cs,                # configuration space
    "deterministic": "true",
})

# Optimize, using a SMAC-object
print("Optimizing! Depending on your machine, this might take a few minutes.")
smac = SMAC(
    scenario=scenario,
    rng=np.random.RandomState(42),
    tae_runner=kmnist_from_cfg,
)
# Switch off pynisher on the intensifier's target-algorithm runner.
smac.solver.intensifier.tae_runner.use_pynisher = False

incumbent = smac.optimize()

# Evaluate the incumbent once more to report its value.
inc_value = kmnist_from_cfg(incumbent)
print("Optimized Value: %.2f" % (inc_value))
def get_tuned_config(self,
                     scenario: ASlibScenario,
                     runcount_limit: int = 42,
                     wallclock_limit: int = 300,
                     autofolio_config: dict = None,
                     seed: int = 42):
    '''
        uses SMAC3 to determine a well-performing configuration in the
        configuration space self.cs on the given scenario

        Arguments
        ---------
        scenario: ASlibScenario
            ASlib Scenario at hand
        runcount_limit: int
            runcount_limit for SMAC scenario
            (overwritten by autofolio_config)
        wallclock_limit: int
            wallclock limit in sec for SMAC scenario
            (overwritten by autofolio_config)
        autofolio_config: dict, or None
            An optional dictionary of configuration options; the keys
            "wallclock_limit", "runcount_limit" and "output-dir" are read.
            None is treated like an empty dictionary.
        seed: int
            random seed for SMAC

        Returns
        -------
        Configuration
            best incumbent configuration found by SMAC
    '''
    # A None default avoids sharing one dict object between calls and makes
    # the documented "or None" case work.
    if autofolio_config is None:
        autofolio_config = {}

    wallclock_limit = autofolio_config.get("wallclock_limit", wallclock_limit)
    runcount_limit = autofolio_config.get("runcount_limit", runcount_limit)

    taf = functools.partial(self.called_by_smac, scenario=scenario)

    # One SMAC instance per cross-validation fold, numbered from 1.
    max_fold = int(scenario.cv_data.max().max())

    ac_scenario = Scenario({
        "run_obj": "quality",  # we optimize quality
        "runcount-limit": runcount_limit,
        "cs": self.cs,  # configuration space
        "deterministic": "true",
        "instances": [[str(i)] for i in range(1, max_fold + 1)],
        "wallclock-limit": wallclock_limit,
        # a missing or empty output-dir is passed as the empty string
        "output-dir": autofolio_config.get("output-dir") or "",
    })

    # necessary to use stats options related to scenario information
    AC_Stats.scenario = ac_scenario

    # Optimize
    banner = ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>"
    self.logger.info(banner)
    self.logger.info("Start Configuration")
    self.logger.info(banner)

    smac = SMAC(scenario=ac_scenario,
                tae_runner=taf,
                rng=np.random.RandomState(seed))
    incumbent = smac.optimize()

    # Lazy logging arguments: no formatting cost when INFO is disabled and no
    # surprise if the incumbent happens to be a tuple.
    self.logger.info("Final Incumbent: %s", incumbent)

    return incumbent
# Or we can add multiple hyperparameters at once: num_trees = UniformIntegerHyperparameter("num_trees", 10, 50, default_value=10) max_features = UniformIntegerHyperparameter("max_features", 1, boston.data.shape[1], default_value=1) min_weight_frac_leaf = UniformFloatHyperparameter("min_weight_frac_leaf", 0.0, 0.5, default_value=0.0) criterion = CategoricalHyperparameter("criterion", ["mse", "mae"], default_value="mse") min_samples_to_split = UniformIntegerHyperparameter("min_samples_to_split", 2, 20, default_value=2) min_samples_in_leaf = UniformIntegerHyperparameter("min_samples_in_leaf", 1, 20, default_value=1) max_leaf_nodes = UniformIntegerHyperparameter("max_leaf_nodes", 10, 1000, default_value=100) cs.add_hyperparameters([num_trees, min_weight_frac_leaf, criterion, max_features, min_samples_to_split, min_samples_in_leaf, max_leaf_nodes]) # SMAC scenario oject scenario = Scenario({"run_obj": "quality", # we optimize quality (alternative runtime) "runcount-limit": 50, # maximum number of function evaluations "cs": cs, # configuration space "deterministic": "true", "memory_limit": 3072, # adapt this to reasonable value for your hardware }) # To optimize, we pass the function to the SMAC-object smac = SMAC(scenario=scenario, rng=np.random.RandomState(42), tae_runner=rf_from_cfg) # Example call of the function with default values # It returns: Status, Cost, Runtime, Additional Infos def_value = smac.get_tae_runner().run(cs.get_default_configuration(), 1)[1] print("Value for default configuration: %.2f" % (def_value)) # Start optimization try: incumbent = smac.optimize()
"Start this script with one of the following arguments in a suitable python-environment (that fulfills CAVE's requirements):\n" "'--generate' will generate suitable test-cases using SMAC-optimization \n" "'--cave' will analyze the results of the generate-option using cave \n" "'--clean' will delete previous results \n" "'--firefox' will open all reports in firefox.") if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG) #logging.basicConfig(level=logging.INFO) if len(sys.argv) < 2: print_help() elif sys.argv[1] == '--generate': generate_bohb_data() for scen in get_scenarios(): scenario = Scenario(scen) smac = SMAC4AC(scenario=scenario, rng=np.random.RandomState(42)) smac.optimize() elif sys.argv[1] == '--cave': failed = [] for scen in get_scenarios(): try: folder = [f for f in os.listdir(scen['output_dir']) if f.startswith('run')][0] cave = CAVE([os.path.join(scen['output_dir'], folder)], os.path.join(scen['output_dir'], 'CAVE_RESULT'), ta_exec_dir=['.'], validation_method='validation') cave.analyze({'fANOVA' : False, 'number_quantiles' : 2}) except: raise failed.append(scen['output_dir']) print("Failed: %s" % (str(failed)))
def test_Exception(self):
    """Constructing a Scenario from a list must raise a TypeError."""
    invalid_source = ['a', 'b']
    self.assertRaises(TypeError, Scenario, invalid_source)
def runhistory_builder(ta, scenario_dic, rng,
                       output_dir="/home/dfki/Desktop/temp"):
    """Run one fixed random-forest configuration and return it in a RunHistory.

    Parameters
    ----------
    ta
        Target algorithm handed to ``ExecuteTARun``.
    scenario_dic
        Scenario description passed to ``Scenario``.
    rng
        Random number generator passed to ``DefaultConfiguration``.
    output_dir : str
        Directory given to the ``TrajLogger``. The default is the previously
        hard-coded location, so existing callers behave as before.

    Returns
    -------
    RunHistory
        A run history containing exactly the one executed run.
    """
    tae_runner = ExecuteTARun(ta=ta)
    scenario = Scenario(scenario_dic)
    stats = Stats(scenario=scenario)
    traj_logger = TrajLogger(stats=stats, output_dir=output_dir)
    stats.start_timing()

    default_config_builder = DefaultConfiguration(tae_runner, scenario, stats,
                                                  traj_logger, rng)
    config = default_config_builder._select_configuration()

    # Overwrite the values of the selected configuration with a fixed pipeline.
    # NOTE(review): only ``_values`` is replaced; any cached vector
    # representation of the configuration is left untouched - confirm that
    # this is intended.
    config._values = {
        'balancing:strategy': 'none',
        'categorical_encoding:__choice__': 'one_hot_encoding',
        'classifier:__choice__': 'random_forest',
        'imputation:strategy': 'mean',
        'preprocessor:__choice__': 'no_preprocessing',
        'rescaling:__choice__': 'standardize',
        'categorical_encoding:one_hot_encoding:use_minimum_fraction': 'True',
        'classifier:random_forest:bootstrap': 'True',
        'classifier:random_forest:criterion': 'gini',
        'classifier:random_forest:max_depth': 10,
        'classifier:random_forest:max_features': 0.5,
        'classifier:random_forest:max_leaf_nodes': 'None',
        'classifier:random_forest:min_impurity_decrease': 0.0,
        'classifier:random_forest:min_samples_leaf': 1,
        'classifier:random_forest:min_samples_split': 2,
        'classifier:random_forest:min_weight_fraction_leaf': 0.0,
        'classifier:random_forest:n_estimators': 100,
        'categorical_encoding:one_hot_encoding:minimum_fraction': 0.01,
    }

    status, cost, runtime, additional_info = tae_runner.start(config=config,
                                                              instance=None)
    print(status, cost, runtime, additional_info)

    runhistory = RunHistory(aggregate_func=average_cost)
    runhistory.add(config=config,
                   cost=cost,
                   time=runtime,
                   status=status,
                   instance_id=None,
                   additional_info=additional_info)
    return runhistory
def test_write(self):
    """ Test whether a reloaded scenario still holds all the necessary
    information. A subset of parameters might change, such as the paths to
    pcs- or instance-files, so they are checked manually. """

    # Those values are allowed to change when writing to disk
    path_attributes = ("pcs_fn", "train_inst_fn", "test_inst_fn",
                       "feature_fn", "output_dir")

    def check_scen_eq(scen1, scen2):
        """ Customized check for scenario-equality, ignoring file-paths """
        for arg_name in scen1._arguments:
            dest = scen1._arguments[arg_name]['dest']
            attr = dest if dest else arg_name  # if 'dest' is None, use name
            if attr in path_attributes:
                continue
            if attr == 'cs':
                # Using repr because of cs-bug
                # (https://github.com/automl/ConfigSpace/issues/25)
                self.assertEqual(repr(scen1.cs), repr(scen2.cs))
            elif attr == 'feature_dict':
                self.assertEqual(len(scen1.feature_dict),
                                 len(scen2.feature_dict))
                for key in scen1.feature_dict:
                    self.assertTrue((scen1.feature_dict[key] ==
                                     scen2.feature_dict[key]).all())
            else:
                # msg names the attribute so a failure is self-explanatory
                self.assertEqual(getattr(scen1, attr),
                                 getattr(scen2, attr),
                                 msg=attr)

    # First check with file-paths defined
    feature_filename = 'test/test_files/scenario_test/features_multiple.txt'
    feature_filename = os.path.abspath(feature_filename)
    self.test_scenario_dict['feature_file'] = feature_filename
    scenario = Scenario(self.test_scenario_dict)
    # This injection would usually happen by the facade object!
    scenario.output_dir_for_this_run = scenario.output_dir
    scenario.write()
    path = os.path.join(scenario.output_dir, 'scenario.txt')

    def check_param_files(reloaded):
        """ Both pcs and json must exist; json is the default pcs_fn """
        pcs_path = os.path.join(scenario.output_dir, 'param.pcs')
        json_path = os.path.join(scenario.output_dir, 'param.json')
        self.assertTrue(os.path.exists(pcs_path))
        self.assertTrue(os.path.exists(json_path))
        self.assertEqual(reloaded.pcs_fn, json_path)

    scenario_reloaded = Scenario(path)
    check_scen_eq(scenario, scenario_reloaded)
    check_param_files(scenario_reloaded)

    # Now create new scenario without filepaths
    self.test_scenario_dict.update({
        'paramfile': None,
        'cs': scenario.cs,
        'feature_file': None,
        'features': scenario.feature_dict,
        'feature_names': scenario.feature_names,
        'instance_file': None,
        'instances': scenario.train_insts,
        'test_instance_file': None,
        'test_instances': scenario.test_insts
    })
    logging.debug(scenario_reloaded)
    scenario_no_fn = Scenario(self.test_scenario_dict)
    scenario_reloaded = Scenario(path)
    check_scen_eq(scenario_no_fn, scenario_reloaded)
    check_param_files(scenario_reloaded)
def test_no_output_dir(self):
    """With an empty output_dir, write_scenario_file returns a falsy value."""
    self.test_scenario_dict['output_dir'] = ""
    scenario = Scenario(self.test_scenario_dict)
    write_result = scenario.out_writer.write_scenario_file(scenario)
    self.assertFalse(write_result)
def test_merge_foreign_data(self):
    ''' test smac.utils.merge_foreign_data '''

    scenario = Scenario(self.test_scenario_dict)
    scenario_2 = Scenario(self.test_scenario_dict)
    scenario_2.feature_dict = {"inst_new": [4]}

    # init cs
    cs = ConfigurationSpace()
    cs.add_hyperparameter(
        UniformIntegerHyperparameter(name='a', lower=0, upper=100))
    cs.add_hyperparameter(
        UniformIntegerHyperparameter(name='b', lower=0, upper=100))

    # build runhistory
    rh_merge = RunHistory()
    config = Configuration(cs, values={'a': 1, 'b': 2})

    rh_merge.add(config=config,
                 instance_id="inst_new",
                 cost=10,
                 time=20,
                 status=StatusType.SUCCESS,
                 seed=None,
                 additional_info=None)

    # "d" is an instance in <scenario>
    rh_merge.add(config=config,
                 instance_id="d",
                 cost=5,
                 time=20,
                 status=StatusType.SUCCESS,
                 seed=None,
                 additional_info=None)

    # build empty rh
    rh_base = RunHistory()

    merge_foreign_data(scenario=scenario,
                       runhistory=rh_base,
                       in_scenario_list=[scenario_2],
                       in_runhistory_list=[rh_merge])

    # both runs should be in the runhistory
    # but we should not use the data to update the cost of config
    # (assertEqual reports the actual length when the check fails)
    self.assertEqual(len(rh_base.data), 2)
    self.assertTrue(np.isnan(rh_base.get_cost(config)))

    # we should not get direct access to external run data
    runs = rh_base.get_runs_for_config(config,
                                       only_max_observed_budget=True)
    self.assertEqual(len(runs), 0)

    # After adding a run on the instance "inst_new_2", which is not a key of
    # scenario_2.feature_dict, merging is expected to raise a ValueError.
    rh_merge.add(config=config,
                 instance_id="inst_new_2",
                 cost=10,
                 time=20,
                 status=StatusType.SUCCESS,
                 seed=None,
                 additional_info=None)

    with self.assertRaises(ValueError):
        merge_foreign_data(scenario=scenario,
                           runhistory=rh_base,
                           in_scenario_list=[scenario_2],
                           in_runhistory_list=[rh_merge])
def fit(self, X_train, y_train, **fit_params):
    """Search the estimator's configuration space with SMAC and refit the best.

    Builds the SMAC search space and scenario from the estimator, evaluates
    every candidate with cross validation, and finally trains the incumbent
    on the complete training data.

    Parameters
    ----------
    X_train, y_train
        Training data, passed to cross validation and to the final fit.
    **fit_params
        Extra keyword arguments forwarded to every ``trainable.fit`` call
        made directly by this method.

    Returns
    -------
    self
        ``self._best_estimator`` holds the trained incumbent, or ``None``
        when the optimization failed with an ordinary exception.
    """
    data_schema = lale.helpers.fold_schema(X_train, y_train, self.cv,
                                           self.estimator.is_classifier())
    self.search_space: ConfigurationSpace = get_smac_space(
        self.estimator,
        lale_num_grids=self.lale_num_grids,
        data_schema=data_schema)

    # Scenario object
    scenario_options = {
        "run_obj": "quality",  # optimize quality (alternatively runtime)
        "runcount-limit": self.max_evals,  # maximum function evaluations
        "cs": self.search_space,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    }
    if self.max_opt_time is not None:
        scenario_options["wallclock_limit"] = self.max_opt_time
    self.scenario = Scenario(scenario_options)

    self.cv = check_cv(self.cv,
                       y=y_train,
                       classifier=self.estimator.is_classifier())

    def smac_train_test(trainable, X_train, y_train):
        """Return (cv_score, logloss, execution_time) for one candidate."""
        try:
            cv_score, logloss, execution_time = cross_val_score_track_trials(
                trainable, X_train, y_train, cv=self.cv, scoring=self.scoring)
            logger.debug("Successful trial of SMAC")
        # ``Exception`` rather than ``BaseException``: a KeyboardInterrupt or
        # SystemExit must stop the run instead of starting the fallback.
        except Exception as e:
            # If there is any error in cross validation, use the score based
            # on a random train-test split as the evaluation criterion
            if self.handle_cv_failure:
                (
                    X_train_part,
                    X_validation,
                    y_train_part,
                    y_validation,
                ) = train_test_split(X_train, y_train, test_size=0.20)
                start = time.time()
                trained = trainable.fit(X_train_part, y_train_part,
                                        **fit_params)
                scorer = check_scoring(trainable, scoring=self.scoring)
                cv_score = scorer(trained, X_validation, y_validation)
                execution_time = time.time() - start
                y_pred_proba = trained.predict_proba(X_validation)
                try:
                    logloss = log_loss(y_true=y_validation,
                                       y_pred=y_pred_proba)
                except Exception:
                    logloss = 0
                    logger.debug("Warning, log loss cannot be computed")
            else:
                logger.debug("Error {} with pipeline:{}".format(
                    e, trainable.to_json()))
                raise
        return cv_score, logloss, execution_time

    def f(trainable):
        """Objective minimized by SMAC: ``self.best_score - score``."""
        try:
            score, _logloss, _execution_time = smac_train_test(
                trainable, X_train=X_train, y_train=y_train)
        except BaseException as e:
            # Only logged and re-raised, so nothing is swallowed here.
            logger.warning(
                f"Exception caught in SMACCV:{type(e)}, {traceback.format_exc()}, SMAC will set a cost_for_crash to MAXINT."
            )
            raise
        return self.best_score - score

    try:
        smac = orig_SMAC(
            scenario=self.scenario,
            rng=np.random.RandomState(42),
            tae_runner=lale_op_smac_tae(self.estimator, f),
        )
        incumbent = smac.optimize()
        self.trials = smac.get_runhistory()
        trainable = lale_trainable_op_from_config(self.estimator, incumbent)
        # get the trainable corresponding to the best params and train it on
        # the entire training dataset.
        trained = trainable.fit(X_train, y_train, **fit_params)
        self._best_estimator = trained
    except BudgetExhaustedException:
        # NOTE(review): ``self._best_estimator`` is not assigned on this
        # path - confirm it is initialized elsewhere.
        logger.warning(
            "Maximum alloted optimization time exceeded. Optimization exited prematurely"
        )
    except Exception as e:
        # Best effort for ordinary failures; interrupts and interpreter
        # shutdown are deliberately not caught.
        logger.warning("Error during optimization: {}".format(e))
        self._best_estimator = None

    return self
default=logging.INFO, choices=["INFO", "DEBUG"], help="verbose level") args_, misc = parser.parse_known_args() # remove leading '-' in option names misc = dict( (k.lstrip("-"), v.strip("'")) for k, v in zip(misc[::2], misc[1::2])) if args_.verbose_level == "INFO": logging.basicConfig(level=logging.INFO) else: logging.basicConfig(level=logging.DEBUG) scenario = Scenario(args_.scenario) traj_logger = TrajLogger(None, Stats(scenario)) trajectory = traj_logger.read_traj_aclib_format(args_.trajectory, scenario.cs) if args_.tae == "old": tae = ExecuteTARunOld(ta=scenario.ta, run_obj=scenario.run_obj, par_factor=scenario.par_factor, cost_for_crash=scenario.cost_for_crash) if args_.tae == "aclib": tae = ExecuteTARunAClib(ta=scenario.ta, run_obj=scenario.run_obj, par_factor=scenario.par_factor, cost_for_crash=scenario.cost_for_crash) validator = Validator(scenario, trajectory, args_.output, args_.seed)
def validate(
    self,
    config_mode: Union[str, typing.List[Configuration]] = 'def',
    instance_mode: Union[str, typing.List[str]] = 'test',
    repetitions: int = 1,
    n_jobs: int = 1,
    backend: str = 'threading',
    runhistory: RunHistory = None,
    tae: ExecuteTARun = None,
    output_fn: typing.Optional[str] = None,
) -> RunHistory:
    """
    Run the requested configurations on the requested instances and collect
    the results in a runhistory.

    When an existing runhistory is passed in, make sure it was produced on
    the same or comparable hardware.

    Side effect: the resulting runhistory is handed to ``_save_results``
    together with ``output_fn``.

    Parameters
    ----------
    config_mode: str or list<Configuration>
        either a list of Configurations or one of the strings
        [def, inc, def+inc, wallclock_time, cpu_time, all]
    instance_mode: str or list<str>
        either a list of instances or one of [train, test, train+test]
    repetitions: int
        number of repetitions in nondeterministic algorithms
    n_jobs: int
        number of parallel processes used by joblib
    backend: str
        what backend joblib should use for parallel runs
    runhistory: RunHistory
        optional, RunHistory-object to reuse runs
    tae: ExecuteTARun
        tae to be used. if not given, an ExecuteTARunOld is created from the
        validator's scenario. a given tae gets its ``stats`` replaced
    output_fn: str
        path to runhistory to be saved. if the suffix is not '.json', will
        be interpreted as directory and filename will be
        'validated_runhistory.json'

    Returns
    -------
    runhistory: RunHistory
        runhistory with validated runs
    """
    self.logger.debug(
        "Validating configs '%s' on instances '%s', repeating %d times"
        " with %d parallel runs on backend '%s'.", config_mode,
        instance_mode, repetitions, n_jobs, backend)

    # Collect every run that has to be evaluated
    runs, validated_rh = self._get_runs(config_mode, instance_mode,
                                        repetitions, runhistory)

    # Stats built from a scenario that carries no run or time limits
    unlimited_scenario = Scenario({
        'run_obj': self.scen.run_obj,
        'cutoff_time': self.scen.cutoff,  # type: ignore[attr-defined] # noqa F821
        'output_dir': ""
    })
    unlimited_stats = Stats(unlimited_scenario)
    unlimited_stats.start_timing()

    if tae:
        # Inject endless-stats into the caller's tae
        tae.stats = unlimited_stats
    else:
        tae = ExecuteTARunOld(
            ta=self.scen.ta,  # type: ignore[attr-defined] # noqa F821
            runhistory=runhistory,
            stats=unlimited_stats,
            run_obj=self.scen.run_obj,
            par_factor=self.scen.par_factor,  # type: ignore[attr-defined] # noqa F821
            cost_for_crash=self.scen.cost_for_crash,  # type: ignore[attr-defined] # noqa F821
        )

    # Validate!
    run_results = self._validate_parallel(tae, runs, n_jobs, backend)
    assert len(run_results) == len(runs), (run_results, runs)

    # Each result is (status, cost, runtime, additional_info)
    for run, result in zip(runs, run_results):
        validated_rh.add(
            config=run.config,
            cost=result[1],
            time=result[2],
            status=result[0],
            instance_id=run.inst,
            seed=run.seed,
            additional_info=result[3],
        )

    self._save_results(validated_rh,
                       output_fn,
                       backup_fn="validated_runhistory.json")
    return validated_rh
def main():
    """Run a SMAC architecture search for the model named on the command line.

    Every evaluated configuration is appended as one JSON line to
    ``smac_iter_hists.txt`` in the experiment's output directory. Ctrl-C is
    caught and ends the process with exit status 0.
    """
    try:
        cmd_args, _ = get_common_cmd_args()
        output_basedir = cmd_args.output_basedir
        model_name = cmd_args.model_name

        # Only resnet20 is supported.
        if model_name != "resnet20":
            raise ValueError(f"model name {model_name} is wrong")
        cfg2funcparams = cfg2funcparams_nas_resnet20
        get_cs = get_cs_nas_resnet20

        logger = logging.getLogger(f"SMAC-NAS-{model_name}")
        logger.setLevel(logging.DEBUG)

        expid = get_experiment_id(6)
        output_dir = os.path.join(output_basedir, "SMAC", model_name, expid)
        os.makedirs(output_dir, exist_ok=True)
        log_path = os.path.join(output_dir, f"SMAC-NAS-{model_name}.log")
        setup_logger(logger, log_path)

        logger.info(f"Experiment {expid} starts...")
        logger.info("Experiment Configuration:")
        logger.info(vars(cmd_args))

        def obj_func(cfg):
            """Train and test one configuration and return its objective value."""
            logger.info("Starting BO iteration")
            params = cfg2funcparams(cfg)
            obj_info = nas_train_test(cmd_args,
                                      params,
                                      logger,
                                      model_name=model_name)
            logger.info("Finishing BO iteration")
            logger.info(params)
            logger.info(obj_info)

            # Append this iteration to the history file, one JSON object
            # per line.
            history_record = {"params": params, "obj_info": obj_info}
            history_path = os.path.join(output_dir, "smac_iter_hists.txt")
            with open(history_path, "a") as history_file:
                json.dump(history_record, history_file)
                history_file.write("\n")

            # smac default do minimize
            return obj_info["value"]

        scenario_options = {
            "run_obj": "quality",  # we optimize quality (alternatively runtime)
            "runcount_limit": 100,  # maximum function evaluations
            "cs": get_cs(),  # configuration space
            "deterministic": "true",
            "initial_incumbent": "LHD",
        }
        scenario = Scenario(scenario_options)

        smac = SMAC4HPO(scenario=scenario, tae_runner=obj_func)
        incumbent = smac.optimize()
        print(incumbent)
    except KeyboardInterrupt:
        print("Interrupted. You pressed Ctrl-C!!!")
        try:
            sys.exit(0)
        except SystemExit:
            # Fall back to an immediate process exit.
            os._exit(0)
def main_cli(
        self,
        commandline_arguments: typing.Optional[typing.List[str]] = None
) -> None:
    """Main function of SMAC for CLI interface

    Parses the command line, configures the root logger, builds the
    scenario, optionally restores or warm-starts state, instantiates the
    optimizer selected by ``mode`` and runs it.

    Parameters
    ----------
    commandline_arguments : typing.Optional[typing.List[str]]
        Arguments handed to ``CMDReader.read_cmd``. When empty or None,
        ``read_cmd`` is called without arguments.

    Raises
    ------
    ValueError
        If the parsed ``mode`` is not one of the supported optimizers.
    """
    self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

    cmd_reader = CMDReader()
    kwargs = {}
    if commandline_arguments:
        kwargs['commandline_arguments'] = commandline_arguments
    main_args_, smac_args_, scen_args_ = cmd_reader.read_cmd(**kwargs)

    root_logger = logging.getLogger()
    root_logger.setLevel(main_args_.verbose_level)
    logger_handler = logging.StreamHandler(stream=sys.stdout)
    # Terse format for INFO and above, detailed format when debugging.
    if root_logger.level >= logging.INFO:
        formatter = logging.Formatter("%(levelname)s:\t%(message)s")
    else:
        formatter = logging.Formatter(
            "%(asctime)s:%(levelname)s:%(name)s:\t%(message)s",
            "%Y-%m-%d %H:%M:%S")
    logger_handler.setFormatter(formatter)
    root_logger.addHandler(logger_handler)
    # remove default handler
    if len(root_logger.handlers) > 1:
        root_logger.removeHandler(root_logger.handlers[0])

    # Create defaults
    rh = None
    initial_configs = None
    stats = None
    incumbent = None

    # Create scenario-object
    scenario = {}
    scenario.update(vars(smac_args_))
    scenario.update(vars(scen_args_))
    scen = Scenario(scenario=scenario)

    # Restore state
    if main_args_.restore_state:
        root_logger.debug("Restoring state from %s...",
                          main_args_.restore_state)
        restore_state = main_args_.restore_state
        rh, stats, traj_list_aclib, traj_list_old = self.restore_state(
            scen, restore_state)

        scen.output_dir_for_this_run = create_output_directory(
            scen,
            main_args_.seed,
            root_logger,
        )
        scen.write()
        incumbent = self.restore_state_after_output_dir(
            scen, stats, traj_list_aclib, traj_list_old)

    if main_args_.warmstart_runhistory:
        rh = RunHistory()

        scen, rh = merge_foreign_data_from_file(
            scenario=scen,
            runhistory=rh,
            in_scenario_fn_list=main_args_.warmstart_scenario,
            in_runhistory_fn_list=main_args_.warmstart_runhistory,
            cs=scen.cs,  # type: ignore[attr-defined] # noqa F821
        )

    if main_args_.warmstart_incumbent:
        # Start from the default configuration, then add the final
        # incumbent of every supplied trajectory file.
        initial_configs = [scen.cs.get_default_configuration()]  # type: ignore[attr-defined] # noqa F821
        for traj_fn in main_args_.warmstart_incumbent:
            trajectory = TrajLogger.read_traj_aclib_format(
                fn=traj_fn,
                cs=scen.cs,  # type: ignore[attr-defined] # noqa F821
            )
            initial_configs.append(trajectory[-1]["incumbent"])

    if main_args_.mode == "SMAC4AC":
        optimizer = SMAC4AC(scenario=scen,
                            rng=np.random.RandomState(main_args_.seed),
                            runhistory=rh,
                            initial_configurations=initial_configs,
                            stats=stats,
                            restore_incumbent=incumbent,
                            run_id=main_args_.seed)
    elif main_args_.mode == "SMAC4HPO":
        optimizer = SMAC4HPO(scenario=scen,
                             rng=np.random.RandomState(main_args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             stats=stats,
                             restore_incumbent=incumbent,
                             run_id=main_args_.seed)
    elif main_args_.mode == "SMAC4BB":
        optimizer = SMAC4BB(scenario=scen,
                            rng=np.random.RandomState(main_args_.seed),
                            runhistory=rh,
                            initial_configurations=initial_configs,
                            stats=stats,
                            restore_incumbent=incumbent,
                            run_id=main_args_.seed)
    elif main_args_.mode == "ROAR":
        optimizer = ROAR(scenario=scen,
                         rng=np.random.RandomState(main_args_.seed),
                         runhistory=rh,
                         initial_configurations=initial_configs,
                         run_id=main_args_.seed)
    elif main_args_.mode == "Hydra":
        optimizer = Hydra(
            scenario=scen,
            rng=np.random.RandomState(main_args_.seed),
            runhistory=rh,
            initial_configurations=initial_configs,
            stats=stats,
            restore_incumbent=incumbent,
            run_id=main_args_.seed,
            random_configuration_chooser=main_args_.random_configuration_chooser,
            n_iterations=main_args_.hydra_iterations,
            val_set=main_args_.hydra_validation,
            incs_per_round=main_args_.hydra_incumbents_per_round,
            n_optimizers=main_args_.hydra_n_optimizers)
    elif main_args_.mode == "PSMAC":
        optimizer = PSMAC(
            scenario=scen,
            rng=np.random.RandomState(main_args_.seed),
            run_id=main_args_.seed,
            shared_model=smac_args_.shared_model,
            validate=main_args_.psmac_validate,
            n_optimizers=main_args_.hydra_n_optimizers,
            n_incs=main_args_.hydra_incumbents_per_round,
        )
    else:
        # Without this branch an unknown mode would surface later as an
        # UnboundLocalError on ``optimizer``, hiding the real cause.
        raise ValueError("Unknown SMAC mode: '%s'" % main_args_.mode)

    try:
        optimizer.optimize()
    except (TAEAbortException, FirstRunCrashedException) as err:
        self.logger.error(err)
# inc_value = mysmac_from_cfg(incumbent) # print("Optimized Value: %.2f" % (inc_value)) # # We can also validate our results (though this makes a lot more sense with instances) # smac.validate(config_mode='inc', # We can choose which configurations to evaluate # # instance_mode='train+test', # Defines what instances to validate # repetitions=3, # Ignored, unless you set "deterministic" to "false" in line 95 # n_jobs=1) # How many cores to use in parallel for optimization ##########################SMAC------end---------------############################## # SMAC scenario object scenario = Scenario({"run_obj": "quality", # we optimize quality (alternative to runtime) "wallclock-limit": 40, #100 max duration to run the optimization (in seconds) "cs": cs, # configuration space "deterministic": "true", "limit_resources": True, # Uses pynisher to limit memory and runtime # Alternatively, you can also disable this. # Then you should handle runtime and memory yourself in the TA "cutoff": 15, #30 runtime limit for target algorithm "memory_limit": 307, # 3072adapt this to reasonable value for your hardware }) # max budget for hyperband can be anything. Here, we set it to maximum no. of epochs to train the MLP for max_iters = 15 # intensifier parameters intensifier_kwargs = {'initial_budget': 5, 'max_budget': max_iters, 'eta': 3} # To optimize, we pass the function to the SMAC-object smac = BOHB4HPO(scenario=scenario, rng=np.random.RandomState(42), tae_runner=mysmac_from_cfg, intensifier_kwargs=intensifier_kwargs) # all arguments related to intensifier can be passed like this # Example call of the function with default values
def setUp(self):
    """Load the spear/hydra test scenario and reset the output-dir list."""
    self.output_dirs = []
    test_dir = os.path.dirname(__file__)
    scenario_path = os.path.join(
        test_dir, '../test_files/spear_hydra_test_scenario.txt')
    self.scenario = Scenario(scenario_path)
def main():
    """Tune the LSQ/SR parameters with SMAC for a dataset given on the CLI.

    Command-line options
    --------------------
    --dataset : str, default ``'labelme'``
        Dataset to run SMAC on; exposed to the target function as a
        single-valued categorical hyperparameter.
    --m : int, default 8
        Number of codebooks; also bounds ``npert`` to ``[0, m - 1]``.

    The incumbent is re-evaluated with ``recall_from_cfg`` and the result is
    printed.
    """
    parser = argparse.ArgumentParser(description='Dump data of a log.')
    parser.add_argument('--dataset', type=str, default='labelme',
                        help='dataset to run smac on')
    parser.add_argument('--m', type=int, default=8,
                        help=' number of codebooks')
    args = parser.parse_args()

    # Fixed parameters
    dataset = CategoricalHyperparameter("dataset", [args.dataset],
                                        default_value=args.dataset)
    m = CategoricalHyperparameter("m", [str(args.m)],
                                  default_value=str(args.m))

    # Build Configuration Space which defines all parameters and their ranges
    ilsiter = UniformIntegerHyperparameter("ilsiter", 1, 16, default_value=8)
    # The default must lie inside [0, m - 1]; a fixed 4 is illegal for m <= 4,
    # so clamp it to the upper bound (unchanged for m >= 5).
    npert_upper = args.m - 1
    npert = UniformIntegerHyperparameter("npert", 0, npert_upper,
                                         default_value=min(4, npert_upper))
    randord = CategoricalHyperparameter("randord", ["true", "false"],
                                        default_value="true")

    # SR parameters
    sr_method = CategoricalHyperparameter("SR_method",
                                          ["LSQ", "SR_C", "SR_D"],
                                          default_value="SR_D")
    schedule = CategoricalHyperparameter("schedule", ["1", "2", "3"],
                                         default_value="1")
    p = UniformFloatHyperparameter("p", 0.1, 1., default_value=0.5)

    # Schedule and p only make sense in SR
    use_schedule = InCondition(child=schedule, parent=sr_method,
                               values=["SR_C", "SR_D"])
    use_p = InCondition(child=p, parent=sr_method, values=["SR_C", "SR_D"])

    cs = ConfigurationSpace()
    cs.add_hyperparameters(
        [dataset, m, ilsiter, npert, randord, sr_method, schedule, p])
    cs.add_conditions([use_schedule, use_p])

    # Scenario object
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": 200,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "false"
    })

    # Optimize, using a SMAC-object
    thing_to_call = AbstractTAFunc(recall_from_cfg, use_pynisher=False)
    smac = SMAC(scenario=scenario, rng=np.random.RandomState(42),
                tae_runner=thing_to_call)
    print("Optimizing!")
    incumbent = smac.optimize()
    inc_value = recall_from_cfg(incumbent)
    print("Optimized Value: %.2f" % (inc_value))
def centroid(n_eval, random_seed_pair):
    """Optimise the centroid edge-choice problem with SMAC.

    Parameters
    ----------
    n_eval
        Passed to the scenario as ``runcount-limit``.
    random_seed_pair
        Passed to ``Centroid``; its second element also seeds the sampling
        of the initial points.

    Returns
    -------
    The second value returned by ``evaluations_from_smac`` for the finished
    run.
    """
    name_tag = 'centroid_' + datetime.now().strftime("%Y-%m-%d-%H:%M:%S:%f")

    def var_name(index):
        # Variables are named x01, x02, ... (1-based, zero-padded to 2 digits).
        return 'x' + str(index + 1).zfill(2)

    cs = ConfigurationSpace()
    choices = [str(elm) for elm in range(CENTROID_N_CHOICE)]
    for edge in range(CENTROID_N_EDGES):
        cs.add_hyperparameter(
            CategoricalHyperparameter(var_name(edge),
                                      [str(elm) for elm in range(CENTROID_N_CHOICE)]
                                      if edge else choices,
                                      default_value='0'))

    init_points_numpy = sample_init_points(
        [CENTROID_N_CHOICE] * CENTROID_N_EDGES, 20,
        random_seed_pair[1]).long().numpy()
    init_points = [
        Configuration(
            cs,
            {var_name(j): str(init_points_numpy[row][j])
             for j in range(CENTROID_N_EDGES)})
        for row in range(init_points_numpy.shape[0])
    ]

    evaluator = Centroid(random_seed_pair)
    interaction_list = evaluator.interaction_list
    covariance_list = evaluator.covariance_list
    partition_original_list = evaluator.partition_original_list

    def evaluate(x):
        """Return the mean ``ising_dense`` value over all Ising models."""
        selection = np.array(
            [int(x[var_name(j)]) for j in range(CENTROID_N_EDGES)])
        interaction_mixed = edge_choice(selection, interaction_list)
        partition_mixed = partition(interaction_mixed, CENTROID_GRID)

        kld_sum = 0
        for model_idx in range(evaluator.n_ising_models):
            kld_sum += ising_dense(
                interaction_sparsified=interaction_mixed,
                interaction_original=interaction_list[model_idx],
                covariance=covariance_list[model_idx],
                partition_sparsified=partition_mixed,
                partition_original=partition_original_list[model_idx],
                grid_h=CENTROID_GRID[0])
        return kld_sum / float(evaluator.n_ising_models)

    print('Began at ' + datetime.now().strftime("%H:%M:%S"))
    scenario_options = {
        "run_obj": "quality",
        "runcount-limit": n_eval,
        "cs": cs,
        "deterministic": "true",
        'output_dir': os.path.join(EXP_DIR, name_tag),
    }
    smac = SMAC(scenario=Scenario(scenario_options),
                tae_runner=evaluate,
                initial_configurations=init_points)
    smac.optimize()

    _, optimum = evaluations_from_smac(smac)
    print('Finished at ' + datetime.now().strftime("%H:%M:%S"))
    return optimum
def fmin_smac(func: typing.Callable,
              x0: typing.List[float],
              bounds: typing.List[typing.Iterable[float]],
              maxfun: int = -1,
              rng: typing.Union[np.random.RandomState, int] = None,
              scenario_args: typing.Mapping[str, typing.Any] = None,
              **kwargs):
    """
    Minimize a function func using the SMAC4HPO facade
    (i.e., a modified version of SMAC).
    This function is a convenience wrapper for the SMAC4HPO class.

    Parameters
    ----------
    func : typing.Callable
        Function to minimize.
    x0 : typing.List[float]
        Initial guess/default configuration.
    bounds : typing.List[typing.List[float]]
        ``(min, max)`` pairs for each element in ``x``, defining the bound on
        that parameters.
    maxfun : int, optional
        Maximum number of function evaluations. Only applied when positive;
        it then overrides any ``runcount_limit`` given in ``scenario_args``.
    rng : np.random.RandomState, optional
        Random number generator used by SMAC.
    scenario_args: typing.Mapping[str,typing.Any]
        Arguments passed to the scenario
        See smac.scenario.scenario.Scenario
    **kwargs:
        Arguments passed on to the SMAC4HPO constructor.

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_hpo_facade.SMAC4HPO`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.

    """
    # create configuration space
    cs = ConfigurationSpace()

    # Adjust zero padding so parameter names sort in positional order
    tmplt = 'x{0:0' + str(len(str(len(bounds)))) + 'd}'

    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name=tmplt.format(idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # create scenario
    scenario_dict = {
        "run_obj": "quality",
        "cs": cs,
        "deterministic": "true",
        "initial_incumbent": "DEFAULT",
    }
    if scenario_args is not None:
        scenario_dict.update(scenario_args)
    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    scenario = Scenario(scenario_dict)

    smac = SMAC4HPO(scenario=scenario,
                    tae_runner=ExecuteTAFuncArray,
                    tae_runner_kwargs={'ta': func},
                    rng=rng,
                    **kwargs)
    smac.logger = logging.getLogger(smac.__module__ + "." +
                                    smac.__class__.__name__)
    incumbent = smac.optimize()

    # Look up the recorded run of the incumbent (no instance, seed 0).
    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 array.
    incumbent = np.array(
        [incumbent[tmplt.format(idx + 1)] for idx in range(len(bounds))],
        dtype=float)
    return incumbent, incumbent_performance.cost, smac
else: memory_limit_factor = 2 print('Starting to validate configurations') for i, entry in enumerate(trajectory): print('Starting to validate configuration %d/%d' % (i + 1, len(trajectory))) incumbent_id = entry.incumbent_id train_performance = entry.train_perf if incumbent_id not in incumbent_id_to_model: config = entry.incumbent logger = logging.getLogger('Testing:)') stats = Stats( Scenario({ 'cutoff_time': per_run_time_limit * 2, 'run_obj': 'quality', })) stats.start_timing() # To avoid the output "first run crashed"... stats.submitted_ta_runs += 1 stats.finished_ta_runs += 1 memory_lim = memory_limit_factor * automl_arguments['memory_limit'] ta = ExecuteTaFuncWithQueue( backend=automl.automl_._backend, autosklearn_seed=seed, resampling_strategy='test', memory_limit=memory_lim, disable_file_output=True, logger=logger, stats=stats, all_scoring_functions=True,
def __init__(
        self,
        scenario: Scenario,
        tae_runner: typing.Union[ExecuteTARun, typing.Callable] = None,
        runhistory: RunHistory = None,
        intensifier: Intensifier = None,
        acquisition_function: AbstractAcquisitionFunction = None,
        acquisition_function_optimizer: AcquisitionFunctionMaximizer = None,
        model: AbstractEPM = None,
        runhistory2epm: AbstractRunHistory2EPM = None,
        initial_design: InitialDesign = None,
        initial_configurations: typing.List[Configuration] = None,
        stats: Stats = None,
        restore_incumbent: Configuration = None,
        rng: typing.Union[np.random.RandomState, int] = None,
        smbo_class: SMBO = None,
        run_id: int = 1,
        hoag: AbstractHOAG = None,
        #server: Server=None,
        bayesian_optimization: bool = False):
    """Constructor

    Parameters
    ----------
    scenario : ~smac.scenario.scenario.Scenario
        Scenario object
    tae_runner : ~smac.tae.execute_ta_run.ExecuteTARun or callable
        Callable or implementation of
        :class:`~smac.tae.execute_ta_run.ExecuteTARun`. In case a
        callable is passed it will be wrapped by
        :class:`~smac.tae.execute_func.ExecuteTAFuncDict`.
        If not set, it will be initialized with the
        :class:`~smac.tae.execute_ta_run_old.ExecuteTARunOld`.
    runhistory : RunHistory
        runhistory to store all algorithm runs
    intensifier : Intensifier
        intensification object to issue a racing to decide the current
        incumbent
    acquisition_function : ~smac.optimizer.acquisition.AbstractAcquisitionFunction
        Object that implements the
        :class:`~smac.optimizer.acquisition.AbstractAcquisitionFunction`.
        Will use :class:`~smac.optimizer.acquisition.EI` if not set
        (:class:`LogEI` when the run objective is ``"runtime"``).
    acquisition_function_optimizer : ~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer
        Object that implements the
        :class:`~smac.optimizer.ei_optimization.AcquisitionFunctionMaximizer`.
        Will use
        :class:`smac.optimizer.ei_optimization.InterleavedLocalAndRandomSearch`
        if not set.
    model : AbstractEPM
        Model that implements train() and predict(). Will use a
        :class:`~smac.epm.rf_with_instances.RandomForestWithInstances`
        if not set.
    runhistory2epm : ~smac.runhistory.runhistory2epm.RunHistory2EPM
        Object that implements the AbstractRunHistory2EPM. If None,
        will use :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4Cost`
        if objective is cost or
        :class:`~smac.runhistory.runhistory2epm.RunHistory2EPM4LogCost`
        if objective is runtime.
    initial_design : InitialDesign
        initial sampling design
    initial_configurations : typing.List[Configuration]
        list of initial configurations for initial design --
        cannot be used together with initial_design
    stats : Stats
        optional stats object
    rng : np.random.RandomState
        Random number generator
    restore_incumbent : Configuration
        incumbent used if restoring to previous state
    smbo_class : ~smac.optimizer.smbo.SMBO
        Class implementing the SMBO interface which will be used to
        instantiate the optimizer class.
    run_id: int, (default: 1)
        Run ID will be used as subfolder for output_dir.
    hoag : AbstractHOAG
        Forwarded unchanged to the SMBO constructor as ``hoag``.
    bayesian_optimization : bool, (default: False)
        Forwarded unchanged to the SMBO constructor as
        ``bayesian_optimization``.

    Raises
    ------
    ValueError
        If ``acquisition_function_optimizer`` has the wrong type, if the
        objectives of ``tae_runner`` and ``scenario`` differ, if both
        ``initial_design`` and ``initial_configurations`` are given, or if
        the initial incumbent or the run objective is unknown.
    TypeError
        If ``tae_runner`` is neither None, a callable nor an
        ``ExecuteTARun`` instance.
    """
    self.logger = logging.getLogger(self.__module__ + "." +
                                    self.__class__.__name__)

    aggregate_func = average_cost

    self.output_dir = create_output_directory(scenario, run_id)
    scenario.write()

    # initialize stats object
    if stats:
        self.stats = stats
    else:
        self.stats = Stats(scenario)

    # initialize empty runhistory
    if runhistory is None:
        runhistory = RunHistory(aggregate_func=aggregate_func)
    # inject aggr_func if necessary
    if runhistory.aggregate_func is None:
        runhistory.aggregate_func = aggregate_func

    # initial random number generator
    num_run, rng = self._get_rng(rng=rng)

    # reset random number generator in config space to draw different
    # random configurations with each seed given to SMAC
    scenario.cs.seed(rng.randint(MAXINT))

    # initial Trajectory Logger
    traj_logger = TrajLogger(output_dir=self.output_dir, stats=self.stats)

    # initial EPM
    types, bounds = get_types(scenario.cs, scenario.feature_array)
    if model is None:
        model = RandomForestWithInstances(
            types=types,
            bounds=bounds,
            instance_features=scenario.feature_array,
            seed=rng.randint(MAXINT),
            pca_components=scenario.PCA_DIM)

    # initial acquisition function
    if acquisition_function is None:
        if scenario.run_obj == "runtime":
            acquisition_function = LogEI(model=model)
        else:
            acquisition_function = EI(model=model)
    # inject model if necessary
    if acquisition_function.model is None:
        acquisition_function.model = model

    # initialize optimizer on acquisition function
    if acquisition_function_optimizer is None:
        acquisition_function_optimizer = InterleavedLocalAndRandomSearch(
            acquisition_function, scenario.cs,
            np.random.RandomState(seed=rng.randint(MAXINT)))
    elif not isinstance(
            acquisition_function_optimizer,
            AcquisitionFunctionMaximizer,
    ):
        raise ValueError(
            "Argument 'acquisition_function_optimizer' must be of type "
            "'AcquisitionFunctionMaximizer', but is '%s'" %
            type(acquisition_function_optimizer))

    # initialize tae_runner
    # First case, if tae_runner is None, the target algorithm is a call
    # string in the scenario file
    if tae_runner is None:
        tae_runner = ExecuteTARunOld(
            ta=scenario.ta,
            stats=self.stats,
            run_obj=scenario.run_obj,
            runhistory=runhistory,
            par_factor=scenario.par_factor,
            cost_for_crash=scenario.cost_for_crash)
    # Second case, the tae_runner is a function to be optimized
    elif callable(tae_runner):
        tae_runner = ExecuteTAFuncDict(
            ta=tae_runner,
            stats=self.stats,
            run_obj=scenario.run_obj,
            memory_limit=scenario.memory_limit,
            runhistory=runhistory,
            par_factor=scenario.par_factor,
            cost_for_crash=scenario.cost_for_crash)
    # Third case, if it is an ExecuteTaRun we can simply use the
    # instance. Otherwise, the next check raises an exception
    elif not isinstance(tae_runner, ExecuteTARun):
        raise TypeError("Argument 'tae_runner' is %s, but must be "
                        "either a callable or an instance of "
                        "ExecuteTaRun. Passing 'None' will result in the "
                        "creation of target algorithm runner based on the "
                        "call string in the scenario file." %
                        type(tae_runner))

    # Check that overall objective and tae objective are the same
    if tae_runner.run_obj != scenario.run_obj:
        raise ValueError("Objective for the target algorithm runner and "
                         "the scenario must be the same, but are '%s' and "
                         "'%s'" % (tae_runner.run_obj, scenario.run_obj))

    # inject stats if necessary
    if tae_runner.stats is None:
        tae_runner.stats = self.stats
    # inject runhistory if necessary
    if tae_runner.runhistory is None:
        tae_runner.runhistory = runhistory
    # inject cost_for_crash
    if tae_runner.crash_cost != scenario.cost_for_crash:
        tae_runner.crash_cost = scenario.cost_for_crash

    # initialize intensification
    if intensifier is None:
        intensifier = Intensifier(
            tae_runner=tae_runner,
            stats=self.stats,
            traj_logger=traj_logger,
            rng=rng,
            instances=scenario.train_insts,
            cutoff=scenario.cutoff,
            deterministic=scenario.deterministic,
            run_obj_time=scenario.run_obj == "runtime",
            always_race_against=(scenario.cs.get_default_configuration()
                                 if scenario.always_race_default else None),
            instance_specifics=scenario.instance_specific,
            minR=scenario.minR,
            maxR=scenario.maxR)
    # inject deps if necessary
    if intensifier.tae_runner is None:
        intensifier.tae_runner = tae_runner
    if intensifier.stats is None:
        intensifier.stats = self.stats
    if intensifier.traj_logger is None:
        intensifier.traj_logger = traj_logger

    # initial design
    if initial_design is not None and initial_configurations is not None:
        raise ValueError(
            "Either use initial_design or initial_configurations; but not both"
        )

    if initial_configurations is not None:
        initial_design = MultiConfigInitialDesign(
            tae_runner=tae_runner,
            scenario=scenario,
            stats=self.stats,
            traj_logger=traj_logger,
            runhistory=runhistory,
            rng=rng,
            configs=initial_configurations,
            intensifier=intensifier,
            aggregate_func=aggregate_func)
    elif initial_design is None:
        if scenario.initial_incumbent == "DEFAULT":
            initial_design = DefaultConfiguration(tae_runner=tae_runner,
                                                  scenario=scenario,
                                                  stats=self.stats,
                                                  traj_logger=traj_logger,
                                                  rng=rng)
        elif scenario.initial_incumbent == "RANDOM":
            initial_design = RandomConfiguration(tae_runner=tae_runner,
                                                 scenario=scenario,
                                                 stats=self.stats,
                                                 traj_logger=traj_logger,
                                                 rng=rng)
        else:
            raise ValueError("Don't know what kind of initial_incumbent "
                             "'%s' is" % scenario.initial_incumbent)
    # inject deps if necessary
    if initial_design.tae_runner is None:
        initial_design.tae_runner = tae_runner
    if initial_design.scenario is None:
        initial_design.scenario = scenario
    if initial_design.stats is None:
        initial_design.stats = self.stats
    if initial_design.traj_logger is None:
        initial_design.traj_logger = traj_logger

    # initial conversion of runhistory into EPM data
    if runhistory2epm is None:

        num_params = len(scenario.cs.get_hyperparameters())
        if scenario.run_obj == "runtime":

            # if we log the performance data,
            # the RFRImputator will already get
            # log transform data from the runhistory
            cutoff = np.log10(scenario.cutoff)
            threshold = np.log10(scenario.cutoff * scenario.par_factor)

            imputor = RFRImputator(rng=rng,
                                   cutoff=cutoff,
                                   threshold=threshold,
                                   model=model,
                                   change_threshold=0.01,
                                   max_iter=2)

            runhistory2epm = RunHistory2EPM4LogCost(
                scenario=scenario,
                num_params=num_params,
                success_states=[
                    StatusType.SUCCESS,
                ],
                impute_censored_data=True,
                impute_state=[
                    StatusType.CAPPED,
                ],
                imputor=imputor)

        elif scenario.run_obj == 'quality':
            runhistory2epm = RunHistory2EPM4Cost(
                scenario=scenario,
                num_params=num_params,
                success_states=[StatusType.SUCCESS, StatusType.CRASHED],
                impute_censored_data=False,
                impute_state=None)

        else:
            # Use the local ``scenario``: ``self.scenario`` is never set in
            # this constructor, so referencing it would raise an
            # AttributeError and mask the intended ValueError.
            raise ValueError('Unknown run objective: %s. Should be either '
                             'quality or runtime.' % scenario.run_obj)

    # inject scenario if necessary:
    if runhistory2epm.scenario is None:
        runhistory2epm.scenario = scenario

    smbo_args = {
        'scenario': scenario,
        'stats': self.stats,
        'initial_design': initial_design,
        'runhistory': runhistory,
        'runhistory2epm': runhistory2epm,
        'intensifier': intensifier,
        'aggregate_func': aggregate_func,
        'num_run': num_run,
        'model': model,
        'acq_optimizer': acquisition_function_optimizer,
        'acquisition_func': acquisition_function,
        'rng': rng,
        'restore_incumbent': restore_incumbent,
        'hoag': hoag,
        #'server': server,
        'bayesian_optimization': bayesian_optimization
    }
    if smbo_class is None:
        self.solver = SMBO(**smbo_args)
    else:
        self.solver = smbo_class(**smbo_args)
def get_smbo(intensification_perc):
    """Return the SMBO solver of a SMAC facade built on the branin space.

    ``intensification_perc`` is passed to the scenario as
    ``intensification_percentage``.
    """
    scenario_options = {
        'cs': test_helpers.get_branin_config_space(),
        'run_obj': 'quality',
        'output_dir': '',
        'intensification_percentage': intensification_perc,
    }
    facade = SMAC(Scenario(scenario_options), tae_runner=target, rng=1)
    return facade.solver
def fmin_smac(func: callable,
              x0: list,
              bounds: list,
              maxfun: int=-1,
              maxtime: int=-1,
              rng: np.random.RandomState=None):
    """ Minimize a function func using the SMAC algorithm.
    This function is a convenience wrapper for the SMAC class.

    Parameters
    ----------
    func : callable f(x)
        Function to minimize.
    x0 : list
        Initial guess/default configuration.
    bounds : list
        ``(min, max)`` pairs for each element in ``x``, defining the bound on
        that parameters.
    maxtime : int, optional
        Maximum runtime in seconds. Only applied when positive.
    maxfun : int, optional
        Maximum number of function evaluations. Only applied when positive.
    rng : np.random.RandomState, optional
        Random number generator used by SMAC.

    Returns
    -------
    x : list
        Estimated position of the minimum.
    f : float
        Value of `func` at the minimum.
    s : :class:`smac.facade.smac_facade.SMAC`
        SMAC objects which enables the user to get
        e.g., the trajectory and runhistory.
    """
    # create configuration space
    cs = ConfigurationSpace()
    for idx, (lower_bound, upper_bound) in enumerate(bounds):
        parameter = UniformFloatHyperparameter(name="x%d" % (idx + 1),
                                               lower=lower_bound,
                                               upper=upper_bound,
                                               default_value=x0[idx])
        cs.add_hyperparameter(parameter)

    # Create target algorithm runner
    ta = ExecuteTAFuncArray(ta=func)

    # create scenario
    scenario_dict = {"run_obj": "quality",
                     "cs": cs,
                     "deterministic": "true",
                     "initial_incumbent": "DEFAULT"
                     }
    if maxfun > 0:
        scenario_dict["runcount_limit"] = maxfun
    if maxtime > 0:
        scenario_dict["wallclock_limit"] = maxtime
    scenario = Scenario(scenario_dict)

    smac = SMAC(scenario=scenario, tae_runner=ta, rng=rng)
    smac.logger = logging.getLogger(smac.__module__ + "." +
                                    smac.__class__.__name__)
    incumbent = smac.optimize()

    # Look up the recorded run of the incumbent (no instance, seed 0).
    config_id = smac.solver.runhistory.config_ids[incumbent]
    run_key = RunKey(config_id, None, 0)
    incumbent_performance = smac.solver.runhistory.data[run_key]

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin yields the same float64 array.
    incumbent = np.array([incumbent['x%d' % (idx + 1)]
                          for idx in range(len(bounds))], dtype=float)
    return incumbent, incumbent_performance.cost, \
        smac
1, 50, default_value=5) weights = CategoricalHyperparameter("weights", ["uniform", "distance"], default_value="uniform") p = UniformIntegerHyperparameter("p", 1, 5, default_value=2) cs.add_hyperparameters([n_neighbors, weights, p]) # Scenario object scenario = Scenario({ "run_obj": "quality", # we optimize quality (alternatively runtime) "runcount-limit": 500, # max. number of function evaluations; for this example set to a low number "cs": cs, # configuration space "deterministic": "true" }) # Example call of the function # It returns: Status, Cost, Runtime, Additional Infos def_value = kNN_from_cfg(cs.get_default_configuration()) print("Default Value: %.2f" % (def_value)) # Optimize, using a SMAC-object print("Optimizing! Depending on your machine, this might take a few minutes.") smac = SMAC4HPO(scenario=scenario, rng=np.random.RandomState(42), tae_runner=kNN_from_cfg)
def dont_test_car_smac(self):
    """Disabled test: tune a lale pipeline on the car dataset with SMAC.

    Fits the incumbent found within ``run_count_limit`` evaluations and
    prints its test accuracy and the value the target function reports.
    """
    import numpy as np
    from lale.datasets.auto_weka import fetch_car
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.preprocessing import LabelEncoder
    import pandas as pd
    from lale.lib.weka import J48
    from lalegpl.lib.r import ArulesCBAClassifier
    from lale.operators import make_pipeline
    from lale.lib.lale import HyperoptClassifier
    from lale.lib.sklearn import LogisticRegression, KNeighborsClassifier
    from smac.scenario.scenario import Scenario
    from smac.facade.smac_facade import SMAC
    from smac.configspace import ConfigurationSpace

    (X_train, y_train), (X_test, y_test) = fetch_car()

    # Encode the labels as integers while keeping the original series name.
    target_name = y_train.name
    encoder = LabelEncoder()
    y_train = pd.Series(encoder.fit_transform(y_train), name=target_name)
    y_test = pd.Series(encoder.transform(y_test), name=target_name)

    # Alternative search space kept for reference:
    # planned_pipeline = make_pipeline(J48() | ArulesCBAClassifier() | LogisticRegression() | KNeighborsClassifier())
    planned_pipeline = make_pipeline(
        ArulesCBAClassifier() | KNeighborsClassifier() | LogisticRegression())

    config_space: ConfigurationSpace = get_smac_space(planned_pipeline)
    print(config_space)

    # To shrink the training data while debugging:
    # X_train = X_train[0:20]
    # y_train = y_train[0:20]

    # Scenario object
    run_count_limit = 1
    scenario_options = {
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": run_count_limit,  # maximum function evaluations
        "cs": config_space,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": False,
    }
    scenario = Scenario(scenario_options)

    # Optimize, using a SMAC-object
    def f_min(op):
        return test_f_min(op, X_train, y_train, num_folds=2)

    tae = lale_op_smac_tae(planned_pipeline, f_min)

    print("Optimizing! Depending on your machine, this might take a few minutes.")
    optimizer = SMAC(scenario=scenario, rng=np.random.RandomState(42),
                     tae_runner=tae)
    incumbent = optimizer.optimize()

    trainable_pipeline = lale_trainable_op_from_config(planned_pipeline,
                                                       incumbent)
    trained_pipeline = trainable_pipeline.fit(X_train, y_train)
    predictions = trained_pipeline.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print("Accuracy: %.2f" % (accuracy))

    inc_value = tae(incumbent)
    print("Optimized Value: %.2f" % (inc_value))
    print(f"Run count limit: {run_count_limit}")
import numpy as np
import random

from smac.configspace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.tae.execute_func import ExecuteTAFuncDict
from smac.scenario.scenario import Scenario
from smac.facade.smac_facade import SMAC

# Scenario driven entirely by external files/commands: the search space is
# read from a PCS file and the target algorithm is given as a call string
# ("algo") instead of a Python callable.
scenario = Scenario({
    "run_obj": "quality",  # optimise solution quality
    "runcount-limit": 200,  # limit on target-algorithm evaluations
    "paramfile": "./searchspace.pcs",  # parameter configuration space file
    "deterministic": "true",
    "algo": "python SMACOptimizerClient.py",  # call string of the target algorithm
    "wallclock-limit": 600  # wall-clock budget, presumably in seconds -- TODO confirm
})

print("create smac object")
# No tae_runner is passed here; the scenario's "algo" entry names the target.
smac = SMAC(scenario=scenario, rng=np.random.RandomState(42))

print("Start optimization process")
smac.optimize()
print("Optimization done")
def test_no_rh_epm(self):
    """validate_epm raises ValueError when called with no arguments here.

    The scenario's ``feature_array`` is cleared before the validator is
    built.
    """
    scenario = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
    scenario.feature_array = None
    validator = Validator(scenario, self.trajectory)
    with self.assertRaises(ValueError):
        validator.validate_epm()