def test_round_trip(self):
    this_file = os.path.abspath(__file__)
    this_directory = os.path.dirname(this_file)
    configuration_space_path = os.path.join(this_directory, "..", "test_searchspaces")
    configuration_space_path = os.path.abspath(configuration_space_path)
    pcs_files = os.listdir(configuration_space_path)

    for pcs_file in sorted(pcs_files):
        if '.pcs' in pcs_file:
            full_path = os.path.join(configuration_space_path, pcs_file)
            with open(full_path) as fh:
                cs_string = fh.read().split('\n')
            try:
                cs = read_pcs(cs_string)
            except Exception:
                cs = read_pcs_new(cs_string)
            cs.name = pcs_file
            json_string = write(cs)
            new_cs = read(json_string)
            self.assertEqual(new_cs, cs)
def test_configspace_with_probabilities(self):
    cs = ConfigurationSpace()
    cs.add_hyperparameter(
        CategoricalHyperparameter('a', [0, 1, 2], weights=[0.2, 0.2, 0.6]))
    string = write(cs)
    new_cs = read(string)
    self.assertEqual(
        new_cs.get_hyperparameter('a').probabilities, (0.2, 0.2, 0.6))
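# For reference, a minimal, self-contained sketch of the json round trip the
# two tests above exercise (assumes the ConfigSpace.read_and_write.json module
# used throughout these snippets; the hyperparameter names are illustrative):
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import (CategoricalHyperparameter,
                                         UniformFloatHyperparameter)
from ConfigSpace.read_and_write.json import read, write

cs = ConfigurationSpace()
cs.add_hyperparameters([
    UniformFloatHyperparameter('lr', lower=1e-4, upper=1e-1, log=True),
    CategoricalHyperparameter('activation', ['relu', 'tanh']),
])

json_string = write(cs)       # serialize the space to a json string
restored = read(json_string)  # parse it back into a ConfigurationSpace
assert restored == cs         # the round trip preserves the space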
def as_dict(self):
    return {
        'cid': self.cid.without_config().external_name,
        'pipeline': comp_util.serialize(self.pipeline),
        'cfg_keys': [(key.hash, key.idx) for key in self.cfg_keys],
        'budget': self.budget,
        'configspace': config_json.write(self.configspace)
                       if self.configspace is not None else None,
    }
def fit(self, pipeline_config, data_manager, instance, autonet_config_file,
        autonet, run_number, run_id, task_id):
    instance_name, autonet_config_name, run_name = get_names(
        instance, autonet_config_file, run_id, run_number)
    run_result_dir = get_run_result_dir(pipeline_config, instance,
                                        autonet_config_file, run_id, run_number)
    instance_run_id = str(run_name) + "_" + str(instance_name) + "_" + str(autonet_config_name)
    instance_run_id = '_'.join(instance_run_id.split(':'))

    autonet.autonet_config = None  # clean results of last fit
    autonet.update_autonet_config(task_id=task_id,
                                  run_id=instance_run_id,
                                  result_logger_dir=run_result_dir)

    if task_id not in [-1, 1]:
        return {'result_dir': run_result_dir}

    if not os.path.exists(run_result_dir):
        try:
            os.makedirs(run_result_dir)
        except Exception as e:
            print(e)

    logging.getLogger('benchmark').debug(
        "Create config and info files for current run " + str(run_name))

    instance_info = dict()
    instance_info['path'] = instance
    instance_info['is_classification'] = data_manager.is_classification
    instance_info['is_multilabel'] = data_manager.is_multilabel
    instance_info['instance_shape'] = data_manager.X_train.shape
    instance_info['categorical_features'] = data_manager.categorical_features

    autonet_config = autonet.get_current_autonet_config()
    if autonet_config["hyperparameter_search_space_updates"] is not None:
        autonet_config["hyperparameter_search_space_updates"].save_as_file(
            os.path.join(run_result_dir, "hyperparameter_search_space_updates.txt"))

    self.write_config_to_file(run_result_dir, "instance.info", instance_info)
    self.write_config_to_file(run_result_dir, "benchmark.config", pipeline_config)
    self.write_config_to_file(run_result_dir, "autonet.config", autonet_config)

    with open(os.path.join(run_result_dir, "configspace.json"), "w") as f:
        f.write(json.write(
            autonet.pipeline.get_hyperparameter_search_space(**autonet_config)))

    return {'result_dir': run_result_dir}
def get_fidelity_space(self, kwargs_str: str) -> str:
    logger.debug(f'Server: get_fidelity_space: kwargs_str: {kwargs_str}')
    kwargs = json.loads(kwargs_str, cls=BenchmarkDecoder)
    seed = kwargs.get('seed', None)
    result = self.benchmark.get_fidelity_space(seed=seed)
    logger.debug(f'Server: Fidelity Space: {result}')
    return csjson.write(result, indent=None)
def get_configuration_space(self, kwargs_str: str) -> str:
    logger.debug(f'Server: get_config_space: kwargs_str: {kwargs_str}')
    kwargs = json.loads(kwargs_str)
    seed = kwargs.get('seed', None)
    result = self.benchmark.get_configuration_space(seed=seed)
    logger.debug(f'Server: Configspace: {result}')
    return csjson.write(result, indent=None)
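# Illustrative client-side counterpart (a hypothetical helper, not part of the
# server snippets above): the json string returned by get_configuration_space()
# can be decoded back into a ConfigurationSpace with the same module's read().
from ConfigSpace.read_and_write import json as csjson

def decode_configspace_reply(reply: str):
    """Turn the server's json reply back into a ConfigurationSpace object."""
    return csjson.read(reply)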
def as_dict(self):
    # meta data are serialized via pickle
    # noinspection PyUnresolvedReferences
    return {
        'config': self.config.get_array().tolist(),
        'configspace': config_json.write(self.config.configuration_space),
        'cfg_key': self.cfg_key,
        'name': self.name,
        'mf': self.mf.tolist()
    }
def default(self, obj):
    if isinstance(obj, uuid.UUID):
        return str(obj)
    elif isinstance(obj, np.integer):
        return int(obj)
    elif isinstance(obj, np.floating):
        return float(obj)
    elif isinstance(obj, np.bool_):
        return bool(obj)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, types.FunctionType) or isclass(obj):
        return f"{obj.__module__}.{obj.__name__}"
    elif isinstance(obj, deephyper.skopt.space.Dimension):
        return str(obj)
    elif isinstance(obj, csh.Hyperparameter):
        return str(obj)
    elif isinstance(obj, cs.ConfigurationSpace):
        return json.loads(cs_json.write(obj))
    else:
        return super(Encoder, self).default(obj)
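# Usage sketch for the Encoder above (assuming it subclasses json.JSONEncoder,
# as the super().default() call suggests): pass it via cls= so numpy scalars,
# arrays, hyperparameters, and ConfigurationSpace objects become serializable.
import json
import numpy as np

payload = {'seed': np.int64(42), 'scores': np.array([0.1, 0.2])}
print(json.dumps(payload, cls=Encoder))  # -> {"seed": 42, "scores": [0.1, 0.2]}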
import ConfigSpace as CS
import ConfigSpace.read_and_write.json as json_writer
import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB as BOHB
from hpbandster.examples.commons import MyWorker

# First, create a ConfigSpace object.
# It contains the hyperparameters to be optimized.
# For more details, please have a look at the ConfigSpace example in the documentation.
config_space = CS.ConfigurationSpace()
config_space.add_hyperparameter(
    CS.UniformFloatHyperparameter('x', lower=0, upper=1))

# Write the config space to a file for later use
with open('configspace.json', 'w') as file:
    file.write(json_writer.write(config_space))

# Every run has to have a unique (at runtime) id.
# This needs to be unique for concurrent runs, i.e. when multiple
# instances run at the same time, they have to have different ids.
run_id = '0'

# Step 1:
# Every run needs a nameserver. It could be a 'static' server with a
# permanent address, but here it will be started for the local machine
# with a random port.
# The nameserver manages the concurrently running workers across all
# possible threads or cluster nodes.
NS = hpns.NameServer(run_id=run_id, host='localhost', port=0)
ns_host, ns_port = NS.start()

# Step 2:
def main(input_dir, method, method_type, output_dir, taskset, nseeds=3):
    assert os.path.isdir(input_dir), input_dir
    cs_file_tmp = os.path.join(input_dir, method_type, method + "*", 'space.json')
    cs_file = glob.glob(cs_file_tmp)
    if len(cs_file) == 0:
        raise FileNotFoundError("Could not find space %s" % cs_file_tmp)
    with open(cs_file[0]) as fh:
        cs = read(fh.read())

    task_id_to_dir = defaultdict(list)
    incumbents_test = list()
    config_to_tasks = defaultdict(list)

    for task_id in taskset:
        configuration_output_dir = os.path.join(
            input_dir,
            method_type,
            '%s_%d_*_0_0' % (method, task_id),
            'auto-sklearn-output',
            'smac3-output',
            'run_*',
            'runhistory.json',
        )
        configuration_output_dirs = glob.glob(configuration_output_dir)
        if len(configuration_output_dirs) != nseeds:
            print("Skip", configuration_output_dir, "has only",
                  len(configuration_output_dirs), "runhistories")
            continue
        task_id_to_dir[task_id] = configuration_output_dirs

    print("Found", len(task_id_to_dir), "complete entries")
    print("Skipped", len(automl_metadata) - len(task_id_to_dir), "entries")

    rval = Parallel(n_jobs=8, verbose=0)(
        delayed(read_configurations_for_task_id)(task_id, task_id_to_dir, cs)
        for task_id in sorted(list(task_id_to_dir))
    )
    for task_id, ivt in rval:
        for inc in ivt:
            if inc not in incumbents_test:
                incumbents_test.append(inc)
                config_to_tasks[inc].append(task_id)
                break
            else:
                config_to_tasks[inc].append(task_id)
    print("Found", len(incumbents_test), "incumbents")

    jason = {hash_config(i): i for i in incumbents_test}
    assert len(jason) == len(incumbents_test)

    drop_keys = set()
    for idx, i in enumerate(jason):
        for jdx, j in enumerate(jason):
            if idx >= jdx:
                continue
            elif jason[i].get_dictionary() == jason[j].get_dictionary():
                drop_keys.add(j)
    for key in drop_keys:
        raise ValueError("Found double entry:", jason[key])

    config_id_to_task = dict()
    for key in jason:
        config_id_to_task[key] = list(config_to_tasks[jason[key]])
        jason[key] = jason[key].get_dictionary()
    print('Found %d incumbents!' % len(jason))

    json_file_name = os.path.join(output_dir, 'incumbents.json')
    with open(json_file_name, 'w') as fh:
        json.dump(jason, fh, indent=4)
    json_file_name = os.path.join(output_dir, 'task_to_inc_id.json')
    with open(json_file_name, 'w') as fh:
        json.dump(config_id_to_task, fh, indent=4)
    configspace_file_name = os.path.join(output_dir, 'space.json')
    with open(configspace_file_name, 'w') as fh:
        fh.write(write(cs))
# 5) InCondition:
# 'e' is only active if 'c' is in the set [25, 26, 27]
in_cond = CS.InCondition(e, c, [25, 26, 27])

# 6) AndConjunction:
# The 'and-conjunction' combines the conditions less_cond and greater_cond
cs.add_condition(CS.AndConjunction(less_cond, greater_cond))

# 7) OrConjunction:
# The 'or-conjunction' works similarly to the 'and-conjunction'
equals_cond = CS.EqualsCondition(e, a, 2)
cs.add_condition(CS.OrConjunction(in_cond, equals_cond))

# 8) ForbiddenEqualsClause:
# This clause forbids the value 2 for the hyperparameter f
forbidden_clause_f = CS.ForbiddenEqualsClause(f, 2)

# 9) ForbiddenInClause:
# This clause forbids the value of the hyperparameter g to be in the set [2]
forbidden_clause_g = CS.ForbiddenInClause(g, [2])

# 10) ForbiddenAndConjunction:
# Now, we combine them with an 'and-conjunction' and add them to the ConfigurationSpace
forbidden_clause = CS.ForbiddenAndConjunction(forbidden_clause_f, forbidden_clause_g)
cs.add_forbidden_clause(forbidden_clause)

# To end this example, we store the defined configuration space in a json file
with open('configspace.json', 'w') as fh:
    fh.write(json.write(cs))
worker = worker(min_budget=min_budget,
                max_budget=max_budget,
                eta=eta,
                nameserver=ns_host,
                nameserver_port=ns_port,
                run_id=args.run_id,
                model=args.model,
                data_config_path=args.data_config_path,
                data_root=args.data_root)
worker.run(background=True)

# Dump the configspace to the directory
config_space = worker.get_config_space()
with open(os.path.join(args.working_directory, 'configspace.json'), 'w') as f:
    f.write(config_space_json_r_w.write(config_space))

# instantiate BOHB and run it
result_logger = hputil.json_result_logger(directory=args.working_directory,
                                          overwrite=True)

HPB = BOHB(configspace=worker.get_config_space(),
           working_directory=args.working_directory,
           run_id=args.run_id,
           eta=eta,
           min_budget=min_budget,
           max_budget=max_budget,
           host=ns_host,
           nameserver=ns_host,
           nameserver_port=ns_port,
           ping_interval=3600,
                                     upper=.5,
                                     log=False,
                                     default_value=0.49070634552851977)
tol = CSH.UniformFloatHyperparameter('tol',
                                     lower=1e-4,
                                     upper=1e-2,
                                     log=True,
                                     default_value=0.0002154969698207585)
gamma = CSH.CategoricalHyperparameter('gamma',
                                      choices=['scale', 'auto'],
                                      default_value='scale')
C = CSH.UniformFloatHyperparameter('C',
                                   lower=1.0,
                                   upper=20,
                                   log=True,
                                   default_value=3.2333262862494365)
epsilon = CSH.UniformFloatHyperparameter('epsilon',
                                         lower=0.01,
                                         upper=0.99,
                                         log=True,
                                         default_value=0.14834562300010581)
shrinking = CSH.CategoricalHyperparameter('shrinking',
                                          choices=['True', 'False'],
                                          default_value='True')

cs.add_hyperparameters(
    [kernel, tol, gamma, C, epsilon, shrinking, degree, coef0])

with open('svr_configspace.json', 'w') as f:
    f.write(cs_json.write(cs))
def fmin(func, config_space, func_args=(), eta=2, min_budget=2, max_budget=4,
         num_iterations=1, num_workers=1, output_dir='.'):
    """
    Starts a local BOHB optimization run for a function over a hyperparameter
    search space, which is referred to as a configuration space. This
    function's purpose is to give a fast and easy way to run BOHB on an
    optimization objective on your local machine.

    The optimized function must satisfy the following conditions:

    - Contain a parameter ``budget``:
      This parameter is passed by the optimizer. Its meaning is defined by
      your interpretation of the budget used by your model. For example, it
      may be the number of epochs a neural network trains for, or the number
      of datapoints the model receives.
      The idea is to run many configurations on a small budget and take only
      the best 1/``eta`` of them to the next round. In the next iteration,
      the configurations run on a doubled budget. This is repeated until only
      2 configurations are left to run on ``max_budget``. Therefore, bad
      configurations are rejected quickly, and the good ones are explored
      more. The number of configurations with a minimum budget is calculated
      similarly to the optimization run, just reversed: having 2
      configurations with ``max_budget``, the iteration before samples
      ``eta``-times as many configurations with half the budget, and so on.

    - Hyperparameters from the configuration space object:
      The function must implement all hyperparameters defined in the
      configuration space. The parameter name in the function call must be
      equal to the name of the hyperparameter; otherwise, an exception will
      be thrown.

    - Function arguments in the right order:
      Function arguments which are not hyperparameters, and therefore not
      defined in the configuration space, must be passed to the ``fmin`` call
      in the order of occurrence in the function signature. In the example
      below, the training data, X and y, is a use case for this kind of
      function argument.

    Example::

        import numpy as np
        from FMin import fmin
        import ConfigSpace as CS

        # Create configuration space
        cs = CS.ConfigurationSpace()
        cs.add_hyperparameter(
            CS.UniformFloatHyperparameter('w', lower=-5, upper=5)
        )

        # Create data from function f(x) = x + N(0, 1)
        X = np.random.uniform(-5, 5, 100)
        y = np.random.normal(X, 1)

        # The function calculates the mean squared error for the first
        # ``budget`` points compared to their corresponding true values.
        # The expected minimum is at w = 1.
        opt_func = lambda x, y, w, budget: \
            np.mean((y[:int(budget)] - w * x[:int(budget)])**2)

        inc_best, inc_best_cfg, result = fmin(
            opt_func, cs, func_args=(X, y), min_budget=3,
            max_budget=len(X), num_iterations=3, num_workers=1)

    Args:
        func (function): function to optimize. Must return a python scalar!
            See also the section above,
            **The optimized function must satisfy the following conditions**.
        config_space (ConfigSpace.ConfigurationSpace): Definition of the
            search space containing all hyperparameters and their value
            ranges. You can find its definition in the `ConfigSpace
            repository <https://github.com/automl/ConfigSpace/>`_.
        func_args (tuple): arguments passed to the function by the user,
            e.g. the data (X, y). These arguments don't include optimized
            parameters; those are defined in the configuration space object
            and will be passed by the master directly to the function.
        eta (float): In each iteration, a complete run of sequential halving
            is executed. In it, after evaluating each configuration on the
            same subset size, only a fraction of 1/eta of them 'advances' to
            the next round. Must be greater or equal to 2.
        min_budget (int, float, optional): Defines the minimum budget to
            evaluate configurations on. In combination with the parameters
            `max_budget` and `eta`, the number of configurations to evaluate
            is determined. Read more about it in the `Quickstart
            <https://automl.github.io/HpBandSter/build/html/quickstart.html#id6>`_.
            By default, `min_budget` and `max_budget` are set so that only a
            few configurations with budgets from 1 to 4 are evaluated.
        max_budget (int, float, optional): Defines the maximum budget to
            evaluate configurations on.
        num_iterations (int, optional): number of iterations to be performed
            in this run. By default, this value is set to 1.
        num_workers (int, optional): number of parallel workers. By default,
            just one worker is used.
        output_dir (str, optional): HpBandSter stores the sampled
            configurations and the results on these configurations in two
            .json files, 'configs.json' and 'results.json'. Those files will
            be stored by default in the current directory (default='.').
            Also, we store the configuration space definition to this
            directory for later use. It may be used for further analysis via
            `CAVE <https://automl.github.io/CAVE/stable/>`_.

    Returns:
        hpbandster.core.result.Run - Best run. Run result with the best loss
            values of all budgets. It stores information about the

            - budget
            - the unique configuration id (tuple)
            - loss
            - time stamps: start time and end time for this run

        Dict - Best found configuration. Contains the configuration (from the
            configuration space) which achieved the best results in the
            optimization run.

        hpbandster.core.result.Result - Result object storing the results of
            all runs that were evaluated. The best run and the best found
            configuration are extracted from this results-object.
    """
    output_dir = Path(output_dir)
    output_dir.mkdir(exist_ok=True)

    # Set up a local nameserver and start it
    ns = hpns.NameServer(run_id='fmin', nic_name=None,
                         working_directory=output_dir)
    ns_host, ns_port = ns.start()

    # Create ``num_workers`` workers and pass the function as well as the
    # function arguments to each of them.
    workers = []
    for _ in range(num_workers):
        worker = FMinWorker(func=func, func_args=func_args,
                            nameserver=ns_host, nameserver_port=ns_port,
                            run_id='fmin')
        worker.run(background=True)
        workers.append(worker)

    # The result logger will store the intermediate results and the sampled
    # configurations in the passed directory.
    result_logger = hpres.json_result_logger(directory=output_dir,
                                             overwrite=True)

    # For hyperparameter importance analysis via CAVE we store the
    # configuration space definition to file.
    with open(output_dir / 'configspace.json', 'w') as f:
        f.write(json.write(config_space))

    # Set up a master, which does the bookkeeping and decides what to run next.
    opt = BOHB(configspace=config_space, run_id='fmin',
               min_budget=min_budget, max_budget=max_budget, eta=eta,
               host=ns_host, nameserver=ns_host, nameserver_port=ns_port,
               result_logger=result_logger)

    # The result object stores run information, e.g. the incumbent trajectory.
    # Force the master to wait until all workers are ready.
    result = opt.run(n_iterations=num_iterations, min_n_workers=num_workers)

    # After the run has finished, shut down the master and the workers
    opt.shutdown(shutdown_workers=True)
    ns.shutdown()

    # Save the result object to file.
    with open(output_dir / 'results.pkl', 'wb') as f:
        import pickle
        pickle.dump(result, f)

    # Return the optimal value and the corresponding configuration, as well
    # as the result object. The result object can be used in a second step
    # for further hyperparameter importance analysis with CAVE.
    id2config = result.get_id2config_mapping()
    incumbent = result.get_incumbent_id()
    inc_value = result.get_runs_by_id(incumbent)[-1]['loss']
    inc_cfg = id2config[incumbent]['config']

    return inc_value, inc_cfg, result
def generate_csv_data(NUM_EVALUATIONS, NUM_BUDGETS, ALLINONE, SEPARATE):
    if not os.path.exists(ALLINONE):
        os.makedirs(ALLINONE)
    if not os.path.exists(SEPARATE):
        os.makedirs(SEPARATE)

    config_space = ConfigurationSpace()
    config_space.add_hyperparameters([
        UniformFloatHyperparameter('random_parameter_1', 0, 1.2),
        UniformIntegerHyperparameter('random_parameter_2', -10, 10),
        UniformIntegerHyperparameter('random_parameter_3', 1, 1000),
    ])

    trajectory = []
    runhistory = []
    lowest_cost = np.inf
    start_time = time.time()

    if NUM_BUDGETS <= 1:
        budgets = [0 for _ in range(NUM_EVALUATIONS)]
    else:
        budgets = [50 + 50 * (i // (NUM_EVALUATIONS / NUM_BUDGETS))
                   for i in range(NUM_EVALUATIONS)]

    for i, budget in enumerate(budgets):
        if i == 0:
            random1 = config_space.get_hyperparameter('random_parameter_1').default_value
            random2 = config_space.get_hyperparameter('random_parameter_2').default_value
            random3 = config_space.get_hyperparameter('random_parameter_3').default_value
        else:
            random1 = np.random.uniform(0.1, 1.1)
            random2 = np.random.randint(-10, 10)
            random3 = np.random.randint(1, 1000)
        cost = np.random.uniform(np.abs(NUM_EVALUATIONS - i - np.random.randint(50)),
                                 10 * np.log(NUM_EVALUATIONS - i)) * random1
        new_time = time.time() - start_time
        status = 'SUCCESS'
        # should be np.random.randint(1, 10000000), but seeds are currently
        # not supported with budgets
        seed = 42
        if lowest_cost > cost:
            lowest_cost = cost
            trajectory.append([new_time, new_time, i, cost, random1, random2, random3])
        runhistory.append([cost, new_time, status, budget, seed, random1, random2, random3])

    with open(os.path.join(ALLINONE, 'runhistory.csv'), 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(['cost', 'time', 'status', 'budget', 'seed',
                         'random_parameter_1', 'random_parameter_2', 'random_parameter_3'])
        for run in runhistory:
            writer.writerow(run)

    with open(os.path.join(SEPARATE, 'runhistory.csv'), 'w', newline='') as rh,\
         open(os.path.join(SEPARATE, 'configurations.csv'), 'w', newline='') as configs:
        rh_writer = csv.writer(rh, delimiter=',')
        configs_writer = csv.writer(configs, delimiter=',')
        rh_writer.writerow(['cost', 'time', 'status', 'budget', 'seed', 'config_id'])
        configs_writer.writerow(['CONFIG_ID', 'random_parameter_1',
                                 'random_parameter_2', 'random_parameter_3'])
        for idx, run in enumerate(runhistory):
            rh_writer.writerow(run[:5] + [idx])
            configs_writer.writerow([idx] + run[5:])

    for path in [ALLINONE, SEPARATE]:
        with open(os.path.join(path, 'configspace.json'), 'w') as f:
            f.write(pcs_json.write(config_space))
        with open(os.path.join(path, 'trajectory.csv'), 'w', newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(['cpu_time', 'wallclock_time', 'evaluations', 'cost',
                             'random_parameter_1', 'random_parameter_2', 'random_parameter_3'])
            for t in trajectory:
                writer.writerow(t)
        with open(os.path.join(path, 'scenario.txt'), 'w') as f:
            f.write('paramfile = {}\nrun_obj = quality'.format(
                os.path.join(os.path.basename(path.rstrip('/')), 'configspace.json')))
        hp.default_value = float(hp.default_value)
        hp.sequence = tuple(np.array(hp.sequence).astype(float))

# This if-block has been introduced explicitly for SVM; it fixes the np.float32
# type cast enforced in the old get_fidelity_grid function that was used for
# the bulk of the SVM collection
if metadata["exp_args"]["space"] == "svm":
    z_grid = get_fidelity_grid(
        config_spaces["z"],
        metadata["exp_args"]["z_grid_size"],
        include_sh_budgets=metadata["exp_args"]["include_SH"]
    )
    z_grid = tuple([f[0] for f in z_grid])
    hp = config_spaces["z_discrete"].get_hyperparameter("subsample")
    hp.sequence = z_grid
    hp.default_value = z_grid[-1]

for hp in config_spaces["z_discrete"].get_hyperparameters():
    if isinstance(hp.default_value, (np.float16, np.float32, np.float64)):
        hp.sequence = tuple(float(val) for val in hp.sequence)
        hp.default_value = float(hp.sequence[-1])
    else:
        hp.sequence = tuple(int(val) for val in hp.sequence)
        hp.default_value = int(hp.sequence[-1])

for k, _space in config_spaces.items():
    config_spaces[k] = json_cs.write(_space)
metadata["config_spaces"] = config_spaces

with open(os.path.join(output_path, "{}_{}_metadata.json".format(space, task_id)), "w") as f:
    json.dump(json_compatible_dict(metadata), f)

print("Updated with global minima!")
print("All files saved!")
def test_serialize_forbidden_in_clause(self):
    cs = ConfigurationSpace()
    a = cs.add_hyperparameter(CategoricalHyperparameter('a', [0, 1, 2]))
    cs.add_forbidden_clause(ForbiddenInClause(a, [1, 2]))
    write(cs)
def fit(self, pipeline_config, data_manager, instance, autonet, run_number,
        run_id, task_id):
    instance_name, run_name = get_names(instance, run_id, run_number)
    run_result_dir = get_run_result_dir(pipeline_config, instance, run_id,
                                        run_number, autonet)
    instance_run_id = str(run_name) + "-" + str(instance_name)
    instance_run_id = '_'.join(instance_run_id.split(':'))

    autonet.autonet_config = None  # clean results of last fit
    autonet.update_autonet_config(task_id=task_id,
                                  run_id=instance_run_id,
                                  result_logger_dir=run_result_dir)

    if task_id not in [-1, 1]:
        return {'result_dir': run_result_dir}

    if not os.path.exists(run_result_dir):
        try:
            os.makedirs(run_result_dir)
        except Exception as e:
            print(e)

    logging.getLogger('benchmark').debug(
        "Create config and info files for current run " + str(run_name))

    instance_info = dict()
    instance_info['path'] = instance
    instance_info['is_classification'] = data_manager.is_classification
    instance_info['is_multilabel'] = data_manager.is_multilabel
    instance_info['instance_shape'] = data_manager.X_train.shape
    instance_info['categorical_features'] = data_manager.categorical_features

    autonet_config = autonet.get_current_autonet_config()
    if autonet_config["hyperparameter_search_space_updates"] is not None:
        autonet_config["hyperparameter_search_space_updates"].save_as_file(
            os.path.join(run_result_dir, "hyperparameter_search_space_updates.txt"))

    if 'user_updates_config' in pipeline_config:
        user_updates_config = pipeline_config['user_updates_config']
        if user_updates_config:
            from shutil import copyfile
            copyfile(user_updates_config,
                     os.path.join(run_result_dir, 'user_updates_config.csv'))

    self.write_config_to_file(run_result_dir, "instance.info", instance_info)
    self.write_config_to_file(run_result_dir, "benchmark.config", pipeline_config)
    self.write_config_to_file(run_result_dir, "autonet.config", autonet_config)

    # save refit config - add indent and sort keys
    if 'refit_config' in pipeline_config and pipeline_config['refit_config'] is not None:
        import json
        with open(pipeline_config['refit_config'], 'r') as f:
            refit_config = json.loads(f.read())
        with open(os.path.join(run_result_dir, 'refit_config.json'), 'w+') as f:
            f.write(json.dumps(refit_config, indent=4, sort_keys=True))

    # save search space
    search_space = autonet.pipeline.get_hyperparameter_search_space(**autonet_config)
    with open(os.path.join(run_result_dir, "configspace.json"), "w") as f:
        f.write(cs_json.write(search_space))

    # save search space without constants - used by bohb - as pcs (simple)
    simplified_search_space, _ = remove_constant_hyperparameter(search_space)
    with open(os.path.join(run_result_dir, "configspace_simple.pcs"), "w") as f:
        f.write(cs_pcs.write(simplified_search_space))

    return {'result_dir': run_result_dir}
def convert(self, folders, ta_exec_dirs=None, output_dir=None,
            converted_dest='converted_input_data'):
    self.logger.debug(
        "Converting CSV-data to SMAC3-data. Called with: folders=%s, ta_exec_dirs=%s, "
        "output_dir=%s, converted_dest=%s",
        str(folders), str(ta_exec_dirs), str(output_dir), str(converted_dest))

    # Use temporary files for the intermediate smac-result-like format if no
    # output_dir is specified
    if not output_dir:
        output_dir = tempfile.mkdtemp()
        self.logger.debug("Temporary directory for intermediate SMAC3-results: %s",
                          output_dir)

    if ta_exec_dirs is None or len(ta_exec_dirs) == 0:
        ta_exec_dirs = ['.']
    if len(ta_exec_dirs) != len(folders):
        ta_exec_dirs = [ta_exec_dirs[0] for _ in folders]

    #####################
    # Actual conversion #
    #####################
    folder_basenames = get_folder_basenames(folders)
    result = OrderedDict()
    for f, f_base, ta_exec_dir in zip(folders, folder_basenames, ta_exec_dirs):
        # Those are the parallel runs
        converted_folder_path = os.path.join(output_dir, converted_dest, f_base)
        self.logger.debug("Processing folder=%s, f_base=%s, ta_exec_dir=%s. Saving to %s.",
                          f, f_base, ta_exec_dir, converted_folder_path)

        if not os.path.exists(converted_folder_path):
            self.logger.debug("%s doesn't exist. Creating...", converted_folder_path)
            os.makedirs(converted_folder_path)

        # Get and write scenario
        # (todo: enhancement: make scenario-file optional (build from scratch))
        scenario_file_path = os.path.join(converted_folder_path, 'scenario.txt')
        scenario = self.get_scenario(f, ta_exec_dir=ta_exec_dir,
                                     out_path=scenario_file_path)

        # Read configuration space
        config_space = scenario.cs
        #config_space = self.load_configspace(f)
        scenario.paramfile = os.path.join(converted_folder_path, 'configspace.json')
        with open(scenario.paramfile, 'w') as new_file:
            new_file.write(pcs_json.write(config_space))

        # Read runhistory.csv and write runhistory.json(s)
        runhistory = self.get_runhistory(f, scenario, 'runhistory.csv')
        runhistory.save_json(os.path.join(converted_folder_path, 'runhistory.json'))
        try:
            validated_runhistory = self.get_runhistory(f, scenario,
                                                       'validated_runhistory.csv')
            validated_runhistory.save_json(
                os.path.join(converted_folder_path, 'validated_runhistory.json'))
        except FileNotFoundError:
            validated_runhistory = None
            self.logger.debug("No file detected at \"%s\"",
                              os.path.join(f, 'validated_runhistory.csv'))

        # Read trajectory
        # (todo: enhancement: make trajectory-file optional (read it from runhistory?))
        trajectory = self.get_trajectory(f, config_space, scenario,
                                         converted_folder_path)

        # After (possibly) changing paths and options (or creating the object),
        # (over)write to the new location
        scenario.output_dir_for_this_run = converted_folder_path
        scenario.write()

        result[f] = {
            'new_path': converted_folder_path,
            'config_space': config_space,
            'runhistory': runhistory,
            'validated_runhistory': validated_runhistory,
            'scenario': scenario,
            'trajectory': trajectory,
        }
    return result
# License: MIT
import json
import random

import requests

from ConfigSpace.read_and_write import json as config_json
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from openbox.config_space import ConfigurationSpace
from openbox.config_space.util import convert_configurations_to_array

user_id = 18

cs = ConfigurationSpace()
x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=0)
x2 = UniformFloatHyperparameter("x2", 0, 15, default_value=0)
cs.add_hyperparameters([x1, x2])
config_space_array = config_json.write(cs)

res = requests.post('http://127.0.0.1:8001/bo_advice/task_register/',
                    data={'id': user_id, 'config_space_array': config_space_array})
print('-----------------')
print(res)
print('-----------------')
print(res.text)
print('-----------------')
def hpbandster2smac(self, folder2result, cs: ConfigurationSpace, backup_cs,
                    output_dir: str):
    """Reading hpbandster-result-object and creating RunHistory and
    trajectory... treats each budget as an individual 'smac'-run, creates an
    output-directory with subdirectories for each budget.

    Parameters
    ----------
    folder2result: Dict(str : hpbandster.core.result.Result)
        folder mapping to bohb's result-objects
    cs: ConfigurationSpace
        the configuration space
    backup_cs: List[ConfigurationSpace]
        if loading a configuration fails, try configspaces from this list
        until one succeeds
    output_dir: str
        the output-dir to save the smac-runs to
    """
    # Create runhistories (one per budget)
    budget2rh = OrderedDict()
    for folder, result in folder2result.items():
        self.logger.debug("Budgets for '%s': %s" %
                          (folder, str(result.HB_config['budgets'])))
        id2config_mapping = result.get_id2config_mapping()
        skipped = {'None': 0, 'NaN': 0}
        for run in result.get_all_runs():
            if run.budget not in budget2rh:
                budget2rh[run.budget] = RunHistory(average_cost)
            rh = budget2rh[run.budget]

            # Load config...
            try:
                config = self._get_config(run.config_id, id2config_mapping, cs)
            except ValueError:
                self.logger.debug("Loading configuration failed... trying alternatives",
                                  exc_info=1)
                for bcs in backup_cs:
                    try:
                        config = self._get_config(run.config_id, id2config_mapping, bcs)
                        cs = bcs
                        break
                    except ValueError:
                        self.logger.debug("", exc_info=1)
                else:
                    self.logger.debug("None of the alternatives worked...")
                    raise ValueError(
                        "Your configspace seems to be corrupt. If you use floats (or mix up "
                        "ints, bools and strings) as categoricals, please consider using the "
                        ".json-format, as the .pcs-format cannot recover the type of "
                        "categoricals. Otherwise please report this to "
                        "https://github.com/automl/CAVE/issues (and attach the debug.log)")

            if run.loss is None:
                skipped['None'] += 1
                continue
            if np.isnan(run.loss):
                skipped['NaN'] += 1
                continue

            rh.add(config=config,
                   cost=run.loss,
                   time=run.time_stamps['finished'] - run.time_stamps['started'],
                   status=StatusType.SUCCESS,
                   seed=0,
                   additional_info={'info': run.info,
                                    'timestamps': run.time_stamps})

        self.logger.debug("Skipped %d None- and %d NaN-loss-values in BOHB-result",
                          skipped['None'], skipped['NaN'])

    # Write to disk
    budget2path = OrderedDict()  # paths to individual budgets
    self.logger.info("Assuming BOHB treats target algorithms as deterministic "
                     "(and does not re-evaluate)")
    formatted_budgets = format_budgets(budget2rh.keys())
    for b, rh in budget2rh.items():
        output_path = os.path.join(output_dir, formatted_budgets[b])
        budget2path[b] = output_path

        scenario = Scenario({
            'run_obj': 'quality',
            'cs': cs,
            'output_dir': output_dir,
            # At the time of writing, BOHB always treats ta's as deterministic
            'deterministic': True,
        })
        scenario.output_dir_for_this_run = output_path
        scenario.write()

        with open(os.path.join(output_path, 'configspace.json'), 'w') as fh:
            fh.write(pcs_json.write(cs))

        rh.save_json(fn=os.path.join(output_path, 'runhistory.json'))

        self.get_trajectory(folder2result, output_path, scenario, rh, budget=b)

    return budget2path
# Step 2:
# The worker implements the connection to the model to be evaluated.
# Its 'compute'-method will be called later by the BOHB-optimizer repeatedly
# with the sampled configurations, and returns, for example, the computed loss.
# Further usages of the worker will be covered in a later example.
w = MyWorker(
    nameserver=ns_host,
    nameserver_port=ns_port,
    run_id=run_id,  # unique Hyperband run id
)
w.run(background=True)

# Write the ConfigSpace for later use to the working dir
with open('configspace.json', 'w') as file:
    file.write(json_writer.write(MyWorker.get_configspace()))

# Step 3:
# The number of sampled configurations is determined by the
# parameters eta, min_budget and max_budget.
# After evaluating each configuration, starting with the minimum budget
# on the same subset size, only a fraction of 1 / eta of them
# 'advances' to the next round. At the same time the current budget is doubled.
# This process runs until the maximum budget is reached.
HB = BOHB(
    configspace=MyWorker.get_configspace(),
    run_id=run_id,
    eta=3, min_budget=1, max_budget=25,  # Hyperband parameters
    nameserver=ns_host,
def hpbandster2smac(self, folder, result, cs_options, output_dir: str):
    """Reading hpbandster-result-object and creating RunHistory and trajectory...

    Parameters
    ----------
    folder: str (path)
        original folder
    result: hpbandster.core.result.Result
        bohb's result-object
    cs_options: list[ConfigurationSpace]
        the configuration spaces. in the best case it's a single element, but
        for pcs-format we need to guess through a list of possible configspaces
    output_dir: str
        the output-dir to save the smac-runs to

    Returns
    -------
    converted: dict{
        'new_path' : path_to_converted_input,
        'hpbandster_result' : result_in_hpbandster_format,
        'config_space' : config_space,
        'runhistory' : runhistory,
        'validated_runhistory' : validated_runhistory,
        'scenario' : scenario,
        'trajectory' : trajectory,
        }
    """
    self.logger.debug("Budgets for '%s': %s" %
                      (folder, str(result.HB_config['budgets'])))

    ##########################
    # 1. Create runhistory   #
    ##########################
    id2config_mapping = result.get_id2config_mapping()
    skipped = {'None': 0, 'NaN': 0}
    rh = RunHistory()
    for run in result.get_all_runs():
        # Load config...
        config = None
        while config is None:
            if len(cs_options) == 0:
                self.logger.debug("None of the alternatives worked...")
                raise ValueError(
                    "Your configspace seems to be corrupt. If you use floats (or mix up "
                    "ints, bools and strings) as categoricals, please consider using the "
                    ".json-format, as the .pcs-format cannot recover the type of "
                    "categoricals. Otherwise please report this to "
                    "https://github.com/automl/CAVE/issues (and attach the debug.log)")
            try:
                config = self._get_config(run.config_id, id2config_mapping,
                                          cs_options[0])
            except ValueError:
                self.logger.debug("Loading config failed. Trying %d alternatives"
                                  % (len(cs_options) - 1), exc_info=1)
                cs_options = cs_options[1:]  # remove the failing cs-version

        # Filter corrupted loss-values (ignore them)
        if run.loss is None:
            skipped['None'] += 1
            continue
        if np.isnan(run.loss):
            skipped['NaN'] += 1
            continue

        rh.add(config=config,
               cost=run.loss,
               time=run.time_stamps['finished'] - run.time_stamps['started'],
               status=StatusType.SUCCESS,
               budget=run.budget,
               seed=0,
               additional_info={'info': run.info,
                                'timestamps': run.time_stamps})

    self.logger.debug("Skipped %d None- and %d NaN-loss-values in BOHB-result",
                      skipped['None'], skipped['NaN'])

    ##########################
    # 2. Create all else     #
    ##########################
    scenario = Scenario({
        'run_obj': 'quality',
        'cs': cs_options[0],
        'output_dir': output_dir,
        # At the time of writing, BOHB always treats ta's as deterministic
        'deterministic': True,
    })
    scenario.output_dir_for_this_run = output_dir
    scenario.write()

    with open(os.path.join(output_dir, 'configspace.json'), 'w') as fh:
        fh.write(pcs_json.write(cs_options[0]))

    rh.save_json(fn=os.path.join(output_dir, 'runhistory.json'))

    trajectory = self.get_trajectory(result, output_dir, scenario, rh)

    return {
        'new_path': output_dir,
        'hpbandster_result': result,
        'config_space': cs_options[0],
        'runhistory': rh,
        'validated_runhistory': None,
        'scenario': scenario,
        'trajectory': trajectory,
    }
def convert_cs_to_json(cs):
    cs_as_string = write(cs)
    cs_as_json = json.loads(cs_as_string)
    return cs_as_json
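# Example use of convert_cs_to_json (illustrative; assumes the layout of
# ConfigSpace's json format, where the resulting dict carries a
# 'hyperparameters' list with one entry per parameter):
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter('x', 0.0, 1.0))
cs_dict = convert_cs_to_json(cs)
print(cs_dict['hyperparameters'][0]['name'])  # -> 'x'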
def __init__(self, configspace, min_points_in_model=None, top_n_percent=15,
             num_samples=64, random_fraction=1 / 3, bandwidth_factor=3,
             min_bandwidth=1e-3, **kwargs):
    """
    Fits, for each given budget, a kernel density estimator on the best N
    percent of the evaluated configurations on this budget.

    Parameters:
    -----------
    configspace: ConfigSpace
        Configuration space object
    top_n_percent: int
        Determines the percentile of configurations that will be used as
        training data for the kernel density estimator, e.g. if set to 10,
        the 10% best configurations will be considered for training.
    min_points_in_model: int
        minimum number of datapoints needed to fit a model
    num_samples: int
        number of samples drawn to optimize EI via sampling
    random_fraction: float
        fraction of random configurations returned
    bandwidth_factor: float
        widens the bandwidth for continuous parameters for proposed points
        to optimize EI
    min_bandwidth: float
        to keep diversity, even when all (good) samples have the same value
        for one of the parameters, a minimum bandwidth (default: 1e-3) is
        used instead of zero.
    """
    super().__init__(**kwargs)
    self.top_n_percent = top_n_percent
    self.configspace = configspace
    self.bw_factor = bandwidth_factor
    self.min_bandwidth = min_bandwidth

    self.min_points_in_model = min_points_in_model
    if min_points_in_model is None:
        self.min_points_in_model = len(self.configspace.get_hyperparameters()) + 1
    if self.min_points_in_model < len(self.configspace.get_hyperparameters()) + 1:
        self.logger.warning('Invalid min_points_in_model value. Setting it to %i'
                            % (len(self.configspace.get_hyperparameters()) + 1))
        self.min_points_in_model = len(self.configspace.get_hyperparameters()) + 1

    self.num_samples = num_samples
    self.random_fraction = random_fraction

    hps = self.configspace.get_hyperparameters()

    # Debug check: write the configspace to json, read it back, and verify
    # that the round trip preserves the space
    from ConfigSpace.read_and_write import json
    with open('configspace.json', 'w') as fh:
        fh.write(json.write(self.configspace))
    with open('configspace.json') as fh:
        cs_str = str(fh.read())
    cs_ = json.read(cs_str)
    print(self.configspace == cs_)

    self.kde_vartypes = ""
    self.vartypes = []
    for h in hps:
        if hasattr(h, 'sequence'):
            raise RuntimeError(
                'This version of BOHB does not support ordinal hyperparameters. '
                'Please encode %s as an integer parameter!' % (h.name))
        if hasattr(h, 'choices'):
            self.kde_vartypes += 'u'
            self.vartypes += [len(h.choices)]
        else:
            self.kde_vartypes += 'c'
            self.vartypes += [0]

    self.vartypes = np.array(self.vartypes, dtype=int)

    # store precomputed probs for the categorical parameters
    self.cat_probs = []

    self.configs = dict()
    self.losses = dict()
    self.good_config_rankings = dict()
    self.kde_models = dict()
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--n_iterations', type=int,
                        help='Number of iterations performed by the optimizer',
                        default=4)
    parser.add_argument('--worker',
                        help='Flag to turn this into a worker process',
                        action='store_true')
    parser.add_argument('--run_id', type=str,
                        help='A unique run id for this optimization run. An easy option is to '
                             "use the job id of the cluster's scheduler.")
    parser.add_argument('--shared_directory', type=str,
                        help='A directory that is accessible for all processes, e.g. a NFS share.')
    parser.add_argument('--interface', type=str,
                        help='Which network interface to use',
                        default="eth1")
    args = parser.parse_args()

    try:
        os.mkdir(args.shared_directory)
    except FileExistsError:
        pass

    # Every process has to look up the hostname
    host = hpns.nic_name_to_host(args.interface)

    if args.worker:
        # short artificial delay to make sure the nameserver is already running
        time.sleep(60)
        w = MyWorker(run_id=args.run_id, host=host)
        w.load_nameserver_credentials(working_directory=args.shared_directory)
        w.run(background=False)
        exit(0)

    # Write the configspace
    cs = MyWorker.get_configspace()
    with open(os.path.join(args.shared_directory, 'configspace.json'), "w") as fh:
        fh.write(pcs_out.write(cs))

    result_logger = hpres.json_result_logger(directory=args.shared_directory,
                                             overwrite=True)

    NS = hpns.NameServer(run_id=args.run_id, host=host, port=0,
                         working_directory=args.shared_directory)
    ns_host, ns_port = NS.start()

    w = MyWorker(run_id=args.run_id, host=host,
                 nameserver=ns_host, nameserver_port=ns_port)
    w.run(background=True)

    # Run an optimizer
    # We now have to specify the host and the nameserver information
    bohb = BOHB(configspace=cs,
                run_id=args.run_id,
                host=host,
                nameserver=ns_host,
                nameserver_port=ns_port,
                eta=3,
                result_logger=result_logger,
                min_budget=1,
                max_budget=9)
    res = bohb.run(n_iterations=args.n_iterations, min_n_workers=1)

    # In a cluster environment, you usually want to store the results for
    # later analysis. One option is to simply pickle the Result object.
    with open(os.path.join(args.shared_directory, 'results.pkl'), 'wb') as fh:
        pickle.dump(res, fh)

    # Step 4: Shutdown
    # After the optimizer run, we must shut down the master and the nameserver.
    bohb.shutdown(shutdown_workers=True)
    NS.shutdown()
def hpbandster2smac(self, folder2result, cs_options, output_dir: str):
    """Reading hpbandster-result-object and creating RunHistory and
    trajectory... treats each budget as an individual 'smac'-run, creates an
    output-directory with subdirectories for each budget.

    Parameters
    ----------
    folder2result: Dict(str : hpbandster.core.result.Result)
        folder mapping to bohb's result-objects
    cs_options: list[ConfigurationSpace]
        the configuration spaces. in the best case it's a single element, but
        for pcs-format we need to guess through a list of possible configspaces
    output_dir: str
        the output-dir to save the smac-runs to

    Returns
    -------
    folder2budgets: dict(dict(str) - str)
        maps each folder (from parallel execution) to a dict, which in turn
        maps all budgets of the specific parallel execution to their paths
    """
    folder2budgets = OrderedDict()
    self.logger.debug("Loading with %d configspace alternative options...",
                      len(cs_options))
    self.logger.info("Assuming BOHB treats target algorithms as deterministic "
                     "(and does not re-evaluate)")
    for folder, result in folder2result.items():
        folder2budgets[folder] = OrderedDict()
        self.logger.debug("Budgets for '%s': %s" %
                          (folder, str(result.HB_config['budgets'])))

        ##########################
        # 1. Create runhistory   #
        ##########################
        id2config_mapping = result.get_id2config_mapping()
        skipped = {'None': 0, 'NaN': 0}
        budget2rh = OrderedDict()
        for run in result.get_all_runs():
            # Choose runhistory to add run to
            if run.budget not in budget2rh:
                budget2rh[run.budget] = RunHistory(average_cost)
            rh = budget2rh[run.budget]

            # Load config...
            config = None
            while config is None:
                if len(cs_options) == 0:
                    self.logger.debug("None of the alternatives worked...")
                    raise ValueError(
                        "Your configspace seems to be corrupt. If you use floats (or mix up "
                        "ints, bools and strings) as categoricals, please consider using the "
                        ".json-format, as the .pcs-format cannot recover the type of "
                        "categoricals. Otherwise please report this to "
                        "https://github.com/automl/CAVE/issues (and attach the debug.log)")
                try:
                    config = self._get_config(run.config_id, id2config_mapping,
                                              cs_options[0])
                except ValueError:
                    self.logger.debug("Loading configuration failed... trying %d alternatives"
                                      % (len(cs_options) - 1), exc_info=1)
                    cs_options = cs_options[1:]  # remove the failing cs-version

            # Filter corrupted loss-values (ignore them)
            if run.loss is None:
                skipped['None'] += 1
                continue
            if np.isnan(run.loss):
                skipped['NaN'] += 1
                continue

            rh.add(config=config,
                   cost=run.loss,
                   time=run.time_stamps['finished'] - run.time_stamps['started'],
                   status=StatusType.SUCCESS,
                   seed=0,
                   additional_info={'info': run.info,
                                    'timestamps': run.time_stamps})

        self.logger.debug("Skipped %d None- and %d NaN-loss-values in BOHB-result",
                          skipped['None'], skipped['NaN'])

        ##########################
        # 2. Create all else     #
        ##########################
        # Make budget-names readable, e.g. [0.021311, 0.031211] to [0.02, 0.03]
        formatted_budgets = format_budgets(budget2rh.keys())
        for b, rh in budget2rh.items():
            output_path = os.path.join(output_dir, folder, formatted_budgets[b])
            folder2budgets[folder][b] = output_path

            scenario = Scenario({
                'run_obj': 'quality',
                'cs': cs_options[0],
                'output_dir': output_dir,
                # At the time of writing, BOHB always treats ta's as deterministic
                'deterministic': True,
            })
            scenario.output_dir_for_this_run = output_path
            scenario.write()

            with open(os.path.join(output_path, 'configspace.json'), 'w') as fh:
                fh.write(pcs_json.write(cs_options[0]))

            rh.save_json(fn=os.path.join(output_path, 'runhistory.json'))

            self.get_trajectory(folder2result[folder], output_path, scenario,
                                rh, budget=b)

    return folder2budgets
        traceback.print_exc()
        crashed = True

    #new_autosklearn_path = os.path.join(tmp_dir, 'auto-sklearn-output')
    #shutil.copytree(autosklearn_directory, new_autosklearn_path)
    #try:
    #    shutil.rmtree(autosklearn_directory)
    #except:
    #    pass
    #raise e

    # Store searchspace for later examination if run not crashed
    if not crashed:
        cs = automl._automl[0].configuration_space
        with open(os.path.join(tmp_dir, 'space.json'), 'w') as fh:
            fh.write(write(cs))

    result = dict()
    result[0] = {
        'task_id': task_id,
        'time_limit': time_limit,
        'loss': loss,
        'trajectory': trajectory,
    }

    time_stamp_dict = {}
    for dirpath, dirnames, filenames in os.walk(autosklearn_directory, topdown=False):
        time_stamp_dict[dirpath] = {}
        for filename in filenames:
            time_stamp_dict[dirpath][filename] = os.path.getmtime(
                os.path.join(dirpath, filename))

    # Save timestamps, so we can compute ensemble performance over time