Beispiel #1
0
    def test_round_trip(self):
        """Round-trip every search space in ``test_searchspaces`` through JSON.

        Each directory entry whose name contains ``.pcs`` is parsed with
        ``read_pcs``; if that raises, ``read_pcs_new`` is used instead. The
        parsed space is named after its file, serialised with ``write``,
        parsed back with ``read`` and must compare equal to the original.
        """
        test_dir = os.path.dirname(os.path.abspath(__file__))
        searchspace_dir = os.path.abspath(
            os.path.join(test_dir, "..", "test_searchspaces"))

        for file_name in sorted(os.listdir(searchspace_dir)):
            # Substring match: any name containing '.pcs' qualifies.
            if '.pcs' not in file_name:
                continue

            with open(os.path.join(searchspace_dir, file_name)) as fh:
                pcs_lines = fh.read().split('\n')

            try:
                original = read_pcs(pcs_lines)
            except Exception:
                original = read_pcs_new(pcs_lines)
            original.name = file_name

            restored = read(write(original))
            self.assertEqual(restored, original)
Beispiel #2
0
    def test_round_trip(self):
        """Check that each ``.pcs`` search space survives a JSON round trip.

        The files are taken from the ``test_searchspaces`` directory next to
        this test's parent directory. Every file is first parsed with
        ``read_pcs``; when that fails, ``read_pcs_new`` is tried. The space is
        then written with ``write``, read back with ``read`` and compared with
        the space it was produced from.
        """
        this_file = os.path.abspath(__file__)
        this_directory = os.path.dirname(this_file)
        configuration_space_path = os.path.join(this_directory,
                                                "..", "test_searchspaces")
        configuration_space_path = os.path.abspath(configuration_space_path)
        pcs_files = os.listdir(configuration_space_path)

        for pcs_file in sorted(pcs_files):

            if '.pcs' in pcs_file:
                full_path = os.path.join(configuration_space_path, pcs_file)

                with open(full_path) as fh:
                    cs_string = fh.read().split('\n')
                try:
                    cs = read_pcs(cs_string)
                except Exception:
                    # Only fall back on ordinary errors; a bare ``except``
                    # would also swallow KeyboardInterrupt and SystemExit.
                    cs = read_pcs_new(cs_string)

                cs.name = pcs_file

                json_string = write(cs)
                new_cs = read(json_string)

                self.assertEqual(new_cs, cs)
Beispiel #3
0
 def test_configspace_with_probabilities(self):
     """Categorical weights must survive a ``write``/``read`` round trip.

     A space with one categorical hyperparameter ``'a'`` (choices 0, 1, 2 and
     weights 0.2, 0.2, 0.6) is serialised and parsed again; the restored
     hyperparameter's ``probabilities`` must equal ``(0.2, 0.2, 0.6)``.
     """
     space = ConfigurationSpace()
     weighted_hp = CategoricalHyperparameter(
         'a', [0, 1, 2], weights=[0.2, 0.2, 0.6])
     space.add_hyperparameter(weighted_hp)

     restored_space = read(write(space))
     restored_probabilities = restored_space.get_hyperparameter(
         'a').probabilities
     self.assertEqual(restored_probabilities, (0.2, 0.2, 0.6))
Beispiel #4
0
 def as_dict(self):
     """Return this object's state as a dictionary.

     Keys: ``'cid'`` (external name of the cid without its config),
     ``'pipeline'`` (``comp_util.serialize`` of the pipeline), ``'cfg_keys'``
     (list of ``(hash, idx)`` tuples), ``'budget'`` and ``'configspace'``
     (``config_json.write`` output, or ``None`` when no configspace is set).
     """
     snapshot = {}
     snapshot['cid'] = self.cid.without_config().external_name
     snapshot['pipeline'] = comp_util.serialize(self.pipeline)
     snapshot['cfg_keys'] = [
         (cfg_key.hash, cfg_key.idx) for cfg_key in self.cfg_keys
     ]
     snapshot['budget'] = self.budget
     if self.configspace is None:
         snapshot['configspace'] = None
     else:
         snapshot['configspace'] = config_json.write(self.configspace)
     return snapshot
    def fit(self, pipeline_config, data_manager, instance, autonet_config_file,
            autonet, run_number, run_id, task_id):
        """Configure ``autonet`` for one run and dump the run's description.

        The autonet config is reset and updated with the task id, a run id
        built from the run, instance and autonet-config names (``:`` replaced
        by ``_``) and the result directory. Only for ``task_id`` -1 or 1 are
        the files written: ``instance.info``, ``benchmark.config``,
        ``autonet.config``, ``configspace.json`` and, when search space
        updates are configured, ``hyperparameter_search_space_updates.txt``.

        Returns:
            dict: ``{'result_dir': run_result_dir}``.
        """
        instance_name, autonet_config_name, run_name = get_names(
            instance, autonet_config_file, run_id, run_number)
        run_result_dir = get_run_result_dir(pipeline_config, instance,
                                            autonet_config_file, run_id,
                                            run_number)
        id_parts = (run_name, instance_name, autonet_config_name)
        instance_run_id = "_".join(str(part) for part in id_parts)
        instance_run_id = instance_run_id.replace(':', '_')

        autonet.autonet_config = None  # clean results of last fit
        autonet.update_autonet_config(task_id=task_id,
                                      run_id=instance_run_id,
                                      result_logger_dir=run_result_dir)

        # Every other task id returns without writing any files.
        if task_id not in (-1, 1):
            return {'result_dir': run_result_dir}

        if not os.path.exists(run_result_dir):
            try:
                os.makedirs(run_result_dir)
            except Exception as e:
                # Best effort: report the problem and carry on.
                print(e)

        benchmark_logger = logging.getLogger('benchmark')
        benchmark_logger.debug(
            "Create config and info files for current run " + str(run_name))

        instance_info = {
            'path': instance,
            'is_classification': data_manager.is_classification,
            'is_multilabel': data_manager.is_multilabel,
            'instance_shape': data_manager.X_train.shape,
            'categorical_features': data_manager.categorical_features,
        }

        updates_key = "hyperparameter_search_space_updates"
        if autonet.get_current_autonet_config()[updates_key] is not None:
            updates_path = os.path.join(
                run_result_dir, "hyperparameter_search_space_updates.txt")
            autonet.get_current_autonet_config()[updates_key].save_as_file(
                updates_path)

        self.write_config_to_file(run_result_dir, "instance.info",
                                  instance_info)
        self.write_config_to_file(run_result_dir, "benchmark.config",
                                  pipeline_config)
        current_config = autonet.get_current_autonet_config()
        self.write_config_to_file(run_result_dir, "autonet.config",
                                  current_config)

        configspace_path = os.path.join(run_result_dir, "configspace.json")
        with open(configspace_path, "w") as f:
            search_space = autonet.pipeline.get_hyperparameter_search_space(
                **autonet.get_current_autonet_config())
            f.write(json.write(search_space))

        return {'result_dir': run_result_dir}
Beispiel #6
0
    def get_fidelity_space(self, kwargs_str: str) -> str:
        """Return the benchmark's fidelity space serialised by ``csjson.write``.

        Args:
            kwargs_str: JSON-encoded keyword arguments, decoded with
                ``BenchmarkDecoder``. Only the ``'seed'`` entry is read; it
                defaults to ``None`` when absent.

        Returns:
            The string produced by ``csjson.write(..., indent=None)``.
        """
        logger.debug(f'Server: get_fidelity_space: kwargs_str: {kwargs_str}')

        decoded_kwargs = json.loads(kwargs_str, cls=BenchmarkDecoder)
        result = self.benchmark.get_fidelity_space(
            seed=decoded_kwargs.get('seed', None))

        logger.debug(f'Server: Fidelity Space: {result}')
        serialized = csjson.write(result, indent=None)
        return serialized
Beispiel #7
0
    def get_configuration_space(self, kwargs_str: str) -> str:
        """Return the benchmark's configuration space as a ``csjson`` string.

        Args:
            kwargs_str: JSON-encoded keyword arguments. Only the ``'seed'``
                entry is read; it defaults to ``None`` when absent.

        Returns:
            The string produced by ``csjson.write(..., indent=None)``.
        """
        logger.debug(f'Server: get_config_space: kwargs_str: {kwargs_str}')

        decoded_kwargs = json.loads(kwargs_str)
        result = self.benchmark.get_configuration_space(
            seed=decoded_kwargs.get('seed', None))

        logger.debug(f'Server: Configspace: {result}')
        serialized = csjson.write(result, indent=None)
        return serialized
Beispiel #8
0
 def as_dict(self):
     """Return the config, its space, key, name and ``mf`` as a dictionary.

     ``'config'`` and ``'mf'`` are converted to lists via ``tolist()`` and
     ``'configspace'`` is the ``config_json.write`` output of the config's
     configuration space.
     """
     # meta data are serialized via pickle
     state = {}
     # noinspection PyUnresolvedReferences
     state['config'] = self.config.get_array().tolist()
     # noinspection PyUnresolvedReferences
     state['configspace'] = config_json.write(
         self.config.configuration_space)
     state['cfg_key'] = self.cfg_key
     state['name'] = self.name
     state['mf'] = self.mf.tolist()
     return state
Beispiel #9
0
 def default(self, obj):
     """Convert objects the stock JSON encoder cannot serialise.

     Checks run in this order: UUIDs become strings; NumPy integers, floats
     and booleans become the matching Python scalars; arrays become lists;
     plain functions and classes become ``"<module>.<name>"``; skopt
     dimensions and ConfigSpace hyperparameters become ``str(obj)``;
     configuration spaces become the parsed ``cs_json.write`` output.
     Anything else is passed on to the parent encoder.
     """
     scalar_converters = (
         (uuid.UUID, str),
         (np.integer, int),
         (np.floating, float),
         (np.bool_, bool),
     )
     for kind, convert in scalar_converters:
         if isinstance(obj, kind):
             return convert(obj)

     if isinstance(obj, np.ndarray):
         return obj.tolist()

     if isinstance(obj, types.FunctionType) or isclass(obj):
         return f"{obj.__module__}.{obj.__name__}"

     if isinstance(obj, (deephyper.skopt.space.Dimension,
                         csh.Hyperparameter)):
         return str(obj)

     if isinstance(obj, cs.ConfigurationSpace):
         return json.loads(cs_json.write(obj))

     return super(Encoder, self).default(obj)
Beispiel #10
0
import ConfigSpace.read_and_write.json as json_writer

import hpbandster.core.nameserver as hpns
from hpbandster.optimizers import BOHB as BOHB
from hpbandster.examples.commons import MyWorker

# First, create a ConfigSpace object.
# It contains the hyperparameters to be optimized; here that is a single
# float 'x' with lower=0 and upper=1.
# For more details, please have a look at the ConfigSpace example in the
# documentation.
config_space = CS.ConfigurationSpace()
config_space.add_hyperparameter(
    CS.UniformFloatHyperparameter('x', lower=0, upper=1))

# Write the configuration space to 'configspace.json' for later use.
with open('configspace.json', 'w') as file:
    file.write(json_writer.write(config_space))

# Every run has to have an id that is unique at runtime.
# It needs to be unique among concurrent runs, i.e. when multiple
# instances run at the same time, they have to have different ids.
run_id = '0'

# Step 1:
# Every run needs a nameserver. It could be a 'static' server with a
# permanent address, but here it is started on the local machine
# with a random port.
# The nameserver manages the concurrently running workers across all
# possible threads or cluster nodes.
NS = hpns.NameServer(run_id=run_id, host='localhost', port=0)
ns_host, ns_port = NS.start()

# Step 2:
def main(input_dir, method, method_type, output_dir, taskset, nseeds=3):
    """Collect the incumbents of all complete tasks and write them to disk.

    The configuration space is read from the first ``space.json`` matching
    ``<input_dir>/<method_type>/<method>*/space.json``. A task counts as
    complete when exactly ``nseeds`` ``runhistory.json`` files are found for
    it. The runhistories are read in parallel by
    ``read_configurations_for_task_id``; for every task the first
    configuration not seen before becomes a new incumbent.

    Three files are written to ``output_dir``: ``incumbents.json`` (config id
    to configuration dictionary), ``task_to_inc_id.json`` (config id to the
    task ids recorded for it) and ``space.json`` (the configuration space).

    Args:
        input_dir: Existing directory that holds the experiment output.
        method: Method name; used as a prefix in the glob patterns.
        method_type: Sub-directory of ``input_dir`` to search in.
        output_dir: Directory the three result files are written to.
        taskset: Iterable of integer task ids (formatted with ``%d``).
        nseeds: Number of runhistories a task needs to be considered.

    Raises:
        AssertionError: If ``input_dir`` is not a directory, or if two
            incumbents map to the same ``hash_config`` value.
        FileNotFoundError: If no ``space.json`` matches the glob pattern.
        ValueError: If two incumbents have identical dictionaries.
    """
    from itertools import combinations

    assert os.path.isdir(input_dir), input_dir
    cs_file_tmp = os.path.join(input_dir, method_type, method + "*", 'space.json')
    cs_file = glob.glob(cs_file_tmp)
    if not cs_file:
        # Fail with a clear message here; indexing the empty match list below
        # would otherwise surface as an unexplained IndexError.
        raise FileNotFoundError("Could not find space %s" % cs_file_tmp)
    with open(cs_file[0]) as fh:
        cs = read(fh.read())

    task_id_to_dir = defaultdict(list)
    incumbents_test = list()
    config_to_tasks = defaultdict(list)

    for task_id in taskset:
        configuration_output_dir = os.path.join(
            input_dir,
            method_type,
            '%s_%d_*_0_0' % (method, task_id),
            'auto-sklearn-output',
            'smac3-output',
            'run_*',
            'runhistory.json',
        )
        configuration_output_dirs = glob.glob(configuration_output_dir)
        if len(configuration_output_dirs) != nseeds:
            print("Skip", configuration_output_dir, "has only", len(configuration_output_dirs), "runhistories")
            continue
        task_id_to_dir[task_id] = configuration_output_dirs

    print("Found", len(task_id_to_dir), "complete entries")
    # NOTE(review): `automl_metadata` is not defined in this function;
    # presumably it is a module-level name - verify, otherwise this line
    # raises NameError (len(taskset) may have been intended).
    print("Skipped", len(automl_metadata) - len(task_id_to_dir), "entries")

    rval = Parallel(n_jobs=8, verbose=0)(
        delayed(read_configurations_for_task_id)(task_id, task_id_to_dir, cs)
        for task_id in sorted(task_id_to_dir)
    )
    for task_id, ivt in rval:
        for inc in ivt:
            if inc not in incumbents_test:
                # First configuration not seen before: record it as a new
                # incumbent for this task and stop looking.
                incumbents_test.append(inc)
                config_to_tasks[inc].append(task_id)
                break
            else:
                config_to_tasks[inc].append(task_id)
    print("Found", len(incumbents_test), "incumbents")

    jason = {hash_config(i): i for i in incumbents_test}
    # Equal lengths mean no two incumbents share a hash.
    assert len(jason) == len(incumbents_test)

    # Each unordered pair of ids is compared exactly once.
    for i, j in combinations(jason, 2):
        if jason[i].get_dictionary() == jason[j].get_dictionary():
            raise ValueError("Found double entry:", jason[j])

    config_id_to_task = dict()
    for key in jason:
        config_id_to_task[key] = list(config_to_tasks[jason[key]])
        jason[key] = jason[key].get_dictionary()

    print('Found %d incuments!' % len(jason))

    json_file_name = os.path.join(output_dir, 'incumbents.json')
    with open(json_file_name, 'w') as fh:
        json.dump(jason, fh, indent=4)

    json_file_name = os.path.join(output_dir, 'task_to_inc_id.json')
    with open(json_file_name, 'w') as fh:
        json.dump(config_id_to_task, fh, indent=4)

    configspace_file_name = os.path.join(output_dir, 'space.json')
    with open(configspace_file_name, 'w') as fh:
        fh.write(write(cs))
Beispiel #12
0
# 5) InCondition:
#    'e' is only active if 'c' is in the set [25, 26, 27].
#    The condition is not added on its own; it is combined in step 7.
in_cond = CS.InCondition(e, c, [25, 26, 27])

# 6) AndConjunction:
#    The 'and-conjunction' combines the conditions less_cond and greater_cond
#    (both defined earlier in this script).
cs.add_condition(CS.AndConjunction(less_cond, greater_cond))

# 7) OrConjunction:
#    The 'or-conjunction' works similarly to the 'and-conjunction'; here it
#    combines in_cond with an equals-condition of 'e' on 'a' with value 2.
equals_cond = CS.EqualsCondition(e, a, 2)
cs.add_condition(CS.OrConjunction(in_cond, equals_cond))

# 8) ForbiddenEqualsClause:
#    This clause forbids the value 2 for the hyperparameter f.
forbidden_clause_f = CS.ForbiddenEqualsClause(f, 2)

# 9) ForbiddenInClause:
#    This clause forbids the value of the hyperparameter g to be in the set [2].
forbidden_clause_g = CS.ForbiddenInClause(g, [2])

# 10) ForbiddenAndConjunction:
#     Now we combine both clauses with an 'and-conjunction' and add the
#     result to the ConfigurationSpace.
forbidden_clause = CS.ForbiddenAndConjunction(forbidden_clause_f,
                                              forbidden_clause_g)
cs.add_forbidden_clause(forbidden_clause)

# To end this example, we store the defined configuration space in a JSON file.
with open('configspace.json', 'w') as fh:
    fh.write(json.write(cs))
Beispiel #13
0
    # NOTE: this rebinds the name `worker` from the callable to the object it
    # returns, so the callable is no longer reachable under that name.
    worker = worker(min_budget=min_budget,
                    max_budget=max_budget,
                    eta=eta,
                    nameserver=ns_host,
                    nameserver_port=ns_port,
                    run_id=args.run_id,
                    model=args.model,
                    data_config_path=args.data_config_path,
                    data_root=args.data_root)
    worker.run(background=True)

    # Dump the worker's configuration space to 'configspace.json' in the
    # working directory.
    config_space = worker.get_config_space()
    with open(os.path.join(args.working_directory, 'configspace.json'),
              'w') as f:
        f.write(config_space_json_r_w.write(config_space))

    # Set up the JSON result logger for the working directory, then
    # instantiate BOHB and run it.
    result_logger = hputil.json_result_logger(directory=args.working_directory,
                                              overwrite=True)

    HPB = BOHB(configspace=worker.get_config_space(),
               working_directory=args.working_directory,
               run_id=args.run_id,
               eta=eta,
               min_budget=min_budget,
               max_budget=max_budget,
               host=ns_host,
               nameserver=ns_host,
               nameserver_port=ns_port,
               ping_interval=3600,
Beispiel #14
0
                                       upper=.5,
                                       log=False,
                                       default_value=0.49070634552851977)
# Float hyperparameters declared with log=True: tol, C and epsilon.
tol = CSH.UniformFloatHyperparameter('tol',
                                     lower=1e-4,
                                     upper=1e-2,
                                     log=True,
                                     default_value=0.0002154969698207585)
gamma = CSH.CategoricalHyperparameter('gamma',
                                      choices=['scale', 'auto'],
                                      default_value='scale')
C = CSH.UniformFloatHyperparameter('C',
                                   lower=1.0,
                                   upper=20,
                                   log=True,
                                   default_value=3.2333262862494365)
epsilon = CSH.UniformFloatHyperparameter('epsilon',
                                         lower=0.01,
                                         upper=0.99,
                                         log=True,
                                         default_value=0.14834562300010581)
# NOTE: the choices are the strings 'True' and 'False', not booleans.
shrinking = CSH.CategoricalHyperparameter('shrinking',
                                          choices=['True', 'False'],
                                          default_value='True')

# kernel, degree and coef0 are defined earlier in this script.
cs.add_hyperparameters(
    [kernel, tol, gamma, C, epsilon, shrinking, degree, coef0])

# Store the configuration space in 'svr_configspace.json'.
with open('svr_configspace.json', 'w') as f:
    f.write(cs_json.write(cs))
Beispiel #15
0
def fmin(func,
         config_space,
         func_args=(),
         eta=2,
         min_budget=2,
         max_budget=4,
         num_iterations=1,
         num_workers=1,
         output_dir='.'):
    r"""
    Starts a local BOHB optimization run for a function over a hyperparameter
    search space, which is referred to as configuration space.
    This function's purpose is to give a fast and easy way to run BOHB on an
    optimization objective on your local machine.

    The optimized function must satisfy the following conditions:
    - Contain a parameter ``budget``:
        This parameter is passed by the optimizer.
        Its meaning is defined by your interpretation of the budget used by your
        model. For example it may be the number of epochs for a neural network
        to train or the number of datapoints the model receives.

        The idea is to run many configurations on a small budget and only
        take the best 1/``eta`` of them to the next round. In the next iteration,
        the configurations run on the doubled budget. This is repeated until
        only 2 configurations are left to run on the ``max_budget``.
        Therefore, bad configurations are rejected fast, and the good
        ones are explored more.
        The number of configurations with a minimum budget is calculated similar
        to the optimization run, just reversed. Having 2 configurations with
        ``max_budget``, in the iteration before ``eta``-times many
        configurations with half the budget are sampled, and so on.

    - Hyperparameter from the configuration space object:
        The function must implement all hyperparameters defined in the
        configuration space. The parameter name in the function call must be
        equal to the name of the hyperparameter. Otherwise, an exception will
        be thrown.

    - Function arguments in the right order:
        Function arguments, which are not hyperparameters and therefore not
        defined in the configuration space must be passed to the
        ``fmin`` call in the order of occurrence in the function signature.
        In the example below, the training data, X and y, is a
        use case for this kind of function arguments.

    Example::
        import numpy as np
        from FMin import fmin
        import ConfigSpace as CS

        # Create configuration space
        cs = CS.ConfigurationSpace()
        cs.add_hyperparameter(
            CS.UniformFloatHyperparameter('w', lower=-5, upper=5)
        )

        # Create data from function
        # f(x) = x + :math:`\mathcal{N}(0, 1)`
        X = np.random.uniform(-5, 5, 100)
        y = np.random.normal(X, 1)

        # The function calculates the mean squared error for the first
        # ``budget`` points compared to their responding true values.
        # The expected minimum is at w = 1.
        opt_func = lambda x, y, w, budget: np.mean((y[:int(budget)] - w*x[:int(budget)])**2)

        inc_best, inc_best_cfg, result = fmin(opt_func,
                                                  cs, func_args=(X, y),
                                                  min_budget=3,
                                                  max_budget=len(X),
                                                  num_iterations=3,
                                                  num_workers=1)

    Args:
        func (function): function to optimize. Must return a python scalar!
            See also the section above
            **The optimized function must satisfy the following conditions**
        config_space (ConfigSpace.ConfigurationSpace):
            Definition of the search space containing all hyperparameters
            and their value ranges. You can find its definition in
            the `ConfigSpace repository <https://github.com/automl/ConfigSpace/>`_.
        func_args (tuple): arguments, passed to the function by the user,
            e.g., the data (X,y). These arguments don't include
            optimized parameters. Those are defined in the
            configuration space object and will be passed by the master directly
            to the function.
        eta (float): In each iteration, a complete run of sequential halving
            is executed. In it, after evaluating each configuration on the
            same subset size, only a fraction of 1/eta of them 'advances' to
            the next round. Must be greater or equal to 2.
        min_budget (int, float, optional): Defines the minimum budget to
            evaluate configurations on it.
            In combination with the parameter `max_budget` and `eta`,
            the number of configurations to evaluate is determined.
            Read more about it in the
            `Quickstart <https://automl.github.io/HpBandSter/build/html/quickstart.html#id6>`_.
            By default `min_budget` and `max_budget` are set so that only a few
            configurations with budgets from 2 to 4 are evaluated.
        max_budget (int, float, optional): Defines the maximum budget to
            evaluate configurations on it.
        num_iterations (int, optional):   number of iterations to be performed
            in this run. By default, this value is set to 1.
        num_workers (int, optional): number of parallel workers. By default, just
            one worker is used.
        output_dir (str, optional): HpBandSter stores the sampled
            configurations and the results on these configurations in two .json
            files. 'configs.json' and 'results.json'. Those files will be stored
            by default in the current directory (default='.').
            Missing directories, including parents, are created.
            Also, we store the configuration space definition
            ('configspace.json') and the pickled result object ('results.pkl')
            in this directory. They may be used for further analysis via
            `CAVE <https://automl.github.io/CAVE/stable/>`_.

    Returns:
        A tuple ``(inc_value, inc_cfg, result)``:

        inc_value - The ``'loss'`` entry of the last run recorded for the
            incumbent id (``result.get_runs_by_id(incumbent)[-1]``).

        inc_cfg - Best found configuration.
            The ``'config'`` entry stored for the incumbent id in the
            id-to-config mapping of the result object.

        result - The object returned by the optimizer's ``run`` call. The
            incumbent value and configuration are extracted from it.

    """
    import pickle

    output_dir = Path(output_dir)
    # parents=True so that a nested output directory can be created as well.
    output_dir.mkdir(parents=True, exist_ok=True)

    # Set up a local nameserver and start it
    ns = hpns.NameServer(run_id='fmin',
                         nic_name=None,
                         working_directory=output_dir)
    ns_host, ns_port = ns.start()

    # From here on the nameserver is running, so make sure it is shut down
    # again even if setting up or running the optimization fails.
    try:
        # Create ``num_workers`` workers and pass the function as well as the
        # function arguments to each of them.
        workers = []
        for _ in range(num_workers):
            worker = FMinWorker(func=func,
                                func_args=func_args,
                                nameserver=ns_host,
                                nameserver_port=ns_port,
                                run_id='fmin')
            worker.run(background=True)
            workers.append(worker)

        # The result logger will store the intermediate results and the
        # sampled configurations in the passed directory.
        result_logger = hpres.json_result_logger(directory=output_dir,
                                                 overwrite=True)

        # For hyperparameter importance analysis via CAVE we store the
        # configuration space definition to file.
        with open(output_dir / 'configspace.json', 'w') as f:
            f.write(json.write(config_space))

        # Set up a master, which is book keeping and decides what to run next.
        opt = BOHB(configspace=config_space,
                   run_id='fmin',
                   min_budget=min_budget,
                   max_budget=max_budget,
                   eta=eta,
                   host=ns_host,
                   nameserver=ns_host,
                   nameserver_port=ns_port,
                   result_logger=result_logger)

        try:
            # The result object stores run information, e.g. the incumbent
            # trajectory. Force the master to wait until all workers are ready.
            result = opt.run(n_iterations=num_iterations,
                             min_n_workers=num_workers)
        finally:
            # Shut down the master and the workers, whether or not the run
            # succeeded.
            opt.shutdown(shutdown_workers=True)
    finally:
        ns.shutdown()

    # Save the result object to file.
    with open(output_dir / 'results.pkl', 'wb') as f:
        pickle.dump(result, f)

    # Return the optimal value and the responding configuration, as well as the
    # result object. The result object can be used in a second step for further
    # hyperparameter importance analysis with CAVE.
    id2config = result.get_id2config_mapping()
    incumbent = result.get_incumbent_id()
    inc_value = result.get_runs_by_id(incumbent)[-1]['loss']
    inc_cfg = id2config[incumbent]['config']

    return inc_value, inc_cfg, result
def generate_csv_data(NUM_EVALUATIONS, NUM_BUDGETS, ALLINONE, SEPARATE):
    """Write randomly generated run data as CSV in two directory layouts.

    ``NUM_EVALUATIONS`` runs are generated over three hyperparameters. The
    first run uses the hyperparameters' default values, all later ones are
    drawn with ``np.random``. A run enters the trajectory whenever its cost
    is lower than every cost before it.

    Args:
        NUM_EVALUATIONS: Number of runs to generate.
        NUM_BUDGETS: Number of budget levels; with ``<= 1`` every run gets
            budget 0, otherwise budgets start at 50 and grow in steps of 50.
        ALLINONE: Directory that receives one ``runhistory.csv`` holding
            results and hyperparameter values together.
        SEPARATE: Directory that receives ``runhistory.csv`` (results plus a
            config id) and ``configurations.csv`` (config id plus values).

    Both directories are created if missing and additionally receive
    ``configspace.json``, ``trajectory.csv`` and ``scenario.txt``.
    """
    for target_dir in (ALLINONE, SEPARATE):
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

    config_space = ConfigurationSpace()
    config_space.add_hyperparameters([
        UniformFloatHyperparameter('random_parameter_1', 0, 1.2),
        UniformIntegerHyperparameter('random_parameter_2', -10, 10),
        UniformIntegerHyperparameter('random_parameter_3', 1, 1000),
    ])

    param_columns = ['random_parameter_1', 'random_parameter_2', 'random_parameter_3']
    run_columns = ['cost', 'time', 'status', 'budget', 'seed']

    trajectory = []
    runhistory = []
    lowest_cost = np.inf
    status = 'SUCCESS'
    seed = 42  # should be: np.random.randint(1, 10000000) but seeds are currently not supported with budgets.
    start_time = time.time()

    if NUM_BUDGETS <= 1:
        budgets = [0 for _ in range(NUM_EVALUATIONS)]
    else:
        budgets = [50 + 50 * (step // (NUM_EVALUATIONS / NUM_BUDGETS))
                   for step in range(NUM_EVALUATIONS)]

    for i, budget in enumerate(budgets):
        if i == 0:
            # The very first run uses the default value of each parameter.
            random1, random2, random3 = [
                config_space.get_hyperparameter(column).default_value
                for column in param_columns
            ]
        else:
            random1 = np.random.uniform(0.1, 1.1)
            random2 = np.random.randint(-10, 10)
            random3 = np.random.randint(1, 1000)

        remaining = NUM_EVALUATIONS - i
        cost_low = np.abs(remaining - np.random.randint(50))
        cost_high = 10 * np.log(remaining)
        cost = np.random.uniform(cost_low, cost_high) * random1
        new_time = time.time() - start_time

        if cost < lowest_cost:
            lowest_cost = cost
            trajectory.append([new_time, new_time, i, cost, random1, random2, random3])
        runhistory.append([cost, new_time, status, budget, seed, random1, random2, random3])

    with open(os.path.join(ALLINONE, 'runhistory.csv'), 'w', newline='') as f:
        writer = csv.writer(f, delimiter=',')
        writer.writerow(run_columns + param_columns)
        writer.writerows(runhistory)

    with open(os.path.join(SEPARATE, 'runhistory.csv'), 'w', newline='') as rh,\
         open(os.path.join(SEPARATE, 'configurations.csv'), 'w', newline='') as configs:
        rh_writer = csv.writer(rh, delimiter=',')
        configs_writer = csv.writer(configs, delimiter=',')
        rh_writer.writerow(run_columns + ['config_id'])
        configs_writer.writerow(['CONFIG_ID'] + param_columns)
        for config_id, run in enumerate(runhistory):
            # The first five fields are the result, the rest are the values.
            result_part, value_part = run[:5], run[5:]
            rh_writer.writerow(result_part + [config_id])
            configs_writer.writerow([config_id] + value_part)

    trajectory_columns = ['cpu_time', 'wallclock_time', 'evaluations', 'cost'] + param_columns
    for path in (ALLINONE, SEPARATE):
        with open(os.path.join(path, 'configspace.json'), 'w') as f:
            f.write(pcs_json.write(config_space))

        with open(os.path.join(path, 'trajectory.csv'), 'w', newline='') as f:
            writer = csv.writer(f, delimiter=',')
            writer.writerow(trajectory_columns)
            writer.writerows(trajectory)

        paramfile = os.path.join(os.path.basename(path.rstrip('/')),
                                 'configspace.json')
        with open(os.path.join(path, 'scenario.txt'), 'w') as f:
            f.write('paramfile = {}\nrun_obj = quality'.format(paramfile))
Beispiel #17
0
        hp.default_value = float(hp.default_value)
        hp.sequence = tuple(np.array(hp.sequence).astype(float))

    # Special case for the SVM space: rebuild the fidelity grid with
    # get_fidelity_grid and overwrite the "subsample" sequence with it. This
    # block was introduced explicitly for SVM to fix the np.float32 type cast
    # enforced by the old get_fidelity_grid function, which was used for the
    # bulk of the SVM collection.
    if metadata["exp_args"]["space"] == "svm":
        z_grid = get_fidelity_grid(
            config_spaces["z"],
            metadata["exp_args"]["z_grid_size"],
            include_sh_budgets=metadata["exp_args"]["include_SH"]
        )
        # Keep only the first component of every grid entry.
        z_grid = tuple([f[0] for f in z_grid])
        hp = config_spaces["z_discrete"].get_hyperparameter("subsample")
        hp.sequence = z_grid
        hp.default_value = z_grid[-1]

    # Replace NumPy scalars by built-in numbers; the default value is set to
    # the last element of the sequence.
    # NOTE(review): only NumPy float defaults take the float branch. A plain
    # Python float default falls into the else branch and is cast to int -
    # confirm this is intended.
    for hp in config_spaces["z_discrete"].get_hyperparameters():
        if isinstance(hp.default_value, (np.float16, np.float32, np.float64)):
            hp.sequence = tuple(float(val) for val in hp.sequence)
            hp.default_value = float(hp.sequence[-1])
        else:
            hp.sequence = tuple(int(val) for val in hp.sequence)
            hp.default_value = int(hp.sequence[-1])
    # Store every configuration space as the output of json_cs.write.
    for k, _space in config_spaces.items():
        config_spaces[k] = json_cs.write(_space)
    metadata["config_spaces"] = config_spaces
    # Write the metadata to "<space>_<task_id>_metadata.json" in output_path.
    with open(os.path.join(output_path, "{}_{}_metadata.json".format(space, task_id)), "w") as f:
        json.dump(json_compatible_dict(metadata), f)
    print("Updated with global minimas!")
    print("All files saved!")
Beispiel #18
0
 def test_serialize_forbidden_in_clause(self):
     """``write`` must accept a space that carries a ``ForbiddenInClause``.

     The test makes no assertion; it passes when ``write`` does not raise.
     """
     space = ConfigurationSpace()
     categorical = CategoricalHyperparameter('a', [0, 1, 2])
     hp_a = space.add_hyperparameter(categorical)
     forbidden = ForbiddenInClause(hp_a, [1, 2])
     space.add_forbidden_clause(forbidden)
     write(space)
Beispiel #19
0
    def fit(self, pipeline_config, data_manager, instance, autonet, run_number,
            run_id, task_id):
        """Configure ``autonet`` for one run and dump the run's description.

        The autonet config is reset and updated with the task id, a run id of
        the form ``<run_name>-<instance_name>`` (``:`` replaced by ``_``) and
        the result directory. Only for ``task_id`` -1 or 1 are files written:
        ``instance.info``, ``benchmark.config``, ``autonet.config``,
        ``configspace.json``, ``configspace_simple.pcs`` and, when
        configured, the search space updates, a copy of the user updates
        config and a re-indented copy of the refit config.

        Returns:
            dict: ``{'result_dir': run_result_dir}``.
        """
        instance_name, run_name = get_names(instance, run_id, run_number)
        run_result_dir = get_run_result_dir(pipeline_config, instance, run_id,
                                            run_number, autonet)
        instance_run_id = "-".join((str(run_name), str(instance_name)))
        instance_run_id = instance_run_id.replace(':', '_')

        autonet.autonet_config = None  # clean results of last fit
        autonet.update_autonet_config(task_id=task_id,
                                      run_id=instance_run_id,
                                      result_logger_dir=run_result_dir)

        # Every other task id returns without writing any files.
        if task_id not in (-1, 1):
            return {'result_dir': run_result_dir}

        if not os.path.exists(run_result_dir):
            try:
                os.makedirs(run_result_dir)
            except Exception as e:
                # Best effort: report the problem and carry on.
                print(e)

        benchmark_logger = logging.getLogger('benchmark')
        benchmark_logger.debug(
            "Create config and info files for current run " + str(run_name))

        instance_info = {
            'path': instance,
            'is_classification': data_manager.is_classification,
            'is_multilabel': data_manager.is_multilabel,
            'instance_shape': data_manager.X_train.shape,
            'categorical_features': data_manager.categorical_features,
        }

        autonet_config = autonet.get_current_autonet_config()
        search_space_updates = autonet_config[
            "hyperparameter_search_space_updates"]
        if search_space_updates is not None:
            search_space_updates.save_as_file(
                os.path.join(run_result_dir,
                             "hyperparameter_search_space_updates.txt"))

        if 'user_updates_config' in pipeline_config:
            user_updates_config = pipeline_config['user_updates_config']
        else:
            user_updates_config = None
        if user_updates_config:
            from shutil import copyfile
            copyfile(user_updates_config,
                     os.path.join(run_result_dir, 'user_updates_config.csv'))

        config_files = (
            ("instance.info", instance_info),
            ("benchmark.config", pipeline_config),
            ("autonet.config", autonet_config),
        )
        for file_name, content in config_files:
            self.write_config_to_file(run_result_dir, file_name, content)

        # save refit config - add indent and sort keys
        has_refit_config = ('refit_config' in pipeline_config
                            and pipeline_config['refit_config'] is not None)
        if has_refit_config:
            import json
            with open(pipeline_config['refit_config'], 'r') as f:
                refit_config = json.loads(f.read())
            refit_copy_path = os.path.join(run_result_dir, 'refit_config.json')
            with open(refit_copy_path, 'w+') as f:
                f.write(json.dumps(refit_config, indent=4, sort_keys=True))

        # save search space
        search_space = autonet.pipeline.get_hyperparameter_search_space(
            **autonet_config)
        configspace_path = os.path.join(run_result_dir, "configspace.json")
        with open(configspace_path, "w") as f:
            f.write(cs_json.write(search_space))

        # save search space without constants - used by bohb - as pcs (simple)
        simplified_search_space, _ = remove_constant_hyperparameter(
            search_space)
        simple_pcs_path = os.path.join(run_result_dir,
                                       "configspace_simple.pcs")
        with open(simple_pcs_path, "w") as f:
            f.write(cs_pcs.write(simplified_search_space))

        return {'result_dir': run_result_dir}
Beispiel #20
0
    def convert(self,
                folders,
                ta_exec_dirs=None,
                output_dir=None,
                converted_dest='converted_input_data'):
        """Convert CSV-based run folders into SMAC3-style folders.

        For every folder a scenario, ``configspace.json``, ``runhistory.json``
        and, if ``validated_runhistory.csv`` can be read,
        ``validated_runhistory.json`` are written below
        ``<output_dir>/<converted_dest>/<folder basename>``.

        Parameters
        ----------
        folders: sized iterable of str
            the folders to convert (one per parallel run)
        ta_exec_dirs: list of str, optional
            execution directories, paired with ``folders``; defaults to
            ``['.']``. If the length differs from ``folders`` the first entry
            is used for every folder.
        output_dir: str, optional
            base output directory; a fresh temporary directory if falsy
        converted_dest: str
            sub-directory of ``output_dir`` that receives the converted data

        Returns
        -------
        OrderedDict
            maps each input folder to a dict with the keys ``new_path``,
            ``config_space``, ``runhistory``, ``validated_runhistory``
            (``None`` if the csv was not found), ``scenario`` and
            ``trajectory``
        """
        self.logger.debug(
            "Converting CSV-data to SMAC3-data. Called with: folders=%s, ta_exec_dirs=%s, output_dir=%s, "
            "converted_dest=%s", str(folders), str(ta_exec_dirs),
            str(output_dir), str(converted_dest))

        # Without an explicit output_dir the intermediate smac-like results
        # go to a temporary directory.
        if not output_dir:
            output_dir = tempfile.mkdtemp()
            self.logger.debug(
                "Temporary directory for intermediate SMAC3-results: %s",
                output_dir)
        if ta_exec_dirs is None or len(ta_exec_dirs) == 0:
            ta_exec_dirs = ['.']
        if len(ta_exec_dirs) != len(folders):
            # Length mismatch: reuse the first execution dir for all folders.
            first_exec_dir = ta_exec_dirs[0]
            ta_exec_dirs = [first_exec_dir for _ in folders]

        converted = OrderedDict()
        base_names = get_folder_basenames(folders)
        # Each (folder, base name, exec dir) triple is one parallel run.
        for folder, base_name, exec_dir in zip(folders, base_names,
                                               ta_exec_dirs):
            target_dir = os.path.join(output_dir, converted_dest, base_name)
            self.logger.debug(
                "Processing folder=%s, f_base=%s, ta_exec_dir=%s. Saving to %s.",
                folder, base_name, exec_dir, target_dir)
            if not os.path.exists(target_dir):
                self.logger.debug("%s doesn't exist. Creating...", target_dir)
                os.makedirs(target_dir)

            # Scenario (todo: enhancement: make scenario-file optional)
            scenario = self.get_scenario(
                folder,
                ta_exec_dir=exec_dir,
                out_path=os.path.join(target_dir, 'scenario.txt'))

            # The configuration space comes from the scenario and is
            # re-written as json next to the converted data.
            config_space = scenario.cs
            scenario.paramfile = os.path.join(target_dir, 'configspace.json')
            with open(scenario.paramfile, 'w') as cs_file:
                cs_file.write(pcs_json.write(config_space))

            # runhistory.csv -> runhistory.json
            runhistory = self.get_runhistory(folder, scenario,
                                             'runhistory.csv')
            runhistory.save_json(os.path.join(target_dir, 'runhistory.json'))

            # validated_runhistory.csv is optional
            try:
                validated_runhistory = self.get_runhistory(
                    folder, scenario, 'validated_runhistory.csv')
                validated_runhistory.save_json(
                    os.path.join(target_dir, 'validated_runhistory.json'))
            except FileNotFoundError:
                validated_runhistory = None
                self.logger.debug(
                    'No file detected at "%s"',
                    os.path.join(folder, 'validated_runhistory.csv'))

            # Trajectory (todo: enhancement: read it from runhistory?)
            trajectory = self.get_trajectory(folder, config_space, scenario,
                                             target_dir)

            # Paths/options may have changed: (over)write the scenario at
            # its new location.
            scenario.output_dir_for_this_run = target_dir
            scenario.write()

            converted[folder] = {
                'new_path': target_dir,
                'config_space': config_space,
                'runhistory': runhistory,
                'validated_runhistory': validated_runhistory,
                'scenario': scenario,
                'trajectory': trajectory,
            }

        return converted
Beispiel #21
0
# License: MIT

import json
import random
import requests
from ConfigSpace.read_and_write import json as config_json
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from openbox.config_space import ConfigurationSpace
from openbox.config_space.util import convert_configurations_to_array

# Id sent as 'id' in the registration request below.
user_id = 18

# Two-dimensional search space: x1 in [-5, 10], x2 in [0, 15], both defaulting to 0.
cs = ConfigurationSpace()
x1 = UniformFloatHyperparameter("x1", -5, 10, default_value=0)
x2 = UniformFloatHyperparameter("x2", 0, 15, default_value=0)
cs.add_hyperparameters([x1, x2])

# Serialise the space with ConfigSpace's JSON writer so it can be sent as form data.
config_space_array = config_json.write(cs)

# Register the task with the service listening on localhost:8001.
# NOTE(review): no timeout and no status check - the response is only printed.
res = requests.post('http://127.0.0.1:8001/bo_advice/task_register/',
                    data={'id':user_id, 'config_space_array':config_space_array})
print('-----------------')
print(res)
print('-----------------')
print(res.text)
print('-----------------')
Beispiel #22
0
    def hpbandster2smac(self, folder2result, cs: ConfigurationSpace, backup_cs,
                        output_dir: str):
        """Turn hpbandster results into one SMAC-style run per budget.

        Runs of all folders are grouped by budget into one RunHistory each.
        For every budget a sub-directory of ``output_dir`` is used, into which
        the scenario, ``configspace.json`` and ``runhistory.json`` are written
        before ``get_trajectory`` is called for that budget.

        Parameters
        ----------
        folder2result: Dict(str : hpbandster.core.result.Result)
            folder mapping to bohb's result-objects
        cs: ConfigurationSpace
            the configuration space
        backup_cs: List[ConfigurationSpace]
            tried in order when loading a configuration with ``cs`` raises a
            ValueError; the first one that works replaces ``cs`` from then on
        output_dir: str
            the output-dir to save the smac-runs to

        Returns
        -------
        OrderedDict
            maps each budget to the directory of its converted run

        Raises
        ------
        ValueError
            if a configuration can be loaded neither with ``cs`` nor with any
            entry of ``backup_cs``
        """
        # One runhistory per budget
        runhistories = OrderedDict()
        for folder, result in folder2result.items():
            self.logger.debug("Budgets for '%s': %s", folder,
                              str(result.HB_config['budgets']))
            id2config_mapping = result.get_id2config_mapping()
            none_losses = 0
            nan_losses = 0
            for run in result.get_all_runs():
                if run.budget not in runhistories:
                    runhistories[run.budget] = RunHistory(average_cost)
                runhistory = runhistories[run.budget]

                # Load the configuration, falling back to the backup spaces.
                try:
                    config = self._get_config(run.config_id, id2config_mapping,
                                              cs)
                except ValueError:
                    self.logger.debug(
                        "Loading configuration failed... trying alternatives",
                        exc_info=1)
                    for candidate in backup_cs:
                        try:
                            config = self._get_config(run.config_id,
                                                      id2config_mapping,
                                                      candidate)
                        except ValueError:
                            self.logger.debug("", exc_info=1)
                        else:
                            # Keep using the space that worked.
                            cs = candidate
                            break
                    else:
                        self.logger.debug("None of the alternatives worked...")
                        raise ValueError(
                            "Your configspace seems to be corrupt. If you use floats (or mix up ints, bools and strings) as categoricals, "
                            "please consider using the .json-format, as the .pcs-format cannot recover the type "
                            "of categoricals. Otherwise please report this to "
                            "https://github.com/automl/CAVE/issues (and attach the debug.log)"
                        )

                # Runs without a usable loss are counted and ignored.
                if run.loss is None:
                    none_losses += 1
                    continue
                if np.isnan(run.loss):
                    nan_losses += 1
                    continue

                stamps = run.time_stamps
                runhistory.add(config=config,
                               cost=run.loss,
                               time=stamps['finished'] - stamps['started'],
                               status=StatusType.SUCCESS,
                               seed=0,
                               additional_info={
                                   'info': run.info,
                                   'timestamps': run.time_stamps
                               })

            self.logger.debug(
                "Skipped %d None- and %d NaN-loss-values in BOHB-result",
                none_losses, nan_losses)

        # Write to disk
        self.logger.info(
            "Assuming BOHB treats target algorithms as deterministic (and does not re-evaluate)"
        )
        budget2path = OrderedDict()  # paths to individual budgets
        budget_names = format_budgets(runhistories.keys())
        for budget, runhistory in runhistories.items():
            budget_dir = os.path.join(output_dir, budget_names[budget])
            budget2path[budget] = budget_dir

            # At the time of writing, BOHB is always treating ta's as
            # deterministic.
            scenario = Scenario({
                'run_obj': 'quality',
                'cs': cs,
                'output_dir': output_dir,
                'deterministic': True,
            })
            scenario.output_dir_for_this_run = budget_dir
            scenario.write()

            cs_path = os.path.join(budget_dir, 'configspace.json')
            with open(cs_path, 'w') as cs_file:
                cs_file.write(pcs_json.write(cs))

            runhistory.save_json(
                fn=os.path.join(budget_dir, 'runhistory.json'))
            self.get_trajectory(folder2result,
                                budget_dir,
                                scenario,
                                runhistory,
                                budget=budget)

        return budget2path
Beispiel #23
0
# Step 2:
# The worker implements the connection to the model to be evaluated.
# Its 'compute'-method will later be called repeatedly by the BOHB-optimizer
# with the sampled configurations and returns, for example, the computed loss.
# Further usages of the worker will be covered in a later example.
w = MyWorker(
    nameserver=ns_host,
    nameserver_port=ns_port,
    run_id=run_id,  # unique Hyperband run id
)
# background=True is passed so that the script can continue with the steps below.
w.run(background=True)

# Write the ConfigSpace to 'configspace.json' (relative path, i.e. the current
# working directory) for later use.
with open('configspace.json', 'w') as file:
    file.write(json_writer.write(MyWorker.get_configspace()))

# Step 3:
# The number of sampled configurations is determined by the
# parameters eta, min_budget and max_budget.
# Every configuration is first evaluated on the minimum budget. After each
# round only a fraction of 1 / eta of the configurations 'advances' to the
# next round, and the budget is multiplied by eta (i.e. tripled for eta=3).
# This process runs until the maximum budget is reached.
HB = BOHB(
    configspace=MyWorker.get_configspace(),
    run_id=run_id,
    eta=3,
    min_budget=1,
    max_budget=25,  # Hyperband parameters
    nameserver=ns_host,
Beispiel #24
0
    def hpbandster2smac(self, folder, result, cs_options, output_dir: str):
        """Reading hpbandster-result-object and creating RunHistory and trajectory...

        Parameters
        ----------
        folder: str (path)
            original folder (only used for logging here)
        result: hpbandster.core.result.Result
            bohb's result-object
        cs_options: list[ConfigurationSpace]
            the configuration spaces. in the best case it's a single element, but for pcs-format we need to guess
            through a list of possible configspaces. The caller's list is not modified.
        output_dir: str
            the output-dir to save the smac-run to

        Returns
        -------
        converted: dict{
                'new_path' : path_to_converted_input,
                'hpbandster_result' : result_in_hpbandster_format,
                'config_space' : config_space,
                'runhistory' : runhistory,
                'validated_runhistory' : None,
                'scenario' : scenario,
                'trajectory' : trajectory,
                }

        Raises
        ------
        ValueError
            if a configuration cannot be loaded with any of the configspaces in ``cs_options``
        """
        self.logger.debug("Budgets for '%s': %s", folder,
                          str(result.HB_config['budgets']))
        ##########################
        # 1. Create runhistory   #
        ##########################
        id2config_mapping = result.get_id2config_mapping()
        skipped = {'None': 0, 'NaN': 0}
        rh = RunHistory()
        for run in result.get_all_runs():
            # Load config, dropping configspace candidates that fail until one works
            config = None
            while config is None:
                if len(cs_options) == 0:
                    self.logger.debug("None of the alternatives worked...")
                    raise ValueError(
                        "Your configspace seems to be corrupt. If you use floats (or mix up ints, bools "
                        "and strings) as categoricals, please consider using the .json-format, as the "
                        ".pcs-format cannot recover the type of categoricals. Otherwise please report "
                        "this to https://github.com/automl/CAVE/issues (and attach the debug.log)"
                    )
                try:
                    config = self._get_config(run.config_id, id2config_mapping,
                                              cs_options[0])
                except ValueError:
                    # Pass the count as a logging argument. The previous
                    # `"...%d..." % len(cs_options) - 1` was evaluated as
                    # `(str % int) - 1` and raised a TypeError right here,
                    # so the alternatives were never tried.
                    self.logger.debug(
                        "Loading config failed. Trying %d alternatives",
                        len(cs_options) - 1,
                        exc_info=1)
                    # Remove the failing cs-version (slicing builds a new list)
                    cs_options = cs_options[1:]

            # Filter corrupted loss-values (ignore them)
            if run.loss is None:
                skipped['None'] += 1
                continue
            if np.isnan(run.loss):
                skipped['NaN'] += 1
                continue

            rh.add(config=config,
                   cost=run.loss,
                   time=run.time_stamps['finished'] -
                   run.time_stamps['started'],
                   status=StatusType.SUCCESS,
                   budget=run.budget,
                   seed=0,
                   additional_info={
                       'info': run.info,
                       'timestamps': run.time_stamps
                   })

        self.logger.debug(
            "Skipped %d None- and %d NaN-loss-values in BOHB-result",
            skipped['None'], skipped['NaN'])

        ##########################
        # 2. Create all else     #
        ##########################
        # cs_options[0] is now the first configspace that loaded every config
        scenario = Scenario({
            'run_obj': 'quality',
            'cs': cs_options[0],
            'output_dir': output_dir,
            'deterministic':
            True,  # At the time of writing, BOHB is always treating ta's as deterministic
        })
        scenario.output_dir_for_this_run = output_dir
        scenario.write()

        with open(os.path.join(output_dir, 'configspace.json'), 'w') as fh:
            fh.write(pcs_json.write(cs_options[0]))

        rh.save_json(fn=os.path.join(output_dir, 'runhistory.json'))

        trajectory = self.get_trajectory(result, output_dir, scenario, rh)

        return {
            'new_path': output_dir,
            'hpbandster_result': result,
            'config_space': cs_options[0],
            'runhistory': rh,
            'validated_runhistory': None,
            'scenario': scenario,
            'trajectory': trajectory,
        }
Beispiel #25
0
 def test_serialize_forbidden_in_clause(self):
     """Writing a space that contains a ForbiddenInClause must not raise."""
     space = ConfigurationSpace()
     hyperparameter = space.add_hyperparameter(
         CategoricalHyperparameter('a', [0, 1, 2]))
     forbidden = ForbiddenInClause(hyperparameter, [1, 2])
     space.add_forbidden_clause(forbidden)
     write(space)
Beispiel #26
0
def convert_cs_to_json(cs):
    """Serialise ``cs`` with ``write`` and parse the resulting JSON text.

    Returns whatever ``json.loads`` produces for that text.
    """
    return json.loads(write(cs))
Beispiel #27
0
# 5) InCondition:
#    'e' is only active if 'c' is in the set [25, 26, 27]
#    (the condition is only created here; it is added further below as part
#    of the 'or-conjunction')
in_cond = CS.InCondition(e, c, [25, 26, 27])

# 6) AndConjunction:
#    The 'and-conjunction' combines the conditions less_cond and greater_cond
cs.add_condition(CS.AndConjunction(less_cond, greater_cond))

# 7) OrConjunction:
#    The 'or-conjunction' works similarly to the 'and-conjunction':
#    it combines in_cond with the condition that 'a' equals 2
equals_cond = CS.EqualsCondition(e, a, 2)
cs.add_condition(CS.OrConjunction(in_cond, equals_cond))

# 8) ForbiddenEqualsClause:
#    This clause forbids the value 2 for the hyperparameter f
forbidden_clause_f = CS.ForbiddenEqualsClause(f, 2)

# 9) ForbiddenInClause:
#    This clause forbids the value of the hyperparameter g to be in the set [2]
forbidden_clause_g = CS.ForbiddenInClause(g, [2])

# 10) ForbiddenAndConjunction:
#     Now, we combine them with an 'and-conjunction' and add them to the ConfigurationSpace
forbidden_clause = CS.ForbiddenAndConjunction(forbidden_clause_f, forbidden_clause_g)
cs.add_forbidden_clause(forbidden_clause)

# To end this example, we store the defined configuration space to a json file
# NOTE(review): 'json' here must be a module offering write(cs) (presumably
# ConfigSpace's JSON writer, not the standard-library json) - confirm the import.
with open('configspace.json', 'w') as fh:
    fh.write(json.write(cs))
Beispiel #28
0
    def __init__(self,
                 configspace,
                 min_points_in_model=None,
                 top_n_percent=15,
                 num_samples=64,
                 random_fraction=1 / 3,
                 bandwidth_factor=3,
                 min_bandwidth=1e-3,
                 **kwargs):
        """
        Fits for each given budget a kernel density estimator on the best N percent of the
        evaluated configurations on this budget.

        Parameters:
        -----------
        configspace: ConfigSpace
            Configuration space object
        min_points_in_model: int
            minimum number of datapoints needed to fit a model; values that are
            None or smaller than (number of hyperparameters + 1) are replaced
            by that number
        top_n_percent: int
            Determines the percentile of configurations that will be used as training data
            for the kernel density estimator, e.g if set to 10 the 10% best configurations
            will be considered for training.
        num_samples: int
            number of samples drawn to optimize EI via sampling
        random_fraction: float
            fraction of random configurations returned
        bandwidth_factor: float
            widens the bandwidth for continuous parameters for proposed points to optimize EI
        min_bandwidth: float
            to keep diversity, even when all (good) samples have the same value for one of
            the parameters, a minimum bandwidth (Default: 1e-3) is used instead of zero.
        **kwargs:
            forwarded to the parent class constructor

        Raises:
        -------
        RuntimeError
            if a hyperparameter has a 'sequence' attribute (ordinal hyperparameter)
        """
        super().__init__(**kwargs)
        self.top_n_percent = top_n_percent
        self.configspace = configspace
        self.bw_factor = bandwidth_factor
        self.min_bandwidth = min_bandwidth

        hps = self.configspace.get_hyperparameters()
        required_points = len(hps) + 1

        if min_points_in_model is None:
            self.min_points_in_model = required_points
        elif min_points_in_model < required_points:
            self.logger.warning(
                'Invalid min_points_in_model value. Setting it to %i' %
                required_points)
            self.min_points_in_model = required_points
        else:
            self.min_points_in_model = min_points_in_model

        self.num_samples = num_samples
        self.random_fraction = random_fraction

        # Round-trip the configspace through 'configspace.json' in the current
        # working directory and print whether the re-read space equals the
        # original.
        # NOTE(review): this looks like leftover debugging code - confirm
        # whether the file and the stdout output are still needed.
        from ConfigSpace.read_and_write import json
        with open('configspace.json', 'w') as out_file:
            out_file.write(json.write(self.configspace))
        with open('configspace.json') as in_file:
            reloaded = json.read(in_file.read())
            print(self.configspace == reloaded)

        self.kde_vartypes = ""
        self.vartypes = []

        for hp in hps:
            if hasattr(hp, 'sequence'):
                raise RuntimeError(
                    'This version on BOHB does not support ordinal hyperparameters. Please encode %s as an integer parameter!'
                    % (hp.name))

            # Hyperparameters with 'choices' get type 'u' plus their number of
            # choices; all others get type 'c' and 0.
            has_choices = hasattr(hp, 'choices')
            self.kde_vartypes += 'u' if has_choices else 'c'
            self.vartypes.append(len(hp.choices) if has_choices else 0)

        self.vartypes = np.array(self.vartypes, dtype=int)

        # store precomputed probs for the categorical parameters
        self.cat_probs = []

        self.configs = {}
        self.losses = {}
        self.good_config_rankings = {}
        self.kde_models = {}
def main():
    """Run BOHB as master process or, with ``--worker``, as a worker.

    Both modes create the shared directory if it is missing and resolve the
    host from the given network interface. A worker sleeps 60 seconds, loads
    the nameserver credentials from the shared directory, runs in the
    foreground and then exits. The master writes the configspace, starts a
    nameserver and a background worker, runs the optimizer, pickles the result
    into the shared directory and finally shuts everything down.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--n_iterations',
        type=int,
        default=4,
        help='Number of iterations performed by the optimizer')
    parser.add_argument(
        '--worker',
        action='store_true',
        help='Flag to turn this into a worker process')
    parser.add_argument(
        '--run_id',
        type=str,
        help='A unique run id for this optimization run. An easy option is to use'
        ' the job id of the clusters scheduler.')
    parser.add_argument(
        '--shared_directory',
        type=str,
        help='A directory that is accessible for all processes, e.g. a NFS share.')
    parser.add_argument(
        '--interface',
        type=str,
        default="eth1",
        help='Which network interface to use')

    args = parser.parse_args()
    shared_dir = args.shared_directory

    try:
        os.mkdir(shared_dir)
    except FileExistsError:
        # Already there (e.g. created by another process) - nothing to do.
        pass

    # Every process has to lookup the hostname
    host = hpns.nic_name_to_host(args.interface)

    if args.worker:
        # short artificial delay to make sure the nameserver is already running
        time.sleep(60)
        worker = MyWorker(run_id=args.run_id, host=host)
        worker.load_nameserver_credentials(working_directory=shared_dir)
        worker.run(background=False)
        exit(0)

    # Write the configspace
    cs = MyWorker.get_configspace()
    cs_path = os.path.join(shared_dir, 'configspace.json')
    with open(cs_path, "w") as cs_file:
        cs_file.write(pcs_out.write(cs))

    result_logger = hpres.json_result_logger(directory=shared_dir,
                                             overwrite=True)
    name_server = hpns.NameServer(run_id=args.run_id,
                                  host=host,
                                  port=0,
                                  working_directory=shared_dir)
    ns_host, ns_port = name_server.start()

    # The master also runs one worker in the background.
    worker = MyWorker(run_id=args.run_id,
                      host=host,
                      nameserver=ns_host,
                      nameserver_port=ns_port)
    worker.run(background=True)

    # Run an optimizer
    # We now have to specify the host, and the nameserver information
    optimizer = BOHB(configspace=cs,
                     run_id=args.run_id,
                     host=host,
                     nameserver=ns_host,
                     nameserver_port=ns_port,
                     eta=3,
                     result_logger=result_logger,
                     min_budget=1,
                     max_budget=9)
    result = optimizer.run(n_iterations=args.n_iterations, min_n_workers=1)

    # In a cluster environment, you usually want to store the results for later analysis.
    # One option is to simply pickle the Result object
    with open(os.path.join(shared_dir, 'results.pkl'), 'wb') as result_file:
        pickle.dump(result, result_file)

    # Step 4: Shutdown
    # After the optimizer run, we must shutdown the master and the nameserver.
    optimizer.shutdown(shutdown_workers=True)
    name_server.shutdown()
Beispiel #30
0
    def hpbandster2smac(self, folder2result, cs_options, output_dir: str):
        """Reading hpbandster-result-object and creating RunHistory and trajectory...  treats each budget as an
        individual 'smac'-run, creates an output-directory with subdirectories for each budget.

        Parameters
        ----------
        folder2result: Dict(str : hpbandster.core.result.Result)
            folder mapping to bohb's result-objects
        cs_options: list[ConfigurationSpace]
            the configuration spaces. in the best case it's a single element, but for pcs-format we need to guess
            through a list of possible configspaces. The caller's list is not modified.
        output_dir: str
            the output-dir to save the smac-runs to

        Returns
        -------
        folder2budgets: dict(dict(str) - str)
            maps each folder (from parallel execution) to a dict, which in turn maps all budgets of
            the specific parallel execution to their paths

        Raises
        ------
        ValueError
            if a configuration cannot be loaded with any of the configspaces in ``cs_options``
        """
        folder2budgets = OrderedDict()
        self.logger.debug("Loading with %d configspace alternative options...",
                          len(cs_options))
        self.logger.info(
            "Assuming BOHB treats target algorithms as deterministic (and does not re-evaluate)"
        )
        for folder, result in folder2result.items():
            folder2budgets[folder] = OrderedDict()
            self.logger.debug("Budgets for '%s': %s", folder,
                              str(result.HB_config['budgets']))
            ##########################
            # 1. Create runhistory   #
            ##########################
            id2config_mapping = result.get_id2config_mapping()
            skipped = {'None': 0, 'NaN': 0}
            budget2rh = OrderedDict()
            for run in result.get_all_runs():
                # Choose runhistory to add run to
                if run.budget not in budget2rh:
                    budget2rh[run.budget] = RunHistory(average_cost)
                rh = budget2rh[run.budget]

                # Load config, dropping configspace candidates that fail until one works
                config = None
                while config is None:
                    if len(cs_options) == 0:
                        self.logger.debug("None of the alternatives worked...")
                        raise ValueError(
                            "Your configspace seems to be corrupt. If you use floats (or mix up ints, bools and strings) as categoricals, "
                            "please consider using the .json-format, as the .pcs-format cannot recover the type "
                            "of categoricals. Otherwise please report this to "
                            "https://github.com/automl/CAVE/issues (and attach the debug.log)"
                        )
                    try:
                        config = self._get_config(run.config_id,
                                                  id2config_mapping,
                                                  cs_options[0])
                    except ValueError:
                        # Pass the count as a logging argument. The previous
                        # `"...%d..." % len(cs_options) - 1` was evaluated as
                        # `(str % int) - 1` and raised a TypeError right here,
                        # so the alternatives were never tried.
                        self.logger.debug(
                            "Loading configuration failed... trying %d alternatives",
                            len(cs_options) - 1,
                            exc_info=1)
                        # Remove the failing cs-version (slicing builds a new list)
                        cs_options = cs_options[1:]

                # Filter corrupted loss-values (ignore them)
                if run.loss is None:
                    skipped['None'] += 1
                    continue
                if np.isnan(run.loss):
                    skipped['NaN'] += 1
                    continue

                rh.add(config=config,
                       cost=run.loss,
                       time=run.time_stamps['finished'] -
                       run.time_stamps['started'],
                       status=StatusType.SUCCESS,
                       seed=0,
                       additional_info={
                           'info': run.info,
                           'timestamps': run.time_stamps
                       })

            self.logger.debug(
                "Skipped %d None- and %d NaN-loss-values in BOHB-result",
                skipped['None'], skipped['NaN'])

            ##########################
            # 2. Create all else     #
            ##########################
            formatted_budgets = format_budgets(
                budget2rh.keys()
            )  # Make budget-names readable [0.021311, 0.031211] to [0.02, 0.03]
            for b, rh in budget2rh.items():
                # NOTE(review): if `folder` is an absolute path, os.path.join
                # discards `output_dir` - confirm what callers pass as keys.
                output_path = os.path.join(output_dir, folder,
                                           formatted_budgets[b])
                folder2budgets[folder][b] = output_path

                # cs_options[0] is the first configspace that loaded all
                # configs seen so far
                scenario = Scenario({
                    'run_obj': 'quality',
                    'cs': cs_options[0],
                    'output_dir': output_dir,
                    'deterministic':
                    True,  # At the time of writing, BOHB is always treating ta's as deterministic
                })
                scenario.output_dir_for_this_run = output_path
                scenario.write()

                with open(os.path.join(output_path, 'configspace.json'),
                          'w') as fh:
                    fh.write(pcs_json.write(cs_options[0]))

                rh.save_json(fn=os.path.join(output_path, 'runhistory.json'))

                self.get_trajectory(folder2result[folder],
                                    output_path,
                                    scenario,
                                    rh,
                                    budget=b)

        return folder2budgets
Beispiel #31
0
    print(traceback.print_exc())
    crashed = True
    pass
    #new_autosklearn_path = os.path.join(tmp_dir, 'auto-sklearn-output')
    #shutil.copytree(autosklearn_directory, new_autosklearn_path)
    #try:
    #    shutil.rmtree(autosklearn_directory)
    #except:
    #    pass
    #raise e

# Store the search space in <tmp_dir>/space.json for later examination,
# but only if the run did not crash
if not crashed:
    # NOTE(review): reaches into the private `_automl` attribute - confirm
    # there is no public accessor for the configuration space.
    cs = automl._automl[0].configuration_space
    with open(os.path.join(tmp_dir, 'space.json'), 'w') as fh:
        fh.write(write(cs))

# Result record, stored under key 0
result = dict()
result[0] = {
    'task_id': task_id,
    'time_limit': time_limit,
    'loss': loss,
    'trajectory': trajectory
}

# Map every directory below autosklearn_directory to a dict of
# {filename: modification time} for the files it contains
time_stamp_dict = {}
for dirpath, dirnames, filenames in os.walk(autosklearn_directory, topdown=False):
    time_stamp_dict[dirpath] = {}
    for filename in filenames:
        time_stamp_dict[dirpath][filename] = os.path.getmtime(os.path.join(dirpath, filename))
# Save timestamps, so we can compute ensemble performance over time