Example #1
def run_objective_function(
    configurations,
    hypermapper_mode,
    param_space,
    beginning_of_time,
    run_directory,
    evolution_data_array,
    fast_addressing_of_data_array,
    enable_feasible_predictor=False,
    evaluation_limit=float("inf"),
    black_box_function=None,
    number_of_cpus=0,
):
    """
    Evaluate a list of configurations using the black-box function being optimized.
    This method avoids evaluating repeated points by recovering their value from the history of evaluated points.
    :param configurations: list of configurations to evaluate.
    :param hypermapper_mode: which HyperMapper mode is being used (expected to be "default" here).
    :param param_space: a space object containing the search space.
    :param beginning_of_time: timestamp of when the optimization started.
    :param run_directory: directory where HyperMapper is running.
    :param evolution_data_array: a dictionary containing all of the configurations that have been evaluated.
    :param fast_addressing_of_data_array: a dictionary containing evaluated configurations and their index in
    the evolution_data_array.
    :param enable_feasible_predictor: whether to use constrained optimization.
    :param evaluation_limit: the maximum number of function evaluations allowed for the evolutionary search.
    :param black_box_function: the black_box_function being optimized in the evolutionary search.
    :param number_of_cpus: an integer for the number of cpus to be used in parallel.
    :return: the newly evaluated configurations and the total number of evaluations (recovered or performed) for the given configurations.
    """
    new_configurations = []
    new_evaluations = {}
    previous_evaluations = defaultdict(list)
    number_of_new_evaluations = 0
    t0 = datetime.datetime.now()
    absolute_configuration_index = len(fast_addressing_of_data_array)

    # Add configurations to new_configurations if they have not been evaluated before
    for configuration in configurations:
        str_data = param_space.get_unique_hash_string_from_values(configuration)
        if str_data in fast_addressing_of_data_array:
            configuration_idx = fast_addressing_of_data_array[str_data]
            for key in evolution_data_array:
                previous_evaluations[key].append(
                    evolution_data_array[key][configuration_idx]
                )
        else:
            if (
                absolute_configuration_index + number_of_new_evaluations
                < evaluation_limit
            ):
                new_configurations.append(configuration)
                number_of_new_evaluations += 1

    # Evaluate the new configurations, if there are any
    t1 = datetime.datetime.now()
    if number_of_new_evaluations > 0:
        new_evaluations = param_space.run_configurations(
            hypermapper_mode,
            new_configurations,
            beginning_of_time,
            black_box_function,
            run_directory=run_directory,
        )

    # Gather the values for all given configurations, previous and new
    all_evaluations = concatenate_data_dictionaries(
        previous_evaluations, new_evaluations
    )
    all_evaluations_size = len(all_evaluations[list(all_evaluations.keys())[0]])

    population = list()
    for idx in range(number_of_new_evaluations):
        configuration = get_single_configuration(new_evaluations, idx)
        population.append(configuration)
        for key in configuration:
            evolution_data_array[key].append(configuration[key])

        str_data = param_space.get_unique_hash_string_from_values(configuration)
        fast_addressing_of_data_array[str_data] = absolute_configuration_index
        absolute_configuration_index += 1

    sys.stdout.write_to_logfile(
        (
            "Time to run new configurations %10.4f sec\n"
            % ((datetime.datetime.now() - t1).total_seconds())
        )
    )
    sys.stdout.write_to_logfile(
        (
            "Total time to run configurations %10.4f sec\n"
            % ((datetime.datetime.now() - t0).total_seconds())
        )
    )

    return population, all_evaluations_size
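
A minimal, self-contained sketch of the dedup-and-reuse pattern used above: a columnar history dictionary plus a hash-to-index map let repeated configurations be recovered from history instead of re-evaluated. The names hash_config, evaluate_with_history, and the toy objective are hypothetical stand-ins, not HyperMapper APIs.

from collections import defaultdict

def hash_config(configuration):
    # Hypothetical stand-in for param_space.get_unique_hash_string_from_values
    return ",".join(f"{k}={configuration[k]}" for k in sorted(configuration))

def evaluate_with_history(configurations, data_array, fast_addressing, evaluate):
    # Recover repeated points from history; evaluate only the unseen ones
    previous = defaultdict(list)
    new_configs = []
    for config in configurations:
        key = hash_config(config)
        if key in fast_addressing:
            idx = fast_addressing[key]
            for column in data_array:
                previous[column].append(data_array[column][idx])
        else:
            new_configs.append(config)
    new_results = evaluate(new_configs) if new_configs else {}
    return previous, new_results

if __name__ == "__main__":
    def toy_evaluate(configs):
        # Columnar result dictionary, mirroring HyperMapper's data arrays
        return {"x": [c["x"] for c in configs],
                "value": [c["x"] ** 2 for c in configs]}

    data_array = {"x": [2], "value": [4]}
    fast_addressing = {hash_config({"x": 2}): 0}
    previous, new = evaluate_with_history(
        [{"x": 2}, {"x": 3}], data_array, fast_addressing, toy_evaluate)
    print(dict(previous))  # {'x': [2], 'value': [4]}
    print(new)             # {'x': [3], 'value': [9]}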
Example #2
def local_search(
    local_search_starting_points,
    local_search_random_points,
    param_space,
    fast_addressing_of_data_array,
    enable_feasible_predictor,
    optimization_function,
    optimization_function_parameters,
    scalarization_key,
    number_of_cpus,
    previous_points=None,
    profiling=None,
    noise=False,
):
    """
    Optimize the acquisition function using a mix of random and local search.
    This algorithm randomly samples N points and then does a local search on the
    best points from the random sampling and the best points from previous iterations (if any).
    :param local_search_starting_points: an integer for the number of starting points for the local search. If 0, all points will be used.
    :param local_search_random_points: number of random points to sample before the local search.
    :param param_space: a space object containing the search space.
    :param fast_addressing_of_data_array: a dictionary containing the points that were already explored, keyed by their hash strings.
    :param enable_feasible_predictor: whether to use constrained optimization.
    :param optimization_function: the function that will be optimized by the local search.
    :param optimization_function_parameters: a dictionary containing the parameters that will be passed to the optimization function.
    :param scalarization_key: the name given to the scalarized values.
    :param number_of_cpus: the number of CPUs to use in parallel; 0 means use all available CPUs.
    :param previous_points: previous points that have already been evaluated.
    :param profiling: an object used to collect profiling data, or None.
    :return: all points evaluated and the best point found by the local search.
    """
    if number_of_cpus == 0:
        number_of_cpus = cpu_count()
    t0 = datetime.datetime.now()
    tmp_fast_addressing_of_data_array = copy.deepcopy(
        fast_addressing_of_data_array)
    input_params = param_space.get_input_parameters()
    feasible_parameter = param_space.get_feasible_parameter()[0]
    data_array = {}
    end_of_search = False
    # multiplicative oversampling factor for the number of local search starting points
    oversampling_factor = 2

    default_configuration = param_space.get_default_or_random_configuration()
    str_data = param_space.get_unique_hash_string_from_values(
        default_configuration)
    if str_data not in fast_addressing_of_data_array:
        tmp_fast_addressing_of_data_array[str_data] = 1
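    # If the search space is smaller than the number of requested random samples,
    # enumerate it exhaustively and split it between the uniform and prior sets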
    if param_space.get_space_size() < local_search_random_points:
        all_configurations = dict_of_lists_to_list_of_dicts(
            param_space.get_space())
        half_of_points = int(len(all_configurations) / 2)
        uniform_configurations = all_configurations[0:half_of_points]
        prior_configurations = all_configurations[half_of_points::]
    else:
        uniform_configurations = param_space.get_random_configuration(
            size=local_search_random_points,
            use_priors=False,
            return_as_array=True)
        prior_configurations = param_space.get_random_configuration(
            size=local_search_random_points,
            use_priors=True,
            return_as_array=True)

        uniform_configurations = array_to_list_of_dicts(
            uniform_configurations, param_space.get_input_parameters())
        prior_configurations = array_to_list_of_dicts(
            prior_configurations, param_space.get_input_parameters())

    sampling_time = datetime.datetime.now()
    sys.stdout.write_to_logfile(("Total RS time %10.4f sec\n" %
                                 ((sampling_time - t0).total_seconds())))

    # check that the number of configurations is not less than the number of CPUs
    min_number_of_configs = min(len(uniform_configurations),
                                len(prior_configurations))
    if min_number_of_configs < number_of_cpus:
        number_of_cpus = min_number_of_configs

    # To avoid partitions with no samples, compute a floor for the number of partitions for small sample spaces.
    # Alternatively, a threshold could be set below which the points are not partitioned at all (evaluation is quick in that case anyway).
    min_number_per_partition = min_number_of_configs / number_of_cpus
    partitions_per_cpu = min(10, int(min_number_per_partition))
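    # With a single CPU, evaluate both sample batches directly in this process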
    if number_of_cpus == 1:
        function_values_uniform, feasibility_indicators_uniform = optimization_function(
            configurations=uniform_configurations,
            **optimization_function_parameters)
        function_values_prior, feasibility_indicators_prior = optimization_function(
            configurations=prior_configurations,
            **optimization_function_parameters)
    else:
        # the number of input points from each list that each partition will contain

        uniform_partition_fraction = len(uniform_configurations) / (
            partitions_per_cpu * number_of_cpus)
        prior_partition_fraction = len(prior_configurations) / (
            partitions_per_cpu * number_of_cpus)
        uniform_partition = [
            uniform_configurations[int(i * uniform_partition_fraction):int(
                (i + 1) * uniform_partition_fraction)]
            for i in range(partitions_per_cpu * number_of_cpus)
        ]
        prior_partition = [
            prior_configurations[int(i * prior_partition_fraction):int(
                (i + 1) * prior_partition_fraction)]
            for i in range(partitions_per_cpu * number_of_cpus)
        ]

        # Define a process queue and the processes, each containing half uniform and half prior partitions
        # as arguments to the nested function along with the queue
        input_queue = JoinableQueue()
        for i in range(number_of_cpus * partitions_per_cpu):
            combined_partition = uniform_partition[i] + prior_partition[i]
            input_queue.put({
                "partition": combined_partition,
                "split_index": len(uniform_partition[i]),
                "conf_index": i,
            })
        output_queue = Queue()

        processes = [
            Process(
                target=parallel_optimization_function,
                args=(
                    optimization_function_parameters,
                    input_queue,
                    output_queue,
                    i,
                    optimization_function,
                ),
            ) for i in range(number_of_cpus)
        ]

        function_values_uniform = [{}] * len(uniform_configurations)
        feasibility_indicators_uniform = [{}] * len(uniform_configurations)
        function_values_prior = [{}] * len(prior_configurations)
        feasibility_indicators_prior = [{}] * len(prior_configurations)

        # Start the processes, adding one sentinel (None) per process; joining the input queue blocks until all work has been consumed
        with threadpool_limits(limits=1):

            for process in processes:
                process.start()
                input_queue.put(None)
            input_queue.join()

        # Collect results from the output queue and reassemble them in partition order
        for i in range(number_of_cpus * partitions_per_cpu):

            # ideally this get would be non-blocking, but that does not work reliably unless the processes are closed first, so a blocking call is used
            result = output_queue.get()
            scalarized_values, feasibility_indicators, split_index, conf_index = (
                result["scalarized_values"],
                result["feasibility_indicators"],
                result["split_index"],
                result["conf_index"],
            )
            # since half of the result is uniform and half is prior, it needs
            # splitting in the middle of the resulting arrays
            uniform_start = int(conf_index * uniform_partition_fraction)
            prior_start = int(conf_index * prior_partition_fraction)
            prior_length = len(scalarized_values) - split_index
            function_values_uniform[
                uniform_start : uniform_start + split_index
            ] = scalarized_values[0:split_index]
            feasibility_indicators_uniform[
                uniform_start : uniform_start + split_index
            ] = feasibility_indicators[0:split_index]
            function_values_prior[
                prior_start : prior_start + prior_length
            ] = scalarized_values[split_index:]
            feasibility_indicators_prior[
                prior_start : prior_start + prior_length
            ] = feasibility_indicators[split_index:]

        # Safeguard to ensure the processes actually stop, so no process keeps waiting for more input after the MSLS function returns

        input_queue.close()
        output_queue.close()
        for i in range(len(processes)):
            processes[i].join()

    acquisition_time = datetime.datetime.now()
    sys.stdout.write_to_logfile(
        ("Optimization function time %10.4f sec\n" %
         (acquisition_time - sampling_time).total_seconds()))

    # This will concatenate the entire neighbors array if all configurations were evaluated
    # but only the evaluated configurations if we reached the budget and did not evaluate all
    function_values_uniform_size = len(function_values_uniform)
    new_data_array_uniform = concatenate_list_of_dictionaries(
        uniform_configurations[:function_values_uniform_size])
    new_data_array_uniform[scalarization_key] = function_values_uniform

    function_values_prior_size = len(function_values_prior)
    new_data_array_prior = concatenate_list_of_dictionaries(
        prior_configurations[:function_values_prior_size])
    new_data_array_prior[scalarization_key] = function_values_prior

    if enable_feasible_predictor:
        new_data_array_uniform[
            feasible_parameter] = feasibility_indicators_uniform
        new_data_array_prior[feasible_parameter] = feasibility_indicators_prior

    new_data_array = concatenate_data_dictionaries(new_data_array_uniform,
                                                   new_data_array_prior)
    data_array = concatenate_data_dictionaries(data_array, new_data_array)

    # If some configurations were not evaluated, we reached the budget and must stop
    if (function_values_uniform_size < len(uniform_configurations)) or (
            function_values_prior_size < len(prior_configurations)):
        sys.stdout.write_to_logfile(
            "Out of budget, not all configurations were evaluated, stopping local search\n"
        )
        end_of_search = True

    # Keep the best sampled points from each strategy as local search starting
    # points; the oversampling leaves headroom for the duplicate removal below
    best_nbr_of_points = local_search_starting_points * oversampling_factor
    if enable_feasible_predictor:
        local_search_configurations_uniform = get_min_feasible_configurations(
            new_data_array_uniform,
            best_nbr_of_points,
            scalarization_key,
            feasible_parameter,
        )
        local_search_configurations_prior = get_min_feasible_configurations(
            new_data_array_prior,
            best_nbr_of_points,
            scalarization_key,
            feasible_parameter,
        )
    else:
        local_search_configurations_uniform = get_min_configurations(
            new_data_array_uniform, best_nbr_of_points, scalarization_key)
        local_search_configurations_prior = get_min_configurations(
            new_data_array_prior, best_nbr_of_points, scalarization_key)

    local_search_configurations = concatenate_data_dictionaries(
        local_search_configurations_uniform, local_search_configurations_prior)

    if previous_points is not None:
        concatenation_keys = input_params + [scalarization_key]
        if enable_feasible_predictor:
            concatenation_keys += [feasible_parameter]
            best_previous = get_min_feasible_configurations(
                previous_points,
                local_search_starting_points,
                scalarization_key,
                feasible_parameter,
            )
        else:
            best_previous = get_min_configurations(
                previous_points, local_search_starting_points,
                scalarization_key)

        local_search_configurations = concatenate_data_dictionaries(
            local_search_configurations, best_previous, concatenation_keys)
        data_array = concatenate_data_dictionaries(data_array, previous_points,
                                                   concatenation_keys)

    local_search_points_numpy, col_of_keys = dict_of_lists_to_numpy(
        local_search_configurations, return_col_of_key=True)
    uniform_points = local_search_points_numpy[0:best_nbr_of_points]
    prior_points = local_search_points_numpy[
        best_nbr_of_points:best_nbr_of_points * 2]
    best_previous_points = local_search_points_numpy[best_nbr_of_points * 2::]

    (
        best_previous_points,
        prior_points,
        uniform_points,
    ) = param_space.remove_duplicate_configs(
        best_previous_points,
        prior_points,
        uniform_points,
        ignore_columns=col_of_keys["scalarization"],
    )
    combined_unique_points = np.concatenate(
        (
            uniform_points[0:local_search_starting_points],
            prior_points[0:local_search_starting_points],
            best_previous_points[0:local_search_starting_points],
        ),
        axis=0,
    )
    local_search_configurations = {
        key: combined_unique_points[:, column].tolist()
        for key, column in col_of_keys.items()
    }

    data_collection_time = datetime.datetime.now()
    number_of_configurations = len(local_search_configurations[list(
        local_search_configurations.keys())[0]])
    sys.stdout.write_to_logfile("Starting local search iteration: " +
                                ", #configs:" + str(number_of_configurations) +
                                "\n")
    input_queue = JoinableQueue()
    output_queue = Queue()
    # Put each configuration in the queue to be evaluated in parallel
    for idx in range(number_of_configurations):
        input_queue.put({
            "config": get_single_configuration(local_search_configurations, idx),
            "idx": idx,
        })
        sys.stdout.write_to_logfile(f"{idx}, \n")

    # Add one sentinel per worker so each process stops once the queue empties
    for i in range(number_of_cpus):
        input_queue.put(None)

    if number_of_cpus == 1:
        parallel_multistart_local_search(
            input_queue,
            output_queue,
            input_params,
            param_space,
            optimization_function_parameters,
            optimization_function,
            enable_feasible_predictor,
            scalarization_key,
            0,
        )
        input_queue.join()

    else:
        processes = [
            Process(
                target=parallel_multistart_local_search,
                args=(
                    input_queue,
                    output_queue,
                    input_params,
                    param_space,
                    optimization_function_parameters,
                    optimization_function,
                    enable_feasible_predictor,
                    scalarization_key,
                    i,
                ),
            ) for i in range(number_of_cpus)
        ]

        with threadpool_limits(limits=1):
            for process in processes:
                process.start()
            input_queue.join()

    result_array = {}
    for i in range(number_of_configurations):
        result = output_queue.get()
        sys.stdout.write_to_logfile(result["logstring"], msg_is_verbose=True)
        result_array = concatenate_data_dictionaries(result_array,
                                                     result["data_array"])
    data_array = concatenate_data_dictionaries(result_array, data_array)

    input_queue.close()
    output_queue.close()

    if number_of_cpus != 1:
        for i in range(len(processes)):
            processes[i].join()

    local_search_time = datetime.datetime.now()
    sys.stdout.write_to_logfile(
        ("Multi-start LS time %10.4f sec\n" %
         (local_search_time - acquisition_time).total_seconds()))
    # Compute best configuration found in the local search
    best_configuration = {}
    tmp_data_array = copy.deepcopy(data_array)
    best_configuration_idx = np.argmin(tmp_data_array[scalarization_key])
    for param in input_params:
        best_configuration[param] = tmp_data_array[param][
            best_configuration_idx]
    configuration_string = param_space.get_unique_hash_string_from_values(
        best_configuration)
    # If the best configuration has already been evaluated before, remove it and get the next best configuration
    while configuration_string in fast_addressing_of_data_array:
        for key in tmp_data_array:
            del tmp_data_array[key][best_configuration_idx]
        best_configuration_idx = np.argmin(tmp_data_array[scalarization_key])
        for param in input_params:
            best_configuration[param] = tmp_data_array[param][
                best_configuration_idx]
        configuration_string = param_space.get_unique_hash_string_from_values(
            best_configuration)

    post_MSLS_time = datetime.datetime.now()

    sys.stdout.write_to_logfile(
        ("MSLS time %10.4f sec\n" %
         (post_MSLS_time - acquisition_time).total_seconds()))
    if profiling is not None:
        profiling.add("(LS) Random sampling time",
                      (sampling_time - t0).total_seconds())
        profiling.add(
            "(LS) Acquisition evaluation time",
            (acquisition_time - sampling_time).total_seconds(),
        )
        profiling.add(
            "(LS) Data collection time",
            (data_collection_time - acquisition_time).total_seconds(),
        )
        profiling.add(
            "(LS) Multi-start LS time",
            (local_search_time - data_collection_time).total_seconds(),
        )
        profiling.add(
            "(LS) Post-MSLS data treatment time",
            (post_MSLS_time - local_search_time).total_seconds(),
        )

    return data_array, best_configuration
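
The parallel section above follows a common pattern: partition the candidate points, push partitions onto a JoinableQueue, terminate each worker with a None sentinel, then reassemble ordered results from an output queue. Below is a minimal, runnable sketch of that pattern with a toy squaring objective; all names are hypothetical stand-ins, not HyperMapper's.

from multiprocessing import JoinableQueue, Process, Queue

def worker(input_queue, output_queue):
    # Consume partitions until a None sentinel arrives (one is queued per worker)
    while True:
        item = input_queue.get()
        if item is None:
            input_queue.task_done()
            break
        values = [x * x for x in item["partition"]]  # toy objective
        output_queue.put({"conf_index": item["conf_index"], "values": values})
        input_queue.task_done()

if __name__ == "__main__":
    points, n_workers, n_partitions = list(range(20)), 2, 4
    fraction = len(points) / n_partitions
    input_queue, output_queue = JoinableQueue(), Queue()
    for i in range(n_partitions):
        partition = points[int(i * fraction):int((i + 1) * fraction)]
        input_queue.put({"partition": partition, "conf_index": i})
    workers = [Process(target=worker, args=(input_queue, output_queue))
               for _ in range(n_workers)]
    for w in workers:
        w.start()
        input_queue.put(None)  # one sentinel per worker
    input_queue.join()  # blocks until every partition and sentinel is consumed
    # Reassemble results in partition order, since the output queue is unordered
    results = [None] * len(points)
    for _ in range(n_partitions):
        out = output_queue.get()
        start = int(out["conf_index"] * fraction)
        results[start:start + len(out["values"])] = out["values"]
    for w in workers:
        w.join()
    print(results)  # [0, 1, 4, 9, ..., 361]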
Example #3
def main(config, black_box_function=None, profiling=None):
    """
    Run design-space exploration using Bayesian optimization.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param black_box_function: the function being optimized, when HyperMapper runs in default mode.
    :param profiling: an object used to collect profiling data, or None.
    """
    start_time = datetime.datetime.now()
    run_directory = config["run_directory"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]

    # Start logging
    log_file = deal_with_relative_and_absolute_path(run_directory, config["log_file"])
    sys.stdout.change_log_file(log_file)
    sys.stdout.set_verbose_mode(config["verbose_logging"])
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    # Log the json configuration for this optimization
    sys.stdout.write_to_logfile(str(config) + "\n")

    # Create parameter space object and unpack hyperparameters from json
    param_space = space.Space(config)
    application_name = config["application_name"]
    optimization_metrics = config["optimization_objectives"]
    optimization_iterations = config["optimization_iterations"]
    evaluations_per_optimization_iteration = config[
        "evaluations_per_optimization_iteration"
    ]
    output_data_file = get_output_data_file(
        config["output_data_file"], run_directory, application_name
    )
    batch_mode = evaluations_per_optimization_iteration > 1
    number_of_cpus = config["number_of_cpus"]
    print_importances = config["print_parameter_importance"]
    epsilon_greedy_threshold = config["epsilon_greedy_threshold"]
    acquisition_function = config["acquisition_function"]
    weight_sampling = config["weight_sampling"]
    scalarization_method = config["scalarization_method"]
    scalarization_key = config["scalarization_key"]
    doe_type = config["design_of_experiment"]["doe_type"]
    number_of_doe_samples = config["design_of_experiment"]["number_of_samples"]
    model_type = config["models"]["model"]
    optimization_method = config["optimization_method"]
    time_budget = config["time_budget"]
    acquisition_function_optimizer = config["acquisition_function_optimizer"]
    if (
        acquisition_function_optimizer == "cma_es"
        and not param_space.is_space_continuous()
    ):
        print(
            "Warning: CMA_ES can only be used with continuous search spaces (i.e. all parameters must be of type 'real')"
        )
        print("Switching acquisition function optimizer to local search")
        acquisition_function_optimizer = "local_search"

    input_params = param_space.get_input_parameters()
    number_of_objectives = len(optimization_metrics)
    objective_limits = {}
    data_array = {}
    fast_addressing_of_data_array = {}
    objective_bounds = None
    exhaustive_search_data_array = None
    normalize_objectives = False
    debug = False
    # default when no "feasible_output" section is present in the config
    enable_feasible_predictor = False

    if "feasible_output" in config:
        feasible_output = config["feasible_output"]
        feasible_output_name = feasible_output["name"]
        enable_feasible_predictor = feasible_output["enable_feasible_predictor"]
        enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[
            "enable_feasible_predictor_grid_search_on_recall_and_precision"
        ]
        feasible_predictor_grid_search_validation_file = feasible_output[
            "feasible_predictor_grid_search_validation_file"
        ]
        feasible_parameter = param_space.get_feasible_parameter()
        number_of_trees = config["models"]["number_of_trees"]

    if weight_sampling == "bounding_box":
        objective_bounds = {}
        user_bounds = config["bounding_box_limits"]
        if len(user_bounds) == 2:
            if user_bounds[0] > user_bounds[1]:
                user_bounds[0], user_bounds[1] = user_bounds[1], user_bounds[0]
            for objective in optimization_metrics:
                objective_bounds[objective] = user_bounds
                objective_limits[objective] = user_bounds
        elif len(user_bounds) == number_of_objectives * 2:
            idx = 0
            for objective in optimization_metrics:
                objective_bounds[objective] = user_bounds[idx : idx + 2]
                if objective_bounds[objective][0] > objective_bounds[objective][1]:
                    objective_bounds[objective][0], objective_bounds[objective][1] = (
                        objective_bounds[objective][1],
                        objective_bounds[objective][0],
                    )
                objective_limits[objective] = objective_bounds[objective]
                idx += 2
        else:
            print(
                "Wrong number of bounding boxes, expected 2 or",
                2 * number_of_objectives,
                "got",
                len(user_bounds),
            )
            raise SystemExit
    else:
        for objective in optimization_metrics:
            objective_limits[objective] = [float("inf"), float("-inf")]

    exhaustive_search_data_array = None
    exhaustive_search_fast_addressing_of_data_array = None
    if hypermapper_mode == "exhaustive":
        exhaustive_file = config["hypermapper_mode"]["exhaustive_search_file"]
        (
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
        ) = param_space.load_data_file(
            exhaustive_file, debug=False, number_of_cpus=number_of_cpus
        )

    # Check that the required parameters are correctly defined
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    if (model_type == "gaussian_process") and (acquisition_function == "TS"):
        print(
            "Warning: the TS acquisition function with Gaussian Process models is not implemented yet"
        )
        print("Using EI acquisition function instead")
        config["acquisition_function"] = "EI"

    if number_of_cpus > 1:
        print(
            "Warning: HyperMapper supports only sequential execution for now. Running on a single cpu."
        )
        number_of_cpus = 1

    # If priors are present, use prior-guided optimization
    user_priors = False
    for input_param in config["input_parameters"]:
        if config["input_parameters"][input_param]["prior"] != "uniform":
            if number_of_objectives == 1:
                user_priors = True
            else:
                print(
                    "Warning: prior optimization does not work with multiple objectives yet, priors will be uniform"
                )
                config["input_parameters"][input_param]["prior"] = "uniform"

    if user_priors:
        bo_method = prior_guided_optimization
    else:
        bo_method = random_scalarizations
        normalize_objectives = True

    ### Resume previous optimization, if any
    beginning_of_time = param_space.current_milli_time()
    absolute_configuration_index = 0
    doe_t0 = datetime.datetime.now()
    if config["resume_optimization"] == True:
        resume_data_file = config["resume_optimization_data"]

        if not resume_data_file.endswith(".csv"):
            print("Error: resume data file must be a CSV")
            raise SystemExit
        if resume_data_file == "output_samples.csv":
            resume_data_file = application_name + "_" + resume_data_file

        data_array, fast_addressing_of_data_array = param_space.load_data_file(
            resume_data_file, debug=False, number_of_cpus=number_of_cpus
        )
        absolute_configuration_index = len(
            data_array[list(data_array.keys())[0]]
        )  # get the number of points evaluated in the previous run
        beginning_of_time = (
            beginning_of_time - data_array[param_space.get_timestamp_parameter()[0]][-1]
        )  # Set the timestamp back to match the previous run
        print(
            "Resumed optimization, number of samples = %d ......."
            % absolute_configuration_index
        )

    create_output_data_file(
        output_data_file, param_space.get_input_output_and_timestamp_parameters()
    )
    if data_array:  # if it is not empty
        write_data_array(param_space, data_array, output_data_file)
    ### DoE phase
    if absolute_configuration_index < number_of_doe_samples:
        configurations = []
        default_configuration = param_space.get_default_or_random_configuration()
        str_data = param_space.get_unique_hash_string_from_values(default_configuration)
        if str_data not in fast_addressing_of_data_array:
            fast_addressing_of_data_array[str_data] = absolute_configuration_index
            configurations.append(default_configuration)
            absolute_configuration_index += 1

        doe_configurations = []
        if absolute_configuration_index < number_of_doe_samples:
            doe_configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array,
                number_of_doe_samples - absolute_configuration_index,
                doe_type,
            )
        configurations += doe_configurations
        print(
            "Design of experiment phase, number of new doe samples = %d ......."
            % len(configurations)
        )

        doe_data_array = param_space.run_configurations(
            hypermapper_mode,
            configurations,
            beginning_of_time,
            output_data_file,
            black_box_function,
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
            run_directory,
            batch_mode=batch_mode,
        )
        data_array = concatenate_data_dictionaries(
            data_array,
            doe_data_array,
            param_space.input_output_and_timestamp_parameter_names,
        )
        absolute_configuration_index = number_of_doe_samples
        iteration_number = 1
    else:
        iteration_number = absolute_configuration_index - number_of_doe_samples + 1

    # If we have feasibility constraints, we must ensure we have at least one feasible and one infeasible sample before starting optimization
    # If this is not true, continue design of experiment until the condition is met
    if enable_feasible_predictor:
        while (
            are_all_elements_equal(data_array[feasible_parameter[0]])
            and optimization_iterations > 0
        ):
            print(
                "Warning: all points are either valid or invalid, random sampling more configurations."
            )
            print("Number of doe samples so far:", absolute_configuration_index)
            configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array, 1, "random sampling"
            )
            new_data_array = param_space.run_configurations(
                hypermapper_mode,
                configurations,
                beginning_of_time,
                output_data_file,
                black_box_function,
                exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array,
                run_directory,
                batch_mode=batch_mode,
            )
            data_array = concatenate_data_dictionaries(
                new_data_array,
                data_array,
                param_space.input_output_and_timestamp_parameter_names,
            )
            absolute_configuration_index += 1
            optimization_iterations -= 1

    for objective in optimization_metrics:
        lower_bound = min(objective_limits[objective][0], min(data_array[objective]))
        upper_bound = max(objective_limits[objective][1], max(data_array[objective]))
        objective_limits[objective] = [lower_bound, upper_bound]
    print(
        "\nEnd of doe/resume phase, the number of evaluated configurations is: %d\n"
        % absolute_configuration_index
    )
    sys.stdout.write_to_logfile(
        (
            "End of DoE - Time %10.4f sec\n"
            % ((datetime.datetime.now() - doe_t0).total_seconds())
        )
    )
    if doe_type == "grid_search" and optimization_iterations > 0:
        print(
            "Warning: DoE is grid search, setting number of optimization iterations to 0"
        )
        optimization_iterations = 0

    ### Main optimization loop
    bo_t0 = datetime.datetime.now()
    run_time = (datetime.datetime.now() - start_time).total_seconds() / 60
    # run_time / time_budget < 1 if budget > elapsed time or budget == -1
    if time_budget > 0:
        print(
            "starting optimization phase, limited to run for ", time_budget, " minutes"
        )
    elif time_budget == 0:
        print("Time budget cannot be zero. To not limit runtime set time_budget = -1")
        sys.exit()

    configurations = []
    evaluation_budget = optimization_iterations * evaluations_per_optimization_iteration
    iteration_number = 0
    evaluation_count = 0
    while evaluation_count < evaluation_budget and run_time / time_budget < 1:
        if evaluation_count % evaluations_per_optimization_iteration == 0:
            iteration_number += 1
            print("Starting optimization iteration", iteration_number)
            iteration_t0 = datetime.datetime.now()

        model_t0 = datetime.datetime.now()
        regression_models, _, _ = models.generate_mono_output_regression_models(
            data_array,
            param_space,
            input_params,
            optimization_metrics,
            1.00,
            config,
            model_type=model_type,
            number_of_cpus=number_of_cpus,
            print_importances=print_importances,
            normalize_objectives=normalize_objectives,
            objective_limits=objective_limits,
        )

        classification_model = None
        if enable_feasible_predictor:
            classification_model, _, _ = models.generate_classification_model(
                application_name,
                param_space,
                data_array,
                input_params,
                feasible_parameter,
                1.00,
                config,
                debug,
                number_of_cpus=number_of_cpus,
                data_array_exhaustive=exhaustive_search_data_array,
                enable_feasible_predictor_grid_search_on_recall_and_precision=enable_feasible_predictor_grid_search_on_recall_and_precision,
                feasible_predictor_grid_search_validation_file=feasible_predictor_grid_search_validation_file,
                print_importances=print_importances,
            )
        model_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Model fitting time %10.4f sec\n"
                % ((model_t1 - model_t0).total_seconds())
            )
        )
        if weight_sampling == "bounding_box":
            objective_weights = sample_weight_bbox(
                optimization_metrics, objective_bounds, objective_limits, 1
            )[0]
        elif weight_sampling == "flat":
            objective_weights = sample_weight_flat(optimization_metrics, 1)[0]
        else:
            print("Error: unrecognized option:", weight_sampling)
            raise SystemExit

        # Scalarize all objectives into a single column using the sampled weights
        data_array_scalarization, _ = compute_data_array_scalarization(
            data_array, objective_weights, objective_limits, scalarization_method
        )
        data_array[scalarization_key] = data_array_scalarization.tolist()

        # Epsilon-greedy: with probability epsilon_greedy_threshold, pick a random
        # configuration instead of optimizing the acquisition function
        epsilon = random.uniform(0, 1)
        local_search_t0 = datetime.datetime.now()
        if epsilon > epsilon_greedy_threshold:
            best_configuration = bo_method(
                config,
                data_array,
                param_space,
                fast_addressing_of_data_array,
                regression_models,
                iteration_number,
                objective_weights,
                objective_limits,
                classification_model,
                profiling,
                acquisition_function_optimizer,
            )

        else:
            sys.stdout.write_to_logfile(
                str(epsilon)
                + " < "
                + str(epsilon_greedy_threshold)
                + " random sampling a configuration to run\n"
            )
            tmp_fast_addressing_of_data_array = copy.deepcopy(
                fast_addressing_of_data_array
            )
            best_configuration = (
                param_space.random_sample_configurations_without_repetitions(
                    tmp_fast_addressing_of_data_array, 1, use_priors=False
                )[0]
            )
        local_search_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Local search time %10.4f sec\n"
                % ((local_search_t1 - local_search_t0).total_seconds())
            )
        )

        configurations.append(best_configuration)

        # When we have selected "evaluations_per_optimization_iteration" configurations, evaluate the batch
        if evaluation_count % evaluations_per_optimization_iteration == (
            evaluations_per_optimization_iteration - 1
        ):
            black_box_function_t0 = datetime.datetime.now()
            new_data_array = param_space.run_configurations(
                hypermapper_mode,
                configurations,
                beginning_of_time,
                output_data_file,
                black_box_function,
                exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array,
                run_directory,
                batch_mode=batch_mode,
            )
            black_box_function_t1 = datetime.datetime.now()
            sys.stdout.write_to_logfile(
                (
                    "Black box function time %10.4f sec\n"
                    % ((black_box_function_t1 - black_box_function_t0).total_seconds())
                )
            )

            # If running batch BO, the data arrays contain 'liars'; update them with the true values
            for configuration_idx in range(
                len(new_data_array[list(new_data_array.keys())[0]])
            ):
                configuration = get_single_configuration(
                    new_data_array, configuration_idx
                )
                str_data = param_space.get_unique_hash_string_from_values(configuration)
                if str_data in fast_addressing_of_data_array:
                    absolute_index = fast_addressing_of_data_array[str_data]
                    for header in configuration:
                        data_array[header][absolute_index] = configuration[header]
                else:
                    fast_addressing_of_data_array[
                        str_data
                    ] = absolute_configuration_index
                    absolute_configuration_index += 1
                    for header in configuration:
                        data_array[header].append(configuration[header])

            configurations = []
        else:
            # If we have not selected all points in the batch yet, add the model prediction as a 'liar'
            for header in best_configuration:
                data_array[header].append(best_configuration[header])

            bufferx = [tuple(best_configuration.values())]
            prediction_means, _ = models.compute_model_mean_and_uncertainty(
                bufferx, regression_models, model_type, param_space
            )
            for objective in prediction_means:
                data_array[objective].append(prediction_means[objective][0])

            if classification_model is not None:
                classification_prediction_results = models.model_probabilities(
                    bufferx, classification_model, param_space
                )
                true_value_index = (
                    classification_model[feasible_parameter[0]]
                    .classes_.tolist()
                    .index(True)
                )
                feasibility_indicator = classification_prediction_results[
                    feasible_parameter[0]
                ][:, true_value_index]
                data_array[feasible_output_name].append(
                    feasibility_indicator[0] >= 0.5
                )

            data_array[param_space.get_timestamp_parameter()[0]].append(
                absolute_configuration_index
            )
            str_data = param_space.get_unique_hash_string_from_values(
                best_configuration
            )
            fast_addressing_of_data_array[str_data] = absolute_configuration_index
            absolute_configuration_index += 1

        for objective in optimization_metrics:
            lower_bound = min(
                objective_limits[objective][0], min(data_array[objective])
            )
            upper_bound = max(
                objective_limits[objective][1], max(data_array[objective])
            )
            objective_limits[objective] = [lower_bound, upper_bound]

        evaluation_count += 1
        run_time = (datetime.datetime.now() - start_time).total_seconds() / 60
        iteration_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Total iteration time %10.4f sec\n"
                % ((iteration_t1 - iteration_t0).total_seconds())
            )
        )

        if profiling is not None:
            profiling.add("Model fitting time", (model_t1 - model_t0).total_seconds())
            # local search profiling is done inside of local search
            profiling.add(
                "Black box function time",
                (black_box_function_t1 - black_box_function_t0).total_seconds(),
            )

    sys.stdout.write_to_logfile(
        (
            "End of BO phase - Time %10.4f sec\n"
            % ((datetime.datetime.now() - bo_t0).total_seconds())
        )
    )

    print("End of Bayesian Optimization")

    print_posterior_best = config["print_posterior_best"]
    if print_posterior_best:
        if number_of_objectives > 1:
            print(
                "Warning: print_posterior_best is set to true, but application is not mono-objective."
            )
            print(
                "Can only compute best according to posterior for mono-objective applications. Ignoring."
            )
        elif enable_feasible_predictor:
            print(
                "Warning: print_posterior_best is set to true, but application has feasibility constraints."
            )
            print(
                "Cannot compute best according to posterior for applications with feasibility constraints. Ignoring."
            )
        else:
            # Update model with latest data
            regression_models, _, _ = models.generate_mono_output_regression_models(
                data_array,
                param_space,
                input_params,
                optimization_metrics,
                1.00,
                config,
                model_type=model_type,
                number_of_cpus=number_of_cpus,
                print_importances=print_importances,
                normalize_objectives=normalize_objectives,
                objective_limits=objective_limits,
            )

            best_point = models.minimize_posterior_mean(
                regression_models,
                config,
                param_space,
                data_array,
                objective_limits,
                normalize_objectives,
                profiling,
            )
            keys = ""
            best_point_string = ""
            for key in best_point:
                keys += f"{key},"
                best_point_string += f"{best_point[key]},"
            keys = keys[:-1]
            best_point_string = best_point_string[:-1]

            sys.stdout.write_protocol("Minimum of the posterior mean:\n")
            sys.stdout.write_protocol(f"{keys}\n")
            sys.stdout.write_protocol(f"{best_point_string}\n\n")

    sys.stdout.write_to_logfile(
        (
            "Total script time %10.2f sec\n"
            % ((datetime.datetime.now() - start_time).total_seconds())
        )
    )

    return data_array
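
Two pieces of the loop above deserve a compressed illustration: the epsilon-greedy choice between optimizing the acquisition function and random sampling, and the 'liar' trick that appends the model's own prediction as a stand-in observation so that consecutive picks within a batch avoid each other. The sketch below is a hypothetical one-dimensional toy, not HyperMapper code.

import random

def propose_batch(batch_size, optimize_acquisition, predict_mean, data,
                  epsilon_threshold=0.1):
    # Select a batch of points; between real evaluations, append the model's
    # prediction as a 'liar' so the next pick accounts for pending points
    batch = []
    for _ in range(batch_size):
        if random.uniform(0, 1) > epsilon_threshold:
            candidate = optimize_acquisition(data)   # exploit the surrogate
        else:
            candidate = random.uniform(-5, 5)        # explore at random
        batch.append(candidate)
        data.append((candidate, predict_mean(candidate)))
    return batch

if __name__ == "__main__":
    data = [(1.0, 1.0), (-2.0, 4.0)]  # (x, f(x)) samples of f(x) = x**2
    predict_mean = lambda x: x ** 2   # toy surrogate: the true function
    # toy "acquisition optimizer": perturb the incumbent best point
    optimize_acquisition = lambda d: min(d, key=lambda p: p[1])[0] + random.gauss(0, 0.5)
    print(propose_batch(3, optimize_acquisition, predict_mean, data))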
Example #4
def run_objective_function(
    configurations,
    hypermapper_mode,
    param_space,
    beginning_of_time,
    output_data_file,
    run_directory,
    local_search_data_array,
    fast_addressing_of_data_array,
    exhaustive_search_data_array,
    exhaustive_search_fast_addressing_of_data_array,
    scalarization_weights,
    objective_limits,
    scalarization_method,
    enable_feasible_predictor=False,
    evaluation_limit=float("inf"),
    black_box_function=None,
    number_of_cpus=0,
):
    """
    Evaluate a list of configurations using the black-box function being optimized.
    This method avoids evaluating repeated points by recovering their value from the history of evaluated points.
    :param configurations: list of configurations to evaluate.
    :param hypermapper_mode: which HyperMapper mode is being used.
    :param param_space: a space object containing the search space.
    :param beginning_of_time: timestamp of when the optimization started.
    :param output_data_file: the file where evaluation results are saved.
    :param run_directory: directory where HyperMapper is running.
    :param local_search_data_array: a dictionary containing all of the configurations that have been evaluated.
    :param fast_addressing_of_data_array: a dictionary containing evaluated configurations and their index in the local_search_data_array.
    :param exhaustive_search_data_array: dictionary containing all points and function values, used in exhaustive mode.
    :param exhaustive_search_fast_addressing_of_data_array: dictionary containing the index of each point in the exhaustive array.
    :param scalarization_weights: the weights used to scalarize the function value.
    :param objective_limits: dictionary containing the estimated min and max limits for each objective.
    :param scalarization_method: which method to use to scalarize multiple objectives.
    :param enable_feasible_predictor: whether to use constrained optimization.
    :param evaluation_limit: the maximum number of function evaluations allowed for the local search.
    :param black_box_function: the black_box_function being optimized in the local search.
    :param number_of_cpus: an integer for the number of cpus to be used in parallel.
    :return: the scalarized values for all points in configurations and their feasibility indicators.
    """
    new_configurations = []
    new_evaluations = {}
    previous_evaluations = defaultdict(list)
    number_of_new_evaluations = 0
    t0 = datetime.datetime.now()
    absolute_configuration_index = len(fast_addressing_of_data_array)

    # Recover repeated configurations from the history and queue unseen ones
    # for evaluation, respecting the evaluation limit
    for configuration in configurations:
        str_data = param_space.get_unique_hash_string_from_values(
            configuration)
        if str_data in fast_addressing_of_data_array:
            configuration_idx = fast_addressing_of_data_array[str_data]
            for key in local_search_data_array:
                previous_evaluations[key].append(
                    local_search_data_array[key][configuration_idx])
        else:
            if (absolute_configuration_index + number_of_new_evaluations <
                    evaluation_limit):
                new_configurations.append(configuration)
                number_of_new_evaluations += 1

    # Evaluate the new configurations, if there are any
    t1 = datetime.datetime.now()
    if number_of_new_evaluations > 0:
        new_evaluations = param_space.run_configurations(
            hypermapper_mode,
            new_configurations,
            beginning_of_time,
            output_data_file,
            black_box_function,
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
            run_directory,
        )

    all_evaluations = concatenate_data_dictionaries(previous_evaluations,
                                                    new_evaluations)
    all_evaluations_size = len(all_evaluations[list(
        all_evaluations.keys())[0]])

    if enable_feasible_predictor:
        feasible_parameter = param_space.get_feasible_parameter()[0]
        feasibility_indicators = all_evaluations[feasible_parameter]
    else:
        # if no constraints, all points are feasible
        feasibility_indicators = [1] * all_evaluations_size

    # Collapse all objectives into a single scalar value per configuration
    scalarized_values, tmp_objective_limits = compute_data_array_scalarization(
        all_evaluations, scalarization_weights, objective_limits,
        scalarization_method)

    for objective in objective_limits:
        objective_limits[objective] = tmp_objective_limits[objective]

    for idx in range(number_of_new_evaluations):
        configuration = get_single_configuration(new_evaluations, idx)
        for key in configuration:
            local_search_data_array[key].append(configuration[key])

        str_data = param_space.get_unique_hash_string_from_values(
            configuration)
        fast_addressing_of_data_array[str_data] = absolute_configuration_index
        absolute_configuration_index += 1

    sys.stdout.write_to_logfile(
        ("Time to run new configurations %10.4f sec\n" %
         ((datetime.datetime.now() - t1).total_seconds())))
    sys.stdout.write_to_logfile(
        ("Total time to run configurations %10.4f sec\n" %
         ((datetime.datetime.now() - t0).total_seconds())))

    return list(scalarized_values), feasibility_indicators
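
compute_data_array_scalarization is a HyperMapper helper; as a rough illustration of the shape of such a scalarization, here is a minimal weighted-sum sketch over a columnar evaluations dictionary. This is a hypothetical simplification: the real helper also updates the objective limits and supports several scalarization methods.

def weighted_sum_scalarization(evaluations, weights, limits):
    # Normalize each objective to [0, 1] with the given limits and sum the
    # weighted, normalized values into one scalar per configuration
    n = len(next(iter(evaluations.values())))
    scalarized = [0.0] * n
    for objective, weight in weights.items():
        lower, upper = limits[objective]
        span = (upper - lower) or 1.0  # guard against degenerate limits
        for i, value in enumerate(evaluations[objective]):
            scalarized[i] += weight * (value - lower) / span
    return scalarized

# Example: two objectives over three configurations
evaluations = {"latency": [10.0, 20.0, 15.0], "energy": [3.0, 1.0, 2.0]}
print(weighted_sum_scalarization(
    evaluations,
    weights={"latency": 0.7, "energy": 0.3},
    limits={"latency": (10.0, 20.0), "energy": (1.0, 3.0)},
))  # approximately [0.3, 0.7, 0.5]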