Example #1
def HVI_from_files(real_pareto_file, parameters_file):
    """
    Compute the hypervolume indicator (HVI) of a target Pareto front, using the real Pareto front as reference.
    :param real_pareto_file: file containing the real Pareto front
    :param parameters_file: file containing the experiment scenario. Also used to find the target Pareto file.
    :return: the HVI of the target Pareto front.
    """
    config = validate_json(parameters_file)
    param_space = space.Space(config)

    application_name = config["application_name"]
    test_pareto_file = config["output_pareto_file"]
    run_directory = config["run_directory"]
    if test_pareto_file == "output_pareto.csv":
        test_pareto_file = application_name + "_" + test_pareto_file
    test_pareto_file = deal_with_relative_and_absolute_path(
        run_directory, test_pareto_file
    )

    optimization_metrics = param_space.get_optimization_parameters()
    selection_keys = optimization_metrics + param_space.get_timestamp_parameter()
    feasible_flag = param_space.get_feasible_parameter() != [None]
    exhaustive_branin_pareto, _ = param_space.load_data_file(
        real_pareto_file, selection_keys_list=selection_keys, only_valid=feasible_flag
    )
    test_pareto, _ = param_space.load_data_file(
        test_pareto_file, selection_keys_list=selection_keys, only_valid=feasible_flag
    )
    concatenated_all_data_array = concatenate_data_dictionaries(
        exhaustive_branin_pareto, test_pareto, selection_keys_list=selection_keys
    )

    standard_deviations, max_point = compute_std_and_max_point(
        concatenated_all_data_array, optimization_metrics
    )

    exhaustive_branin_pareto = normalize_with_std(
        exhaustive_branin_pareto, standard_deviations, optimization_metrics
    )
    test_pareto = normalize_with_std(
        test_pareto, standard_deviations, optimization_metrics
    )

    exhaustive_branin_pareto = [
        exhaustive_branin_pareto[objective] for objective in optimization_metrics
    ]
    exhaustive_branin_pareto = list(zip(*exhaustive_branin_pareto))

    test_pareto = [test_pareto[objective] for objective in optimization_metrics]
    test_pareto = list(zip(*test_pareto))

    hv_exhaustive = H(exhaustive_branin_pareto, max_point)
    hv_test = H(test_pareto, max_point)
    hvi = hv_exhaustive - hv_test

    return hvi
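
# For intuition: with two objectives, the hypervolume H above reduces to a
# sweep over the sorted front. A minimal sketch (not HyperMapper's H
# implementation, which is more general), assuming minimization, mutually
# nondominated points, and a reference point dominated by every point:
def hypervolume_2d(points, ref):
    pts = sorted(points)  # ascending in f1; f2 then descends along the front
    next_xs = [p[0] for p in pts[1:]] + [ref[0]]
    return sum((nx - x) * (ref[1] - y) for (x, y), nx in zip(pts, next_xs))

# HVI = H(reference front) - H(target front), which is non-negative when the
# reference front dominates the target front, e.g.:
# hypervolume_2d([(0.0, 1.0), (1.0, 0.0)], (2.0, 2.0)) -> 3.0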
Example #2
def main(config, black_box_function=None, profiling=None):
    """
    Run design-space exploration using bayesian optimization.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param output_file: a name for the file used to save the dse results.
    """
    start_time = datetime.datetime.now()
    run_directory = config["run_directory"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]

    # Start logging
    log_file = deal_with_relative_and_absolute_path(run_directory, config["log_file"])
    sys.stdout.change_log_file(log_file)
    sys.stdout.set_verbose_mode(config["verbose_logging"])
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    # Log the json configuration for this optimization
    sys.stdout.write_to_logfile(str(config) + "\n")

    # Create parameter space object and unpack hyperparameters from json
    param_space = space.Space(config)
    application_name = config["application_name"]
    optimization_metrics = config["optimization_objectives"]
    optimization_iterations = config["optimization_iterations"]
    evaluations_per_optimization_iteration = config[
        "evaluations_per_optimization_iteration"
    ]
    output_data_file = get_output_data_file(
        config["output_data_file"], run_directory, application_name
    )
    batch_mode = evaluations_per_optimization_iteration > 1
    number_of_cpus = config["number_of_cpus"]
    print_importances = config["print_parameter_importance"]
    epsilon_greedy_threshold = config["epsilon_greedy_threshold"]
    acquisition_function = config["acquisition_function"]
    weight_sampling = config["weight_sampling"]
    scalarization_method = config["scalarization_method"]
    scalarization_key = config["scalarization_key"]
    doe_type = config["design_of_experiment"]["doe_type"]
    number_of_doe_samples = config["design_of_experiment"]["number_of_samples"]
    model_type = config["models"]["model"]
    optimization_method = config["optimization_method"]
    time_budget = config["time_budget"]
    acquisition_function_optimizer = config["acquisition_function_optimizer"]
    if (
        acquisition_function_optimizer == "cma_es"
        and not param_space.is_space_continuous()
    ):
        print(
            "Warning: CMA_ES can only be used with continuous search spaces (i.e. all parameters must be of type 'real')"
        )
        print("Switching acquisition function optimizer to local search")
        acquisition_function_optimizer = "local_search"

    input_params = param_space.get_input_parameters()
    number_of_objectives = len(optimization_metrics)
    objective_limits = {}
    data_array = {}
    fast_addressing_of_data_array = {}
    objective_bounds = None
    normalize_objectives = False
    debug = False
    enable_feasible_predictor = False  # overridden below if "feasible_output" is set

    if "feasible_output" in config:
        feasible_output = config["feasible_output"]
        feasible_output_name = feasible_output["name"]
        enable_feasible_predictor = feasible_output["enable_feasible_predictor"]
        enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[
            "enable_feasible_predictor_grid_search_on_recall_and_precision"
        ]
        feasible_predictor_grid_search_validation_file = feasible_output[
            "feasible_predictor_grid_search_validation_file"
        ]
        feasible_parameter = param_space.get_feasible_parameter()
        number_of_trees = config["models"]["number_of_trees"]

    if weight_sampling == "bounding_box":
        objective_bounds = {}
        user_bounds = config["bounding_box_limits"]
        if len(user_bounds) == 2:
            if user_bounds[0] > user_bounds[1]:
                user_bounds[0], user_bounds[1] = user_bounds[1], user_bounds[0]
            for objective in optimization_metrics:
                objective_bounds[objective] = user_bounds
                objective_limits[objective] = user_bounds
        elif len(user_bounds) == number_of_objectives * 2:
            idx = 0
            for objective in optimization_metrics:
                objective_bounds[objective] = user_bounds[idx : idx + 2]
                if objective_bounds[objective][0] > objective_bounds[objective][1]:
                    objective_bounds[objective][0], objective_bounds[objective][1] = (
                        objective_bounds[objective][1],
                        objective_bounds[objective][0],
                    )
                objective_limits[objective] = objective_bounds[objective]
                idx += 2
        else:
            print(
                "Wrong number of bounding boxes, expected 2 or",
                2 * number_of_objectives,
                "got",
                len(user_bounds),
            )
            raise SystemExit
    else:
        for objective in optimization_metrics:
            objective_limits[objective] = [float("inf"), float("-inf")]

    exhaustive_search_data_array = None
    exhaustive_search_fast_addressing_of_data_array = None
    if hypermapper_mode == "exhaustive":
        exhaustive_file = config["hypermapper_mode"]["exhaustive_search_file"]
        (
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
        ) = param_space.load_data_file(
            exhaustive_file, debug=False, number_of_cpus=number_of_cpus
        )

    # Check if some parameters are correctly defined
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    if (model_type == "gaussian_process") and (acquisition_function == "TS"):
        print(
            "Error: The TS acquisition function with Gaussian Process models is still under implementation"
        )
        print("Using EI acquisition function instead")
        config["acquisition_function"] = "EI"

    if number_of_cpus > 1:
        print(
            "Warning: HyperMapper supports only sequential execution for now. Running on a single cpu."
        )
        number_of_cpus = 1

    # If priors are present, use prior-guided optimization
    user_priors = False
    for input_param in config["input_parameters"]:
        if config["input_parameters"][input_param]["prior"] != "uniform":
            if number_of_objectives == 1:
                user_priors = True
            else:
                print(
                    "Warning: prior optimization does not work with multiple objectives yet, priors will be uniform"
                )
                config["input_parameters"][input_param]["prior"] = "uniform"

    if user_priors:
        bo_method = prior_guided_optimization
    else:
        bo_method = random_scalarizations
        normalize_objectives = True

    ### Resume previous optimization, if any
    beginning_of_time = param_space.current_milli_time()
    absolute_configuration_index = 0
    doe_t0 = datetime.datetime.now()
    if config["resume_optimization"] == True:
        resume_data_file = config["resume_optimization_data"]

        if not resume_data_file.endswith(".csv"):
            print("Error: resume data file must be a CSV")
            raise SystemExit
        if resume_data_file == "output_samples.csv":
            resume_data_file = application_name + "_" + resume_data_file

        data_array, fast_addressing_of_data_array = param_space.load_data_file(
            resume_data_file, debug=False, number_of_cpus=number_of_cpus
        )
        absolute_configuration_index = len(
            data_array[list(data_array.keys())[0]]
        )  # get the number of points evaluated in the previous run
        beginning_of_time = (
            beginning_of_time - data_array[param_space.get_timestamp_parameter()[0]][-1]
        )  # Set the timestamp back to match the previous run
        print(
            "Resumed optimization, number of samples = %d ......."
            % absolute_configuration_index
        )

    create_output_data_file(
        output_data_file, param_space.get_input_output_and_timestamp_parameters()
    )
    if data_array:  # if it is not empty
        write_data_array(param_space, data_array, output_data_file)
    ### DoE phase
    if absolute_configuration_index < number_of_doe_samples:
        configurations = []
        default_configuration = param_space.get_default_or_random_configuration()
        str_data = param_space.get_unique_hash_string_from_values(default_configuration)
        if str_data not in fast_addressing_of_data_array:
            fast_addressing_of_data_array[str_data] = absolute_configuration_index
            configurations.append(default_configuration)
            absolute_configuration_index += 1

        doe_configurations = []
        if absolute_configuration_index < number_of_doe_samples:
            doe_configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array,
                number_of_doe_samples - absolute_configuration_index,
                doe_type,
            )
        configurations += doe_configurations
        print(
            "Design of experiment phase, number of new doe samples = %d ......."
            % len(configurations)
        )

        doe_data_array = param_space.run_configurations(
            hypermapper_mode,
            configurations,
            beginning_of_time,
            output_data_file,
            black_box_function,
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
            run_directory,
            batch_mode=batch_mode,
        )
        data_array = concatenate_data_dictionaries(
            data_array,
            doe_data_array,
            param_space.input_output_and_timestamp_parameter_names,
        )
        absolute_configuration_index = number_of_doe_samples
        iteration_number = 1
    else:
        iteration_number = absolute_configuration_index - number_of_doe_samples + 1

    # If we have feasibility constraints, we must ensure we have at least one feasible and one infeasible sample before starting optimization
    # If this is not true, continue design of experiment until the condition is met
    if enable_feasible_predictor:
        while (
            are_all_elements_equal(data_array[feasible_parameter[0]])
            and optimization_iterations > 0
        ):
            print(
                "Warning: all points are either valid or invalid, random sampling more configurations."
            )
            print("Number of doe samples so far:", absolute_configuration_index)
            configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array, 1, "random sampling"
            )
            new_data_array = param_space.run_configurations(
                hypermapper_mode,
                configurations,
                beginning_of_time,
                output_data_file,
                black_box_function,
                exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array,
                run_directory,
                batch_mode=batch_mode,
            )
            data_array = concatenate_data_dictionaries(
                new_data_array,
                data_array,
                param_space.input_output_and_timestamp_parameter_names,
            )
            absolute_configuration_index += 1
            optimization_iterations -= 1

    for objective in optimization_metrics:
        lower_bound = min(objective_limits[objective][0], min(data_array[objective]))
        upper_bound = max(objective_limits[objective][1], max(data_array[objective]))
        objective_limits[objective] = [lower_bound, upper_bound]
    print(
        "\nEnd of doe/resume phase, the number of evaluated configurations is: %d\n"
        % absolute_configuration_index
    )
    sys.stdout.write_to_logfile(
        (
            "End of DoE - Time %10.4f sec\n"
            % ((datetime.datetime.now() - doe_t0).total_seconds())
        )
    )
    if doe_type == "grid_search" and optimization_iterations > 0:
        print(
            "Warning: DoE is grid search, setting number of optimization iterations to 0"
        )
        optimization_iterations = 0

    ### Main optimization loop
    bo_t0 = datetime.datetime.now()
    run_time = (datetime.datetime.now() - start_time).total_seconds() / 60
    # run_time / time_budget < 1 if budget > elapsed time or budget == -1
    if time_budget > 0:
        print(
            "Starting optimization phase, limited to run for", time_budget, "minutes"
        )
    elif time_budget == 0:
        print("Time budget cannot be zero. To run without a time limit, set time_budget = -1")
        sys.exit()

    configurations = []
    evaluation_budget = optimization_iterations * evaluations_per_optimization_iteration
    iteration_number = 0
    evaluation_count = 0
    while evaluation_count < evaluation_budget and run_time / time_budget < 1:
        if evaluation_count % evaluations_per_optimization_iteration == 0:
            iteration_number += 1
            print("Starting optimization iteration", iteration_number)
            iteration_t0 = datetime.datetime.now()

        model_t0 = datetime.datetime.now()
        regression_models, _, _ = models.generate_mono_output_regression_models(
            data_array,
            param_space,
            input_params,
            optimization_metrics,
            1.00,
            config,
            model_type=model_type,
            number_of_cpus=number_of_cpus,
            print_importances=print_importances,
            normalize_objectives=normalize_objectives,
            objective_limits=objective_limits,
        )

        classification_model = None
        if enable_feasible_predictor:
            classification_model, _, _ = models.generate_classification_model(
                application_name,
                param_space,
                data_array,
                input_params,
                feasible_parameter,
                1.00,
                config,
                debug,
                number_of_cpus=number_of_cpus,
                data_array_exhaustive=exhaustive_search_data_array,
                enable_feasible_predictor_grid_search_on_recall_and_precision=enable_feasible_predictor_grid_search_on_recall_and_precision,
                feasible_predictor_grid_search_validation_file=feasible_predictor_grid_search_validation_file,
                print_importances=print_importances,
            )
        model_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Model fitting time %10.4f sec\n"
                % ((model_t1 - model_t0).total_seconds())
            )
        )
        if weight_sampling == "bounding_box":
            objective_weights = sample_weight_bbox(
                optimization_metrics, objective_bounds, objective_limits, 1
            )[0]
        elif weight_sampling == "flat":
            objective_weights = sample_weight_flat(optimization_metrics, 1)[0]
        else:
            print("Error: unrecognized option:", weight_sampling)
            raise SystemExit

        data_array_scalarization, _ = compute_data_array_scalarization(
            data_array, objective_weights, objective_limits, scalarization_method
        )
        data_array[scalarization_key] = data_array_scalarization.tolist()

        epsilon = random.uniform(0, 1)
        local_search_t0 = datetime.datetime.now()
        if epsilon > epsilon_greedy_threshold:
            best_configuration = bo_method(
                config,
                data_array,
                param_space,
                fast_addressing_of_data_array,
                regression_models,
                iteration_number,
                objective_weights,
                objective_limits,
                classification_model,
                profiling,
                acquisition_function_optimizer,
            )

        else:
            sys.stdout.write_to_logfile(
                str(epsilon)
                + " < "
                + str(epsilon_greedy_threshold)
                + " random sampling a configuration to run\n"
            )
            tmp_fast_addressing_of_data_array = copy.deepcopy(
                fast_addressing_of_data_array
            )
            best_configuration = (
                param_space.random_sample_configurations_without_repetitions(
                    tmp_fast_addressing_of_data_array, 1, use_priors=False
                )[0]
            )
        local_search_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Local search time %10.4f sec\n"
                % ((local_search_t1 - local_search_t0).total_seconds())
            )
        )

        configurations.append(best_configuration)

        # When we have selected "evaluations_per_optimization_iteration" configurations, evaluate the batch
        if evaluation_count % evaluations_per_optimization_iteration == (
            evaluations_per_optimization_iteration - 1
        ):
            black_box_function_t0 = datetime.datetime.now()
            new_data_array = param_space.run_configurations(
                hypermapper_mode,
                configurations,
                beginning_of_time,
                output_data_file,
                black_box_function,
                exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array,
                run_directory,
                batch_mode=batch_mode,
            )
            black_box_function_t1 = datetime.datetime.now()
            sys.stdout.write_to_logfile(
                (
                    "Black box function time %10.4f sec\n"
                    % ((black_box_function_t1 - black_box_function_t0).total_seconds())
                )
            )

            # If running batch BO, we will have some liars in fast_addressing_of_data_array; update them with the true values
            for configuration_idx in range(
                len(new_data_array[list(new_data_array.keys())[0]])
            ):
                configuration = get_single_configuration(
                    new_data_array, configuration_idx
                )
                str_data = param_space.get_unique_hash_string_from_values(configuration)
                if str_data in fast_addressing_of_data_array:
                    absolute_index = fast_addressing_of_data_array[str_data]
                    for header in configuration:
                        data_array[header][absolute_index] = configuration[header]
                else:
                    fast_addressing_of_data_array[
                        str_data
                    ] = absolute_configuration_index
                    absolute_configuration_index += 1
                    for header in configuration:
                        data_array[header].append(configuration[header])

            configurations = []
        else:
            # If we have not selected all points in the batch yet, add the model prediction as a 'liar'
            for header in best_configuration:
                data_array[header].append(best_configuration[header])

            bufferx = [tuple(best_configuration.values())]
            prediction_means, _ = models.compute_model_mean_and_uncertainty(
                bufferx, regression_models, model_type, param_space
            )
            for objective in prediction_means:
                data_array[objective].append(prediction_means[objective][0])

            if classification_model is not None:
                classification_prediction_results = models.model_probabilities(
                    bufferx, classification_model, param_space
                )
                true_value_index = (
                    classification_model[feasible_parameter[0]]
                    .classes_.tolist()
                    .index(True)
                )
                feasibility_indicator = classification_prediction_results[
                    feasible_parameter[0]
                ][:, true_value_index]
                data_array[feasible_output_name].append(
                    bool(feasibility_indicator[0] >= 0.5)
                )

            data_array[param_space.get_timestamp_parameter()[0]].append(
                absolute_configuration_index
            )
            str_data = param_space.get_unique_hash_string_from_values(
                best_configuration
            )
            fast_addressing_of_data_array[str_data] = absolute_configuration_index
            absolute_configuration_index += 1

        for objective in optimization_metrics:
            lower_bound = min(
                objective_limits[objective][0], min(data_array[objective])
            )
            upper_bound = max(
                objective_limits[objective][1], max(data_array[objective])
            )
            objective_limits[objective] = [lower_bound, upper_bound]

        evaluation_count += 1
        run_time = (datetime.datetime.now() - start_time).total_seconds() / 60
        iteration_t1 = datetime.datetime.now()
        sys.stdout.write_to_logfile(
            (
                "Total iteration time %10.4f sec\n"
                % ((iteration_t1 - iteration_t0).total_seconds())
            )
        )

        if profiling is not None:
            profiling.add("Model fitting time", (model_t1 - model_t0).total_seconds())
            # local search profiling is done inside of local search
            # the black box timers only exist on iterations where a batch was just evaluated
            if evaluation_count % evaluations_per_optimization_iteration == 0:
                profiling.add(
                    "Black box function time",
                    (black_box_function_t1 - black_box_function_t0).total_seconds(),
                )

    sys.stdout.write_to_logfile(
        (
            "End of BO phase - Time %10.4f sec\n"
            % ((datetime.datetime.now() - bo_t0).total_seconds())
        )
    )

    print("End of Bayesian Optimization")

    print_posterior_best = config["print_posterior_best"]
    if print_posterior_best:
        if number_of_objectives > 1:
            print(
                "Warning: print_posterior_best is set to true, but application is not mono-objective."
            )
            print(
                "Can only compute best according to posterior for mono-objective applications. Ignoring."
            )
        elif enable_feasible_predictor:
            print(
                "Warning: print_posterior_best is set to true, but application has feasibility constraints."
            )
            print(
                "Cannot compute best according to posterior for applications with feasibility constraints. Ignoring."
            )
        else:
            # Update model with latest data
            regression_models, _, _ = models.generate_mono_output_regression_models(
                data_array,
                param_space,
                input_params,
                optimization_metrics,
                1.00,
                config,
                model_type=model_type,
                number_of_cpus=number_of_cpus,
                print_importances=print_importances,
                normalize_objectives=normalize_objectives,
                objective_limits=objective_limits,
            )

            best_point = models.minimize_posterior_mean(
                regression_models,
                config,
                param_space,
                data_array,
                objective_limits,
                normalize_objectives,
                profiling,
            )
            keys = ""
            best_point_string = ""
            for key in best_point:
                keys += f"{key},"
                best_point_string += f"{best_point[key]},"
            keys = keys[:-1]
            best_point_string = best_point_string[:-1]

            sys.stdout.write_protocol("Minimum of the posterior mean:\n")
            sys.stdout.write_protocol(f"{keys}\n")
            sys.stdout.write_protocol(f"{best_point_string}\n\n")

    sys.stdout.write_to_logfile(
        (
            "Total script time %10.2f sec\n"
            % ((datetime.datetime.now() - start_time).total_seconds())
        )
    )

    return data_array
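
# The selection step in the loop above is epsilon-greedy: with probability
# epsilon_greedy_threshold a configuration is sampled at random, otherwise the
# acquisition function is optimized. A distilled sketch of that rule, where
# optimize_acquisition and random_sample are hypothetical stand-ins for the
# bo_method and random-sampling calls above:
import random

def choose_next(epsilon_greedy_threshold, optimize_acquisition, random_sample):
    if random.uniform(0, 1) > epsilon_greedy_threshold:
        return optimize_acquisition()  # exploit the surrogate models
    return random_sample()  # explore uniformly at random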
Example #3
def main(config, black_box_function=None, output_file="", profiling=None):
    """
    Run design-space exploration using evolution.
    :param config: dictionary containing all the configuration parameters of this design-space exploration.
    :param black_box_function: the function hypermapper seeks to optimize.
    :param output_file: a name for the file used to save the dse results.
    :param profiling: profiler object used to time the phases of the optimization.
    :return: a data array with all explored samples.
    """
    param_space = space.Space(config)

    run_directory = config["run_directory"]
    application_name = config["application_name"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    if number_of_objectives != 1:
        print(
            "the evolutionary optimization does not support multi-objective optimization. Exiting."
        )
        sys.exit()

    fitness_measure = optimization_metrics[0]
    population_size = config["evolution_population_size"]
    generations = config["evolution_generations"]
    mutation_rate = config["mutation_rate"]
    if mutation_rate > len(param_space.get_input_parameters()):
        print("Mutation rate cannot be higher than the number of parameters. Exiting.")
        sys.exit()
    if mutation_rate < 1:
        print("Mutation rate must be at least 1 for evolution to work. Exiting.")
        sys.exit()
    crossover = config["evolution_crossover"]
    regularize = config["regularize_evolution"]
    batch_size = config["batch_size"]
    if batch_size > population_size:
        print("population_size must be bigger than batch_size. Exiting.")
        sys.exit()
    elif batch_size < 2 and not crossover:
        print("batch_size cannot be smaller than 2. Exiting.")
        sys.exit()
    elif batch_size < 3 and crossover:
        print("batch_size must be at least 3 when using crossover. Exiting.")
        sys.exit()

    log_file = deal_with_relative_and_absolute_path(run_directory, config["log_file"])
    sys.stdout.change_log_file(log_file)
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    if output_file == "":
        output_data_file = config["output_data_file"]
        if output_data_file == "output_samples.csv":
            output_data_file = application_name + "_" + output_data_file
    else:
        output_data_file = output_file

    absolute_configuration_index = 0
    fast_addressing_of_data_array = {}
    evolution_fast_addressing_of_data_array = {}
    evolution_data_array = defaultdict(list)

    beginning_of_time = param_space.current_milli_time()

    optimization_function_parameters = dict()
    optimization_function_parameters["hypermapper_mode"] = hypermapper_mode
    optimization_function_parameters["param_space"] = param_space
    optimization_function_parameters["beginning_of_time"] = beginning_of_time
    optimization_function_parameters["run_directory"] = run_directory
    optimization_function_parameters["black_box_function"] = black_box_function
    optimization_function_parameters["evolution_data_array"] = evolution_data_array
    optimization_function_parameters[
        "fast_addressing_of_data_array"
    ] = evolution_fast_addressing_of_data_array

    print("Starting evolution...")
    evolution_t0 = datetime.datetime.now()
    all_samples = evolution(
        population_size,
        generations,
        mutation_rate,
        crossover,
        regularize,
        batch_size,
        fitness_measure,
        param_space,
        fast_addressing_of_data_array,
        run_objective_function,
        optimization_function_parameters,
        profiling,
    )

    print(
        "Evolution finished after %d function evaluations"
        % (len(evolution_data_array[optimization_metrics[0]]))
    )
    sys.stdout.write_to_logfile(
        (
            "Evolutionary search time %10.4f sec\n"
            % ((datetime.datetime.now() - evolution_t0).total_seconds())
        )
    )

    with open(
        deal_with_relative_and_absolute_path(run_directory, output_data_file), "w"
    ) as f:
        w = csv.writer(f)
        w.writerow(list(evolution_data_array.keys()))
        tmp_list = [
            param_space.convert_types_to_string(j, evolution_data_array)
            for j in list(evolution_data_array.keys())
        ]
        tmp_list = list(zip(*tmp_list))
        for i in range(len(evolution_data_array[optimization_metrics[0]])):
            w.writerow(tmp_list[i])

    print("### End of the evolutionary search")
    return evolution_data_array
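
# The evolution routine called above is defined elsewhere. As a rough sketch
# of the underlying idea, regularized (aging) evolution draws a random
# tournament of batch_size individuals, mutates the fittest, and retires the
# oldest. This illustrates the algorithm, not HyperMapper's implementation:
import random
from collections import deque

def regularized_evolution_sketch(population, fitness, mutate, steps, batch_size):
    population = deque(population)
    for _ in range(steps):
        tournament = random.sample(list(population), batch_size)
        parent = min(tournament, key=fitness)  # minimize the fitness measure
        population.append(mutate(parent))
        population.popleft()  # age-based removal regularizes the search
    return min(population, key=fitness)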
Example #4
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    try:
        DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
        DefaultValidatingDraft4Validator(schema).validate(config)
    except exceptions.ValidationError as ve:
        print("Failed to validate json:")
        print(ve)
        raise SystemExit

    run_directory = config["run_directory"]
    if run_directory == ".":
        run_directory = initial_directory
        config["run_directory"] = run_directory
    log_file = config["log_file"]
    if log_file == "hypermapper_logfile.log":
        log_file = deal_with_relative_and_absolute_path(
            run_directory, log_file)
    sys.stdout = Logger(log_file)

    main(config)

    try:
        os.chdir(hypermapper_pwd)
    except OSError:
        pass

    print("### End of bayesian optimization script.")
Example #5
def prior_guided_optimization(
    config,
    data_array,
    param_space,
    fast_addressing_of_data_array,
    regression_models,
    iteration_number,
    objective_weights,
    objective_limits,
    classification_model=None,
    profiling=None,
    acquisition_function_optimizer="local_search",
):
    """
    Run a prior-guided bayesian optimization iteration.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param data_array: a dictionary containing previously explored points and their function values.
    :param param_space: parameter space object for the current application.
    :param fast_addressing_of_data_array: dictionary for quick-access to previously explored configurations.
    :param regression_models: the surrogate models used to evaluate points.
    :param iteration_number: the current iteration number.
    :param objective_weights: objective weights for multi-objective optimization. Not implemented yet.
    :param objective_limits: estimated minimum and maximum limits for each objective.
    :param classification_model: feasibility classifier for constrained optimization.
    :param profiling: profiler object used to time the phases of the optimization.
    :param acquisition_function_optimizer: optimizer used to maximize the acquisition function ("local_search" or "cma_es").
    :return: the best configuration found by the acquisition function optimizer.
    """
    scalarization_key = config["scalarization_key"]
    number_of_cpus = config["number_of_cpus"]

    function_parameters = {}
    function_parameters["param_space"] = param_space
    function_parameters["iteration_number"] = iteration_number
    function_parameters["regression_models"] = regression_models
    function_parameters["classification_model"] = classification_model
    function_parameters["objective_weights"] = objective_weights
    function_parameters["objective_limits"] = objective_limits
    function_parameters["model_type"] = config["models"]["model"]
    function_parameters["model_weight"] = config["model_posterior_weight"]
    function_parameters["posterior_floor"] = config[
        "posterior_computation_lower_limit"]
    model_good_quantile = config["model_good_quantile"]
    function_parameters["threshold"] = {}
    optimization_metrics = param_space.get_optimization_parameters()
    for objective in optimization_metrics:
        function_parameters["threshold"][objective] = np.quantile(
            data_array[objective], model_good_quantile)

    if param_space.get_prior_normalization_flag() is True:
        prior_limit_estimation_points = config["prior_limit_estimation_points"]
        good_prior_normalization_limits = estimate_prior_limits(
            param_space, prior_limit_estimation_points, objective_weights)
    else:
        good_prior_normalization_limits = None
    function_parameters[
        "good_prior_normalization_limits"] = good_prior_normalization_limits

    if classification_model is not None:
        function_parameters["posterior_normalization_limits"] = [
            float("inf"),
            float("-inf"),
        ]

    if acquisition_function_optimizer == "local_search":
        local_search_starting_points = config["local_search_starting_points"]
        local_search_random_points = config["local_search_random_points"]
        _, best_configuration = local_search(
            local_search_starting_points,
            local_search_random_points,
            param_space,
            fast_addressing_of_data_array,
            False,  # set feasibility to false, we handle it inside the acquisition function
            compute_EI_from_posteriors,
            function_parameters,
            scalarization_key,
            number_of_cpus,
            previous_points=data_array,
            profiling=profiling,
        )
    elif acquisition_function_optimizer == "cma_es":
        logfile = deal_with_relative_and_absolute_path(config["run_directory"],
                                                       config["log_file"])
        sigma = config["cma_es_sigma"]
        cma_es_starting_points = config["cma_es_starting_points"]
        cma_es_random_points = config["cma_es_random_points"]
        best_configuration = cma_es(
            param_space,
            data_array,
            fast_addressing_of_data_array,
            scalarization_key,
            logfile,
            compute_EI_from_posteriors,
            function_parameters,
            cma_es_random_points=cma_es_random_points,
            cma_es_starting_points=cma_es_starting_points,
            sigma=sigma,
        )
    else:
        print(
            "Unrecognized acquisition function optimizer:",
            acquisition_function_optimizer,
        )
        raise SystemExit

    return best_configuration
Example #6
def main(config, black_box_function=None, profiling=None):
    """
    Run design-space exploration using local search.
    :param config: dictionary containing all the configuration parameters of this design-space exploration.
    :param black_box_function: the function hypermapper seeks to optimize.
    :param profiling: profiler object used to time the phases of the optimization.
    :return: a data array with all explored samples.
    """
    param_space = space.Space(config)

    run_directory = config["run_directory"]
    application_name = config["application_name"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]
    if hypermapper_mode == "default":
        if black_box_function is None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    noise = config["noise"]
    output_data_file = get_output_data_file(config["output_data_file"],
                                            run_directory, application_name)
    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    # local search will not produce reasonable output if run in parallel - it is therefore disabled
    number_of_cpus = 1
    local_search_random_points = config["local_search_random_points"]
    local_search_evaluation_limit = config["local_search_evaluation_limit"]
    if local_search_evaluation_limit == -1:
        local_search_evaluation_limit = float("inf")
    scalarization_key = config["scalarization_key"]
    scalarization_method = config["scalarization_method"]
    scalarization_weights = config["local_search_scalarization_weights"]
    if len(scalarization_weights) < len(optimization_metrics):
        print(
            "Error: not enough scalarization weights. Received",
            len(scalarization_weights),
            "expected",
            len(optimization_metrics),
        )
        raise SystemExit
    if sum(scalarization_weights) != 1:
        sys.stdout.write_to_logfile(
            "Weights must sum to 1. Normalizing weights.\n")
        # Compute the total once: recomputing it inside the loop would divide
        # later weights by a partially normalized sum.
        weight_sum = sum(scalarization_weights)
        for idx in range(len(scalarization_weights)):
            scalarization_weights[idx] = scalarization_weights[idx] / weight_sum
        sys.stdout.write_to_logfile("New weights:" +
                                    str(scalarization_weights) + "\n")
    objective_weights = {}
    objective_limits = {}
    for idx, objective in enumerate(optimization_metrics):
        objective_weights[objective] = scalarization_weights[idx]
        objective_limits[objective] = [float("inf"), float("-inf")]

    exhaustive_search_data_array = None
    exhaustive_search_fast_addressing_of_data_array = None
    if hypermapper_mode == "exhaustive":
        exhaustive_file = config["hypermapper_mode"]["exhaustive_search_file"]
        print("Exhaustive mode, loading data from %s ..." % exhaustive_file)
        (
            exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array,
        ) = param_space.load_data_file(exhaustive_file,
                                       debug=False,
                                       number_of_cpus=number_of_cpus)

    enable_feasible_predictor = False
    if "feasible_output" in config:
        feasible_output = config["feasible_output"]
        feasible_output_name = feasible_output["name"]
        enable_feasible_predictor = feasible_output[
            "enable_feasible_predictor"]
        enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[
            "enable_feasible_predictor_grid_search_on_recall_and_precision"]
        feasible_predictor_grid_search_validation_file = feasible_output[
            "feasible_predictor_grid_search_validation_file"]
        feasible_parameter = param_space.get_feasible_parameter()

    local_search_starting_points = config["local_search_starting_points"]

    debug = False

    log_file = deal_with_relative_and_absolute_path(run_directory,
                                                    config["log_file"])
    sys.stdout.change_log_file(log_file)
    sys.stdout.set_verbose_mode(config["verbose_logging"])
    if hypermapper_mode == "client-server":
        sys.stdout.switch_log_only_on_file(True)

    absolute_configuration_index = 0
    fast_addressing_of_data_array = {}
    local_search_fast_addressing_of_data_array = {}
    local_search_data_array = defaultdict(list)

    beginning_of_time = param_space.current_milli_time()

    optimization_function_parameters = {}
    optimization_function_parameters["hypermapper_mode"] = hypermapper_mode
    optimization_function_parameters["param_space"] = param_space
    optimization_function_parameters["beginning_of_time"] = beginning_of_time
    optimization_function_parameters["run_directory"] = run_directory
    optimization_function_parameters["output_data_file"] = output_data_file
    optimization_function_parameters[
        "exhaustive_search_data_array"] = exhaustive_search_data_array
    optimization_function_parameters[
        "exhaustive_search_fast_addressing_of_data_array"] = exhaustive_search_fast_addressing_of_data_array
    optimization_function_parameters["black_box_function"] = black_box_function
    optimization_function_parameters["number_of_cpus"] = number_of_cpus
    optimization_function_parameters[
        "local_search_data_array"] = local_search_data_array
    optimization_function_parameters[
        "fast_addressing_of_data_array"] = local_search_fast_addressing_of_data_array
    optimization_function_parameters[
        "evaluation_limit"] = local_search_evaluation_limit
    optimization_function_parameters[
        "scalarization_weights"] = objective_weights
    optimization_function_parameters["objective_limits"] = objective_limits
    optimization_function_parameters[
        "scalarization_method"] = scalarization_method
    optimization_function_parameters[
        "enable_feasible_predictor"] = enable_feasible_predictor

    create_output_data_file(
        output_data_file,
        param_space.get_input_output_and_timestamp_parameters())

    print("Starting local search...")
    local_search_t0 = datetime.datetime.now()
    all_samples, best_configuration = local_search(
        local_search_starting_points,
        local_search_random_points,
        param_space,
        fast_addressing_of_data_array,
        enable_feasible_predictor,
        run_objective_function,
        optimization_function_parameters,
        scalarization_key,
        number_of_cpus,
        profiling=profiling,
        noise=noise,
    )

    print("Local search finished after %d function evaluations" %
          (len(local_search_data_array[optimization_metrics[0]])))

    print("### End of the local search.")
    return local_search_data_array
def random_scalarizations(
    config,
    data_array,
    param_space,
    fast_addressing_of_data_array,
    regression_models,
    iteration_number,
    objective_weights,
    objective_limits,
    classification_model=None,
    profiling=None,
    acquisition_function_optimizer="local_search",
):
    """
    Run one iteration of bayesian optimization with random scalarizations.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param data_array: a dictionary containing previously explored points and their function values.
    :param param_space: parameter space object for the current application.
    :param fast_addressing_of_data_array: dictionary for quick-access to previously explored configurations.
    :param regression_models: the surrogate models used to evaluate points.
    :param iteration_number: the current iteration number.
    :param objective_weights: objective weights for multi-objective optimization. Not implemented yet.
    :param objective_limits: estimated minimum and maximum limits for each objective.
    :param classification_model: feasibility classifier for constrained optimization.
    :param profiling: profiler object used to time the phases of the optimization.
    :param acquisition_function_optimizer: optimizer used to maximize the acquisition function ("local_search" or "cma_es").
    :return: the best configuration found by the acquisition function optimizer.
    """
    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    local_search_starting_points = config["local_search_starting_points"]
    local_search_random_points = config["local_search_random_points"]
    scalarization_key = config["scalarization_key"]
    number_of_cpus = config["number_of_cpus"]

    optimization_function_parameters = {}
    optimization_function_parameters["regression_models"] = regression_models
    optimization_function_parameters["iteration_number"] = iteration_number
    optimization_function_parameters["data_array"] = data_array
    optimization_function_parameters["classification_model"] = classification_model
    optimization_function_parameters["param_space"] = param_space
    optimization_function_parameters["objective_weights"] = objective_weights
    optimization_function_parameters["model_type"] = config["models"]["model"]
    optimization_function_parameters["objective_limits"] = objective_limits
    optimization_function_parameters["acquisition_function"] = config[
        "acquisition_function"
    ]
    optimization_function_parameters["scalarization_method"] = config[
        "scalarization_method"
    ]
    optimization_function_parameters["number_of_cpus"] = config["number_of_cpus"]

    if acquisition_function_optimizer == "local_search":
        _, best_configuration = local_search(
            local_search_starting_points,
            local_search_random_points,
            param_space,
            fast_addressing_of_data_array,
            False,  # we do not want the local search to consider feasibility constraints, only the acquisition functions
            run_acquisition_function,
            optimization_function_parameters,
            scalarization_key,
            number_of_cpus,
            previous_points=data_array,
            profiling=profiling,
        )
    elif acquisition_function_optimizer == "cma_es":
        logfile = deal_with_relative_and_absolute_path(
            config["run_directory"], config["log_file"]
        )
        sigma = config["cma_es_sigma"]
        cma_es_starting_points = config["cma_es_starting_points"]
        cma_es_random_points = config["cma_es_random_points"]
        best_configuration = cma_es(
            param_space,
            data_array,
            fast_addressing_of_data_array,
            scalarization_key,
            logfile,
            run_acquisition_function,
            optimization_function_parameters,
            cma_es_random_points=cma_es_random_points,
            cma_es_starting_points=cma_es_starting_points,
            sigma=sigma,
        )
    else:
        print(
            "Unrecognized acquisition function optimizer:",
            acquisition_function_optimizer,
        )
        raise SystemExit

    return best_configuration
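
# The scalarization_method passed along above collapses the objectives into a
# single value per point. A sketch of the simplest variant, a normalized
# weighted sum, where limits[objective] holds the [min, max] observed so far
# (HyperMapper's scalarization_method option also covers Tchebyshev-style
# variants):
def linear_scalarization(values, weights, limits):
    total = 0.0
    for objective, value in values.items():
        lower, upper = limits[objective]
        normalized = (value - lower) / (upper - lower) if upper > lower else 0.0
        total += weights[objective] * normalized
    return total
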
def compute(
    parameters_file="example_scenarios/spatial/BlackScholes_scenario.json",
    input_data_file=None,
    output_pareto_file=None,
):
    """
    Compute the Pareto front from the csv data file and write it to the file specified in the json output_pareto_file field.
    :param parameters_file: the json file that specifies all the HyperMapper input parameters.
    :param input_data_file: overrides the data file specified in the json, if provided.
    :param output_pareto_file: overrides the Pareto file specified in the json, if provided.
    :return: nothing; the Pareto csv file is written to disk.
    """
    try:
        hypermapper_pwd = os.environ["PWD"]
        hypermapper_home = os.environ["HYPERMAPPER_HOME"]
        os.chdir(hypermapper_home)
    except (KeyError, OSError):
        hypermapper_pwd = "."

    print("######## compute_pareto.py #####################")
    print("### Parameters file is %s" % parameters_file)
    sys.stdout.flush()

    filename, file_extension = os.path.splitext(parameters_file)
    if file_extension != ".json":
        print(
            "Error: invalid file name. \nThe input file has to be a .json file not a %s"
            % file_extension)
        exit(1)
    with open(parameters_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    DefaultValidatingDraft4Validator(schema).validate(config)

    application_name = config["application_name"]
    max_number_of_predictions = config["max_number_of_predictions"]
    optimization_metrics = config["optimization_objectives"]
    number_of_cpus = config["number_of_cpus"]
    run_directory = config["run_directory"]
    if run_directory == ".":
        run_directory = hypermapper_pwd
        config["run_directory"] = run_directory
    if input_data_file is None:
        input_data_file = config["output_data_file"]
        if input_data_file == "output_samples.csv":
            input_data_file = application_name + "_" + input_data_file
    input_data_file = deal_with_relative_and_absolute_path(
        run_directory, input_data_file)
    if output_pareto_file is None:
        output_pareto_file = config["output_pareto_file"]
        if output_pareto_file == "output_pareto.csv":
            output_pareto_file = application_name + "_" + output_pareto_file
    output_pareto_file = deal_with_relative_and_absolute_path(
        run_directory, output_pareto_file)

    param_space = space.Space(config)
    print("### The input data file is %s" % input_data_file)
    print("### The output Pareto file is %s" % output_pareto_file)
    print("################################################")
    debug = False

    print("Computing the Pareto...")
    start_time = datetime.datetime.now()
    # Compute Pareto and save it to output_pareto_file
    count_number_of_points_in_Pareto = compute_pareto(
        param_space, input_data_file, output_pareto_file, debug,
        number_of_cpus)
    end_time = datetime.datetime.now()
    print(
        ("Total time of computation is (read and Pareto computation): " + str(
            (end_time - start_time).total_seconds()) + " seconds"))
    print(("The total size of the Pareto (RS + AL) is: %d" %
           count_number_of_points_in_Pareto))
    sys.stdout.flush()
    print("End of the compute_pareto.py script!\n")
Example #9
def optimize(parameters_file, black_box_function=None):
    """
    Run the design-space exploration method specified in the parameters file.
    :param parameters_file: the json file specifying the optimization scenario.
    :param black_box_function: the function hypermapper seeks to optimize, when run in default mode.
    """
    try:
        hypermapper_pwd = os.environ["PWD"]
        hypermapper_home = os.environ["HYPERMAPPER_HOME"]
        os.chdir(hypermapper_home)
        warnings.warn(
            "Found environment variable 'HYPERMAPPER_HOME', used to update the system path. Support might be discontinued in the future. Please make sure your installation is working without this environment variable, e.g., by installing with 'pip install hypermapper'.",
            DeprecationWarning,
            2,
        )
    except (KeyError, OSError):
        hypermapper_pwd = "."

    if not parameters_file.endswith(".json"):
        _, file_extension = os.path.splitext(parameters_file)
        print(
            "Error: invalid file name. \nThe input file has to be a .json file not a %s"
            % file_extension)
        raise SystemExit
    with open(parameters_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    try:
        DefaultValidatingDraft4Validator(schema).validate(config)
    except exceptions.ValidationError as ve:
        print("Failed to validate json:")
        print(ve)
        raise SystemExit

    # This handles the logger. The standard setting is that HyperMapper always logs both on screen and on the log file.
    # In cases like the client-server mode we only want to log on the file.
    run_directory = config["run_directory"]
    if run_directory == ".":
        run_directory = hypermapper_pwd
        config["run_directory"] = run_directory
    log_file = config["log_file"]
    log_file = deal_with_relative_and_absolute_path(run_directory, log_file)
    sys.stdout = Logger(log_file)

    optimization_method = config["optimization_method"]
    if config["profiling"]:
        profiling = Profiler(config)
        profiling.run()
    else:
        profiling = None

    if ((optimization_method == "random_scalarizations")
            or (optimization_method == "bayesian_optimization")
            or (optimization_method == "prior_guided_optimization")):
        data_array = bo.main(config,
                             black_box_function=black_box_function,
                             profiling=profiling)
    elif optimization_method == "local_search":
        data_array = local_search.main(config,
                                       black_box_function=black_box_function,
                                       profiling=profiling)
    elif optimization_method == "evolutionary_optimization":
        data_array = evolution.main(config,
                                    black_box_function=black_box_function,
                                    profiling=profiling)
    else:
        print("Unrecognized optimization method:", optimization_method)
        raise SystemExit
    if config["profiling"]:
        profiling.stop()

    try:
        os.chdir(hypermapper_pwd)
    except OSError:
        pass

    # If mono-objective, compute the best point found
    objectives = config["optimization_objectives"]
    inputs = list(config["input_parameters"].keys())
    if len(objectives) == 1:
        explored_points = {}
        for parameter in inputs + objectives:
            explored_points[parameter] = data_array[parameter]
        objective = objectives[0]
        feasible_output = config["feasible_output"]
        if feasible_output["enable_feasible_predictor"]:
            feasible_parameter = feasible_output["name"]
            explored_points[feasible_parameter] = data_array[
                feasible_parameter]
            best_point = get_min_feasible_configurations(
                explored_points, 1, objective, feasible_parameter)
        else:
            best_point = get_min_configurations(explored_points, 1, objective)
        keys = ""
        best_point_string = ""
        for parameter in inputs + objectives:
            keys += f"{parameter},"
            best_point_string += f"{best_point[parameter][0]},"
        keys = keys[:-1]
        best_point_string = best_point_string[:-1]

    # If there is a best point, report it according to the user's preference
    print_best = config["print_best"]
    if (print_best is not True) and (print_best is not False):
        if print_best != "auto":
            print(
                f"Warning: unrecognized option for print_best: {print_best}. Should be either 'auto' or a boolean."
            )
            print("Using default.")
        hypermapper_mode = config["hypermapper_mode"]
        print_best = False if hypermapper_mode == "client-server" else True

    if print_best:
        if len(objectives) == 1:
            sys.stdout.write_protocol("Best point found:\n")
            sys.stdout.write_protocol(f"{keys}\n")
            sys.stdout.write_protocol(f"{best_point_string}\n\n")
        else:
            if (
                    config["print_best"] is True
            ):  # If the user requested this, let them know it is not possible
                sys.stdout.write_protocol(
                    "\nMultiple objectives, there is no single best point.\n")
    else:
        if len(objectives) > 1:
            sys.stdout.write_to_logfile(
                "\nMultiple objectives, there is no single best point.\n")
        else:
            sys.stdout.write_to_logfile("Best point found:\n")
            sys.stdout.write_to_logfile(f"{keys}\n")
            sys.stdout.write_to_logfile(f"{best_point}\n\n")

    sys.stdout.write_protocol("End of HyperMapper\n")
Example #10
0
def plot(parameters_file, list_of_pairs_of_files=None, image_output_file=None):
    """
    Plot the results of a previously run design-space exploration.
    :param parameters_file: json file describing the experiment scenario.
    :param list_of_pairs_of_files: list of (pareto file, DSE data file) pairs to plot.
    :param image_output_file: optional name for the output pdf image.
    """
    if list_of_pairs_of_files is None:
        list_of_pairs_of_files = []
    try:
        hypermapper_pwd = os.environ["PWD"]
        hypermapper_home = os.environ["HYPERMAPPER_HOME"]
        os.chdir(hypermapper_home)
    except (KeyError, OSError):
        # Fall back to the current directory if the environment variables are not set
        hypermapper_home = "."
        hypermapper_pwd = "."
    show_samples = False  # Hard-coded toggle: set to True to also plot the sampled points

    filename, file_extension = os.path.splitext(parameters_file)
    if file_extension != ".json":
        print(
            "Error: invalid file name. \nThe input file has to be a .json file, not %s"
            % file_extension
        )
        exit(1)
    with open(parameters_file, "r") as f:
        config = json.load(f)

    schema = json.load(resource_stream("hypermapper", "schema.json"))

    DefaultValidatingDraft4Validator = extend_with_default(Draft4Validator)
    DefaultValidatingDraft4Validator(schema).validate(config)
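    # extend_with_default presumably follows the standard jsonschema recipe,
    # which fills in missing config keys with their schema defaults while
    # validating; a sketch of that recipe (an assumption, not shown in this
    # excerpt):
    #
    #     from jsonschema import validators
    #
    #     def extend_with_default(validator_class):
    #         validate_properties = validator_class.VALIDATORS["properties"]
    #
    #         def set_defaults(validator, properties, instance, schema):
    #             for prop, subschema in properties.items():
    #                 if "default" in subschema:
    #                     instance.setdefault(prop, subschema["default"])
    #             yield from validate_properties(validator, properties, instance, schema)
    #
    #         return validators.extend(validator_class, {"properties": set_defaults})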

    application_name = config["application_name"]
    optimization_metrics = config["optimization_objectives"]
    feasible_output = config["feasible_output"]
    feasible_output_name = feasible_output["name"]
    run_directory = config["run_directory"]
    if run_directory == ".":
        run_directory = hypermapper_pwd
        config["run_directory"] = run_directory

    xlog = config["output_image"]["image_xlog"]
    ylog = config["output_image"]["image_ylog"]

    if "optimization_objectives_labels_image_pdf" in config["output_image"]:
        optimization_objectives_labels_image_pdf = config["output_image"][
            "optimization_objectives_labels_image_pdf"
        ]
    else:
        optimization_objectives_labels_image_pdf = optimization_metrics

    # Fall back to the files named in the json config if no input files were given.
    if not list_of_pairs_of_files:
        output_pareto_file = config["output_pareto_file"]
        if output_pareto_file == "output_pareto.csv":
            output_pareto_file = application_name + "_" + output_pareto_file
        output_data_file = config["output_data_file"]
        if output_data_file == "output_samples.csv":
            output_data_file = application_name + "_" + output_data_file
        list_of_pairs_of_files.append(
            (
                deal_with_relative_and_absolute_path(run_directory, output_pareto_file),
                deal_with_relative_and_absolute_path(run_directory, output_data_file),
            )
        )
    else:
        for idx, (output_pareto_file, output_data_file) in enumerate(
            list_of_pairs_of_files
        ):
            list_of_pairs_of_files[idx] = (
                deal_with_relative_and_absolute_path(run_directory, output_pareto_file),
                deal_with_relative_and_absolute_path(run_directory, output_data_file),
            )

    if image_output_file is not None:
        output_image_pdf_file = deal_with_relative_and_absolute_path(
            run_directory, image_output_file
        )
        filename = os.path.basename(output_image_pdf_file)
        path = os.path.dirname(output_image_pdf_file)
        if path == "":
            output_image_pdf_file_with_all_samples = "all_" + filename
        else:
            output_image_pdf_file_with_all_samples = path + "/" + "all_" + filename
    else:
        tmp_file_name = config["output_image"]["output_image_pdf_file"]
        if tmp_file_name == "output_pareto.pdf":
            tmp_file_name = application_name + "_" + tmp_file_name
        output_image_pdf_file = deal_with_relative_and_absolute_path(
            run_directory, tmp_file_name
        )
        filename = os.path.basename(output_image_pdf_file)
        path = os.path.dirname(output_image_pdf_file)
        if path == "":
            output_image_pdf_file_with_all_samples = "all_" + filename
        else:
            output_image_pdf_file_with_all_samples = path + "/" + "all_" + filename
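    # The two branches above duplicate the "all_" sibling-path logic; a
    # hypothetical helper (not in the source) doing the same:
    #
    #     def prefix_filename(path, prefix="all_"):
    #         head, tail = os.path.split(path)
    #         return os.path.join(head, prefix + tail) if head else prefix + tail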

    str_files = ""
    for e in list_of_pairs_of_files:
        str_files += str(e[0] + " " + e[1] + " ")

    print("######### plot_pareto.py ##########################")
    print("### Parameters file is %s" % parameters_file)
    print("### The Pareto and DSE data files are: %s" % str_files)
    print("### The first output pdf image is %s" % output_image_pdf_file)
    print(
        "### The second output pdf image is %s" % output_image_pdf_file_with_all_samples
    )
    print("################################################")

    param_space = space.Space(config)

    # The Pareto plot assumes exactly two optimization objectives
    xelem = optimization_metrics[0]
    yelem = optimization_metrics[1]
    handler_map_for_legend = {}
    xlabel = optimization_objectives_labels_image_pdf[0]
    ylabel = optimization_objectives_labels_image_pdf[1]

    x_max = float("-inf")
    x_min = float("inf")
    y_max = float("-inf")
    y_min = float("inf")

    print_legend = True
    fig = plt.figure()
    ax1 = plt.subplot(1, 1, 1)

    if xlog:
        ax1.set_xscale("log")
    if ylog:
        ax1.set_yscale("log")

    # If objective_{1,2}_max is given, values are rescaled by it and shown as percentages
    objective_1_max = objective_2_max = 1
    objective_1_is_percentage = objective_2_is_percentage = False
    if "objective_1_max" in config["output_image"]:
        objective_1_max = config["output_image"]["objective_1_max"]
        objective_1_is_percentage = True
    if "objective_2_max" in config["output_image"]:
        objective_2_max = config["output_image"]["objective_2_max"]
        objective_2_is_percentage = True

    input_data_array = {}
    fast_addressing_of_data_array = {}
    non_valid_optimization_obj_1 = defaultdict(list)
    non_valid_optimization_obj_2 = defaultdict(list)

    for file_pair in list_of_pairs_of_files:  # file_pair is a tuple: (pareto file, DSE file)
        next_color = get_next_color()

        #############################################################################
        ###### Load data from files and do preprocessing on the data before plotting.
        #############################################################################
        for file in file_pair:
            print(("Loading data from %s ..." % file))
            (
                input_data_array[file],
                fast_addressing_of_data_array[file],
            ) = param_space.load_data_file(file, debug)
            if input_data_array[file] == None:
                print("Error: no data found in input data file: %s. \n" % file_pair[1])
                exit(1)
            if (xelem not in input_data_array[file]) or (
                yelem not in input_data_array[file]
            ):
                print(
                    "Error: the optimization variables have not been found in input data file %s. \n"
                    % file
                )
                exit(1)
            print(("Parameters are " + str(list(input_data_array[file].keys())) + "\n"))
            input_data_array[file][xelem] = [
                float(input_data_array[file][xelem][i]) / objective_1_max
                for i in range(len(input_data_array[file][xelem]))
            ]
            input_data_array[file][yelem] = [
                float(input_data_array[file][yelem][i]) / objective_2_max
                for i in range(len(input_data_array[file][yelem]))
            ]

            if objective_1_is_percentage:
                input_data_array[file][xelem] = [
                    input_data_array[file][xelem][i] * 100
                    for i in range(len(input_data_array[file][xelem]))
                ]
            if objective_2_is_percentage:
                input_data_array[file][yelem] = [
                    input_data_array[file][yelem][i] * 100
                    for i in range(len(input_data_array[file][yelem]))
                ]

            x_max, x_min, y_max, y_min = compute_min_max_samples(
                input_data_array[file], x_max, x_min, xelem, y_max, y_min, yelem
            )
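            # compute_min_max_samples is assumed to fold this file's points
            # into the running axis bounds, roughly:
            #
            #     x_max = max(x_max, max(input_data_array[file][xelem]))
            #     x_min = min(x_min, min(input_data_array[file][xelem]))
            #     (and likewise for y_max / y_min)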

            input_data_array_size = len(
                input_data_array[file][list(input_data_array[file].keys())[0]]
            )
            print("Size of the data file %s is %d" % (file, input_data_array_size))

        file_pareto = file_pair[0]  # This is the Pareto file
        file_search = file_pair[1]  # This is the DSE file

        ######################################################################################################
        ###### Compute invalid samples to be plot in a different color (and remove them from the data arrays).
        ######################################################################################################
        if show_samples:
            # Move infeasible samples into separate lists and delete them from
            # the data arrays, so they can be plotted in a different color
            i = 0
            while i < len(input_data_array[file_search][yelem]):
                if not input_data_array[file_search][feasible_output_name][i]:
                    non_valid_optimization_obj_2[file_search].append(
                        input_data_array[file_search][yelem][i]
                    )
                    non_valid_optimization_obj_1[file_search].append(
                        input_data_array[file_search][xelem][i]
                    )
                    for key in list(input_data_array[file_search].keys()):
                        del input_data_array[file_search][key][i]
                else:
                    i += 1

            label_is = get_last_dir_and_file_names(file_search)
            (all_samples,) = plt.plot(
                input_data_array[file_search][xelem],
                input_data_array[file_search][yelem],
                color=next_color,
                linestyle="None",
                marker=".",
                mew=0.5,
                markersize=3,
                fillstyle="none",
                label=label_is,
            )
            plt.plot(
                input_data_array[file_pareto][xelem],
                input_data_array[file_pareto][yelem],
                linestyle="None",
                marker=".",
                mew=0.5,
                markersize=3,
                fillstyle="none",
            )
            handler_map_for_legend[all_samples] = HandlerLine2D(numpoints=1)

        ################################################################################################################
        ##### Create a staircase Pareto plot: each point of the data in paretoX and paretoY is duplicated to form the
        ##### steps. The points also need to be sorted along the x axis first.
        ################################################################################################################
        straight_pareto_x = list()
        straight_pareto_y = list()
        if len(input_data_array[file_pareto][xelem]) != 0:
            data_array_pareto_x, data_array_pareto_y = (
                list(t)
                for t in zip(
                    *sorted(
                        zip(
                            input_data_array[file_pareto][xelem],
                            input_data_array[file_pareto][yelem],
                        )
                    )
                )
            )
            for j in range(len(data_array_pareto_x)):
                straight_pareto_x.append(data_array_pareto_x[j])
                straight_pareto_x.append(data_array_pareto_x[j])
                straight_pareto_y.append(data_array_pareto_y[j])
                straight_pareto_y.append(data_array_pareto_y[j])
            straight_pareto_x.append(x_max)  # Just insert the max on the x axis
            straight_pareto_y.insert(0, y_max)  # Just insert the max on the y axis

        label_is = "Pareto - " + get_last_dir_and_file_names(file_pareto)

        (pareto_front,) = plt.plot(
            straight_pareto_x,
            straight_pareto_y,
            label=label_is,
            linewidth=1,
            color=next_color,
        )
        handler_map_for_legend[pareto_front] = HandlerLine2D(numpoints=1)
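        # A hedged aside (not in the original source): matplotlib can draw the
        # same staircase directly with drawstyle="steps-post", provided the two
        # sentinel points are added to the sorted Pareto points first:
        #
        #     xs = [data_array_pareto_x[0]] + data_array_pareto_x + [x_max]
        #     ys = [y_max] + data_array_pareto_y + [data_array_pareto_y[-1]]
        #     plt.plot(xs, ys, drawstyle="steps-post", linewidth=1, color=next_color)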

        label_is = "Invalid Samples - " + get_last_dir_and_file_names(file_search)
        if show_samples:
            (non_valid,) = plt.plot(
                non_valid_optimization_obj_1[file_search],
                non_valid_optimization_obj_2[file_search],
                linestyle="None",
                marker=".",
                mew=0.5,
                markersize=3,
                fillstyle="none",
                label=label_is,
            )
            handler_map_for_legend[non_valid] = HandlerLine2D(numpoints=1)

    plt.ylabel(ylabel, fontsize=16)
    plt.xlabel(xlabel, fontsize=16)
    # Set the fontsize of the tick labels on both axes
    # (tick.label.set_fontsize was removed in newer matplotlib; tick_params is equivalent)
    ax1.tick_params(axis="x", labelsize=14)
    ax1.tick_params(axis="y", labelsize=14)

    # Add the legend with some customizations
    if print_legend:
        ax1.legend(
            handler_map=handler_map_for_legend,
            loc="best",
            bbox_to_anchor=(1, 1),
            fancybox=True,
            shadow=True,
            ncol=1,
            prop={"size": 14},
        )

    font = {"size": 16}
    matplotlib.rc("font", **font)

    # Save the first image, which shows all samples on unclipped axes
    fig.savefig(output_image_pdf_file_with_all_samples, dpi=120, bbox_inches="tight")

    # Clip percentage axes to [0, 100] before saving the second image
    if objective_1_is_percentage:
        plt.xlim(0, 100)
    if objective_2_is_percentage:
        plt.ylim(0, 100)

    fig.savefig(output_image_pdf_file, dpi=120, bbox_inches="tight")
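
A minimal usage sketch for plot (hedged: the file names are placeholders; each pair is consumed above as (pareto file, DSE data file)):

plot(
    "branin_scenario.json",
    list_of_pairs_of_files=[
        ("branin_output_pareto.csv", "branin_output_samples.csv")
    ],
    image_output_file="branin_pareto.pdf",
)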