Ejemplo n.º 1
0
def preprocess_data_buffer(bufferx, param_space):
    """
    Run the preprocessing pipeline on a buffer of input configurations.

    The buffer is converted to a dictionary keyed by the input parameters of
    the space, handed to ``preprocess_data_array`` (which, per the original
    documentation, standardizes non-categorical inputs when the corresponding
    flag is set and one-hot encodes categorical ones), and converted back to a
    buffer using every key of the preprocessed dictionary.
    :param bufferx: data array containing the input configurations to preprocess.
    :param param_space: parameter space object for the current application.
    :return: preprocessed data buffer.
    """
    parameter_names = param_space.get_input_parameters()
    raw_columns = data_tuples_to_dictionary(bufferx, parameter_names)
    processed_columns = preprocess_data_array(
        raw_columns, param_space, parameter_names)

    # Preprocessing may change the set of columns, so the output keys are
    # taken from the preprocessed dictionary rather than the input names.
    output_columns = list(processed_columns.keys())
    return data_dictionary_to_tuple(processed_columns, output_columns)
Ejemplo n.º 2
0
def ls_compute_posterior_mean(configurations, model, model_type, param_space):
    """
    Evaluate the posterior mean of the first optimization objective.

    The signature and return shape follow the interface expected by
    HyperMapper's local search: it passes in configurations and receives a
    value plus a feasibility flag for each of them.
    :param configurations: configurations to compute posterior mean
    :param model: posterior model to use for predictions
    :param model_type: string with the type of model being used.
    :param param_space: Space object containing the search space.
    :return: a tuple ``(means, feasibility)`` where ``means`` is the list of
        posterior means for the first optimization parameter and
        ``feasibility`` is a list of 1s of the same length.
    """
    merged = concatenate_list_of_dictionaries(configurations)
    input_names = param_space.get_input_parameters()
    points = data_dictionary_to_tuple(merged, input_names)

    # Only the mean is needed here; the uncertainty estimate is discarded.
    means_by_objective, _ = compute_model_mean_and_uncertainty(
        points, model, model_type, param_space)

    first_objective = param_space.get_optimization_parameters()[0]
    objective_means = means_by_objective[first_objective]

    # Every configuration is reported as feasible (flag 1).
    feasibility_flags = [1] * len(objective_means)
    return list(objective_means), feasibility_flags
Ejemplo n.º 3
0
    def fit_RF(self, X, y, data_array=None, param_space=None, **kwargs):
        """
        Fit the forest and, when possible, re-sample its split thresholds.

        ``self.fit`` is always called first. If either ``data_array`` or
        ``param_space`` is missing nothing else happens. Otherwise per-leaf
        means and variances are recorded and the threshold of every internal
        node of every tree is replaced by a value drawn uniformly between the
        bounds returned by ``self.get_node_bounds``.
        :param X: the training data for the RF model.
        :param y: the training data labels for the RF model.
        :param data_array: a dictionary containing previously explored points and their function values.
        :param param_space: parameter space object for the current application.
        :param kwargs: extra keyword arguments forwarded unchanged to ``self.fit``.
        """
        self.fit(X, y, **kwargs)

        # The adapted fit needs both optional inputs; otherwise stop after the plain fit.
        if data_array is None or param_space is None:
            return

        bufferx = data_dictionary_to_tuple(data_array, list(data_array.keys()))
        leaves = self.get_leaves_per_sample(bufferx, param_space)
        self.set_means_per_leaf(y, leaves)
        self.set_vars_per_leaf(y, leaves)

        # Feature indices stored in the trees are mapped to names through the
        # key order of data_array.
        feature_names = list(data_array.keys())
        for tree_position, estimator in enumerate(self):
            node_samples = self.get_samples_per_node(
                estimator, leaves[tree_position, :])

            structure = estimator.tree_
            lefts = structure.children_left
            rights = structure.children_right
            for node in range(structure.node_count):
                # If both children are equal, this is a leaf in the tree
                is_leaf = lefts[node] == rights[node]
                if is_leaf:
                    continue

                split_feature = feature_names[structure.feature[node]]
                current_threshold = structure.threshold[node]
                low, high = self.get_node_bounds(
                    node_samples[node], data_array[split_feature],
                    current_threshold)

                # Draw the replacement threshold uniformly from [low, high].
                structure.threshold[node] = stats.uniform.rvs(
                    loc=low, scale=high - low)
def run_acquisition_function(
    acquisition_function,
    configurations,
    objective_weights,
    regression_models,
    param_space,
    scalarization_method,
    objective_limits,
    iteration_number,
    data_array,
    model_type,
    classification_model=None,
    number_of_cpus=0,
):
    """
    Score a list of configurations with the selected acquisition function.

    The configurations are merged into one dictionary, converted to a tuple
    buffer ordered by the input parameters of the space, and passed to
    ``thompson_sampling`` ("TS"), ``ucb`` ("UCB") or ``EI`` ("EI").
    :param acquisition_function: a string defining which acquisition function to apply ("TS", "UCB" or "EI").
    :param configurations: a list of dictionaries containing the configurations.
    :param objective_weights: a list containing the weights for each objective.
    :param regression_models: the surrogate models used to evaluate points.
    :param param_space: a space object containing the search space.
    :param scalarization_method: a string indicating which scalarization method to use.
    :param objective_limits: a dictionary with estimated minimum and maximum values for each objective.
    :param iteration_number: an integer for the current iteration number; forwarded to UCB and EI only.
    :param data_array: previously explored data; forwarded to EI only.
    :param model_type: string with the type of model being used.
    :param classification_model: the surrogate model used to evaluate feasibility constraints
    :param number_of_cpus: an integer for the number of cpus to be used in parallel.
    :return: a tuple ``(scalarized_values, feasibility_indicators)``: the list
        of scalarized values for each configuration and a list of 1s of the
        same length.
    :raises SystemExit: after printing a message, when ``acquisition_function``
        is not one of the recognized names.
    """
    merged = concatenate_list_of_dictionaries(configurations)
    points = data_dictionary_to_tuple(
        merged, param_space.get_input_parameters()
    )

    # Positional argument groups shared by all three acquisition functions.
    surrogate_args = (
        objective_weights,
        regression_models,
        param_space,
        scalarization_method,
        objective_limits,
    )
    trailing_args = (model_type, classification_model, number_of_cpus)

    # Each acquisition function returns a pair; only the scalarized values
    # are used here, the second element is discarded.
    if acquisition_function == "TS":
        scalarized_values, _ = thompson_sampling(
            points, *surrogate_args, *trailing_args
        )
    elif acquisition_function == "UCB":
        scalarized_values, _ = ucb(
            points, *surrogate_args, iteration_number, *trailing_args
        )
    elif acquisition_function == "EI":
        scalarized_values, _ = EI(
            points,
            data_array,
            *surrogate_args,
            iteration_number,
            *trailing_args,
        )
    else:
        print("Unrecognized acquisition function:", acquisition_function)
        raise SystemExit

    scalarized_values = list(scalarized_values)

    # The local search should treat every point as feasible; feasibility is
    # expected to be accounted for already in the scalarized values.
    feasibility_indicators = [1] * len(scalarized_values)

    return scalarized_values, feasibility_indicators