def main():
    # choices = {'a': 1, 'b': 2}
    # result = choices.get(key, 'default')

    user_input = input("Type: \n 1 for Random Search \n 2 for GA \n 3 for ABC \n 4 for Local Search \n")
    print('Target solution: {}'.format(TARGET))
    if user_input == '1':
        random_search(TARGET, NUMBER_OF_ITERATIONS)
    elif user_input == '2':
        ga_search(TARGET, NUMBER_OF_ITERATIONS, POPULATION_SIZE)
    elif user_input == '3':
        abc_search(TARGET, NUMBER_OF_ITERATIONS)
    elif user_input == '4':
        local_search(TARGET, NUMBER_OF_ITERATIONS)
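The commented-out lines at the top of main() hint at a dictionary-based dispatch. A minimal sketch of that variant, assuming random_search, ga_search, abc_search, local_search, TARGET, NUMBER_OF_ITERATIONS and POPULATION_SIZE are in scope as above (the name main_with_dispatch is hypothetical):

def main_with_dispatch():
    # Map each menu choice to a callable instead of chaining elif branches.
    searches = {
        '1': lambda: random_search(TARGET, NUMBER_OF_ITERATIONS),
        '2': lambda: ga_search(TARGET, NUMBER_OF_ITERATIONS, POPULATION_SIZE),
        '3': lambda: abc_search(TARGET, NUMBER_OF_ITERATIONS),
        '4': lambda: local_search(TARGET, NUMBER_OF_ITERATIONS),
    }
    user_input = input("Type: \n 1 for Random Search \n 2 for GA \n 3 for ABC \n 4 for Local Search \n")
    print('Target solution: {}'.format(TARGET))
    action = searches.get(user_input)
    if action is not None:
        action()
    else:
        print('Unknown choice: {}'.format(user_input))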
Example #2
def grasp_by_max_time(cost_function,
                      alpha,
                      greedy_loss_function,
                      max_time_in_seconds,
                      input,
                      params_glf=None):
    best_cost_solution = math.inf
    best_solution = None

    print("Initiating GRASP with time constraint...")
    time_start = time.time()
    time_current = time_start
    k = 0

    while time_current - time_start <= max_time_in_seconds:
        k = k + 1
        feasible_solution = construct(greedy_loss_function, alpha, input,
                                      params_glf)

        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)

        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution

        time_current = time.time()

    time_spent_computing = time_current - time_start

    return (best_solution, best_cost_solution, time_spent_computing, k)
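A hedged usage sketch for the function above; my_cost, my_greedy_loss and problem_input are placeholders for the caller's own cost function, greedy loss function and problem data, not names taken from the example:

# Run GRASP for at most 60 seconds with alpha = 0.3 (illustrative values only).
solution, cost, elapsed, iterations = grasp_by_max_time(
    cost_function=my_cost,
    alpha=0.3,
    greedy_loss_function=my_greedy_loss,
    max_time_in_seconds=60,
    input=problem_input)
print("best cost {} after {} iterations ({:.1f}s)".format(cost, iterations, elapsed))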
def random_scalarizations(config,
                          data_array,
                          param_space,
                          fast_addressing_of_data_array,
                          regression_models,
                          iteration_number,
                          objective_weights,
                          objective_limits,
                          classification_model=None):
    """
    Run one iteration of Bayesian optimization with random scalarizations.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param data_array: a dictionary containing previously explored points and their function values.
    :param param_space: parameter space object for the current application.
    :param fast_addressing_of_data_array: dictionary for quick-access to previously explored configurations.
    :param regression_models: the surrogate models used to evaluate points.
    :param iteration_number: the current iteration number.
    :param objective_weights: objective weights for multi-objective optimization. Not implemented yet.
    :param objective_limits: estimated minimum and maximum limits for each objective.
    :param classification_model: feasibility classifier for constrained optimization.
    """
    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    local_search_starting_points = config["local_search_starting_points"]
    local_search_random_points = config["local_search_random_points"]
    scalarization_key = config["scalarization_key"]

    optimization_function_parameters = {}
    optimization_function_parameters['regression_models'] = regression_models
    optimization_function_parameters['iteration_number'] = iteration_number
    optimization_function_parameters['data_array'] = data_array
    optimization_function_parameters[
        'classification_model'] = classification_model
    optimization_function_parameters['param_space'] = param_space
    optimization_function_parameters['objective_weights'] = objective_weights
    optimization_function_parameters['model_type'] = config["models"]["model"]
    optimization_function_parameters['objective_limits'] = objective_limits
    optimization_function_parameters['acquisition_function'] = config[
        "acquisition_function"]
    optimization_function_parameters['scalarization_method'] = config[
        "scalarization_method"]
    optimization_function_parameters['number_of_cpus'] = config[
        "number_of_cpus"]

    _, best_configuration = local_search(
        local_search_starting_points,
        local_search_random_points,
        param_space,
        fast_addressing_of_data_array,
        False,  # we do not want the local search to consider feasibility constraints, only the acquisition functions
        run_acquisition_function,
        optimization_function_parameters,
        scalarization_key,
        previous_points=data_array)
    return best_configuration
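The parameter dictionary above is assembled one key at a time; an equivalent dict literal (same keys and config fields, offered purely as a stylistic alternative) would be:

optimization_function_parameters = {
    'regression_models': regression_models,
    'iteration_number': iteration_number,
    'data_array': data_array,
    'classification_model': classification_model,
    'param_space': param_space,
    'objective_weights': objective_weights,
    'model_type': config["models"]["model"],
    'objective_limits': objective_limits,
    'acquisition_function': config["acquisition_function"],
    'scalarization_method': config["scalarization_method"],
    'number_of_cpus': config["number_of_cpus"],
}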
    def run(self):

        start = time.time()
        process_gop_list = []
        process_item_list = []
        process_util_list = []
        process_pair_list = []

        search_counter = 0

        print("Process " + str(self.processIdx) + " starts global search.")

        for idx, item in enumerate(
                partition_to_k(self.layer_list, self.acc_cluster_num, False),
                0):
            if idx % PROCESS_NUM == self.processIdx:
                sub_gop_list = []
                search_counter = search_counter + 1
                sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag \
                    = model_split_by_list(self.conv_N, self.conv_M, self.conv_r, self.conv_R, self.conv_K, self.conv_S, self.flag, item)
                sub_pair_list, sub_lat_list, sub_util_list = \
                    local_search(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag)

                for i in range(0, len(sub_conv_N)):
                    sub_gop_list.append(
                        gop_calculate(sub_conv_N[i], sub_conv_M[i],
                                      sub_conv_R[i], sub_conv_K[i]))

                if max(sub_lat_list) < self.overall_lat:
                    overall_lat = max(sub_lat_list)
                    if len(process_pair_list) < 6:
                        process_item_list.append(item)
                        process_pair_list.append(sub_pair_list)
                        # process_pair_list.append([overall_lat])
                        process_util_list.append([overall_lat])
                        process_gop_list.append(sub_gop_list)
                        # process_util_list.append(sub_util_list)
                        # process_pair_list.append(sub_util_list)
                    # else:
                    #     max_among_mins = process_pair_list.index(max(overall_lat))
                    #     process_pair_list.remove(process_pair_list[max_among_mins])
                    #     process_pair_list.append(sub_pair_list)
                    #     process_pair_list.append([overall_lat])
                    #     process_pair_list.append(sub_util_list)

            # print "For set ID: " + str(idx) + ", the final explored points = ", search_counter

        if len(process_pair_list) != 0:
            self.result_Q.put((process_pair_list, process_item_list,
                               process_gop_list, process_util_list))

        end = time.time()
        print("Thread ", self.processIdx, " :", (end - start))
Example #5
def grasp_by_max_time(cost_function,
                      alpha,
                      greedy_loss_function,
                      max_time_in_seconds,
                      input,
                      params_glf=None):
    best_cost_solution = math.inf
    best_solution = None

    print("Initiating GRASP with time constraint...")
    time_start = time.time()
    time_current = time_start
    k = 0

    while time_current - time_start <= max_time_in_seconds:
        k = k + 1
        feasible_solution = construct(greedy_loss_function, alpha, input,
                                      params_glf)

        if input["stop_after_some_time"] == True:
            if time.time() - input["time_start"] > input["max_time"]:
                return (best_solution, best_cost_solution,
                        time.time() - time_start, k)

        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)

        # TODO: Remove after debug
        check_if_solution_makes_sense(best_solution_in_neighbourhood)
        # TODO: Remove after debug

        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution

        print(f"Run {k} iteration!!!")
        time_current = time.time()

    time_spent_computing = time_current - time_start

    print(f"Run {k} iterations!!!")

    return (best_solution, best_cost_solution, time_spent_computing, k)
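The early-exit check inside the loop above restates the time budget by hand. A small reusable deadline helper, offered only as an illustrative alternative:

import time

def make_deadline(max_seconds):
    """Return a zero-argument callable that reports whether the budget is spent."""
    start = time.time()
    return lambda: time.time() - start > max_seconds

# Example: out_of_time = make_deadline(60); ... if out_of_time(): break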
Example #6
def grasp(cost_function, alpha, greedy_cost_function, max_iterations, input):
    best_cost_solution = math.inf
    best_solution = None

    print("Initiating GRASP...")

    start = time.time()

    for k in range(max_iterations):
        feasible_solution = construct(greedy_cost_function, alpha, input)
        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)

        print(f"Cost of new solution: {cost_of_new_solution}")
        print(
            f"Highest load of new solution: {best_solution_in_neighbourhood['highest_loaded_truck_load']}"
        )

        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution

    return (best_solution, best_cost_solution, time.time() - start,
            max_iterations)
Example #7
def grasp(cost_function, alpha, greedy_loss_function, max_iterations, input):
    best_cost_solution = math.inf
    best_solution = None

    print("Initiating GRASP...")

    for k in range(max_iterations):
        #print(f"Running {k} iteration: construct")
        feasible_solution = construct(greedy_loss_function, alpha, input)

        #print(f"Running {k} iteration: local_search")
        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)

        print(f"Cost of new solution: {cost_of_new_solution}")
        print(
            f"Highest load of new solution: {feasible_solution['highest_loaded_truck_load']}"
        )

        sum_in_rows = best_solution_in_neighbourhood['pt'].sum(axis=1)
        if np.any(sum_in_rows > 1):
            print(best_solution_in_neighbourhood)
            print("Local search f****d up for row > 1")
            sys.exit()
        if np.any(sum_in_rows == 0):
            print(best_solution_in_neighbourhood)
            print("Local search f****d up for row = 0")
            sys.exit()

        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution

        print(f"Run {k} iteration!!!")

    return (best_solution, best_cost_solution, None, max_iterations)
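The inline row-sum sanity checks above could be factored into a helper. A minimal sketch, assuming the solution dictionary carries its assignment matrix under the 'pt' key as in the example:

import sys

import numpy as np

def check_assignment_rows(solution):
    """Abort if any row of the assignment matrix sums to more than 1 or to 0."""
    sum_in_rows = solution['pt'].sum(axis=1)
    if np.any(sum_in_rows > 1):
        print(solution)
        sys.exit("Local search produced an invalid assignment: some row sums exceed 1")
    if np.any(sum_in_rows == 0):
        print(solution)
        sys.exit("Local search produced an invalid assignment: some row sums are 0")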
    groups = create_groups_with_bfs(graph, nb_groups=3)
    print(groups.shape)
    print(groups)
    plot_graph(graph,
               groups,
               title='gaussian kernel graph with 3 groups got with BFS',
               file_name='gaussian_kernel_graph.png')

    weights = get_adjacency_matrix(graph)
    # best_permutation = smb2.spectral_sequencing(weights)
    best_permutation = smb2.mc_allister(weights)
    print(
        f'value of bandwidth sum found: {smb2.bandwidth_sum(best_permutation, weights)}'
    )

    best_permutation, best_value = local_search(best_permutation, graph)
    print(best_permutation)
    print(
        f'value of bandwidth sum found: {smb2.bandwidth_sum(best_permutation, weights)}'
    )

    print(best_permutation)
    spectrogram_with_groups(
        graph,
        groups,
        permutation=best_permutation,
        file_name='spectrogram_gaussian_kernel_graph_groups_bfs.png',
        title=
        'mc_allister then local search on gaussian kernel graph with 3 groups got with BFS'
    )
def main(w0 = None):
    # tm should translate unknown words as-is with probability 1

    w = w0
    if w is None:
        # lm_logprob, distortion penalty, direct translation logprob, direct lexicon logprob, inverse translation logprob, inverse lexicon logprob
        if opts.weights == "no weights specify":
            w = [1.0/7] * 7
            # w = [1.76846735947, 0.352553835525, 1.00071564481, 1.49937872683, 0.562198294709, -0.701483985454, 1.80395218437]
        else:
            w = [float(line.strip()) for line in open(opts.weights)]
    sys.stderr.write(str(w) + '\n')

    tm = models.TM(opts.tm, opts.k, opts.mute)
    lm = models.LM(opts.lm, opts.mute)
    # ibm_t = {} 
    ibm_t = init('./data/ibm.t.gz')
    french = [tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents]]
    french = french[opts.start : opts.end]
    bound_width = float(opts.bwidth)

    for word in set(sum(french,())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, [0.0, 0.0, 0.0, 0.0])]



    nbest_output = []
    total_prob = 0
    if opts.mute == 0:
        sys.stderr.write("Start decoding %s ...\n" % (opts.input,))
    for idx,f in enumerate(french):
        if opts.mute == 0:
            sys.stderr.write("Decoding sentence #%s ...\n" % (str(idx)))
        initial_hypothesis = hypothesis(lm.begin(), 0.0, 0, 0, None, None, None)
        heaps = [{} for _ in f] + [{}]
        heaps[0][lm.begin(), 0, 0] = initial_hypothesis
        for i, heap in enumerate(heaps[:-1]):
            # maintain beam heap
            # front_item = sorted(heap.itervalues(), key=lambda h: -h.logprob)[0]
            for h in sorted(heap.itervalues(),key=lambda h: -h.logprob)[:opts.s]: # prune
                # if h.logprob < front_item.logprob - float(opts.bwidth):
                #    continue

                fopen = prefix1bits(h.coverage)
                for j in xrange(fopen,min(fopen+1+opts.disord, len(f)+1)):
                    for k in xrange(j+1, len(f)+1):
                        if f[j:k] in tm:
                            if (h.coverage & bitmap(range(j, k))) == 0:
                                for phrase in tm[f[j:k]]:
                                    lm_prob = 0
                                    lm_state = h.lm_state
                                    for word in phrase.english.split():
                                        (lm_state, prob) = lm.score(lm_state, word)
                                        lm_prob += prob
                                    lm_prob += lm.end(lm_state) if k == len(f) else 0.0
                                    coverage = h.coverage | bitmap(range(j, k))
                                    # logprob = h.logprob + lm_prob*w[0] + getDotProduct(phrase.several_logprob, w[2:6]) + abs(h.end+1-j)*w[1] + ibm_model_1_w_score(ibm_t, f, phrase.english)*w[6]
                                    logprob  = h.logprob
                                    logprob += lm_prob*w[0]
                                    logprob += getDotProduct(phrase.several_logprob, w[1:5])
                                    # logprob += opts.diseta*abs(h.end+1-j)*w[1]
                                    logprob += ibm_model_1_w_score(ibm_t, f, phrase.english)*w[5]
                                    logprob += (len(phrase.english.split()) - (k - j)) * w[6]

                                    new_hypothesis = hypothesis(lm_state, logprob, coverage, k, h, phrase, abs(h.end + 1 - j))

                                    # add to heap
                                    num = onbits(coverage)
                                    if (lm_state, coverage, k) not in heaps[num] or new_hypothesis.logprob > heaps[num][lm_state, coverage, k].logprob:
                                        heaps[num][lm_state, coverage, k] = new_hypothesis

        winners = sorted(heaps[-1].itervalues(), key=lambda h: -h.logprob)[0:opts.nbest]

        def get_lm_logprob(test_list):
            stance = []
            for i in test_list:
                stance += (i.split())
            stance = tuple(stance)
            lm_state = ("<s>",)
            score = 0.0
            for word in stance:
                (lm_state, word_score) = lm.score(lm_state, word)
                score += word_score
            return score
        def get_list_and_features(h, idx_self):
            lst = []
            features = [0, 0, 0, 0, 0, 0, 0]
            current_h = h
            while current_h.phrase is not None:
                # print current_h
                lst.append(current_h.phrase.english)
                # features[1] += current_h.distortionPenalty
                features[1] += current_h.phrase.several_logprob[0]      # translation feature 1
                features[2] += current_h.phrase.several_logprob[1]      # translation feature 2
                features[3] += current_h.phrase.several_logprob[2]      # translation feature 3
                features[4] += current_h.phrase.several_logprob[3]      # translation feature 4
                current_h = current_h.predecessor
            lst.reverse()
            features[0] = get_lm_logprob(lst)                           # language model score
            features[5] = ibm_model_1_score(ibm_t, f, lst)
            features[6] = len(lst) - len(french[idx_self])
            return (lst, features)

        for win in winners:
            # s = str(idx) + " ||| "
            (lst, features) = get_list_and_features(win, idx)
            print local_search.local_search(lst, lm)
Example #10
def prior_guided_optimization(config,
                              data_array,
                              param_space,
                              fast_addressing_of_data_array,
                              regression_models,
                              iteration_number,
                              objective_weights,
                              objective_limits,
                              classification_model=None):
    """
    Run a prior-guided Bayesian optimization iteration.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param data_array: a dictionary containing previously explored points and their function values.
    :param param_space: parameter space object for the current application.
    :param fast_addressing_of_data_array: dictionary for quick-access to previously explored configurations.
    :param regression_models: the surrogate models used to evaluate points.
    :param iteration_number: the current iteration number.
    :param objective_weights: objective weights for multi-objective optimization. Not implemented yet.
    :param objective_limits: estimated minimum and maximum limits for each objective.
    :param classification_model: feasibility classifier for constrained optimization.
    """
    local_search_starting_points = config["local_search_starting_points"]
    local_search_random_points = config["local_search_random_points"]
    scalarization_key = config["scalarization_key"]
    function_parameters = {}
    function_parameters["param_space"] = param_space
    function_parameters["iteration_number"] = iteration_number
    function_parameters["regression_models"] = regression_models
    function_parameters['classification_model'] = classification_model
    function_parameters["objective_weights"] = objective_weights
    function_parameters["objective_limits"] = objective_limits
    function_parameters['model_type'] = config["models"]["model"]
    function_parameters["model_weight"] = config["model_posterior_weight"]
    function_parameters["posterior_floor"] = config[
        "posterior_computation_lower_limit"]
    model_good_quantile = config["model_good_quantile"]
    function_parameters["threshold"] = {}
    optimization_metrics = param_space.get_optimization_parameters()
    for objective in optimization_metrics:
        function_parameters["threshold"][objective] = np.quantile(
            data_array[objective], model_good_quantile)

    if param_space.get_prior_normalization_flag() is True:
        prior_limit_estimation_points = config["prior_limit_estimation_points"]
        good_prior_normalization_limits = estimate_prior_limits(
            param_space, prior_limit_estimation_points, objective_weights)
    else:
        good_prior_normalization_limits = None
    function_parameters[
        "good_prior_normalization_limits"] = good_prior_normalization_limits

    if classification_model is not None:
        function_parameters["posterior_normalization_limits"] = [
            float("inf"), float("-inf")
        ]

    _, best_configuration = local_search(
        local_search_starting_points,
        local_search_random_points,
        param_space,
        fast_addressing_of_data_array,
        False,  # set feasibility to false, we handle it inside the acquisition function
        compute_EI_from_posteriors,
        function_parameters,
        scalarization_key,
        previous_points=data_array)
    return best_configuration
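The per-objective thresholds above are quantiles of the observed objective values. A standalone sketch of that step with hypothetical data (the runtime values and the 0.1 quantile are made up for illustration):

import numpy as np

data_array = {"runtime": [12.0, 9.5, 14.2, 8.7, 11.1]}
model_good_quantile = 0.1
thresholds = {
    objective: np.quantile(data_array[objective], model_good_quantile)
    for objective in data_array
}
print(thresholds)  # configurations below this value are treated as "good"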
Example #11
def test():
    facilities = read_and_parse_text("test_cases/test0.txt")
    #local_search(facilities, frozenset())
    #local_search(facilities, frozenset({'A','B','C','D','E'}))
    local_search(facilities, frozenset({'B', 'D'}))
Example #12
    def __init__(self, w, target_est_count=None, target_moe_count=None, target_th_count=None,\
                    target_est_prop=None, target_moe_prop=None, target_th_prop=None,\
                    target_est_ratio=None, target_moe_ratio=None, target_th_ratio=None,\
                    target_th_all=None, count_est=None, count_th_min=None, count_th_max=None,\
                    exclude=None, auto_exclude=0, base_solutions=100,\
                    zscore=True, pca=True, local_improvement=True, local_params=None,\
                    compactness=None, points=None, anchor=None, cardinality=False,\
                    cv_exclude_count=0, cv_exclude_prop=0, cv_exclude_ratio=0):

        time1 = time.time()
        time_output = {
            'prep': 0,
            'base': 0,
            'base_wrapup': 0,
            'local': 0,
            'local_wrapup': 0,
            'wrapup': 0,
            'total': 0
        }
        # convert arbitrary IDs in W object to integers
        id2i = w.id2i
        neighbors = {
            id2i[key]: [id2i[neigh] for neigh in w.neighbors[key]]
            for key in w.id_order
        }
        w = ps.W(neighbors)

        # build KDTree for use in finding base solution
        if issubclass(type(points), scipy.spatial.KDTree):
            kd = points
            points = kd.data
        elif type(points).__name__ == 'ndarray':
            kd = ps.common.KDTree(points)
        elif issubclass(type(points),
                        ps.core.IOHandlers.pyShpIO.PurePyShpWrapper):
            #loop to find centroids, need to be sure order matches W and data
            centroids = []
            for i in points:
                centroids.append(i.centroid)
            kd = ps.common.KDTree(centroids)
            points = kd.data
        elif points is None:
            kd = None
        else:
            raise Exception, 'Unsupported type passed to points'

        # dictionary allowing multivariate and univariate flexibility
        target_parts = {'target_est_count':target_est_count,\
                        'target_est_prop':target_est_prop,\
                        'target_est_ratio':target_est_ratio,\
                        'target_sde_count':target_moe_count ,\
                        'target_sde_prop':target_moe_prop,\
                        'target_sde_ratio':target_moe_ratio}

        # setup the holder for the variables to minimize; later we will put all
        # the count, ratio and proportion variables into this array.
        # Also, convert MOEs to standard errors when appropriate
        total_vars = 0
        rows = 0
        if target_est_count is not None:
            rows, cols = target_est_count.shape
            total_vars += cols
            target_parts['target_est_count'] = target_est_count * 1.0
            target_parts['target_sde_count'] = target_moe_count / 1.645
        if target_est_prop is not None:
            rows, cols = target_est_prop.shape
            total_vars += cols / 2
            target_parts['target_est_prop'] = target_est_prop * 1.0
            target_parts['target_sde_prop'] = target_moe_prop / 1.645
        if target_est_ratio is not None:
            rows, cols = target_est_ratio.shape
            total_vars += cols / 2
            target_parts['target_est_ratio'] = target_est_ratio * 1.0
            target_parts['target_sde_ratio'] = target_moe_ratio / 1.645

        if total_vars == 0:
            target_est = None
            print 'warning: optimization steps will not be run since no target_est variables provided'
        else:
            target_est = np.ones((rows, total_vars)) * -999

        # organize and check the input data; prep data for actual computations
        position = 0
        target_th = []
        # IMPORTANT: maintain the order of count then proportion then ratio
        if target_est_count is not None:
            target_est, target_th, position = mv_data_prep(target_est_count,\
                                              target_th_count, target_th_all,\
                                              target_est, target_th, position,\
                                              scale=1, ratio=False)
        if target_est_prop is not None:
            target_est, target_th, position = mv_data_prep(target_est_prop,\
                                              target_th_prop, target_th_all,\
                                              target_est, target_th, position,\
                                              scale=2, ratio=False)
        if target_est_ratio is not None:
            target_est, target_th, position = mv_data_prep(target_est_ratio,\
                                              target_th_ratio, target_th_all,\
                                              target_est, target_th, position,\
                                              scale=2, ratio=True)
        target_th = np.array(target_th)

        # compute zscores
        # NOTE: zscores computed using all data, i.e. we do not screen out
        #       observations in the exclude list.
        if zscore and target_est is not None:
            if pca:
                # Python does not currently have a widely used tool for
                # computing PCA with missing values. In principle,
                # NIPALS (Nonlinear Iterative Partial Least Squares)
                # can accommodate missing values, but the implementation in MDP
                # 3.4 will return a matrix of NAN values if there is an NAN
                # value in the input data.
                # http://sourceforge.net/p/mdp-toolkit/mailman/mdp-toolkit-users/?viewmonth=201111
                # http://stats.stackexchange.com/questions/35561/imputation-of-missing-values-for-pca
                # Therefore, we impute the missing values when the user
                # requests PCA; compute the z-scores on the imputed data; and
                # then pass this on to the PCA step.
                # The imputation replaces a missing value with the average of
                # its neighbors (i.e., its spatial lag). If missing values
                # remain (due to missing values in a missing value's neighbor
                # set), then that value is replaced by the column average.
                w_standardized = copy.deepcopy(w)
                w_standardized.transform = 'r'
                target_est_lag = ps.lag_spatial(w_standardized, target_est)
                # replace troublemakers with their spatial lag
                trouble = np.isfinite(target_est)
                trouble = np.bitwise_not(trouble)
                target_est[trouble] = target_est_lag[trouble]
                del target_est_lag
                del trouble
            # Pandas ignores missing values by default, so we can
            # compute the z-score and retain the missing values
            target_est = pd.DataFrame(target_est)
            target_est = (target_est -
                          target_est.mean(axis=0)) / target_est.std(axis=0)
            target_est = target_est.values
            if pca:
                # For the PCA case we need to replace any remaining missing
                # values with their column average. Since we now have z-scores,
                # we know that the average of every column is zero.
                # If it's not the PCA case, then we can leave the missing
                # values in as they will be ignored down the line.
                if np.isfinite(target_est.sum()) == False:
                    trouble = np.isfinite(target_est)
                    trouble = np.bitwise_not(trouble)
                    target_est[trouble] = 0.
                    del trouble

        # run principal components on target data (skip PCA if pca=False)
        # NOTE: matplotlib has deprecated PCA function, also it only uses SVD
        #       which can get tripped up by bad data
        # NOTE: the logic here is to first identify the principal components and
        #       then weight each component in preparation for future SSD
        #       computations; we weight the data here so that we don't need to
        #       weight the data each time the SSD is computed; in effect we want
        #       to compute the SSD on each raw component and then weight that
        #       component's contribution to the total SSD by the component's share
        #       of total variance explained, since the SSD computation has a
        #       squared term we can take the square root of the data now and then
        #       not have to weight it later
        # NOTE: PCA computed using all data, i.e. we do not screen out
        #       observations in the exclude list.
        if pca and target_est is not None:
            try:
                # eigenvector approach
                pca_node = MDP.nodes.PCANode()
                target_est = pca_node.execute(
                    target_est)  # get principal components
            except:
                try:
                    # singular value decomposition approach
                    pca_node = MDP.nodes.PCANode(svd=True)
                    target_est = pca_node.execute(
                        target_est)  # get principal components
                except:
                    # NIPALS would be a better approach than imputing
                    # missing values entirely, but MDP 3.4 does not handle
                    # missing values. Leaving this code as a place holder in
                    # case MDP is updated later.
                    ###pca_node = MDP.nodes.NIPALSNode()
                    ###target_est = pca_node.execute(target_est)  # get principal components
                    raise Exception, "PCA not possible given input data and settings. Set zscore=True to automatically impute missing values or address missing values in advance."

            pca_variance = np.sqrt(pca_node.d / pca_node.total_variance)
            target_est = target_est * pca_variance  # weighting for SSD

        # NOTE: the target_est variable is passed to the SSD function, and the
        #       target_parts variable is passed to the feasibility test function

        # set the appropriate objective function plan
        build_region, enclave_test, local_test = function_picker(count_est,\
                                    count_th_min, count_th_max, target_th_count,\
                                    target_th_prop, target_th_ratio, target_th_all)

        # setup the CV computation
        get_cv = UTILS.get_mv_cv
        cv_exclude = [cv_exclude_count, cv_exclude_prop, cv_exclude_ratio]

        # setup areas to be excluded from computations
        if exclude:
            exclude = [id2i[j] for j in exclude]
            original_exclude = exclude[:]  # in integer ID form
        else:
            original_exclude = []
        # might consider an automated process to drop observations where
        # count_est=0; at this time the user would be expected to add these
        # observations to the exclude list

        time2 = time.time()
        time_output['prep'] = time2 - time1
        # find the feasible solution with the most number of regions
        regions, id2region, exclude, enclaves = BASE.base_region_iterator(\
                             w, count_th_min, count_th_max, count_est, target_th, target_est,\
                             exclude, auto_exclude, get_cv, base_solutions,\
                             target_parts, build_region, enclave_test, kd, points,
                             anchor, cardinality, cv_exclude)

        time3 = time.time()
        time_output['base'] = time3 - time2
        problem_ids = list(set(exclude).difference(original_exclude))
        if id2region == False:
            # Infeasible base run
            exit = "no feasible solution"
            time3a = time4 = time4a = time.time()
        else:
            if target_est is not None:
                # only compute SSDs if there are target_est variables
                start_ssds = np.array([
                    UTILS.sum_squares(region, target_est) for region in regions
                ])
            else:
                start_ssds = np.ones(len(regions)) * -999.0

            if compactness:
                # capture compactness from base solution
                start_compactness = UTILS.compactness_global(
                    regions, compactness)

            if local_improvement and len(regions) > 1:
                # only run the local improvement if the appropriate flag is set
                # (local_improvement=True) and if there is more then one region to
                # swap areas between
                # swap areas along region borders that improve SSD
                time3a = time.time()
                regions, id2region, exit = \
                              LOCAL.local_search(regions, id2region, w, count_th_min, count_th_max,\
                                                 count_est, target_th, target_parts,\
                                                 target_est, exclude, get_cv,\
                                                 local_test, local_params, cv_exclude)
                time4 = time.time()
                # collect stats on SSD for each region
                end_ssds = np.array([
                    UTILS.sum_squares(region, target_est) for region in regions
                ])
                ssd_improvement = (end_ssds - start_ssds) / start_ssds
                ssd_improvement[np.isnan(
                    ssd_improvement
                )] = 0.0  # makes singleton regions have 0 improvement
                ssds = np.vstack((start_ssds, end_ssds, ssd_improvement)).T
                if compactness:
                    # capture compactness from final solution
                    end_compactness = UTILS.compactness_global(
                        regions, compactness)
                    compact_change = \
                        (end_compactness - start_compactness) / start_compactness
                    compacts = np.vstack(
                        (start_compactness, end_compactness, compact_change)).T
                else:
                    compacts = np.ones((len(regions), 3)) * -999.0
                time4a = time.time()
            else:
                time3a = time4 = time.time()
                # capture start SSDs and compactness, insert -999 for "improvements"
                ssds = np.vstack((start_ssds, np.ones(start_ssds.shape)*-999,\
                                              np.ones(start_ssds.shape)*-999)).T
                if compactness:
                    compacts = np.vstack((start_compactness, np.ones(start_compactness.shape)*-999,\
                                                             np.ones(start_compactness.shape)*-999)).T
                else:
                    compacts = np.ones((len(regions), 3)) * -999.0
                exit = 'no local improvement'
                print "Did not run local improvement"
                time4a = time.time()

        time_output['base_wrapup'] = time3a - time3
        time_output['local'] = time4 - time3a
        time_output['local_wrapup'] = time4a - time4

        ####################
        # process regionalization results for user output
        ####################

        # setup header for the pandas dataframes (estimates, MOEs, CVs)
        header = []
        if target_est_count is not None:
            if 'pandas' in str(type(target_est_count)):
                header.extend(target_est_count.columns.tolist())
            else:
                header.extend([
                    'count_var' + str(i)
                    for i in range(target_est_count.shape[1])
                ])
        if target_est_prop is not None:
            if 'pandas' in str(type(target_est_prop)):
                header.extend(target_est_prop.columns.tolist())
            else:
                header.extend([
                    'prop_var' + str(i)
                    for i in range(target_est_prop.shape[1] / 2)
                ])
        if target_est_ratio is not None:
            if 'pandas' in str(type(target_est_ratio)):
                header.extend(target_est_ratio.columns.tolist())
            else:
                header.extend([
                    'ratio_var' + str(i)
                    for i in range(target_est_ratio.shape[1] / 2)
                ])

        # initialize pandas dataframes (estimates, MOEs, CVs; regions and areas)
        regionID = pd.Index(range(len(regions)), name='regionID')
        ests_region = pd.DataFrame(index=regionID, columns=header)
        moes_region = pd.DataFrame(index=regionID, columns=header)
        cvs_region = pd.DataFrame(index=regionID, columns=header)
        areaID = pd.Index(range(w.n), name='areaID')
        ests_area = pd.DataFrame(index=areaID, columns=header)
        moes_area = pd.DataFrame(index=areaID, columns=header)
        cvs_area = pd.DataFrame(index=areaID, columns=header)

        # setup header and pandas dataframe (count variable, if applicable)
        header = ['count']
        if count_est is not None:
            if 'pandas' in str(type(count_est)):
                header = [count_est.columns[0]]
        counts_region = pd.DataFrame(index=range(len(regions)), columns=header)
        counts_area = pd.DataFrame(index=range(w.n), columns=header)

        # create SSD and compactness dataframes
        if id2region == False:
            # Infeasible base run
            ssds = None
            compacts = None
        else:
            ssds = pd.DataFrame(
                ssds,
                index=regionID,
                columns=['start_ssd', 'end_ssd', 'ssd_improvement'])
            compacts = pd.DataFrame(compacts,
                                    index=regionID,
                                    columns=[
                                        'start_compactness', 'end_compactness',
                                        'compactness_improvement'
                                    ])

        # this one-dimensional list will contain the region IDs (ordered by area)
        ordered_region_ids = np.ones(w.n) * -9999

        for i, region in enumerate(regions):
            if count_est is not None:
                # get region totals for count variable
                counts_region.ix[i] = count_est[region].sum()
                for j in region:
                    counts_area.ix[j] = count_est[j]
            ests = []
            sdes = []
            if target_est_count is not None:
                # est, MOE and CV for count data
                est, sde = UTILS.get_est_sde_count(region, target_parts)
                est[np.isnan(est)] = 0.0  # clean up 0/0 case
                sde[np.isnan(sde)] = 0.0  # clean up 0/0 case
                ests.extend(est)
                sdes.extend(sde)
            if target_est_prop is not None:
                # est, MOE and CV for proportion data
                est, sde = UTILS.get_est_sde_prop(region, target_parts)
                est[np.isnan(est)] = 0.0  # clean up 0/0 case
                sde[np.isnan(sde)] = 0.0  # clean up 0/0 case
                ests.extend(est)
                sdes.extend(sde)
            if target_est_ratio is not None:
                # est, MOE and CV for ratio data
                est, sde = UTILS.get_est_sde_ratio(region, target_parts)
                est[np.isnan(est)] = 0.0  # clean up 0/0 case
                sde[np.isnan(sde)] = 0.0  # clean up 0/0 case
                ests.extend(est)
                sdes.extend(sde)
            ests_region, moes_region, cvs_region = wrapup_region(\
                                i, ests, sdes, target_parts,
                                ests_region, moes_region, cvs_region)
            ests_area, moes_area, cvs_area = wrapup_areas(\
                                region, target_parts,
                                ests_area, moes_area, cvs_area)
            ordered_region_ids[region] = i
        # set excluded areas to region ID -999
        ordered_region_ids[exclude] = -999
        time5 = time.time()
        time_output['wrapup'] = time5 - time4
        time_output['total'] = time5 - time1

        self.exit = exit
        self.time = time_output
        self.enclaves = enclaves
        self.p = len(regions)
        self.regions = regions
        self.region_ids = ordered_region_ids.tolist()
        self.ssds = ssds
        self.compactness = compacts
        self.ests_region = ests_region
        self.moes_region = moes_region
        self.cvs_region = cvs_region
        self.ests_area = ests_area
        self.moes_area = moes_area
        self.cvs_area = cvs_area
        self.counts_region = counts_region
        self.counts_area = counts_area
        self.problem_ids = problem_ids
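The repeated division by 1.645 in the data-prep section converts margins of error to standard errors, on the assumption that the MOEs are reported at the 90% confidence level (the usual ACS convention, where 1.645 is the corresponding z-score). As a standalone sketch:

def moe_to_sde(moe, z=1.645):
    """Convert a margin of error to a standard error.

    1.645 is the z-score of a 90% confidence interval; divide by a different
    z if the margins were published at another confidence level.
    """
    return moe / z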
Example #13
def solve_it(input_data):
    # Modify this code to run your optimization algorithm

    # parse the input
    lines = input_data.split('\n')

    parts = lines[0].split()
    facility_count = int(parts[0])
    customer_count = int(parts[1])

    facilities = []
    for i in range(1, facility_count + 1):
        parts = lines[i].split()
        facilities.append(
            Facility(i - 1, float(parts[0]), int(parts[1]),
                     Point(float(parts[2]), float(parts[3]))))

    customers = []
    for i in range(facility_count + 1, facility_count + 1 + customer_count):
        parts = lines[i].split()
        customers.append(
            Customer(i - 1 - facility_count, int(parts[0]),
                     Point(float(parts[1]), float(parts[2]))))

    from mip1 import run1
    from mip1 import weight_random, weight_length, weight_length_plus_some_setup
    #solution = run1(facilities, customers, weight_random, 20)
    #from mip2 import run2
    #solution = run2(facilities, customers, num_fac=2, iterations=500)

    from local_search import local_search

    solution = local_search(facilities, customers)
    # build a trivial solution
    # pack the facilities one by one until all the customers are served
    '''solution = [-1]*len(customers)
    capacity_remaining = [f.capacity for f in facilities]

    facility_index = 0
    for customer in customers:
        if capacity_remaining[facility_index] >= customer.demand:
            solution[customer.index] = facility_index
            capacity_remaining[facility_index] -= customer.demand
        else:
            facility_index += 1
            assert capacity_remaining[facility_index] >= customer.demand
            solution[customer.index] = facility_index
            capacity_remaining[facility_index] -= customer.demand
'''
    used = [0] * len(facilities)
    for facility_index in solution:
        used[facility_index] = 1

    # calculate the cost of the solution
    obj = sum([f.setup_cost * used[f.index] for f in facilities])
    for customer in customers:
        obj += length(customer.location,
                      facilities[solution[customer.index]].location)

    # prepare the solution in the specified output format
    output_data = '%.2f' % obj + ' ' + str(0) + '\n'
    output_data += ' '.join(map(str, solution))

    return output_data
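A hedged driver sketch for solve_it; the file path is a placeholder, and the file is assumed to follow the "facility_count customer_count" format parsed above:

if __name__ == '__main__':
    import sys
    file_location = sys.argv[1] if len(sys.argv) > 1 else 'data/fl_25_2'  # hypothetical path
    with open(file_location, 'r') as input_data_file:
        input_data = input_data_file.read()
    print(solve_it(input_data))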
Example #14
         visualise_sol( w_price, w_space, w_capacity, solution )
 # ITERATIVE
 if task == "iterative":
     for i in range(8):
         if m_time:
             print "@ Start constructive h.: %s" % (
                 stops.start_time()
                 )
         init_solution = constructive_random_heuristic(
             w_price, w_space, w_capacity
             )
         if m_time:
             print "@ Start iterative ls.: %s" % (
                 stops.current_time()
                 )
         frame = local_search( init_solution, w_price, w_space, w_capacity )
         solution = frame.run_iteration()
         temp_price = calculate_price( solution, w_price )
         if temp_price < best_sol:
             best_sol = temp_price
             best_type = "iterative"
         if m_time:
             print "@ Finish: %s" % ( stops.current_time() )
         print "\t\tIterative: %s" % (str(temp_price))
     if stats:
         print solution_table( solution )
     if visualise:
         visualise_sol( w_price, w_space, w_capacity, solution )
 # GREAT DELUGE            
 if task in ("deluge", "delugeall", \
         "delugerandom", "delugegreedy", "delugepeckish"):                
Example #15
def main(config, black_box_function=None, output_file=""):
    """
    Run design-space exploration using prior injection.
    :param config: dictionary containing all the configuration parameters of this design-space exploration.
    :param black_box_function: black-box function to optimize if running on default mode.
    :param output_file: a name for the file used to save the optimization results.
    """
    debug = False
    sys.stdout.write_to_logfile(str(config) + "\n")

    param_space = space.Space(config)

    random_time = datetime.datetime.now()

    run_directory = config["run_directory"]
    application_name = config["application_name"]
    hypermapper_mode = config["hypermapper_mode"]["mode"]

    log_file = deal_with_relative_and_absolute_path(run_directory,
                                                    config["log_file"])
    sys.stdout.change_log_file(log_file)
    if (hypermapper_mode == 'client-server'):
        sys.stdout.switch_log_only_on_file(True)

    if hypermapper_mode == "default":
        if black_box_function == None:
            print("Error: the black box function must be provided")
            raise SystemExit
        if not callable(black_box_function):
            print("Error: the black box function parameter is not callable")
            raise SystemExit

    input_params = param_space.get_input_parameters()
    optimization_metrics = config["optimization_objectives"]
    if len(optimization_metrics) > 1:
        print(
            "Error: prior optimization does not support multi-objective optimization yet"
        )
        exit()
    number_of_objectives = len(optimization_metrics)
    optimization_iterations = config["optimization_iterations"]
    evaluations_per_optimization_iteration = config[
        "evaluations_per_optimization_iteration"]
    number_of_cpus = config["number_of_cpus"]
    if number_of_cpus > 1:
        print(
            "Warning: this mode supports only sequential execution for now. Running on a single cpu."
        )
        number_of_cpus = 1
    print_importances = config["print_parameter_importance"]
    epsilon_greedy_threshold = config["epsilon_greedy_threshold"]

    if "feasible_output" in config:
        feasible_output = config["feasible_output"]
        feasible_output_name = feasible_output["name"]
        enable_feasible_predictor = feasible_output[
            "enable_feasible_predictor"]
        enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[
            "enable_feasible_predictor_grid_search_on_recall_and_precision"]
        feasible_predictor_grid_search_validation_file = feasible_output[
            "feasible_predictor_grid_search_validation_file"]
        feasible_parameter = param_space.get_feasible_parameter()

    acquisition_function_optimizer = config["acquisition_function_optimizer"]
    if acquisition_function_optimizer == "local_search":
        local_search_random_points = config["local_search_random_points"]
        local_search_starting_points = config["local_search_starting_points"]
    elif acquisition_function_optimizer == "posterior_sampling":
        posterior_sampling_tuning_points = config[
            "posterior_sampling_tuning_points"]
        posterior_sampling_final_samples = config[
            "posterior_sampling_final_samples"]
        posterior_sampling_mcmc_chains = config[
            "posterior_sampling_mcmc_chains"]
    else:
        print(
            "Unrecognized acquisition function optimization method in the configuration file:",
            acquisition_function_optimizer)
        raise SystemExit

    exhaustive_search_data_array = None
    exhaustive_search_fast_addressing_of_data_array = None
    scalarization_key = config["scalarization_key"]
    scalarization_method = config["scalarization_method"]

    model_weight = config["model_posterior_weight"]
    model_good_quantile = config["model_good_quantile"]
    weight_sampling = config["weight_sampling"]
    objective_limits = {}
    for objective in optimization_metrics:
        objective_limits[objective] = [float("inf"), float("-inf")]

    number_of_doe_samples = config["design_of_experiment"]["number_of_samples"]

    model_type = config["models"]["model"]
    regression_model_parameters = {}
    if model_type == "random_forest":
        regression_model_parameters["n_estimators"] = config["models"][
            "number_of_trees"]
        regression_model_parameters["max_features"] = config["models"][
            "max_features"]
        regression_model_parameters["bootstrap"] = config["models"][
            "bootstrap"]
        regression_model_parameters["min_samples_split"] = config["models"][
            "min_samples_split"]
        tree_means_per_leaf = None
        tree_vars_per_leaf = None

    if output_file == "":
        output_data_file = config["output_data_file"]
        if output_data_file == "output_samples.csv":
            output_data_file = application_name + "_" + output_data_file
    else:
        output_data_file = output_file

    beginning_of_time = param_space.current_milli_time()
    absolute_configuration_index = 0

    if param_space.get_prior_normalization_flag() is True:
        prior_limit_estimation_points = config["prior_limit_estimation_points"]
        objective_weights = sample_weight_flat(
            optimization_metrics, 1
        )[0]  # this will do fine for 1 objective cases, but for multi-objective optimization it might break
        good_prior_normalization_limits = estimate_prior_limits(
            param_space, prior_limit_estimation_points, objective_weights)
    else:
        good_prior_normalization_limits = None

    # Design of experiments/resume optimization phase
    doe_t0 = datetime.datetime.now()
    if config["resume_optimization"] == True:
        resume_data_file = config["resume_optimization_data"]
        if not resume_data_file.endswith('.csv'):
            print("Error: resume data file must be a CSV")
            raise SystemExit
        if resume_data_file == "output_samples.csv":
            resume_data_file = application_name + "_" + resume_data_file
        data_array, fast_addressing_of_data_array = param_space.load_data_file(
            resume_data_file, debug=False, number_of_cpus=number_of_cpus)
        absolute_configuration_index = len(data_array[list(data_array.keys(
        ))[0]])  # get the number of points evaluated in the previous run
        beginning_of_time = beginning_of_time - data_array[
            param_space.get_timestamp_parameter()[0]][
                -1]  # Set the timestamp back to match the previous run
        print("Resumed optimization, number of samples = %d ......." %
              absolute_configuration_index)

        if absolute_configuration_index < number_of_doe_samples:
            configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array,
                number_of_doe_samples - absolute_configuration_index,
                "random sampling")

            print(
                "Design of experiment phase, number of new doe samples = %d ......."
                % (number_of_doe_samples - absolute_configuration_index))
            new_data_array = param_space.run_configurations(
                hypermapper_mode, configurations, beginning_of_time,
                black_box_function, exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array, run_directory)
            data_array = concatenate_data_dictionaries(
                data_array, new_data_array,
                param_space.input_output_and_timestamp_parameter_names)
            absolute_configuration_index = number_of_doe_samples
            iteration_number = 1
        else:
            iteration_number = absolute_configuration_index - number_of_doe_samples + 1
    else:
        fast_addressing_of_data_array = {}
        default_configuration = param_space.get_default_or_random_configuration(
        )
        str_data = param_space.get_unique_hash_string_from_values(
            default_configuration)
        fast_addressing_of_data_array[str_data] = absolute_configuration_index

        if number_of_doe_samples - 1 > 0:
            configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array, number_of_doe_samples - 1,
                "random sampling") + [default_configuration]
        else:
            configurations = [default_configuration]

        print(
            "Design of experiment phase, number of doe samples = %d ......." %
            number_of_doe_samples)
        data_array = param_space.run_configurations(
            hypermapper_mode, configurations, beginning_of_time,
            black_box_function, exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array, run_directory)
        absolute_configuration_index += number_of_doe_samples
        iteration_number = 1

    for objective in optimization_metrics:
        lower_bound = min(objective_limits[objective][0],
                          min(data_array[objective]))
        upper_bound = max(objective_limits[objective][1],
                          max(data_array[objective]))
        objective_limits[objective] = [lower_bound, upper_bound]

    if enable_feasible_predictor:
        # HyperMapper needs at least one valid and one invalid sample for its feasibility classifier
        # i.e. they cannot all be equal
        while are_all_elements_equal(data_array[
                feasible_parameter[0]]) and optimization_iterations > 0:
            print(
                "Warning: all points are either valid or invalid, random sampling more configurations."
            )
            print("Number of doe samples so far:",
                  absolute_configuration_index)
            configurations = param_space.get_doe_sample_configurations(
                fast_addressing_of_data_array, 1, "random sampling")
            new_data_array = param_space.run_configurations(
                hypermapper_mode, configurations, beginning_of_time,
                black_box_function, exhaustive_search_data_array,
                exhaustive_search_fast_addressing_of_data_array, run_directory)
            data_array = concatenate_data_dictionaries(
                new_data_array, data_array,
                param_space.input_output_and_timestamp_parameter_names)
            absolute_configuration_index += 1
            optimization_iterations -= 1

    print(
        "\nEnd of doe/resume phase, the number of configuration runs is: %d\n"
        % absolute_configuration_index)
    sys.stdout.write_to_logfile(
        ("DoE time %10.4f sec\n" %
         ((datetime.datetime.now() - doe_t0).total_seconds())))

    with open(
            deal_with_relative_and_absolute_path(run_directory,
                                                 output_data_file), 'w') as f:
        w = csv.writer(f)
        w.writerow(param_space.get_input_output_and_timestamp_parameters())
        tmp_list = [
            param_space.convert_types_to_string(j, data_array)
            for j in param_space.get_input_output_and_timestamp_parameters()
        ]
        tmp_list = list(zip(*tmp_list))
        for i in range(len(data_array[optimization_metrics[0]])):
            w.writerow(tmp_list[i])

    if evaluations_per_optimization_iteration > 1:
        print("Warning: number of evaluations per iteration > 1")
        print(
            "HyperMapper's prior optimization currently does not support multiple evaluations per iteration; only one evaluation will be run per iteration"
        )

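    # Parameters forwarded to the acquisition function on every optimization iteration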
    function_parameters = {}
    function_parameters["param_space"] = param_space
    function_parameters["model_weight"] = model_weight
    function_parameters["threshold"] = {}
    function_parameters[
        "good_prior_normalization_limits"] = good_prior_normalization_limits
    function_parameters["posterior_floor"] = config[
        "posterior_computation_lower_limit"]
    function_parameters['model_type'] = model_type
    bo_t0 = datetime.datetime.now()
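    # Main optimization loop: fit the surrogate models, optimize the acquisition function,
    # then evaluate the chosen configuration on the black-box function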
    while iteration_number <= optimization_iterations:
        print("Starting optimization iteration", iteration_number)
        model_t0 = datetime.datetime.now()
        iteration_t0 = datetime.datetime.now()

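        # Fit one regression (surrogate) model per objective on all data collected so far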
        regression_models, _, _ = models.generate_mono_output_regression_models(
            data_array,
            param_space,
            input_params,
            optimization_metrics,
            1.00,
            model_type=model_type,
            number_of_cpus=number_of_cpus,
            print_importances=print_importances,
            **regression_model_parameters)

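        # Normalize each objective to [0, 1] and set the per-objective threshold
        # (a quantile of the observed values) passed to the acquisition function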
        normalized_objectives = {}
        for objective in optimization_metrics:
            objective_min, objective_max = objective_limits[objective]
            if objective_max == objective_min:
                normalized_objectives[objective] = [0] * len(
                    data_array[objective])
            else:
                normalized_objectives[objective] = [
                    (x - objective_min) / (objective_max - objective_min)
                    for x in data_array[objective]
                ]
            function_parameters["threshold"][objective] = np.quantile(
                data_array[objective], model_good_quantile)

        if model_type == "random_forest":
            # Change splits of each node from (lower_bound + upper_bound)/2 to a uniformly sampled split in (lower_bound, upper_bound)
            bufferx = [data_array[input_param] for input_param in input_params]
            bufferx = list(map(list, list(zip(*bufferx))))
            tree_means_per_leaf = {}
            tree_vars_per_leaf = {}
            leaf_per_sample = models.get_leaves_per_sample(
                bufferx, regression_models, param_space)
            for objective in optimization_metrics:
                tree_means_per_leaf[objective] = models.get_mean_per_leaf(
                    data_array[objective], leaf_per_sample[objective])
                tree_vars_per_leaf[objective] = models.get_var_per_leaf(
                    data_array[objective], leaf_per_sample[objective])
            regression_models = models.transform_rf_using_uniform_splits(
                regression_models, data_array, param_space)
            function_parameters["tree_means_per_leaf"] = tree_means_per_leaf
            function_parameters["tree_vars_per_leaf"] = tree_vars_per_leaf

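        # Fit the feasibility classifier when the application defines a feasibility constraint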
        classification_model = None
        if enable_feasible_predictor:
            classification_model, _, _ = models.generate_classification_model(
                application_name,
                param_space,
                data_array,
                input_params,
                feasible_parameter,
                1.00,
                debug,
                n_estimators=10,
                max_features=0.75,
                number_of_cpus=number_of_cpus,
                data_array_exhaustive=exhaustive_search_data_array,
                enable_feasible_predictor_grid_search_on_recall_and_precision=
                enable_feasible_predictor_grid_search_on_recall_and_precision,
                feasible_predictor_grid_search_validation_file=
                feasible_predictor_grid_search_validation_file,
                print_importances=print_importances)

        sys.stdout.write_to_logfile(
            ("Model fitting time %10.4f sec\n" %
             ((datetime.datetime.now() - model_t0).total_seconds())))

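        # Sample a random weight vector and scalarize the observed objective values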
        objective_weights = sample_weight_flat(optimization_metrics, 1)[0]

        data_array_scalarization, objective_limits = compute_data_array_scalarization(
            data_array, objective_weights, objective_limits, None,
            scalarization_method)
        data_array[scalarization_key] = data_array_scalarization.tolist()
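        # Epsilon-greedy step: with probability epsilon_greedy_threshold a random
        # configuration is sampled instead of optimizing the acquisition function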
        epsilon = random.uniform(0, 1)
        if epsilon > epsilon_greedy_threshold:
            function_parameters["objective_weights"] = objective_weights
            function_parameters["objective_limits"] = objective_limits
            function_parameters["iteration_number"] = iteration_number
            function_parameters["regression_models"] = regression_models
            function_parameters["classification_model"] = classification_model
            if enable_feasible_predictor:
                function_parameters["posterior_normalization_limits"] = [
                    float("inf"), float("-inf")
                ]
            if acquisition_function_optimizer == "local_search":
                _, best_configuration = local_search(
                    local_search_starting_points,
                    local_search_random_points,
                    param_space,
                    fast_addressing_of_data_array,
                    False,  # set feasibility to false, we handle it inside the acquisition function
                    compute_EI_from_posteriors,
                    function_parameters,
                    scalarization_key,
                    previous_points=data_array)
            else:
                print(
                    "Unrecognized acquisition function optimization method in the configuration file:",
                    acquisition_function_optimizer)
                raise SystemExit

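            # Record the chosen configuration in the fast-addressing dictionary so it is not proposed again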
            str_data = param_space.get_unique_hash_string_from_values(
                best_configuration)
            fast_addressing_of_data_array[
                str_data] = absolute_configuration_index
            absolute_configuration_index += 1
        else:
            sys.stdout.write_to_logfile(
                str(epsilon) + " <= " + str(epsilon_greedy_threshold) +
                ", randomly sampling a configuration to run\n")
            best_configuration = param_space.random_sample_configurations_without_repetitions(
                fast_addressing_of_data_array, 1, use_priors=False)[0]

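        # Run the black-box function on the selected configuration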
        black_box_t0 = datetime.datetime.now()
        best_configuration = [best_configuration]
        new_data_array = param_space.run_configurations(
            hypermapper_mode, best_configuration, beginning_of_time,
            black_box_function, exhaustive_search_data_array,
            exhaustive_search_fast_addressing_of_data_array, run_directory)
        sys.stdout.write_to_logfile(
            ("Black box time %10.4f sec\n" %
             ((datetime.datetime.now() - black_box_t0).total_seconds())))

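        # Append the new sample to the output CSV file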
        with open(
                deal_with_relative_and_absolute_path(run_directory,
                                                     output_data_file),
                'a') as f:
            w = csv.writer(f)
            tmp_list = [
                param_space.convert_types_to_string(j, new_data_array) for j in
                list(param_space.get_input_output_and_timestamp_parameters())
            ]
            tmp_list = list(zip(*tmp_list))
            for i in range(len(new_data_array[optimization_metrics[0]])):
                w.writerow(tmp_list[i])
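        # Merge the new sample into the data array and update the objective limits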
        data_array = concatenate_data_dictionaries(
            new_data_array, data_array,
            param_space.input_output_and_timestamp_parameter_names)
        for objective in optimization_metrics:
            lower_bound = min(objective_limits[objective][0],
                              min(data_array[objective]))
            upper_bound = max(objective_limits[objective][1],
                              max(data_array[objective]))
            objective_limits[objective] = [lower_bound, upper_bound]

        iteration_number += 1
        sys.stdout.write_to_logfile(
            ("BO iteration time %10.4f sec\n" %
             ((datetime.datetime.now() - iteration_t0).total_seconds())))

    sys.stdout.write_to_logfile(
        ("BO total time %10.4f sec\n" %
         ((datetime.datetime.now() - bo_t0).total_seconds())))
    print("End of Prior Optimization")