def main():
    # choices = {'a': 1, 'b': 2}
    # result = choices.get(key, 'default')
    user_input = input("Type: \n 1 for Random Search \n 2 for GA \n 3 for ABC \n 4 for Local Search \n")
    print('Target solution: {}'.format(TARGET))
    if user_input == '1':
        random_search(TARGET, NUMBER_OF_ITERATIONS)
    elif user_input == '2':
        ga_search(TARGET, NUMBER_OF_ITERATIONS, POPULATION_SIZE)
    elif user_input == '3':
        abc_search(TARGET, NUMBER_OF_ITERATIONS)
    elif user_input == '4':
        local_search(TARGET, NUMBER_OF_ITERATIONS)
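# The commented-out `choices` / `choices.get(key, 'default')` lines above hint at a
# dictionary-dispatch alternative to the if/elif chain. A minimal sketch of that idea,
# assuming the same search functions and constants are in scope; `main_with_dispatch`
# is a hypothetical name, not part of the original module.
def main_with_dispatch():
    choices = {
        '1': lambda: random_search(TARGET, NUMBER_OF_ITERATIONS),
        '2': lambda: ga_search(TARGET, NUMBER_OF_ITERATIONS, POPULATION_SIZE),
        '3': lambda: abc_search(TARGET, NUMBER_OF_ITERATIONS),
        '4': lambda: local_search(TARGET, NUMBER_OF_ITERATIONS),
    }
    user_input = input("Type: \n 1 for Random Search \n 2 for GA \n 3 for ABC \n 4 for Local Search \n")
    print('Target solution: {}'.format(TARGET))
    # .get() supplies a fallback action for unrecognized input instead of doing nothing
    choices.get(user_input, lambda: print("Unknown option: {}".format(user_input)))()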
def grasp_by_max_time(cost_function, alpha, greedy_loss_function, max_time_in_seconds, input, params_glf=None):
    best_cost_solution = math.inf
    best_solution = None
    print("Initiating GRASP with time constraint...")
    time_start = time.time()
    time_current = time_start
    k = 0
    while time_current - time_start <= max_time_in_seconds:
        k = k + 1
        feasible_solution = construct(greedy_loss_function, alpha, input, params_glf)
        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)
        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution
        time_current = time.time()
    time_spent_computing = time_current - time_start
    return (best_solution, best_cost_solution, time_spent_computing, k)
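# Hedged usage sketch for grasp_by_max_time. The internals of construct() and
# local_search() are not shown above, so the callable signatures of my_cost /
# my_greedy_loss and the shape of `instance` below are assumptions for
# illustration only; the 'highest_loaded_truck_load' field name is borrowed
# from the grasp() variants later in this section.
def my_cost(instance, solution):
    return solution["highest_loaded_truck_load"]

def my_greedy_loss(instance, partial_solution, candidate):
    return instance["loads"][candidate]

instance = {"loads": [4, 8, 15, 16, 23, 42]}

best_solution, best_cost, elapsed, iterations = grasp_by_max_time(
    cost_function=my_cost,
    alpha=0.3,                  # 0 = purely greedy construction, 1 = purely random
    greedy_loss_function=my_greedy_loss,
    max_time_in_seconds=60,
    input=instance)
print(best_cost, elapsed, iterations)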
def random_scalarizations(config, data_array, param_space, fast_addressing_of_data_array,
                          regression_models, iteration_number, objective_weights,
                          objective_limits, classification_model=None):
    """
    Run one iteration of bayesian optimization with random scalarizations.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param data_array: a dictionary containing previously explored points and their function values.
    :param param_space: parameter space object for the current application.
    :param fast_addressing_of_data_array: dictionary for quick-access to previously explored configurations.
    :param regression_models: the surrogate models used to evaluate points.
    :param iteration_number: the current iteration number.
    :param objective_weights: objective weights for multi-objective optimization. Not implemented yet.
    :param objective_limits: estimated minimum and maximum limits for each objective.
    :param classification_model: feasibility classifier for constrained optimization.
    """
    optimization_metrics = config["optimization_objectives"]
    number_of_objectives = len(optimization_metrics)
    local_search_starting_points = config["local_search_starting_points"]
    local_search_random_points = config["local_search_random_points"]
    scalarization_key = config["scalarization_key"]

    optimization_function_parameters = {}
    optimization_function_parameters['regression_models'] = regression_models
    optimization_function_parameters['iteration_number'] = iteration_number
    optimization_function_parameters['data_array'] = data_array
    optimization_function_parameters['classification_model'] = classification_model
    optimization_function_parameters['param_space'] = param_space
    optimization_function_parameters['objective_weights'] = objective_weights
    optimization_function_parameters['model_type'] = config["models"]["model"]
    optimization_function_parameters['objective_limits'] = objective_limits
    optimization_function_parameters['acquisition_function'] = config["acquisition_function"]
    optimization_function_parameters['scalarization_method'] = config["scalarization_method"]
    optimization_function_parameters['number_of_cpus'] = config["number_of_cpus"]

    _, best_configuration = local_search(
        local_search_starting_points,
        local_search_random_points,
        param_space,
        fast_addressing_of_data_array,
        False,  # we do not want the local search to consider feasibility constraints, only the acquisition functions
        run_acquisition_function,
        optimization_function_parameters,
        scalarization_key,
        previous_points=data_array)
    return best_configuration
def run(self):
    start = time.time()
    process_gop_list = []
    process_item_list = []
    process_util_list = []
    process_pair_list = []
    search_counter = 0
    print("Process " + str(self.processIdx) + " starts global search.")
    for idx, item in enumerate(
            partition_to_k(self.layer_list, self.acc_cluster_num, False), 0):
        if idx % PROCESS_NUM == self.processIdx:
            sub_gop_list = []
            search_counter = search_counter + 1
            sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R, sub_conv_K, sub_conv_S, sub_flag \
                = model_split_by_list(self.conv_N, self.conv_M, self.conv_r, self.conv_R,
                                      self.conv_K, self.conv_S, self.flag, item)
            sub_pair_list, sub_lat_list, sub_util_list = \
                local_search(sub_conv_N, sub_conv_M, sub_conv_r, sub_conv_R,
                             sub_conv_K, sub_conv_S, sub_flag)
            for i in range(0, len(sub_conv_N)):
                sub_gop_list.append(
                    gop_calculate(sub_conv_N[i], sub_conv_M[i], sub_conv_R[i], sub_conv_K[i]))
            if max(sub_lat_list) < self.overall_lat:
                overall_lat = max(sub_lat_list)
                if len(process_pair_list) < 6:
                    process_item_list.append(item)
                    process_pair_list.append(sub_pair_list)
                    # process_pair_list.append([overall_lat])
                    process_util_list.append([overall_lat])
                    process_gop_list.append(sub_gop_list)
                    # process_util_list.append(sub_util_list)
                    # process_pair_list.append(sub_util_list)
                # else:
                #     max_among_mins = process_pair_list.index(max(overall_lat))
                #     process_pair_list.remove(process_pair_list[max_among_mins])
                #     process_pair_list.append(sub_pair_list)
                #     process_pair_list.append([overall_lat])
                #     process_pair_list.append(sub_util_list)
    # print "For set ID: " + str(idx) + ", the final explored points = ", search_counter
    if len(process_pair_list) != 0:
        self.result_Q.put((process_pair_list, process_item_list,
                           process_gop_list, process_util_list))
    end = time.time()
    print("Thread ", self.processIdx, " :", (end - start))
def grasp_by_max_time(cost_function, alpha, greedy_loss_function, max_time_in_seconds, input, params_glf=None):
    best_cost_solution = math.inf
    best_solution = None
    print("Initiating GRASP with time constraint...")
    time_start = time.time()
    time_current = time_start
    k = 0
    while time_current - time_start <= max_time_in_seconds:
        k = k + 1
        feasible_solution = construct(greedy_loss_function, alpha, input, params_glf)
        if input["stop_after_some_time"] == True:
            if time.time() - input["time_start"] > input["max_time"]:
                return (best_solution, best_cost_solution, time.time() - time_start, k)
        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)
        # TODO: Remove after debug
        check_if_solution_makes_sense(best_solution_in_neighbourhood)
        # TODO: Remove after debug
        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution
        print(f"Run {k} iteration!!!")
        time_current = time.time()
    time_spent_computing = time_current - time_start
    print(f"Run {k} iterations!!!")
    return (best_solution, best_cost_solution, time_spent_computing, k)
def grasp(cost_function, alpha, greedy_cost_function, max_iterations, input):
    best_cost_solution = math.inf
    best_solution = None
    print("Initiating GRASP...")
    start = time.time()
    for k in range(max_iterations):
        feasible_solution = construct(greedy_cost_function, alpha, input)
        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)
        print(f"Cost of new solution: {cost_of_new_solution}")
        print(
            f"Highest load of new solution: {best_solution_in_neighbourhood['highest_loaded_truck_load']}"
        )
        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution
    return (best_solution, best_cost_solution, time.time() - start, max_iterations)
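# For context on the `alpha` parameter used by the GRASP variants above and below:
# GRASP construction typically builds a solution greedily but picks each element at
# random from a restricted candidate list (RCL) whose width is controlled by alpha.
# This is a generic sketch of that idea, not the module's own construct(), whose
# internals are not shown; `candidates` and `greedy_cost` are illustrative placeholders.
import random

def construct_with_rcl(candidates, greedy_cost, alpha):
    """Generic GRASP construction sketch: alpha=0 is purely greedy, alpha=1 purely random."""
    solution = []
    remaining = list(candidates)
    while remaining:
        costs = {c: greedy_cost(solution, c) for c in remaining}
        c_min, c_max = min(costs.values()), max(costs.values())
        threshold = c_min + alpha * (c_max - c_min)
        rcl = [c for c in remaining if costs[c] <= threshold]  # always contains the greedy choice
        choice = random.choice(rcl)
        solution.append(choice)
        remaining.remove(choice)
    return solution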
def grasp(cost_function, alpha, greedy_loss_function, max_iterations, input):
    best_cost_solution = math.inf
    best_solution = None
    print("Initiating GRASP...")
    for k in range(max_iterations):
        # print(f"Running {k} iteration: construct")
        feasible_solution = construct(greedy_loss_function, alpha, input)
        # print(f"Running {k} iteration: local_search")
        cost_of_new_solution, best_solution_in_neighbourhood = local_search(
            cost_function, input, feasible_solution)
        print(f"Cost of new solution: {cost_of_new_solution}")
        print(
            f"Highest load of new solution: {feasible_solution['highest_loaded_truck_load']}"
        )
        sum_in_rows = best_solution_in_neighbourhood['pt'].sum(axis=1)
        if np.any(sum_in_rows > 1):
            print(best_solution_in_neighbourhood)
            print("Local search f****d up for row > 1")
            sys.exit()
        if np.any(sum_in_rows == 0):
            print(best_solution_in_neighbourhood)
            print("Local search f****d up for row = 0")
            sys.exit()
        if (cost_of_new_solution < best_cost_solution):
            best_solution = best_solution_in_neighbourhood
            best_cost_solution = cost_of_new_solution
        print(f"Run {k} iteration!!!")
    return (best_solution, best_cost_solution, None, max_iterations)
groups = create_groups_with_bfs(graph, nb_groups=3)
print(groups.shape)
print(groups)
plot_graph(graph, groups,
           title='gaussian kernel graph with 3 groups got with BFS',
           file_name='gaussian_kernel_graph.png')
weights = get_adjacency_matrix(graph)
# best_permutation = smb2.spectral_sequencing(weights)
best_permutation = smb2.mc_allister(weights)
print(
    f'value of bandwidth sum found : {smb2.bandwidth_sum(best_permutation, weights)}'
)
best_permutation, best_value = local_search(best_permutation, graph)
print(best_permutation)
print(
    f'value of bandwidth sum found : {smb2.bandwidth_sum(best_permutation, weights)}'
)
print(best_permutation)
spectrogram_with_groups(
    graph, groups,
    permutation=best_permutation,
    file_name='spectrogram_gaussian_kernel_graph_groups_bfs.png',
    title='mc_allister then local search on gaussian kernel graph with 3 groups got with BFS'
)
def main(w0=None):
    # tm should translate unknown words as-is with probability 1
    w = w0
    if w is None:
        # lm_logprob, distortion penalty, direct translate logprob, direct lexicon logprob,
        # inverse translation logprob, inverse lexicon logprob
        if opts.weights == "no weights specify":
            w = [1.0/7] * 7
            # w = [1.76846735947, 0.352553835525, 1.00071564481, 1.49937872683, 0.562198294709, -0.701483985454, 1.80395218437]
        else:
            w = [float(line.strip()) for line in open(opts.weights)]
    sys.stderr.write(str(w) + '\n')

    tm = models.TM(opts.tm, opts.k, opts.mute)
    lm = models.LM(opts.lm, opts.mute)
    # ibm_t = {}
    ibm_t = init('./data/ibm.t.gz')
    french = [tuple(line.strip().split()) for line in open(opts.input).readlines()[:opts.num_sents]]
    french = french[opts.start:opts.end]
    bound_width = float(opts.bwidth)

    for word in set(sum(french, ())):
        if (word,) not in tm:
            tm[(word,)] = [models.phrase(word, [0.0, 0.0, 0.0, 0.0])]

    nbest_output = []
    total_prob = 0
    if opts.mute == 0:
        sys.stderr.write("Start decoding %s ...\n" % (opts.input,))
    for idx, f in enumerate(french):
        if opts.mute == 0:
            sys.stderr.write("Decoding sentence #%s ...\n" % (str(idx)))
        initial_hypothesis = hypothesis(lm.begin(), 0.0, 0, 0, None, None, None)
        heaps = [{} for _ in f] + [{}]
        heaps[0][lm.begin(), 0, 0] = initial_hypothesis
        for i, heap in enumerate(heaps[:-1]):
            # maintain beam heap
            # front_item = sorted(heap.itervalues(), key=lambda h: -h.logprob)[0]
            for h in sorted(heap.itervalues(), key=lambda h: -h.logprob)[:opts.s]:  # prune
                # if h.logprob < front_item.logprob - float(opts.bwidth):
                #     continue
                fopen = prefix1bits(h.coverage)
                for j in xrange(fopen, min(fopen + 1 + opts.disord, len(f) + 1)):
                    for k in xrange(j + 1, len(f) + 1):
                        if f[j:k] in tm:
                            if (h.coverage & bitmap(range(j, k))) == 0:
                                for phrase in tm[f[j:k]]:
                                    lm_prob = 0
                                    lm_state = h.lm_state
                                    for word in phrase.english.split():
                                        (lm_state, prob) = lm.score(lm_state, word)
                                        lm_prob += prob
                                    lm_prob += lm.end(lm_state) if k == len(f) else 0.0
                                    coverage = h.coverage | bitmap(range(j, k))
                                    # logprob = h.logprob + lm_prob*w[0] + getDotProduct(phrase.several_logprob, w[2:6]) + abs(h.end+1-j)*w[1] + ibm_model_1_w_score(ibm_t, f, phrase.english)*w[6]
                                    logprob = h.logprob
                                    logprob += lm_prob * w[0]
                                    logprob += getDotProduct(phrase.several_logprob, w[1:5])
                                    # logprob += opts.diseta*abs(h.end+1-j)*w[1]
                                    logprob += ibm_model_1_w_score(ibm_t, f, phrase.english) * w[5]
                                    logprob += (len(phrase.english.split()) - (k - j)) * w[6]
                                    new_hypothesis = hypothesis(lm_state, logprob, coverage, k, h, phrase, abs(h.end + 1 - j))
                                    # add to heap
                                    num = onbits(coverage)
                                    if (lm_state, coverage, k) not in heaps[num] or new_hypothesis.logprob > heaps[num][lm_state, coverage, k].logprob:
                                        heaps[num][lm_state, coverage, k] = new_hypothesis
        winners = sorted(heaps[-1].itervalues(), key=lambda h: -h.logprob)[0:opts.nbest]

        def get_lm_logprob(test_list):
            stance = []
            for i in test_list:
                stance += (i.split())
            stance = tuple(stance)
            lm_state = ("<s>",)
            score = 0.0
            for word in stance:
                (lm_state, word_score) = lm.score(lm_state, word)
                score += word_score
            return score

        def get_list_and_features(h, idx_self):
            lst = []
            features = [0, 0, 0, 0, 0, 0, 0]
            current_h = h
            while current_h.phrase is not None:
                # print current_h
                lst.append(current_h.phrase.english)
                # features[1] += current_h.distortionPenalty
                features[1] += current_h.phrase.several_logprob[0]  # translation feature 1
                features[2] += current_h.phrase.several_logprob[1]  # translation feature 2
                features[3] += current_h.phrase.several_logprob[2]  # translation feature 3
                features[4] += current_h.phrase.several_logprob[3]  # translation feature 4
                current_h = current_h.predecessor
            lst.reverse()
            features[0] = get_lm_logprob(lst)  # language model score
            features[5] = ibm_model_1_score(ibm_t, f, lst)
            features[6] = len(lst) - len(french[idx_self])
            return (lst, features)

        for win in winners:
            # s = str(idx) + " ||| "
            (lst, features) = get_list_and_features(win, idx)
            print local_search.local_search(lst, lm)
def prior_guided_optimization(config, data_array, param_space, fast_addressing_of_data_array,
                              regression_models, iteration_number, objective_weights,
                              objective_limits, classification_model=None):
    """
    Run a prior-guided bayesian optimization iteration.
    :param config: dictionary containing all the configuration parameters of this optimization.
    :param data_array: a dictionary containing previously explored points and their function values.
    :param param_space: parameter space object for the current application.
    :param fast_addressing_of_data_array: dictionary for quick-access to previously explored configurations.
    :param regression_models: the surrogate models used to evaluate points.
    :param iteration_number: the current iteration number.
    :param objective_weights: objective weights for multi-objective optimization. Not implemented yet.
    :param objective_limits: estimated minimum and maximum limits for each objective.
    :param classification_model: feasibility classifier for constrained optimization.
    """
    local_search_starting_points = config["local_search_starting_points"]
    local_search_random_points = config["local_search_random_points"]
    scalarization_key = config["scalarization_key"]

    function_parameters = {}
    function_parameters["param_space"] = param_space
    function_parameters["iteration_number"] = iteration_number
    function_parameters["regression_models"] = regression_models
    function_parameters['classification_model'] = classification_model
    function_parameters["objective_weights"] = objective_weights
    function_parameters["objective_limits"] = objective_limits
    function_parameters['model_type'] = config["models"]["model"]
    function_parameters["model_weight"] = config["model_posterior_weight"]
    function_parameters["posterior_floor"] = config["posterior_computation_lower_limit"]
    model_good_quantile = config["model_good_quantile"]
    function_parameters["threshold"] = {}
    optimization_metrics = param_space.get_optimization_parameters()
    for objective in optimization_metrics:
        function_parameters["threshold"][objective] = np.quantile(
            data_array[objective], model_good_quantile)

    if param_space.get_prior_normalization_flag() is True:
        prior_limit_estimation_points = config["prior_limit_estimation_points"]
        good_prior_normalization_limits = estimate_prior_limits(
            param_space, prior_limit_estimation_points, objective_weights)
    else:
        good_prior_normalization_limits = None
    function_parameters["good_prior_normalization_limits"] = good_prior_normalization_limits

    if classification_model is not None:
        function_parameters["posterior_normalization_limits"] = [float("inf"), float("-inf")]

    _, best_configuration = local_search(
        local_search_starting_points,
        local_search_random_points,
        param_space,
        fast_addressing_of_data_array,
        False,  # set feasibility to false, we handle it inside the acquisition function
        compute_EI_from_posteriors,
        function_parameters,
        scalarization_key,
        previous_points=data_array)
    return best_configuration
def test():
    facilities = read_and_parse_text("test_cases/test0.txt")
    # local_search(facilities, frozenset())
    # local_search(facilities, frozenset({'A','B','C','D','E'}))
    local_search(facilities, frozenset({'B', 'D'}))
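# Hedged sketch: a small extension of the test above that exercises local_search from
# each of the starting sets appearing in the commented-out calls. How the result should
# be compared is not shown by the snippet, so the return value is deliberately ignored;
# `test_multiple_starts` is a hypothetical name.
def test_multiple_starts():
    facilities = read_and_parse_text("test_cases/test0.txt")
    for start in (frozenset(), frozenset({'A', 'B', 'C', 'D', 'E'}), frozenset({'B', 'D'})):
        local_search(facilities, start)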
def __init__(self, w, target_est_count=None, target_moe_count=None, target_th_count=None,\ target_est_prop=None, target_moe_prop=None, target_th_prop=None,\ target_est_ratio=None, target_moe_ratio=None, target_th_ratio=None,\ target_th_all=None, count_est=None, count_th_min=None, count_th_max=None,\ exclude=None, auto_exclude=0, base_solutions=100,\ zscore=True, pca=True, local_improvement=True, local_params=None,\ compactness=None, points=None, anchor=None, cardinality=False,\ cv_exclude_count=0, cv_exclude_prop=0, cv_exclude_ratio=0): time1 = time.time() time_output = { 'prep': 0, 'base': 0, 'base_wrapup': 0, 'local': 0, 'local_wrapup': 0, 'wrapup': 0, 'total': 0 } # convert arbitrary IDs in W object to integers id2i = w.id2i neighbors = { id2i[key]: [id2i[neigh] for neigh in w.neighbors[key]] for key in w.id_order } w = ps.W(neighbors) # build KDTree for use in finding base solution if issubclass(type(points), scipy.spatial.KDTree): kd = points points = kd.data elif type(points).__name__ == 'ndarray': kd = ps.common.KDTree(points) elif issubclass(type(points), ps.core.IOHandlers.pyShpIO.PurePyShpWrapper): #loop to find centroids, need to be sure order matches W and data centroids = [] for i in points: centroids.append(i.centroid) kd = ps.common.KDTree(centroids) points = kd.data elif points is None: kd = None else: raise Exception, 'Unsupported type passed to points' # dictionary allowing multivariate and univariate flexibility target_parts = {'target_est_count':target_est_count,\ 'target_est_prop':target_est_prop,\ 'target_est_ratio':target_est_ratio,\ 'target_sde_count':target_moe_count ,\ 'target_sde_prop':target_moe_prop,\ 'target_sde_ratio':target_moe_ratio} # setup the holder for the variables to minimize; later we will put all # the count, ratio and proportion variables into this array. # Also, convert MOEs to standard errors when appropriate total_vars = 0 rows = 0 if target_est_count is not None: rows, cols = target_est_count.shape total_vars += cols target_parts['target_est_count'] = target_est_count * 1.0 target_parts['target_sde_count'] = target_moe_count / 1.645 if target_est_prop is not None: rows, cols = target_est_prop.shape total_vars += cols / 2 target_parts['target_est_prop'] = target_est_prop * 1.0 target_parts['target_sde_prop'] = target_moe_prop / 1.645 if target_est_ratio is not None: rows, cols = target_est_ratio.shape total_vars += cols / 2 target_parts['target_est_ratio'] = target_est_ratio * 1.0 target_parts['target_sde_ratio'] = target_moe_ratio / 1.645 if total_vars == 0: target_est = None print 'warning: optimization steps will not be run since no target_est variables provided' else: target_est = np.ones((rows, total_vars)) * -999 # organize and check the input data; prep data for actual computations position = 0 target_th = [] # IMPORTANT: maintain the order of count then proportion then ratio if target_est_count is not None: target_est, target_th, position = mv_data_prep(target_est_count,\ target_th_count, target_th_all,\ target_est, target_th, position,\ scale=1, ratio=False) if target_est_prop is not None: target_est, target_th, position = mv_data_prep(target_est_prop,\ target_th_prop, target_th_all,\ target_est, target_th, position,\ scale=2, ratio=False) if target_est_ratio is not None: target_est, target_th, position = mv_data_prep(target_est_ratio,\ target_th_ratio, target_th_all,\ target_est, target_th, position,\ scale=2, ratio=True) target_th = np.array(target_th) # compute zscores # NOTE: zscores computed using all data, i.e. 
we do not screen out # observations in the exclude list. if zscore and target_est is not None: if pca: # Python does not currently have a widely used tool for # computing PCA with missing values. In principle, # NIPALS (Nonlinear Iterative Partial Least Squares) # can accommodate missing values, but the implementation in MDP # 3.4 will return a matrix of NAN values if there is an NAN # value in the input data. # http://sourceforge.net/p/mdp-toolkit/mailman/mdp-toolkit-users/?viewmonth=201111 # http://stats.stackexchange.com/questions/35561/imputation-of-missing-values-for-pca # Therefore, we impute the missing values when the user # requests PCA; compute the z-scores on the imputed data; and # then pass this on to the PCA step. # The imputation replaces a missing value with the average of # its neighbors (i.e., its spatial lag). If missing values # remain (due to missing values in a missing value's neighbor # set), then that value is replaced by the column average. w_standardized = copy.deepcopy(w) w_standardized.transform = 'r' target_est_lag = ps.lag_spatial(w_standardized, target_est) # replace troublemakers with their spatial lag trouble = np.isfinite(target_est) trouble = np.bitwise_not(trouble) target_est[trouble] = target_est_lag[trouble] del target_est_lag del trouble # Pandas ignores missing values by default, so we can # compute the z-score and retain the missing values target_est = pd.DataFrame(target_est) target_est = (target_est - target_est.mean(axis=0)) / target_est.std(axis=0) target_est = target_est.values if pca: # For the PCA case we need to replace any remaining missing # values with their column average. Since we now have z-scores, # we know that the average of every column is zero. # If it's not the PCA case, then we can leave the missing # values in as they will be ignored down the line. if np.isfinite(target_est.sum()) == False: trouble = np.isfinite(target_est) trouble = np.bitwise_not(trouble) target_est[trouble] = 0. del trouble # run principle components on target data (skip PCA if pca=False) # NOTE: matplotlib has deprecated PCA function, also it only uses SVD # which can get tripped up by bad data # NOTE: the logic here is to first identify the principle components and # then weight each component in preparation for future SSD # computations; we weight the data here so that we don't need to # weight the data each time the SSD is computed; in effect we want # to compute the SSD on each raw component and then weight that # component's contribution to the total SSD by the component's share # of total variance explained, since the SSD computation has a # squared term we can take the square root of the data now and then # not have to weight it later # NOTE: PCA computed using all data, i.e. we do not screen out # observations in the exclude list. if pca and target_est is not None: try: # eigenvector approach pca_node = MDP.nodes.PCANode() target_est = pca_node.execute( target_est) # get principle components except: try: # singular value decomposition approach pca_node = MDP.nodes.PCANode(svd=True) target_est = pca_node.execute( target_est) # get principle components except: # NIPALS would be a better approach than imputing # missing values entirely, but MDP 3.4 does not handle # missing values. Leaving this code as a place holder in # case MDP is updated later. ###pca_node = MDP.nodes.NIPALSNode() ###target_est = pca_node.execute(target_est) # get principle components raise Exception, "PCA not possible given input data and settings. 
Set zscore=True to automatically impute missing values or address missing values in advance." pca_variance = np.sqrt(pca_node.d / pca_node.total_variance) target_est = target_est * pca_variance # weighting for SSD # NOTE: the target_est variable is passed to the SSD function, and the # target_parts variable is passed to the feasibility test function # set the appropriate objective function plan build_region, enclave_test, local_test = function_picker(count_est,\ count_th_min, count_th_max, target_th_count,\ target_th_prop, target_th_ratio, target_th_all) # setup the CV computation get_cv = UTILS.get_mv_cv cv_exclude = [cv_exclude_count, cv_exclude_prop, cv_exclude_ratio] # setup areas to be excluded from computations if exclude: exclude = [id2i[j] for j in exclude] original_exclude = exclude[:] # in integer ID form else: original_exclude = [] # might consider an automated process to drop observations where # count_est=0; at this time the user would be expected to add these # observations to the exclude list time2 = time.time() time_output['prep'] = time2 - time1 # find the feasible solution with the most number of regions regions, id2region, exclude, enclaves = BASE.base_region_iterator(\ w, count_th_min, count_th_max, count_est, target_th, target_est,\ exclude, auto_exclude, get_cv, base_solutions,\ target_parts, build_region, enclave_test, kd, points, anchor, cardinality, cv_exclude) time3 = time.time() time_output['base'] = time3 - time2 problem_ids = list(set(exclude).difference(original_exclude)) if id2region == False: # Infeasible base run exit = "no feasible solution" time3a = time4 = time4a = time.time() else: if target_est is not None: # only compute SSDs if there are target_est variables start_ssds = np.array([ UTILS.sum_squares(region, target_est) for region in regions ]) else: start_ssds = np.ones(len(regions)) * -999.0 if compactness: # capture compactness from base solution start_compactness = UTILS.compactness_global( regions, compactness) if local_improvement and len(regions) > 1: # only run the local improvement if the appropriate flag is set # (local_improvement=True) and if there is more then one region to # swap areas between # swap areas along region borders that improve SSD time3a = time.time() regions, id2region, exit = \ LOCAL.local_search(regions, id2region, w, count_th_min, count_th_max,\ count_est, target_th, target_parts,\ target_est, exclude, get_cv,\ local_test, local_params, cv_exclude) time4 = time.time() # collect stats on SSD for each region end_ssds = np.array([ UTILS.sum_squares(region, target_est) for region in regions ]) ssd_improvement = (end_ssds - start_ssds) / start_ssds ssd_improvement[np.isnan( ssd_improvement )] = 0.0 # makes singleton regions have 0 improvement ssds = np.vstack((start_ssds, end_ssds, ssd_improvement)).T if compactness: # capture compactness from final solution end_compactness = UTILS.compactness_global( regions, compactness) compact_change = \ (end_compactness - start_compactness) / start_compactness compacts = np.vstack( (start_compactness, end_compactness, compact_change)).T else: compacts = np.ones((len(regions), 3)) * -999.0 time4a = time.time() else: time3a = time4 = time.time() # capture start SSDs and compactness, insert -999 for "improvements" ssds = np.vstack((start_ssds, np.ones(start_ssds.shape)*-999,\ np.ones(start_ssds.shape)*-999)).T if compactness: compacts = np.vstack((start_compactness, np.ones(start_compactness.shape)*-999,\ np.ones(start_compactness.shape)*-999)).T else: compacts = np.ones((len(regions), 3)) 
* -999.0 exit = 'no local improvement' print "Did not run local improvement" time4a = time.time() time_output['base_wrapup'] = time3a - time3 time_output['local'] = time4 - time3a time_output['local_wrapup'] = time4a - time4 #################### # process regionalization results for user output #################### # setup header for the pandas dataframes (estimates, MOEs, CVs) header = [] if target_est_count is not None: if 'pandas' in str(type(target_est_count)): header.extend(target_est_count.columns.tolist()) else: header.extend([ 'count_var' + str(i) for i in range(target_est_count.shape[1]) ]) if target_est_prop is not None: if 'pandas' in str(type(target_est_prop)): header.extend(target_est_count.prop.tolist()) else: header.extend([ 'prop_var' + str(i) for i in range(target_est_prop.shape[1] / 2) ]) if target_est_ratio is not None: if 'pandas' in str(type(target_est_ratio)): header.extend(target_est_ratio.columns.tolist()) else: header.extend([ 'ratio_var' + str(i) for i in range(target_est_ratio.shape[1] / 2) ]) # initialize pandas dataframes (estimates, MOEs, CVs; regions and areas) regionID = pd.Index(range(len(regions)), name='regionID') ests_region = pd.DataFrame(index=regionID, columns=header) moes_region = pd.DataFrame(index=regionID, columns=header) cvs_region = pd.DataFrame(index=regionID, columns=header) areaID = pd.Index(range(w.n), name='areaID') ests_area = pd.DataFrame(index=areaID, columns=header) moes_area = pd.DataFrame(index=areaID, columns=header) cvs_area = pd.DataFrame(index=areaID, columns=header) # setup header and pandas dataframe (count variable, if applicable) header = ['count'] if count_est is not None: if 'pandas' in str(type(count_est)): header = [count_est.columns[0]] counts_region = pd.DataFrame(index=range(len(regions)), columns=header) counts_area = pd.DataFrame(index=range(w.n), columns=header) # create SSD and compactness dataframes if id2region == False: # Infeasible base run ssds = None compacts = None else: ssds = pd.DataFrame( ssds, index=regionID, columns=['start_ssd', 'end_ssd', 'ssd_improvement']) compacts = pd.DataFrame(compacts, index=regionID, columns=[ 'start_compactness', 'end_compactness', 'compactness_improvement' ]) # this one-dimensional list will contain the region IDs (ordered by area) ordered_region_ids = np.ones(w.n) * -9999 for i, region in enumerate(regions): if count_est is not None: # get region totals for count variable counts_region.ix[i] = count_est[region].sum() for j in region: counts_area.ix[j] = count_est[j] ests = [] sdes = [] if target_est_count is not None: # est, MOE and CV for count data est, sde = UTILS.get_est_sde_count(region, target_parts) est[np.isnan(est)] = 0.0 # clean up 0/0 case sde[np.isnan(sde)] = 0.0 # clean up 0/0 case ests.extend(est) sdes.extend(sde) if target_est_prop is not None: # est, MOE and CV for proportion data est, sde = UTILS.get_est_sde_prop(region, target_parts) est[np.isnan(est)] = 0.0 # clean up 0/0 case sde[np.isnan(sde)] = 0.0 # clean up 0/0 case ests.extend(est) sdes.extend(sde) if target_est_ratio is not None: # est, MOE and CV for ratio data est, sde = UTILS.get_est_sde_ratio(region, target_parts) est[np.isnan(est)] = 0.0 # clean up 0/0 case sde[np.isnan(sde)] = 0.0 # clean up 0/0 case ests.extend(est) sdes.extend(sde) ests_region, moes_region, cvs_region = wrapup_region(\ i, ests, sdes, target_parts, ests_region, moes_region, cvs_region) ests_area, moes_area, cvs_area = wrapup_areas(\ region, target_parts, ests_area, moes_area, cvs_area) ordered_region_ids[region] = i # set 
excluded areas to region ID -999 ordered_region_ids[exclude] = -999 time5 = time.time() time_output['wrapup'] = time5 - time4 time_output['total'] = time5 - time1 self.exit = exit self.time = time_output self.enclaves = enclaves self.p = len(regions) self.regions = regions self.region_ids = ordered_region_ids.tolist() self.ssds = ssds self.compactness = compacts self.ests_region = ests_region self.moes_region = moes_region self.cvs_region = cvs_region self.ests_area = ests_area self.moes_area = moes_area self.cvs_area = cvs_area self.counts_region = counts_region self.counts_area = counts_area self.problem_ids = problem_ids
def solve_it(input_data):
    # Modify this code to run your optimization algorithm

    # parse the input
    lines = input_data.split('\n')
    parts = lines[0].split()
    facility_count = int(parts[0])
    customer_count = int(parts[1])

    facilities = []
    for i in range(1, facility_count + 1):
        parts = lines[i].split()
        facilities.append(
            Facility(i - 1, float(parts[0]), int(parts[1]),
                     Point(float(parts[2]), float(parts[3]))))

    customers = []
    for i in range(facility_count + 1, facility_count + 1 + customer_count):
        parts = lines[i].split()
        customers.append(
            Customer(i - 1 - facility_count, int(parts[0]),
                     Point(float(parts[1]), float(parts[2]))))

    from mip1 import run1
    from mip1 import weight_random, weight_length, weight_length_plus_some_setup
    # solution = run1(facilities, customers, weight_random, 20)
    # from mip2 import run2
    # solution = run2(facilities, customers, num_fac=2, iterations=500)
    from local_search import local_search
    solution = local_search(facilities, customers)

    # build a trivial solution
    # pack the facilities one by one until all the customers are served
    '''solution = [-1]*len(customers)
    capacity_remaining = [f.capacity for f in facilities]
    facility_index = 0
    for customer in customers:
        if capacity_remaining[facility_index] >= customer.demand:
            solution[customer.index] = facility_index
            capacity_remaining[facility_index] -= customer.demand
        else:
            facility_index += 1
            assert capacity_remaining[facility_index] >= customer.demand
            solution[customer.index] = facility_index
            capacity_remaining[facility_index] -= customer.demand
    '''

    used = [0] * len(facilities)
    for facility_index in solution:
        used[facility_index] = 1

    # calculate the cost of the solution
    obj = sum([f.setup_cost * used[f.index] for f in facilities])
    for customer in customers:
        obj += length(customer.location, facilities[solution[customer.index]].location)

    # prepare the solution in the specified output format
    output_data = '%.2f' % obj + ' ' + str(0) + '\n'
    output_data += ' '.join(map(str, solution))

    return output_data
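# The `Facility`/`Customer`/`Point` records and the `length` helper used by solve_it()
# are defined elsewhere in this skeleton and are not shown. In this style of facility
# location starter code they are presumably along the following lines; this is an
# assumption shown only for context, not the module's actual definitions.
import math
from collections import namedtuple

Point = namedtuple("Point", ['x', 'y'])
Facility = namedtuple("Facility", ['index', 'setup_cost', 'capacity', 'location'])
Customer = namedtuple("Customer", ['index', 'demand', 'location'])

def length(point1, point2):
    # Euclidean distance between two points
    return math.sqrt((point1.x - point2.x) ** 2 + (point1.y - point2.y) ** 2)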
            visualise_sol(w_price, w_space, w_capacity, solution)

    # ITERATIVE
    if task == "iterative":
        for i in range(8):
            if m_time:
                print "@ Start constructive h.: %s" % (stops.start_time())
            init_solution = constructive_random_heuristic(w_price, w_space, w_capacity)
            if m_time:
                print "@ Start iterative ls.: %s" % (stops.current_time())
            frame = local_search(init_solution, w_price, w_space, w_capacity)
            solution = frame.run_iteration()
            temp_price = calculate_price(solution, w_price)
            if temp_price < best_sol:
                best_sol = temp_price
                best_type = "iterative"
            if m_time:
                print "@ Finish: %s" % (stops.current_time())
            print "\t\tIterative: %s" % (str(temp_price))
            if stats:
                print solution_table(solution)
            if visualise:
                visualise_sol(w_price, w_space, w_capacity, solution)

    # GREAT DELUGE
    if task in ("deluge", "delugeall",
                "delugerandom", "delugegreedy", "delugepeckish"):
def main(config, black_box_function=None, output_file=""): """ Run design-space exploration using prior injection. :param config: dictionary containing all the configuration parameters of this design-space exploration. :param black_box_function: black-box function to optimize if running on default mode. :param output_file: a name for the file used to save the optimization results. """ debug = False sys.stdout.write_to_logfile(str(config) + "\n") param_space = space.Space(config) random_time = datetime.datetime.now() run_directory = config["run_directory"] application_name = config["application_name"] hypermapper_mode = config["hypermapper_mode"]["mode"] log_file = deal_with_relative_and_absolute_path(run_directory, config["log_file"]) sys.stdout.change_log_file(log_file) if (hypermapper_mode == 'client-server'): sys.stdout.switch_log_only_on_file(True) if hypermapper_mode == "default": if black_box_function == None: print("Error: the black box function must be provided") raise SystemExit if not callable(black_box_function): print("Error: the black box function parameter is not callable") raise SystemExit input_params = param_space.get_input_parameters() optimization_metrics = config["optimization_objectives"] if len(optimization_metrics) > 1: print( "Error: prior optimization does not support multi-objective optimization yet" ) exit() number_of_objectives = len(optimization_metrics) optimization_iterations = config["optimization_iterations"] evaluations_per_optimization_iteration = config[ "evaluations_per_optimization_iteration"] number_of_cpus = config["number_of_cpus"] if number_of_cpus > 1: print( "Warning: this mode supports only sequential execution for now. Running on a single cpu." ) number_of_cpus = 1 print_importances = config["print_parameter_importance"] epsilon_greedy_threshold = config["epsilon_greedy_threshold"] if "feasible_output" in config: feasible_output = config["feasible_output"] feasible_output_name = feasible_output["name"] enable_feasible_predictor = feasible_output[ "enable_feasible_predictor"] enable_feasible_predictor_grid_search_on_recall_and_precision = feasible_output[ "enable_feasible_predictor_grid_search_on_recall_and_precision"] feasible_predictor_grid_search_validation_file = feasible_output[ "feasible_predictor_grid_search_validation_file"] feasible_parameter = param_space.get_feasible_parameter() acquisition_function_optimizer = config["acquisition_function_optimizer"] if acquisition_function_optimizer == "local_search": local_search_random_points = config["local_search_random_points"] local_search_starting_points = config["local_search_starting_points"] elif acquisition_function_optimizer == "posterior_sampling": posterior_sampling_tuning_points = config[ "posterior_sampling_tuning_points"] posterior_sampling_final_samples = config[ "posterior_sampling_final_samples"] posterior_sampling_mcmc_chains = config[ "posterior_sampling_mcmc_chains"] else: print( "Unrecognized acquisition function optimization method in the configuration file:", acquisition_function_optimizer) raise SystemExit exhaustive_search_data_array = None exhaustive_search_fast_addressing_of_data_array = None scalarization_key = config["scalarization_key"] scalarization_method = config["scalarization_method"] model_weight = config["model_posterior_weight"] model_good_quantile = config["model_good_quantile"] weight_sampling = config["weight_sampling"] objective_limits = {} for objective in optimization_metrics: objective_limits[objective] = [float("inf"), float("-inf")] 
number_of_doe_samples = config["design_of_experiment"]["number_of_samples"] model_type = config["models"]["model"] regression_model_parameters = {} if model_type == "random_forest": regression_model_parameters["n_estimators"] = config["models"][ "number_of_trees"] regression_model_parameters["max_features"] = config["models"][ "max_features"] regression_model_parameters["bootstrap"] = config["models"][ "bootstrap"] regression_model_parameters["min_samples_split"] = config["models"][ "min_samples_split"] tree_means_per_leaf = None tree_vars_per_leaf = None if output_file == "": output_data_file = config["output_data_file"] if output_data_file == "output_samples.csv": output_data_file = application_name + "_" + output_data_file else: output_data_file = output_file beginning_of_time = param_space.current_milli_time() absolute_configuration_index = 0 if param_space.get_prior_normalization_flag() is True: prior_limit_estimation_points = config["prior_limit_estimation_points"] objective_weights = sample_weight_flat( optimization_metrics, 1 )[0] # this will do fine for 1 objective cases, but for multi-objective optimization it might break good_prior_normalization_limits = estimate_prior_limits( param_space, prior_limit_estimation_points, objective_weights) else: good_prior_normalization_limits = None # Design of experiments/resume optimization phase doe_t0 = datetime.datetime.now() if config["resume_optimization"] == True: resume_data_file = config["resume_optimization_data"] if not resume_data_file.endswith('.csv'): print("Error: resume data file must be a CSV") raise SystemExit if resume_data_file == "output_samples.csv": resume_data_file = application_name + "_" + resume_data_file data_array, fast_addressing_of_data_array = param_space.load_data_file( resume_data_file, debug=False, number_of_cpus=number_of_cpus) absolute_configuration_index = len(data_array[list(data_array.keys( ))[0]]) # get the number of points evaluated in the previous run beginning_of_time = beginning_of_time - data_array[ param_space.get_timestamp_parameter()[0]][ -1] # Set the timestamp back to match the previous run print("Resumed optimization, number of samples = %d ......." % absolute_configuration_index) if absolute_configuration_index < number_of_doe_samples: configurations = param_space.get_doe_sample_configurations( fast_addressing_of_data_array, number_of_doe_samples - absolute_configuration_index, "random sampling") print( "Design of experiment phase, number of new doe samples = %d ......." 
% (number_of_doe_samples - absolute_configuration_index)) new_data_array = param_space.run_configurations( hypermapper_mode, configurations, beginning_of_time, black_box_function, exhaustive_search_data_array, exhaustive_search_fast_addressing_of_data_array, run_directory) data_array = concatenate_data_dictionaries( data_array, new_data_array, param_space.input_output_and_timestamp_parameter_names) absolute_configuration_index = number_of_doe_samples iteration_number = 1 else: iteration_number = absolute_configuration_index - number_of_doe_samples + 1 else: fast_addressing_of_data_array = {} default_configuration = param_space.get_default_or_random_configuration( ) str_data = param_space.get_unique_hash_string_from_values( default_configuration) fast_addressing_of_data_array[str_data] = absolute_configuration_index if number_of_doe_samples - 1 > 0: configurations = param_space.get_doe_sample_configurations( fast_addressing_of_data_array, number_of_doe_samples - 1, "random sampling") + [default_configuration] else: configurations = [default_configuration] print( "Design of experiment phase, number of doe samples = %d ......." % number_of_doe_samples) data_array = param_space.run_configurations( hypermapper_mode, configurations, beginning_of_time, black_box_function, exhaustive_search_data_array, exhaustive_search_fast_addressing_of_data_array, run_directory) absolute_configuration_index += number_of_doe_samples iteration_number = 1 for objective in optimization_metrics: lower_bound = min(objective_limits[objective][0], min(data_array[objective])) upper_bound = max(objective_limits[objective][1], max(data_array[objective])) objective_limits[objective] = [lower_bound, upper_bound] if enable_feasible_predictor: # HyperMapper needs at least one valid and one invalid sample for its feasibility classifier # i.e. they cannot all be equal while are_all_elements_equal(data_array[ feasible_parameter[0]]) and optimization_iterations > 0: print( "Warning: all points are either valid or invalid, random sampling more configurations." 
) print("Number of doe samples so far:", absolute_configuration_index) configurations = param_space.get_doe_sample_configurations( fast_addressing_of_data_array, 1, "random sampling") new_data_array = param_space.run_configurations( hypermapper_mode, configurations, beginning_of_time, black_box_function, exhaustive_search_data_array, exhaustive_search_fast_addressing_of_data_array, run_directory) data_array = concatenate_data_dictionaries( new_data_array, data_array, param_space.input_output_and_timestamp_parameter_names) absolute_configuration_index += 1 optimization_iterations -= 1 print( "\nEnd of doe/resume phase, the number of configuration runs is: %d\n" % absolute_configuration_index) sys.stdout.write_to_logfile( ("DoE time %10.4f sec\n" % ((datetime.datetime.now() - doe_t0).total_seconds()))) with open( deal_with_relative_and_absolute_path(run_directory, output_data_file), 'w') as f: w = csv.writer(f) w.writerow(param_space.get_input_output_and_timestamp_parameters()) tmp_list = [ param_space.convert_types_to_string(j, data_array) for j in param_space.get_input_output_and_timestamp_parameters() ] tmp_list = list(zip(*tmp_list)) for i in range(len(data_array[optimization_metrics[0]])): w.writerow(tmp_list[i]) if evaluations_per_optimization_iteration > 1: print("Warning, number of evaluations per iteration > 1") print( "HyperMapper's prior optimization currently does not support multiple runs per iteration, setting evaluations per iteration to 1" ) function_parameters = {} function_parameters["param_space"] = param_space function_parameters["model_weight"] = model_weight function_parameters["threshold"] = {} function_parameters[ "good_prior_normalization_limits"] = good_prior_normalization_limits function_parameters["posterior_floor"] = config[ "posterior_computation_lower_limit"] function_parameters['model_type'] = model_type bo_t0 = datetime.datetime.now() while iteration_number <= optimization_iterations: print("Starting optimization iteration", iteration_number) model_t0 = datetime.datetime.now() iteration_t0 = datetime.datetime.now() regression_models, _, _ = models.generate_mono_output_regression_models( data_array, param_space, input_params, optimization_metrics, 1.00, model_type=model_type, number_of_cpus=number_of_cpus, print_importances=print_importances, **regression_model_parameters) normalized_objectives = {} for objective in optimization_metrics: if objective_limits[objective][1] == objective_limits[objective][ 0]: normalized_objectives[objective] = [0] * len( data_array[objective]) else: normalized_objectives[objective] = [(x - objective_limits[objective][0]) \ /(objective_limits[objective][1] - objective_limits[objective][0]) for x in data_array[objective]] function_parameters["threshold"][objective] = np.quantile( data_array[objective], model_good_quantile) if model_type == "random_forest": # Change splits of each node from (lower_bound + upper_bound)/2 to a uniformly sampled split in (lower_bound, upper_bound) bufferx = [data_array[input_param] for input_param in input_params] bufferx = list(map(list, list(zip(*bufferx)))) tree_means_per_leaf = {} tree_vars_per_leaf = {} leaf_per_sample = models.get_leaves_per_sample( bufferx, regression_models, param_space) for objective in optimization_metrics: tree_means_per_leaf[objective] = models.get_mean_per_leaf( data_array[objective], leaf_per_sample[objective]) tree_vars_per_leaf[objective] = models.get_var_per_leaf( data_array[objective], leaf_per_sample[objective]) regression_models = 
models.transform_rf_using_uniform_splits( regression_models, data_array, param_space) function_parameters["tree_means_per_leaf"] = tree_means_per_leaf function_parameters["tree_vars_per_leaf"] = tree_vars_per_leaf classification_model = None if enable_feasible_predictor: classification_model, _, _ = models.generate_classification_model( application_name, param_space, data_array, input_params, feasible_parameter, 1.00, debug, n_estimators=10, max_features=0.75, number_of_cpus=number_of_cpus, data_array_exhaustive=exhaustive_search_data_array, enable_feasible_predictor_grid_search_on_recall_and_precision= enable_feasible_predictor_grid_search_on_recall_and_precision, feasible_predictor_grid_search_validation_file= feasible_predictor_grid_search_validation_file, print_importances=print_importances) sys.stdout.write_to_logfile( ("Model fitting time %10.4f sec\n" % ((datetime.datetime.now() - model_t0).total_seconds()))) objective_weights = sample_weight_flat(optimization_metrics, 1)[0] data_array_scalarization, objective_limits = compute_data_array_scalarization( data_array, objective_weights, objective_limits, None, scalarization_method) data_array[scalarization_key] = data_array_scalarization.tolist() epsilon = random.uniform(0, 1) if epsilon > epsilon_greedy_threshold: function_parameters["objective_weights"] = objective_weights function_parameters["objective_limits"] = objective_limits function_parameters["iteration_number"] = iteration_number function_parameters["objective_weights"] = objective_weights function_parameters["regression_models"] = regression_models function_parameters['classification_model'] = classification_model if enable_feasible_predictor: function_parameters["posterior_normalization_limits"] = [ float("inf"), float("-inf") ] if acquisition_function_optimizer == "local_search": _, best_configuration = local_search( local_search_starting_points, local_search_random_points, param_space, fast_addressing_of_data_array, False, # set feasibility to false, we handle it inside the acquisition function compute_EI_from_posteriors, function_parameters, scalarization_key, previous_points=data_array) else: print( "Unrecognized acquisition function optimization method in the configuration file:", acquisition_function_optimizer) raise SystemExit str_data = param_space.get_unique_hash_string_from_values( best_configuration) fast_addressing_of_data_array[ str_data] = absolute_configuration_index absolute_configuration_index += 1 else: sys.stdout.write_to_logfile( str(epsilon) + " < " + str(epsilon_greedy_threshold) + " random sampling a configuration to run\n") best_configuration = param_space.random_sample_configurations_without_repetitions( fast_addressing_of_data_array, 1, use_priors=False)[0] black_box_t0 = datetime.datetime.now() best_configuration = [best_configuration] new_data_array = param_space.run_configurations( hypermapper_mode, best_configuration, beginning_of_time, black_box_function, exhaustive_search_data_array, exhaustive_search_fast_addressing_of_data_array, run_directory) sys.stdout.write_to_logfile( ("Black box time %10.4f sec\n" % ((datetime.datetime.now() - black_box_t0).total_seconds()))) with open( deal_with_relative_and_absolute_path(run_directory, output_data_file), 'a') as f: w = csv.writer(f) tmp_list = [ param_space.convert_types_to_string(j, new_data_array) for j in list(param_space.get_input_output_and_timestamp_parameters()) ] tmp_list = list(zip(*tmp_list)) for i in range(len(new_data_array[optimization_metrics[0]])): w.writerow(tmp_list[i]) data_array = 
concatenate_data_dictionaries( new_data_array, data_array, param_space.input_output_and_timestamp_parameter_names) for objective in optimization_metrics: lower_bound = min(objective_limits[objective][0], min(data_array[objective])) upper_bound = max(objective_limits[objective][1], max(data_array[objective])) objective_limits[objective] = [lower_bound, upper_bound] iteration_number += 1 sys.stdout.write_to_logfile( ("BO iteration time %10.4f sec\n" % ((datetime.datetime.now() - iteration_t0).total_seconds()))) sys.stdout.write_to_logfile( ("BO total time %10.4f sec\n" % ((datetime.datetime.now() - bo_t0).total_seconds()))) print("End of Prior Optimization")