def iteration_algorithm(self, n_restarts=10, n_samples=10):
    """
    Computes the next point to evaluate: optimizes the Monte Carlo estimate
    of the objective over the control variables, and then chooses the
    environment variables with get_environment.

    :param n_restarts: (int) number of starting points for the optimization
    :param n_samples: (int) number of Monte Carlo samples
    :return: np.array(n)
    """
    if self.parameters is None:
        self.estimate_parameters_kernel()
    parameters = self.parameters

    samples = self.sample_variable(parameters, n_samples)

    bounds = [tuple(bound) for bound in
              [self.gp.bounds[i] for i in range(self.x_domain)]]

    start = DomainService.get_points_domain(
        n_restarts, self.gp.bounds[0:self.x_domain],
        type_bounds=self.gp.type_bounds[0:self.x_domain])

    start_points = {}
    for i in range(n_restarts):
        start_points[i] = start[i]

    optimization = Optimization(
        NELDER, wrapper_ei_objective, bounds, None,
        hessian=None, tol=None, minimize=False)

    args = (False, None, True, 0, optimization, self, samples, parameters)

    sol = Parallel.run_function_different_arguments_parallel(
        wrapper_optimize, start_points, *args)

    solutions = []
    results_opt = []
    for i in range(n_restarts):
        if sol.get(i) is None:
            logger.info(
                "Error optimizing the objective from starting point %d" % i)
            continue
        solutions.append(sol.get(i)['optimal_value'])
        results_opt.append(sol.get(i))

    ind_max = np.argmax(solutions)
    control = results_opt[ind_max]['solution']

    # TODO: do this in parallel
    environment = self.get_environment(control, parameters)

    return np.concatenate((control, environment))
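# For reference, a minimal self-contained sketch of the multi-start pattern
# used above, with scipy.optimize standing in for the project's
# Optimization/Parallel classes. The dict keys 'solution' and
# 'optimal_value' mirror the contract consumed by iteration_algorithm;
# everything else here is illustrative, not part of this codebase.
def _sketch_multistart_maximize(objective, starts):
    from scipy.optimize import minimize

    results = []
    for start in starts:
        # Nelder-Mead on the negated objective, i.e. maximization, matching
        # minimize=False above.
        res = minimize(lambda x: -objective(x), start, method='Nelder-Mead')
        results.append({'solution': res.x, 'optimal_value': -res.fun})
    # Keep the best restart, as the argmax over 'solutions' does above.
    return max(results, key=lambda r: r['optimal_value'])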
def get_environment(self, control, parameters_kernel, n_restarts=10):
    """
    Chooses the environment variables for the given control variables.
    See p. 1142, eq. 15.

    :param control: np.array(m)
    :param parameters_kernel: np.array(l), parameters of the kernel
    :param n_restarts: (int) number of starting points for the optimization
    :return: np.array(k)
    """
    bounds = [tuple(bound) for bound in
              [self.gp.bounds[i] for i in self.w_domain]]
    # Keep only the first and last entry of each bound, i.e. its lower and
    # upper endpoints.
    bounds = [[bound[0], bound[-1]] for bound in bounds]

    start = DomainService.get_points_domain(n_restarts, bounds)

    start_points = {}
    for i in range(n_restarts):
        start_points[i] = start[i]

    optimization = Optimization(
        NELDER, wrapper_evaluate_squared_error, bounds, None,
        hessian=None, tol=None, minimize=False)

    run_parallel = True
    args = (False, None, run_parallel, 0, optimization, self, control,
            parameters_kernel)

    sol = Parallel.run_function_different_arguments_parallel(
        wrapper_optimize, start_points, *args)

    solutions = []
    results_opt = []
    for i in range(n_restarts):
        if sol.get(i) is None:
            logger.info(
                "Error optimizing the environment variables from starting "
                "point %d" % i)
            continue
        solutions.append(sol.get(i)['optimal_value'])
        results_opt.append(sol.get(i))

    ind_max = np.argmax(solutions)
    environment = results_opt[ind_max]['solution']

    return environment
def wrapper_evaluate_sbo(candidate_points, task, self):
    """
    Evaluates the value of information at the candidate points for a fixed
    task.

    :param candidate_points: np.array(rxn)
    :param task: (int)
    :param self: sbo instance
    :return: np.array(r)
    """
    tasks = candidate_points.shape[0] * [task]
    tasks = np.array(tasks).reshape((len(tasks), 1))
    candidate_points = np.concatenate((candidate_points, tasks), axis=1)

    vectors = self.bq.compute_posterior_parameters_kg_many_cp(
        self.discretization, candidate_points)

    a = vectors['a']
    b = vectors['b']

    r = candidate_points.shape[0]
    values = np.zeros(r)

    # One b vector (column) per candidate point; each is processed by
    # wrapper_hvoi in parallel.
    b_vectors = {}
    for i in xrange(r):
        b_vectors[i] = b[:, i]

    args = (False, None, True, 0, a, self)

    val = Parallel.run_function_different_arguments_parallel(
        wrapper_hvoi, b_vectors, *args)

    for i in xrange(r):
        if val.get(i) is None:
            logger.info("Computation of the VOI failed for new_point %d" % i)
            continue
        values[i] = val[i]

    return values
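# Note (an assumption based on the knowledge-gradient literature, not on
# anything shown in this file): wrapper_hvoi presumably evaluates
# h(a, b) = E[max_i (a_i + b_i * Z)] - max_i a_i with Z ~ N(0, 1), where a
# is the posterior mean over the discretization and b is the vector
# returned for each candidate point.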
def integrate_toy_example(x):
    """
    Averages the toy-example objective over all cross-validation folds;
    the folds are evaluated in parallel.

    :param x: [float, float, int, int]
    :return: [float]
    """
    points = {}
    for task in xrange(n_folds):
        point = deepcopy(x)
        point.append(task)
        points[task] = point

    errors = Parallel.run_function_different_arguments_parallel(
        toy_example, points)

    values = convert_dictionary_to_list(errors)

    return [np.mean(np.array(values))]
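# convert_dictionary_to_list is defined elsewhere in the codebase; from the
# way it is used here, it is assumed to flatten the dict returned by
# Parallel (keyed by the same indices as the input arguments) back into a
# list. A plausible minimal sketch, ordering values by key so that position
# i matches the argument stored under key i:
def convert_dictionary_to_list(dictionary):
    return [dictionary[key] for key in sorted(dictionary)]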
def test_run_function_different_arguments_parallel(self):
    arguments = {0: 1, 1: 2, 2: 3, 3: 4}

    result = Parallel.run_function_different_arguments_parallel(f, arguments)
    assert result == {0: 1, 1: 2, 2: 3, 3: 4}

    # g always raises, so all_success=True must propagate the failure.
    with self.assertRaises(Exception):
        Parallel.run_function_different_arguments_parallel(
            g, arguments, all_success=True)

    # By default, per-argument failures are tolerated.
    Parallel.run_function_different_arguments_parallel(g, arguments)

    # A KeyboardInterrupt in a worker makes the call return -1.
    mock = Mock(side_effect=KeyboardInterrupt)
    assert -1 == Parallel.run_function_different_arguments_parallel(
        mock, arguments, all_success=False, signal=mock)
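# The test above depends on module-level helpers f and g defined elsewhere
# in the test module; from the assertions, minimal definitions consistent
# with the test would be:
def f(x):
    # Identity, so mapping f over {0: 1, 1: 2, ...} returns the same dict.
    return x


def g(x):
    # Always fails, so all_success=True must raise while the default call
    # only records the per-argument failure.
    raise ValueError('g always fails')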
def toy_example(x):
    """
    Averages the per-fold error over all cross-validation folds; the folds
    are evaluated sequentially (parallel=False).

    :param x: [float, float, int, int, int]
    :return: [float]
    """
    x = list(x)

    points = {}
    for task in xrange(n_folds):
        point = deepcopy(x)
        point.append(task)
        points[task] = point

    errors = Parallel.run_function_different_arguments_parallel(
        error_per_fold, points, parallel=False)

    values = convert_dictionary_to_list(errors)

    return [np.mean(np.array(values))]
def estimate_parameters_kernel(self, n_restarts=10):
    """
    Estimates the kernel parameters by maximizing the log posterior
    distribution of the length scales, starting from samples of the
    posterior of the parameters.

    :param n_restarts: (int) number of starting points for the optimization
    :return: np.array(l) optimal parameters
    """
    start = self.gp.sample_parameters_posterior(n_restarts)
    # Keep only the length-scale entries; the first two sampled parameters
    # are not length scales.
    start = [sample[2:] for sample in start]
    dim = len(start[0])

    start_points = {}
    for i in xrange(n_restarts):
        start_points[i] = start[i]

    optimization = Optimization(
        NELDER, wrapper_log_posterior_distribution_length_scale,
        [(None, None) for _ in range(dim)], None,
        hessian=None, tol=None, minimize=False)

    args = (False, None, True, 0, optimization, self)

    sol = Parallel.run_function_different_arguments_parallel(
        wrapper_optimize, start_points, *args)

    solutions = []
    results_opt = []
    for i in xrange(n_restarts):
        if sol.get(i) is None:
            logger.info(
                "Error maximizing the log posterior from starting point %d"
                % i)
            continue
        solutions.append(sol.get(i)['optimal_value'])
        results_opt.append(sol.get(i))

    ind_max = np.argmax(solutions)
    self.parameters = results_opt[ind_max]['solution']

    return results_opt[ind_max]['solution']
def wrapper_evaluate_sbo_mc(candidate_points, task, self, n_samples,
                            n_restarts):
    """
    Evaluates the acquisition function at the candidate points for a fixed
    task using the Monte Carlo method.

    :param candidate_points: np.array(rxn)
    :param task: (int)
    :param self: sbo instance
    :param n_samples: (int) Number of samples for the MC method.
    :param n_restarts: (int) Number of restarts to optimize a_{n+1} given a
        sample.
    :return: np.array(r)
    """
    tasks = candidate_points.shape[0] * [task]
    tasks = np.array(tasks).reshape((len(tasks), 1))
    candidate_points = np.concatenate((candidate_points, tasks), axis=1)

    r = candidate_points.shape[0]
    values = np.zeros(r)

    points = {}
    for i in xrange(r):
        points[i] = candidate_points[i, :]

    args = (False, None, False, 0, self, True, n_samples, n_restarts)

    val = Parallel.run_function_different_arguments_parallel(
        wrapper_objective_voi, points, *args)

    for i in xrange(r):
        if val.get(i) is None:
            logger.info("Computation of the VOI failed for new_point %d" % i)
            continue
        values[i] = val[i]

    return values
def generate_evaluations(self, problem_name, model_type, training_name,
                         n_training, random_seed, iteration,
                         n_points_by_dimension=None, n_tasks=0):
    """
    Generates evaluations of SBO and writes them in the debug directory.

    :param problem_name: (str)
    :param model_type: (str)
    :param training_name: (str)
    :param n_training: (int)
    :param random_seed: (int)
    :param iteration: (int)
    :param n_points_by_dimension: [int] Number of points by dimension
    :param n_tasks: (int) n_tasks > 0 if the last element of the domain is
        a task
    """
    if not os.path.exists(DEBUGGING_DIR):
        os.mkdir(DEBUGGING_DIR)

    debug_dir = path.join(DEBUGGING_DIR, problem_name)
    if not os.path.exists(debug_dir):
        os.mkdir(debug_dir)

    kernel_name = '_'.join(self.gp.type_kernel)

    f_name = self._filename_points_ei_evaluations(
        model_type=model_type,
        problem_name=problem_name,
        type_kernel=kernel_name,
        training_name=training_name,
        n_training=n_training,
        random_seed=random_seed)

    debug_path = path.join(debug_dir, f_name)

    vectors = JSONFile.read(debug_path)

    if vectors is None:
        bounds = self.gp.bounds
        n_points = n_points_by_dimension
        if n_points is None:
            # Default: ten points per unit of length of the first bound,
            # replicated for every dimension so that the zip below works.
            n_points = len(bounds) * [int((bounds[0][1] - bounds[0][0]) * 10)]

        if n_tasks > 0:
            bounds_x = [bounds[i] for i in xrange(len(bounds) - 1)]
            n_points_x = [n_points[i] for i in xrange(len(n_points))]
        else:
            bounds_x = bounds
            n_points_x = n_points

        points = []
        for bound, number_points in zip(bounds_x, n_points_x):
            points.append(np.linspace(bound[0], bound[1], number_points))

        vectors = []
        for point in itertools.product(*points):
            vectors.append(point)

        JSONFile.write(vectors, debug_path)

    n = len(vectors)
    points_ = deepcopy(vectors)
    vectors = np.array(vectors)

    if n_tasks > 0:
        # Replicate the grid once per task, appending the task index as the
        # last coordinate.
        vectors_ = None
        for i in xrange(n_tasks):
            task_vector = np.zeros(n) + i
            task_vector = task_vector.reshape((n, 1))
            points_ = np.concatenate((vectors, task_vector), axis=1)
            if vectors_ is not None:
                vectors_ = np.concatenate((vectors_, points_), axis=0)
            else:
                vectors_ = points_
        vectors = vectors_

    # TODO: extend to the case where w can be continuous

    n = vectors.shape[0]
    points = {}
    for i in xrange(n):
        points[i] = vectors[i, :]

    args = (False, None, False, 0, self)

    val = Parallel.run_function_different_arguments_parallel(
        wrapper_objective_acquisition_function, points, *args)

    values = np.zeros(n)
    for i in xrange(n):
        values[i] = val.get(i)

    f_name = self._filename_ei_evaluations(
        iteration=iteration,
        model_type=model_type,
        problem_name=problem_name,
        type_kernel=kernel_name,
        training_name=training_name,
        n_training=n_training,
        random_seed=random_seed)

    debug_path = path.join(debug_dir, f_name)

    JSONFile.write({'points': points_, 'evaluations': values}, debug_path)

    return values
def optimize(self, start=None, random_seed=None, parallel=True,
             n_restarts=10, n_best_restarts=0, n_samples_parameters=0,
             start_new_chain=False, maxepoch=11, **kwargs):
    """
    Optimizes the EI.

    :param start: np.array(n)
    :param random_seed: int
    :param parallel: boolean
    :param n_restarts: int
    :param n_best_restarts: (int) Chooses the best n_best_restarts starting
        points based on their EI values.
    :param n_samples_parameters: int
    :param start_new_chain: (boolean) If True, we start a new chain with
        n_samples_parameters samples of the parameters of the GP model.
    :param maxepoch: (int) Maximum number of epochs when optimizing with
        SGD.
    :return: dictionary with the best solution found (its entries include
        'solution' and 'optimal_value').
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    if start_new_chain:
        if self.gp.name_model == BAYESIAN_QUADRATURE:
            self.gp.gp.start_new_chain()
            self.gp.gp.sample_parameters(DEFAULT_N_PARAMETERS)
        else:
            self.gp.start_new_chain()
            self.gp.sample_parameters(DEFAULT_N_PARAMETERS)

    bounds = self.gp.bounds

    if start is None:
        if self.gp.separate_tasks and \
                self.gp.name_model == BAYESIAN_QUADRATURE:
            # Spread the restarts evenly over the tasks.
            tasks = self.gp.tasks
            n_tasks = len(tasks)

            # Round n_restarts up to a multiple of n_tasks (force float
            # division so that np.ceil actually rounds up under Python 2).
            n_restarts = int(np.ceil(float(n_restarts) / n_tasks) * n_tasks)

            ind = [[i] for i in range(n_restarts)]
            np.random.shuffle(ind)

            task_chosen = np.zeros((n_restarts, 1))
            n_task_per_group = n_restarts // n_tasks

            for i in range(n_tasks):
                for j in range(n_task_per_group):
                    tk = ind[j + i * n_task_per_group]
                    task_chosen[tk, 0] = i

            start_points = DomainService.get_points_domain(
                n_restarts, bounds, type_bounds=self.gp.type_bounds,
                simplex_domain=self.simplex_domain)

            start_points = np.concatenate((start_points, task_chosen),
                                          axis=1)
        else:
            start_points = DomainService.get_points_domain(
                n_restarts, bounds, type_bounds=self.gp.type_bounds,
                simplex_domain=self.simplex_domain)

        start = np.array(start_points)

    if 0 < n_best_restarts < n_restarts:
        # Keep only the starting points with the largest EI values.
        point_dict = {}
        for j in xrange(start.shape[0]):
            point_dict[j] = start[j, :]

        args = (False, None, True, 0, self, DEFAULT_N_PARAMETERS)
        ei_values = Parallel.run_function_different_arguments_parallel(
            wrapper_objective_acquisition_function, point_dict, *args)

        values = [ei_values[i] for i in ei_values]
        values_index = sorted(range(len(values)), key=lambda k: values[k])
        values_index = values_index[-n_best_restarts:]

        start = np.array([point_dict[j] for j in values_index])
        n_restarts = start.shape[0]

    bounds = [tuple(bound) for bound in self.bounds_opt]

    objective_function = wrapper_objective_acquisition_function
    grad_function = wrapper_gradient_acquisition_function

    if n_samples_parameters == 0:
        # TODO: CHECK THIS
        optimization = Optimization(
            LBFGS_NAME, objective_function, bounds, grad_function,
            minimize=False)

        args = (False, None, parallel, 0, optimization, self,
                n_samples_parameters)

        opt_method = wrapper_optimize

        point_dict = {}
        for j in xrange(n_restarts):
            point_dict[j] = start[j, :]
    else:
        # TODO: CHANGE wrapper_objective_voi, wrapper_grad_voi_sgd SO THAT
        # TODO: max_{a_{n+1}} IS NOT SOLVED IN PARALLEL for the several
        # TODO: starting points.
        args_ = (self, DEFAULT_N_PARAMETERS)

        optimization = Optimization(
            SGD_NAME, objective_function, bounds,
            wrapper_evaluate_gradient_ei_sample_params,
            minimize=False, full_gradient=grad_function, args=args_,
            debug=True, simplex_domain=self.simplex_domain,
            **{'maxepoch': maxepoch})

        args = (False, None, parallel, 0, optimization,
                n_samples_parameters, self)
        # TODO: THINK ABOUT N_THREADS. Do we want to run it in parallel?

        opt_method = wrapper_sgd

        random_seeds = np.random.randint(0, 4294967295, n_restarts)
        point_dict = {}
        for j in xrange(n_restarts):
            point_dict[j] = [start[j, :], random_seeds[j]]

    optimal_solutions = Parallel.run_function_different_arguments_parallel(
        opt_method, point_dict, *args)

    maximum_values = []
    for j in xrange(n_restarts):
        maximum_values.append(optimal_solutions.get(j)['optimal_value'])

    ind_max = np.argmax(maximum_values)

    logger.info("Results of the optimization of the EI: ")
    logger.info(optimal_solutions.get(ind_max))

    self.optimization_results.append(optimal_solutions.get(ind_max))

    return optimal_solutions.get(ind_max)
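# Hypothetical usage of optimize (the acquisition object `ei` and the
# argument values are illustrative only, not part of this module):
#
#     ei = EI(gp_model)
#     result = ei.optimize(n_restarts=20, n_best_restarts=5,
#                          n_samples_parameters=10, start_new_chain=True)
#     new_point = result['solution']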
def get_training_data(cls, problem_name, training_name, bounds_domain,
                      n_training=5, points=None, noise=False,
                      n_samples=None, random_seed=DEFAULT_RANDOM_SEED,
                      parallel=True, type_bounds=None, cache=True,
                      gp_path_cache=None, simplex_domain=None,
                      objective_function=None):
    """
    :param problem_name: str
    :param training_name: (str) Prefix used to save the training data.
    :param bounds_domain: [([float, float] or [float])] The first case is
        when the bounds are the lower and upper bounds of the respective
        entry; in the second case, it's a list of finite points
        representing the domain of that entry.
    :param n_training: (int) Number of training points if points is None.
    :param points: [[float]]
    :param noise: (boolean) True if the evaluations are noisy.
    :param n_samples: (int) If noise is True, we take n_samples of the
        function to estimate its value.
    :param random_seed: int
    :param parallel: (boolean) Evaluate the points in parallel if True.
    :param type_bounds: [0 or 1] 0 if the bounds are the lower and upper
        bounds of the respective entry, 1 if the bounds are all the finite
        options for that entry.
    :param cache: (boolean) Try to get the data from the cache.
    :param gp_path_cache: (str) Path of a cached GP model.
    :param simplex_domain: Passed through to get_points_domain.
    :param objective_function: (function) Used to evaluate the points
        instead of the problem module if provided.
    :return: {'points': [[float]], 'evaluations': [float],
        'var_noise': [float] or []}
    """
    if cache and gp_path_cache is not None:
        data = JSONFile.read(gp_path_cache)
        if data is not None:
            return data['data']

    logger.info("Getting training data")

    rs = random_seed
    if points is not None and len(points) > 0:
        n_training = len(points)
        rs = 0

    file_name = cls._filename(
        problem_name=problem_name,
        training_name=training_name,
        n_points=n_training,
        random_seed=rs)

    if not os.path.exists(PROBLEM_DIR):
        os.mkdir(PROBLEM_DIR)

    training_dir = path.join(PROBLEM_DIR, problem_name, 'data')

    if not os.path.exists(path.join(PROBLEM_DIR, problem_name)):
        os.mkdir(path.join(PROBLEM_DIR, problem_name))

    if not os.path.exists(training_dir):
        os.mkdir(training_dir)

    training_path = path.join(training_dir, file_name)

    if cache:
        training_data = JSONFile.read(training_path)
    else:
        training_data = None

    if training_data is not None:
        return training_data

    if n_training == 0:
        return {'points': [], 'evaluations': [], 'var_noise': []}

    np.random.seed(random_seed)

    if points is None or len(points) == 0:
        points = cls.get_points_domain(
            n_training, bounds_domain, random_seed, training_name,
            problem_name, type_bounds, simplex_domain=simplex_domain)

    if objective_function is None:
        name_module = cls.get_name_module(problem_name)
        module = __import__(name_module, globals(), locals(), -1)
    else:
        name_module = None
        module = None

    training_data = {}
    training_data['points'] = points
    training_data['evaluations'] = []
    training_data['var_noise'] = []

    if not parallel:
        for point in points:
            if noise:
                if module is not None:
                    evaluation = cls.evaluate_function(module, point,
                                                       n_samples)
                else:
                    evaluation = objective_function(point, n_samples)
                training_data['var_noise'].append(evaluation[1])
            else:
                if module is not None:
                    evaluation = cls.evaluate_function(module, point)
                else:
                    evaluation = objective_function(point)
            training_data['evaluations'].append(evaluation[0])
            # Checkpoint the partial results after every evaluation.
            JSONFile.write(training_data, training_path)
        JSONFile.write(training_data, training_path)
        return training_data

    arguments = convert_list_to_dictionary(points)

    if name_module is not None:
        kwargs = {'name_module': name_module, 'cls_': cls,
                  'n_samples': n_samples}
    else:
        kwargs = {'name_module': None, 'cls_': cls, 'n_samples': n_samples,
                  'objective_function': objective_function}

    training_points = Parallel.run_function_different_arguments_parallel(
        wrapper_evaluate_objective_function, arguments, **kwargs)

    training_points = convert_dictionary_to_list(training_points)

    training_data['evaluations'] = [value[0] for value in training_points]

    if noise:
        training_data['var_noise'] = [value[1] for value in training_points]

    if cache:
        JSONFile.write(training_data, training_path)

    return training_data
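# Hypothetical usage with a user-supplied objective instead of a problem
# module (the class name and argument values are illustrative only):
#
#     data = TrainingDataService.get_training_data(
#         'toy_problem', 'run_0', bounds_domain=[[0.0, 1.0], [0.0, 1.0]],
#         n_training=8, noise=False,
#         objective_function=lambda point: [sum(point)])
#     # data == {'points': [...], 'evaluations': [...], 'var_noise': []}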
def optimize_mean(self, n_restarts=10, candidate_solutions=None,
                  candidate_values=None):
    """
    Optimizes the posterior mean of the model over the control variables.

    :param n_restarts: (int) number of starting points for the optimization
    :param candidate_solutions: [[float]] additional candidate points that
        are compared against the optimum found
    :param candidate_values: [float] values associated to
        candidate_solutions
    :return: {'solution': np.array(n), 'optimal_value': [float]}
    """
    if self.parameters is None:
        self.estimate_parameters_kernel()
    parameters = self.parameters

    bounds = [tuple(bound) for bound in
              [self.gp.bounds[i] for i in range(self.x_domain)]]

    start = DomainService.get_points_domain(
        n_restarts, self.gp.bounds[0:self.x_domain],
        type_bounds=self.gp.type_bounds[0:self.x_domain])

    start_points = {}
    for i in range(n_restarts):
        start_points[i] = start[i]

    optimization = Optimization(
        NELDER, wrapper_mean_objective, bounds, None,
        hessian=None, tol=None, minimize=False)

    args = (False, None, True, 0, optimization, self, parameters)

    sol = Parallel.run_function_different_arguments_parallel(
        wrapper_optimize, start_points, *args)

    solutions = []
    results_opt = []
    for i in range(n_restarts):
        if sol.get(i) is None:
            logger.info(
                "Error optimizing the posterior mean from starting point %d"
                % i)
            continue
        solutions.append(sol.get(i)['optimal_value'])
        results_opt.append(sol.get(i))

    ind_max = np.argmax(solutions)

    sol = results_opt[ind_max]
    sol['optimal_value'] = [sol['optimal_value']]

    if candidate_solutions is not None and len(candidate_solutions) > 0:
        # Compare the optimum found against the given candidate points and
        # keep the best.
        n = len(candidate_values)

        point_dict = {}
        args = (False, None, True, 0, self, parameters)
        for j in range(n):
            point_dict[j] = np.array(candidate_solutions[j])

        values = Parallel.run_function_different_arguments_parallel(
            wrapper_mean_objective, point_dict, *args)

        values_candidates = []
        for j in range(n):
            values_candidates.append(values[j])

        ind_max_2 = np.argmax(values_candidates)

        if np.max(values_candidates) > sol['optimal_value'][0]:
            sol = {'optimal_value': [values_candidates[ind_max_2]],
                   'solution': point_dict[ind_max_2]}

    return sol