コード例 #1
0
    def iteration_algorithm(self, n_restarts=10, n_samples=10):
        """
        Select the next point: optimize `wrapper_ei_objective` over the first
        `self.x_domain` coordinates, then append the environment returned by
        `self.get_environment` for the chosen control.

        Kernel parameters are estimated first when `self.parameters` is None.
        The optimizer is Nelder-Mead configured with `minimize=False`; the
        restart with the largest 'optimal_value' is kept. Restarts for which
        no result is available are logged and skipped.

        :param n_restarts: (int) number of starting points for the optimizer.
        :param n_samples: (int) number of samples requested from
            `self.sample_variable`.
        :return: np.array, the chosen control concatenated with its environment.
        :raises ValueError: if no restart produced a result.
        """
        if self.parameters is None:
            self.estimate_parameters_kernel()
        parameters = self.parameters

        samples = self.sample_variable(parameters, n_samples)

        bounds = [tuple(self.gp.bounds[i]) for i in range(self.x_domain)]
        start = DomainService.get_points_domain(
            n_restarts,
            self.gp.bounds[0:self.x_domain],
            type_bounds=self.gp.type_bounds[0:self.x_domain])

        start_points = {i: start[i] for i in range(n_restarts)}
        optimization = Optimization(NELDER,
                                    wrapper_ei_objective,
                                    bounds,
                                    None,
                                    hessian=None,
                                    tol=None,
                                    minimize=False)
        # NOTE(review): the four leading entries look like the runner options
        # (all_success, signal, parallel, ...) -- confirm against Parallel.
        args = (False, None, True, 0, optimization, self, samples, parameters)
        sol = Parallel.run_function_different_arguments_parallel(
            wrapper_optimize, start_points, *args)

        results_opt = []
        for i in range(n_restarts):
            result = sol.get(i)
            if result is None:
                logger.info(
                    "Error in computing optimum of a_{n+1} at one sample at point %d",
                    i)
                continue
            results_opt.append(result)

        if not results_opt:
            # np.argmax on an empty list fails with an opaque message; say
            # what actually went wrong instead.
            raise ValueError(
                "iteration_algorithm: none of the %d restarts produced a "
                "solution" % n_restarts)

        solutions = [result['optimal_value'] for result in results_opt]
        control = results_opt[np.argmax(solutions)]['solution']

        environment = self.get_environment(control, parameters)

        return np.concatenate((control, environment))
コード例 #2
0
    def get_environment(self, control, parameters_kernel, n_restarts=10):
        """
        Choose the environment coordinates for a fixed control by optimizing
        `wrapper_evaluate_squared_error` over the `self.w_domain` coordinates.

        See p.1142, eq. 15

        The optimizer is Nelder-Mead configured with `minimize=False`; the
        restart with the largest 'optimal_value' is returned. Restarts for
        which no result is available are logged and skipped.

        :param control: control point forwarded to the objective wrapper.
        :param parameters_kernel: kernel parameters forwarded to the objective
            wrapper.
        :param n_restarts: (int) number of starting points for the optimizer.
        :return: the 'solution' entry of the best restart.
        :raises ValueError: if no restart produced a result.
        """
        # Only the first and last entry of each bound are used as the
        # [lower, upper] pair.
        bounds = [[bound[0], bound[-1]]
                  for bound in (tuple(self.gp.bounds[i])
                                for i in self.w_domain)]
        start = DomainService.get_points_domain(n_restarts, bounds)

        start_points = {i: start[i] for i in range(n_restarts)}
        optimization = Optimization(NELDER,
                                    wrapper_evaluate_squared_error,
                                    bounds,
                                    None,
                                    hessian=None,
                                    tol=None,
                                    minimize=False)

        run_parallel = True
        args = (False, None, run_parallel, 0, optimization, self, control,
                parameters_kernel)
        sol = Parallel.run_function_different_arguments_parallel(
            wrapper_optimize, start_points, *args)

        results_opt = []
        for i in range(n_restarts):
            result = sol.get(i)
            if result is None:
                logger.info(
                    "Error in computing optimum of a_{n+1} at one sample at point %d",
                    i)
                continue
            results_opt.append(result)

        if not results_opt:
            # np.argmax on an empty list fails with an opaque message; say
            # what actually went wrong instead.
            raise ValueError(
                "get_environment: none of the %d restarts produced a "
                "solution" % n_restarts)

        solutions = [result['optimal_value'] for result in results_opt]
        environment = results_opt[np.argmax(solutions)]['solution']

        return environment
コード例 #3
0
def wrapper_evaluate_sbo(candidate_points, task, self):
    """
    Evaluate `wrapper_hvoi` once per candidate point, after appending the
    task as an extra column to the candidates.

    :param candidate_points: np.array(rxn)
    :param task: (int)
    :param self: sbo instance
    :return: np.array(r); entries whose evaluation is unavailable stay at 0.
    """
    n_rows = candidate_points.shape[0]
    task_column = np.array(n_rows * [task]).reshape((n_rows, 1))

    candidate_points = np.concatenate((candidate_points, task_column), axis=1)

    posterior = self.bq.compute_posterior_parameters_kg_many_cp(
        self.discretization, candidate_points)
    a = posterior['a']
    b = posterior['b']

    r = candidate_points.shape[0]
    values = np.zeros(r)

    # One column of b per candidate point.
    b_vectors = {i: b[:, i] for i in xrange(r)}

    args = (False, None, True, 0, a, self)
    val = Parallel.run_function_different_arguments_parallel(
        wrapper_hvoi, b_vectors, *args)

    for i in xrange(r):
        voi = val.get(i)
        if voi is None:
            logger.info("Computation of VOI failed for new_point %d" % i)
        else:
            values[i] = voi

    return values
コード例 #4
0
def integrate_toy_example(x):
    """
    Average `toy_example` over every fold index in `xrange(n_folds)`.

    For each fold a deep copy of x is extended with the fold index and passed
    to `toy_example` through the parallel runner.

    :param x: [float, float, int, int]
    :return: [float], the mean of the per-fold results in a one-element list.
    """
    points = {}
    for fold in xrange(n_folds):
        extended = deepcopy(x)
        extended.append(fold)
        points[fold] = extended

    errors = Parallel.run_function_different_arguments_parallel(
        toy_example, points)

    per_fold = np.array(convert_dictionary_to_list(errors))
    return [np.mean(per_fold)]
コード例 #5
0
    def test_run_function_different_arguments_parallel(self):
        """
        Exercise the parallel runner: a normal run with `f`, a run with `g`
        that must raise when all_success=True and must not raise otherwise,
        and a run interrupted by KeyboardInterrupt that must return -1.
        """
        inputs = {0: 1, 1: 2, 2: 3, 3: 4}

        outputs = Parallel.run_function_different_arguments_parallel(f, inputs)
        assert outputs == {0: 1, 1: 2, 2: 3, 3: 4}

        with self.assertRaises(Exception):
            Parallel.run_function_different_arguments_parallel(
                g, inputs, all_success=True)

        # Without all_success the same call is expected to complete.
        Parallel.run_function_different_arguments_parallel(g, inputs)

        interrupting = Mock(side_effect=KeyboardInterrupt)
        interrupted = Parallel.run_function_different_arguments_parallel(
            interrupting, inputs, all_success=False, signal=interrupting)

        assert -1 == interrupted
コード例 #6
0
def toy_example(x):
    """
    Average `error_per_fold` over every fold index in `xrange(n_folds)`.

    x is first converted to a list; for each fold a deep copy extended with
    the fold index is evaluated, with the runner called with parallel=False.

    :param x: [float, float, int, int, int]
    :return: [float], the mean of the per-fold results in a one-element list.
    """
    base_point = list(x)

    points = {}
    for fold in xrange(n_folds):
        extended = deepcopy(base_point)
        extended.append(fold)
        points[fold] = extended

    errors = Parallel.run_function_different_arguments_parallel(
        error_per_fold, points, parallel=False)

    per_fold = np.array(convert_dictionary_to_list(errors))
    return [np.mean(per_fold)]
コード例 #7
0
    def estimate_parameters_kernel(self, n_restarts=10):
        """
        Estimate the kernel parameters by optimizing
        `wrapper_log_posterior_distribution_length_scale` with Nelder-Mead
        (unbounded, `minimize=False`) and store the best result in
        `self.parameters`.

        Starting points come from `self.gp.sample_parameters_posterior`; the
        first two entries of each sample are dropped before optimizing.
        Restarts for which no result is available are logged and skipped.

        :param n_restarts: (int) number of starting points for the optimizer.
        :return: the 'solution' entry of the restart with the largest
            'optimal_value' (also stored in `self.parameters`).
        :raises ValueError: if no restart produced a result.
        """
        start = self.gp.sample_parameters_posterior(n_restarts)

        # NOTE(review): presumably the two leading entries are parameters that
        # are not optimized here -- confirm against sample_parameters_posterior.
        start = [sample[2:] for sample in start]
        dim = len(start[0])
        start_points = {i: start[i] for i in range(n_restarts)}
        optimization = Optimization(
            NELDER,
            wrapper_log_posterior_distribution_length_scale,
            [(None, None) for _ in range(dim)],
            None,
            hessian=None,
            tol=None,
            minimize=False)
        args = (False, None, True, 0, optimization, self)
        sol = Parallel.run_function_different_arguments_parallel(
            wrapper_optimize, start_points, *args)

        results_opt = []
        for i in range(n_restarts):
            result = sol.get(i)
            if result is None:
                logger.info(
                    "Error in computing optimum of a_{n+1} at one sample at point %d",
                    i)
                continue
            results_opt.append(result)

        if not results_opt:
            # np.argmax on an empty list fails with an opaque message; say
            # what actually went wrong instead.
            raise ValueError(
                "estimate_parameters_kernel: none of the %d restarts produced "
                "a solution" % n_restarts)

        solutions = [result['optimal_value'] for result in results_opt]
        best = results_opt[np.argmax(solutions)]['solution']

        self.parameters = best

        return best
コード例 #8
0
def wrapper_evaluate_sbo_mc(candidate_points, task, self, n_samples,
                            n_restarts):
    """
    Evaluate `wrapper_objective_voi` once per candidate point, after appending
    the task as an extra column to the candidates.

    :param candidate_points: np.array(rxn)
    :param task: (int)
    :param self: sbo instance
    :param n_samples: (int) Number of samples for the MC method.
    :param n_restarts: (int) Number of restarts to optimize a_{n+1} given a sample.

    :return: np.array(r); entries whose evaluation is unavailable stay at 0.
    """
    n_rows = candidate_points.shape[0]
    task_column = np.array(n_rows * [task]).reshape((n_rows, 1))

    candidate_points = np.concatenate((candidate_points, task_column), axis=1)

    r = candidate_points.shape[0]
    values = np.zeros(r)

    # One row (candidate plus task) per runner argument.
    points = {i: candidate_points[i, :] for i in xrange(r)}

    args = (False, None, False, 0, self, True, n_samples, n_restarts)
    val = Parallel.run_function_different_arguments_parallel(
        wrapper_objective_voi, points, *args)

    for i in xrange(r):
        voi = val.get(i)
        if voi is None:
            logger.info("Computation of VOI failed for new_point %d" % i)
        else:
            values[i] = voi

    return values
コード例 #9
0
    def generate_evaluations(self,
                             problem_name,
                             model_type,
                             training_name,
                             n_training,
                             random_seed,
                             iteration,
                             n_points_by_dimension=None,
                             n_tasks=0):
        """
        Generates evaluations of SBO, and write them in the debug directory.

        The grid of points is read from the debug directory when it exists;
        otherwise it is built as the cartesian product of one `np.linspace`
        per dimension and written there. The acquisition function wrapper is
        then evaluated at every grid point (replicated once per task when
        n_tasks > 0) and the result is written to a second file.

        :param problem_name: (str)
        :param model_type: (str)
        :param training_name: (str)
        :param n_training: (int)
        :param random_seed: (int)
        :param iteration: (int)
        :param n_points_by_dimension: [int] Number of points by dimension. When
            None, every dimension uses ten times the width of the first bound.
        :param n_tasks: (int) n_tasks > 0 if the last element of the domain is a task
        :return: np.array with one evaluation per evaluated point.
        """

        if not os.path.exists(DEBUGGING_DIR):
            os.mkdir(DEBUGGING_DIR)

        debug_dir = path.join(DEBUGGING_DIR, problem_name)

        if not os.path.exists(debug_dir):
            os.mkdir(debug_dir)

        kernel_name = '_'.join(self.gp.type_kernel)

        f_name = self._filename_points_ei_evaluations(
            model_type=model_type,
            problem_name=problem_name,
            type_kernel=kernel_name,
            training_name=training_name,
            n_training=n_training,
            random_seed=random_seed)

        debug_path = path.join(debug_dir, f_name)

        vectors = JSONFile.read(debug_path)

        if vectors is None:
            bounds = self.gp.bounds

            # When the last domain entry is a task it is not part of the grid.
            if n_tasks > 0:
                bounds_x = [bounds[i] for i in range(len(bounds) - 1)]
            else:
                bounds_x = bounds

            if n_points_by_dimension is None:
                # The default used to be a bare scalar, which then failed when
                # it was indexed/zipped per dimension. Expand it to one integer
                # count per dimension instead.
                default_points = int((bounds[0][1] - bounds[0][0]) * 10)
                n_points_x = [default_points] * len(bounds_x)
            else:
                n_points_x = n_points_by_dimension

            grids = [
                np.linspace(bound[0], bound[1], number_points)
                for bound, number_points in zip(bounds_x, n_points_x)
            ]

            vectors = list(itertools.product(*grids))

            JSONFile.write(vectors, debug_path)

        n = len(vectors)
        points_ = deepcopy(vectors)

        vectors = np.array(vectors)

        if n_tasks > 0:
            task_blocks = []
            for i in range(n_tasks):
                task_vector = (np.zeros(n) + i).reshape((n, 1))
                # NOTE(review): points_ is rebound here, so the 'points' entry
                # written below is the block of the last task rather than the
                # grid copied above. Kept as in the original -- confirm intent.
                points_ = np.concatenate((vectors, task_vector), axis=1)
                task_blocks.append(points_)
            # Stack the per-task copies of the grid, task 0 first.
            vectors = np.concatenate(task_blocks, axis=0)

        # TODO: extend to the case where w can be continuous

        n = vectors.shape[0]

        points = {i: vectors[i, :] for i in range(n)}

        args = (
            False,
            None,
            False,
            0,
            self,
        )
        val = Parallel.run_function_different_arguments_parallel(
            wrapper_objective_acquisition_function, points, *args)

        values = np.zeros(n)
        for i in range(n):
            values[i] = val.get(i)

        f_name = self._filename_ei_evaluations(iteration=iteration,
                                               model_type=model_type,
                                               problem_name=problem_name,
                                               type_kernel=kernel_name,
                                               training_name=training_name,
                                               n_training=n_training,
                                               random_seed=random_seed)

        debug_path = path.join(debug_dir, f_name)

        JSONFile.write({'points': points_, 'evaluations': values}, debug_path)

        return values
コード例 #10
0
    def optimize(self,
                 start=None,
                 random_seed=None,
                 parallel=True,
                 n_restarts=10,
                 n_best_restarts=0,
                 n_samples_parameters=0,
                 start_new_chain=False,
                 maxepoch=11,
                 **kwargs):
        """
        Optimizes EI

        Runs a multi-start optimization of the acquisition function (L-BFGS
        when n_samples_parameters == 0, SGD otherwise), appends the best
        result to `self.optimization_results` and returns it. Restarts for
        which no result is available are logged and skipped.

        :param start: np.array of starting points, indexed as a 2-D array with
            one row per starting point. Generated from the domain when None.
        :param random_seed: int
        :param parallel: boolean
        :param n_restarts: int. When starting points are generated per task,
            it is rounded up to a multiple of the number of tasks.
        :param n_best_restarts: (int) Chooses the best n_best_restarts based on EI
        :param n_samples_parameters: int
        :param start_new_chain: (boolean) If True, we start a new chain and sample
            DEFAULT_N_PARAMETERS parameters of the GP model.
        :param maxepoch: (int) forwarded to the SGD optimizer as 'maxepoch'.
        :param kwargs: accepted for compatibility; not used.
        :return: the result of the restart with the largest 'optimal_value'.
        :raises ValueError: if no restart produced a result.
        """

        if random_seed is not None:
            np.random.seed(random_seed)

        if start_new_chain:
            if self.gp.name_model == BAYESIAN_QUADRATURE:
                self.gp.gp.start_new_chain()
                self.gp.gp.sample_parameters(DEFAULT_N_PARAMETERS)
            else:
                self.gp.start_new_chain()
                self.gp.sample_parameters(DEFAULT_N_PARAMETERS)

        bounds = self.gp.bounds

        if start is None:
            task_chosen = None
            if self.gp.separate_tasks and self.gp.name_model == BAYESIAN_QUADRATURE:
                tasks = self.gp.tasks
                n_tasks = len(tasks)
                # Round up with true division: integer division made np.ceil a
                # no-op (rounding down instead), and a float group size cannot
                # be used with range() or as an index.
                n_task_per_group = int(np.ceil(n_restarts / float(n_tasks)))
                n_restarts = n_task_per_group * n_tasks

                ind = [[i] for i in range(n_restarts)]
                np.random.shuffle(ind)
                task_chosen = np.zeros((n_restarts, 1))

                # Assign each task to an equally sized random group of restarts.
                for i in range(n_tasks):
                    for j in range(n_task_per_group):
                        tk = ind[j + i * n_task_per_group]
                        task_chosen[tk, 0] = i

            start_points = DomainService.get_points_domain(
                n_restarts,
                bounds,
                type_bounds=self.gp.type_bounds,
                simplex_domain=self.simplex_domain)

            if task_chosen is not None:
                start_points = np.concatenate((start_points, task_chosen),
                                              axis=1)

            start = np.array(start_points)

        if 0 < n_best_restarts < n_restarts:
            n_start = start.shape[0]
            point_dict = {j: start[j, :] for j in range(n_start)}
            args = (False, None, True, 0, self, DEFAULT_N_PARAMETERS)
            ei_values = Parallel.run_function_different_arguments_parallel(
                wrapper_objective_acquisition_function, point_dict, *args)
            # Look values up by key: the iteration order of the returned dict
            # need not match the indices of the starting points.
            evaluated = [j for j in range(n_start)
                         if ei_values.get(j) is not None]
            if evaluated:
                evaluated.sort(key=lambda j: ei_values[j])
                start = np.array(
                    [point_dict[j] for j in evaluated[-n_best_restarts:]])
            else:
                logger.info(
                    "EI could not be evaluated at any starting point; "
                    "keeping all of them")

        n_restarts = start.shape[0]
        bounds = [tuple(bound) for bound in self.bounds_opt]

        objective_function = wrapper_objective_acquisition_function
        grad_function = wrapper_gradient_acquisition_function

        if n_samples_parameters == 0:
            #TODO: CHECK THIS
            optimization = Optimization(LBFGS_NAME,
                                        objective_function,
                                        bounds,
                                        grad_function,
                                        minimize=False)

            args = (False, None, parallel, 0, optimization, self,
                    n_samples_parameters)

            opt_method = wrapper_optimize

            point_dict = {j: start[j, :] for j in range(n_restarts)}
        else:

            #TODO CHANGE wrapper_objective_voi, wrapper_grad_voi_sgd TO NO SOLVE MAX_a_{n+1} in
            #TODO: parallel for the several starting points

            args_ = (self, DEFAULT_N_PARAMETERS)

            optimization = Optimization(
                SGD_NAME,
                objective_function,
                bounds,
                wrapper_evaluate_gradient_ei_sample_params,
                minimize=False,
                full_gradient=grad_function,
                args=args_,
                debug=True,
                simplex_domain=self.simplex_domain,
                maxepoch=maxepoch)

            args = (False, None, parallel, 0, optimization,
                    n_samples_parameters, self)

            #TODO: THINK ABOUT N_THREADS. Do we want to run it in parallel?

            opt_method = wrapper_sgd

            # Each restart gets its own seed together with its starting point.
            random_seeds = np.random.randint(0, 4294967295, n_restarts)
            point_dict = {
                j: [start[j, :], random_seeds[j]] for j in range(n_restarts)
            }

        optimal_solutions = Parallel.run_function_different_arguments_parallel(
            opt_method, point_dict, *args)

        successful = []
        for j in range(n_restarts):
            result = optimal_solutions.get(j)
            if result is None:
                logger.info("Optimization of the EI failed at restart %d", j)
                continue
            successful.append(result)

        if not successful:
            raise ValueError(
                "optimize: none of the %d restarts produced a solution"
                % n_restarts)

        maximum_values = [result['optimal_value'] for result in successful]
        best = successful[np.argmax(maximum_values)]

        logger.info("Results of the optimization of the EI: ")
        logger.info(best)

        self.optimization_results.append(best)

        return best
コード例 #11
0
    def get_training_data(cls,
                          problem_name,
                          training_name,
                          bounds_domain,
                          n_training=5,
                          points=None,
                          noise=False,
                          n_samples=None,
                          random_seed=DEFAULT_RANDOM_SEED,
                          parallel=True,
                          type_bounds=None,
                          cache=True,
                          gp_path_cache=None,
                          simplex_domain=None,
                          objective_function=None):
        """
        Return the training data of a problem, reading it from disk when
        possible and otherwise evaluating the objective at the training points.

        :param problem_name: str
        :param training_name: (str), prefix used to save the training data.
        :param bounds_domain: [([float, float] or [float])], the first case is when the bounds are
            lower or upper bound of the respective entry; in the second case, it's list of finite
            points representing the domain of that entry.
        :param n_training: (int), number of training points if points is None
        :param points: [[float]]
        :param noise: boolean, true if the evaluations are noisy
        :param n_samples: int. If noise is true, we take n_samples of the function to estimate its
            value.
        :param random_seed: int
        :param parallel: (boolean) Evaluate the points in parallel if it's True.
        :param type_bounds: [0 or 1], 0 if the bounds are lower or upper bound of the respective
            entry, 1 if the bounds are all the finite options for that entry.
        :param cache: (boolean) Try to get the data from the stored files
        :param gp_path_cache: path of a stored model; when cache is True and the file can be
            read, its 'data' entry is returned directly.
        :param simplex_domain: forwarded to cls.get_points_domain when points are generated.
        :param objective_function: callable evaluated at each point; when None the module of
            the problem is imported and evaluated instead.
        :return: {'points': [[float]], 'evaluations': [float], 'var_noise': [float] or []}
        """

        if cache and gp_path_cache is not None:
            cached_model = JSONFile.read(gp_path_cache)
            if cached_model is not None:
                return cached_model['data']

        logger.info("Getting training data")

        # Explicit points fix the number of training points, and the file name
        # then uses seed 0 instead of random_seed.
        points_given = points is not None and len(points) > 0
        if points_given:
            n_training = len(points)
            seed_in_name = 0
        else:
            seed_in_name = random_seed

        file_name = cls._filename(
            problem_name=problem_name,
            training_name=training_name,
            n_points=n_training,
            random_seed=seed_in_name,
        )

        problem_dir = path.join(PROBLEM_DIR, problem_name)
        training_dir = path.join(PROBLEM_DIR, problem_name, 'data')

        # Create the directory chain from the outermost level inwards.
        for directory in (PROBLEM_DIR, problem_dir, training_dir):
            if not os.path.exists(directory):
                os.mkdir(directory)

        training_path = path.join(training_dir, file_name)

        stored = JSONFile.read(training_path) if cache else None
        if stored is not None:
            return stored

        if n_training == 0:
            return {'points': [], 'evaluations': [], 'var_noise': []}

        np.random.seed(random_seed)

        if not points_given:
            points = cls.get_points_domain(n_training,
                                           bounds_domain,
                                           random_seed,
                                           training_name,
                                           problem_name,
                                           type_bounds,
                                           simplex_domain=simplex_domain)

        name_module = None
        module = None
        if objective_function is None:
            name_module = cls.get_name_module(problem_name)
            module = __import__(name_module, globals(), locals(), -1)

        training_data = {'points': points, 'evaluations': [], 'var_noise': []}

        if not parallel:
            # Noisy evaluations receive n_samples as an extra argument.
            extra_args = (n_samples,) if noise else ()
            for point in points:
                if module is not None:
                    evaluation = cls.evaluate_function(
                        module, point, *extra_args)
                else:
                    evaluation = objective_function(point, *extra_args)
                if noise:
                    training_data['var_noise'].append(evaluation[1])
                training_data['evaluations'].append(evaluation[0])
                # The partial result is written after every point.
                JSONFile.write(training_data, training_path)
            JSONFile.write(training_data, training_path)
            return training_data

        arguments = convert_list_to_dictionary(points)

        kwargs = {
            'name_module': name_module,
            'cls_': cls,
            'n_samples': n_samples
        }
        if name_module is None:
            kwargs['objective_function'] = objective_function

        evaluated_points = convert_dictionary_to_list(
            Parallel.run_function_different_arguments_parallel(
                wrapper_evaluate_objective_function, arguments, **kwargs))

        training_data['evaluations'] = [
            value[0] for value in evaluated_points
        ]

        if noise:
            training_data['var_noise'] = [
                value[1] for value in evaluated_points
            ]

        if cache:
            JSONFile.write(training_data, training_path)

        return training_data
コード例 #12
0
    def optimize_mean(self, n_restarts=10, candidate_solutions=None, candidate_values=None):
        """
        Optimize `wrapper_mean_objective` over the first `self.x_domain`
        coordinates and, optionally, compare the result against a list of
        candidate solutions.

        Kernel parameters are estimated first when `self.parameters` is None.
        The optimizer is Nelder-Mead configured with `minimize=False`; the
        restart with the largest 'optimal_value' is kept. When candidates are
        given, `wrapper_mean_objective` is evaluated at each of them and the
        best candidate replaces the optimizer result if its value is larger.

        :param n_restarts: (int) number of starting points for the optimizer.
        :param candidate_solutions: list of points to compare against, or None.
        :param candidate_values: not used; kept so existing callers keep
            working. The number of candidates is taken from
            candidate_solutions.
        :return: dict with 'optimal_value' (one-element list) and 'solution'.
            When the optimizer result wins, the dict also keeps whatever other
            entries the optimizer returned.
        :raises ValueError: if no restart produced a result.
        """
        if self.parameters is None:
            self.estimate_parameters_kernel()
        parameters = self.parameters

        bounds = [tuple(self.gp.bounds[i]) for i in range(self.x_domain)]
        start = DomainService.get_points_domain(
            n_restarts, self.gp.bounds[0:self.x_domain], type_bounds=self.gp.type_bounds[0:self.x_domain])
        start_points = {i: start[i] for i in range(n_restarts)}
        optimization = Optimization(
            NELDER,
            wrapper_mean_objective,
            bounds,
            None,
            hessian=None, tol=None,
            minimize=False)
        args = (False, None, True, 0, optimization, self, parameters)
        sol = Parallel.run_function_different_arguments_parallel(
            wrapper_optimize, start_points, *args)

        results_opt = []
        for i in range(n_restarts):
            result = sol.get(i)
            if result is None:
                logger.info("Error in computing optimum of a_{n+1} at one sample at point %d",
                            i)
                continue
            results_opt.append(result)

        if not results_opt:
            # np.argmax on an empty list fails with an opaque message; say
            # what actually went wrong instead.
            raise ValueError(
                "optimize_mean: none of the %d restarts produced a solution"
                % n_restarts)

        solutions = [result['optimal_value'] for result in results_opt]
        sol = results_opt[np.argmax(solutions)]
        sol['optimal_value'] = [sol['optimal_value']]

        if candidate_solutions is not None and len(candidate_solutions) > 0:
            # Size the comparison by the candidates themselves; using
            # len(candidate_values) failed when that argument was left as None.
            point_dict = {
                j: np.array(candidate)
                for j, candidate in enumerate(candidate_solutions)
            }
            args = (False, None, True, 0, self, parameters)
            values = Parallel.run_function_different_arguments_parallel(
                wrapper_mean_objective, point_dict, *args)

            # Skip candidates whose evaluation is not available.
            evaluated = [j for j in range(len(point_dict))
                         if values.get(j) is not None]
            if evaluated:
                values_candidates = [values[j] for j in evaluated]
                best_candidate = evaluated[np.argmax(values_candidates)]
                value = np.max(values_candidates)

                if value > sol['optimal_value'][0]:
                    sol = {
                        'optimal_value': [value],
                        'solution': point_dict[best_candidate],
                    }

        return sol