Example #1
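# The imports below are inferred from the code in this listing. math, numpy,
# pandas, and pytest are standard; the project-specific names used throughout
# (SimpleHypergrid, ContinuousDimension, Point, OptimizationProblem, Objective,
# OptimumDefinition, bayesian_optimizer_config_store) come from the surrounding
# mlos project, and their exact import paths are omitted here rather than guessed.
# Likewise, self.bayesian_optimizer_factory is assumed to be provided by the test
# class's setup, which is not part of this listing.
import math

import numpy as np
import pandas as pd
import pytest


def quadratic(x_1, x_2):
    """Plausible sketch of the objective under test. Its definition is not part
    of this listing, so a simple 2D paraboloid with its minimum at the origin is
    assumed here; the real helper may differ.
    """
    return x_1 ** 2 + x_2 ** 2


# evaluate_point below belongs to an objective-function class (one that defines
# self._domain and self._vertical_translations) whose definition is not shown.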
    def evaluate_point(self, point):
        # Evaluate a three-level quadratic: each vertex_height level selects its
        # own quadratic parameters and a vertical translation of the surface.
        assert point in self._domain
        value = None
        if point.vertex_height == "low":
            value = quadratic(
                x_1=point.low_quadratic_params.x_1,
                x_2=point.low_quadratic_params.x_2
            ) + self._vertical_translations[point.vertex_height]
        elif point.vertex_height == "medium":
            value = quadratic(
                x_1=point.medium_quadratic_params.x_1,
                x_2=point.medium_quadratic_params.x_2
            ) + self._vertical_translations[point.vertex_height]
        elif point.vertex_height == "high":
            value = quadratic(
                x_1=point.high_quadratic_params.x_1,
                x_2=point.high_quadratic_params.x_2
            ) + self._vertical_translations[point.vertex_height]
        else:
            raise RuntimeError(
                f"Unrecognized point.vertex_height value: {point.vertex_height}"
            )

        return Point(y=value)

    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(
            self, use_remote_optimizer):
        """Tests the Bayesian optimizer on a simple 2D quadratic function with no prior data."""
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-10,
                                                              max=10),
                                          ContinuousDimension(name='x_2',
                                                              min=-10,
                                                              max=10)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        optimizer_config = bayesian_optimizer_config_store.default
        optimizer_config.min_samples_required_for_guided_design_of_experiments = 50
        random_forest_config = optimizer_config.homogeneous_random_forest_regression_model_config
        random_forest_config.n_estimators = 10
        random_forest_config.decision_tree_regression_model_config.splitter = "best"
        random_forest_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

        print(optimizer_config.to_json(indent=2))

        if use_remote_optimizer:
            bayesian_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)
        else:
            bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

        num_iterations = 62
        old_optimum = np.inf
        for i in range(num_iterations):
            suggested_params = bayesian_optimizer.suggest()
            suggested_params_dict = suggested_params.to_dict()
            target_value = quadratic(**suggested_params_dict)
            print(
                f"[{i+1}/{num_iterations}] Suggested params: {suggested_params_dict}, target_value: {target_value}"
            )

            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params_dict.items()
            })
            target_values_df = pd.DataFrame({'y': [target_value]})

            bayesian_optimizer.register(
                parameter_values_pandas_frame=input_values_df,
                target_values_pandas_frame=target_values_df)
            if i > optimizer_config.min_samples_required_for_guided_design_of_experiments and i % 10 == 1:
                _, all_targets, _ = bayesian_optimizer.get_all_observations()
                best_config, optimum = bayesian_optimizer.optimum(
                    optimum_definition=OptimumDefinition.BEST_OBSERVATION)
                print(f"[{i}/{num_iterations}] Optimum: {optimum}")
                assert optimum.y == all_targets.min()[0]
                assert input_space.contains_point(best_config)
                assert output_space.contains_point(optimum)
                assert optimum.y <= old_optimum
                old_optimum = optimum.y
                self.validate_optima(optimizer=bayesian_optimizer)
                random_forest_gof_metrics = bayesian_optimizer.compute_surrogate_model_goodness_of_fit()[0]
                print(
                    f"Relative squared error: {random_forest_gof_metrics.relative_squared_error}, Relative absolute error: {random_forest_gof_metrics.relative_absolute_error}"
                )

        random_forest_gof_metrics = bayesian_optimizer.compute_surrogate_model_goodness_of_fit()[0]
        assert random_forest_gof_metrics.last_refit_iteration_number > 0.7 * num_iterations
        models_gof_metrics = [random_forest_gof_metrics]

        for model_gof_metrics in models_gof_metrics:
            assert 0 <= model_gof_metrics.relative_absolute_error <= 1  # This could fail if the models are really wrong. Not expected in this unit test though.
            assert 0 <= model_gof_metrics.relative_squared_error <= 1

            # Invariant linking mean absolute error (MAE), root mean squared error (RMSE),
            # and the number of observations n: MAE <= RMSE (by Jensen's inequality) and
            # RMSE <= sqrt(n) * MAE (since sum(e_i^2) <= (sum |e_i|)^2). Let's assert it.
            n = model_gof_metrics.last_refit_iteration_number
            assert model_gof_metrics.mean_absolute_error \
                <= model_gof_metrics.root_mean_squared_error \
                <= math.sqrt(n) * model_gof_metrics.mean_absolute_error

            # The sample confidence interval is at least as wide as the prediction
            # interval, so the hit rates should be ordered accordingly.
            assert model_gof_metrics.sample_90_ci_hit_rate >= model_gof_metrics.prediction_90_ci_hit_rate
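
    # Note: use_remote_optimizer in the test above is presumably supplied by pytest
    # parametrization, e.g. @pytest.mark.parametrize("use_remote_optimizer",
    # [True, False]); the fixture itself is not part of this listing.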

    def test_bayesian_optimizer_on_simple_2d_quadratic_function_pre_heated(
            self):
        """Tests the Bayesian optimizer on a simple quadratic function after first feeding it a lot of data."""
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-100,
                                                              max=100),
                                          ContinuousDimension(name='x_2',
                                                              min=-100,
                                                              max=100)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        x_1, x_2 = np.meshgrid(input_space['x_1'].linspace(num=21),
                               input_space['x_2'].linspace(num=21))

        y = quadratic(x_1=x_1, x_2=x_2)

        input_values_dataframe = pd.DataFrame({
            'x_1': x_1.reshape(-1),
            'x_2': x_2.reshape(-1)
        })
        output_values_dataframe = pd.DataFrame({'y': y.reshape(-1)})
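
        # The 21 x 21 meshgrid above yields 441 prior observations with which to
        # pre-heat the optimizer before asking it for guided suggestions.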

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default,
        )

        remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default)

        optimizers = [local_optimizer, remote_optimizer]
        for bayesian_optimizer in optimizers:
            # A call to .optimum() should throw before we feed any data to the optimizer.
            #
            with pytest.raises(ValueError):
                bayesian_optimizer.optimum(OptimumDefinition.BEST_OBSERVATION)
            self.validate_optima(optimizer=bayesian_optimizer)

            bayesian_optimizer.register(
                parameter_values_pandas_frame=input_values_dataframe,
                target_values_pandas_frame=output_values_dataframe)
            observed_best_config, observed_best_optimum = bayesian_optimizer.optimum(
                OptimumDefinition.BEST_OBSERVATION)
            assert observed_best_optimum.y == output_values_dataframe['y'].min()

            self.validate_optima(optimizer=bayesian_optimizer)

            num_guided_samples = 2
            for _ in range(num_guided_samples):
                # Suggest the parameters
                suggested_params = bayesian_optimizer.suggest()
                suggested_params_dict = suggested_params.to_dict()

                # Reformat them to feed the parameters to the target
                target_value = quadratic(**suggested_params_dict)
                print(suggested_params, target_value)

                # Reformat the observation to feed it back to the optimizer
                input_values_df = pd.DataFrame({
                    param_name: [param_value]
                    for param_name, param_value in
                    suggested_params_dict.items()
                })
                target_values_df = pd.DataFrame({'y': [target_value]})

                # Register the observation with the optimizer
                bayesian_optimizer.register(
                    parameter_values_pandas_frame=input_values_df,
                    target_values_pandas_frame=target_values_df)

            best_config_point, best_objective = bayesian_optimizer.optimum()
            print(
                f"Optimum config: {best_config_point}, optimum objective: {best_objective}"
            )
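
    def validate_optima(self, optimizer):
        """Hypothetical sketch: the tests above call self.validate_optima(), whose
        body is not part of this listing. Assuming only the API exercised above
        (optimum(), an optimization_problem attribute, and OptimumDefinition being
        an iterable enum), such a helper might check every optimum definition that
        can currently be computed.
        """
        for optimum_definition in OptimumDefinition:
            try:
                config, optimum = optimizer.optimum(optimum_definition=optimum_definition)
            except ValueError:
                # Definitions that require a fitted surrogate model are expected
                # to raise until enough observations have been registered.
                continue
            assert optimizer.optimization_problem.parameter_space.contains_point(config)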