Example no. 1
    @classmethod
    def setUpClass(cls) -> None:
        global_values.declare_singletons()

        cls.slope = 10
        cls.y_intercept = 10
        cls.input_values = np.linspace(start=0, stop=100, num=1000, endpoint=True)
        cls.output_values = cls.input_values * cls.slope + cls.y_intercept

        cls.input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=100)]
        )

        cls.output_space = SimpleHypergrid(
            name="output",
            dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
        )

        cls.input_pandas_dataframe = pd.DataFrame({"x": cls.input_values})
        cls.output_pandas_dataframe = pd.DataFrame({"y": cls.output_values})

        cls.model_config = HomogeneousRandomForestRegressionModelConfig()
        cls.model = HomogeneousRandomForestRegressionModel(
            model_config=cls.model_config,
            input_space=cls.input_space,
            output_space=cls.output_space
        )
        cls.model.fit(cls.input_pandas_dataframe, cls.output_pandas_dataframe, iteration_number=len(cls.input_pandas_dataframe.index))

        cls.sample_inputs = {'x': np.linspace(start=-10, stop=110, num=13, endpoint=True)}  # deliberately extends beyond the trained domain [0, 100]
        cls.sample_inputs_pandas_dataframe = pd.DataFrame(cls.sample_inputs)
        cls.sample_predictions = cls.model.predict(cls.sample_inputs_pandas_dataframe)
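Since the training data is a noiseless line, a natural follow-up is to check that in-domain predictions track the known mapping. A sketch, assuming the Prediction object exposes get_dataframe() with a 'predicted_value' column (both names are assumptions about the API, not confirmed by this excerpt):

        # Sketch of a sanity check: inside the trained domain [0, 100] the forest
        # should roughly track y = 10 * x + 10. get_dataframe() and the
        # 'predicted_value' column are assumptions about the Prediction API.
        predictions_df = cls.sample_predictions.get_dataframe()
        in_domain = (cls.sample_inputs['x'] >= 0) & (cls.sample_inputs['x'] <= 100)
        true_values = cls.sample_inputs['x'][in_domain] * cls.slope + cls.y_intercept
        assert np.allclose(predictions_df.loc[in_domain, 'predicted_value'], true_values, rtol=0.2)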
Example no. 2
    @classmethod
    def setUpClass(cls) -> None:

        cls.simple_hypergrid = SimpleHypergrid(
            name='simple_adaptee',
            dimensions=[
                CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 1]),
                DiscreteDimension(name='one_to_ten', min=1, max=10),
                ContinuousDimension(name='zero_to_one', min=0, max=1),
                OrdinalDimension(name='ordinal_mixed_types', ordered_values=[1, False, 'two'])
            ]
        )

        cls.hierarchical_hypergrid = SimpleHypergrid(
            name='hierarchical_adaptee',
            dimensions=[
                CategoricalDimension(name='categorical_mixed_types', values=['red', True, False, 3]),
                DiscreteDimension(name='one_to_ten', min=1, max=10),
                ContinuousDimension(name='zero_to_one', min=0, max=1),
                OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, False, 'two'])
            ]
        ).join(
            subgrid=SimpleHypergrid(
                name="nested_grid",
                dimensions=[
                    CategoricalDimension(name='categorical_mixed_types', values=['red', False, True, 3]),
                    DiscreteDimension(name='one_to_ten', min=1, max=10),
                    ContinuousDimension(name='zero_to_one', min=0, max=1),
                    OrdinalDimension(name='ordinal_mixed_types', ordered_values=[3, 'two', False])
                ]
            ),
            on_external_dimension=CategoricalDimension("categorical_mixed_types", values=[True])
        )
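A minimal sketch of how the joined grid behaves, relying only on the random() and `in` membership operations used in the other examples: sampled points always satisfy membership, and the nested_grid dimensions are only present when the pivot value categorical_mixed_types == True is drawn.

        # Sketch: every sampled point belongs to the joined grid; whether the
        # nested_grid dimensions appear depends on the sampled value of the
        # pivot dimension categorical_mixed_types (join condition: True).
        for _ in range(100):
            random_point = cls.hierarchical_hypergrid.random()
            assert random_point in cls.hierarchical_hypergrid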
Example no. 3
    def __init__(self, objective_function_config: Point = None):
        assert objective_function_config in enveloped_waves_config_space, f"{objective_function_config} not in {enveloped_waves_config_space}"
        ObjectiveFunctionBase.__init__(self, objective_function_config)
        self._parameter_space = SimpleHypergrid(
            name="domain",
            dimensions=[
                ContinuousDimension(name=f"x_{i}", min=0, max=objective_function_config.num_periods * objective_function_config.period)
                for i in range(self.objective_function_config.num_params)
            ]
        )

        self._output_space = SimpleHypergrid(
            name="range",
            dimensions=[
                ContinuousDimension(name="y", min=-math.inf, max=math.inf)
            ]
        )

        if self.objective_function_config.envelope_type == "linear":
            self._envelope = self._linear_envelope
        elif self.objective_function_config.envelope_type == "quadratic":
            self._envelope = self._quadratic_envelope
        elif self.objective_function_config.envelope_type == "sine":
            self._envelope = self._sine_envelope
        else:
            self._envelope = lambda x: x * 0 + 1
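One detail worth calling out in the fallback branch: x * 0 + 1 is used instead of a literal 1 so that the constant envelope broadcasts to the shape of its input. A self-contained illustration:

import numpy as np

constant_envelope = lambda x: x * 0 + 1
assert constant_envelope(5) == 1  # scalar in, scalar out
assert (constant_envelope(np.linspace(0, 1, 4)) == np.ones(4)).all()  # array in, array of ones out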
Example no. 4
    def test_pareto_frontier_volume_simple(self):
        """A simple sanity test on the pareto frontier volume computations.
        """

        # Let's generate a pareto frontier in 2D. All points lie on the line y = 1 - x.
        x = np.linspace(start=0, stop=1, num=100)
        y = 1 - x
        pareto_df = pd.DataFrame({'x': x, 'y': y})
        optimization_problem = OptimizationProblem(
            parameter_space=None,
            objective_space=SimpleHypergrid(name='objectives',
                                            dimensions=[
                                                ContinuousDimension(name='x',
                                                                    min=0,
                                                                    max=1),
                                                ContinuousDimension(name='y',
                                                                    min=0,
                                                                    max=1)
                                            ]),
            objectives=[
                Objective(name='x', minimize=False),
                Objective(name='y', minimize=False)
            ])
        pareto_frontier = ParetoFrontier(optimization_problem, pareto_df)
        pareto_volume_estimator = pareto_frontier.approximate_pareto_volume(
            num_samples=1000000)
        lower_bound, upper_bound = pareto_volume_estimator.get_two_sided_confidence_interval_on_pareto_volume(
            alpha=0.05)
        print(lower_bound, upper_bound)
        assert 0.49 < lower_bound < upper_bound < 0.51
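The bounds 0.49 and 0.51 follow from geometry: with both objectives maximized, the region dominated by the frontier y = 1 - x is the triangle x + y <= 1, whose area in the unit square is exactly 0.5, and one million samples pin a Monte Carlo estimate down to roughly +/- 0.001. A self-contained numpy check of that reasoning:

import numpy as np

rng = np.random.default_rng(seed=0)
samples = rng.random(size=(1_000_000, 2))
dominated_fraction = (samples.sum(axis=1) <= 1.0).mean()  # area of the triangle x + y <= 1
assert 0.49 < dominated_fraction < 0.51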
Example no. 5
    def setUp(self):
        # Let's create a simple quadratic response function
        self.input_space = SimpleHypergrid(name="2d_X_search_domain",
                                           dimensions=[
                                               ContinuousDimension(name="x1",
                                                                   min=0.0,
                                                                   max=5.0),
                                               ContinuousDimension(name="x2",
                                                                   min=0.0,
                                                                   max=5.0)
                                           ])
        self.output_space = SimpleHypergrid(
            name="degree2_polynomial",
            dimensions=[
                ContinuousDimension(name="degree2_polynomial_y",
                                    min=-10**15,
                                    max=10**15)
            ])

        lasso_model_config = SklearnLassoRegressionModelConfig.DEFAULT
        rf_model_config = SklearnRandomForestRegressionModelConfig.DEFAULT
        self.model_config = \
            RegressionEnhancedRandomForestRegressionModelConfig(
                max_basis_function_degree=2,
                min_abs_root_model_coef=0.02,
                boosting_root_model_name=SklearnLassoRegressionModelConfig.__name__,
                boosting_root_model_config=lasso_model_config,
                random_forest_model_config=rf_model_config,
                perform_initial_root_model_hyper_parameter_search=True,
                perform_initial_random_forest_hyper_parameter_search=True)
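The setUp comment promises a quadratic response function, but its definition falls outside this excerpt. A synthetic degree-2 polynomial over the two inputs might look like the following (illustrative coefficients, not the ones the original test uses):

def degree2_polynomial(x1: float, x2: float) -> float:
    # Any degree-2 polynomial in x1 and x2 fits the spaces defined above;
    # these coefficients are made up for illustration.
    return 3.0 * x1 ** 2 + 2.0 * x1 * x2 + x2 ** 2 - 5.0 * x1 + 1.0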
Example no. 6
    def setUp(self):
        # Let's create a simple linear mapping
        self.slope = 10
        self.y_intercept = 10
        self.input_values = np.linspace(start=0,
                                        stop=100,
                                        num=1001,
                                        endpoint=True)
        self.input_output_mapping = lambda x: x * self.slope + self.y_intercept
        self.output_values = self.input_output_mapping(self.input_values)

        self.input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=100)])

        self.output_space = SimpleHypergrid(name="output",
                                            dimensions=[
                                                ContinuousDimension(
                                                    name="y",
                                                    min=-math.inf,
                                                    max=math.inf)
                                            ])

        self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
        self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})
Example no. 7
    def setUp(self):
        self.logger = create_logger(self.__class__.__name__)
        # Start up the gRPC service.
        #
        self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
        self.server.start()

        self.optimizer_service_channel = grpc.insecure_channel('localhost:50051')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_channel, logger=self.logger)
        self.optimizer_monitor = OptimizerMonitor(grpc_channel=self.optimizer_service_channel, logger=self.logger)

        # Define the optimization problem.
        #
        input_space = SimpleHypergrid(
            name="input",
            dimensions=[
                ContinuousDimension(name='x_1', min=-100, max=100),
                ContinuousDimension(name='x_2', min=-100, max=100)
            ]
        )

        output_space = SimpleHypergrid(
            name="output",
            dimensions=[
                ContinuousDimension(name='y', min=-math.inf, max=math.inf)
            ]
        )

        self.optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)]
        )
Example no. 8
    def test_optimum_before_register_error(self):

        input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name='x', min=-10, max=10)])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])
        bayesian_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
            optimization_problem=optimization_problem,
            optimizer_config=bayesian_optimizer_config_store.default)

        with pytest.raises(ValueError):
            bayesian_optimizer.optimum()

        bayesian_optimizer.register(
            parameter_values_pandas_frame=pd.DataFrame({'x': [0.0]}),
            target_values_pandas_frame=pd.DataFrame({'y': [1.0]}))
        bayesian_optimizer.optimum()
Example no. 9
    def _build_simple_hypergrid_target(self) -> None:
        """ Builds a SimpleHypergrid target for a SimpleHypergrid adaptee.

        :return:
        """

        self._target = SimpleHypergrid(name=self._adaptee.name,
                                       dimensions=None,
                                       random_state=self._adaptee.random_state)

        # Now we iterate over all adaptee dimensions and map each one to a unit-interval ContinuousDimension on the target.
        #
        for adaptee_dimension in self._adaptee.dimensions:
            if isinstance(adaptee_dimension, DiscreteDimension):
                target_dimension = ContinuousDimension(
                    name=adaptee_dimension.name,
                    min=0,
                    max=1,
                    include_max=False)
            else:
                target_dimension = ContinuousDimension(
                    name=adaptee_dimension.name,
                    min=0,
                    max=1,
                    include_min=adaptee_dimension.include_min,
                    include_max=adaptee_dimension.include_max)

            self._target.add_dimension(target_dimension)
            self._adaptee_to_target_dimension_mappings[
                adaptee_dimension.name] = target_dimension
            self._target_to_adaptee_dimension_mappings[
                target_dimension.name] = adaptee_dimension
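The include_max=False choice for discrete adaptees has a simple rationale. A sketch of the intuition (my own helper, not library code): mapping k discrete values onto the half-open interval [0, 1) gives every value an equal-width bucket, so decoding is an unambiguous floor operation.

def decode_unit_interval_value(u: float, num_values: int) -> int:
    """Map u in [0, 1) back to one of num_values equally sized buckets."""
    assert 0.0 <= u < 1.0
    return int(u * num_values)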
Example no. 10
    def test_bayesian_optimizer_on_simple_2d_quadratic_function_cold_start(
            self):
        """ Tests the bayesian optimizer on a simple quadratic function with no prior data.

        :return:
        """
        input_space = SimpleHypergrid(name="input",
                                      dimensions=[
                                          ContinuousDimension(name='x_1',
                                                              min=-100,
                                                              max=100),
                                          ContinuousDimension(name='x_2',
                                                              min=-100,
                                                              max=100)
                                      ])

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        bayesian_optimizer = BayesianOptimizer(
            optimization_problem=optimization_problem,
            optimizer_config=BayesianOptimizerConfig.DEFAULT,
            logger=self.logger)

        num_guided_samples = 1000
        for i in range(num_guided_samples):
            suggested_params = bayesian_optimizer.suggest()
            suggested_params_dict = suggested_params.to_dict()

            target_value = quadratic(**suggested_params_dict)
            self.logger.info(
                f"[{i}/{num_guided_samples}] suggested params: {suggested_params}, target: {target_value}"
            )

            input_values_df = pd.DataFrame({
                param_name: [param_value]
                for param_name, param_value in suggested_params_dict.items()
            })
            target_values_df = pd.DataFrame({'y': [target_value]})

            bayesian_optimizer.register(input_values_df, target_values_df)
            if i > 20 and i % 20 == 0:
                self.logger.info(
                    f"[{i}/{num_guided_samples}] Optimum: {bayesian_optimizer.optimum()}"
                )

        self.logger.info(f"Optimum: {bayesian_optimizer.optimum()}")
Example no. 11
    def test_basic_functionality_on_2d_objective_space(self):
        """Basic sanity check. Mainly used to help us develop the API.
        """

        # Let's just create a bunch of random points, build a pareto frontier
        # and verify that the invariants hold.
        #
        parameter_space = SimpleHypergrid(
            name='params',
            dimensions=[
                ContinuousDimension(name='x1', min=0, max=10)
            ]
        )

        objective_space = SimpleHypergrid(
            name='objectives',
            dimensions=[
                ContinuousDimension(name='y1', min=0, max=10),
                ContinuousDimension(name='y2', min=0, max=10)
            ]
        )

        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=[
                Objective(name='y1', minimize=False),
                Objective(name='y2', minimize=False)
            ]
        )

        num_rows = 100000
        random_objectives_df = objective_space.random_dataframe(num_rows)

        # They don't match but they don't need to for this test.
        #
        random_params_df = parameter_space.random_dataframe(num_rows)

        pareto_frontier = ParetoFrontier(
            optimization_problem=optimization_problem,
            objectives_df=random_objectives_df,
            parameters_df=random_params_df
        )
        pareto_df = pareto_frontier.pareto_df

        non_pareto_index = random_objectives_df.index.difference(pareto_df.index)
        for i, row in pareto_df.iterrows():
            # Now let's make sure that no point in pareto is dominated by any non-pareto point.
            #
            assert (random_objectives_df.loc[non_pareto_index] < row).any(axis=1).sum() == len(non_pareto_index)

            # Let's also make sure that no point on the pareto is dominated by any other point there.
            #
            other_rows = pareto_df.index.difference([i])
            assert (pareto_df.loc[other_rows] > row).all(axis=1).sum() == 0
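Both vectorized asserts encode Pareto dominance for maximization. The standard definition, written row-wise for readability (a sketch; the asserts above use strict and weak variants of the same comparison):

import pandas as pd

def dominates(a: pd.Series, b: pd.Series) -> bool:
    """True iff point a dominates point b when every objective is maximized."""
    return bool((a >= b).all() and (a > b).any())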
Example no. 12
    def test_repeated_values(self):
        """Validates that the algorithm does its job in the presence of repeated values.

        :return:
        """

        optimization_problem = OptimizationProblem(
            parameter_space=None,
            objective_space=SimpleHypergrid(
                name="objectives",
                dimensions=[
                    ContinuousDimension(name='y1', min=0, max=5),
                    ContinuousDimension(name='y2', min=0, max=5)
                ]
            ),
            objectives=[
                Objective(name='y1', minimize=False),
                Objective(name='y2', minimize=False)
            ]
        )

        expected_pareto_df = pd.DataFrame(
            [
                [1, 2],
                [1, 2],
                [2, 1],
                [0.5, 2],
                [1, 1],
                [2, 0.5]
            ],
            columns=['y1', 'y2']
        )

        dominated_df = pd.DataFrame(
            [
                [0.5, 0.5],
                [0.5, 1],
                [0.5, 1.5],
                [1, 0.5],
                [1.5, 0.5]
            ],
            columns=['y1', 'y2']
        )

        all_objectives_df = pd.concat([dominated_df, expected_pareto_df])
        pareto_frontier = ParetoFrontier(
            optimization_problem,
            objectives_df=all_objectives_df,
            parameters_df=pd.DataFrame(index=all_objectives_df.index)
        )
        computed_pareto_df = pareto_frontier.pareto_df
        assert computed_pareto_df.sort_values(by=['y1','y2']).equals(expected_pareto_df.sort_values(by=['y1', 'y2']))
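This test also pins down the dominance convention: [1, 2] does not evict [1, 1] (equal in y1) from the expected frontier, so a point is only removed when another is strictly better in every coordinate. A row-wise sketch of that convention:

def strictly_dominates(a, b) -> bool:
    """True iff a is strictly better than b in every objective (maximization)."""
    return all(a_i > b_i for a_i, b_i in zip(a, b))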
Example no. 13
class SimpleBayesianOptimizerConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(name="SimpleBayesianOptimizerConfig",
                                   dimensions=[
                                       CategoricalDimension(
                                           name='utility_function',
                                           values=['ucb', 'ei', 'poi']),
                                       ContinuousDimension(name='kappa',
                                                           min=-5,
                                                           max=5),
                                       ContinuousDimension(name='xi',
                                                           min=-5,
                                                           max=5)
                                   ])

    _DEFAULT = Point(utility_function='ucb', kappa=3, xi=1)

    @classmethod
    def contains(cls, config):
        if not isinstance(config, cls):
            return False

        return Point(utility_function=config.utility_function,
                     kappa=config.kappa,
                     xi=config.xi) in cls.CONFIG_SPACE

    @classmethod
    def create_from_config_point(cls, config_point):
        assert config_point in cls.CONFIG_SPACE
        return cls(utility_function=config_point.utility_function,
                   kappa=config_point.kappa,
                   xi=config_point.xi)

    def __init__(self, utility_function=None, kappa=None, xi=None):
        if utility_function is None:
            utility_function = self._DEFAULT.utility_function
        if kappa is None:
            kappa = self._DEFAULT.kappa
        if xi is None:
            xi = self._DEFAULT.xi

        self.utility_function = utility_function
        self.kappa = kappa
        self.xi = xi

    def to_dict(self):
        return {
            'utility_function': self.utility_function,
            'kappa': self.kappa,
            'xi': self.xi
        }
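A round-trip usage sketch for the class above (assuming CONFIG_SPACE.random() returns a Point with a to_dict() method, as the other examples suggest):

random_config_point = SimpleBayesianOptimizerConfig.CONFIG_SPACE.random()
config = SimpleBayesianOptimizerConfig.create_from_config_point(random_config_point)
assert SimpleBayesianOptimizerConfig.contains(config)
assert config.to_dict() == random_config_point.to_dict()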
Example no. 14
    def test_optimization_problem_none_context(self):
        parameter_space = SimpleHypergrid(
            name="test",
            dimensions=[
                ContinuousDimension(name="x", min=0, max=1),
                OrdinalDimension(name="y", ordered_values=[1, 2, 3, 5, 10]),
                CategoricalDimension(name="y2", values=[True, False])
            ])
        objective_space = SimpleHypergrid(name="z",
                                          dimensions=[
                                              ContinuousDimension(
                                                  name="z\n special",
                                                  min=-50,
                                                  max=-49),
                                              ContinuousDimension(name="z1",
                                                                  min=-1,
                                                                  max=1)
                                          ])
        optimization_problem = OptimizationProblem(
            parameter_space=parameter_space,
            objective_space=objective_space,
            objectives=[
                Objective(name="z\n special", minimize=True),
                Objective(name="z1", minimize=False)
            ])

        encoded_problem = OptimizerServiceEncoder.encode_optimization_problem(
            optimization_problem)
        decoded_problem = OptimizerServiceDecoder.decode_optimization_problem(
            encoded_problem)

        print(f"Context space is: {decoded_problem.context_space}")
        assert decoded_problem.context_space is None

        # Ensure that the parameter space is still valid
        # Parameter Space
        for _ in range(1000):
            assert decoded_problem.parameter_space.random() in parameter_space
            assert parameter_space.random() in decoded_problem.parameter_space

        # Output Space
        for _ in range(1000):
            assert decoded_problem.objective_space.random() in objective_space
            assert objective_space.random() in decoded_problem.objective_space

        # Feature Space
        for _ in range(1000):
            assert decoded_problem.feature_space.random() in optimization_problem.feature_space
            assert optimization_problem.feature_space.random() in decoded_problem.feature_space
Example no. 15
class Flower(ObjectiveFunctionBase):
    """ Flower function exposing the ObjectiveFunctionBase interface.

    """

    _domain = SimpleHypergrid(name="flower",
                              dimensions=[
                                  ContinuousDimension(name='x1',
                                                      min=-100,
                                                      max=100),
                                  ContinuousDimension(name='x2',
                                                      min=-100,
                                                      max=100)
                              ])

    _range = SimpleHypergrid(name='range',
                             dimensions=[
                                 ContinuousDimension(name='y',
                                                     min=-math.inf,
                                                     max=math.inf)
                             ])

    def __init__(self, objective_function_config: Point = None):
        assert objective_function_config is None, "This function takes no configuration."
        ObjectiveFunctionBase.__init__(self, objective_function_config)

    @property
    def parameter_space(self) -> Hypergrid:
        return self._domain

    @property
    def output_space(self) -> Hypergrid:
        return self._range

    def evaluate_dataframe(self, dataframe: pd.DataFrame):
        a = 1
        b = 2
        c = 4
        x = dataframe.to_numpy()
        sum_of_squares = np.sum(x**2, axis=1)
        x_norm = np.sqrt(sum_of_squares)
        values = a * x_norm + b * np.sin(c * np.arctan2(x[:, 0], x[:, 1]))
        return pd.DataFrame({'y': values})

    def get_context(self) -> Point:
        """ Returns a context value for this objective function.

        If the context changes on every invocation, this should return the latest one.
        :return:
        """
        return Point()
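A usage sketch for the class above, relying only on the random_dataframe() call that appears in the other examples:

flower = Flower()
random_params_df = flower.parameter_space.random_dataframe(10)
values_df = flower.evaluate_dataframe(random_params_df)
assert values_df.shape == (10, 1) and list(values_df.columns) == ['y']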
Example no. 16
    def test_construct_feature_dataframe_no_context(self):
        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])
        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=objective_function.output_space,
            objectives=[Objective(name='y', minimize=True)])
        n_samples = 100
        parameter_df = optimization_problem.parameter_space.random_dataframe(
            n_samples)
        feature_df = optimization_problem.construct_feature_dataframe(
            parameters_df=parameter_df)
        assert feature_df.shape == (
            n_samples,
            len(optimization_problem.parameter_space.dimension_names) + 1)
        expected_columns = sorted([
            f"three_level_quadratic_config.{n}"
            for n in optimization_problem.parameter_space.dimension_names
        ])
        assert (
            feature_df.columns[:-1].sort_values() == expected_columns).all()
        assert feature_df.columns[-1] == "contains_context"
        assert not feature_df.contains_context.any()
Example no. 17
class ExperimentDesignerConfig(metaclass=DefaultConfigMeta):

    CONFIG_SPACE = SimpleHypergrid(
        name='experiment_designer_config',
        dimensions=[
            CategoricalDimension(
                'utility_function_implementation',
                values=[ConfidenceBoundUtilityFunction.__name__]),
            CategoricalDimension(
                'numeric_optimizer_implementation',
                values=[RandomSearchOptimizer.__name__]),
            ContinuousDimension('fraction_random_suggestions', min=0, max=1)
        ]
    ).join(
        subgrid=ConfidenceBoundUtilityFunctionConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            'utility_function_implementation',
            values=[ConfidenceBoundUtilityFunction.__name__])
    ).join(
        subgrid=RandomSearchOptimizerConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(
            'numeric_optimizer_implementation',
            values=[RandomSearchOptimizer.__name__])
    )

    _DEFAULT = Point(
        utility_function_implementation=ConfidenceBoundUtilityFunction.__name__,
        numeric_optimizer_implementation=RandomSearchOptimizer.__name__,
        confidence_bound_utility_function_config=ConfidenceBoundUtilityFunctionConfig.DEFAULT,
        random_search_optimizer_config=RandomSearchOptimizerConfig.DEFAULT,
        fraction_random_suggestions=0.5
    )
Example no. 18
    def test_hierarchical_quadratic_cold_start(self):

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        num_restarts = 2
        for restart_num in range(num_restarts):

            optimizer_config = bayesian_optimizer_config_store.default
            optimizer_config.min_samples_required_for_guided_design_of_experiments = 20
            optimizer_config.homogeneous_random_forest_regression_model_config.n_estimators = 10
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.splitter = "best"
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.min_samples_to_fit = 10
            optimizer_config.homogeneous_random_forest_regression_model_config.decision_tree_regression_model_config.n_new_samples_before_refit = 2

            local_optimizer = self.bayesian_optimizer_factory.create_local_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

            remote_optimizer = self.bayesian_optimizer_factory.create_remote_optimizer(
                optimization_problem=optimization_problem,
                optimizer_config=optimizer_config)

            for bayesian_optimizer in [local_optimizer, remote_optimizer]:
                num_guided_samples = 50
                for i in range(num_guided_samples):
                    suggested_params = bayesian_optimizer.suggest()
                    y = objective_function.evaluate_point(suggested_params)
                    print(
                        f"[{i}/{num_guided_samples}] {suggested_params}, y: {y}"
                    )

                    input_values_df = pd.DataFrame({
                        param_name: [param_value]
                        for param_name, param_value in suggested_params
                    })
                    target_values_df = y.to_dataframe()
                    bayesian_optimizer.register(
                        feature_values_pandas_frame=input_values_df,
                        target_values_pandas_frame=target_values_df)
                best_config_point, best_objective = bayesian_optimizer.optimum(
                    optimum_definition=OptimumDefinition.BEST_OBSERVATION)
                print(
                    f"[Restart:  {restart_num}/{num_restarts}] Optimum config: {best_config_point}, optimum objective: {best_objective}"
                )
                self.validate_optima(optimizer=bayesian_optimizer)
Example no. 19
    def __init__(self, objective_function_config: Point):
        assert objective_function_config.polynomial_objective_config in PolynomialObjective.CONFIG_SPACE
        ObjectiveFunctionBase.__init__(self, objective_function_config)

        # Let's start building the parameter space for it.
        #
        self._parameter_space = SimpleHypergrid(
            name="domain",
            dimensions=[
                CategoricalDimension(name="polynomial_id", values=[id for id in range(self.objective_function_config.num_nested_polynomials)])
            ]
        )

        polynomial_objective_config = self.objective_function_config.polynomial_objective_config
        self._polynomial_objective_config = polynomial_objective_config
        self._polynomials = []
        # Let's create the required number of polynomials.
        #
        for i in range(self.objective_function_config.num_nested_polynomials):
            polynomial_objective_config.seed += i + 1  # Accumulating increments give each nested polynomial a distinct yet reproducible seed.
            polynomial = PolynomialObjectiveWrapper(polynomial_objective_config, domain_name=f"domain_{i}")
            self._polynomials.append(polynomial)
            self._parameter_space.join(
                subgrid=polynomial.parameter_space,
                on_external_dimension=CategoricalDimension(name="polynomial_id", values=[i])
            )

        self._output_space = SimpleHypergrid(
            name='output_space',
            dimensions=[
                ContinuousDimension(name='y', min=-math.inf, max=math.inf)
            ]
        )
Example no. 20
class ConfidenceBoundUtilityFunctionConfig(metaclass=DefaultConfigMeta):
    CONFIG_SPACE = SimpleHypergrid(
        name="confidence_bound_utility_function_config",
        dimensions=[
            CategoricalDimension(name="utility_function_name",
                                 values=[
                                     "lower_confidence_bound_on_improvement",
                                     "upper_confidence_bound_on_improvement"
                                 ]),
            ContinuousDimension(name="alpha", min=0.01, max=0.5)
        ])
    _DEFAULT = Point(
        utility_function_name="upper_confidence_bound_on_improvement",
        alpha=0.01)

    @classmethod
    def create_from_config_point(cls, config_point):
        config_key_value_pairs = {
            param_name: value
            for param_name, value in config_point
        }
        return cls(**config_key_value_pairs)

    def __init__(self,
                 utility_function_name=_DEFAULT.utility_function_name,
                 alpha=_DEFAULT.alpha):
        self.utility_function_name = utility_function_name
        self.alpha = alpha
Example no. 21
    def setUp(self):
        mlos_globals.init_mlos_global_context()
        mlos_globals.mlos_global_context.start_clock()
        self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
        self.logger.level = logging.DEBUG

        # Start up the gRPC service.
        #
        self.server = OptimizerMicroserviceServer(port=50051, num_threads=10)
        self.server.start()

        self.optimizer_service_grpc_channel = grpc.insecure_channel('localhost:50051')
        self.bayesian_optimizer_factory = BayesianOptimizerFactory(grpc_channel=self.optimizer_service_grpc_channel, logger=self.logger)

        self.mlos_agent = MlosAgent(
            logger=self.logger,
            communication_channel=mlos_globals.mlos_global_context.communication_channel,
            shared_config=mlos_globals.mlos_global_context.shared_config,
            bayesian_optimizer_grpc_channel=self.optimizer_service_grpc_channel
        )

        self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self.mlos_agent_thread.start()

        global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

        # Let's add the allowed component types
        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)
        self.mlos_agent.set_configuration(
            component_type=SmartCacheWorkloadGenerator,
            new_config_values=Point(
                workload_type='cyclical_key_from_range',
                cyclical_key_from_range_config=Point(
                    min=0,
                    range_width=2048
                )
            )
        )

        # Let's create the workload
        self.smart_cache_workload = SmartCacheWorkloadGenerator(logger=self.logger)

        self.optimizer = None
        self.working_set_size_estimator = WorkingSetSizeEstimator()
        self.hit_rate_monitor = HitRateMonitor()

        self.smart_cache_experiment = MlosExperiment(
            smart_component_types=[SmartCache],
            telemetry_aggregators=[self.working_set_size_estimator, self.hit_rate_monitor]
        )

        self.optimization_problem = OptimizationProblem(
            parameter_space=SmartCache.parameter_search_space,
            objective_space=SimpleHypergrid(name="objectives", dimensions=[ContinuousDimension(name="hit_rate", min=0, max=1)]),
            objectives=[Objective(name="hit_rate", minimize=False)]
        )
Example no. 22
    @staticmethod
    def decode_continuous_dimension(serialized: OptimizerService_pb2.ContinuousDimension) -> ContinuousDimension:
        assert isinstance(serialized, OptimizerService_pb2.ContinuousDimension)
        return ContinuousDimension(name=serialized.Name,
                                   min=serialized.Min,
                                   max=serialized.Max,
                                   include_min=serialized.IncludeMin,
                                   include_max=serialized.IncludeMax)
Example no. 23
    def test_construct_feature_dataframe_context(self):
        def f(parameters, context):
            return pd.DataFrame({
                'function_value':
                -np.exp(-50 * (parameters.x - 0.5 * context.y - 0.5)**2)
            })

        input_space = SimpleHypergrid(
            name="my_input_name",
            dimensions=[ContinuousDimension(name="x", min=0, max=1)])
        output_space = SimpleHypergrid(name="objective",
                                       dimensions=[
                                           ContinuousDimension(
                                               name="function_value",
                                               min=-10,
                                               max=10)
                                       ])
        context_space = SimpleHypergrid(
            name="my_context_name",
            dimensions=[ContinuousDimension(name="y", min=-1, max=1)])

        optimization_problem = OptimizationProblem(
            parameter_space=input_space,
            objective_space=output_space,
            # we want to minimize the function
            objectives=[Objective(name="function_value", minimize=True)],
            context_space=context_space)
        n_samples = 100
        parameter_df = input_space.random_dataframe(n_samples)
        context_df = context_space.random_dataframe(n_samples)
        with pytest.raises(ValueError, match="Context required"):
            optimization_problem.construct_feature_dataframe(
                parameters_df=parameter_df)

        feature_df = optimization_problem.construct_feature_dataframe(
            parameters_df=parameter_df, context_df=context_df)

        assert isinstance(feature_df, pd.DataFrame)
        assert feature_df.shape == (n_samples, 3)
        assert (feature_df.columns == [
            'my_input_name.x', 'contains_context', 'my_context_name.y'
        ]).all()
        assert feature_df.contains_context.all()
Example no. 24
class HomogeneousRandomForestRegressionModelConfig(RegressionModelConfig):

    CONFIG_SPACE = SimpleHypergrid(
        name="homogeneous_random_forest_regression_model_config",
        dimensions=[
            DiscreteDimension(name="n_estimators", min=1, max=100),
            ContinuousDimension(name="features_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True),
            ContinuousDimension(name="samples_fraction_per_estimator", min=0, max=1, include_min=False, include_max=True),
            CategoricalDimension(name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__]),
        ]
    ).join(
        subgrid=DecisionTreeRegressionModelConfig.CONFIG_SPACE,
        on_external_dimension=CategoricalDimension(name="regressor_implementation", values=[DecisionTreeRegressionModel.__name__])
    )

    _DEFAULT = Point(
        n_estimators=5,
        features_fraction_per_estimator=1,
        samples_fraction_per_estimator=0.7,
        regressor_implementation=DecisionTreeRegressionModel.__name__,
        decision_tree_regression_model_config=DecisionTreeRegressionModelConfig.DEFAULT
    )

    def __init__(
            self,
            n_estimators=_DEFAULT.n_estimators,
            features_fraction_per_estimator=_DEFAULT.features_fraction_per_estimator,
            samples_fraction_per_estimator=_DEFAULT.samples_fraction_per_estimator,
            regressor_implementation=_DEFAULT.regressor_implementation,
            decision_tree_regression_model_config: Point = _DEFAULT.decision_tree_regression_model_config
    ):
        self.n_estimators = n_estimators
        self.features_fraction_per_estimator = features_fraction_per_estimator
        self.samples_fraction_per_estimator = samples_fraction_per_estimator
        self.regressor_implementation = regressor_implementation

        assert regressor_implementation == DecisionTreeRegressionModel.__name__
        self.decision_tree_regression_model_config = DecisionTreeRegressionModelConfig.create_from_config_point(decision_tree_regression_model_config)

    @classmethod
    def contains(cls, config): # pylint: disable=unused-argument
        return True  # TODO: see if you can remove this class entirely.
Example no. 25
    def _build_simple_hypergrid_target(self) -> None:
        self._target = SimpleHypergrid(name=self._adaptee.name,
                                       dimensions=None,
                                       random_state=self._adaptee.random_state)

        # Add non-transformed adaptee dimensions to the target
        for adaptee_dimension in self._adaptee.dimensions:
            if adaptee_dimension.name not in self._adaptee_dimension_names_to_transform:
                self._target.add_dimension(adaptee_dimension.copy())

        if not self._adaptee_contains_dimensions_to_transform:
            return

        # add new dimensions to be created by sklearn PolynomialFeatures

        # construct target dim names using adaptee dim names and polynomial feature powers matrix
        # This logic is worked out explicitly here so we have control over the derived dimension names.
        # Currently, the code only substitutes adaptee feature names into the default feature_names produced by
        # sklearn's PolynomialFeatures .get_feature_names() method.
        poly_feature_dim_names = self._get_polynomial_feature_names()
        for i, poly_feature_name in enumerate(poly_feature_dim_names):
            ith_terms_powers = self._polynomial_features_powers[i]

            if not self._polynomial_features_kwargs['include_bias'] and ith_terms_powers.sum() == 0:
                # the constant term is skipped
                continue
            else:
                # replace adaptee dim names for poly feature name {x0_, x1_, ...} representatives
                target_dim_name = poly_feature_name
                for j, adaptee_dim_name in enumerate(
                        self._adaptee_dimension_names_to_transform):
                    adaptee_dim_power = ith_terms_powers[j]
                    if adaptee_dim_power == 0:
                        continue
                    if adaptee_dim_power == 1:
                        poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}'
                        adaptee_dim_replacement_name = adaptee_dim_name
                    else:
                        # power > 1 cases
                        poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}^{adaptee_dim_power}'
                        adaptee_dim_replacement_name = f'{adaptee_dim_name}^{adaptee_dim_power}'

                    target_dim_name = target_dim_name.replace(
                        poly_feature_adaptee_dim_name_standin,
                        adaptee_dim_replacement_name)
            # add target dimension
            # min and max are placed at -Inf and +Inf since .random() on the target hypergrid is generated on the original
            # hypergrid and passed through the adapters.
            self._target.add_dimension(
                ContinuousDimension(name=target_dim_name,
                                    min=-math.inf,
                                    max=math.inf))
            self._target_polynomial_feature_map[target_dim_name] = i
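A self-contained illustration of the renaming this method performs, using sklearn directly (get_feature_names_out is the newer sklearn spelling of the get_feature_names method referenced in the comments above; the adapter's terminal-character bookkeeping is elided):

from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=2, include_bias=False)
poly.fit([[1.0, 2.0]])  # two input features, standing in for two adaptee dimensions
print(poly.get_feature_names_out(['duration', 'size']))
# ['duration' 'size' 'duration^2' 'duration size' 'size^2']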
Example no. 26
    def test_continuous_dimension(self, include_min, include_max):
        continuous_dimension = ContinuousDimension(name='continuous',
                                                   min=0,
                                                   max=10,
                                                   include_min=include_min,
                                                   include_max=include_max)
        serialized = OptimizerServiceEncoder.encode_continuous_dimension(continuous_dimension)
        deserialized_continuous_dimension = OptimizerServiceDecoder.decode_continuous_dimension(serialized)
        assert isinstance(serialized, OptimizerService_pb2.ContinuousDimension)
        assert deserialized_continuous_dimension == continuous_dimension
Example no. 27
    def setUp(self):
        mlos_globals.init_mlos_global_context()
        mlos_globals.mlos_global_context.start_clock()
        self.logger = create_logger('TestSmartCacheWithRemoteOptimizer')
        self.logger.level = logging.INFO

        self.mlos_agent = MlosAgent(
            logger=self.logger,
            communication_channel=mlos_globals.mlos_global_context.communication_channel,
            shared_config=mlos_globals.mlos_global_context.shared_config,
        )

        self.mlos_agent_thread = Thread(target=self.mlos_agent.run)
        self.mlos_agent_thread.start()

        global_values.declare_singletons()  # TODO: having both globals and global_values is a problem

        self.workload_duration_s = 5

        # Let's add the allowed component types
        self.mlos_agent.add_allowed_component_type(SmartCache)
        self.mlos_agent.add_allowed_component_type(SmartCacheWorkloadGenerator)

        # Let's create the workload
        self.smart_cache_workload = SmartCacheWorkloadGenerator(
            logger=self.logger)

        self.optimizer = None
        self.working_set_size_estimator = WorkingSetSizeEstimator()

        self.cache_config_timer = Timer(
            timeout_ms=200,
            observer_callback=self._set_new_cache_configuration)

        self.smart_cache_experiment = MlosExperiment(
            smart_component_types=[SmartCache],
            telemetry_aggregators=[
                self.cache_config_timer, self.working_set_size_estimator
            ])

        self.optimization_problem = OptimizationProblem(
            parameter_space=SmartCache.parameter_search_space,
            objective_space=SimpleHypergrid(name="objectives",
                                            dimensions=[
                                                ContinuousDimension(
                                                    name="miss_rate",
                                                    min=0,
                                                    max=1)
                                            ]),
            context_space=None,  # TODO: add working set size estimate
            objectives=[Objective(name="miss_rate", minimize=True)])
Example no. 28
    def __init__(self, objective_function_config: Point):
        assert objective_function_config in PolynomialObjective.CONFIG_SPACE
        ObjectiveFunctionBase.__init__(self, objective_function_config)
        self._polynomial_objective_config = objective_function_config
        self._polynomial_function = PolynomialObjective(
            seed=objective_function_config.seed,
            input_domain_dimension=objective_function_config.input_domain_dimension,
            max_degree=objective_function_config.max_degree,
            include_mixed_coefficients=objective_function_config.include_mixed_coefficients,
            percent_coefficients_zeroed=objective_function_config.percent_coefficients_zeroed,
            coefficient_domain_min=objective_function_config.coefficient_domain_min,
            coefficient_domain_width=objective_function_config.coefficient_domain_width,
            include_noise=objective_function_config.include_noise,
            noise_coefficient_of_variation=objective_function_config.noise_coefficient_of_variation,
        )

        self._parameter_space = SimpleHypergrid(
            name="domain",
            dimensions=[
                ContinuousDimension(
                    name=f"x_{i}",
                    min=objective_function_config.coefficient_domain_min,
                    max=objective_function_config.coefficient_domain_min + objective_function_config.coefficient_domain_width)
                for i in range(objective_function_config.input_domain_dimension)
            ])

        self._output_space = SimpleHypergrid(name='output_space',
                                             dimensions=[
                                                 ContinuousDimension(
                                                     name='y',
                                                     min=-math.inf,
                                                     max=math.inf)
                                             ])
Example no. 29
    def test_composite_dimension(self):
        original_A = ContinuousDimension(name='x', min=0, max=1)
        original_B = ContinuousDimension(name='x', min=2, max=3)
        original_C = ContinuousDimension(name='x', min=2.5, max=3.5)
        original_D = original_A.union(original_B) - original_C
        original_E = original_B - original_C
        original_F = original_A.union(original_E)

        serialized_A = json.dumps(original_A,
                                  cls=HypergridJsonEncoder,
                                  indent=2)
        serialized_B = json.dumps(original_B,
                                  cls=HypergridJsonEncoder,
                                  indent=2)
        serialized_C = json.dumps(original_C,
                                  cls=HypergridJsonEncoder,
                                  indent=2)
        serialized_D = json.dumps(original_D,
                                  cls=HypergridJsonEncoder,
                                  indent=2)
        serialized_E = json.dumps(original_E,
                                  cls=HypergridJsonEncoder,
                                  indent=2)
        serialized_F = json.dumps(original_F,
                                  cls=HypergridJsonEncoder,
                                  indent=2)

        A = json.loads(serialized_A, cls=HypergridJsonDecoder)
        B = json.loads(serialized_B, cls=HypergridJsonDecoder)
        C = json.loads(serialized_C, cls=HypergridJsonDecoder)
        D = json.loads(serialized_D, cls=HypergridJsonDecoder)
        E = json.loads(serialized_E, cls=HypergridJsonDecoder)
        F = json.loads(serialized_F, cls=HypergridJsonDecoder)

        self.assertTrue(A in original_A)
        self.assertTrue(B in original_B)
        self.assertTrue(C in original_C)
        self.assertTrue(D in original_D)
        self.assertTrue(E in original_E)
        self.assertTrue(F in original_F)

        self.assertTrue(original_A in A)
        self.assertTrue(original_B in B)
        self.assertTrue(original_C in C)
        self.assertTrue(original_D in D)
        self.assertTrue(original_E in E)
        self.assertTrue(original_F in F)

        self.assertTrue(0.5 in D)
        self.assertTrue(1.5 not in D)
        self.assertTrue(2.5 not in D)
        self.assertTrue(3.4 not in D)
        self.assertTrue(35 not in D)
        self.assertTrue(2 in E)
        self.assertTrue(2.5 not in E)
        self.assertTrue(0 in F and 1 in F and 1.5 not in F and 2 in F
                        and 2.5 not in F)
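For reference, the interval algebra being serialized and round-tripped works out as follows, matching the membership asserts above:

        # A = [0, 1], B = [2, 3], C = [2.5, 3.5]
        # D = (A | B) - C = [0, 1] | [2, 2.5)
        # E = B - C       = [2, 2.5)
        # F = A | E       = [0, 1] | [2, 2.5)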
Example no. 30
    def setup_method(self, method):
        # Let's create a simple linear mapping
        self.gradient = 10
        self.y_intercept = 10
        self.input_values = np.linspace(start=0,
                                        stop=100,
                                        num=101,
                                        endpoint=True)
        self.output_values = self.input_values * self.gradient + self.y_intercept

        self.input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=100)])

        self.output_space = SimpleHypergrid(name="output",
                                            dimensions=[
                                                ContinuousDimension(
                                                    name="y",
                                                    min=-math.inf,
                                                    max=math.inf)
                                            ])

        self.input_pandas_dataframe = pd.DataFrame({"x": self.input_values})
        self.output_pandas_dataframe = pd.DataFrame({"y": self.output_values})