def test_random_random_forest_models(self):
        """ Test's random forests with random configs

        :return:
        """
        sample_inputs = {
            'x': np.linspace(start=-10, stop=110, num=121, endpoint=True)
        }
        sample_inputs_pandas_dataframe = pd.DataFrame(sample_inputs)

        num_iterations = 5
        for i in range(num_iterations):
            if i % 10 == 0:
                print(f"{datetime.datetime.utcnow()} {i}/{num_iterations}")

            model_config = homogeneous_random_forest_config_store.parameter_space.random()
            model_config.n_estimators = min(model_config.n_estimators, 20)
            print(model_config)
            model = HomogeneousRandomForestRegressionModel(
                model_config=model_config,
                input_space=self.input_space,
                output_space=self.output_space)
            model.fit(self.input_pandas_dataframe,
                      self.output_pandas_dataframe,
                      iteration_number=i)
            predictions = model.predict(sample_inputs_pandas_dataframe)
            for sample_input, prediction in zip(
                    sample_inputs_pandas_dataframe['x'],
                    predictions.get_dataframe().iterrows()):
                print(sample_input, prediction)
Example #2
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger
        # Let's initialize the optimizer.
        #
        assert len(optimization_problem.objectives) == 1, "For now this is a single-objective optimizer."
        OptimizerBase.__init__(self, optimization_problem)

        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
        self.optimizer_config = optimizer_config

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
        self.surrogate_model = HomogeneousRandomForestRegressionModel(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.parameter_space,  # TODO: change to feature space
            output_space=self.optimization_problem.objective_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        # TODO: this will need a better home - either a DataSet class or the surrogate model itself.
        self._feature_values_df = pd.DataFrame(columns=[
            dimension.name for dimension in
            self.optimization_problem.parameter_space.dimensions
        ])
        self._target_values_df = pd.DataFrame(columns=[
            dimension.name for dimension in
            self.optimization_problem.objective_space.dimensions
        ])
Example #3
    def test_glow_worm_on_three_level_quadratic(self):
        output_space = SimpleHypergrid(name="output",
                                       dimensions=[
                                           ContinuousDimension(name='y',
                                                               min=-math.inf,
                                                               max=math.inf)
                                       ])

        objective_function_config = objective_function_config_store.get_config_by_name(
            'three_level_quadratic')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)
        # Let's warm up the model a bit
        #
        num_warmup_samples = 1000
        random_params_df = objective_function.parameter_space.random_dataframe(
            num_samples=num_warmup_samples)
        y = objective_function.evaluate_dataframe(random_params_df)

        model = HomogeneousRandomForestRegressionModel(
            model_config=self.model_config,
            input_space=objective_function.parameter_space,
            output_space=output_space)
        model.fit(feature_values_pandas_frame=random_params_df,
                  target_values_pandas_frame=y,
                  iteration_number=num_warmup_samples)

        optimization_problem = OptimizationProblem(
            parameter_space=objective_function.parameter_space,
            objective_space=output_space,
            objectives=[Objective(name='y', minimize=True)])

        utility_function = ConfidenceBoundUtilityFunction(
            function_config=self.utility_function_config,
            surrogate_model=model,
            minimize=optimization_problem.objectives[0].minimize)

        glow_worm_swarm_optimizer = GlowWormSwarmOptimizer(
            optimization_problem=optimization_problem,
            utility_function=utility_function,
            optimizer_config=glow_worm_swarm_optimizer_config_store.default)

        num_iterations = 5
        for i in range(num_iterations):
            suggested_params = glow_worm_swarm_optimizer.suggest()
            print(f"[{i+1}/{num_iterations}] {suggested_params.to_json()}")
            self.assertTrue(
                suggested_params in objective_function.parameter_space)
Example #4
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        MultiObjectiveRegressionModel.__init__(self,
                                               model_type=type(self),
                                               model_config=model_config,
                                               input_space=input_space,
                                               output_space=output_space)
        if logger is None:
            logger = create_logger("MultiObjectiveHomogeneousRandomForest")
        self.logger = logger

        # We just need to assert that the model config belongs in homogeneous_random_forest_config_store.parameter_space.
        # A more elaborate solution might be needed down the road, but for now this simple solution should suffice.
        #
        assert model_config in homogeneous_random_forest_config_store.parameter_space

        self._regressors_by_objective_name = KeyOrderedDict(
            ordered_keys=self.output_dimension_names,
            value_type=HomogeneousRandomForestRegressionModel)

        for output_dimension in output_space.dimensions:
            random_forest = HomogeneousRandomForestRegressionModel(
                model_config=model_config,
                input_space=input_space,
                output_space=SimpleHypergrid(
                    name=f"{output_dimension.name}_objective",
                    dimensions=[output_dimension]),
                logger=self.logger)
            self._regressors_by_objective_name[output_dimension.name] = random_forest
Example #5
    def setUpClass(cls) -> None:
        global_values.declare_singletons()

        cls.slope = 10
        cls.y_intercept = 10
        cls.input_values = np.linspace(start=0, stop=100, num=1000, endpoint=True)
        cls.output_values = cls.input_values * cls.slope + cls.y_intercept

        cls.input_space = SimpleHypergrid(
            name="input",
            dimensions=[ContinuousDimension(name="x", min=0, max=100)]
        )

        cls.output_space = SimpleHypergrid(
            name="output",
            dimensions=[ContinuousDimension(name="y", min=-math.inf, max=math.inf)]
        )

        cls.input_pandas_dataframe = pd.DataFrame({"x": cls.input_values})
        cls.output_pandas_dataframe = pd.DataFrame({"y": cls.output_values})

        cls.model_config = HomogeneousRandomForestRegressionModelConfig()
        cls.model = HomogeneousRandomForestRegressionModel(
            model_config=cls.model_config,
            input_space=cls.input_space,
            output_space=cls.output_space
        )
        cls.model.fit(cls.input_pandas_dataframe, cls.output_pandas_dataframe, iteration_number=len(cls.input_pandas_dataframe.index))

        cls.sample_inputs = {'x': np.linspace(start=-10, stop=110, num=13, endpoint=True)}
        cls.sample_inputs_pandas_dataframe = pd.DataFrame(cls.sample_inputs)
        cls.sample_predictions = cls.model.predict(cls.sample_inputs_pandas_dataframe)
Example #6
    def __init__(self,
                 model_config: Point,
                 input_space: Hypergrid,
                 output_space: Hypergrid,
                 logger=None):
        NaiveMultiObjectiveRegressionModel.__init__(
            self,
            model_type=HomogeneousRandomForestRegressionModel,
            model_config=model_config,
            input_space=input_space,
            output_space=output_space,
            logger=logger)

        # We just need to assert that the model config belongs in homogeneous_random_forest_config_store.parameter_space.
        # A more elaborate solution might be needed down the road, but for now this simple solution should suffice.
        #
        assert model_config in homogeneous_random_forest_config_store.parameter_space

        for output_dimension in output_space.dimensions:
            random_forest = HomogeneousRandomForestRegressionModel(
                model_config=model_config,
                input_space=input_space,
                output_space=SimpleHypergrid(
                    name=f"{output_dimension.name}_objective",
                    dimensions=[output_dimension]),
                logger=self.logger)
            self._regressors_by_objective_name[output_dimension.name] = random_forest
Example #7
    def test_default_homogeneous_random_forest_model(self):

        model_config = HomogeneousRandomForestRegressionModelConfig()
        model = HomogeneousRandomForestRegressionModel(
            model_config=model_config,
            input_space=self.input_space,
            output_space=self.output_space
        )

        for i in range(2):
            model.fit(self.input_pandas_dataframe, self.output_pandas_dataframe, iteration_number=i)
            print("Random forest predictions")

            sample_inputs = {'x': np.linspace(start=-10, stop=110, num=13, endpoint=True)}
            sample_inputs_pandas_dataframe = pd.DataFrame(sample_inputs)
            predictions = model.predict(sample_inputs_pandas_dataframe)
            for sample_input, prediction in zip(sample_inputs_pandas_dataframe['x'],
                                                predictions.get_dataframe().iterrows()):
                print(sample_input, prediction)
Example #8
    def setup_class(cls):
        """ Set's up all the objects needed to test the RandomSearchOptimizer

        To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

        To construct a utility function we need the same set up as in the TestConfidenceBoundUtilityFunction
        test.



        :return:
        """
        global_values.declare_singletons()
        global_values.tracer = Tracer(actor_id=cls.__name__, thread_id=0)

        objective_function_config = objective_function_config_store.get_config_by_name(
            '2d_quadratic_concave_up')
        objective_function = ObjectiveFunctionFactory.create_objective_function(
            objective_function_config=objective_function_config)

        cls.input_space = objective_function.parameter_space
        cls.output_space = objective_function.output_space

        cls.input_values_dataframe = objective_function.parameter_space.random_dataframe(
            num_samples=2500)
        cls.output_values_dataframe = objective_function.evaluate_dataframe(
            cls.input_values_dataframe)

        cls.model_config = homogeneous_random_forest_config_store.default

        print(cls.model_config)

        cls.model = HomogeneousRandomForestRegressionModel(
            model_config=cls.model_config,
            input_space=cls.input_space,
            output_space=cls.output_space)
        cls.model.fit(cls.input_values_dataframe,
                      cls.output_values_dataframe,
                      iteration_number=len(cls.input_values_dataframe.index))

        cls.utility_function_config = Point(
            utility_function_name="upper_confidence_bound_on_improvement",
            alpha=0.05)

        cls.optimization_problem = OptimizationProblem(
            parameter_space=cls.input_space,
            objective_space=cls.output_space,
            objectives=[Objective(name='y', minimize=True)])

        cls.utility_function = ConfidenceBoundUtilityFunction(
            function_config=cls.utility_function_config,
            surrogate_model=cls.model,
            minimize=cls.optimization_problem.objectives[0].minimize)
Example #9
    def test_random_random_forest_models(self):
        """ Test's random forests with random configs

        :return:
        """
        sample_inputs = {'x': np.linspace(start=-10, stop=110, num=121, endpoint=True)}
        sample_inputs_pandas_dataframe = pd.DataFrame(sample_inputs)

        num_iterations = 5
        for i in range(num_iterations):
            if i % 10 == 0:
                print(f"{datetime.datetime.utcnow()} {i}/{num_iterations}")
            model_config_values = HomogeneousRandomForestRegressionModelConfig.CONFIG_SPACE.random()
            print(str(model_config_values.to_json(indent=2)))
            model_config = HomogeneousRandomForestRegressionModelConfig(**model_config_values.dimension_value_dict)
            model = HomogeneousRandomForestRegressionModel(
                model_config=model_config,
                input_space=self.input_space,
                output_space=self.output_space
            )
            model.fit(self.input_pandas_dataframe, self.output_pandas_dataframe, iteration_number=i)
            predictions = model.predict(sample_inputs_pandas_dataframe)
            for sample_input, prediction in zip(sample_inputs_pandas_dataframe['x'], predictions.get_dataframe().iterrows()):
                print(sample_input, prediction)
Example #10
class BayesianOptimizer(OptimizerBase):
    """Generic Bayesian Optimizer based on regresson model

    Uses extra trees as surrogate model and confidence bound acquisition function by default.

    Attributes
    ----------
    logger : Logger
    optimization_problem : OptimizationProblem
    surrogate_model : HomogeneousRandomForestRegressionModel
    optimizer_config : Point
    experiment_designer: ExperimentDesigner

    """
    @trace()
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger
        # Let's initialize the optimizer.
        #
        assert len(optimization_problem.objectives) == 1, "For now this is a single-objective optimizer."
        OptimizerBase.__init__(self, optimization_problem)

        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
        self.optimizer_config = optimizer_config

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
        self.surrogate_model = HomogeneousRandomForestRegressionModel(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.parameter_space,  # TODO: change to feature space
            output_space=self.optimization_problem.objective_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        # TODO: this will need a better home - either a DataSet class or the surrogate model itself.
        self._feature_values_df = pd.DataFrame(columns=[
            dimension.name for dimension in
            self.optimization_problem.parameter_space.dimensions
        ])
        self._target_values_df = pd.DataFrame(columns=[
            dimension.name for dimension in
            self.optimization_problem.objective_space.dimensions
        ])

    @property
    def trained(self):
        return self.surrogate_model.trained

    @property
    def num_observed_samples(self):
        return len(self._feature_values_df.index)

    def compute_surrogate_model_goodness_of_fit(self):
        if not self.surrogate_model.trained:
            raise RuntimeError("Model has not been trained yet.")
        return self.surrogate_model.compute_goodness_of_fit(
            features_df=self._feature_values_df.copy(),
            target_df=self._target_values_df.copy(),
            data_set_type=DataSetType.TRAIN)

    def get_optimizer_convergence_state(self):
        return self._optimizer_convergence_state

    def get_all_observations(self):
        return self._feature_values_df.copy(), self._target_values_df.copy()

    @trace()
    def suggest(self, random=False, context=None):
        # TODO: pass context to the suggest method
        random = random or self.num_observed_samples < self.optimizer_config.min_samples_required_for_guided_design_of_experiments
        suggested_config = self.experiment_designer.suggest(random=random)
        assert suggested_config in self.optimization_problem.parameter_space
        return suggested_config

    @trace()
    def register(self, feature_values_pandas_frame,
                 target_values_pandas_frame):
        # TODO: add to a Dataset and move on. The surrogate model should have a reference to the same dataset
        # TODO: and should be able to refit automatically.

        self._feature_values_df = self._feature_values_df.append(
            feature_values_pandas_frame, ignore_index=True)
        self._target_values_df = self._target_values_df.append(
            target_values_pandas_frame, ignore_index=True)

        # TODO: ascertain that min_samples_required ... is more than min_samples to fit the model
        if self.num_observed_samples >= self.optimizer_config.min_samples_required_for_guided_design_of_experiments:
            self.surrogate_model.fit(
                feature_values_pandas_frame=self._feature_values_df,
                target_values_pandas_frame=self._target_values_df,
                iteration_number=len(self._feature_values_df.index))

    @trace()
    def predict(self, feature_values_pandas_frame, t=None):
        return self.surrogate_model.predict(feature_values_pandas_frame)

    def focus(self, subspace):
        ...

    def reset_focus(self):
        ...
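
For reference, here is a minimal sketch of driving this optimizer end to end, built only from the helpers that appear in the test snippets above. The '2d_quadratic_concave_up' config name is taken from those tests; bayesian_optimizer_config_store.default is an assumption modeled on the other config stores shown here, not a verified API.

    # A sketch of the suggest/evaluate/register loop (names assumed as noted above).
    objective_function_config = objective_function_config_store.get_config_by_name(
        '2d_quadratic_concave_up')
    objective_function = ObjectiveFunctionFactory.create_objective_function(
        objective_function_config=objective_function_config)

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)])

    optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default)  # assumed default config

    for _ in range(20):
        suggested_params = optimizer.suggest()          # random until enough samples are registered
        features_df = suggested_params.to_dataframe()   # single-row dataframe of parameter values
        targets_df = objective_function.evaluate_dataframe(features_df)
        optimizer.register(feature_values_pandas_frame=features_df,
                           target_values_pandas_frame=targets_df)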
Example #11
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger

        # Let's initialize the optimizer.
        #
        assert len(optimization_problem.objectives) == 1, "For now this is a single-objective optimizer."
        OptimizerBase.__init__(self, optimization_problem)

        # Since the optimization_problem.objective_space can now be multi-dimensional (as a milestone towards multi-objective
        # optimization), we have to prepare a smaller objective space for the surrogate model.
        # TODO: create multiple models each predicting a different objective. Also consider multi-objective models.
        #
        assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
        only_objective = optimization_problem.objectives[0]
        self.surrogate_model_output_space = SimpleHypergrid(
            name="surrogate_model_output_space",
            dimensions=[
                optimization_problem.objective_space[only_objective.name]
            ])

        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
        self.optimizer_config = optimizer_config

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
        self.surrogate_model = HomogeneousRandomForestRegressionModel(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.feature_space,
            output_space=self.surrogate_model_output_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        #
        self._parameter_names = [
            dimension.name for dimension in
            self.optimization_problem.parameter_space.dimensions
        ]
        self._parameter_names_set = set(self._parameter_names)

        self._context_names = ([
            dimension.name
            for dimension in self.optimization_problem.context_space.dimensions
        ] if self.optimization_problem.context_space else [])
        self._context_names_set = set(self._context_names)

        self._target_names = [
            dimension.name for dimension in
            self.optimization_problem.objective_space.dimensions
        ]
        self._target_names_set = set(self._target_names)

        self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
        self._context_values_df = pd.DataFrame(columns=self._context_names)
        self._target_values_df = pd.DataFrame(columns=self._target_names)
Example #12
class BayesianOptimizer(OptimizerBase):
    """Generic Bayesian Optimizer based on regresson model

    Uses extra trees as surrogate model and confidence bound acquisition function by default.

    Attributes
    ----------
    logger : Logger
    optimization_problem : OptimizationProblem
    surrogate_model : HomogeneousRandomForestRegressionModel
    optimizer_config : Point
    experiment_designer: ExperimentDesigner

    """
    @trace()
    def __init__(self,
                 optimization_problem: OptimizationProblem,
                 optimizer_config: Point,
                 logger=None):
        if logger is None:
            logger = create_logger("BayesianOptimizer")
        self.logger = logger

        # Let's initialize the optimizer.
        #
        assert len(optimization_problem.objectives) == 1, "For now this is a single-objective optimizer."
        OptimizerBase.__init__(self, optimization_problem)

        # Since the optimization_problem.objective_space can now be multi-dimensional (as a milestone towards multi-objective
        # optimization), we have to prepare a smaller objective space for the surrogate model.
        # TODO: create multiple models each predicting a different objective. Also consider multi-objective models.
        #
        assert not optimization_problem.objective_space.is_hierarchical(), "Not supported."
        only_objective = optimization_problem.objectives[0]
        self.surrogate_model_output_space = SimpleHypergrid(
            name="surrogate_model_output_space",
            dimensions=[
                optimization_problem.objective_space[only_objective.name]
            ])

        assert optimizer_config in bayesian_optimizer_config_store.parameter_space, "Invalid config."
        self.optimizer_config = optimizer_config

        # Now let's put together the surrogate model.
        #
        assert self.optimizer_config.surrogate_model_implementation == HomogeneousRandomForestRegressionModel.__name__, "TODO: implement more"
        self.surrogate_model = HomogeneousRandomForestRegressionModel(
            model_config=self.optimizer_config.homogeneous_random_forest_regression_model_config,
            input_space=self.optimization_problem.feature_space,
            output_space=self.surrogate_model_output_space,
            logger=self.logger)

        # Now let's put together the experiment designer that will suggest parameters for each experiment.
        #
        assert self.optimizer_config.experiment_designer_implementation == ExperimentDesigner.__name__
        self.experiment_designer = ExperimentDesigner(
            designer_config=self.optimizer_config.experiment_designer_config,
            optimization_problem=self.optimization_problem,
            surrogate_model=self.surrogate_model,
            logger=self.logger)

        self._optimizer_convergence_state = BayesianOptimizerConvergenceState(
            surrogate_model_fit_state=self.surrogate_model.fit_state)

        # Also let's make sure we have the dataframes we need for the surrogate model.
        #
        self._parameter_names = [
            dimension.name for dimension in
            self.optimization_problem.parameter_space.dimensions
        ]
        self._parameter_names_set = set(self._parameter_names)

        self._context_names = ([
            dimension.name
            for dimension in self.optimization_problem.context_space.dimensions
        ] if self.optimization_problem.context_space else [])
        self._context_names_set = set(self._context_names)

        self._target_names = [
            dimension.name for dimension in
            self.optimization_problem.objective_space.dimensions
        ]
        self._target_names_set = set(self._target_names)

        self._parameter_values_df = pd.DataFrame(columns=self._parameter_names)
        self._context_values_df = pd.DataFrame(columns=self._context_names)
        self._target_values_df = pd.DataFrame(columns=self._target_names)

    @property
    def trained(self):
        return self.surrogate_model.trained

    @property
    def num_observed_samples(self):
        return len(self._parameter_values_df.index)

    def compute_surrogate_model_goodness_of_fit(self):
        if not self.surrogate_model.trained:
            raise RuntimeError("Model has not been trained yet.")
        feature_values_pandas_frame = self.optimization_problem.construct_feature_dataframe(
            parameter_values=self._parameter_values_df.copy(),
            context_values=self._context_values_df.copy())
        return self.surrogate_model.compute_goodness_of_fit(
            features_df=feature_values_pandas_frame,
            target_df=self._target_values_df.copy(),
            data_set_type=DataSetType.TRAIN)

    def get_optimizer_convergence_state(self):
        return self._optimizer_convergence_state

    def get_all_observations(self):
        return self._parameter_values_df.copy(), self._target_values_df.copy(), self._context_values_df.copy()

    @trace()
    def suggest(self, random=False, context: Point = None):
        if self.optimization_problem.context_space is not None:
            if context is None:
                raise ValueError(
                    "Context required by optimization problem but not provided."
                )
            assert context in self.optimization_problem.context_space
        random = random or self.num_observed_samples < self.optimizer_config.min_samples_required_for_guided_design_of_experiments
        context_values = context.to_dataframe() if context is not None else None
        suggested_config = self.experiment_designer.suggest(
            random=random, context_values_dataframe=context_values)
        assert suggested_config in self.optimization_problem.parameter_space
        return suggested_config

    @trace()
    def register(self,
                 parameter_values_pandas_frame,
                 target_values_pandas_frame,
                 context_values_pandas_frame=None):
        # TODO: add to a Dataset and move on. The surrogate model should have a reference to the same dataset
        # TODO: and should be able to refit automatically.

        if self.optimization_problem.context_space is not None and context_values_pandas_frame is None:
            raise ValueError(
                "Context required by optimization problem but not provided.")

        parameter_columns_to_retain = [
            column for column in parameter_values_pandas_frame.columns
            if column in self._parameter_names_set
        ]
        target_columns_to_retain = [
            column for column in target_values_pandas_frame.columns
            if column in self._target_names_set
        ]

        if len(parameter_columns_to_retain) == 0:
            raise ValueError(
                f"None of the {parameter_values_pandas_frame.columns} is a parameter recognized by this optimizer."
            )

        if len(target_columns_to_retain) == 0:
            raise ValueError(
                f"None of {target_values_pandas_frame.columns} is a target recognized by this optimizer."
            )

        parameter_values_pandas_frame = parameter_values_pandas_frame[
            parameter_columns_to_retain]
        target_values_pandas_frame = target_values_pandas_frame[
            target_columns_to_retain]

        all_null_parameters = parameter_values_pandas_frame[
            parameter_values_pandas_frame.isnull().all(axis=1)]
        if len(all_null_parameters.index) > 0:
            raise ValueError(
                f"{len(all_null_parameters.index)} of the observations contain(s) no valid parameters."
            )

        if context_values_pandas_frame is not None:
            all_null_context = context_values_pandas_frame[
                context_values_pandas_frame.isnull().all(axis=1)]
            if len(all_null_context.index) > 0:
                raise ValueError(
                    f"{len(all_null_context.index)} of the observations contain(s) no valid context."
                )

        all_null_targets = target_values_pandas_frame[
            target_values_pandas_frame.isnull().all(axis=1)]
        if len(all_null_targets.index) > 0:
            raise ValueError(
                f"{len(all_null_targets.index)} of the observations contain(s) no valid targets"
            )

        if context_values_pandas_frame is not None:
            if len(parameter_values_pandas_frame) != len(
                    context_values_pandas_frame):
                raise ValueError(
                    f"Incompatible shape of parameters and context: "
                    f"{parameter_values_pandas_frame.shape} and {context_values_pandas_frame.shape}."
                )
            context_columns_to_retain = [
                column for column in context_values_pandas_frame.columns
                if column in self._context_names_set
            ]
            if len(context_columns_to_retain) == 0:
                raise ValueError(
                    f"None of the {context_values_pandas_frame.columns} is a context recognized by this optimizer."
                )
            context_values_pandas_frame = context_values_pandas_frame[
                context_columns_to_retain]
            self._context_values_df = self._context_values_df.append(
                context_values_pandas_frame, ignore_index=True)

        self._parameter_values_df = self._parameter_values_df.append(
            parameter_values_pandas_frame, ignore_index=True)
        self._target_values_df = self._target_values_df.append(
            target_values_pandas_frame, ignore_index=True)

        # TODO: ascertain that min_samples_required ... is more than min_samples to fit the model
        if self.num_observed_samples >= self.optimizer_config.min_samples_required_for_guided_design_of_experiments:
            feature_values_pandas_frame = self.optimization_problem.construct_feature_dataframe(
                parameter_values=self._parameter_values_df,
                context_values=self._context_values_df)
            self.surrogate_model.fit(
                feature_values_pandas_frame=feature_values_pandas_frame,
                target_values_pandas_frame=self._target_values_df,
                iteration_number=len(self._parameter_values_df.index))

    @trace()
    def predict(self,
                parameter_values_pandas_frame,
                t=None,
                context_values_pandas_frame=None):  # pylint: disable=unused-argument
        # TODO: make this streaming and/or using arrow.
        #
        feature_values_pandas_frame = self.optimization_problem.construct_feature_dataframe(
            parameter_values=parameter_values_pandas_frame,
            context_values=context_values_pandas_frame)
        return self.surrogate_model.predict(feature_values_pandas_frame)

    def focus(self, subspace):
        ...

    def reset_focus(self):
        ...
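
A sketch of driving this context-aware variant, with the assumptions labeled: OptimizationProblem is assumed to accept a context_space Hypergrid (a constructor argument not shown in these snippets), bayesian_optimizer_config_store.default is assumed to exist, and the 'load' context dimension is purely illustrative.

    # Hypothetical context space; the context_space keyword below is an assumption.
    context_space = SimpleHypergrid(
        name="context",
        dimensions=[ContinuousDimension(name="load", min=0, max=1)])

    optimization_problem = OptimizationProblem(
        parameter_space=objective_function.parameter_space,
        objective_space=objective_function.output_space,
        objectives=[Objective(name='y', minimize=True)],
        context_space=context_space)  # assumed keyword argument

    optimizer = BayesianOptimizer(
        optimization_problem=optimization_problem,
        optimizer_config=bayesian_optimizer_config_store.default)  # assumed default config

    # Each observation now registers parameters, targets, and the context they were observed under.
    context = Point(load=0.5)
    suggested_params = optimizer.suggest(context=context)
    params_df = suggested_params.to_dataframe()
    targets_df = objective_function.evaluate_dataframe(params_df)
    optimizer.register(parameter_values_pandas_frame=params_df,
                       target_values_pandas_frame=targets_df,
                       context_values_pandas_frame=context.to_dataframe())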
Example #13
    def setUpClass(cls):
        """ Set's up all the objects needed to test the RandomSearchOptimizer

        To test the RandomSearchOptimizer we need to first construct:
        * an optimization problem
        * a utility function

        To construct a utility function we need the same set up as in the TestConfidenceBoundUtilityFunction
        test.



        :return:
        """
        global_values.declare_singletons()

        cls.input_space = SimpleHypergrid(name="input",
                                          dimensions=[
                                              ContinuousDimension(name='x_1',
                                                                  min=-100,
                                                                  max=100),
                                              ContinuousDimension(name='x_2',
                                                                  min=-100,
                                                                  max=100)
                                          ])

        cls.output_space = SimpleHypergrid(name="output",
                                           dimensions=[
                                               ContinuousDimension(
                                                   name='y',
                                                   min=-math.inf,
                                                   max=math.inf)
                                           ])

        x_1, x_2 = np.meshgrid(cls.input_space['x_1'].linspace(num=201),
                               cls.input_space['x_2'].linspace(num=201))

        y = -quadratic(x_1=x_1, x_2=x_2)

        cls.input_values_dataframe = pd.DataFrame({
            'x_1': x_1.reshape(-1),
            'x_2': x_2.reshape(-1)
        })
        cls.output_values_dataframe = pd.DataFrame({'y': y.reshape(-1)})

        cls.model_config = HomogeneousRandomForestRegressionModelConfig()
        cls.model = HomogeneousRandomForestRegressionModel(
            model_config=cls.model_config,
            input_space=cls.input_space,
            output_space=cls.output_space)
        cls.model.fit(cls.input_values_dataframe,
                      cls.output_values_dataframe,
                      iteration_number=len(cls.input_values_dataframe.index))

        cls.utility_function_config = ConfidenceBoundUtilityFunctionConfig(
            utility_function_name="upper_confidence_bound_on_improvement",
            alpha=0.05)

        cls.optimization_problem = OptimizationProblem(
            parameter_space=cls.input_space,
            objective_space=cls.output_space,
            objectives=[Objective(name='y', minimize=True)])

        cls.utility_function = ConfidenceBoundUtilityFunction(
            function_config=cls.utility_function_config,
            surrogate_model=cls.model,
            minimize=cls.optimization_problem.objectives[0].minimize)