Example #1
0
def normalize_dimensions(dimensions):
    """Create a ``Space`` where all dimensions are normalized to unit range.

    This is particularly useful for Gaussian process based regressors and is
    used internally by ``gp_minimize``.

    Parameters
    ----------
    dimensions : list, shape (n_dims,)
        List of search space dimensions.
        Each search dimension can be defined either as

        - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
          dimensions),
        - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
          dimensions),
        - as a list of categories (for `Categorical` dimensions), or
        - an instance of a `Dimension` object (`Real`, `Integer` or
          `Categorical`).

        NOTE: The upper and lower bounds are inclusive for `Integer`
        dimensions.

    Returns
    -------
    Space
        A new ``Space`` built from the dimensions after their transformer
        has been switched to ``"normalize"``.

    Raises
    ------
    RuntimeError
        If an entry of the intermediate ``Space`` is not a ``Dimension``.
    """
    normalized = []
    # Going through ``Space`` first lets it parse the tuple/list shorthands.
    for dim in Space(dimensions).dimensions:
        if not isinstance(dim, Dimension):
            raise RuntimeError("Unknown dimension type "
                               "(%s)" % type(dim))
        # The transformer is switched in place on the dimension object.
        dim.set_transformer("normalize")
        normalized.append(dim)

    return Space(normalized)
Example #2
0
    def __init__(self,
                 problem: opti.Problem,
                 base_est_params: dict = None,
                 gurobi_env: Optional[Callable] = None):
        """Wrap ``problem`` in an ENTING-based ``Optimizer`` and fit the model.

        :param problem: opti.Problem, must carry initial data (``problem.data``)
        :param base_est_params: dict, forwarded to the ``Optimizer`` as
            ``base_estimator_kwargs``; ``None`` is treated as an empty dict
        :param gurobi_env: Optional[Callable], stored for Gurobi environment
            handling in case you are using the Gurobi Cloud service
        :raises ValueError: if ``problem.data`` is ``None``
        """
        self.problem: opti.Problem = problem
        self._base_est_params: dict = (
            {} if base_est_params is None else base_est_params)
        self.model: lgb.Booster = None

        # one objective per output of the problem
        self.num_obj = len(self.problem.outputs.names)

        # Gurobi environment handling in case you are using the Gurobi Cloud service
        self.gurobi_env = gurobi_env

        # placeholders for the categorical bookkeeping
        self.cat_names: list[str] = None
        self.cat_idx: list[int] = None

        if self.problem.data is None:
            raise ValueError("No initial data points provided.")

        dimensions: list = self._build_dimensions_list()
        self.space = Space(dimensions)

        # No initial points are sampled by the optimizer itself
        # (n_initial_points=0); the random seed is fixed.
        optimizer_settings = dict(
            dimensions=dimensions,
            base_estimator="ENTING",
            n_initial_points=0,
            num_obj=self.num_obj,
            random_state=73,
            base_estimator_kwargs=self._base_est_params,
        )
        self.entmoot_optimizer: Optimizer = Optimizer(**optimizer_settings)

        self._fit_model()
Example #3
0
    def generate(self, dimensions, n_samples, random_state=None):
        """Creates latin hypercube samples.

        A first design is always drawn.  When a criterion is set and more
        than one sample is requested, ``self.iterations`` further designs are
        drawn and the best one according to the criterion is kept.

        Parameters
        ----------
        dimensions : list, shape (n_dims,)
            List of search space dimensions.
            Each search dimension can be defined either as

            - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
              dimensions),
            - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
              dimensions),
            - as a list of categories (for `Categorical` dimensions), or
            - an instance of a `Dimension` object (`Real`, `Integer` or
              `Categorical`).

        n_samples : int
            The order of the LHS sequence. Defines the number of samples.
        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.

        Returns
        -------
        The selected LHS set, mapped back to the original search space by
        ``Space.inverse_transform``.

        Raises
        ------
        ValueError
            If ``self.criterion`` is not ``None``, ``"correlation"``,
            ``"maximin"`` or ``"ratio"`` (and more than one sample is
            requested).
        """
        rng = check_random_state(random_state)
        space = Space(dimensions)
        original_transformer = space.get_transformer()
        n_dim = space.n_dims
        # Designs are drawn in the normalized space and mapped back afterwards.
        space.set_transformer("normalize")

        single_draw = self.criterion is None or n_samples == 1

        # The first design doubles as the fallback if no candidate improves.
        best = space.inverse_transform(
            self._lhs_normalized(n_dim, n_samples, rng))
        if single_draw:
            space.set_transformer(original_transformer)
            return best

        if self.criterion == "correlation":
            # Keep the design with the smallest maximal absolute correlation.
            lowest_corr = np.inf
            for _ in range(self.iterations):
                candidate = self._lhs_normalized(n_dim, n_samples, rng)
                corr = np.corrcoef(np.array(candidate).T)
                non_unit = np.abs(corr[corr != 1])
                if len(non_unit) > 0 and np.max(non_unit) < lowest_corr:
                    lowest_corr = np.max(
                        np.abs(corr - np.eye(corr.shape[0])))
                    best = space.inverse_transform(candidate.copy())
        elif self.criterion == "maximin":
            # Maximize the minimum distance between points
            widest_gap = 0
            for _ in range(self.iterations):
                candidate = self._lhs_normalized(n_dim, n_samples, rng)
                dists = spatial.distance.pdist(
                    np.array(candidate), 'euclidean')
                closest = np.min(dists)
                if widest_gap < closest:
                    widest_gap = closest
                    best = space.inverse_transform(candidate.copy())
        elif self.criterion == "ratio":
            # Minimize the ratio of largest to smallest pairwise distance
            lowest_ratio = np.inf
            for _ in range(self.iterations):
                candidate = self._lhs_normalized(n_dim, n_samples, rng)
                dists = spatial.distance.pdist(
                    np.array(candidate), 'euclidean')
                closest = np.min(dists)
                # coincident points: substitute a tiny denominator
                denominator = 1e-8 if closest == 0 else closest
                ratio = np.max(dists) / denominator
                if lowest_ratio > ratio:
                    lowest_ratio = ratio
                    best = space.inverse_transform(candidate.copy())
        else:
            raise ValueError("Wrong criterion."
                             "Got {}".format(self.criterion))

        space.set_transformer(original_transformer)
        return best
Example #4
0
class Optimizer(object):
    """
    This class is used to run the main BO loop. 
    Optimizer objects define all BO settings and store the current data set.
    
    The self.ask function provides new input proposals, and resulting black-box
    values can be added through self.tell. This procedure is automated via
    self.run, where a callable function is provided as an input.
    
    Functions self.predict_with_est and self.predict_with_acq use the current
    surrogate model to predict inputs.
    
    :param dimensions: list
        list of search-space variables, i.e.
            If (lower: float, upper: float), gives continuous variable
            If (lower: int, upper: int), gives discrete variable
            If list of categories, gives categorical variable
    :param base_estimator: str
        currently available estimator types are in ["ENTING"]
    :param n_initial_points: int
        number of initial points sampled before surrogate model is trained
    :param initial_point_generator: str
        currently supported sampling generators are in
        ["sobol", "halton", "hammersly", "lhs", "random", "grid"]
    :param num_obj: int
        gives the number of objectives that the black-box is being optimized for
    :param acq_func: str
        acquisition function type that is used for exploitation vs. exploration
        trade-off, i.e. currently supported ["LCB"] and ["LCB", "HLCB"] for
        num_obj == 1
    :param acq_optimizer: str
        optimization method used to minimize the acquisition function, i.e.
        currently supports ["sampling", "global"]
    :param random_state: Optional[int]
        fixed random seed to generate reproducible results
    :param acq_func_kwargs: Optional[dict]
        define additional params for acquisition function, i.e.
            "kappa": int, influences acquisition function "min mu - kappa * alpha"
    :param acq_optimizer_kwargs: Optional[dict]
        define additional params for acqu. optimizer "sampling":
            "n_points": int, number of points to minimize the acquisition function
        define additional params for acqu. optimizer "global":
            "env": GRBEnv, defines Gurobi environment for cluster computations
            "gurobi_timelimit": int, optimization time limit in sec
            "add_model_core": GRBModel, Gurobi optimization model that includes 
                additional constraints
    :param base_estimator_kwargs: Optional[dict]
        defines additional params that influence the base_estimator behavior
            "lgbm_params": dict, additional parameters that are passed to lightgbm
            "ensemble_type": str, options in ['GBRT', 'RF'],
                "GBRT": uses gradient-boosted tree regressor
                "RF": uses random forest
            "unc_metric": str, options in ['exploration', 'penalty'], i.e.
                negative or positive alpha contribution in "min mu +/- kappa * alpha"
            "unc_scaling": str, options in ["standard", "normalize"], i.e.
                scaling used for the distance-based uncertainty metric
            "dist_metric": str, options in ["manhattan",'squared_euclidean'], i.e.
                metric used to define non-categorical distance for uncertainty
            "cat_dist_metric": str, options in ["overlap", "goodall4", "of"], i.e.
                metric used to define categorical distance for uncertainty
    :param model_queue_size: Optional[int]
        defines number of previous models that are stored in self.models
    :param verbose: bool
        defines the verbosity level of the output
    """
    def __init__(self,
                 dimensions: list,
                 base_estimator: str = "ENTING",
                 n_initial_points: int = 50,
                 initial_point_generator: str = "random",
                 num_obj: int = 1,
                 acq_func: str = "LCB",
                 acq_optimizer: str = "global",
                 random_state: Optional[int] = None,
                 acq_func_kwargs: Optional[dict] = None,
                 acq_optimizer_kwargs: Optional[dict] = None,
                 base_estimator_kwargs: Optional[dict] = None,
                 model_queue_size: Optional[int] = None,
                 verbose: bool = False):
        """
        Validate the settings, build the search space, the initial samples
        and the base estimator, and initialise all caches.

        Parameters are described in the class docstring.

        :raises ValueError: if ``acq_func`` is not "LCB"/"HLCB", if
            ``n_initial_points`` is negative, or if ``base_estimator`` is
            neither a supported estimator name nor an estimator instance
        :raises TypeError: if ``model_queue_size`` is not an int or None, or
            if ``verbose`` is not a bool, None, or an int in [0,1,2]
        """

        # imports are local to the constructor, as in the rest of this class
        from entmoot.utils import is_supported
        from entmoot.utils import cook_estimator
        from entmoot.utils import cook_initial_point_generator
        from sklearn.utils import check_random_state

        # define random state
        self.rng = check_random_state(random_state)

        # store and create acquisition function set
        self.acq_func = acq_func
        self.acq_func_kwargs = \
            {} if acq_func_kwargs is None else acq_func_kwargs

        allowed_acq_funcs = ["LCB", "HLCB"]
        if self.acq_func not in allowed_acq_funcs:
            raise ValueError("expected acq_func to be in %s, got %s" %
                             (",".join(allowed_acq_funcs), self.acq_func))

        # configure counter of points: `_n_initial_points` counts down as
        # data is told, `n_initial_points_` keeps the configured value
        if n_initial_points < 0:
            raise ValueError("Expected `n_initial_points` >= 0, got %d" %
                             n_initial_points)
        self._n_initial_points = n_initial_points
        self.n_initial_points_ = n_initial_points

        # initialize search space and output
        from entmoot.space.space import Space

        self.space = Space(dimensions)

        self._initial_samples = None
        self._initial_point_generator = \
            cook_initial_point_generator(initial_point_generator)

        if self._initial_point_generator is not None:
            # remember the transformer so it can be restored after sampling
            transformer = self.space.get_transformer()
            self._initial_samples = self._initial_point_generator.generate(
                self.space.dimensions,
                n_initial_points,
                random_state=self.rng.randint(0,
                                              np.iinfo(np.int32).max))
            self.space.set_transformer(transformer)

        self.num_obj = num_obj

        # create base_estimator
        self.base_estimator_kwargs = \
            {} if base_estimator_kwargs is None else base_estimator_kwargs

        from entmoot.learning.tree_model import EntingRegressor, MisicRegressor

        # isinstance (rather than an exact type comparison) also accepts
        # subclasses of the estimator classes and of str
        if not isinstance(base_estimator, (EntingRegressor, MisicRegressor)):
            if not isinstance(base_estimator, str):
                # the 1-tuple keeps the formatting safe for tuple arguments
                raise ValueError("Estimator type: %s is not supported." %
                                 (base_estimator,))

            # define random_state of estimator
            est_random_state = self.rng.randint(0, np.iinfo(np.int32).max)

            # check support of base_estimator if exists
            if not is_supported(base_estimator):
                raise ValueError("Estimator type: %s is not supported." %
                                 base_estimator)

            # build base_estimator
            base_estimator = cook_estimator(self.space,
                                            base_estimator,
                                            self.base_estimator_kwargs,
                                            num_obj=self.num_obj,
                                            random_state=est_random_state)

        self.base_estimator_ = base_estimator

        # Configure Optimizer
        self.acq_optimizer = acq_optimizer

        # record other arguments
        if acq_optimizer_kwargs is None:
            acq_optimizer_kwargs = {}

        self.acq_optimizer_kwargs = acq_optimizer_kwargs
        self.n_points = acq_optimizer_kwargs.get("n_points", 10000)
        self.gurobi_env = acq_optimizer_kwargs.get("env", None)
        self.gurobi_timelimit = acq_optimizer_kwargs.get(
            "gurobi_timelimit", None)

        ## Initialize storage for optimization
        if not isinstance(model_queue_size, (int, type(None))):
            raise TypeError("model_queue_size should be an int or None, "
                            "got {}".format(type(model_queue_size)))

        # model cache
        self.max_model_queue_size = model_queue_size
        self.models = []

        # data set cache
        self.Xi = []
        self.yi = []

        # model_mu and model_std cache
        self.model_mu = []
        self.model_std = []

        # global opti metrics
        self.gurobi_mipgap = []

        # Initialize cache for `ask` method responses
        # This ensures that multiple calls to `ask` with n_points set
        # return same sets of points. Reset to {} at every call to `tell`.
        self.cache_ = {}

        # Handle solver print output
        if not isinstance(verbose, (int, type(None))):
            raise TypeError("verbose should be an int of [0,1,2] or bool, "
                            "got {}".format(type(verbose)))

        if verbose is None:
            # None passes the type check above, so it must map to a level;
            # it is treated as silent instead of leaving `self.verbose` unset
            self.verbose = 0
        elif isinstance(verbose, bool):
            # bool is tested before int because bool is a subclass of int
            self.verbose = 1 if verbose else 0
        else:
            if verbose not in [0, 1, 2]:
                raise TypeError("if verbose is int, it should in [0,1,2], "
                                "got {}".format(verbose))
            self.verbose = verbose

        # printed_switch_to_model defines if notification of switch from
        # initial point generation to model-based point generation has been
        # printed yet; when silent it is marked as already printed
        self.printed_switch_to_model = not self.verbose > 0

    def copy(self, random_state: Optional[int] = None):
        """Create a shallow copy of an instance of the optimizer.

        The copy is built with the same settings as this optimizer
        (including the number of objectives, the estimator kwargs and the
        model queue size), shares the initial samples and keyword-argument
        dicts, and is told all data points stored so far.  Fitted models and
        the model_mu / model_std / mipgap histories are not carried over.

        :param random_state: Optional[int]
            seed (or random state) handed to the new optimizer
        :return optimizer: Optimizer
        """

        optimizer = Optimizer(
            dimensions=self.space.dimensions,
            base_estimator=self.base_estimator_,
            n_initial_points=self.n_initial_points_,
            initial_point_generator=self._initial_point_generator,
            # without num_obj the copy would silently fall back to the
            # single-objective default
            num_obj=self.num_obj,
            acq_func=self.acq_func,
            acq_optimizer=self.acq_optimizer,
            random_state=random_state,
            acq_func_kwargs=self.acq_func_kwargs,
            acq_optimizer_kwargs=self.acq_optimizer_kwargs,
            base_estimator_kwargs=self.base_estimator_kwargs,
            model_queue_size=self.max_model_queue_size,
            verbose=self.verbose)

        # reuse the samples already drawn instead of the freshly generated ones
        optimizer._initial_samples = self._initial_samples
        optimizer.printed_switch_to_model = self.printed_switch_to_model
        # `ask` may have replaced the environment taken from the kwargs
        optimizer.gurobi_env = self.gurobi_env

        if self.Xi:
            optimizer._tell(self.Xi, self.yi)

        return optimizer

    def ask(
        self,
        n_points: int = None,
        strategy: str = "cl_min",
        weights: Optional[list] = None,
        add_model_core=None,
        gurobi_env=None,
    ) -> list:
        """
        Computes the next point (or multiple points) at which the objective
        should be evaluated.

        :param n_points: int = None,
            gives the number of points that should be returned
        :param strategy: str = "cl_min",
            determines the liar strategy, i.e.
            (https://hal.archives-ouvertes.fr/hal-00732512/document)
            used for single-objective batch proposals, i.e.
                "cl_min": str, uses minimum of observations as lie
                "cl_mean": str, uses mean of observations as lie
                "cl_max": str, uses maximum of observations as lie
        :param weights: Optional[list] = None,
            1D list of weights of size num_obj for single multi-objective proposal;
            2D list of weights of size shape(n_points,num_obj) for
                batch multi-objectiveproposals
        :param add_model_core: GRBModel = None,
            Gurobi optimization model that includes additional constraints
        :param gurobi_env: Gurobi Env = None,
            Gurobi environment used for computation of the next proposal

        :return next_x: list, next proposal of shape(n_points, n_dims)
        """

        # update gurobi_env attribute
        if gurobi_env:
            self.gurobi_env = gurobi_env

        # check if single point or batch of point is returned
        if n_points is None and weights is None:
            return self._ask(add_model_core=add_model_core)
        elif self.num_obj > 1:
            if weights is not None:
                n_points = len(weights)

            X = []
            for i in range(n_points):
                self._next_x = None

                # use the weights if provided
                if weights:
                    w = weights[i]
                    assert len(w) == self.num_obj, \
                        f"The {i}'th provided weight has dim '{len(w)}' but " \
                        f"number of objectives is '{self.num_obj}'."
                    next_x = self._ask(weight=w, add_model_core=add_model_core)
                else:
                    next_x = self._ask(add_model_core=add_model_core)
                X.append(next_x)
            return X if len(X) > 1 else X[0]

        supported_strategies = ["cl_min", "cl_mean", "cl_max"]

        if not (isinstance(n_points, int) and n_points > 0):
            raise ValueError("n_points should be int > 0, got " +
                             str(n_points))

        if strategy not in supported_strategies:
            raise ValueError("Expected parallel_strategy to be one of " +
                             str(supported_strategies) + ", " +
                             "got %s" % strategy)

        # caching the result with n_points not None. If some new parameters
        # are provided to the ask, the cache_ is not used.
        if (n_points, strategy) in self.cache_:
            return self.cache_[(n_points, strategy)]

        # Copy of the optimizer is made in order to manage the
        # deletion of points with "lie" objective (the copy of
        # optimizer is simply discarded)
        opt = self.copy(random_state=self.rng.randint(0,
                                                      np.iinfo(np.int32).max))

        X = []
        for i in range(n_points):

            x = opt.ask()
            X.append(x)

            if strategy == "cl_min":
                y_lie = np.min(opt.yi) if opt.yi else 0.0  # CL-min lie
            elif strategy == "cl_mean":
                y_lie = np.mean(opt.yi) if opt.yi else 0.0  # CL-mean lie
            else:
                y_lie = np.max(opt.yi) if opt.yi else 0.0  # CL-max lie

            opt._tell(x, y_lie)

        self.printed_switch_to_model = opt.printed_switch_to_model

        self.cache_ = {(n_points, strategy): X}  # cache_ the result

        return X

    def _ask(self, weight: Optional[list] = None, add_model_core=None):
        """
        Computes the next point at which the objective should be evaluated.

        :param weight: Optional[list] = None,
            list of weights of size num_obj to trade-off between different
            objective contributions
        :param add_model_core: GRBModel = None,
            Gurobi optimization model that includes additional constraints

        :return next_x: list, next proposal of shape(n_points, n_dims)
        """

        if self._n_initial_points > 0 or self.base_estimator_ is None:
            # this will not make a copy of `self.rng` and hence keep advancing
            # our random state.
            if self._initial_samples is None:
                return self.space.rvs(random_state=self.rng)[0]
            else:
                # The samples are evaluated starting form initial_samples[0]
                return self._initial_samples[len(self._initial_samples) -
                                             self._n_initial_points]

        elif self._next_x is not None:
            # return self._next_x if optimizer hasn't learned anything new
            return self._next_x

        else:
            # after being "told" n_initial_points we switch from sampling
            # random points to using a surrogate model

            # create fresh copy of base_estimator
            est = self.base_estimator_.copy()
            self.base_estimator_ = est

            # esimator is fitted using a generic fit function
            est.fit(self.space.transform(self.Xi), self.yi)

            # we cache the estimator in model_queue
            if self.max_model_queue_size is None:
                self.models.append(est)
            elif len(self.models) < self.max_model_queue_size:
                self.models.append(est)
            else:
                # Maximum list size obtained, remove oldest model.
                self.models.pop(0)
                self.models.append(est)

            if not self.printed_switch_to_model:
                print("")
                print("SOLVER: initial points exhausted")
                print("   -> switch to model-based optimization")
                self.printed_switch_to_model = True

            # this code provides a heuristic solution that uses sampling as the optimization strategy
            if self.acq_optimizer == "sampling":
                # sample a large number of points and then pick the best ones as
                # starting points
                X = self.space.transform(
                    self.space.rvs(n_samples=self.n_points,
                                   random_state=self.rng))

                values = _gaussian_acquisition(
                    X=X,
                    model=est,
                    y_opt=np.min(self.yi),
                    acq_func=self.acq_func,
                    acq_func_kwargs=self.acq_func_kwargs)
                # Find the minimum of the acquisition function by randomly
                # sampling points from the space
                next_x = X[np.argmin(values)]

                # derive model mu and std
                # next_xt = self.space.transform([next_x])[0]
                next_model_mu, next_model_std = \
                    self.models[-1].predict(
                        X=np.asarray(next_x).reshape(1, -1),
                        return_std=True)

                model_mu = next_model_mu[0]
                model_std = next_model_std[0]

            # acquisition function is optimized globally
            elif self.acq_optimizer == "global":
                try:
                    import gurobipy as gp
                except ModuleNotFoundError:
                    ImportError(
                        "GurobiNotFoundError: "
                        "To run `aqu_optimizer='global'` "
                        "please install the Gurobi solver "
                        "(https://www.gurobi.com/) and its interface "
                        "gurobipy. "
                        "Alternatively, change `aqu_optimizer='sampling'`.")

                if add_model_core is None:
                    add_model_core = \
                        self.acq_optimizer_kwargs.get("add_model_core", None)

                next_x, model_mu, model_std, gurobi_mipgap = \
                    self.models[-1].get_global_next_x(acq_func=self.acq_func,
                                                      acq_func_kwargs=self.acq_func_kwargs,
                                                      acq_optimizer_kwargs=self.acq_optimizer_kwargs,
                                                      add_model_core=add_model_core,
                                                      weight=weight,
                                                      verbose=self.verbose,
                                                      gurobi_env=self.gurobi_env,
                                                      gurobi_timelimit=self.gurobi_timelimit)

                self.gurobi_mipgap.append(gurobi_mipgap)

            # note the need for [0] at the end
            self._next_x = self.space.inverse_transform(next_x.reshape(
                (1, -1)))[0]

            from entmoot.utils import get_cat_idx

            for idx, xi in enumerate(self._next_x):
                if idx not in get_cat_idx(self.space):
                    self._next_x[idx] = xi

                    # enforce variable bounds
                    if self._next_x[idx] > self.space.bounds[idx][1]:
                        self._next_x[idx] = self.space.bounds[idx][1]
                    elif self._next_x[idx] < self.space.bounds[idx][0]:
                        self._next_x[idx] = self.space.bounds[idx][0]

            self._model_mu = model_mu
            self._model_std = model_std

            if self.models:
                self.model_mu.append(self._model_mu)
                self.model_std.append(self._model_std)

            # check how far the new next_x is away from existing data
            next_x = self._next_x
            min_delta_x = min(
                [self.space.distance(next_x, xi) for xi in self.Xi])
            if abs(min_delta_x) <= 1e-8:
                warnings.warn("The objective has been evaluated "
                              "at this point before.")

            # return point computed from last call to tell()
            return next_x

    def tell(self, x: list, y: list):
        """
        Validate new observations and add them to the data set.

        `x` is checked against the search space and `y` against the
        expected shape and type before both are recorded.

        :param x: list, locations of new data points
        :param y: list, target value of new data points
        """
        from entmoot.utils import check_x_in_space

        # validation first: nothing is stored if either check raises
        check_x_in_space(x, self.space)
        self._check_y_is_valid(x, y)

        self._tell(x, y)

    def _tell(self, x, y):
        """
        Record new observations without validating them.

        Also decrements the counter of remaining initial points and resets
        the `ask` cache and the cached next proposal.

        :param x: list, locations of new data points
        :param y: list, target value of new data points
        :raises ValueError: if `x` is not list-like
        """
        from entmoot.utils import is_2Dlistlike
        from entmoot.utils import is_listlike

        if is_listlike(y) and is_2Dlistlike(x):
            # a list-like y together with a 2D x is treated as a batch
            self.Xi.extend(x)
            self.yi.extend(y)
            n_added = len(y)
        elif is_listlike(x):
            # a single point
            self.Xi.append(x)
            self.yi.append(y)
            n_added = 1
        else:
            raise ValueError("Type of arguments `x` (%s) and `y` (%s) "
                             "not compatible." % (type(x), type(y)))

        self._n_initial_points -= n_added

        # None signals that the solver has learned something new, so the
        # next `ask` has to compute a fresh proposal
        self._next_x = None

        # cached batch proposals are outdated as well
        self.cache_ = {}

    def _check_y_is_valid(self, x, y):
        """Check that the shape and types of x and y are consistent.

        For a single objective, `y` must be a scalar (or a list of scalars
        for a batch); otherwise every observation must be a list-like item
        holding `num_obj` numbers.

        :raises ValueError: on any inconsistency
        """
        from entmoot.utils import is_2Dlistlike
        from entmoot.utils import is_listlike

        incompatible = "Type of arguments `x` (%s) and `y` (%s) " \
                       "not compatible."

        if self.num_obj == 1:
            # single objective: scalar target values are expected
            if is_listlike(y) and is_2Dlistlike(x):
                # batch of points
                if not all(isinstance(value, Number) for value in y):
                    raise ValueError("expected y to be a list of scalars")
                return

            if is_listlike(x):
                if not isinstance(y, Number):
                    raise ValueError("`func` should return a scalar")
                return

            raise ValueError(incompatible % (type(x), type(y)))

        # multiple objectives: one value per objective is expected
        if is_listlike(y[0]) and is_2Dlistlike(x):
            # batch of points, validated row by row
            for row in y:
                if len(row) != self.num_obj:
                    raise ValueError(
                        f"expected y to be of size {self.num_obj}")
                if any(not isinstance(value, Number) for value in row):
                    raise ValueError(
                        f"expected y to be a list of list-like items of length {self.num_obj}"
                    )
            return

        if is_listlike(x):
            if len(y) != self.num_obj:
                raise ValueError(
                    f"`func` should return a list-like item of length {self.num_obj}"
                )
            if any(not isinstance(value, Number) for value in y):
                raise ValueError(
                    f"`func` should return a list-like item of length {self.num_obj}"
                )
            return

        raise ValueError(incompatible % (type(x), type(y)))

    def run(self, func, n_iter=1, no_progress_bar=False, update_min=False):
        """Execute ask() + tell() `n_iter` times and return the result.

        :param func: callable, evaluated at every proposal from `ask`
        :param n_iter: int, number of ask/tell iterations
        :param no_progress_bar: bool, disables the tqdm progress bar
        :param update_min: bool, prints the current minimum after every
            iteration; only takes effect if the progress bar is disabled
        :return result: output of `create_result` for the collected data
        """
        from tqdm import tqdm

        # the textual update is only printed when no progress bar is drawn
        print_min = no_progress_bar and update_min

        for itr in tqdm(range(n_iter), disable=no_progress_bar):
            proposal = self.ask()
            self.tell(proposal, func(proposal))

            if print_min:
                # "\r" keeps the update on a single, overwritten line
                print(
                    f"Min. obj.: {round(min(self.yi),2)} at itr.: {itr+1} / {n_iter}",
                    end="\r")

        from entmoot.utils import create_result

        result = create_result(
            self.Xi,
            self.yi,
            self.space,
            self.rng,
            models=self.models,
            model_mu=self.model_mu,
            model_std=self.model_std,
            gurobi_mipgap=self.gurobi_mipgap,
        )

        if print_min:
            # final line without carriage return
            print(
                f"Min. obj.: {round(min(self.yi),2)} at itr.: {n_iter} / {n_iter}"
            )

        return result

    def update_next(self):
        """Reset the `ask` cache and refresh the pending suggestion, if any.

        Intended for use after a parameter was changed following a call to
        `ask()`. The cache is always cleared; `_next_x` is only replaced
        when the attribute already exists.
        """
        self.cache_ = {}

        # Nothing has been suggested yet, so there is nothing to refresh.
        if not hasattr(self, '_next_x'):
            return

        # Take the suggestion held by a copy created with the current rng.
        refreshed = self.copy(random_state=self.rng)
        self._next_x = refreshed._next_x

    def get_result(self):
        """Package the optimizer's current state into a result object.

        Nothing is evaluated or fitted here, and `tell` is not called; the
        stored points, values, models and solver metrics are simply handed
        to `entmoot.utils.create_result`.

        Parameters
        ----------
        -

        Returns
        -------
        res : `OptimizeResult`, scipy object
            OptimizeResult instance with the required information.

        """
        from entmoot.utils import create_result

        # Keyword extras recorded alongside the evaluated points.
        extras = dict(
            models=self.models,
            model_mu=self.model_mu,
            model_std=self.model_std,
            gurobi_mipgap=self.gurobi_mipgap,
        )
        return create_result(self.Xi, self.yi, self.space, self.rng, **extras)

    def predict_with_est(self, x, return_std=True):
        """Refit the base estimator on the stored data and predict at `x`.

        Parameters
        ----------
        x : list-like
            Either a single point or, if `is_2Dlistlike(x)` holds, a
            collection of points. Points are given in the original space
            and are transformed with `self.space.transform`.
        return_std : bool
            If truthy, the standard deviation is returned next to the mean.

        Returns
        -------
        mu or (mu, std)
            For a collection of points the estimator output is returned as
            is; for a single point only the first entry of each output.

        Notes
        -----
        `self.base_estimator_` is fitted in place on `self.Xi` / `self.yi`
        on every call.
        """
        from entmoot.utils import is_2Dlistlike

        batched = is_2Dlistlike(x)

        if batched:
            query = np.asarray(self.space.transform(x))
        else:
            # wrap the single point, transform it, and restore a 2-D shape
            query = np.asarray(self.space.transform([x])[0]).reshape(1, -1)

        estimator = self.base_estimator_
        estimator.fit(self.space.transform(self.Xi), self.yi)
        mu, std = estimator.predict(X=query, return_std=True)

        if return_std:
            return (mu, std) if batched else (mu[0], std[0])
        return mu if batched else mu[0]

    def predict_with_acq(self, x):
        """Evaluate the acquisition function at `x`.

        Parameters
        ----------
        x : list-like
            Either a single point or, if `is_2Dlistlike(x)` holds, a
            collection of points, given in the original space.

        Returns
        -------
        Acquisition value(s) from `_gaussian_acquisition`: the full output
        for a collection of points, its first entry for a single point.

        Notes
        -----
        The most recent entry of `self.models` is used when available.
        Otherwise `self.base_estimator_` is fitted in place on the stored
        data and used instead.
        """
        from entmoot.utils import is_2Dlistlike

        batched = is_2Dlistlike(x)

        if batched:
            query = np.asarray(self.space.transform(x))
        else:
            # wrap the single point, transform it, and restore a 2-D shape
            query = np.asarray(self.space.transform([x])[0]).reshape(1, -1)

        # Choose the surrogate: latest cached model, else a fresh fit.
        if self.models:
            surrogate = self.models[-1]
        else:
            surrogate = self.base_estimator_
            surrogate.fit(self.space.transform(self.Xi), self.yi)

        acq_values = _gaussian_acquisition(
            X=query,
            model=surrogate,
            y_opt=np.min(self.yi),
            acq_func=self.acq_func,
            acq_func_kwargs=self.acq_func_kwargs)

        return acq_values if batched else acq_values[0]

    def predict_pareto(
        self,
        sampling_strategy: str = 'random',
        num_samples: int = 10,
        num_levels: int = 10,
        add_model_core=None,
        gurobi_env=None,
    ):
        """
        Computes Pareto point predictions by solving the global acquisition
        problem once per sampled objective weight vector.

        The base estimator is refit in place on all stored data before the
        weight vectors are processed.

        :param sampling_strategy: str = 'random'
            picks the strategy to sample weights for the multi-objective
            function
                'random': gives 'num_samples' randomly drawn weights
                'grid': defines ordered grid of samples depending on
                    'num_levels'
        :param num_samples: int = 10,
            defines number of samples for 'random' sampling strategy
        :param num_levels: int = 10,
            defines levels per dimension for 'grid' sampling strategy
        :param add_model_core: GRBModel = None,
            Gurobi optimization model that includes additional constraints;
            if None, the "add_model_core" entry of `acq_optimizer_kwargs`
            is used (which may itself be None)
        :param gurobi_env: Gurobi Env = None,
            Gurobi environment used for the computation; if given, it also
            replaces `self.gurobi_env`

        :return pareto: list of `(x, mu)` tuples, one per weight vector,
            holding the first two values returned by the estimator's
            `get_global_next_x`

        :raises AssertionError: if `self.num_obj` is not greater than 1
        :raises ValueError: if `sampling_strategy` is neither 'random' nor
            'grid'
        """

        # update gurobi_env attribute
        if gurobi_env:
            self.gurobi_env = gurobi_env

        # kept as an assert so callers see the same exception type as before
        assert self.num_obj > 1, \
            "Number of objectives needs to be > 1 to" \
            " compute Pareto frontiers."

        # Validate before importing the sampler so that a bad argument is
        # reported as such, independent of the optional dependency.
        if sampling_strategy not in ('random', 'grid'):
            raise ValueError(
                "'sampling_strategy' must be in ['random', 'grid']")

        from opti.sampling.simplex import sample, grid

        # pick the sampling strategy
        if sampling_strategy == 'random':
            weights = sample(self.num_obj, num_samples)
        else:
            weights = grid(self.num_obj, num_levels)

        # fit current model
        est = self.base_estimator_
        est.fit(self.space.transform(self.Xi), self.yi)

        # add model constraints if necessary
        if add_model_core is None:
            add_model_core = \
                self.acq_optimizer_kwargs.get("add_model_core", None)

        # compute pareto points based on weight vector
        pareto = []
        for w in weights:
            # the last two return values (std and MIP gap) are not needed here
            temp_x, temp_mu, _, _ = \
                est.get_global_next_x(acq_func=self.acq_func,
                                      acq_func_kwargs=self.acq_func_kwargs,
                                      acq_optimizer_kwargs=self.acq_optimizer_kwargs,
                                      add_model_core=add_model_core,
                                      weight=w,
                                      verbose=self.verbose,
                                      gurobi_env=self.gurobi_env,
                                      gurobi_timelimit=self.gurobi_timelimit)
            pareto.append((temp_x, temp_mu))
        return pareto
Example #5
0
    def __init__(self,
                 dimensions: list,
                 base_estimator: str = "ENTING",
                 n_initial_points: int = 50,
                 initial_point_generator: str = "random",
                 num_obj: int = 1,
                 acq_func: str = "LCB",
                 acq_optimizer: str = "global",
                 random_state: Optional[int] = None,
                 acq_func_kwargs: Optional[dict] = None,
                 acq_optimizer_kwargs: Optional[dict] = None,
                 base_estimator_kwargs: Optional[dict] = None,
                 model_queue_size: Optional[int] = None,
                 verbose: bool = False):
        """Set up search space, surrogate model and bookkeeping containers.

        Parameters
        ----------
        dimensions : list
            Search space definition, passed to `Space`.
        base_estimator : str or EntingRegressor or MisicRegressor
            Either an estimator instance, which is used as is, or a string
            that must pass `is_supported` and is built by `cook_estimator`.
        n_initial_points : int
            Number of initial samples requested from the initial point
            generator. Must be >= 0.
        initial_point_generator : str
            Passed to `cook_initial_point_generator`.
        num_obj : int
            Number of objectives; forwarded to `cook_estimator`.
        acq_func : str
            Acquisition function, one of "LCB" or "HLCB".
        acq_optimizer : str
            Stored as `self.acq_optimizer`.
        random_state : int, RandomState instance or None
            Passed to sklearn's `check_random_state`.
        acq_func_kwargs : dict or None
            Stored as `self.acq_func_kwargs` (empty dict if None).
        acq_optimizer_kwargs : dict or None
            Stored as `self.acq_optimizer_kwargs`. The keys "n_points"
            (default 10000), "env" and "gurobi_timelimit" are read here.
        base_estimator_kwargs : dict or None
            Forwarded to `cook_estimator` when the estimator is built from
            a string.
        model_queue_size : int or None
            Stored as `self.max_model_queue_size`.
        verbose : bool or int
            A bool maps to 1 / 0; an int must be one of 0, 1, 2. None is
            treated as 0.

        Raises
        ------
        ValueError
            For an unknown `acq_func`, negative `n_initial_points` or an
            unsupported `base_estimator`.
        TypeError
            For a `model_queue_size` or `verbose` of the wrong type, or an
            integer `verbose` outside [0, 1, 2].
        """

        from entmoot.utils import is_supported
        from entmoot.utils import cook_estimator
        from entmoot.utils import cook_initial_point_generator
        from sklearn.utils import check_random_state

        # define random state
        self.rng = check_random_state(random_state)

        # store and create acquisition function set
        self.acq_func = acq_func
        if acq_func_kwargs is None:
            self.acq_func_kwargs = dict()
        else:
            self.acq_func_kwargs = acq_func_kwargs

        allowed_acq_funcs = ["LCB", "HLCB"]
        if self.acq_func not in allowed_acq_funcs:
            raise ValueError("expected acq_func to be in %s, got %s" %
                             (",".join(allowed_acq_funcs), self.acq_func))

        # configure counter of points
        if n_initial_points < 0:
            raise ValueError("Expected `n_initial_points` >= 0, got %d" %
                             n_initial_points)
        self._n_initial_points = n_initial_points
        self.n_initial_points_ = n_initial_points

        # initialize search space and output
        from entmoot.space.space import Space

        self.space = Space(dimensions)

        self._initial_samples = None
        self._initial_point_generator = \
            cook_initial_point_generator(initial_point_generator)

        if self._initial_point_generator is not None:
            # remember the transformer and put it back after sampling
            transformer = self.space.get_transformer()
            self._initial_samples = self._initial_point_generator.generate(
                self.space.dimensions,
                n_initial_points,
                random_state=self.rng.randint(0,
                                              np.iinfo(np.int32).max))
            self.space.set_transformer(transformer)

        self.num_obj = num_obj

        # create base_estimator
        self.base_estimator_kwargs = \
            {} if base_estimator_kwargs is None else base_estimator_kwargs

        from entmoot.learning.tree_model import EntingRegressor, MisicRegressor

        # isinstance also accepts subclasses of the supported estimators
        if not isinstance(base_estimator, (EntingRegressor, MisicRegressor)):
            if not isinstance(base_estimator, str):
                # wrap in a tuple so tuple arguments format without error
                raise ValueError("Estimator type: %s is not supported." %
                                 (base_estimator,))

            # define random_state of estimator
            est_random_state = self.rng.randint(0, np.iinfo(np.int32).max)

            # check support of base_estimator if exists
            if not is_supported(base_estimator):
                raise ValueError("Estimator type: %s is not supported." %
                                 base_estimator)

            # build base_estimator
            base_estimator = cook_estimator(self.space,
                                            base_estimator,
                                            self.base_estimator_kwargs,
                                            num_obj=self.num_obj,
                                            random_state=est_random_state)

        self.base_estimator_ = base_estimator

        # Configure Optimizer
        self.acq_optimizer = acq_optimizer

        # record other arguments
        if acq_optimizer_kwargs is None:
            acq_optimizer_kwargs = dict()

        self.acq_optimizer_kwargs = acq_optimizer_kwargs
        self.n_points = acq_optimizer_kwargs.get("n_points", 10000)
        self.gurobi_env = acq_optimizer_kwargs.get("env", None)
        self.gurobi_timelimit = acq_optimizer_kwargs.get(
            "gurobi_timelimit", None)

        ## Initialize storage for optimization
        if not isinstance(model_queue_size, (int, type(None))):
            raise TypeError("model_queue_size should be an int or None, "
                            "got {}".format(type(model_queue_size)))

        # model cache
        self.max_model_queue_size = model_queue_size
        self.models = []

        # data set cache
        self.Xi = []
        self.yi = []

        # model_mu and model_std cache
        self.model_mu = []
        self.model_std = []

        # global opti metrics
        self.gurobi_mipgap = []

        # Initialize cache for `ask` method responses
        # This ensures that multiple calls to `ask` with n_points set
        # return same sets of points. Reset to {} at every call to `tell`.
        self.cache_ = {}

        # Handle solver print output
        if not isinstance(verbose, (int, type(None))):
            raise TypeError("verbose should be an int of [0,1,2] or bool, "
                            "got {}".format(type(verbose)))

        if verbose is None:
            # None passes the type check above; map it to "silent" so that
            # self.verbose is always defined.
            self.verbose = 0
        elif isinstance(verbose, bool):
            # bool is a subclass of int, so it has to be handled first
            self.verbose = 1 if verbose else 0
        else:
            if verbose not in [0, 1, 2]:
                raise TypeError("if verbose is int, it should in [0,1,2], "
                                "got {}".format(verbose))
            self.verbose = verbose

        # printed_switch_to_model defines if notification of switch from
        # initial point generation to model-based point generation has been
        # printed yet; when not verbose it is marked as already printed
        self.printed_switch_to_model = not self.verbose > 0
Example #6
0
    def generate(self, dimensions, n_samples, random_state=None):
        """Creates samples from a regular grid.

        The grid is built in the normalized space, shuffled, and mapped
        back with `space.inverse_transform`. If the grid has more points
        than requested it is truncated after shuffling; if it has fewer, it
        is topped up from a second grid selected by `self.append_border`.

        Parameters
        ----------
        dimensions : list, shape (n_dims,)
            List of search space dimensions.
            Each search dimension can be defined either as

            - a `(lower_bound, upper_bound)` tuple (for `Real` or `Integer`
              dimensions),
            - a `(lower_bound, upper_bound, "prior")` tuple (for `Real`
              dimensions),
            - as a list of categories (for `Categorical` dimensions), or
            - an instance of a `Dimension` object (`Real`, `Integer` or
              `Categorical`).

        n_samples : int
            Number of grid points requested.
        random_state : int, RandomState instance, or None (default)
            Set random state to something other than None for reproducible
            results.

        Returns
        -------
        Grid points in the original space, one entry per sample, as
        returned by `space.inverse_transform`.

        Raises
        ------
        ValueError
            If `self.border` or `self.append_border` is not one of
            "include", "exclude" or "only".
        """
        rng = check_random_state(random_state)
        space = Space(dimensions)
        n_dim = space.n_dims
        transformer = space.get_transformer()
        space.set_transformer("normalize")

        try:
            # full layout rounds the grid order down, otherwise up
            rounding = np.floor if self.use_full_layout else np.ceil

            if self.border == "include":
                order = max(int(rounding(np.sqrt(n_samples))), 2)
                h = _create_uniform_grid_include_border(n_dim, order)
            elif self.border == "exclude":
                order = max(int(rounding(np.sqrt(n_samples))), 1)
                h = _create_uniform_grid_exclude_border(n_dim, order)
            elif self.border == "only":
                order = max(int(rounding(n_samples / 2.)), 2)
                # use the border-only grid, as the append_border == "only"
                # case below does
                h = _create_uniform_grid_only_border(n_dim, order)
            else:
                raise ValueError("Wrong value for border")

            n_grid = np.size(h, 0)
            if n_grid > n_samples:
                # shuffle first so the truncation keeps a random subset
                rng.shuffle(h)
                h = h[:n_samples, :]
            elif n_grid < n_samples:
                missing = n_samples - n_grid
                if self.append_border == "only":
                    order = max(int(np.ceil(missing / 2.)), 2)
                    h2 = _create_uniform_grid_only_border(n_dim, order)
                elif self.append_border == "include":
                    order = max(int(np.ceil(np.sqrt(missing))), 2)
                    h2 = _create_uniform_grid_include_border(n_dim, order)
                elif self.append_border == "exclude":
                    order = max(int(np.ceil(np.sqrt(missing))), 1)
                    h2 = _create_uniform_grid_exclude_border(n_dim, order)
                else:
                    raise ValueError("Wrong value for append_border")
                h = np.vstack((h, h2[:missing]))
                rng.shuffle(h)
            else:
                rng.shuffle(h)

            return space.inverse_transform(h)
        finally:
            # restore the transformer even if grid construction failed
            space.set_transformer(transformer)
Example #7
0
    def suggest_experiments(
        self, num_experiments=1, prev_res: DataSet = None, **kwargs
    ):
        """Suggest experiments using ENTMOOT tree-based Bayesian Optimization

        A new optimizer is built on every call. If previous results are
        given, they are appended to the history kept in `self.prev_param`
        and passed to the optimizer before it is asked for new points.

        Parameters
        ----------
        num_experiments: int, optional
            The number of experiments (i.e., samples) to generate. Default is 1.
        prev_res: :class:`~summit.utils.data.DataSet`, optional
            Dataset with data from previous experiments of previous iteration.
            If no data is passed, the optimizer is asked for points without
            being told any data.

        Returns
        -------
        next_experiments : :class:`~summit.utils.data.DataSet`
            A Dataset object with the suggested experiments

        """

        history = None
        best_x = np.zeros(self.domain.num_continuous_dimensions())
        first_objective = self.domain.output_variables[0]
        # sign used to report fbest in the objective's own direction
        direction = -1.0 if first_objective.maximize else 1.0
        best_f = float("inf")

        bounds = [entry["domain"] for entry in self.input_domain]

        # Core Gurobi model over the search space, extended by one linear
        # constraint per entry of self.constraints.
        core_model = get_core_gurobi_model(Space(bounds))
        gurobi_vars = core_model.getVars()

        for constraint in self.constraints:
            lhs = LinExpr()
            lhs.addTerms(constraint[0], gurobi_vars)
            lhs.addConstant(constraint[1])
            core_model.addLConstr(lhs, constraint[2], 0)

        core_model.update()

        entmoot_model = Optimizer(
            dimensions=bounds,
            base_estimator=self.estimator_type,
            std_estimator=self.std_estimator_type,
            n_initial_points=self.initial_points,
            initial_point_generator=self.generator_type,
            acq_func=self.acquisition_type,
            acq_optimizer=self.optimizer_type,
            random_state=None,
            acq_func_kwargs=None,
            acq_optimizer_kwargs={"add_model_core": core_model},
            base_estimator_kwargs={"min_child_samples": self.min_child_samples},
            std_estimator_kwargs=None,
            model_queue_size=None,
            verbose=False,
        )

        if prev_res is not None:
            inputs, outputs = self.transform.transform_inputs_outputs(
                prev_res, transform_descriptors=self.use_descriptors
            )

            for var in self.domain.variables:
                # maximization is handled by minimizing the negated output
                if var.is_objective and var.maximize:
                    outputs[var.name] = -1 * outputs[var.name]
                if isinstance(var, CategoricalVariable) and not self.use_descriptors:
                    inputs[var.name] = self.categorical_wrapper(
                        inputs[var.name], var.levels
                    )

            X_all = inputs.to_numpy()
            Y_all = outputs.to_numpy()

            # prepend the history stored by the previous call, if any
            if self.prev_param is not None:
                X_all = np.vstack((self.prev_param[0], X_all))
                Y_all = np.vstack((self.prev_param[1], Y_all))

            # the optimizer takes plain lists; outputs are flattened
            told_X = [list(row) for row in X_all]
            told_y = [value for row in Y_all for value in row]
            entmoot_model.tell(told_X, told_y, fit=True)

            # history of suggested points and function evaluations
            history = [X_all, Y_all]
            best_f = np.min(Y_all)
            best_x = X_all[np.argmin(Y_all)]

        request = np.array(
            entmoot_model.ask(n_points=num_experiments, strategy="cl_mean")
        )

        # Map the columns of the request back onto the input variables.
        next_experiments = None
        if request is not None and len(request) != 0:
            columns = {}
            col = 0
            for var in self.domain.variables:
                if var.is_objective:
                    continue
                if not isinstance(var, CategoricalVariable):
                    columns[var.name] = request[:, col]
                    col += 1
                elif var.ds is None or not self.use_descriptors:
                    columns[var.name] = np.asarray([
                        self.categorical_unwrap(entry, var.levels)
                        for entry in request[:, col]
                    ])
                    col += 1
                else:
                    # one request column per descriptor of this variable
                    for descriptor in var.ds.data_columns:
                        columns[descriptor] = request[:, col]
                        col += 1
            next_experiments = DataSet.from_df(pd.DataFrame(data=columns))
            next_experiments[("strategy", "METADATA")] = "ENTMOOT"

        self.fbest = direction * best_f
        self.xbest = best_x
        self.prev_param = history

        # Do any necessary transformation back
        return self.transform.un_transform(
            next_experiments, transform_descriptors=self.use_descriptors
        )