Example #1
def f_np_wrapper(x: np.ndarray, f: Callable):
    """Given a torch callable, compute value + grad given a numpy array."""
    if np.isnan(x).any():
        raise RuntimeError(
            f"{np.isnan(x).sum()} elements of the {x.size} element array "
            f"`x` are NaN."
        )
    X = (
        torch.from_numpy(x)
        .to(initial_conditions)
        .view(shapeX)
        .contiguous()
        .requires_grad_(True)
    )
    X_fix = fix_features(X, fixed_features=fixed_features)
    loss = f(X_fix).sum()
    # compute gradient w.r.t. the inputs (does not accumulate in leaves)
    gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1))
    if np.isnan(gradf).any():
        msg = (
            f"{np.isnan(gradf).sum()} elements of the {x.size} element "
            "gradient array `gradf` are NaN. This often indicates numerical issues."
        )
        if initial_conditions.dtype != torch.double:
            msg += " Consider using `dtype=torch.double`."
        raise RuntimeError(msg)
    fval = loss.item()
    return fval, gradf
Example #2
 def f(x):
     X = (torch.from_numpy(x).to(
         initial_conditions).contiguous().requires_grad_(True))
     X_fix = fix_features(X=X, fixed_features=fixed_features)
     loss = -acquisition_function(X_fix[None]).sum()
     # compute gradient w.r.t. the inputs (does not accumulate in leaves)
     gradf = _arrayify(
         torch.autograd.grad(loss, X)[0].contiguous().view(-1))
     fval = loss.item()
     return fval, gradf
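The helper `_arrayify` used above just detaches a tensor and hands SciPy a contiguous float64 NumPy array. A rough sketch of what it does (not necessarily the exact BoTorch implementation):

import numpy as np
import torch

def _arrayify_sketch(X: torch.Tensor) -> np.ndarray:
    # move to CPU, drop the autograd graph, and return a contiguous float64 array for scipy
    return X.cpu().detach().contiguous().double().clone().numpy()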
Example #3
    def test_fix_features(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        X, X_null_two, X_expected, X_expected_null_two = self._getTensors(
            device)
        X.requires_grad_(True)
        X_null_two.requires_grad_(True)

        X_fix = fix_features(X, {0: -1, 2: -2})
        X_fix_null_two = fix_features(X_null_two, {0: -1, 2: None})

        self.assertTrue(torch.equal(X_fix, X_expected))
        self.assertTrue(torch.equal(X_fix_null_two, X_expected_null_two))

        def f(X):
            return X.sum()

        f(X).backward()
        self.assertTrue(torch.equal(X.grad, torch.ones_like(X)))
        X.grad.zero_()

        f(X_fix).backward()
        self.assertTrue(
            torch.equal(
                X.grad,
                torch.tensor([[0.0, 1.0, 0.0], [0.0, 1.0, 0.0]],
                             device=device)))

        f(X_null_two).backward()
        self.assertTrue(torch.equal(X_null_two.grad, torch.ones_like(X)))
        X_null_two.grad.zero_()
        f(X_fix_null_two).backward()
        self.assertTrue(
            torch.equal(
                X_null_two.grad,
                torch.tensor([[0.0, 1.0, 0.0], [0.0, 1.0, 0.0]],
                             device=device),
            ))
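The assertions above rely on `fix_features` rebuilding `X` with the fixed columns replaced by constants (or by detached copies when the value is None), which is why no gradient flows into those columns. A minimal sketch of that behavior, purely for illustration:

import torch

def fix_features_sketch(X, fixed_features=None):
    if not fixed_features:
        return X
    columns = []
    for i in range(X.shape[-1]):
        if i in fixed_features and fixed_features[i] is not None:
            # fixed columns become constants, so no gradient flows through them
            columns.append(torch.full_like(X[..., i], fixed_features[i]))
        elif i in fixed_features:
            # a None value keeps the current column but detaches it from the graph
            columns.append(X[..., i].detach())
        else:
            columns.append(X[..., i])
    return torch.stack(columns, dim=-1)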
Example #4
    def gen(
        self,
        num_points: int,  # Current implementation only generates 1 point at a time
        model: MonotonicRejectionGP,
    ):
        """Query next point(s) to run by optimizing the acquisition function.
        Args:
            num_points (int, optional): Number of points to query.
            model (AEPsychMixin): Fitted model of the data.
        Returns:
            np.ndarray: Next set of point(s) to evaluate, [num_points x dim].
        """

        options = self.model_gen_options or {}
        num_restarts = options.get("num_restarts", 10)
        raw_samples = options.get("raw_samples", 1000)
        verbosity_freq = options.get("verbosity_freq", -1)
        lr = options.get("lr", 0.01)
        momentum = options.get("momentum", 0.9)
        nesterov = options.get("nesterov", True)
        epochs = options.get("epochs", 50)
        milestones = options.get("milestones", [25, 40])
        gamma = options.get("gamma", 0.1)
        loss_constraint_fun = options.get(
            "loss_constraint_fun", default_loss_constraint_fun
        )

        # Augment bounds with deriv indicator
        bounds = torch.cat((model.bounds_, torch.zeros(2, 1)), dim=1)
        # Fix deriv indicator to 0 during optimization
        fixed_features = {(bounds.shape[1] - 1): 0.0}
        # Fix explore features to random values
        if self.explore_features is not None:
            for idx in self.explore_features:
                val = (
                    bounds[0, idx]
                    + torch.rand(1, dtype=bounds.dtype)
                    * (bounds[1, idx] - bounds[0, idx])
                ).item()
                fixed_features[idx] = val
                bounds[0, idx] = val
                bounds[1, idx] = val

        acqf = self._instantiate_acquisition_fn(model)

        # Initialize
        batch_initial_conditions = gen_batch_initial_conditions(
            acq_function=acqf,
            bounds=bounds,
            q=1,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
        )
        clamped_candidates = columnwise_clamp(
            X=batch_initial_conditions, lower=bounds[0], upper=bounds[1]
        ).requires_grad_(True)
        candidates = fix_features(clamped_candidates, fixed_features)
        optimizer = torch.optim.SGD(
            params=[clamped_candidates], lr=lr, momentum=momentum, nesterov=nesterov
        )
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones, gamma=gamma
        )

        # Optimize
        for epoch in range(epochs):
            loss = -acqf(candidates).sum()

            # adjust loss based on constraints on candidates
            loss = loss_constraint_fun(loss, candidates)

            if verbosity_freq > 0 and epoch % verbosity_freq == 0:
                logger.info("Iter: {} - Value: {:.3f}".format(epoch, -(loss.item())))

            def closure():
                optimizer.zero_grad()
                loss.backward(
                    retain_graph=True
                )  # Variational model requires retain_graph
                return loss

            optimizer.step(closure)
            clamped_candidates.data = columnwise_clamp(
                X=clamped_candidates, lower=bounds[0], upper=bounds[1]
            )
            candidates = fix_features(clamped_candidates, fixed_features)
            lr_scheduler.step()

        # Extract best point
        with torch.no_grad():
            batch_acquisition = acqf(candidates)
        best = torch.argmax(batch_acquisition.view(-1), dim=0)
        Xopt = candidates[best][:, :-1].detach()
        return Xopt
Example #5
def gen_candidates_scipy(
    initial_conditions: Tensor,
    acquisition_function: Module,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    constraints=(),
    options: Optional[Dict[str, Any]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    """
    This function generates a set of candidates using `scipy.optimize.minimize`.

    Parameters
    ----------
    :param initial_conditions: starting points for optimization
    :param acquisition_function: acquisition function to be optimized

    Optional parameters
    -------------------
    :param lower_bounds: minimum values for each column of initial_conditions
    :param upper_bounds: maximum values for each column of initial_conditions
    :param constraints: constraints in scipy format
    :param options: options for candidate generation
    :param fixed_features: A map {feature_index: value} for features that should be fixed to a particular value
        during generation.

    Returns
    -------
    :return: 2-element tuple containing the set of generated candidates and the acquisition value for each t-batch.
    """

    options = options or {}
    x0 = columnwise_clamp(initial_conditions, lower_bounds,
                          upper_bounds).requires_grad_(True)

    bounds = Bounds(lb=lower_bounds, ub=upper_bounds, keep_feasible=True)

    def f(x):
        X = (torch.from_numpy(x).to(
            initial_conditions).contiguous().requires_grad_(True))
        X_fix = fix_features(X=X, fixed_features=fixed_features)
        loss = -acquisition_function(X_fix[None]).sum()
        # compute gradient w.r.t. the inputs (does not accumulate in leaves)
        gradf = _arrayify(
            torch.autograd.grad(loss, X)[0].contiguous().view(-1))
        fval = loss.item()
        return fval, gradf

    candidates = torch.zeros(x0.shape, dtype=torch.float64)
    # TODO this does not handle the case where q!=1
    for i in range(x0.shape[0]):
        res = minimize(
            f,
            x0[i, 0].detach().numpy(),
            method="SLSQP",
            jac=True,
            bounds=bounds,
            constraints=constraints,
            options={k: v
                     for k, v in options.items() if k != "method"},
        )
        candidates[i] = fix_features(
            X=torch.from_numpy(res.x).to(initial_conditions).contiguous(),
            fixed_features=fixed_features,
        )

    batch_acquisition = acquisition_function(candidates)

    return candidates, batch_acquisition
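A hedged usage sketch for the function above, assuming `gen_batch_initial_conditions` (see Example #7) and this `gen_candidates_scipy` are in scope; the toy model and data are assumptions for illustration only:

import torch
from botorch.models import SingleTaskGP
from botorch.acquisition import ExpectedImprovement

# toy data and model purely for illustration (hyperparameters left at their defaults)
train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = train_X.sum(dim=-1, keepdim=True)
acqf = ExpectedImprovement(SingleTaskGP(train_X, train_Y), best_f=train_Y.max())

bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)
Xinit = gen_batch_initial_conditions(acqf, bounds, q=1, num_restarts=5, raw_samples=128)
candidates, acq_values = gen_candidates_scipy(
    initial_conditions=Xinit,
    acquisition_function=acqf,
    lower_bounds=bounds[0],
    upper_bounds=bounds[1],
    fixed_features={1: 0.5},  # hold the second feature at 0.5 during optimization
)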
Example #6
def gen_candidates_torch(
        initial_conditions: Tensor,
        acquisition_function: Callable,
        lower_bounds: Optional[Union[float, Tensor]] = None,
        upper_bounds: Optional[Union[float, Tensor]] = None,
        optimizer: Type[Optimizer] = torch.optim.Adam,
        options: Optional[Dict[str, Union[float, str]]] = None,
        verbose: bool = True,
        fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Iterable[Any]:  # -> Tuple[Tensor, Any, Optional[Tensor]]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        verbose: If True, provide verbose output.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.
    """
    options = options or {}
    _jitter = options.get('jitter', 0.)
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)

    bayes_optimizer = optimizer(
        params=[clamped_candidates], lr=options.get("lr", 0.025)
    )
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        batch_loss = acquisition_function(candidates)
        loss = -batch_loss.sum()

        if verbose:
            print("Iter: {} - Value: {:.3f}".format(i, -(loss.item())))

        if torch.isnan(loss):
            print('loss is nan, exiting optimization of the acquisition function.')
            break

        bayes_optimizer.zero_grad()
        loss.backward()
        if options.get('clip_gradient', False):
            torch.nn.utils.clip_grad_value_(clamped_candidates, clip_value=options.get('clip_value', 10.))
        bayes_optimizer.step()
        clamped_candidates.data = columnwise_clamp(
            clamped_candidates, lower_bounds + _jitter, upper_bounds - _jitter
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        stop = stopping_criterion.evaluate(fvals=loss.detach())

    # clamped_candidates = columnwise_clamp(
    #     X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    # )

    with torch.no_grad():
        batch_acquisition = acquisition_function(candidates)

    return candidates, batch_acquisition
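The loop above is essentially projected gradient ascent on the acquisition value: the leaf tensor is updated by the optimizer, its .data is clamped back into the (jittered) box, and fix_features is re-applied so the fixed columns never drift. A stripped-down sketch of that pattern on a toy objective (the quadratic and the helper below are assumptions, not part of the source):

import torch

def toy_acqf(X):
    # stand-in for an acquisition function; maximized at X = 0.7 in every coordinate
    return -((X - 0.7) ** 2).sum(dim=-1)

def with_fixed(X, idx, val):
    # simplified stand-in for fix_features with a single fixed column
    cols = [torch.full_like(X[..., i], val) if i == idx else X[..., i] for i in range(X.shape[-1])]
    return torch.stack(cols, dim=-1)

clamped = torch.rand(4, 2).requires_grad_(True)  # 4 restarts, 2 features
candidates = with_fixed(clamped, 1, 0.25)
opt = torch.optim.Adam([clamped], lr=0.05)

for _ in range(100):
    loss = -toy_acqf(candidates).sum()
    opt.zero_grad()
    loss.backward()
    opt.step()
    # project back into the unit box and re-apply the fixed feature
    clamped.data = clamped.data.clamp(0.0, 1.0)
    candidates = with_fixed(clamped, 1, 0.25)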
Example #7
def gen_batch_initial_conditions(
    acq_function: AcquisitionFunction,
    bounds: Tensor,
    q: int,
    num_restarts: int,
    raw_samples: int,
    fixed_features: Optional[Dict[int, float]] = None,
    options: Optional[Dict[str, Union[bool, float, int]]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
) -> Tensor:
    r"""Generate a batch of initial conditions for random-restart optimziation.

    TODO: Support t-batches of initial conditions.

    Args:
        acq_function: The acquisition function to be optimized.
        bounds: A `2 x d` tensor of lower and upper bounds for each column of `X`.
        q: The number of candidates to consider.
        num_restarts: The number of starting points for multistart acquisition
            function optimization.
        raw_samples: The number of raw samples to consider in the initialization
            heuristic. Note: if `sample_around_best` is True (the default is False),
            then `2 * raw_samples` samples are used.
        fixed_features: A map `{feature_index: value}` for features that
            should be fixed to a particular value during generation.
        options: Options for initial condition generation. For valid options see
            `initialize_q_batch` and `initialize_q_batch_nonneg`. If `options`
            contains a `nonnegative=True` entry, then `acq_function` is
            assumed to be non-negative (useful when using custom acquisition
            functions). In addition, an "init_batch_limit" option can be passed
            to specify the batch limit for the initialization. This is useful
            for avoiding memory limits when computing the batch posterior over
            raw samples.
        inequality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an inequality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
        equality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an equality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) = rhs`.

    Returns:
        A `num_restarts x q x d` tensor of initial conditions.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0.], [1.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
    """
    options = options or {}
    seed: Optional[int] = options.get("seed")
    batch_limit: Optional[int] = options.get(
        "init_batch_limit", options.get("batch_limit")
    )
    batch_initial_arms: Tensor
    factor, max_factor = 1, 5
    init_kwargs = {}
    device = bounds.device
    bounds_cpu = bounds.cpu()
    if "eta" in options:
        init_kwargs["eta"] = options.get("eta")
    if options.get("nonnegative") or is_nonnegative(acq_function):
        init_func = initialize_q_batch_nonneg
        if "alpha" in options:
            init_kwargs["alpha"] = options.get("alpha")
    else:
        init_func = initialize_q_batch

    q = 1 if q is None else q
    # the dimension the samples are drawn from
    effective_dim = bounds.shape[-1] * q
    if effective_dim > SobolEngine.MAXDIM and settings.debug.on():
        warnings.warn(
            f"Sample dimension q*d={effective_dim} exceeding Sobol max dimension "
            f"({SobolEngine.MAXDIM}). Using iid samples instead.",
            SamplingWarning,
        )

    while factor < max_factor:
        with warnings.catch_warnings(record=True) as ws:
            n = raw_samples * factor
            if inequality_constraints is None and equality_constraints is None:
                if effective_dim <= SobolEngine.MAXDIM:
                    X_rnd = draw_sobol_samples(bounds=bounds_cpu, n=n, q=q, seed=seed)
                else:
                    with manual_seed(seed):
                        # load on cpu
                        X_rnd_nlzd = torch.rand(
                            n, q, bounds_cpu.shape[-1], dtype=bounds.dtype
                        )
                    X_rnd = bounds_cpu[0] + (bounds_cpu[1] - bounds_cpu[0]) * X_rnd_nlzd
            else:
                X_rnd = (
                    get_polytope_samples(
                        n=n * q,
                        bounds=bounds,
                        inequality_constraints=inequality_constraints,
                        equality_constraints=equality_constraints,
                        seed=seed,
                        n_burnin=options.get("n_burnin", 10000),
                        thinning=options.get("thinning", 32),
                    )
                    .view(n, q, -1)
                    .cpu()
                )
            # sample points around best
            if options.get("sample_around_best", False):
                X_best_rnd = sample_points_around_best(
                    acq_function=acq_function,
                    n_discrete_points=n * q,
                    sigma=options.get("sample_around_best_sigma", 1e-3),
                    bounds=bounds,
                    subset_sigma=options.get("sample_around_best_subset_sigma", 1e-1),
                    prob_perturb=options.get("sample_around_best_prob_perturb"),
                )
                if X_best_rnd is not None:
                    X_rnd = torch.cat(
                        [
                            X_rnd,
                            X_best_rnd.view(n, q, bounds.shape[-1]).cpu(),
                        ],
                        dim=0,
                    )
            X_rnd = fix_features(X_rnd, fixed_features=fixed_features)
            with torch.no_grad():
                if batch_limit is None:
                    batch_limit = X_rnd.shape[0]
                Y_rnd_list = []
                start_idx = 0
                while start_idx < X_rnd.shape[0]:
                    end_idx = min(start_idx + batch_limit, X_rnd.shape[0])
                    Y_rnd_curr = acq_function(
                        X_rnd[start_idx:end_idx].to(device=device)
                    ).cpu()
                    Y_rnd_list.append(Y_rnd_curr)
                    start_idx += batch_limit
                Y_rnd = torch.cat(Y_rnd_list)
            batch_initial_conditions = init_func(
                X=X_rnd, Y=Y_rnd, n=num_restarts, **init_kwargs
            ).to(device=device)
            if not any(issubclass(w.category, BadInitialCandidatesWarning) for w in ws):
                return batch_initial_conditions
            if factor < max_factor:
                factor += 1
                if seed is not None:
                    seed += 1  # make sure to sample different X_rnd
    warnings.warn(
        "Unable to find non-zero acquisition function values - initial conditions "
        "are being selected randomly.",
        BadInitialCandidatesWarning,
    )
    return batch_initial_conditions
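A hedged variant of the docstring example above, showing the fixed_features and init_batch_limit options it documents (qEI is assumed to exist exactly as in that example):

Xinit = gen_batch_initial_conditions(
    acq_function=qEI,
    bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]]),
    q=3,
    num_restarts=25,
    raw_samples=512,
    fixed_features={1: 0.5},  # column 1 of every raw sample / restart is fixed to 0.5
    options={"seed": 1234, "init_batch_limit": 64},  # evaluate raw samples in chunks of 64
)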
Example #8
def gen_value_function_initial_conditions(
    acq_function: AcquisitionFunction,
    bounds: Tensor,
    num_restarts: int,
    raw_samples: int,
    current_model: Model,
    fixed_features: Optional[Dict[int, float]] = None,
    options: Optional[Dict[str, Union[bool, float, int]]] = None,
) -> Tensor:
    r"""Generate a batch of smart initializations for optimizing
    the value function of qKnowledgeGradient.

    This function generates initial conditions for optimizing the inner problem of
    KG, i.e. its value function, using the maximizer of the posterior objective.
    Intuitively, the maximizer of the fantasized posterior will often be close to a
    maximizer of the current posterior. This function uses that fact to generate the
    initial conditions for the fantasy points. Specifically, a fraction of `1 -
    frac_random` (see options) of raw samples is generated by sampling from the set of
    maximizers of the posterior objective (obtained via random restart optimization)
    according to a softmax transformation of their respective values. This means that
    this initialization strategy internally solves an acquisition function
    maximization problem. The remaining raw samples are generated using
    `draw_sobol_samples`. All raw samples are then evaluated, and the initial
    conditions are selected according to the standard initialization strategy in
    'initialize_q_batch' individually for each inner problem.

    Args:
        acq_function: The value function instance to be optimized.
        bounds: A `2 x d` tensor of lower and upper bounds for each column of
            task features.
        num_restarts: The number of starting points for multistart acquisition
            function optimization.
        raw_samples: The number of raw samples to consider in the initialization
            heuristic.
        current_model: The model of the KG acquisition function that was used to
            generate the fantasy model of the value function.
        fixed_features: A map `{feature_index: value}` for features that
            should be fixed to a particular value during generation.
        options: Options for initial condition generation. These contain all
            settings for the standard heuristic initialization from
            `gen_batch_initial_conditions`. In addition, they contain
            `frac_random` (the fraction of fully random fantasy points),
            `num_inner_restarts` and `raw_inner_samples` (the number of random
            restarts and raw samples for solving the posterior objective
            maximization problem, respectively) and `eta` (temperature parameter
            for sampling heuristic from posterior objective maximizers).

    Returns:
        A `num_restarts x batch_shape x q x d` tensor that can be used as initial
        conditions for `optimize_acqf()`. Here `batch_shape` is the batch shape
        of the value function model.

    Example:
        >>> fant_X = torch.rand(5, 1, 2)
        >>> fantasy_model = model.fantasize(fant_X, SobolQMCNormalSampler(16))
        >>> value_function = PosteriorMean(fantasy_model)
        >>> bounds = torch.tensor([[0., 0.], [1., 1.]])
        >>> Xinit = gen_value_function_initial_conditions(
        >>>     value_function, bounds, num_restarts=10, raw_samples=512,
        >>>     options={"frac_random": 0.25},
        >>> )
    """
    options = options or {}
    seed: Optional[int] = options.get("seed")
    frac_random: float = options.get("frac_random", 0.6)
    if not 0 < frac_random < 1:
        raise ValueError(
            f"frac_random must take on values in (0,1). Value: {frac_random}"
        )

    # compute maximizer of the current value function
    value_function = _get_value_function(
        model=current_model,
        objective=getattr(acq_function, "objective", None),
        posterior_transform=acq_function.posterior_transform,
        sampler=getattr(acq_function, "sampler", None),
        project=getattr(acq_function, "project", None),
    )
    from botorch.optim.optimize import optimize_acqf

    fantasy_cands, fantasy_vals = optimize_acqf(
        acq_function=value_function,
        bounds=bounds,
        q=1,
        num_restarts=options.get("num_inner_restarts", 20),
        raw_samples=options.get("raw_inner_samples", 1024),
        fixed_features=fixed_features,
        return_best_only=False,
        options={
            k: v
            for k, v in options.items()
            if k
            not in ("frac_random", "num_inner_restarts", "raw_inner_samples", "eta")
        },
    )

    batch_shape = acq_function.model.batch_shape
    # sampling from the optimizers
    n_value = int((1 - frac_random) * raw_samples)  # number of non-random ICs
    if n_value > 0:
        eta = options.get("eta", 2.0)
        weights = torch.exp(eta * standardize(fantasy_vals))
        idx = batched_multinomial(
            weights=weights.expand(*batch_shape, -1),
            num_samples=n_value,
            replacement=True,
        ).permute(-1, *range(len(batch_shape)))
        resampled = fantasy_cands[idx]
    else:
        resampled = torch.empty(
            0,
            *batch_shape,
            1,
            bounds.shape[-1],
            dtype=fantasy_cands.dtype,
            device=fantasy_cands.device,
        )
    # add qMC samples
    randomized = draw_sobol_samples(
        bounds=bounds, n=raw_samples - n_value, q=1, batch_shape=batch_shape, seed=seed
    ).to(resampled)
    # full set of raw samples
    X_rnd = torch.cat([resampled, randomized], dim=0)
    X_rnd = fix_features(X_rnd, fixed_features=fixed_features)

    # evaluate the raw samples
    with torch.no_grad():
        Y_rnd = acq_function(X_rnd)

    # select the restart points using the heuristic
    return initialize_q_batch(
        X=X_rnd, Y=Y_rnd, n=num_restarts, eta=options.get("eta", 2.0)
    )
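The frac_random logic above boils down to resampling previously found maximizers with softmax-style weights on their values. A simplified, non-batched sketch of that heuristic (toy values; the real code uses standardize and batched_multinomial across the fantasy batch shape):

import torch

fantasy_vals = torch.tensor([0.1, 0.5, 0.9, 0.2])  # toy values of candidate maximizers
eta = 2.0
z = (fantasy_vals - fantasy_vals.mean()) / fantasy_vals.std()  # standardize the values
weights = torch.exp(eta * z)
idx = torch.multinomial(weights, num_samples=6, replacement=True)
# idx indexes into the candidate maximizers; higher-valued ones are drawn more often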
Example #9
def gen_candidates_scipy(
    initial_conditions: Tensor,
    acquisition_function: AcquisitionFunction,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    nonlinear_inequality_constraints: Optional[List[Callable]] = None,
    options: Optional[Dict[str, Any]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using `scipy.optimize.minimize`.

    Optimizes an acquisition function starting from a set of initial candidates
    using `scipy.optimize.minimize` via a numpy converter.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        inequality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an inequality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
        equality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an equality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) = rhs`.
        nonlinear_inequality_constraints: A list of callables that represent
            non-linear inequality constraints of the form `callable(x) >= 0`. Each
            callable is expected to take a `(num_restarts) x q x d`-dim tensor as
            an input and return a `(num_restarts) x q`-dim tensor with the
            constraint values. The constraints will later be passed to SLSQP.
        options: Options used to control the optimization including "method"
            and "maxiter". Select method for `scipy.minimize` using the
            "method" key. By default uses L-BFGS-B for box-constrained problems
            and SLSQP if inequality or equality constraints are present.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_scipy(
                initial_conditions=Xinit,
                acquisition_function=qEI,
                lower_bounds=bounds[0],
                upper_bounds=bounds[1],
            )
    """
    options = options or {}

    # if there are fixed features we may optimize over a domain of lower dimension
    reduced_domain = False
    if fixed_features:
        # TODO: We can support fixed features, see Max's comment on D33551393. We can
        # consider adding this at a later point.
        if nonlinear_inequality_constraints:
            raise NotImplementedError(
                "Fixed features are not supported when non-linear inequality "
                "constraints are given."
            )
        # if there are no constraints things are straightforward
        if not (inequality_constraints or equality_constraints):
            reduced_domain = True
        # if there are we need to make sure features are fixed to specific values
        else:
            reduced_domain = None not in fixed_features.values()

    if reduced_domain:
        _no_fixed_features = _remove_fixed_features_from_optimization(
            fixed_features=fixed_features,
            acquisition_function=acquisition_function,
            initial_conditions=initial_conditions,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            inequality_constraints=inequality_constraints,
            equality_constraints=equality_constraints,
        )
        # call the routine with no fixed_features
        clamped_candidates, batch_acquisition = gen_candidates_scipy(
            initial_conditions=_no_fixed_features.initial_conditions,
            acquisition_function=_no_fixed_features.acquisition_function,
            lower_bounds=_no_fixed_features.lower_bounds,
            upper_bounds=_no_fixed_features.upper_bounds,
            inequality_constraints=_no_fixed_features.inequality_constraints,
            equality_constraints=_no_fixed_features.equality_constraints,
            options=options,
            fixed_features=None,
        )
        clamped_candidates = _no_fixed_features.acquisition_function._construct_X_full(
            clamped_candidates
        )
        return clamped_candidates, batch_acquisition

    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    )

    shapeX = clamped_candidates.shape
    x0 = clamped_candidates.view(-1)
    bounds = make_scipy_bounds(
        X=initial_conditions, lower_bounds=lower_bounds, upper_bounds=upper_bounds
    )
    constraints = make_scipy_linear_constraints(
        shapeX=clamped_candidates.shape,
        inequality_constraints=inequality_constraints,
        equality_constraints=equality_constraints,
    )

    def f_np_wrapper(x: np.ndarray, f: Callable):
        """Given a torch callable, compute value + grad given a numpy array."""
        if np.isnan(x).any():
            raise RuntimeError(
                f"{np.isnan(x).sum()} elements of the {x.size} element array "
                f"`x` are NaN."
            )
        X = (
            torch.from_numpy(x)
            .to(initial_conditions)
            .view(shapeX)
            .contiguous()
            .requires_grad_(True)
        )
        X_fix = fix_features(X, fixed_features=fixed_features)
        loss = f(X_fix).sum()
        # compute gradient w.r.t. the inputs (does not accumulate in leaves)
        gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1))
        if np.isnan(gradf).any():
            msg = (
                f"{np.isnan(gradf).sum()} elements of the {x.size} element "
                "gradient array `gradf` are NaN. This often indicates numerical issues."
            )
            if initial_conditions.dtype != torch.double:
                msg += " Consider using `dtype=torch.double`."
            raise RuntimeError(msg)
        fval = loss.item()
        return fval, gradf

    if nonlinear_inequality_constraints:
        # Make sure `batch_limit` is 1 for now.
        if not (len(shapeX) == 3 and shapeX[:2] == torch.Size([1, 1])):
            raise ValueError(
                "`batch_limit` must be 1 when non-linear inequality constraints "
                "are given."
            )
        constraints += make_scipy_nonlinear_inequality_constraints(
            nonlinear_inequality_constraints=nonlinear_inequality_constraints,
            f_np_wrapper=f_np_wrapper,
            x0=x0,
        )
    x0 = _arrayify(x0)

    def f(x):
        return -acquisition_function(x)

    res = minimize(
        fun=f_np_wrapper,
        args=(f,),
        x0=x0,
        method=options.get("method", "SLSQP" if constraints else "L-BFGS-B"),
        jac=True,
        bounds=bounds,
        constraints=constraints,
        callback=options.get("callback", None),
        options={k: v for k, v in options.items() if k not in ["method", "callback"]},
    )
    candidates = fix_features(
        X=torch.from_numpy(res.x).to(initial_conditions).reshape(shapeX),
        fixed_features=fixed_features,
    )

    # SLSQP sometimes fails in the line search or may just fail to find a feasible
    # candidate, in which case we just return the starting point. This happens rarely,
    # so it shouldn't be an issue given enough restarts.
    if nonlinear_inequality_constraints and any(
        nlc(candidates.view(-1)) < NLC_TOL for nlc in nonlinear_inequality_constraints
    ):
        candidates = torch.from_numpy(x0).to(candidates).reshape(shapeX)
        warnings.warn(
            "SLSQP failed to converge to a solution the satisfies the non-linear "
            "constraints. Returning the feasible starting point."
        )

    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(clamped_candidates)

    return clamped_candidates, batch_acquisition
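For the non-linear constraints, the wrapper above ultimately hands SLSQP plain {"type": "ineq"} dictionaries. A hedged, simplified illustration of that mapping for a single scalar constraint c(x) >= 0 (this is not the actual make_scipy_nonlinear_inequality_constraints implementation):

import numpy as np
import torch

def make_scipy_ineq_constraint(c, dtype=torch.double):
    """Wrap a torch callable c(x) >= 0 as a scipy SLSQP constraint dict (simplified)."""
    def fun(x: np.ndarray) -> float:
        return c(torch.from_numpy(x).to(dtype=dtype)).item()

    def jac(x: np.ndarray) -> np.ndarray:
        X = torch.from_numpy(x).to(dtype=dtype).requires_grad_(True)
        return torch.autograd.grad(c(X), X)[0].detach().numpy()

    return {"type": "ineq", "fun": fun, "jac": jac}

# e.g. constrain candidates to the unit ball: 1 - ||x||^2 >= 0
constraint = make_scipy_ineq_constraint(lambda X: 1.0 - (X ** 2).sum())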
Example #10
def gen_candidates_scipy(
    initial_conditions: Tensor,
    acquisition_function: Module,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    options: Optional[Dict[str, Any]] = None,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using `scipy.optimize.minimize`.

    Optimizes an acquisition function starting from a set of initial candidates
    using `scipy.optimize.minimize` via a numpy converter.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        inequality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an inequality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) >= rhs`.
        equality_constraints: A list of tuples (indices, coefficients, rhs),
            with each tuple encoding an equality constraint of the form
            `\sum_i (X[indices[i]] * coefficients[i]) = rhs`.
        options: Options used to control the optimization including "method"
            and "maxiter". Select method for `scipy.minimize` using the
            "method" key. By default uses L-BFGS-B for box-constrained problems
            and SLSQP if inequality or equality constraints are present.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_scipy(
                initial_conditions=Xinit,
                acquisition_function=qEI,
                lower_bounds=bounds[0],
                upper_bounds=bounds[1],
            )
    """
    options = options or {}
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)

    shapeX = clamped_candidates.shape
    x0 = _arrayify(clamped_candidates.view(-1))
    bounds = make_scipy_bounds(
        X=initial_conditions, lower_bounds=lower_bounds, upper_bounds=upper_bounds
    )
    constraints = make_scipy_linear_constraints(
        shapeX=clamped_candidates.shape,
        inequality_constraints=inequality_constraints,
        equality_constraints=equality_constraints,
    )

    def f(x):
        if np.isnan(x).any():
            raise RuntimeError(
                f"{np.isnan(x).sum()} elements of the {x.size} element array "
                f"`x` are NaN."
            )
        X = (
            torch.from_numpy(x)
            .to(initial_conditions)
            .view(shapeX)
            .contiguous()
            .requires_grad_(True)
        )
        X_fix = fix_features(X=X, fixed_features=fixed_features)
        loss = -acquisition_function(X_fix).sum()
        # compute gradient w.r.t. the inputs (does not accumulate in leaves)
        gradf = _arrayify(torch.autograd.grad(loss, X)[0].contiguous().view(-1))
        if np.isnan(gradf).any():
            msg = (
                f"{np.isnan(gradf).sum()} elements of the {x.size} element "
                "gradient array `gradf` are NaN. This often indicates numerical issues."
            )
            if initial_conditions.dtype != torch.double:
                msg += " Consider using `dtype=torch.double`."
            raise RuntimeError(msg)
        fval = loss.item()
        return fval, gradf

    res = minimize(
        f,
        x0,
        method=options.get("method", "SLSQP" if constraints else "L-BFGS-B"),
        jac=True,
        bounds=bounds,
        constraints=constraints,
        callback=options.get("callback", None),
        options={k: v for k, v in options.items() if k not in ["method", "callback"]},
    )
    candidates = fix_features(
        X=torch.from_numpy(res.x).to(initial_conditions).view(shapeX).contiguous(),
        fixed_features=fixed_features,
    )
    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(clamped_candidates)
    return clamped_candidates, batch_acquisition
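The inner f above follows the standard SciPy pattern of returning (value, gradient) with jac=True, letting torch do the differentiation. A self-contained sketch of just that pattern on a toy quadratic (no BoTorch involved):

import numpy as np
import torch
from scipy.optimize import minimize

target = torch.tensor([0.3, 0.7], dtype=torch.double)

def f(x: np.ndarray):
    X = torch.from_numpy(x).requires_grad_(True)
    loss = ((X - target) ** 2).sum()
    grad = torch.autograd.grad(loss, X)[0]
    return loss.item(), grad.numpy()

res = minimize(f, x0=np.zeros(2), method="L-BFGS-B", jac=True, bounds=[(0.0, 1.0)] * 2)
print(res.x)  # approximately [0.3, 0.7]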
Example #11
def gen_candidates_torch(
    initial_conditions: Tensor,
    acquisition_function: Callable,
    lower_bounds: Optional[Union[float, Tensor]] = None,
    upper_bounds: Optional[Union[float, Tensor]] = None,
    optimizer: Type[Optimizer] = torch.optim.Adam,
    options: Optional[Dict[str, Union[float, str]]] = None,
    verbose: bool = True,
    fixed_features: Optional[Dict[int, Optional[float]]] = None,
) -> Tuple[Tensor, Tensor]:
    r"""Generate a set of candidates using a `torch.optim` optimizer.

    Optimizes an acquisition function starting from a set of initial candidates
    using an optimizer from `torch.optim`.

    Args:
        initial_conditions: Starting points for optimization.
        acquisition_function: Acquisition function to be used.
        lower_bounds: Minimum values for each column of initial_conditions.
        upper_bounds: Maximum values for each column of initial_conditions.
        optimizer (Optimizer): The pytorch optimizer to use to perform
            candidate search.
        options: Options used to control the optimization. Includes
            maxiter: Maximum number of iterations
        verbose: If True, provide verbose output.
        fixed_features: This is a dictionary of feature indices to values, where
            all generated candidates will have features fixed to these values.
            If the dictionary value is None, then that feature will just be
            fixed to the clamped value and not optimized. Assumes values to be
            compatible with lower_bounds and upper_bounds!

    Returns:
        2-element tuple containing

        - The set of generated candidates.
        - The acquisition value for each t-batch.

    Example:
        >>> qEI = qExpectedImprovement(model, best_f=0.2)
        >>> bounds = torch.tensor([[0., 0.], [1., 2.]])
        >>> Xinit = gen_batch_initial_conditions(
        >>>     qEI, bounds, q=3, num_restarts=25, raw_samples=500
        >>> )
        >>> batch_candidates, batch_acq_values = gen_candidates_torch(
                initial_conditions=Xinit,
                acquisition_function=qEI,
                lower_bounds=bounds[0],
                upper_bounds=bounds[1],
            )
    """
    options = options or {}
    clamped_candidates = columnwise_clamp(
        X=initial_conditions, lower=lower_bounds, upper=upper_bounds
    ).requires_grad_(True)
    candidates = fix_features(clamped_candidates, fixed_features)
    bayes_optimizer = optimizer(
        params=[clamped_candidates], lr=options.get("lr", 0.025)
    )
    param_trajectory: Dict[str, List[Tensor]] = {"candidates": []}
    loss_trajectory: List[float] = []
    i = 0
    stop = False
    stopping_criterion = ExpMAStoppingCriterion(
        **_filter_kwargs(ExpMAStoppingCriterion, **options)
    )
    while not stop:
        i += 1
        loss = -acquisition_function(candidates).sum()
        if verbose:
            print("Iter: {} - Value: {:.3f}".format(i, -(loss.item())))
        loss_trajectory.append(loss.item())
        param_trajectory["candidates"].append(candidates.clone())

        def closure():
            bayes_optimizer.zero_grad()
            loss.backward()
            return loss

        bayes_optimizer.step(closure)
        clamped_candidates.data = columnwise_clamp(
            clamped_candidates, lower_bounds, upper_bounds
        )
        candidates = fix_features(clamped_candidates, fixed_features)
        stop = stopping_criterion.evaluate(fvals=loss.detach())
    clamped_candidates = columnwise_clamp(
        X=candidates, lower=lower_bounds, upper=upper_bounds, raise_on_violation=True
    )
    with torch.no_grad():
        batch_acquisition = acquisition_function(candidates)
    return candidates, batch_acquisition
    def gen(
        self,
        model_gen_options: Optional[Dict[str, Any]] = None,
        explore_features: Optional[List[int]] = None,
    ) -> Tuple[Tensor, Optional[List[Dict[str, Any]]]]:
        """Generate candidate by optimizing acquisition function.

        Args:
            model_gen_options: Dictionary with options for generating candidates, such as
                SGD parameters. See code for all options and their defaults.
            explore_features: List of features that will be selected randomly and then
                fixed for acquisition fn optimization.

        Returns:
            Xopt: (1 x d) tensor of the generated candidate
            candidate_metadata: List of dict of metadata for each candidate. Contains
                acquisition value for the candidate.
        """
        # Default optimization settings
        # TODO are these sufficiently robust? Can they be tuned better?
        options = model_gen_options or {}
        num_restarts = options.get("num_restarts", 10)
        raw_samples = options.get("raw_samples", 1000)
        verbosity_freq = options.get("verbosity_freq", -1)
        lr = options.get("lr", 0.01)
        momentum = options.get("momentum", 0.9)
        nesterov = options.get("nesterov", True)
        epochs = options.get("epochs", 50)
        milestones = options.get("milestones", [25, 40])
        gamma = options.get("gamma", 0.1)
        loss_constraint_fun = options.get(
            "loss_constraint_fun", default_loss_constraint_fun
        )

        acq_function = self._get_acquisition_fn()
        # Augment bounds with deriv indicator
        bounds = torch.cat((self.bounds_, torch.zeros(2, 1, dtype=self.dtype)), dim=1)
        # Fix deriv indicator to 0 during optimization
        fixed_features = {(bounds.shape[1] - 1): 0.0}
        # Fix explore features to random values
        if explore_features is not None:
            for idx in explore_features:
                val = (
                    bounds[0, idx]
                    + torch.rand(1, dtype=self.dtype)
                    * (bounds[1, idx] - bounds[0, idx])
                ).item()
                fixed_features[idx] = val
                bounds[0, idx] = val
                bounds[1, idx] = val

        # Initialize
        batch_initial_conditions = gen_batch_initial_conditions(
            acq_function=acq_function,
            bounds=bounds,
            q=1,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
        )
        clamped_candidates = columnwise_clamp(
            X=batch_initial_conditions, lower=bounds[0], upper=bounds[1]
        ).requires_grad_(True)
        candidates = fix_features(clamped_candidates, fixed_features)
        optimizer = torch.optim.SGD(
            params=[clamped_candidates], lr=lr, momentum=momentum, nesterov=nesterov
        )
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones, gamma=gamma
        )

        # Optimize
        for epoch in range(epochs):
            loss = -acq_function(candidates).sum()

            # adjust loss based on constraints on candidates
            loss = loss_constraint_fun(loss, candidates)

            if verbosity_freq > 0 and epoch % verbosity_freq == 0:
                logger.info("Iter: {} - Value: {:.3f}".format(epoch, -(loss.item())))

            def closure():
                optimizer.zero_grad()
                loss.backward(
                    retain_graph=True
                )  # Variational model requires retain_graph
                return loss

            optimizer.step(closure)
            clamped_candidates.data = columnwise_clamp(
                X=clamped_candidates, lower=bounds[0], upper=bounds[1]
            )
            candidates = fix_features(clamped_candidates, fixed_features)
            lr_scheduler.step()

        # Extract best point
        with torch.no_grad():
            batch_acquisition = acq_function(candidates)
        best = torch.argmax(batch_acquisition.view(-1), dim=0)
        Xopt = candidates[best][:, :-1].detach()
        candidate_metadata = [{"acquisition_value": batch_acquisition[best].item()}]
        return Xopt, candidate_metadata
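Both gen methods above use the same optimizer pattern: SGD with Nesterov momentum driven through a closure (the variational model requires retain_graph=True), a MultiStepLR schedule that drops the learning rate at the milestone epochs, and a clamp of the candidate data back into the bounds after every step. A minimal sketch of that pattern on a toy loss (the quadratic is an assumption for illustration):

import torch

x = torch.rand(1, 3, requires_grad=True)
optimizer = torch.optim.SGD([x], lr=0.01, momentum=0.9, nesterov=True)
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[25, 40], gamma=0.1)

for epoch in range(50):
    def closure():
        optimizer.zero_grad()
        loss = ((x - 0.5) ** 2).sum()  # toy stand-in for -acq_function(candidates).sum()
        loss.backward()
        return loss

    optimizer.step(closure)
    x.data = x.data.clamp(0.0, 1.0)  # analogue of columnwise_clamp on the candidates
    lr_scheduler.step()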