Example No. 1
def optimize_qparego_and_get_observation(model, train_obj, sampler):
    """Samples a set of random weights for each candidate in the batch, performs sequential greedy optimization
    of the qParEGO acquisition function, and returns a new candidate and observation."""
    acq_func_list = []
    for _ in range(BATCH_SIZE):
        weights = sample_simplex(problem.num_objectives, **tkwargs).squeeze()
        objective = GenericMCObjective(
            get_chebyshev_scalarization(weights=weights, Y=train_obj))
        acq_func = qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=objective,
            best_f=objective(train_obj).max(),
            sampler=sampler,
        )
        acq_func_list.append(acq_func)
    # optimize
    candidates, _ = optimize_acqf_list(
        acq_function_list=acq_func_list,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
    )
    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    new_obj = problem(new_x)
    return new_x, new_obj
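
A minimal sketch of how this helper might be driven, assuming the module-level names it references (BATCH_SIZE, problem, tkwargs, standard_bounds, NUM_RESTARTS, RAW_SAMPLES) are set up roughly as in the BoTorch multi-objective tutorial; the model and train_obj would come from first fitting a GP on initial observations.

import torch
from botorch.sampling.samplers import SobolQMCNormalSampler
from botorch.test_functions.multi_objective import BraninCurrin

# illustrative globals assumed by optimize_qparego_and_get_observation
tkwargs = {"dtype": torch.double, "device": torch.device("cpu")}
problem = BraninCurrin(negate=True).to(**tkwargs)
BATCH_SIZE, NUM_RESTARTS, RAW_SAMPLES = 4, 20, 1024
standard_bounds = torch.zeros(2, problem.dim, **tkwargs)
standard_bounds[1] = 1

sampler = SobolQMCNormalSampler(num_samples=128)
# model, train_obj = ...  # fit a multi-output GP on initial (normalized) data
# new_x, new_obj = optimize_qparego_and_get_observation(model, train_obj, sampler)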
Example No. 2
    def test_1d_query(self):
        seed = 1
        torch.manual_seed(seed)
        np.random.seed(seed)
        n_init = 150
        n_opt = 1
        lb = -4.0
        ub = 4.0

        target = 0.5

        def obj(x):
            return -((Normal(0, 1).cdf(x[..., 0]) - target) ** 2)

        # Test function: a sine with period 8, peaking at x = 2
        def test_fun(x):
            return np.sin(np.pi * x / 4)

        strat_list = [
            Strategy(
                lb=lb,
                ub=ub,
                n_trials=n_init,
                generator=SobolGenerator(lb=lb, ub=ub, seed=seed),
            ),
            Strategy(
                lb=lb,
                ub=ub,
                model=GPClassificationModel(lb=lb, ub=ub, inducing_size=10),
                generator=OptimizeAcqfGenerator(
                    qUpperConfidenceBound,
                    acqf_kwargs={"beta": 1.96, "objective": GenericMCObjective(obj)},
                ),
                n_trials=n_opt,
            ),
        ]

        strat = SequentialStrategy(strat_list)

        for _i in range(n_init + n_opt):
            next_x = strat.gen()
            strat.add_data(next_x, [bernoulli.rvs(norm.cdf(test_fun(next_x)))])

        # We expect the global max to be at (2, 1), the min at (-2, -1)
        fmax, argmax = strat.get_max()
        self.assertTrue(np.abs(fmax - 1) < 0.5)
        self.assertTrue(np.abs(argmax[0] - 2) < 0.5)

        fmin, argmin = strat.get_min()
        self.assertTrue(np.abs(fmin + 1) < 0.5)
        self.assertTrue(np.abs(argmin[0] + 2) < 0.5)

        # Query at x=2 should be f=1
        self.assertTrue(np.abs(strat.predict(torch.tensor([2]))[0] - 1) < 0.5)

        # Inverse query at val 1 should return (1,[2])
        val, loc = strat.inv_query(1.0, constraints={})
        self.assertTrue(np.abs(val - 1) < 0.5)
        self.assertTrue(np.abs(loc[0] - 2) < 0.5)
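
As a quick illustration (not part of the test), the callable wrapped in GenericMCObjective above is maximized where the probit-transformed latent value hits the 0.5 target, i.e. at a latent value of 0:

import torch
from torch.distributions import Normal

xs = torch.linspace(-4, 4, 9).unsqueeze(-1)  # candidate latent values in [lb, ub]
vals = -((Normal(0, 1).cdf(xs[..., 0]) - 0.5) ** 2)
print(xs[vals.argmax()])  # tensor([0.])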
Example No. 3
def get_PosteriorMean(
    model: Model,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates a PosteriorMean acquisition function.

    Note: If no OutcomeConstraints are given, return an analytic acquisition
    function. This requires {optimizer_kwargs: {joint_optimization: True}} or an
    optimizer that does not assume pending point support.

    Args:
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        X_pending: A tensor containing points whose evaluation is pending (i.e.
            that have been submitted for evaluation) present for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).

    Returns:
        PosteriorMean: The instantiated acquisition function.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")
    # construct Objective module
    if kwargs.get("chebyshev_scalarization", False):
        obj_tf = get_chebyshev_scalarization(
            weights=objective_weights,
            Y=torch.stack(kwargs.get("Ys")).transpose(0, 1).squeeze(-1),
        )
    else:
        obj_tf = get_objective_weights_transform(objective_weights)

    def obj_fn(samples: Tensor, X: Optional[Tensor] = None) -> Tensor:
        return obj_tf(samples)

    if outcome_constraints is None:
        objective = GenericMCObjective(objective=obj_fn)
    else:
        con_tfs = get_outcome_constraint_transforms(outcome_constraints)
        inf_cost = get_infeasible_cost(X=X_observed,
                                       model=model,
                                       objective=obj_fn)
        objective = ConstrainedMCObjective(objective=obj_fn,
                                           constraints=con_tfs or [],
                                           infeasible_cost=inf_cost)
    # Use qSimpleRegret, not analytic posterior, to handle arbitrary objective fns.
    acq_func = qSimpleRegret(model, objective=objective)
    return acq_func
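
A hedged usage sketch (the toy data and the single-output weight are illustrative only): with a fitted single-output model and no outcome constraints, the helper wraps the weighted objective in GenericMCObjective and returns a qSimpleRegret instance.

import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = torch.rand(8, 1, dtype=torch.double)
model = SingleTaskGP(train_X, train_Y)
acqf = get_PosteriorMean(
    model=model,
    objective_weights=torch.tensor([1.0], dtype=torch.double),
    X_observed=train_X,
)
vals = acqf(train_X.unsqueeze(1))  # evaluate on a `b x q=1 x d` batch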
Example No. 4
 def test_prune_inferior_points(self):
     for dtype in (torch.float, torch.double):
         X = torch.rand(3, 2, device=self.device, dtype=dtype)
         # the event shape is `q x t` = 3 x 1
         samples = torch.tensor([[-1.0], [0.0], [1.0]],
                                device=self.device,
                                dtype=dtype)
         mm = MockModel(MockPosterior(samples=samples))
         # test that a batched X raises errors
         with self.assertRaises(UnsupportedError):
             prune_inferior_points(model=mm, X=X.expand(2, 3, 2))
         # test that a batched model raises errors (event shape is `q x t` = 3 x 1)
         mm2 = MockModel(MockPosterior(samples=samples.expand(2, 3, 1)))
         with self.assertRaises(UnsupportedError):
             prune_inferior_points(model=mm2, X=X)
         # test that invalid max_frac is checked properly
         with self.assertRaises(ValueError):
             prune_inferior_points(model=mm, X=X, max_frac=1.1)
         # test basic behaviour
         X_pruned = prune_inferior_points(model=mm, X=X)
         self.assertTrue(torch.equal(X_pruned, X[[-1]]))
         # test custom objective
         neg_id_obj = GenericMCObjective(lambda X: -X.squeeze(-1))
         X_pruned = prune_inferior_points(model=mm,
                                          X=X,
                                          objective=neg_id_obj)
         self.assertTrue(torch.equal(X_pruned, X[[0]]))
         # test non-repeated samples (requires mocking out MockPosterior's rsample)
         samples = torch.tensor(
             [[[3.0], [0.0], [0.0]], [[0.0], [2.0], [0.0]],
              [[0.0], [0.0], [1.0]]],
             device=self.device,
             dtype=dtype,
         )
         with mock.patch.object(MockPosterior,
                                "rsample",
                                return_value=samples):
             mm = MockModel(MockPosterior(samples=samples))
             X_pruned = prune_inferior_points(model=mm, X=X)
         self.assertTrue(torch.equal(X_pruned, X))
         # test max_frac limiting
         with mock.patch.object(MockPosterior,
                                "rsample",
                                return_value=samples):
             mm = MockModel(MockPosterior(samples=samples))
             X_pruned = prune_inferior_points(model=mm, X=X, max_frac=2 / 3)
         self.assertTrue(torch.equal(X_pruned, X[:2]))
         # test that zero-probability is in fact pruned
         samples[2, 0, 0] = 10
         with mock.patch.object(MockPosterior,
                                "rsample",
                                return_value=samples):
             mm = MockModel(MockPosterior(samples=samples))
             X_pruned = prune_inferior_points(model=mm, X=X)
         self.assertTrue(torch.equal(X_pruned, X[:2]))
Example No. 5
 def test_generic_mc_objective(self):
     for dtype in (torch.float, torch.double):
         obj = GenericMCObjective(generic_obj)
         samples = torch.randn(1, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(2, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 1, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 2, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
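
These tests assume a module-level generic_obj fixture that is not shown here; a plausible stand-in (the exact definition lives in BoTorch's test suite, so treat this as an assumption) is sketched below. generic_obj_deprecated, used in Example No. 10, would be the same function taking only samples.

import torch

def generic_obj(samples, X=None):  # hypothetical stand-in for the test fixture
    return torch.log(torch.sum(samples ** 2, dim=-1))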
Example No. 6
 def test_generic_mc_objective(self, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     for dtype in (torch.float, torch.double):
         obj = GenericMCObjective(generic_obj)
         samples = torch.randn(1, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(2, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 1, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 2, device=device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
Example No. 7
    def test_evaluate_kg(self):
        # a thorough test using real model and dtype double
        d = 2
        dtype = torch.double
        bounds = torch.tensor([[0], [1]], device=self.device,
                              dtype=dtype).repeat(1, d)
        train_X = torch.rand(3, d, device=self.device, dtype=dtype)
        train_Y = torch.rand(3, 1, device=self.device, dtype=dtype)
        model = SingleTaskGP(train_X, train_Y)
        qKG = qKnowledgeGradient(
            model=model,
            num_fantasies=2,
            objective=None,
            X_pending=torch.rand(2, d, device=self.device, dtype=dtype),
            current_value=torch.rand(1, device=self.device, dtype=dtype),
        )
        X = torch.rand(4, 3, d, device=self.device, dtype=dtype)
        options = {"num_inner_restarts": 2, "raw_inner_samples": 3}
        val = qKG.evaluate(X,
                           bounds=bounds,
                           num_restarts=2,
                           raw_samples=3,
                           options=options)
        # verify output shape
        self.assertEqual(val.size(), torch.Size([4]))
        # verify dtype
        self.assertEqual(val.dtype, dtype)

        # test i) no dimension is squeezed out, ii) dtype float, iii) MC objective,
        # and iv) t_batch_mode_transform
        dtype = torch.float
        bounds = torch.tensor([[0], [1]], device=self.device, dtype=dtype)
        train_X = torch.rand(1, 1, device=self.device, dtype=dtype)
        train_Y = torch.rand(1, 1, device=self.device, dtype=dtype)
        model = SingleTaskGP(train_X, train_Y)
        qKG = qKnowledgeGradient(
            model=model,
            num_fantasies=1,
            objective=GenericMCObjective(
                objective=lambda Y, X: Y.norm(dim=-1)),
        )
        X = torch.rand(1, 1, device=self.device, dtype=dtype)
        options = {"num_inner_restarts": 1, "raw_inner_samples": 1}
        val = qKG.evaluate(X,
                           bounds=bounds,
                           num_restarts=1,
                           raw_samples=1,
                           options=options)
        # verify output shape
        self.assertEqual(val.size(), torch.Size([1]))
        # verify dtype
        self.assertEqual(val.dtype, dtype)
Example No. 8
 def test_get_value_function(self):
     mm = MockModel(None)
     # test PosteriorMean
     vf = _get_value_function(mm)
     self.assertIsInstance(vf, PosteriorMean)
     self.assertIsNone(vf.objective)
     # test SimpleRegret
     obj = GenericMCObjective(lambda Y: Y.sum(dim=-1))
     sampler = IIDNormalSampler(num_samples=2)
     vf = _get_value_function(model=mm, objective=obj, sampler=sampler)
     self.assertIsInstance(vf, qSimpleRegret)
     self.assertEqual(vf.objective, obj)
     self.assertEqual(vf.sampler, sampler)
Example No. 9
 def test_get_value_function(self):
     with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
         mock_num_outputs.return_value = 1
         mm = MockModel(None)
         # test PosteriorMean
         vf = _get_value_function(mm)
         self.assertIsInstance(vf, PosteriorMean)
         self.assertIsNone(vf.objective)
         # test SimpleRegret
         obj = GenericMCObjective(lambda Y: Y.sum(dim=-1))
         sampler = IIDNormalSampler(num_samples=2)
         vf = _get_value_function(model=mm, objective=obj, sampler=sampler)
         self.assertIsInstance(vf, qSimpleRegret)
         self.assertEqual(vf.objective, obj)
         self.assertEqual(vf.sampler, sampler)
Example No. 10
 def test_generic_mc_objective_deprecated(self):
     for dtype in (torch.float, torch.double):
         with warnings.catch_warnings(record=True) as ws, settings.debug(True):
             obj = GenericMCObjective(generic_obj_deprecated)
             warning_msg = (
                 "The `objective` callable of `GenericMCObjective` is expected to "
                 "take two arguments. Passing a callable that expects a single "
                 "argument will result in an error in future versions."
             )
             self.assertTrue(
                 any(issubclass(w.category, DeprecationWarning) for w in ws)
             )
             self.assertTrue(any(warning_msg in str(w.message) for w in ws))
         samples = torch.randn(1, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(2, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 1, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
         samples = torch.randn(3, 2, device=self.device, dtype=dtype)
         self.assertTrue(torch.equal(obj(samples), generic_obj(samples)))
Example No. 11
 def test_cache_root_decomposition(self):
     tkwargs = {"device": self.device}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         # test mt-mvn
         train_x = torch.rand(2, 1, **tkwargs)
         train_y = torch.rand(2, 2, **tkwargs)
         test_x = torch.rand(2, 1, **tkwargs)
         model = SingleTaskGP(train_x, train_y)
         sampler = IIDNormalSampler(1)
         with torch.no_grad():
             posterior = model.posterior(test_x)
         acqf = DummyCachedCholeskyAcqf(
             model=model,
             sampler=sampler,
             objective=GenericMCObjective(lambda Y: Y[..., 0]),
         )
         baseline_L = torch.eye(2, **tkwargs)
         with mock.patch(
                 EXTRACT_BATCH_COVAR_PATH,
                 wraps=extract_batch_covar) as mock_extract_batch_covar:
             with mock.patch(CHOLESKY_PATH,
                             return_value=baseline_L) as mock_cholesky:
                 acqf._cache_root_decomposition(posterior=posterior)
                 mock_extract_batch_covar.assert_called_once_with(
                     posterior.mvn)
                 mock_cholesky.assert_called_once()
         # test mvn
         model = SingleTaskGP(train_x, train_y[:, :1])
         with torch.no_grad():
             posterior = model.posterior(test_x)
         with mock.patch(
                 EXTRACT_BATCH_COVAR_PATH) as mock_extract_batch_covar:
             with mock.patch(CHOLESKY_PATH,
                             return_value=baseline_L) as mock_cholesky:
                 acqf._cache_root_decomposition(posterior=posterior)
                 mock_extract_batch_covar.assert_not_called()
                 mock_cholesky.assert_called_once()
         self.assertTrue(torch.equal(acqf._baseline_L, baseline_L))
Example No. 12
 def test_get_value_function(self):
     with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
         mock_num_outputs.return_value = 1
         mm = MockModel(None)
         # test PosteriorMean
         vf = _get_value_function(mm)
         self.assertIsInstance(vf, PosteriorMean)
         self.assertIsNone(vf.objective)
         # test SimpleRegret
         obj = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
         sampler = IIDNormalSampler(num_samples=2)
         vf = _get_value_function(model=mm, objective=obj, sampler=sampler)
         self.assertIsInstance(vf, qSimpleRegret)
         self.assertEqual(vf.objective, obj)
         self.assertEqual(vf.sampler, sampler)
         # test with project
         mock_project = mock.Mock(
             return_value=torch.ones(1, 1, 1, device=self.device)
         )
         vf = _get_value_function(
             model=mm,
             objective=obj,
             sampler=sampler,
             project=mock_project,
         )
         self.assertIsInstance(vf, ProjectedAcquisitionFunction)
         self.assertEqual(vf.objective, obj)
         self.assertEqual(vf.sampler, sampler)
         self.assertEqual(vf.project, mock_project)
         test_X = torch.rand(1, 1, 1, device=self.device)
         with mock.patch.object(
             vf, "base_value_function", __class__=torch.nn.Module, return_value=None
         ) as patch_bvf:
             vf(test_X)
             mock_project.assert_called_once_with(test_X)
             patch_bvf.assert_called_once_with(
                 torch.ones(1, 1, 1, device=self.device)
             )
Example No. 13
def bo_qei(config):
    """Optimizes over designs x in an offline optimization problem
    using the CMA Evolution Strategy

    Args:

    config: dict
        a dictionary of hyperparameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(task,
                                  hidden_size=config['vae_hidden_size'],
                                  latent_size=config['vae_latent_size'],
                                  activation=config['vae_activation'],
                                  kernel_size=config['vae_kernel_size'],
                                  num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build the data pipelines for training the VAE
        train_data, val_data = build_pipeline(
            x=x,
            y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the VAE for the configured number of epochs
        vae_trainer.launch(train_data, val_data, logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # build the data pipelines for training the ensemble
    train_data, val_data = build_pipeline(
        x=x,
        y=y,
        bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make several keras neural networks with two hidden layers
    forward_models = [
        ForwardModel(input_shape,
                     hidden_size=config['hidden_size'],
                     num_layers=config['num_layers'],
                     initial_max_std=config['initial_max_std'],
                     initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])
    ]

    # create a trainer for a forward model with a conservative objective
    ensemble = Ensemble(forward_models,
                        forward_model_optim=tf.keras.optimizers.Adam,
                        forward_model_lr=config['ensemble_lr'])

    # train the model for an additional number of epochs
    ensemble.launch(train_data, val_data, logger, config['ensemble_epochs'])

    # select the top bo_gp_samples designs from the dataset as initial points
    indices = tf.math.top_k(y[:, 0], k=config['bo_gp_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    initial_y = tf.gather(y, indices, axis=0)

    from botorch.models import FixedNoiseGP, ModelListGP
    from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.optim import optimize_acqf
    from botorch import fit_gpytorch_model
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.exceptions import BadInitialCandidatesWarning

    import torch
    import time
    import warnings

    warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
    warnings.filterwarnings('ignore', category=RuntimeWarning)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32

    def objective(input_x):
        original_x = input_x
        # convert the tensor into numpy before using a TF model
        if torch.cuda.is_available():
            input_x = input_x.detach().cpu().numpy()
        else:
            input_x = input_x.detach().numpy()
        batch_shape = input_x.shape[:-1]
        # pass the input into a TF model
        input_x = tf.reshape(input_x, [-1, *input_shape])

        # evaluate the ground truth or the learned model
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                input_x = tf.argmax(
                    vae_model.decoder_cnn.predict(input_x * standard_dev +
                                                  mean),
                    axis=2,
                    output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()

        ys = value.numpy()

        ys = ys.reshape(list(batch_shape) + [1])  # reshape returns a copy; assign it
        # convert the scores back to pytorch tensors
        return torch.tensor(ys).type_as(original_x).to(device, dtype=dtype)

    NOISE_SE = config['bo_noise_se']
    train_yvar = torch.tensor(NOISE_SE**2, device=device, dtype=dtype)

    def initialize_model(train_x, train_obj, state_dict=None):
        # define models for objective
        model_obj = FixedNoiseGP(train_x, train_obj,
                                 train_yvar.expand_as(train_obj)).to(train_x)
        # combine into a multi-output GP model
        model = ModelListGP(model_obj)
        mll = SumMarginalLogLikelihood(model.likelihood, model)
        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
        return mll, model

    def obj_callable(Z):
        return Z[..., 0]

    # define a feasibility-weighted objective for optimization
    obj = GenericMCObjective(obj_callable)

    BATCH_SIZE = config['bo_batch_size']
    bounds = torch.tensor([
        np.min(x, axis=0).reshape([input_size]).tolist(),
        np.max(x, axis=0).reshape([input_size]).tolist()
    ],
                          device=device,
                          dtype=dtype)

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns
        a new candidate and a noisy observation."""
        # optimize
        try:
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=bounds,
                q=BATCH_SIZE,
                num_restarts=config['bo_num_restarts'],
                raw_samples=config[
                    'bo_raw_samples'],  # used for initialization heuristic
                options={
                    "batch_limit": config['bo_batch_limit'],
                    "maxiter": config['bo_maxiter']
                })
        except RuntimeError:
            return
        # observe new values
        new_x = candidates.detach()
        exact_obj = objective(candidates)
        new_obj = exact_obj + NOISE_SE * torch.randn_like(exact_obj)
        return new_x, new_obj

    N_BATCH = config['bo_iterations']
    MC_SAMPLES = config['bo_mc_samples']

    best_observed_ei = []

    # call helper functions to generate initial training data and initialize model
    train_x_ei = initial_x.numpy().reshape([initial_x.shape[0], input_size])
    train_x_ei = torch.tensor(train_x_ei).to(device, dtype=dtype)

    train_obj_ei = initial_y.numpy().reshape([initial_y.shape[0], 1])
    train_obj_ei = torch.tensor(train_obj_ei).to(device, dtype=dtype)

    best_observed_value_ei = train_obj_ei.max().item()
    mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
    best_observed_ei.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation
        qEI = qExpectedImprovement(model=model_ei,
                                   best_f=train_obj_ei.max(),
                                   sampler=qmc_sampler,
                                   objective=obj)

        # optimize and get new observation
        result = optimize_acqf_and_get_observation(qEI)
        if result is None:
            print("RuntimeError was encountered, most likely a "
                  "'symeig_cpu: the algorithm failed to converge'")
            break
        new_x_ei, new_obj_ei = result

        # update training points
        train_x_ei = torch.cat([train_x_ei, new_x_ei])
        train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

        # update progress
        best_value_ei = obj(train_obj_ei).max().item()  # best objective value, not inputs
        best_observed_ei.append(best_value_ei)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei,
                                            model_ei.state_dict())

        t1 = time.time()
        print(
            f"Batch {iteration:>2}: best_value = "
            f"({best_value_ei:>4.2f}), "
            f"time = {t1 - t0:>4.2f}.",
            end="")

    if torch.cuda.is_available():
        x_sol = train_x_ei.detach().cpu().numpy()
        y_sol = train_obj_ei.detach().cpu().numpy()

    else:
        x_sol = train_x_ei.detach().numpy()
        y_sol = train_obj_ei.detach().numpy()

    # select the top solver_samples solutions from the collected data
    indices = tf.math.top_k(y_sol[:, 0], k=config['solver_samples'])[1]
    solution = tf.gather(x_sol, indices, axis=0)
    solution = tf.reshape(solution, [-1, *input_shape])

    if task.is_discrete and config["use_vae"]:
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"], "solution.npy"),
            solution.numpy())

    # evaluate the found solution and record a video
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, N_BATCH, percentile=True)
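
For reference, a hedged sketch of the config dictionary bo_qei consumes; the key names are taken from the lookups in the function above, while every value (including the task name) is illustrative only.

config = {
    "logging_dir": "bo-qei-logs",
    "task": "HopperController-v0",  # illustrative task name
    "task_kwargs": {},
    "normalize_ys": True, "normalize_xs": True,
    "use_vae": False, "optimize_ground_truth": False,
    "vae_hidden_size": 64, "vae_latent_size": 32, "vae_activation": "relu",
    "vae_kernel_size": 3, "vae_num_blocks": 2, "vae_lr": 1e-3,
    "vae_beta": 1.0, "vae_batch_size": 128, "vae_epochs": 10,
    "val_size": 200, "bootstraps": 5, "ensemble_batch_size": 100,
    "hidden_size": 256, "num_layers": 2,
    "initial_max_std": 0.2, "initial_min_std": 0.1,
    "ensemble_lr": 1e-3, "ensemble_epochs": 50,
    "bo_gp_samples": 500, "bo_noise_se": 0.1, "bo_batch_size": 32,
    "bo_num_restarts": 10, "bo_raw_samples": 128, "bo_batch_limit": 5,
    "bo_maxiter": 200, "bo_iterations": 10, "bo_mc_samples": 128,
    "solver_samples": 128,
}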
Example No. 14
 def test_evaluate_q_multi_fidelity_knowledge_gradient(self):
     for dtype in (torch.float, torch.double):
         # basic test
         n_f = 4
         current_value = torch.rand(1, device=self.device, dtype=dtype)
         cau = GenericCostAwareUtility(mock_util)
         mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qMFKG = qMultiFidelityKnowledgeGradient(
                     model=mm,
                     num_fantasies=n_f,
                     current_value=current_value,
                     cost_aware_utility=cau,
                 )
                 X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
                 val = qMFKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
         val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
         self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
         self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
         # batched evaluation
         b = 2
         current_value = torch.rand(b, device=self.device, dtype=dtype)
         cau = GenericCostAwareUtility(mock_util)
         mean = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         X = torch.rand(b, n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qMFKG = qMultiFidelityKnowledgeGradient(
                     model=mm,
                     num_fantasies=n_f,
                     current_value=current_value,
                     cost_aware_utility=cau,
                 )
                 val = qMFKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
         val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
         self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
         self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
         # pending points and current value
         mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         X_pending = torch.rand(2, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         current_value = torch.rand(1, device=self.device, dtype=dtype)
         X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qMFKG = qMultiFidelityKnowledgeGradient(
                     model=mm,
                     num_fantasies=n_f,
                     X_pending=X_pending,
                     current_value=current_value,
                     cost_aware_utility=cau,
                 )
                 val = qMFKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 3, 1]))
         val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
         self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
         self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
         # test objective (inner MC sampling)
         objective = GenericMCObjective(objective=lambda Y: Y.norm(dim=-1))
         samples = torch.randn(3, 1, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(samples=samples))
         X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qMFKG = qMultiFidelityKnowledgeGradient(
                     model=mm,
                     num_fantasies=n_f,
                     objective=objective,
                     current_value=current_value,
                     cost_aware_utility=cau,
                 )
                 val = qMFKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
         val_exp = mock_util(X, objective(samples) - current_value).mean(dim=0)
         self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
         self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
Example No. 15
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])

        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )
    else:
        train_y = train_obj

        objective = GenericMCObjective(scalarization)

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=objective(train_y).max(),
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
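
A hedged sketch of calling the function directly on toy data (inside Optuna it is normally wired up via BoTorchSampler(candidates_func=qparego_candidates_func)); the shapes follow the signature above.

import torch

train_x = torch.rand(10, 3, dtype=torch.double)
train_obj = torch.rand(10, 4, dtype=torch.double)  # four objectives
bounds = torch.stack([torch.zeros(3, dtype=torch.double),
                      torch.ones(3, dtype=torch.double)])
candidate = qparego_candidates_func(train_x, train_obj, None, bounds)
print(candidate.shape)  # torch.Size([1, 3])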
Example No. 16
 def test_initialize_q_knowledge_gradient(self):
     for dtype in (torch.float, torch.double):
         mean = torch.zeros(1, 1, device=self.device, dtype=dtype)
         mm = MockModel(MockPosterior(mean=mean))
         # test error when specifying neither sampler nor num_fantasies
         with self.assertRaises(ValueError):
             qKnowledgeGradient(model=mm, num_fantasies=None)
         # test error when sampler and num_fantasies arg are inconsistent
         sampler = IIDNormalSampler(num_samples=16)
         with self.assertRaises(ValueError):
             qKnowledgeGradient(model=mm, num_fantasies=32, sampler=sampler)
         # test default construction
         qKG = qKnowledgeGradient(model=mm, num_fantasies=32)
         self.assertEqual(qKG.num_fantasies, 32)
         self.assertIsInstance(qKG.sampler, SobolQMCNormalSampler)
         self.assertEqual(qKG.sampler.sample_shape, torch.Size([32]))
         self.assertIsNone(qKG.objective)
         self.assertIsNone(qKG.inner_sampler)
         self.assertIsNone(qKG.X_pending)
         self.assertIsNone(qKG.current_value)
         self.assertEqual(qKG.get_augmented_q_batch_size(q=3), 32 + 3)
         # test custom construction
         obj = GenericMCObjective(lambda Y, X: Y.mean(dim=-1))
         sampler = IIDNormalSampler(num_samples=16)
         X_pending = torch.zeros(2, 2, device=self.device, dtype=dtype)
         qKG = qKnowledgeGradient(
             model=mm,
             num_fantasies=16,
             sampler=sampler,
             objective=obj,
             X_pending=X_pending,
         )
         self.assertEqual(qKG.num_fantasies, 16)
         self.assertEqual(qKG.sampler, sampler)
         self.assertEqual(qKG.sampler.sample_shape, torch.Size([16]))
         self.assertEqual(qKG.objective, obj)
         self.assertIsInstance(qKG.inner_sampler, SobolQMCNormalSampler)
         self.assertEqual(qKG.inner_sampler.sample_shape, torch.Size([128]))
         self.assertTrue(torch.equal(qKG.X_pending, X_pending))
         self.assertIsNone(qKG.current_value)
         self.assertEqual(qKG.get_augmented_q_batch_size(q=3), 16 + 3)
         # test assignment of num_fantasies from sampler if not provided
         qKG = qKnowledgeGradient(model=mm, num_fantasies=None, sampler=sampler)
         self.assertEqual(qKG.sampler.sample_shape, torch.Size([16]))
         # test custom construction with inner sampler and current value
         inner_sampler = SobolQMCNormalSampler(num_samples=256)
         current_value = torch.zeros(1, device=self.device, dtype=dtype)
         qKG = qKnowledgeGradient(
             model=mm,
             num_fantasies=8,
             objective=obj,
             inner_sampler=inner_sampler,
             current_value=current_value,
         )
         self.assertEqual(qKG.num_fantasies, 8)
         self.assertEqual(qKG.sampler.sample_shape, torch.Size([8]))
         self.assertEqual(qKG.objective, obj)
         self.assertIsInstance(qKG.inner_sampler, SobolQMCNormalSampler)
         self.assertEqual(qKG.inner_sampler, inner_sampler)
         self.assertIsNone(qKG.X_pending)
         self.assertTrue(torch.equal(qKG.current_value, current_value))
         self.assertEqual(qKG.get_augmented_q_batch_size(q=3), 8 + 3)
         # test construction with non-MC objective (ScalarizedObjective)
         qKG_s = qKnowledgeGradient(
             model=mm,
             num_fantasies=16,
             sampler=sampler,
             objective=ScalarizedObjective(weights=torch.rand(2)),
         )
         self.assertIsNone(qKG_s.inner_sampler)
         self.assertIsInstance(qKG_s.objective, ScalarizedObjective)
         # test error if no objective and multi-output model
         mean2 = torch.zeros(1, 2, device=self.device, dtype=dtype)
         mm2 = MockModel(MockPosterior(mean=mean2))
         with self.assertRaises(UnsupportedError):
             qKnowledgeGradient(model=mm2)
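
The same default construction on a real model instead of a mock, as a minimal sketch (the toy training data is illustrative):

import torch
from botorch.acquisition import qKnowledgeGradient
from botorch.models import SingleTaskGP

model = SingleTaskGP(torch.rand(5, 2), torch.rand(5, 1))
qKG = qKnowledgeGradient(model=model, num_fantasies=32)
assert qKG.get_augmented_q_batch_size(q=3) == 32 + 3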
Example No. 17
    def test_evaluate_qMFKG(self):
        for dtype in (torch.float, torch.double):
            # basic test
            n_f = 4
            current_value = torch.rand(1, device=self.device, dtype=dtype)
            cau = GenericCostAwareUtility(mock_util)
            mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
            variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
            mfm = MockModel(MockPosterior(mean=mean, variance=variance))
            with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
                with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                    mock_num_outputs.return_value = 1
                    mm = MockModel(None)
                    qMFKG = qMultiFidelityKnowledgeGradient(
                        model=mm,
                        num_fantasies=n_f,
                        current_value=current_value,
                        cost_aware_utility=cau,
                    )
                    X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
                    val = qMFKG(X)
                    patch_f.assert_called_once()
                    cargs, ckwargs = patch_f.call_args
                    self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
            val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
            self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
            self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
            # batched evaluation
            b = 2
            current_value = torch.rand(b, device=self.device, dtype=dtype)
            cau = GenericCostAwareUtility(mock_util)
            mean = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
            variance = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
            mfm = MockModel(MockPosterior(mean=mean, variance=variance))
            X = torch.rand(b, n_f + 1, 1, device=self.device, dtype=dtype)
            with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
                with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                    mock_num_outputs.return_value = 1
                    mm = MockModel(None)
                    qMFKG = qMultiFidelityKnowledgeGradient(
                        model=mm,
                        num_fantasies=n_f,
                        current_value=current_value,
                        cost_aware_utility=cau,
                    )
                    val = qMFKG(X)
                    patch_f.assert_called_once()
                    cargs, ckwargs = patch_f.call_args
                    self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
            val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
            self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
            self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
            # pending points and current value
            mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
            variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
            X_pending = torch.rand(2, 1, device=self.device, dtype=dtype)
            mfm = MockModel(MockPosterior(mean=mean, variance=variance))
            current_value = torch.rand(1, device=self.device, dtype=dtype)
            X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
            with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
                with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                    mock_num_outputs.return_value = 1
                    mm = MockModel(None)
                    qMFKG = qMultiFidelityKnowledgeGradient(
                        model=mm,
                        num_fantasies=n_f,
                        X_pending=X_pending,
                        current_value=current_value,
                        cost_aware_utility=cau,
                    )
                    val = qMFKG(X)
                    patch_f.assert_called_once()
                    cargs, ckwargs = patch_f.call_args
                    self.assertEqual(ckwargs["X"].shape, torch.Size([1, 3, 1]))
            val_exp = mock_util(X, mean.squeeze(-1) - current_value).mean(dim=0)
            self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
            self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
            # test objective (inner MC sampling)
            objective = GenericMCObjective(objective=lambda Y, X: Y.norm(dim=-1))
            samples = torch.randn(3, 1, 1, device=self.device, dtype=dtype)
            mfm = MockModel(MockPosterior(samples=samples))
            X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
            with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
                with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                    mock_num_outputs.return_value = 1
                    mm = MockModel(None)
                    qMFKG = qMultiFidelityKnowledgeGradient(
                        model=mm,
                        num_fantasies=n_f,
                        objective=objective,
                        current_value=current_value,
                        cost_aware_utility=cau,
                    )
                    val = qMFKG(X)
                    patch_f.assert_called_once()
                    cargs, ckwargs = patch_f.call_args
                    self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
            val_exp = mock_util(X, objective(samples) - current_value).mean(dim=0)
            self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
            self.assertTrue(torch.equal(qMFKG.extract_candidates(X), X[..., :-n_f, :]))
            # test valfunc_cls and valfunc_argfac
            d, p, d_prime = 4, 3, 2
            samples = torch.ones(3, 1, 1, device=self.device, dtype=dtype)
            mean = torch.tensor(
                [[0.25], [0.5], [0.75]], device=self.device, dtype=dtype
            )
            weights = torch.tensor([0.5, 1.0, 1.0], device=self.device, dtype=dtype)
            mfm = MockModel(MockPosterior(mean=mean, samples=samples))
            X = torch.rand(n_f * d + d, d, device=self.device, dtype=dtype)
            sample_points = torch.rand(p, d_prime, device=self.device, dtype=dtype)
            with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
                with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                    mock_num_outputs.return_value = 1
                    mm = MockModel(None)
                    qMFKG = qMultiFidelityKnowledgeGradient(
                        model=mm,
                        num_fantasies=n_f,
                        project=lambda X: project_to_sample_points(X, sample_points),
                        valfunc_cls=ScalarizedPosteriorMean,
                        valfunc_argfac=lambda model: {"weights": weights},
                    )
                    val = qMFKG(X)
                    patch_f.assert_called_once()
                    cargs, ckwargs = patch_f.call_args
                    self.assertEqual(ckwargs["X"].shape, torch.Size([1, 16, 4]))
                    val_exp = torch.tensor([1.375], dtype=dtype)
                    self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))

                    patch_f.reset_mock()
                    qMFKG = qMultiFidelityKnowledgeGradient(
                        model=mm,
                        num_fantasies=n_f,
                        project=lambda X: project_to_sample_points(X, sample_points),
                        valfunc_cls=qExpectedImprovement,
                        valfunc_argfac=lambda model: {"best_f": 0.0},
                    )
                    val = qMFKG(X)
                    patch_f.assert_called_once()
                    cargs, ckwargs = patch_f.call_args
                    self.assertEqual(ckwargs["X"].shape, torch.Size([1, 16, 4]))
                    val_exp = torch.tensor([1.0], dtype=dtype)
                    self.assertTrue(torch.allclose(val, val_exp, atol=1e-4))
Example No. 18
 def test_evaluate_q_knowledge_gradient(self):
     for dtype in (torch.float, torch.double):
         # basic test
         n_f = 4
         mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qKG = qKnowledgeGradient(model=mm, num_fantasies=n_f)
                 X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
                 val = qKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
         self.assertTrue(torch.allclose(val, mean.mean(), atol=1e-4))
         self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # batched evaluation
         b = 2
         mean = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         X = torch.rand(b, n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qKG = qKnowledgeGradient(model=mm, num_fantasies=n_f)
                 val = qKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
         self.assertTrue(
             torch.allclose(val, mean.mean(dim=0).squeeze(-1), atol=1e-4)
         )
         self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # pending points and current value
         X_pending = torch.rand(2, 1, device=self.device, dtype=dtype)
         mean = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, 1, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         current_value = torch.rand(1, device=self.device, dtype=dtype)
         X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qKG = qKnowledgeGradient(
                     model=mm,
                     num_fantasies=n_f,
                     X_pending=X_pending,
                     current_value=current_value,
                 )
                 val = qKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 3, 1]))
         self.assertTrue(torch.allclose(val, mean.mean() - current_value, atol=1e-4))
         self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # test objective (inner MC sampling)
         objective = GenericMCObjective(objective=lambda Y, X: Y.norm(dim=-1))
         samples = torch.randn(3, 1, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(samples=samples))
         X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 1
                 mm = MockModel(None)
                 qKG = qKnowledgeGradient(
                     model=mm, num_fantasies=n_f, objective=objective
                 )
                 val = qKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
         self.assertTrue(torch.allclose(val, objective(samples).mean(), atol=1e-4))
         self.assertTrue(torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # test non-MC objective (ScalarizedObjective)
         weights = torch.rand(2, device=self.device, dtype=dtype)
         objective = ScalarizedObjective(weights=weights)
         mean = torch.tensor([1.0, 0.5], device=self.device, dtype=dtype).expand(
             n_f, 1, 2
         )
         cov = torch.tensor(
             [[1.0, 0.1], [0.1, 0.5]], device=self.device, dtype=dtype
         ).expand(n_f, 2, 2)
         posterior = GPyTorchPosterior(MultitaskMultivariateNormal(mean, cov))
         mfm = MockModel(posterior)
         with mock.patch.object(MockModel, "fantasize", return_value=mfm) as patch_f:
             with mock.patch(NO, new_callable=mock.PropertyMock) as mock_num_outputs:
                 mock_num_outputs.return_value = 2
                 mm = MockModel(None)
                 qKG = qKnowledgeGradient(
                     model=mm, num_fantasies=n_f, objective=objective
                 )
                 val = qKG(X)
                 patch_f.assert_called_once()
                 cargs, ckwargs = patch_f.call_args
                 self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1, 1]))
                 val_expected = (mean * weights).sum(-1).mean(0)
                 self.assertTrue(torch.allclose(val, val_expected))
Example No. 19
 def test_prune_inferior_points(self):
     for dtype in (torch.float, torch.double):
         X = torch.rand(3, 2, device=self.device, dtype=dtype)
         # the event shape is `q x t` = 3 x 1
         samples = torch.tensor([[-1.0], [0.0], [1.0]],
                                device=self.device,
                                dtype=dtype)
         mm = MockModel(MockPosterior(samples=samples))
         # test that a batched X raises errors
         with self.assertRaises(UnsupportedError):
             prune_inferior_points(model=mm, X=X.expand(2, 3, 2))
         # test that a batched model raises errors (event shape is `q x t` = 3 x 1)
         mm2 = MockModel(MockPosterior(samples=samples.expand(2, 3, 1)))
         with self.assertRaises(UnsupportedError):
             prune_inferior_points(model=mm2, X=X)
         # test that invalid max_frac is checked properly
         with self.assertRaises(ValueError):
             prune_inferior_points(model=mm, X=X, max_frac=1.1)
         # test basic behaviour
         X_pruned = prune_inferior_points(model=mm, X=X)
         self.assertTrue(torch.equal(X_pruned, X[[-1]]))
         # test custom objective
         neg_id_obj = GenericMCObjective(lambda Y, X: -(Y.squeeze(-1)))
         X_pruned = prune_inferior_points(model=mm,
                                          X=X,
                                          objective=neg_id_obj)
         self.assertTrue(torch.equal(X_pruned, X[[0]]))
         # test non-repeated samples (requires mocking out MockPosterior's rsample)
         samples = torch.tensor(
             [[[3.0], [0.0], [0.0]], [[0.0], [2.0], [0.0]],
              [[0.0], [0.0], [1.0]]],
             device=self.device,
             dtype=dtype,
         )
         with mock.patch.object(MockPosterior,
                                "rsample",
                                return_value=samples):
             mm = MockModel(MockPosterior(samples=samples))
             X_pruned = prune_inferior_points(model=mm, X=X)
         self.assertTrue(torch.equal(X_pruned, X))
         # test max_frac limiting
         with mock.patch.object(MockPosterior,
                                "rsample",
                                return_value=samples):
             mm = MockModel(MockPosterior(samples=samples))
             X_pruned = prune_inferior_points(model=mm, X=X, max_frac=2 / 3)
         if self.device == torch.device("cuda"):
             # sorting has different order on cuda
             self.assertTrue(
                 torch.equal(X_pruned, torch.stack([X[2], X[1]], dim=0)))
         else:
             self.assertTrue(torch.equal(X_pruned, X[:2]))
         # test that zero-probability is in fact pruned
         samples[2, 0, 0] = 10
         with mock.patch.object(MockPosterior,
                                "rsample",
                                return_value=samples):
             mm = MockModel(MockPosterior(samples=samples))
             X_pruned = prune_inferior_points(model=mm, X=X)
         self.assertTrue(torch.equal(X_pruned, X[:2]))
         # test high-dim sampling
         with ExitStack() as es:
             mock_event_shape = es.enter_context(
                 mock.patch(
                     "botorch.utils.testing.MockPosterior.base_sample_shape",
                     new_callable=mock.PropertyMock,
                 ))
             mock_event_shape.return_value = torch.Size(
                 [1, 1, torch.quasirandom.SobolEngine.MAXDIM + 1])
             es.enter_context(
                 mock.patch.object(MockPosterior,
                                   "rsample",
                                   return_value=samples))
             mm = MockModel(MockPosterior(samples=samples))
             with warnings.catch_warnings(
                     record=True) as ws, settings.debug(True):
                 prune_inferior_points(model=mm, X=X)
                 self.assertTrue(
                     issubclass(ws[-1].category, SamplingWarning))
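The mocked test above pins down the semantics of `prune_inferior_points`; against a real model the call pattern is the same. A minimal sketch, assuming a standard BoTorch install (the training data and the `max_frac` value are illustrative only):

# Hedged usage sketch for prune_inferior_points (illustrative data and settings).
import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.acquisition.utils import prune_inferior_points
from gpytorch.mlls import ExactMarginalLogLikelihood

train_X = torch.rand(20, 2, dtype=torch.double)
train_Y = torch.sin(train_X.sum(dim=-1, keepdim=True) * 3.0)
model = SingleTaskGP(train_X, train_Y)
fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

# keep only points with non-negligible probability of being the best,
# retaining at most half of the candidates
X_pruned = prune_inferior_points(model=model, X=train_X, max_frac=0.5)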
Example No. 20
def make_bayes_opt_functions(args): 
    '''
    Generates and returns functions used to run Bayesian optimization
    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file

    '''
    header = []

    # depending on mode, set parameter bounds 
    if args.measures_optimized:
        param_bounds = settings_measures_param_bounds
    else:
        param_bounds = settings_model_param_bounds

    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    mob_settings = args.mob
    data_area = args.area
    data_country = args.country

    # initialize mobility object to obtain information (no trace generation yet)
    with open(mob_settings, 'rb') as fp:
        kwargs = pickle.load(fp)
    mob = MobilitySimulator(**kwargs)
    
    # data settings
    verbose = not args.not_verbose
    use_households = not args.no_households
    data_start_date = args.start
    data_end_date = args.end
    debug_simulation_days = args.endsimat

    # simulation settings
    n_init_samples = args.ninit
    n_iterations = args.niters
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    dynamic_tracing = not args.no_dynamic_tracing
    load_observations = args.load

    # set testing parameters
    testing_params = settings_testing_params

    # BO acquisition function optimization (Knowledge gradient)
    acqf_opt_num_fantasies = args.acqf_opt_num_fantasies
    acqf_opt_num_restarts = args.acqf_opt_num_restarts
    acqf_opt_raw_samples = args.acqf_opt_raw_samples
    acqf_opt_batch_limit = args.acqf_opt_batch_limit
    acqf_opt_maxiter = args.acqf_opt_maxiter

    """
    Bayesian optimization pipeline
    """


    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    new_cases_ = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                      start_date_string=data_start_date, end_date_string=data_end_date)
    assert(len(new_cases_.shape) == 2)

    if new_cases_[0].sum() == 0:
        print('No positive cases at provided start time; cannot seed simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        exit(0)

    # Scale down cases based on number of people in town, region, and downsampling
    new_cases = np.ceil(
        (new_cases_ * mob.num_people_unscaled) /
        (mob.downsample * mob.region_population))
    num_age_groups = new_cases.shape[1]
    header.append('Downsampling : ' + str(mob.downsample))
    header.append('Town population: ' + str(mob.num_people))
    header.append('Town population (unscaled): ' + str(mob.num_people_unscaled))
    header.append('Region population : ' + str(mob.region_population))

    # Set daily test capacity via (a) the command line; or (b) the maximum daily increase in positive cases over the observed period
    if args.testingcap:
        testing_params['tests_per_batch'] = (args.testingcap / mob.num_people_unscaled)
    else:
        daily_increase = new_cases.sum(axis=1)[1:] - new_cases.sum(axis=1)[:-1]
        testing_params['tests_per_batch'] = int(daily_increase.max())

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert(int(testing_params['test_reporting_lag']) % 24 == 0)

    # generate initial seeds based on case numbers
    initial_seeds = gen_initial_seeds(new_cases)
    header.append('Initial seed counts : ' + str(initial_seeds))

    # in debug mode, shorten the simulated time window
    if debug_simulation_days:
        new_cases = new_cases[:debug_simulation_days]

    # Maximum time fixed by real data, init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(new_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, dynamic_tracing=True)

    header.append(
        'Daily test capacity in sim.: ' + str(testing_params['tests_per_batch']))
    header.append(
        'Max time T (days): ' + str(new_cases.shape[0]))
    header.append(
        'Target cases per age group at t=0:   ' + str(list(map(int, new_cases[0].tolist()))))
    header.append(
        'Target cases per age group at t=T:   ' + str(list(map(int, new_cases[-1].tolist()))))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = new_cases.shape
    G_obs = torch.tensor(new_cases).reshape(n_days * n_age)  # flattened

    sim_bounds = pdict_to_parr(param_bounds, measures_optimized=args.measures_optimized).T

    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: ' + str(parr_to_pdict(sim_bounds.T, measures_optimized=args.measures_optimized)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(args.start)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(
        settings_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(
        settings_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f'             ends at : {sim_end_date}')
    header.append(f'Lockdown   starts at : {lockdown_start_date}')
    header.append(f'             ends at : {lockdown_end_date}')
    
    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        dynamic_tracing=dynamic_tracing,
        verbose=False)


    '''
    Define central functions for optimization
    '''

    G_obs = torch.tensor(new_cases).reshape(1, n_days * n_age)
    
    def composite_squared_loss(G):
        '''
        Objective function
        Note: in BO, objectives are maximized
        '''
        return - (G - G_obs).pow(2).sum(dim=-1)

    # select objective
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes case difference of predictions and ground truth at t=T
        '''
        return preds.reshape(n_days, n_age)[-1].sum() - torch.tensor(new_cases)[-1].sum()

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize settings based on which parameters are calibrated
        kwargs = copy.deepcopy(launch_kwargs)
        if args.measures_optimized:

            '''
            Measures are calibrated
            '''

            measure_params = parr_to_pdict(params, measures_optimized=args.measures_optimized)

            # social distancing measures: calibration is only done for `SocialDistancingForAllMeasure` for now
            measure_list_ = [
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
                SocialDistancingForAllMeasure(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    p_stay_home=measure_params['p_stay_home']),
            ]
            
            # close sites if specified
            if args.measures_close:
                beta_multipliers = {'education': 1.0, 'social': 1.0,
                                'bus_stop': 1.0, 'office': 1.0, 'supermarket': 1.0}
                for category in args.measures_close:
                    if category in beta_multipliers.keys():
                        beta_multipliers[category] = 0.0
                    else:
                        raise ValueError(f'Site type `{category}` passed in `--measures_close` is invalid.\n'
                                         f'Available are {str(list(beta_multipliers.keys()))}')
                
                measure_list_.append(BetaMultiplierMeasureByType(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    beta_multiplier=beta_multipliers
                ))
            
            kwargs['measure_list'] = MeasureList(measure_list_)

            # get optimized model parameters for this country and area
            calibrated_model_params = settings_optimized_town_params[args.country][args.area]
            if calibrated_model_params is None:
                raise ValueError(f'Cannot optimize measures for {args.country}-{args.area} because model parameters ' 
                                  'have not been fitted yet. Set values in `calibration_settings.py`')
            kwargs['params'] = calibrated_model_params

        else:

            '''
            Model parameters calibrated
            '''
            
            kwargs['measure_list'] = MeasureList([
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
            ])

            kwargs['params'] = parr_to_pdict(params, measures_optimized=args.measures_optimized)


        # run simulations in parallel
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS # account for test lag

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute mean and standard error of the mean
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is nonzero to avoid degeneracy
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        G = G.reshape(1, n_days * n_age)
        G_sem = G_sem.reshape(1, n_days * n_age)

        return G, G_sem


    def generate_initial_observations(n, logger):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter settings and black-box function outputs.
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and the GP need at least one observation to be defined properly.')

        # sobol sequence
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # simulator observations
        # new_G, new_G_sem: [n, n_days * n_age] (flattened outputs)
        new_G = torch.zeros((n, n_days * n_age), dtype=torch.float)
        new_G_sem = torch.zeros((n, n_days * n_age), dtype=torch.float)

        for i in range(n):

            t0 = time.time()

            # get mean and standard error of mean (sem) of every simulation output
            G, G_sem = composite_simulation(new_thetas[i, :])
            new_G[i, :] = G
            new_G_sem[i, :] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()
            case_diff = (
                G.reshape(n_days, n_age)[-1].sum()
                - G_obs.reshape(n_days, n_age)[-1].sum())

            t1 = time.time()
            logger.log(
                i=i - n,
                time=t1 - t0,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename + '_init')

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean)
        """
        
        train_ynoise = train_y_sem.pow(2.0) # noise is in variance units
        
        # standardize outputs to zero mean and unit variance for stable hyperparameter tuning
        model = FixedNoiseGP(train_x, train_y, train_ynoise, outcome_transform=Standardize(m=n_days * n_age))

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults:  num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        """

        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for initialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta.squeeze())

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective, 
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
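For orientation, here is a sketch of how the functions returned above might be wired into the outer calibration loop. This is not part of the source; `args` and `logger` are placeholders, and the acquisition settings mirror the `acqf_opt_*` arguments parsed above:

# Hypothetical driver loop for make_bayes_opt_functions (sketch only).
import torch
from botorch.acquisition import qKnowledgeGradient
from botorch.fit import fit_gpytorch_model

(objective, generate_initial_observations, initialize_model,
 optimize_acqf_and_get_observation, case_diff, unnormalize_theta,
 header) = make_bayes_opt_functions(args)   # `args` from the CLI parser

train_theta, train_G, train_G_sem, best_f, _ = \
    generate_initial_observations(args.ninit, logger)   # `logger` is a placeholder

for _ in range(args.niters):
    mll, model = initialize_model(train_theta, train_G, train_G_sem)
    fit_gpytorch_model(mll)   # fit GP hyperparameters
    acqf = qKnowledgeGradient(
        model, objective=objective,
        num_fantasies=args.acqf_opt_num_fantasies)
    new_theta, new_G, new_G_sem = optimize_acqf_and_get_observation(acqf, args)
    train_theta = torch.cat([train_theta, new_theta])
    train_G = torch.cat([train_G, new_G])
    train_G_sem = torch.cat([train_G_sem, new_G_sem])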
Example No. 21
def get_NEI(
    model: Model,
    objective_weights: Tensor,
    outcome_constraints: Optional[Tuple[Tensor, Tensor]] = None,
    X_observed: Optional[Tensor] = None,
    X_pending: Optional[Tensor] = None,
    **kwargs: Any,
) -> AcquisitionFunction:
    r"""Instantiates a qNoisyExpectedImprovement acquisition function.

    Args:
        model: The underlying model which the acquisition function uses
            to estimate acquisition values of candidates.
        objective_weights: The objective is to maximize a weighted sum of
            the columns of f(x). These are the weights.
        outcome_constraints: A tuple of (A, b). For k outcome constraints
            and m outputs at f(x), A is (k x m) and b is (k x 1) such that
            A f(x) <= b. (Not used by single task models)
        X_observed: A tensor containing points observed for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        X_pending: A tensor containing points whose evaluation is pending (i.e.
            that have been submitted for evaluation) present for all objective
            outcomes and outcomes that appear in the outcome constraints (if
            there are any).
        mc_samples: The number of MC samples to use (default: 512).
        qmc: If True, use qMC instead of MC (default: True).
        prune_baseline: If True, prune the baseline points for NEI (default: True).
        chebyshev_scalarization: Use augmented Chebyshev scalarization.

    Returns:
        qNoisyExpectedImprovement: The instantiated acquisition function.
    """
    if X_observed is None:
        raise ValueError("There are no feasible observed points.")
    # construct Objective module
    if kwargs.get("chebyshev_scalarization", False):
        if "Ys" not in kwargs:
            raise ValueError("Chebyshev Scalarization requires Ys argument")
        Y_tensor = torch.cat(kwargs.get("Ys"), dim=-1)
        obj_tf = get_chebyshev_scalarization(weights=objective_weights,
                                             Y=Y_tensor)
    else:
        obj_tf = get_objective_weights_transform(objective_weights)
    if outcome_constraints is None:
        objective = GenericMCObjective(objective=obj_tf)
    else:
        con_tfs = get_outcome_constraint_transforms(outcome_constraints)
        inf_cost = get_infeasible_cost(X=X_observed,
                                       model=model,
                                       objective=obj_tf)
        objective = ConstrainedMCObjective(objective=obj_tf,
                                           constraints=con_tfs or [],
                                           infeasible_cost=inf_cost)
    return get_acquisition_function(
        acquisition_function_name="qNEI",
        model=model,
        objective=objective,
        X_observed=X_observed,
        X_pending=X_pending,
        prune_baseline=kwargs.get("prune_baseline", True),
        mc_samples=kwargs.get("mc_samples", 512),
        qmc=kwargs.get("qmc", True),
        # pyre-fixme[6]: Expected `Optional[int]` for 9th param but got
        #  `Union[float, int]`.
        seed=torch.randint(1, 10000, (1, )).item(),
    )
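A minimal sketch of calling the helper above by hand; in Ax this constructor is normally invoked through the model bridge, so the direct call and the training data here are illustrative only:

# Illustrative direct call to get_NEI (normally wired up by Ax internals).
import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(10, 3, dtype=torch.double)
train_Y = -((train_X - 0.5) ** 2).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

# single-output maximization: weight 1.0 on the only outcome
acqf = get_NEI(
    model=model,
    objective_weights=torch.tensor([1.0], dtype=torch.double),
    X_observed=train_X,
    mc_samples=256,
    qmc=True,
)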
Example No. 22
def make_bayes_opt_functions(args): 
    '''
    Generates and returns functions used to run Bayesian optimization
    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file

    '''
    header = []

    # set parameter bounds based on calibration mode (single beta vs multiple beta)
    multi_beta_calibration = args.multi_beta_calibration
    if multi_beta_calibration:
        param_bounds = calibration_model_param_bounds_multi
    else:
        param_bounds = calibration_model_param_bounds_single
        
    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    data_country = args.country
    data_area = args.area
    mob_settings = args.mob or calibration_mob_paths[data_country][data_area][0] # 0: downscaled, 1: full scale 

    # initialize mobility object to obtain information (no trace generation yet)
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)
    
    # data settings
    verbose = not args.not_verbose
    use_households = not args.no_households
    data_start_date = args.start or calibration_start_dates[data_country][data_area]
    data_end_date = args.end or calibration_lockdown_dates[args.country]['end']
    per_age_group_objective = args.per_age_group_objective

    # simulation settings
    n_init_samples = args.ninit
    n_iterations = args.niters
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    lazy_contacts = not args.no_lazy_contacts
    load_observations = args.load

    # set testing parameters
    testing_params = calibration_testing_params

    # BO acquisition function optimization (Knowledge gradient)
    acqf_opt_num_fantasies = args.acqf_opt_num_fantasies
    acqf_opt_num_restarts = args.acqf_opt_num_restarts
    acqf_opt_raw_samples = args.acqf_opt_raw_samples
    acqf_opt_batch_limit = args.acqf_opt_batch_limit
    acqf_opt_maxiter = args.acqf_opt_maxiter

    """
    Bayesian optimization pipeline
    """

    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    unscaled_area_cases = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                               start_date_string=data_start_date, end_date_string=data_end_date)
    assert(len(unscaled_area_cases.shape) == 2)

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    # Generate initial seeds based on unscaled case numbers in town
    initial_seeds = gen_initial_seeds(
        sim_cases, day=0)

    if sum(initial_seeds.values()) == 0:
        print('No states seeded at start time; cannot start simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        exit(0)

    num_age_groups = sim_cases.shape[1]
    header.append('Downsampling :                    {}'.format(mob.downsample))
    header.append('Simulation population:            {}'.format(mob.num_people))
    header.append('Simulation population (unscaled): {}'.format(mob.num_people_unscaled))
    header.append('Area population :                 {}'.format(mob.region_population))
    header.append('Initial seed counts :             {}'.format(initial_seeds))

    scaled_test_capacity = get_test_capacity(
        country=data_country, area=data_area, 
        mob_settings=mob_kwargs, end_date_string=data_end_date)

    testing_params['tests_per_batch'] = scaled_test_capacity

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert(int(testing_params['test_reporting_lag']) % 24 == 0)

    # Maximum time fixed by real data, init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(sim_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # simulate longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, lazy_contacts=True)

    header.append(
        'Target cases per age group at t=0:   {} {}'.format(sim_cases[0].sum().item(), list(sim_cases[0].tolist())))
    header.append(
        'Target cases per age group at t=T:   {} {}'.format(sim_cases[-1].sum().item(), list(sim_cases[-1].tolist())))
    header.append(
        'Daily test capacity in sim.:         {}'.format(testing_params['tests_per_batch']))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = sim_cases.shape
    
    sim_bounds = pdict_to_parr(
        pdict=param_bounds, 
        multi_beta_calibration=multi_beta_calibration
    ).T

    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: {}'.format(parr_to_pdict(parr=sim_bounds.T, multi_beta_calibration=multi_beta_calibration)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(data_start_date)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(
        calibration_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(
        calibration_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f'             ends at : {sim_end_date}')
    header.append(f'Lockdown   starts at : {lockdown_start_date}')
    header.append(f'             ends at : {lockdown_end_date}')
    header.append(f'Cases compared until : {pd.to_datetime(data_end_date)}')
    header.append(f'            for days : {sim_cases.shape[0]}')
    
    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        lazy_contacts=lazy_contacts,
        verbose=False)


    '''
    Define central functions for optimization
    '''

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    '''
    Objective function
    Note: in BO and botorch, objectives are maximized
    '''
    if per_age_group_objective:
        def composite_squared_loss(G):
            return - (G - G_obs).pow(2).sum(dim=-1) / n_days

    else:
        def composite_squared_loss(G):
            return - (G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days


    # select objective function
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes aggregate case difference of predictions and ground truth at t=T
        '''
        if per_age_group_objective:
            return preds[-1].sum(dim=-1) - G_obs_aggregate[-1]
        else:
            return preds[-1] - G_obs_aggregate[-1]

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize model parameters based on given parameters and calibration mode
        kwargs = copy.deepcopy(launch_kwargs)        
        all_params = parr_to_pdict(parr=params, multi_beta_calibration=multi_beta_calibration)

        if multi_beta_calibration:
            betas = all_params['betas']
        else:
            betas = {
                'education': all_params['beta_site'],
                'social': all_params['beta_site'],
                'bus_stop': all_params['beta_site'],
                'office': all_params['beta_site'],
                'supermarket': all_params['beta_site'],
            }

        model_params = {
            'betas' : betas,
            'beta_household' : all_params['beta_household'],
        }

        # set exposure parameters
        kwargs['params'] = model_params

        # set measure parameters
        kwargs['measure_list'] = MeasureList([
            # standard behavior of positively tested: full isolation
            SocialDistancingForPositiveMeasure(
                t_window=Interval(0.0, max_time), p_stay_home=1.0),
            SocialDistancingForPositiveMeasureHousehold(
                t_window=Interval(0.0, max_time), p_isolate=1.0),

            # social distancing factor during lockdown: calibrated
            SocialDistancingForAllMeasure(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                p_stay_home=all_params['p_stay_home']),

            # site specific measures: fixed in advance, outside of calibration
            BetaMultiplierMeasureByType(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                beta_multiplier=calibration_lockdown_beta_multipliers)
        ])

        # run simulations in parallel
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS # account for test lag in objective computation

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)

        # (random_repeats, n_days, n_age_groups)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')
        
        # compute aggregate if not using objective per age-group
        if not per_age_group_objective:
            posi_cumulative = posi_cumulative.sum(dim=-1)

        # compute mean and standard error of the mean
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is nonzero to avoid degeneracy
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        if per_age_group_objective:
            G = G.reshape(n_days * n_age)
            G_sem = G_sem.reshape(n_days * n_age)

        return G, G_sem

    def generate_initial_observations(n, logger, loaded_init_theta=None, loaded_init_G=None, loaded_init_G_sem=None):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter settings and black-box function outputs.
        If `loaded_init_theta/G/G_sem` are specified, initialization is loaded (possibly partially, in which
        case the initialization using the Sobol random sequence is continued where left off).
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and the GP need at least one observation to be defined properly.')

        # sobol sequence proposal points
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # check whether initial observations are loaded
        loaded = (loaded_init_theta is not None
              and loaded_init_G is not None 
              and loaded_init_G_sem is not None)
        if loaded:
            n_loaded = loaded_init_theta.shape[0] # loaded no. of observations total
            n_loaded_init = min(n_loaded, n)      # loaded no. of quasi-random initialization observations
            n_init = max(n_loaded, n)             # final no. of observations returned, at least quasi-random initializations

            # check whether the loaded proposal points match those that would be generated without loading
            try:
                assert(np.allclose(loaded_init_theta[:n_loaded_init], new_thetas[:n_loaded_init]))
            except AssertionError:
                print(
                    '\n\n\n===> Warning: parameters of loaded initial observations '
                    'do not coincide with the initialization that would have been done. '
                    'Double check simulation, ninit, and parameter bounds, which could change '
                    'the initial random Sobol sequence. \nThe loaded parameter settings are used. \n\n\n'
                )
            
            if n_init > n:
                new_thetas = loaded_init_theta # size of tensor increased to `n_init`, as more than Sobol init points loaded

        else:
            n_loaded = 0       # loaded no. of observations total
            n_loaded_init = 0  # loaded no. of quasi-random initialization observations
            n_init = n         # final no. of observations returned, at least quasi-random initializations

        # instantiate simulator observation tensors
        if per_age_group_objective:
            # new_G, new_G_sem: [n_init, n_days * n_age] (flattened outputs)
            new_G = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
        else:
            # new_G, new_G_sem: [n_init, n_days]
            new_G = torch.zeros((n_init, n_days), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days), dtype=torch.float)

        # generate `n_init` initial evaluations at quasi-random settings; where applicable, skip the expensive evaluation and use the loaded result
        for i in range(n_init):
            
            # if loaded, use the initial observation for these parameter settings
            if loaded and i <= n_loaded - 1:
                new_thetas[i] = loaded_init_theta[i]
                G, G_sem = loaded_init_G[i], loaded_init_G_sem[i]
                walltime = 0.0

            # if not loaded, evaluate as usual
            else:
                t0 = time.time()
                G, G_sem = composite_simulation(new_thetas[i])
                walltime = time.time() - t0

            new_G[i] = G
            new_G_sem[i] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()

            if per_age_group_objective:
                case_diff = G.reshape(n_days, n_age)[-1].sum() - G_obs_aggregate[-1]
            else:
                case_diff = G[-1] - G_obs_aggregate[-1]
            
            logger.log(
                i=i - n,
                time=walltime,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename)

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean)
        """
        
        train_ynoise = train_y_sem.pow(2.0) # noise is in variance units
        
        # standardize outputs to zero mean and unit variance for stable hyperparameter tuning
        outcome_transform = Standardize(m=n_days * n_age if per_age_group_objective else n_days)
        model = FixedNoiseGP(train_x, train_y, train_ynoise, outcome_transform=outcome_transform)

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults:  num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        """

        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for initialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach().squeeze()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta)

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective, 
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
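The `initialize_model` helper above pairs `FixedNoiseGP` with a `Standardize` outcome transform. A standalone sketch of that pattern, with illustrative shapes and synthetic data:

# Standalone sketch of the FixedNoiseGP + Standardize pattern (synthetic data).
import torch
from botorch.models import FixedNoiseGP
from botorch.models.transforms.outcome import Standardize
from botorch.fit import fit_gpytorch_model
from gpytorch.mlls import ExactMarginalLogLikelihood

n, d, m = 12, 4, 3                       # observations, parameters, outputs
train_x = torch.rand(n, d)               # unit-cube parameter settings
train_y = torch.randn(n, m)              # simulator output means
train_yvar = torch.full((n, m), 1e-2)    # SEM squared, i.e. noise variances

model = FixedNoiseGP(train_x, train_y, train_yvar,
                     outcome_transform=Standardize(m=m))
mll = ExactMarginalLogLikelihood(model.likelihood, model)
fit_gpytorch_model(mll)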
Example No. 23
    def test_cache_root(self):
        sample_cached_path = (
            "botorch.acquisition.cached_cholesky.sample_cached_cholesky")
        raw_state_dict = {
            "likelihood.noise_covar.raw_noise":
            torch.tensor([[0.0895], [0.2594]], dtype=torch.float64),
            "mean_module.constant":
            torch.tensor([[-0.4545], [-0.1285]], dtype=torch.float64),
            "covar_module.raw_outputscale":
            torch.tensor([1.4876, 1.4897], dtype=torch.float64),
            "covar_module.base_kernel.raw_lengthscale":
            torch.tensor([[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]],
                         dtype=torch.float64),
        }
        # test batched models (e.g. for MCMC)
        for train_batch_shape, m, dtype in product(
            (torch.Size([]), torch.Size([3])), (1, 2),
            (torch.float, torch.double)):
            state_dict = deepcopy(raw_state_dict)
            for k, v in state_dict.items():
                if m == 1:
                    v = v[0]
                if len(train_batch_shape) > 0:
                    v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape)
                state_dict[k] = v
            tkwargs = {"device": self.device, "dtype": dtype}
            if m == 2:
                objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
            else:
                objective = None
            for k, v in state_dict.items():
                state_dict[k] = v.to(**tkwargs)
            all_close_kwargs = ({
                "atol": 1e-1,
                "rtol": 0.0,
            } if dtype == torch.float else {
                "atol": 1e-4,
                "rtol": 0.0
            })
            torch.manual_seed(1234)
            train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs)
            train_Y = (
                torch.sin(train_X * 2 * pi) +
                torch.randn(*train_batch_shape, 3, 2, **tkwargs))[..., :m]
            train_Y = standardize(train_Y)
            model = SingleTaskGP(
                train_X,
                train_Y,
            )
            if len(train_batch_shape) > 0:
                X_baseline = train_X[0]
            else:
                X_baseline = train_X
            model.load_state_dict(state_dict, strict=False)
            # test sampler with collapse_batch_dims=False
            sampler = IIDNormalSampler(5, seed=0, collapse_batch_dims=False)
            with self.assertRaises(UnsupportedError):
                qNoisyExpectedImprovement(
                    model=model,
                    X_baseline=X_baseline,
                    sampler=sampler,
                    objective=objective,
                    prune_baseline=False,
                    cache_root=True,
                )
            sampler = IIDNormalSampler(5, seed=0)
            torch.manual_seed(0)
            acqf = qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler,
                objective=objective,
                prune_baseline=False,
                cache_root=True,
            )

            orig_base_samples = acqf.base_sampler.base_samples.detach().clone()
            sampler2 = IIDNormalSampler(5, seed=0)
            sampler2.base_samples = orig_base_samples
            torch.manual_seed(0)
            acqf_no_cache = qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler2,
                objective=objective,
                prune_baseline=False,
                cache_root=False,
            )
            for q, batch_shape in product(
                (1, 3), (torch.Size([]), torch.Size([3]), torch.Size([4, 3]))):
                test_X = (0.3 +
                          0.05 * torch.randn(*batch_shape, q, 2, **tkwargs)
                          ).requires_grad_(True)
                with mock.patch(
                        sample_cached_path,
                        wraps=sample_cached_cholesky) as mock_sample_cached:
                    torch.manual_seed(0)
                    val = acqf(test_X)
                    mock_sample_cached.assert_called_once()
                val.sum().backward()
                base_samples = acqf.sampler.base_samples.detach().clone()
                X_grad = test_X.grad.clone()
                test_X2 = test_X.detach().clone().requires_grad_(True)
                acqf_no_cache.sampler.base_samples = base_samples
                with mock.patch(
                        sample_cached_path,
                        wraps=sample_cached_cholesky) as mock_sample_cached:
                    torch.manual_seed(0)
                    val2 = acqf_no_cache(test_X2)
                mock_sample_cached.assert_not_called()
                self.assertTrue(torch.allclose(val, val2, **all_close_kwargs))
                val2.sum().backward()
                self.assertTrue(
                    torch.allclose(X_grad, test_X2.grad, **all_close_kwargs))
            # test we fall back to standard sampling for
            # ill-conditioned covariances
            acqf._baseline_L = torch.zeros_like(acqf._baseline_L)
            with warnings.catch_warnings(
                    record=True) as ws, settings.debug(True):
                with torch.no_grad():
                    acqf(test_X)
            self.assertEqual(len(ws), 1)
            self.assertTrue(issubclass(ws[-1].category, BotorchWarning))
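Outside the mocked setup, a similar cached-vs-uncached comparison can be run directly. A sketch with illustrative data and sampler sizes; both acquisition functions are MC estimates of the same qNEI value, so agreement improves as the number of samples grows:

# Sketch: qNEI with and without root caching (illustrative data).
import torch
from botorch.models import SingleTaskGP
from botorch.acquisition.monte_carlo import qNoisyExpectedImprovement
from botorch.sampling.samplers import IIDNormalSampler

train_X = torch.rand(8, 2, dtype=torch.double)
train_Y = torch.sin(train_X * 3.0).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

acqf_cached = qNoisyExpectedImprovement(
    model, X_baseline=train_X, sampler=IIDNormalSampler(128, seed=0),
    prune_baseline=False, cache_root=True)
acqf_plain = qNoisyExpectedImprovement(
    model, X_baseline=train_X, sampler=IIDNormalSampler(128, seed=0),
    prune_baseline=False, cache_root=False)

test_X = torch.rand(4, 1, 2, dtype=torch.double)
# both estimate the same quantity by Monte Carlo
print(acqf_cached(test_X), acqf_plain(test_X))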
Example No. 24
 def test_evaluate_q_knowledge_gradient(self):
     for dtype in (torch.float, torch.double):
         # basic test
         n_f = 4
         mean = torch.rand(n_f, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         with mock.patch.object(MockModel, "fantasize",
                                return_value=mfm) as patch_f:
             mm = MockModel(None)
             qKG = qKnowledgeGradient(model=mm, num_fantasies=n_f)
             X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
             val = qKG(X)
             patch_f.assert_called_once()
             cargs, ckwargs = patch_f.call_args
             self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1]))
         self.assertTrue(torch.allclose(val, mean.mean(), atol=1e-4))
         self.assertTrue(
             torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # batched evaluation
         b = 2
         mean = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, b, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         X = torch.rand(b, n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize",
                                return_value=mfm) as patch_f:
             mm = MockModel(None)
             qKG = qKnowledgeGradient(model=mm, num_fantasies=n_f)
             val = qKG(X)
             patch_f.assert_called_once()
             cargs, ckwargs = patch_f.call_args
             self.assertEqual(ckwargs["X"].shape, torch.Size([b, 1, 1]))
         self.assertTrue(
             torch.allclose(val, mean.mean(dim=0).squeeze(-1), atol=1e-4))
         self.assertTrue(
             torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # pending points and current value
         mean = torch.rand(n_f, 1, device=self.device, dtype=dtype)
         variance = torch.rand(n_f, 1, device=self.device, dtype=dtype)
         X_pending = torch.rand(2, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(mean=mean, variance=variance))
         current_value = torch.rand(1, device=self.device, dtype=dtype)
         X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize",
                                return_value=mfm) as patch_f:
             mm = MockModel(None)
             qKG = qKnowledgeGradient(
                 model=mm,
                 num_fantasies=n_f,
                 X_pending=X_pending,
                 current_value=current_value,
             )
             val = qKG(X)
             patch_f.assert_called_once()
             cargs, ckwargs = patch_f.call_args
             self.assertEqual(ckwargs["X"].shape, torch.Size([3, 1]))
         self.assertTrue(
             torch.allclose(val, mean.mean() - current_value, atol=1e-4))
         self.assertTrue(
             torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
         # test objective (inner MC sampling)
         objective = GenericMCObjective(objective=lambda Y: Y.norm(dim=-1))
         samples = torch.randn(3, 1, 1, device=self.device, dtype=dtype)
         mfm = MockModel(MockPosterior(samples=samples))
         X = torch.rand(n_f + 1, 1, device=self.device, dtype=dtype)
         with mock.patch.object(MockModel, "fantasize",
                                return_value=mfm) as patch_f:
             mm = MockModel(None)
             qKG = qKnowledgeGradient(model=mm,
                                      num_fantasies=n_f,
                                      objective=objective)
             val = qKG(X)
             patch_f.assert_called_once()
             cargs, ckwargs = patch_f.call_args
             self.assertEqual(ckwargs["X"].shape, torch.Size([1, 1]))
         self.assertTrue(
             torch.allclose(val, objective(samples).mean(), atol=1e-4))
         self.assertTrue(
             torch.equal(qKG.extract_candidates(X), X[..., :-n_f, :]))
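Finally, a sketch of the non-mocked call pattern this test exercises: one-shot qKG with a `GenericMCObjective` on a real GP, optimized via `optimize_acqf`. Bounds, sample counts, and data are illustrative:

# Sketch: one-shot qKG with a GenericMCObjective (illustrative settings).
import torch
from botorch.models import SingleTaskGP
from botorch.acquisition import qKnowledgeGradient
from botorch.acquisition.objective import GenericMCObjective
from botorch.optim import optimize_acqf

train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = -((train_X - 0.5) ** 2).sum(dim=-1, keepdim=True)
model = SingleTaskGP(train_X, train_Y)

qKG = qKnowledgeGradient(
    model, num_fantasies=8,
    objective=GenericMCObjective(lambda Y: Y.squeeze(-1)),
)
bounds = torch.tensor([[0.0, 0.0], [1.0, 1.0]], dtype=torch.double)
# optimize_acqf handles the one-shot fantasy parametrization internally
candidate, acq_value = optimize_acqf(
    qKG, bounds=bounds, q=1, num_restarts=4, raw_samples=64)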