Example 1
 def _setUp(self, double=False, cuda=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     dtype = torch.double if double else torch.float
     train_x = torch.linspace(0, 1, 10, device=device, dtype=dtype).unsqueeze(-1)
     train_y = torch.sin(train_x * (2 * math.pi)).squeeze(-1)
     train_yvar = torch.tensor(0.1 ** 2, device=device, dtype=dtype)
     noise = torch.tensor(NOISE, device=device, dtype=dtype)
     self.train_x = train_x
     self.train_y = train_y + noise
     self.train_yvar = train_yvar
     self.bounds = torch.tensor([[0.0], [1.0]], device=device, dtype=dtype)
     model_st = SingleTaskGP(self.train_x, self.train_y)
     self.model_st = model_st.to(device=device, dtype=dtype)
     self.mll_st = ExactMarginalLogLikelihood(
         self.model_st.likelihood, self.model_st
     )
     self.mll_st = fit_gpytorch_model(self.mll_st, options={"maxiter": 5})
     model_fn = FixedNoiseGP(
         self.train_x, self.train_y, self.train_yvar.expand_as(self.train_y)
     )
     self.model_fn = model_fn.to(device=device, dtype=dtype)
     self.mll_fn = ExactMarginalLogLikelihood(
         self.model_fn.likelihood, self.model_fn
     )
     self.mll_fn = fit_gpytorch_model(self.mll_fn, options={"maxiter": 5})
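The fixture above relies on module-level imports and a NOISE constant that the excerpt does not show. A minimal header that would make it runnable might look like the following; the NOISE values are a placeholder assumption, not the originals.

import math

import torch
from botorch.fit import fit_gpytorch_model
from botorch.models import FixedNoiseGP, SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood

# Placeholder per-point observation noise (assumption); the original test
# defines its own NOISE values.
NOISE = [0.1] * 10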
Example 2
 def _setUp(self, double=False, cuda=False, expand=False):
     device = torch.device("cuda") if cuda else torch.device("cpu")
     dtype = torch.double if double else torch.float
     train_x = torch.linspace(0, 1, 10, device=device, dtype=dtype).unsqueeze(-1)
     train_y = torch.sin(train_x * (2 * math.pi)).squeeze(-1)
     noise = torch.tensor(NOISE, device=device, dtype=dtype)
     self.train_x = train_x
     self.train_y = train_y + noise
     if expand:
         self.train_x = self.train_x.expand(-1, 2)
         ics = torch.tensor([[0.5, 1.0]], device=device, dtype=dtype)
     else:
         ics = torch.tensor([[0.5]], device=device, dtype=dtype)
     self.initial_conditions = ics
     self.f_best = self.train_y.max().item()
     model = SingleTaskGP(self.train_x, self.train_y)
     self.model = model.to(device=device, dtype=dtype)
     self.mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
     self.mll = fit_gpytorch_model(self.mll, options={"maxiter": 1})
Example 3
 def test_noisy_expected_improvement(self, cuda=False):
     for dtype in (torch.float, torch.double):
         model = self._get_model(cuda=cuda, dtype=dtype)
         X_observed = model.train_inputs[0]
         nEI = NoisyExpectedImprovement(model, X_observed, num_fantasies=5)
         X_test = torch.tensor(
             [[[0.25]], [[0.75]]],
             device=X_observed.device,
             dtype=dtype,
             requires_grad=True,
         )
         val = nEI(X_test)
         # test basics
         self.assertEqual(val.dtype, dtype)
         self.assertEqual(val.device.type, X_observed.device.type)
         self.assertEqual(val.shape, torch.Size([2]))
         # test values
         self.assertGreater(val[0].item(), 1e-4)
         self.assertLess(val[1].item(), 1e-6)
         # test gradient
         val.sum().backward()
         self.assertGreater(X_test.grad[0].abs().item(), 1e-4)
         # test without gradient
         with torch.no_grad():
             nEI(X_test)
         # test non-FixedNoiseGP model
         other_model = SingleTaskGP(X_observed,
                                    model.train_targets.unsqueeze(-1))
         with self.assertRaises(UnsupportedError):
             NoisyExpectedImprovement(other_model,
                                      X_observed,
                                      num_fantasies=5)
         # test with minimize
         nEI = NoisyExpectedImprovement(model,
                                        X_observed,
                                        num_fantasies=5,
                                        maximize=False)
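Outside a unit test, the same acquisition function would typically be handed to optimize_acqf to propose the next point. A minimal end-to-end sketch, assuming a FixedNoiseGP on a 1-d input in [0, 1] (hyperparameters are left at their defaults for brevity):

import math

import torch
from botorch.acquisition.analytic import NoisyExpectedImprovement
from botorch.models import FixedNoiseGP
from botorch.optim import optimize_acqf

train_X = torch.rand(10, 1)
train_Y = torch.sin(2 * math.pi * train_X)
model = FixedNoiseGP(train_X, train_Y, train_Yvar=torch.full_like(train_Y, 0.01))
nEI = NoisyExpectedImprovement(model, X_observed=train_X, num_fantasies=20)
bounds = torch.tensor([[0.0], [1.0]])
# the analytic version only supports q=1
candidate, value = optimize_acqf(nEI, bounds=bounds, q=1, num_restarts=5, raw_samples=64)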
Example 4
    def _sample(self, candidates: Optional[np.ndarray] = None) -> np.ndarray:
        if len(self.X_observed) < self.num_initial_random_draws:
            return self.initial_sampler.sample(candidates=candidates)
        else:
            z_observed = torch.Tensor(
                self.transform_outputs(self.y_observed.numpy()))

            # build and fit GP
            gp = SingleTaskGP(
                train_X=self.X_observed,
                train_Y=z_observed,
                # special likelihood for numerical Cholesky errors, following advice from
                # https://www.gitmemory.com/issue/pytorch/botorch/179/506276521
                likelihood=GaussianLikelihood(
                    noise_constraint=GreaterThan(1e-3)),
            )
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            fit_gpytorch_model(mll)

            acq = self.expected_improvement(
                model=gp,
                best_f=z_observed.min(dim=0).values,
            )

            if candidates is None:
                candidate, acq_value = optimize_acqf(
                    acq,
                    bounds=self.bounds_tensor,
                    q=1,
                    num_restarts=5,
                    raw_samples=100,
                )
                return candidate[0]
            else:
                # (N,)
                ei = acq(torch.Tensor(candidates).unsqueeze(dim=-2))
                return torch.Tensor(candidates[ei.argmax()])
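`self.expected_improvement`, `self.bounds_tensor`, and the other attributes belong to the enclosing sampler class. Since `best_f` is set to the observed minimum, the acquisition is presumably BoTorch's ExpectedImprovement configured for minimization; a standalone sketch of the same build-fit-optimize step under that assumption:

import torch
from botorch.acquisition import ExpectedImprovement
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

X = torch.rand(8, 2)
Y = (X ** 2).sum(dim=-1, keepdim=True)
gp = SingleTaskGP(train_X=X, train_Y=Y)
fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
# minimize: best_f is the smallest observed value
acq = ExpectedImprovement(gp, best_f=Y.min(), maximize=False)
bounds = torch.stack([torch.zeros(2), torch.ones(2)])
candidate, _ = optimize_acqf(acq, bounds=bounds, q=1, num_restarts=5, raw_samples=64)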
Example 5
 def test_roundtrip(self):
     for dtype in (torch.float, torch.double):
         train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
         train_Y1 = train_X.sum(dim=-1)
         train_Y2 = train_X[:, 0] - train_X[:, 1]
         train_Y = torch.stack([train_Y1, train_Y2], dim=-1)
         # SingleTaskGP
         batch_gp = SingleTaskGP(train_X, train_Y)
         list_gp = batched_to_model_list(batch_gp)
         batch_gp_recov = model_list_to_batched(list_gp)
         sd_orig = batch_gp.state_dict()
         sd_recov = batch_gp_recov.state_dict()
         self.assertTrue(set(sd_orig) == set(sd_recov))
         self.assertTrue(
             all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
         # FixedNoiseGP
         batch_gp = FixedNoiseGP(train_X, train_Y, torch.rand_like(train_Y))
         list_gp = batched_to_model_list(batch_gp)
         batch_gp_recov = model_list_to_batched(list_gp)
         sd_orig = batch_gp.state_dict()
         sd_recov = batch_gp_recov.state_dict()
         self.assertTrue(set(sd_orig) == set(sd_recov))
         self.assertTrue(
             all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
         # SingleTaskMultiFidelityGP
         for lin_trunc in (False, True):
             batch_gp = SingleTaskMultiFidelityGP(
                 train_X,
                 train_Y,
                 iteration_fidelity=1,
                 linear_truncated=lin_trunc)
             list_gp = batched_to_model_list(batch_gp)
             batch_gp_recov = model_list_to_batched(list_gp)
             sd_orig = batch_gp.state_dict()
             sd_recov = batch_gp_recov.state_dict()
             self.assertTrue(set(sd_orig) == set(sd_recov))
             self.assertTrue(
                 all(torch.equal(sd_orig[k], sd_recov[k]) for k in sd_orig))
Example 6
 def test_cache_root_decomposition(self):
     tkwargs = {"device": self.device}
     for dtype in (torch.float, torch.double):
         tkwargs["dtype"] = dtype
         # test mt-mvn
         train_x = torch.rand(2, 1, **tkwargs)
         train_y = torch.rand(2, 2, **tkwargs)
         test_x = torch.rand(2, 1, **tkwargs)
         model = SingleTaskGP(train_x, train_y)
         sampler = IIDNormalSampler(1)
         with torch.no_grad():
             posterior = model.posterior(test_x)
         acqf = DummyCachedCholeskyAcqf(
             model=model,
             sampler=sampler,
             objective=GenericMCObjective(lambda Y: Y[..., 0]),
         )
         baseline_L = torch.eye(2, **tkwargs)
         with mock.patch(
                 EXTRACT_BATCH_COVAR_PATH,
                 wraps=extract_batch_covar) as mock_extract_batch_covar:
             with mock.patch(CHOLESKY_PATH,
                             return_value=baseline_L) as mock_cholesky:
                 acqf._cache_root_decomposition(posterior=posterior)
                 mock_extract_batch_covar.assert_called_once_with(
                     posterior.mvn)
                 mock_cholesky.assert_called_once()
         # test mvn
         model = SingleTaskGP(train_x, train_y[:, :1])
         with torch.no_grad():
             posterior = model.posterior(test_x)
         with mock.patch(
                 EXTRACT_BATCH_COVAR_PATH) as mock_extract_batch_covar:
             with mock.patch(CHOLESKY_PATH,
                             return_value=baseline_L) as mock_cholesky:
                 acqf._cache_root_decomposition(posterior=posterior)
                 mock_extract_batch_covar.assert_not_called()
                 mock_cholesky.assert_called_once()
         self.assertTrue(torch.equal(acqf._baseline_L, baseline_L))
Example 7
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    weights = sample_simplex(n_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=weights, Y=train_obj)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])

        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :n_objectives]),
            constraints=constraints,
        )
    else:
        train_y = train_obj

        objective = GenericMCObjective(scalarization)

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=objective(train_y).max(),
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
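A hypothetical invocation, assuming the function's module-level imports (sample_simplex, get_chebyshev_scalarization, and the rest) are in scope. Shapes follow the qei_candidates_func convention referenced in the docstring, with one row per completed trial and bounds of shape 2 x n_params:

import torch

train_x = torch.rand(20, 3)    # 20 trials, 3 parameters
train_obj = torch.rand(20, 4)  # 4 objectives, i.e. the qParEGO regime
bounds = torch.stack([torch.zeros(3), torch.ones(3)])
candidate = qparego_candidates_func(train_x, train_obj, None, bounds)  # no constraints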
Example 8
    def test_model_list_to_batched(self):
        for dtype in (torch.float, torch.double):
            # basic test
            train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
            train_Y1 = train_X.sum(dim=-1, keepdim=True)
            train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
            gp1 = SingleTaskGP(train_X, train_Y1)
            gp2 = SingleTaskGP(train_X, train_Y2)
            list_gp = ModelListGP(gp1, gp2)
            batch_gp = model_list_to_batched(list_gp)
            self.assertIsInstance(batch_gp, SingleTaskGP)
            # test degenerate (single model)
            batch_gp = model_list_to_batched(ModelListGP(gp1))
            self.assertEqual(batch_gp._num_outputs, 1)
            # test different model classes
            gp2 = FixedNoiseGP(train_X, train_Y1, torch.ones_like(train_Y1))
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # test non-batched models
            gp1_ = SimpleGPyTorchModel(train_X, train_Y1)
            gp2_ = SimpleGPyTorchModel(train_X, train_Y2)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1_, gp2_))
            # test list of multi-output models
            train_Y = torch.cat([train_Y1, train_Y2], dim=-1)
            gp2 = SingleTaskGP(train_X, train_Y)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # test different training inputs
            gp2 = SingleTaskGP(2 * train_X, train_Y2)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # check scalar agreement
            gp2 = SingleTaskGP(train_X, train_Y2)
            gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # check tensor shape agreement
            gp2 = SingleTaskGP(train_X, train_Y2)
            gp2.covar_module.raw_outputscale = torch.nn.Parameter(
                torch.tensor([0.0], device=self.device, dtype=dtype))
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(ModelListGP(gp1, gp2))
            # test HeteroskedasticSingleTaskGP
            gp2 = HeteroskedasticSingleTaskGP(train_X, train_Y1,
                                              torch.ones_like(train_Y1))
            with self.assertRaises(NotImplementedError):
                model_list_to_batched(ModelListGP(gp2))
            # test custom likelihood
            gp2 = SingleTaskGP(train_X,
                               train_Y2,
                               likelihood=GaussianLikelihood())
            with self.assertRaises(NotImplementedError):
                model_list_to_batched(ModelListGP(gp2))
            # test FixedNoiseGP
            train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
            train_Y1 = train_X.sum(dim=-1, keepdim=True)
            train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
            gp1_ = FixedNoiseGP(train_X, train_Y1, torch.rand_like(train_Y1))
            gp2_ = FixedNoiseGP(train_X, train_Y2, torch.rand_like(train_Y2))
            list_gp = ModelListGP(gp1_, gp2_)
            batch_gp = model_list_to_batched(list_gp)
            # test SingleTaskMultiFidelityGP
            gp1_ = SingleTaskMultiFidelityGP(train_X,
                                             train_Y1,
                                             iteration_fidelity=1)
            gp2_ = SingleTaskMultiFidelityGP(train_X,
                                             train_Y2,
                                             iteration_fidelity=1)
            list_gp = ModelListGP(gp1_, gp2_)
            batch_gp = model_list_to_batched(list_gp)
            gp2_ = SingleTaskMultiFidelityGP(train_X,
                                             train_Y2,
                                             iteration_fidelity=2)
            list_gp = ModelListGP(gp1_, gp2_)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(list_gp)
            # test input transform
            input_tf = Normalize(
                d=2,
                bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]],
                                    device=self.device,
                                    dtype=dtype),
            )
            gp1_ = SingleTaskGP(train_X, train_Y1, input_transform=input_tf)
            gp2_ = SingleTaskGP(train_X, train_Y2, input_transform=input_tf)
            list_gp = ModelListGP(gp1_, gp2_)
            batch_gp = model_list_to_batched(list_gp)
            self.assertIsInstance(batch_gp.input_transform, Normalize)
            self.assertTrue(
                torch.equal(batch_gp.input_transform.bounds, input_tf.bounds))
            # test different input transforms
            input_tf2 = Normalize(
                d=2,
                bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]],
                                    device=self.device,
                                    dtype=dtype),
            )
            gp1_ = SingleTaskGP(train_X, train_Y1, input_transform=input_tf)
            gp2_ = SingleTaskGP(train_X, train_Y2, input_transform=input_tf2)
            list_gp = ModelListGP(gp1_, gp2_)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(list_gp)

            # test batched input transform
            input_tf2 = Normalize(
                d=2,
                bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]],
                                    device=self.device,
                                    dtype=dtype),
                batch_shape=torch.Size([3]),
            )
            gp1_ = SingleTaskGP(train_X, train_Y1, input_transform=input_tf2)
            gp2_ = SingleTaskGP(train_X, train_Y2, input_transform=input_tf2)
            list_gp = ModelListGP(gp1_, gp2_)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(list_gp)

            # test outcome transform
            octf = Standardize(m=1)
            gp1_ = SingleTaskGP(train_X, train_Y1, outcome_transform=octf)
            gp2_ = SingleTaskGP(train_X, train_Y2, outcome_transform=octf)
            list_gp = ModelListGP(gp1_, gp2_)
            with self.assertRaises(UnsupportedError):
                model_list_to_batched(list_gp)
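The happy path this test exercises is converting a ModelListGP of structurally identical single-output models into one batched model. A minimal sketch:

import torch
from botorch.models import ModelListGP, SingleTaskGP
from botorch.models.converter import model_list_to_batched

X = torch.rand(10, 2)
gp1 = SingleTaskGP(X, X.sum(dim=-1, keepdim=True))
gp2 = SingleTaskGP(X, X[:, :1] - X[:, 1:])
batched = model_list_to_batched(ModelListGP(gp1, gp2))  # a SingleTaskGP with 2 outputs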
Example 9
    def test_proximal(self):
        for dtype in (torch.float, torch.double):
            train_X = torch.rand(5, 3, device=self.device, dtype=dtype)
            train_Y = train_X.norm(dim=-1, keepdim=True)
            model = (SingleTaskGP(train_X, train_Y).to(device=self.device,
                                                       dtype=dtype).eval())
            EI = ExpectedImprovement(model, best_f=0.0)

            # test single point
            proximal_weights = torch.ones(3, device=self.device, dtype=dtype)
            test_X = torch.rand(1, 3, device=self.device, dtype=dtype)
            EI_prox = ProximalAcquisitionFunction(
                EI, proximal_weights=proximal_weights)

            ei = EI(test_X)
            mv_normal = MultivariateNormal(train_X[-1],
                                           torch.diag(proximal_weights))
            test_prox_weight = torch.exp(
                mv_normal.log_prob(test_X)) / torch.exp(
                    mv_normal.log_prob(train_X[-1]))

            ei_prox = EI_prox(test_X)
            self.assertTrue(torch.allclose(ei_prox, ei * test_prox_weight))
            self.assertTrue(ei_prox.shape == torch.Size([1]))

            # test t-batch with broadcasting
            test_X = torch.rand(4, 1, 3, device=self.device, dtype=dtype)
            ei = EI(test_X)
            mv_normal = MultivariateNormal(train_X[-1],
                                           torch.diag(proximal_weights))
            test_prox_weight = torch.exp(
                mv_normal.log_prob(test_X)) / torch.exp(
                    mv_normal.log_prob(train_X[-1]))

            ei_prox = EI_prox(test_X)
            self.assertTrue(
                torch.allclose(ei_prox, ei * test_prox_weight.flatten()))
            self.assertTrue(ei_prox.shape == torch.Size([4]))

            # test MC acquisition function
            qEI = qExpectedImprovement(model, best_f=0.0)
            test_X = torch.rand(4, 1, 3, device=self.device, dtype=dtype)
            qEI_prox = ProximalAcquisitionFunction(
                qEI, proximal_weights=proximal_weights)

            qei = qEI(test_X)
            mv_normal = MultivariateNormal(train_X[-1],
                                           torch.diag(proximal_weights))
            test_prox_weight = torch.exp(
                mv_normal.log_prob(test_X)) / torch.exp(
                    mv_normal.log_prob(train_X[-1]))

            qei_prox = qEI_prox(test_X)
            self.assertTrue(
                torch.allclose(qei_prox, qei * test_prox_weight.flatten()))
            self.assertTrue(qei_prox.shape == torch.Size([4]))

            # test gradient
            test_X = torch.rand(1,
                                3,
                                device=self.device,
                                dtype=dtype,
                                requires_grad=True)
            ei_prox = EI_prox(test_X)
            ei_prox.backward()

            # test model without train_inputs
            bad_model = DummyModel()
            with self.assertRaises(UnsupportedError):
                ProximalAcquisitionFunction(
                    ExpectedImprovement(bad_model, 0.0), proximal_weights)

            # test proximal weights that do not match training_inputs
            train_X = torch.rand(5, 1, 3, device=self.device, dtype=dtype)
            train_Y = train_X.norm(dim=-1, keepdim=True)
            model = SingleTaskGP(train_X,
                                 train_Y).to(device=self.device).eval()
            with self.assertRaises(ValueError):
                ProximalAcquisitionFunction(ExpectedImprovement(model, 0.0),
                                            proximal_weights[:1])

            with self.assertRaises(ValueError):
                ProximalAcquisitionFunction(
                    ExpectedImprovement(model, 0.0),
                    torch.rand(3, 3, device=self.device, dtype=dtype),
                )

            # test for x_pending points
            pending_acq = DummyAcquisitionFunction(model)
            pending_acq.set_X_pending(
                torch.rand(3, 3, device=self.device, dtype=dtype))
            with self.assertRaises(UnsupportedError):
                ProximalAcquisitionFunction(pending_acq, proximal_weights)

            # test model with multi-batch training inputs
            train_X = torch.rand(5, 2, 3, device=self.device, dtype=dtype)
            train_Y = train_X.norm(dim=-1, keepdim=True)
            bad_single_task = (SingleTaskGP(
                train_X, train_Y).to(device=self.device).eval())
            with self.assertRaises(UnsupportedError):
                ProximalAcquisitionFunction(
                    ExpectedImprovement(bad_single_task, 0.0),
                    proximal_weights)
Example 10
    def step(self, snapshot_mode: str, meta_info: dict = None):
        if not self.initialized:
            # Start initialization phase
            self.train_init_policies()
            self.eval_init_policies()
            self.initialized = True

        # Normalize the input data and standardize the output data
        cands_norm = self.uc_normalizer.project_to(self.cands)
        cands_values_stdized = standardize(self.cands_values).unsqueeze(1)

        # Create and fit the GP model
        gp = SingleTaskGP(cands_norm, cands_values_stdized)
        gp.likelihood.noise_covar.register_constraint('raw_noise',
                                                      GreaterThan(1e-5))
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)
        print_cbt('Fitted the GP.', 'g')

        # Acquisition functions
        if self.acq_fcn_type == 'UCB':
            acq_fcn = UpperConfidenceBound(gp,
                                           beta=self.acq_param.get(
                                               'beta', 0.1),
                                           maximize=True)
        elif self.acq_fcn_type == 'EI':
            acq_fcn = ExpectedImprovement(
                gp, best_f=cands_values_stdized.max().item(), maximize=True)
        elif self.acq_fcn_type == 'PI':
            acq_fcn = ProbabilityOfImprovement(
                gp, best_f=cands_values_stdized.max().item(), maximize=True)
        else:
            raise pyrado.ValueErr(given=self.acq_fcn_type,
                                  eq_constraint="'UCB', 'EI', 'PI'")

        # Optimize acquisition function and get new candidate point
        cand, acq_value = optimize_acqf(
            acq_function=acq_fcn,
            bounds=to.stack([to.zeros(self.cand_dim),
                             to.ones(self.cand_dim)]),
            q=1,
            num_restarts=self.acq_restarts,
            raw_samples=self.acq_samples)
        next_cand = self.uc_normalizer.project_back(cand)
        print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
        self.cands = to.cat([self.cands, next_cand], dim=0)
        to.save(self.cands, osp.join(self._save_dir, 'candidates.pt'))

        # Train and evaluate the new candidate (saves to iter_{self._curr_iter}_policy.pt)
        prefix = f'iter_{self._curr_iter}'
        wrapped_trn_fcn = until_thold_exceeded(
            self.thold_succ_subroutine.item(),
            max_iter=self.max_subroutine_rep)(self.train_policy_sim)
        wrapped_trn_fcn(cand, prefix)

        # Evaluate the current policy on the target domain
        policy = to.load(osp.join(self._save_dir, f'{prefix}_policy.pt'))
        self.curr_cand_value = self.eval_policy(self._save_dir, self._env_real,
                                                policy,
                                                self.montecarlo_estimator,
                                                prefix,
                                                self.num_eval_rollouts_real)

        self.cands_values = to.cat(
            [self.cands_values,
             self.curr_cand_value.view(1)], dim=0)
        to.save(self.cands_values,
                osp.join(self._save_dir, 'candidates_values.pt'))

        # Store the argmax after training and evaluating
        curr_argmax_cand = BayRn.argmax_posterior_mean(
            self.cands, self.cands_values.unsqueeze(1), self.uc_normalizer,
            self.acq_restarts, self.acq_samples)
        self.argmax_cand = to.cat([self.argmax_cand, curr_argmax_cand], dim=0)
        to.save(self.argmax_cand,
                osp.join(self._save_dir, 'candidates_argmax.pt'))

        self.make_snapshot(snapshot_mode, float(to.mean(self.cands_values)),
                           meta_info)
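Stripped of the framework bookkeeping, the core of the step above is the standard BoTorch loop: standardize the outcomes, fit a SingleTaskGP, optimize an acquisition function over the unit cube, and project the candidate back. A minimal sketch of that loop (the dimensionality and beta below are illustrative, not the algorithm's defaults):

import torch
from botorch.acquisition import UpperConfidenceBound
from botorch.fit import fit_gpytorch_model
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from botorch.utils.transforms import standardize
from gpytorch.mlls import ExactMarginalLogLikelihood

cands_norm = torch.rand(12, 4)               # candidates already normalized to [0, 1]^4
values_std = standardize(torch.rand(12, 1))  # standardized outcomes
gp = SingleTaskGP(cands_norm, values_std)
fit_gpytorch_model(ExactMarginalLogLikelihood(gp.likelihood, gp))
acq = UpperConfidenceBound(gp, beta=0.1, maximize=True)
bounds = torch.stack([torch.zeros(4), torch.ones(4)])
cand, _ = optimize_acqf(acq, bounds=bounds, q=1, num_restarts=8, raw_samples=128)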
Example 11
def gp_fit_test(x_train: Tensor,
                y_train: Tensor,
                error_train: Tensor,
                x_test: Tensor,
                y_test: Tensor,
                error_test: Tensor,
                gp_obj_model: SingleTaskGP,
                gp_error_model: SingleTaskGP,
                tkwargs: Dict[str, Any],
                gp_test_folder: str,
                obj_out_wp: bool = False,
                err_out_wp: bool = False) -> None:
    """
    1) Estimates mean test error between predicted and the true objective function values.
    2) Estimates mean test error between predicted recon. error by the gp_model and the true recon. error of the vae_model.
    :param x_train: normalised points at which the gps were trained
    :param y_train: objective value function corresponding to x_train that were used as targets of `gp_obj_model`
    :param error_train: reconstruction error value at points x_train that were used as targets of `gp_error_model`
    :param x_test: normalised test points
    :param y_test: objective value function corresponding to x_test
    :param error_test: reconstruction error at test points
    :param gp_obj_model: the gp model trained to predict the black box objective function values
    :param gp_error_model: the gp model trained to predict reconstruction error
    :param tkwargs: dict of type and device
    :param gp_test_folder: folder to save test results
    :param obj_out_wp: if the `gp_obj_model` was trained with output warping then need to apply the same transform
    :param err_out_wp: if the `gp_error_model` was trained with output warping then need to apply the same transform
    :return: (Sum_i||true_y_i - pred_y_i||^2 / n_points, Sum_i||true_recon_i - pred_recon_i||^2 / n_points)
    """
    do_robust = gp_error_model is not None
    if not os.path.exists(gp_test_folder):
        os.mkdir(gp_test_folder)

    gp_obj_model.eval()
    gp_obj_model.to(tkwargs['device'])
    y_train = y_train.view(-1)
    if do_robust:
        gp_error_model.eval()
        gp_error_model.to(tkwargs['device'])
        error_train = error_train.view(-1)

    with torch.no_grad():
        if obj_out_wp:
            Y_numpy = y_train.cpu().numpy()
            if Y_numpy.min() <= 0:
                y_train = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(),
                                    method='yeo-johnson'))
            else:
                y_train = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(), method='box-cox'))
                if y_train.std() < 0.5:
                    Y_numpy = y_train.numpy()
                    y_train = torch.FloatTensor(
                        power_transform(Y_numpy / Y_numpy.std(),
                                        method='yeo-johnson')).to(x_train)

            Y_numpy = y_test.cpu().numpy()
            if Y_numpy.min() <= 0:
                y_test = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(),
                                    method='yeo-johnson'))
            else:
                y_test = torch.FloatTensor(
                    power_transform(Y_numpy / Y_numpy.std(), method='box-cox'))
                if y_test.std() < 0.5:
                    Y_numpy = y_test.numpy()
                    y_test = torch.FloatTensor(
                        power_transform(Y_numpy / Y_numpy.std(),
                                        method='yeo-johnson')).to(x_test)

        y_train = y_train.view(-1).to(**tkwargs)
        y_test = y_test.view(-1).to(**tkwargs)

        gp_obj_val_model_mse_train = (
            gp_obj_model.posterior(x_train).mean.view(-1) -
            y_train).pow(2).div(len(y_train))
        gp_obj_val_model_mse_test = (
            gp_obj_model.posterior(x_test).mean.view(-1) - y_test).pow(2).div(
                len(y_test))
        torch.save(
            gp_obj_val_model_mse_train,
            os.path.join(gp_test_folder, 'gp_obj_val_model_mse_train.npz'))
        torch.save(gp_obj_val_model_mse_test,
                   os.path.join(gp_test_folder, 'gp_obj_val_model_test.npz'))
        print(
            f'GP training fit on objective value: MSE={gp_obj_val_model_mse_train.sum().item():.5f}'
        )
        print(
            f'GP testing fit on objective value: MSE={gp_obj_val_model_mse_test.sum().item():.5f}'
        )

        if do_robust:
            if err_out_wp:
                error_train = error_train.view(-1, 1)
                R_numpy = error_train.cpu().numpy()
                if R_numpy.min() <= 0:
                    error_train = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='yeo-johnson'))
                else:
                    error_train = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='box-cox'))
                    if error_train.std() < 0.5:
                        R_numpy = error_train.numpy()
                        error_train = torch.FloatTensor(
                            power_transform(R_numpy / R_numpy.std(),
                                            method='yeo-johnson')).to(x_train)

                R_numpy = error_test.cpu().numpy()
                if R_numpy.min() <= 0:
                    error_test = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='yeo-johnson'))
                else:
                    error_test = torch.FloatTensor(
                        power_transform(R_numpy / R_numpy.std(),
                                        method='box-cox'))
                    if error_test.std() < 0.5:
                        R_numpy = error_test.numpy()
                        error_test = torch.FloatTensor(
                            power_transform(R_numpy / R_numpy.std(),
                                            method='yeo-johnson')).to(x_test)

            error_train = error_train.view(-1).to(**tkwargs)
            error_test = error_test.view(-1).to(**tkwargs)

            pred_recon_train = gp_error_model.posterior(x_train).mean.view(-1)
            pred_recon_test = gp_error_model.posterior(x_test).mean.view(-1)

            gp_error_model_mse_train = (error_train -
                                        pred_recon_train).pow(2).div(
                                            len(error_train))
            gp_error_model_mse_test = (error_test -
                                       pred_recon_test).pow(2).div(
                                           len(error_test))
            torch.save(
                gp_error_model_mse_train,
                os.path.join(gp_test_folder, 'gp_error_model_mse_train.npz'))
            torch.save(
                gp_error_model_mse_test,
                os.path.join(gp_test_folder, 'gp_error_model_mse_test.npz'))
            print(
                f'GP training fit on reconstruction errors: MSE={gp_error_model_mse_train.sum().item():.5f}'
            )
            print(
                f'GP testing fit on reconstruction errors: MSE={gp_error_model_mse_test.sum().item():.5f}'
            )
            torch.save(error_test,
                       os.path.join(gp_test_folder, "true_rec_err_z.pt"))
            torch.save(error_train,
                       os.path.join(gp_test_folder, "error_train.pt"))

        torch.save(x_train, os.path.join(gp_test_folder, "train_x.pt"))
        torch.save(x_test, os.path.join(gp_test_folder, "test_x.pt"))
        torch.save(y_train, os.path.join(gp_test_folder, "y_train.pt"))
        torch.save(x_test, os.path.join(gp_test_folder, "X_test.pt"))
        torch.save(y_test, os.path.join(gp_test_folder, "y_test.pt"))

        # y plots
        plt.hist(y_train.cpu().numpy(),
                 bins=100,
                 label='y train',
                 alpha=0.5,
                 density=True)
        plt.hist(gp_obj_model.posterior(x_train).mean.view(
            -1).detach().cpu().numpy(),
                 bins=100,
                 label='y pred',
                 alpha=0.5,
                 density=True)
        plt.legend()
        plt.title('Training set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_train.pdf'))
        plt.close()

        plt.hist(gp_obj_val_model_mse_train.detach().cpu().numpy(),
                 bins=100,
                 alpha=0.5,
                 density=True)
        plt.title('MSE of gp_obj_val model on training set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_train_mse.pdf'))
        plt.close()

        plt.hist(y_test.cpu().numpy(),
                 bins=100,
                 label='y true',
                 alpha=0.5,
                 density=True)
        plt.hist(gp_obj_model.posterior(x_test).mean.detach().cpu().numpy(),
                 bins=100,
                 alpha=0.5,
                 label='y pred',
                 density=True)
        plt.legend()
        plt.title('Validation set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_test.pdf'))
        plt.close()

        plt.hist(gp_obj_val_model_mse_test.detach().cpu().numpy(),
                 bins=100,
                 alpha=0.5,
                 density=True)
        plt.title('MSE of gp_obj_val model on validation set')
        plt.savefig(os.path.join(gp_test_folder, 'gp_obj_test_mse.pdf'))
        plt.close()

        if do_robust:
            # error plots
            plt.hist(error_train.cpu().numpy(),
                     bins=100,
                     label='error train',
                     alpha=0.5,
                     density=True)
            plt.hist(
                gp_error_model.posterior(x_train).mean.detach().cpu().numpy(),
                bins=100,
                label='error pred',
                alpha=0.5,
                density=True)
            plt.legend()
            plt.title('Training set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_train.pdf'))
            plt.close()

            plt.hist(gp_error_model_mse_train.detach().cpu().numpy(),
                     bins=100,
                     alpha=0.5,
                     density=True)
            plt.title('MSE of gp_error model on training set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_train_mse.pdf'))
            plt.close()

            plt.hist(error_test.cpu().numpy(),
                     bins=100,
                     label='error true',
                     alpha=0.5,
                     density=True)
            plt.hist(
                gp_error_model.posterior(x_test).mean.detach().cpu().numpy(),
                bins=100,
                alpha=0.5,
                label='error pred',
                density=True)
            plt.legend()
            plt.title('Validation set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_test.pdf'))
            plt.close()

            plt.hist(gp_error_model_mse_test.detach().cpu().numpy(),
                     bins=100,
                     alpha=0.5,
                     density=True)
            plt.title('MSE of gp_error model on validation set')
            plt.savefig(os.path.join(gp_test_folder, 'gp_error_test_mse.pdf'))
            plt.close()

            # y-error plots
            y_train_sorted, indices_train = torch.sort(y_train)
            error_train_sorted = error_train[indices_train]
            gp_y_train_pred_sorted, indices_train_pred = torch.sort(
                gp_obj_model.posterior(x_train).mean.view(-1))
            gp_r_train_pred_sorted = (gp_error_model.posterior(
                x_train).mean.view(-1))[indices_train_pred]
            plt.scatter(y_train_sorted.cpu().numpy(),
                        error_train_sorted.cpu().numpy(),
                        label='true',
                        marker='+')
            plt.scatter(gp_y_train_pred_sorted.detach().cpu().numpy(),
                        gp_r_train_pred_sorted.detach().cpu().numpy(),
                        label='pred',
                        marker='*')
            plt.xlabel('y train targets')
            plt.ylabel('recon. error train targets')
            plt.title('y_train vs. error_train')
            plt.legend()
            plt.savefig(
                os.path.join(gp_test_folder, 'scatter_obj_error_train.pdf'))
            plt.close()

            y_test_std_sorted, indices_test = torch.sort(y_test)
            error_test_sorted = error_test[indices_test]
            gp_y_test_pred_sorted, indices_test_pred = torch.sort(
                gp_obj_model.posterior(x_test).mean.view(-1))
            gp_r_test_pred_sorted = (gp_error_model.posterior(
                x_test).mean.view(-1))[indices_test_pred]
            plt.scatter(y_test_std_sorted.cpu().numpy(),
                        error_test_sorted.cpu().numpy(),
                        label='true',
                        marker='+')
            plt.scatter(gp_y_test_pred_sorted.detach().cpu().numpy(),
                        gp_r_test_pred_sorted.detach().cpu().numpy(),
                        label='pred',
                        marker='*')
            plt.xlabel('y test targets')
            plt.ylabel('recon. error test targets')
            plt.title('y_test vs. error_test')
            plt.legend()
            plt.savefig(
                os.path.join(gp_test_folder, 'scatter_obj_error_test.pdf'))
            plt.close()

            # error var plots
            error_train_sorted, indices_train_pred = torch.sort(error_train)
            # error_train_sorted = error_train
            # indices_train_pred = np.arange(len(error_train))
            gp_r_train_pred_sorted = gp_error_model.posterior(
                x_train).mean[indices_train_pred].view(-1)
            gp_r_train_pred_std_sorted = gp_error_model.posterior(
                x_train).variance.view(-1).sqrt()[indices_train_pred]
            plt.scatter(np.arange(len(indices_train_pred)),
                        error_train_sorted.cpu().numpy(),
                        label='err true',
                        marker='+',
                        color='C1',
                        s=15)
            plt.errorbar(
                np.arange(len(indices_train_pred)),
                gp_r_train_pred_sorted.detach().cpu().numpy().flatten(),
                yerr=gp_r_train_pred_std_sorted.detach().cpu().numpy().flatten(
                ),
                fmt='*',
                alpha=0.05,
                label='err pred',
                color='C0',
                ecolor='C0')
            plt.scatter(np.arange(len(indices_train_pred)),
                        gp_r_train_pred_sorted.detach().cpu().numpy(),
                        marker='*',
                        alpha=0.2,
                        s=10,
                        color='C0')
            # plt.scatter(np.arange(len(indices_train_pred)),
            #             (gp_r_train_pred_sorted + gp_r_train_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean+std', marker='.')
            # plt.scatter(np.arange(len(indices_train_pred)),
            #             (gp_r_train_pred_sorted - gp_r_train_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean-std', marker='.')
            plt.legend()
            plt.title('error predictions and uncertainty on train set')
            plt.savefig(
                os.path.join(gp_test_folder, 'gp_error_train_uncertainty.pdf'))
            plt.close()

            error_test_sorted, indices_test_pred = torch.sort(error_test)
            # error_test_sorted = error_test
            # indices_test_pred = np.arange(len(error_test_sorted))
            gp_r_test_pred_sorted = gp_error_model.posterior(x_test).mean.view(
                -1)[indices_test_pred]
            gp_r_test_pred_std_sorted = gp_error_model.posterior(
                x_test).variance.view(-1).sqrt()[indices_test_pred]
            plt.scatter(np.arange(len(indices_test_pred)),
                        error_test_sorted.cpu().numpy(),
                        label='err true',
                        marker='+',
                        color='C1',
                        s=15)
            plt.errorbar(
                np.arange(len(indices_test_pred)),
                gp_r_test_pred_sorted.detach().cpu().numpy().flatten(),
                yerr=gp_r_test_pred_std_sorted.detach().cpu().numpy().flatten(
                ),
                marker='*',
                alpha=0.05,
                label='err pred',
                color='C0',
                ecolor='C0')
            plt.scatter(np.arange(len(indices_test_pred)),
                        gp_r_test_pred_sorted.detach().cpu().numpy().flatten(),
                        marker='*',
                        color='C0',
                        alpha=0.2,
                        s=10)
            # plt.scatter(np.arange(len(indices_test_pred)),
            #             (gp_r_test_pred_sorted + gp_r_test_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean+std', marker='.')
            # plt.scatter(np.arange(len(indices_test_pred)),
            #             (gp_r_test_pred_sorted - gp_r_test_pred_std_sorted).detach().cpu().numpy(),
            #             label='err pred mean-std', marker='.')
            plt.legend()
            plt.title('error predictions and uncertainty on test set')
            plt.savefig(
                os.path.join(gp_test_folder, 'gp_error_test_uncertainty.pdf'))
            plt.close()

        # y var plots
        y_train_std_sorted, indices_train = torch.sort(y_train)
        gp_y_train_pred_sorted = gp_obj_model.posterior(
            x_train).mean[indices_train].view(-1)
        gp_y_train_pred_std_sorted = gp_obj_model.posterior(
            x_train).variance.sqrt()[indices_train].view(-1)
        plt.scatter(np.arange(len(indices_train)),
                    y_train_std_sorted.cpu().numpy(),
                    label='y true',
                    marker='+',
                    color='C1',
                    s=15)
        plt.scatter(np.arange(len(indices_train)),
                    gp_y_train_pred_sorted.detach().cpu().numpy(),
                    marker='*',
                    alpha=0.2,
                    s=10,
                    color='C0')
        plt.errorbar(
            np.arange(len(indices_train)),
            gp_y_train_pred_sorted.detach().cpu().numpy().flatten(),
            yerr=gp_y_train_pred_std_sorted.detach().cpu().numpy().flatten(),
            fmt='*',
            alpha=0.05,
            label='y pred',
            color='C0',
            ecolor='C0')
        # plt.scatter(np.arange(len(indices_train_pred)),
        #             (gp_y_train_pred_sorted+gp_y_train_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean+std', marker='.')
        # plt.scatter(np.arange(len(indices_train_pred)),
        #             (gp_y_train_pred_sorted-gp_y_train_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean-std', marker='.')
        plt.legend()
        plt.title('y predictions and uncertainty on train set')
        plt.savefig(
            os.path.join(gp_test_folder, 'gp_obj_val_train_uncertainty.pdf'))
        plt.close()

        y_test_std_sorted, indices_test = torch.sort(y_test)
        gp_y_test_pred_sorted = gp_obj_model.posterior(x_test).mean.view(
            -1)[indices_test]
        gp_y_test_pred_std_sorted = gp_obj_model.posterior(
            x_test).variance.view(-1).sqrt()[indices_test]
        plt.scatter(np.arange(len(indices_test)),
                    y_test_std_sorted.cpu().numpy(),
                    label='y true',
                    marker='+',
                    color='C1',
                    s=15)
        plt.errorbar(
            np.arange(len(indices_test)),
            gp_y_test_pred_sorted.detach().cpu().numpy().flatten(),
            yerr=gp_y_test_pred_std_sorted.detach().cpu().numpy().flatten(),
            fmt='*',
            alpha=0.05,
            label='y pred',
            color='C0',
            ecolor='C0')
        plt.scatter(np.arange(len(indices_test)),
                    gp_y_test_pred_sorted.detach().cpu().numpy(),
                    marker='*',
                    alpha=0.2,
                    s=10,
                    color='C0')
        # plt.scatter(np.arange(len(indices_test_pred)),
        #             (gp_y_test_pred_sorted + gp_y_test_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean+std', marker='.')
        # plt.scatter(np.arange(len(indices_test_pred)),
        #             (gp_y_test_pred_sorted - gp_y_test_pred_std_sorted).detach().cpu().numpy(),
        #             label='y pred mean-std', marker='.')
        plt.legend()
        plt.title('y predictions and uncertainty on test set')
        plt.savefig(
            os.path.join(gp_test_folder, 'gp_obj_val_test_uncertainty.pdf'))
        plt.close()
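For reference, gp_fit_test depends on several module-level imports that the excerpt omits. A plausible header, assuming the power transform is scikit-learn's:

import os
from typing import Any, Dict

import matplotlib.pyplot as plt
import numpy as np
import torch
from botorch.models import SingleTaskGP
from sklearn.preprocessing import power_transform
from torch import Tensor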
Example 12
    def test_cache_root(self):
        sample_cached_path = (
            "botorch.acquisition.cached_cholesky.sample_cached_cholesky")
        raw_state_dict = {
            "likelihood.noise_covar.raw_noise":
            torch.tensor([[0.0895], [0.2594]], dtype=torch.float64),
            "mean_module.constant":
            torch.tensor([[-0.4545], [-0.1285]], dtype=torch.float64),
            "covar_module.raw_outputscale":
            torch.tensor([1.4876, 1.4897], dtype=torch.float64),
            "covar_module.base_kernel.raw_lengthscale":
            torch.tensor([[[-0.7202, -0.2868]], [[-0.8794, -1.2877]]],
                         dtype=torch.float64),
        }
        # test batched models (e.g. for MCMC)
        for train_batch_shape, m, dtype in product(
            (torch.Size([]), torch.Size([3])), (1, 2),
            (torch.float, torch.double)):
            state_dict = deepcopy(raw_state_dict)
            for k, v in state_dict.items():
                if m == 1:
                    v = v[0]
                if len(train_batch_shape) > 0:
                    v = v.unsqueeze(0).expand(*train_batch_shape, *v.shape)
                state_dict[k] = v
            tkwargs = {"device": self.device, "dtype": dtype}
            if m == 2:
                objective = GenericMCObjective(lambda Y, X: Y.sum(dim=-1))
            else:
                objective = None
            for k, v in state_dict.items():
                state_dict[k] = v.to(**tkwargs)
            all_close_kwargs = (
                {"atol": 1e-1, "rtol": 0.0}
                if dtype == torch.float
                else {"atol": 1e-4, "rtol": 0.0}
            )
            torch.manual_seed(1234)
            train_X = torch.rand(*train_batch_shape, 3, 2, **tkwargs)
            train_Y = (
                torch.sin(train_X * 2 * pi) +
                torch.randn(*train_batch_shape, 3, 2, **tkwargs))[..., :m]
            train_Y = standardize(train_Y)
            model = SingleTaskGP(
                train_X,
                train_Y,
            )
            if len(train_batch_shape) > 0:
                X_baseline = train_X[0]
            else:
                X_baseline = train_X
            model.load_state_dict(state_dict, strict=False)
            # test sampler with collapse_batch_dims=False
            sampler = IIDNormalSampler(5, seed=0, collapse_batch_dims=False)
            with self.assertRaises(UnsupportedError):
                qNoisyExpectedImprovement(
                    model=model,
                    X_baseline=X_baseline,
                    sampler=sampler,
                    objective=objective,
                    prune_baseline=False,
                    cache_root=True,
                )
            sampler = IIDNormalSampler(5, seed=0)
            torch.manual_seed(0)
            acqf = qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler,
                objective=objective,
                prune_baseline=False,
                cache_root=True,
            )

            orig_base_samples = acqf.base_sampler.base_samples.detach().clone()
            sampler2 = IIDNormalSampler(5, seed=0)
            sampler2.base_samples = orig_base_samples
            torch.manual_seed(0)
            acqf_no_cache = qNoisyExpectedImprovement(
                model=model,
                X_baseline=X_baseline,
                sampler=sampler2,
                objective=objective,
                prune_baseline=False,
                cache_root=False,
            )
            for q, batch_shape in product(
                (1, 3), (torch.Size([]), torch.Size([3]), torch.Size([4, 3]))):
                test_X = (0.3 +
                          0.05 * torch.randn(*batch_shape, q, 2, **tkwargs)
                          ).requires_grad_(True)
                with mock.patch(
                        sample_cached_path,
                        wraps=sample_cached_cholesky) as mock_sample_cached:
                    torch.manual_seed(0)
                    val = acqf(test_X)
                    mock_sample_cached.assert_called_once()
                val.sum().backward()
                base_samples = acqf.sampler.base_samples.detach().clone()
                X_grad = test_X.grad.clone()
                test_X2 = test_X.detach().clone().requires_grad_(True)
                acqf_no_cache.sampler.base_samples = base_samples
                with mock.patch(
                        sample_cached_path,
                        wraps=sample_cached_cholesky) as mock_sample_cached:
                    torch.manual_seed(0)
                    val2 = acqf_no_cache(test_X2)
                mock_sample_cached.assert_not_called()
                self.assertTrue(torch.allclose(val, val2, **all_close_kwargs))
                val2.sum().backward()
                self.assertTrue(
                    torch.allclose(X_grad, test_X2.grad, **all_close_kwargs))
            # test we fall back to standard sampling for
            # ill-conditioned covariances
            acqf._baseline_L = torch.zeros_like(acqf._baseline_L)
            with warnings.catch_warnings(
                    record=True) as ws, settings.debug(True):
                with torch.no_grad():
                    acqf(test_X)
            self.assertEqual(len(ws), 1)
            self.assertTrue(issubclass(ws[-1].category, BotorchWarning))
Example 13
    def test_gen_value_function_initial_conditions(self):
        num_fantasies = 2
        num_solutions = 3
        num_restarts = 4
        raw_samples = 5
        n_train = 6
        dim = 2
        dtype = torch.float
        # run a thorough test with dtype float
        train_X = torch.rand(n_train, dim, device=self.device, dtype=dtype)
        train_Y = torch.rand(n_train, 1, device=self.device, dtype=dtype)
        model = SingleTaskGP(train_X, train_Y)
        fant_X = torch.rand(num_solutions,
                            1,
                            dim,
                            device=self.device,
                            dtype=dtype)
        fantasy_model = model.fantasize(fant_X,
                                        IIDNormalSampler(num_fantasies))
        bounds = torch.tensor([[0, 0], [1, 1]],
                              device=self.device,
                              dtype=dtype)
        value_function = PosteriorMean(fantasy_model)
        # test option error
        with self.assertRaises(ValueError):
            gen_value_function_initial_conditions(
                acq_function=value_function,
                bounds=bounds,
                num_restarts=num_restarts,
                raw_samples=raw_samples,
                current_model=model,
                options={"frac_random": 2.0},
            )
        # test output shape
        ics = gen_value_function_initial_conditions(
            acq_function=value_function,
            bounds=bounds,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            current_model=model,
        )
        self.assertEqual(
            ics.shape,
            torch.Size([num_restarts, num_fantasies, num_solutions, 1, dim]))
        # test bounds
        self.assertTrue(torch.all(ics >= bounds[0]))
        self.assertTrue(torch.all(ics <= bounds[1]))
        # test dtype
        self.assertEqual(dtype, ics.dtype)

        # minimal test cases for when all raw samples are random, with dtype double
        dtype = torch.double
        n_train = 2
        dim = 1
        num_solutions = 1
        train_X = torch.rand(n_train, dim, device=self.device, dtype=dtype)
        train_Y = torch.rand(n_train, 1, device=self.device, dtype=dtype)
        model = SingleTaskGP(train_X, train_Y)
        fant_X = torch.rand(1, 1, dim, device=self.device, dtype=dtype)
        fantasy_model = model.fantasize(fant_X,
                                        IIDNormalSampler(num_fantasies))
        bounds = torch.tensor([[0], [1]], device=self.device, dtype=dtype)
        value_function = PosteriorMean(fantasy_model)
        ics = gen_value_function_initial_conditions(
            acq_function=value_function,
            bounds=bounds,
            num_restarts=1,
            raw_samples=1,
            current_model=model,
            options={"frac_random": 0.99},
        )
        self.assertEqual(ics.shape,
                         torch.Size([1, num_fantasies, num_solutions, 1, dim]))
        # test bounds
        self.assertTrue(torch.all(ics >= bounds[0]))
        self.assertTrue(torch.all(ics <= bounds[1]))
        # test dtype
        self.assertEqual(dtype, ics.dtype)
Example 14
 def testSubsetModel(self):
     x = torch.zeros(1, 1)
     y = torch.rand(1, 2)
     obj_t = torch.rand(2)
     model = SingleTaskGP(x, y)
     self.assertEqual(model.num_outputs, 2)
     # basic test, can subset
     obj_weights = torch.tensor([1.0, 0.0])
     subset_model_results = subset_model(model, obj_weights)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertIsNone(ocs_sub)
     self.assertIsNone(obj_t_sub)
     self.assertEqual(model_sub.num_outputs, 1)
     self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
     # basic test, cannot subset
     obj_weights = torch.tensor([1.0, 2.0])
     subset_model_results = subset_model(model, obj_weights)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertIsNone(ocs_sub)
     self.assertIsNone(obj_t_sub)
     self.assertIs(model_sub, model)  # check identity
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertTrue(
         torch.equal(subset_model_results.indices, torch.tensor([0, 1])))
     # test w/ outcome constraints, can subset
     obj_weights = torch.tensor([1.0, 0.0])
     ocs = (torch.tensor([[1.0, 0.0]]), torch.tensor([1.0]))
     subset_model_results = subset_model(model, obj_weights, ocs)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertEqual(model_sub.num_outputs, 1)
     self.assertIsNone(obj_t_sub)
     self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
     self.assertTrue(torch.equal(ocs_sub[0], torch.tensor([[1.0]])))
     self.assertTrue(torch.equal(ocs_sub[1], torch.tensor([1.0])))
     self.assertTrue(
         torch.equal(subset_model_results.indices, torch.tensor([0])))
     # test w/ outcome constraints, cannot subset
     obj_weights = torch.tensor([1.0, 0.0])
     ocs = (torch.tensor([[0.0, 1.0]]), torch.tensor([1.0]))
     subset_model_results = subset_model(model, obj_weights, ocs)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertIs(model_sub, model)  # check identity
     self.assertIsNone(obj_t_sub)
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertIs(ocs_sub, ocs)  # check identity
     self.assertTrue(
         torch.equal(subset_model_results.indices, torch.tensor([0, 1])))
     # test w/ objective thresholds, cannot subset
     obj_weights = torch.tensor([1.0, 0.0])
     ocs = (torch.tensor([[0.0, 1.0]]), torch.tensor([1.0]))
     subset_model_results = subset_model(model, obj_weights, ocs, obj_t)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertIs(model_sub, model)  # check identity
     self.assertIs(obj_t, obj_t_sub)
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertTrue(
         torch.equal(subset_model_results.indices, torch.tensor([0, 1])))
     self.assertIs(ocs_sub, ocs)  # check identity
     # test w/ objective thresholds, can subset
     obj_weights = torch.tensor([1.0, 0.0])
     ocs = (torch.tensor([[1.0, 0.0]]), torch.tensor([1.0]))
     subset_model_results = subset_model(model, obj_weights, ocs, obj_t)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertTrue(
         torch.equal(subset_model_results.indices, torch.tensor([0])))
     self.assertEqual(model_sub.num_outputs, 1)
     self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
     self.assertTrue(torch.equal(obj_t_sub, obj_t[:1]))
     self.assertTrue(torch.equal(ocs_sub[0], torch.tensor([[1.0]])))
     self.assertTrue(torch.equal(ocs_sub[1], torch.tensor([1.0])))
     # test unsupported
     yvar = torch.ones(1, 2)
     model = HeteroskedasticSingleTaskGP(x, y, yvar)
     subset_model_results = subset_model(model, obj_weights)
     model_sub = subset_model_results.model
     obj_weights_sub = subset_model_results.objective_weights
     ocs_sub = subset_model_results.outcome_constraints
     obj_t_sub = subset_model_results.objective_thresholds
     self.assertIsNone(ocs_sub)
     self.assertIs(model_sub, model)  # check identity
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertTrue(
         torch.equal(subset_model_results.indices, torch.tensor([0, 1])))
     # test error on size inconsistency
     obj_weights = torch.ones(3)
     with self.assertRaises(RuntimeError):
         subset_model(model, obj_weights)
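
A minimal usage sketch of the pattern the assertions above exercise (assuming the same subset_model helper and SubsetModelResults fields are in scope; all names below are illustrative):

import torch
from botorch.models import SingleTaskGP

train_X = torch.rand(8, 2)
train_Y = torch.rand(8, 2)                 # two outcomes
model = SingleTaskGP(train_X, train_Y)
obj_weights = torch.tensor([1.0, 0.0])     # the second outcome never enters the objective
results = subset_model(model, obj_weights)
assert results.model.num_outputs == 1      # the zero-weight outcome was dropped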
Ejemplo n.º 15
0
    def fit(self, X: DataSet, y: DataSet, **kwargs):
        """Train model and take spectral samples"""
        from botorch.models import SingleTaskGP
        from botorch.fit import fit_gpytorch_model
        from gpytorch.mlls.exact_marginal_log_likelihood import (
            ExactMarginalLogLikelihood, )
        import pyrff
        import torch
        import numpy as np

        self.input_columns_ordered = X.columns

        # Convert to tensors
        X_np = X.to_numpy().astype(float)
        y_np = y.to_numpy().astype(float)
        X = torch.from_numpy(X_np)
        y = torch.from_numpy(y_np)

        # Train the GP model
        self.model = SingleTaskGP(X, y)
        mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
        fit_gpytorch_model(mll)

        # Log model hyperparameters
        if self.model_name is None:
            self.model_name = self.output_columns_ordered[0]
        self.lengthscales_ = (
            self.model.covar_module.base_kernel.lengthscale.detach()[0].numpy()
        )
        self.outputscale_ = self.model.covar_module.outputscale.detach().numpy()
        self.noise_ = self.model.likelihood.noise_covar.noise.detach().numpy()[0]
        self.logger.debug(
            f"Model {self.model_name} lengthscales: {self.lengthscales_}")
        self.logger.debug(
            f"Model {self.model_name} variance: {self.outputscale_}")
        self.logger.debug(f"Model {self.model_name} noise: {self.noise_}")

        # Spectral sampling
        n_spectral_points = kwargs.get("n_spectral_points", 1500)
        n_retries = kwargs.get("n_retries", 10)
        self.logger.debug(
            f"Spectral sampling {self.model_name} with {n_spectral_points} spectral points."
        )
        self.rff = None
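        # pyrff's spectral sampling can fail numerically, so retry up to n_retries times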
        nu = self.model.covar_module.base_kernel.nu
        for _ in range(n_retries):
            try:
                self.rff = pyrff.sample_rff(
                    lengthscales=self.lengthscales_,
                    scaling=np.sqrt(self.outputscale_),
                    noise=self.noise_,
                    kernel_nu=nu,
                    X=X_np,
                    Y=y_np[:, 0],
                    M=n_spectral_points,
                )
                break
            except np.linalg.LinAlgError as e:
                self.logger.error(e)
            except ValueError as e:
                self.logger.error(e)
        if self.rff is None:
            raise RuntimeError(
                f"Spectral sampling failed after {n_retries} retries.")

        return dict(
            name=self.model_name,
            rff=self.rff,
            lengthscales=self.lengthscales_,
            outputscale=self.outputscale_,
            noise=self.noise_,
        )
Ejemplo n.º 16
0
    def sample_arch(self, START_BO, g, hyperparams, og_flops, empty_val_loss, full_val_loss, target_flops=0):
        if g < START_BO:
            if target_flops == 0:
                f = np.random.rand(1) * (args.upper_channel-args.lower_channel) + args.lower_channel
            else:
                f = args.lower_channel
            parameterization = np.ones(hyperparams.get_dim()) * f
            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        elif g == START_BO:
            if target_flops == 0:
                parameterization = np.ones(hyperparams.get_dim())
            else:
                f = args.lower_channel
                parameterization = np.ones(hyperparams.get_dim()) * f
            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        else:
            rand = torch.rand(1).cuda()

            train_X = torch.FloatTensor(self.X).cuda()
            train_Y_loss = torch.FloatTensor(np.array(self.Y)[:, 0].reshape(-1, 1)).cuda()
            train_Y_loss = standardize(train_Y_loss)

            train_Y_cost = torch.FloatTensor(np.array(self.Y)[:, 1].reshape(-1, 1)).cuda()
            train_Y_cost = standardize(train_Y_cost)

            covar_module = None
            if args.ski and g > 128:
                if args.additive:
                    covar_module = AdditiveStructureKernel(
                        ScaleKernel(
                            GridInterpolationKernel(
                                MaternKernel(
                                    nu=2.5,
                                    lengthscale_prior=GammaPrior(3.0, 6.0),
                                ),
                                grid_size=128, num_dims=1, grid_bounds=[(0, 1)]
                            ),
                            outputscale_prior=GammaPrior(2.0, 0.15),
                        ), 
                        num_dims=train_X.shape[1]
                    )
                else:
                    covar_module = ScaleKernel(
                        GridInterpolationKernel(
                            MaternKernel(
                                nu=2.5,
                                lengthscale_prior=GammaPrior(3.0, 6.0),
                            ),
                            grid_size=128, num_dims=train_X.shape[1], grid_bounds=[(0, 1) for _ in range(train_X.shape[1])]
                        ),
                        outputscale_prior=GammaPrior(2.0, 0.15),
                    )
            else:
                if args.additive:
                    covar_module = AdditiveStructureKernel(
                        ScaleKernel(
                            MaternKernel(
                                nu=2.5,
                                lengthscale_prior=GammaPrior(3.0, 6.0),
                                num_dims=1
                            ),
                            outputscale_prior=GammaPrior(2.0, 0.15),
                        ),
                        num_dims=train_X.shape[1]
                    )
                else:
                    covar_module = ScaleKernel(
                        MaternKernel(
                            nu=2.5,
                            lengthscale_prior=GammaPrior(3.0, 6.0),
                            num_dims=train_X.shape[1]
                        ),
                        outputscale_prior=GammaPrior(2.0, 0.15),
                    )

            new_train_X = train_X
            gp_loss = SingleTaskGP(new_train_X, train_Y_loss, covar_module=covar_module)
            mll = ExactMarginalLogLikelihood(gp_loss.likelihood, gp_loss)
            mll = mll.to('cuda')
            fit_gpytorch_model(mll)


            # Use add-gp for cost
            covar_module = AdditiveStructureKernel(
                ScaleKernel(
                    MaternKernel(
                        nu=2.5,
                        lengthscale_prior=GammaPrior(3.0, 6.0),
                        num_dims=1
                    ),
                    outputscale_prior=GammaPrior(2.0, 0.15),
                ),
                num_dims=train_X.shape[1]
            )
            gp_cost = SingleTaskGP(new_train_X, train_Y_cost, covar_module=covar_module)
            mll = ExactMarginalLogLikelihood(gp_cost.likelihood, gp_cost)
            mll = mll.to('cuda')
            fit_gpytorch_model(mll)

            UCB_loss = UpperConfidenceBound(gp_loss, beta=args.beta).cuda()
            UCB_cost = UpperConfidenceBound(gp_cost, beta=args.beta).cuda()
            self.mobo_obj = RandAcquisition(UCB_loss).cuda()
            self.mobo_obj.setup(UCB_loss, UCB_cost, rand)

            lower = torch.ones(new_train_X.shape[1])*args.lower_channel
            upper = torch.ones(new_train_X.shape[1])*args.upper_channel
            self.mobo_bounds = torch.stack([lower, upper]).cuda()

            if args.pas:
                val = np.linspace(args.lower_flops, 1, 50)
                chosen_target_flops = np.random.choice(val, p=(self.sampling_weights/np.sum(self.sampling_weights)))
                
                lower_bnd, upper_bnd = 0, 1
                lmda = 0.5
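                # Bisect on the acquisition trade-off weight lmda until the
                # simulated FLOPs ratio matches the sampled target budget.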
                for i in range(10):
                    self.mobo_obj.rand = lmda

                    parameterization, acq_value = optimize_acqf(
                        self.mobo_obj, bounds=self.mobo_bounds, q=1, num_restarts=5, raw_samples=1000,
                    )

                    parameterization = parameterization[0].cpu().numpy()
                    layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
                    sim_flops = self.mask_pruner.simulate_and_count_flops(layer_budget, self.use_mem)
                    ratio = sim_flops/og_flops

                    if np.abs(ratio - chosen_target_flops) <= 0.02:
                        break
                    if args.baseline > 0:
                        if ratio < chosen_target_flops:
                            lower_bnd = lmda
                            lmda = (lmda + upper_bnd) / 2
                        elif ratio > chosen_target_flops:
                            upper_bnd = lmda
                            lmda = (lmda + lower_bnd) / 2
                    else:
                        if ratio < chosen_target_flops:
                            upper_bnd = lmda
                            lmda = (lmda + lower_bnd) / 2
                        elif ratio > chosen_target_flops:
                            lower_bnd = lmda
                            lmda = (lmda + upper_bnd) / 2
                rand[0] = lmda
                writer.add_scalar('Binary search trials', i, g)

            else:
                parameterization, acq_value = optimize_acqf(
                    self.mobo_obj, bounds=self.mobo_bounds, q=1, num_restarts=5, raw_samples=1000,
                )
                parameterization = parameterization[0].cpu().numpy()

            layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        return layer_budget, parameterization, self.sampling_weights/np.sum(self.sampling_weights)
Ejemplo n.º 17
0
 def testSubsetModel(self):
     x = torch.zeros(1, 1)
     y = torch.rand(1, 2)
     Ys = [y[:, :1], y[:, 1:]]
     model = SingleTaskGP(x, y)
     self.assertEqual(model.num_outputs, 2)
     # basic test, can subset
     obj_weights = torch.tensor([1.0, 0.0])
     model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
         model, obj_weights, Ys=Ys
     )
     self.assertIsNone(ocs_sub)
     self.assertEqual(model_sub.num_outputs, 1)
     self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
     self.assertEqual(Ys_sub[0], Ys[0])
     self.assertEqual(len(Ys_sub), 1)
     # basic test, cannot subset
     obj_weights = torch.tensor([1.0, 2.0])
     model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
         model, obj_weights, Ys=Ys
     )
     self.assertIsNone(ocs_sub)
     self.assertIs(model_sub, model)  # check identity
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertIs(Ys_sub, Ys)
     # test w/ outcome constraints, can subset
     obj_weights = torch.tensor([1.0, 0.0])
     ocs = (torch.tensor([[1.0, 0.0]]), torch.tensor([1.0]))
     model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
         model, obj_weights, ocs, Ys
     )
     self.assertEqual(model_sub.num_outputs, 1)
     self.assertTrue(torch.equal(obj_weights_sub, torch.tensor([1.0])))
     self.assertTrue(torch.equal(ocs_sub[0], torch.tensor([[1.0]])))
     self.assertTrue(torch.equal(ocs_sub[1], torch.tensor([1.0])))
     self.assertIs(Ys_sub[0], Ys[0])
     self.assertEqual(len(Ys_sub), 1)
     # test w/ outcome constraints, cannot subset
     obj_weights = torch.tensor([1.0, 0.0])
     ocs = (torch.tensor([[0.0, 1.0]]), torch.tensor([1.0]))
     model_sub, obj_weights_sub, ocs_sub, Ys_sub = subset_model(
         model, obj_weights, ocs, Ys
     )
     self.assertIs(model_sub, model)  # check identity
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertIs(ocs_sub, ocs)  # check identity
     self.assertIs(Ys_sub, Ys)
     # test unsupported
     yvar = torch.ones(1, 2)
     model = HeteroskedasticSingleTaskGP(x, y, yvar)
     model_sub, obj_weights_sub, ocs, Ys_sub = subset_model(
         model, obj_weights, Ys=Ys
     )
     self.assertIsNone(ocs)
     self.assertIs(model_sub, model)  # check identity
     self.assertIs(obj_weights_sub, obj_weights)  # check identity
     self.assertIs(Ys_sub, Ys)
     # test error on size inconsistency
     obj_weights = torch.ones(3)
     with self.assertRaises(RuntimeError):
         subset_model(model, obj_weights, Ys=Ys)
Ejemplo n.º 18
0
    def suggest_experiments(self,
                            num_experiments,
                            prev_res: DataSet = None,
                            **kwargs):
        from botorch.models import SingleTaskGP
        from botorch.fit import fit_gpytorch_model
        from botorch.optim import optimize_acqf
        import torch
        from gpytorch.mlls.exact_marginal_log_likelihood import (
            ExactMarginalLogLikelihood, )

        # Suggest lhs initial design or append new experiments to previous experiments
        if prev_res is None:
            lhs = LHS(self.domain)
            self.iterations += 1
            k = num_experiments if num_experiments > 1 else 2
            conditions = lhs.suggest_experiments(k)
            return conditions
        elif prev_res is not None and self.all_experiments is None:
            self.all_experiments = prev_res
        elif prev_res is not None and self.all_experiments is not None:
            self.all_experiments = self.all_experiments.append(prev_res)
        self.iterations += 1
        data = self.all_experiments

        # Get inputs (decision variables) and outputs (objectives)
        inputs, output = self.transform.transform_inputs_outputs(
            data,
            categorical_method=self.categorical_method,
            standardize_inputs=True,
            standardize_outputs=True,
        )

        # Train model
        model = SingleTaskGP(
            torch.tensor(inputs.data_to_numpy()).float(),
            torch.tensor(output.data_to_numpy()).float(),
        )
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll)

        # Create acquisition function
        objective = self.domain.output_variables[0]
        if objective.maximize:
            fbest_scaled = output.max()[objective.name]
            maximize = True
        else:
            fbest_scaled = output.min()[objective.name]
            maximize = False
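        # CategoricalEI: a project-specific EI wrapper, presumably handling categorical inputs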
        ei = CategoricalEI(self.domain,
                           model,
                           best_f=fbest_scaled,
                           maximize=maximize)

        # Optimize acquisition function
        results, acq_values = optimize_acqf(
            acq_function=ei,
            bounds=self._get_bounds(),
            num_restarts=20,
            q=num_experiments,
            raw_samples=100,
        )

        # Convert result to a DataSet
        result = DataSet(
            results.detach().numpy(),
            columns=inputs.data_columns,
        )

        # Untransform
        result = self.transform.un_transform(
            result,
            categorical_method=self.categorical_method,
            standardize_inputs=True)

        # Add metadata
        result[("strategy", "METADATA")] = "STBO"
        return result
Ejemplo n.º 19
0
def gp_torch_train(train_x: Tensor,
                   train_y: Tensor,
                   n_inducing_points: int,
                   tkwargs: Dict[str, Any],
                   init,
                   scale: bool,
                   covar_name: str,
                   gp_file: Optional[str],
                   save_file: str,
                   input_wp: bool,
                   outcome_transform: Optional[OutcomeTransform] = None,
                   options: Optional[Dict[str, Any]] = None) -> SingleTaskGP:
    assert train_y.ndim > 1, train_y.shape
    assert gp_file or init, (gp_file, init)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()

    if init:
        # build hyp
        print("Initialize GP hparams...")
        print("Doing Kmeans init...")
        assert n_inducing_points > 0, n_inducing_points
        kmeans = MiniBatchKMeans(n_clusters=n_inducing_points,
                                 batch_size=min(10000, train_x.shape[0]),
                                 n_init=25)
        start_time = time.time()
        kmeans.fit(train_x.cpu().numpy())
        end_time = time.time()
        print(f"K means took {end_time - start_time:.1f}s to finish...")
        inducing_points = torch.from_numpy(kmeans.cluster_centers_.copy())
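        # use the k-means centroids as initial inducing point locations for the sparse GP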

        output_scale = None
        if scale:
            output_scale = train_y.var().item()
        lscales = torch.empty(1, train_x.shape[1])
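        # median heuristic: per-dimension median pairwise distance, clamped away from zero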
        for i in range(train_x.shape[1]):
            lscales[0, i] = torch.pdist(train_x[:, i].view(
                -1, 1)).median().clamp(min=0.01)
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)

        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=inducing_points,
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = CustomWarp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )

        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
    else:
        # load model
        output_scale = 1  # will be overwritten when loading model
        lscales = torch.ones(
            train_x.shape[1])  # will be overwritten when loading model
        base_covar_module = query_covar(covar_name=covar_name,
                                        scale=scale,
                                        outputscale=output_scale,
                                        lscales=lscales)
        covar_module = InducingPointKernel(base_covar_module,
                                           inducing_points=torch.empty(
                                               n_inducing_points,
                                               train_x.shape[1]),
                                           likelihood=likelihood)

        input_warp_tf = None
        if input_wp:
            # Apply input warping
            # initialize input_warping transformation
            input_warp_tf = Warp(
                indices=list(range(train_x.shape[-1])),
                # use a prior with median at 1.
                # when a=1 and b=1, the Kumaraswamy CDF is the identity function
                concentration1_prior=LogNormalPrior(0.0, 0.75**0.5),
                concentration0_prior=LogNormalPrior(0.0, 0.75**0.5),
            )
        model = SingleTaskGP(train_x,
                             train_y,
                             covar_module=covar_module,
                             likelihood=likelihood,
                             input_transform=input_warp_tf,
                             outcome_transform=outcome_transform)
        print("Loading GP from file")
        state_dict = torch.load(gp_file)
        model.load_state_dict(state_dict)

    print("GP regression")
    start_time = time.time()
    model.to(**tkwargs)
    model.train()

    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    # set approx_mll to False since we are using an exact marginal log likelihood
    # fit_gpytorch_model(mll, optimizer=fit_gpytorch_torch, approx_mll=False, options=options)
    fit_gpytorch_torch(mll,
                       options=options,
                       approx_mll=False,
                       clip_by_value=input_wp,
                       clip_value=10.0)
    end_time = time.time()
    print(f"Regression took {end_time - start_time:.1f}s to finish...")

    print("Save GP model...")
    torch.save(model.state_dict(), save_file)
    print("Done training of GP.")

    model.eval()
    return model
Ejemplo n.º 20
0
import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.utils import standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.acquisition import UpperConfidenceBound
from botorch.optim import optimize_acqf

# Training data:
train_X = torch.rand(10, 2)
Y = 1 - torch.norm(train_X - 0.5, dim=-1, keepdim=True)
Y = Y + 0.1 * torch.randn_like(Y)  # add some noise
train_Y = standardize(Y)

# Fit the model:
gp = SingleTaskGP(train_X, train_Y)
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
fit_gpytorch_model(mll)

print(mll)

# Construct acquisition function:
UCB = UpperConfidenceBound(gp, beta=0.1)

print(UCB)

bounds = torch.stack([torch.zeros(2), torch.ones(2)])
candidate, acq_value = optimize_acqf(
    UCB,
    bounds=bounds,
    q=1,
    num_restarts=5,   # assumed reasonable defaults for this 2D toy problem
    raw_samples=20,
)
print(candidate, acq_value)
Ejemplo n.º 21
0
def fairBO_debiasing(model_state_dict, data, config, device):
    def evaluate(lr, beta1, beta2, alpha, T0, verbose=False):
        model = load_model(data.num_features,
                           config.get('hyperparameters', {}))
        model.load_state_dict(model_state_dict)
        model.to(device)

        loss_fn = torch.nn.BCELoss()
        optimizer = optim.Adam(model.parameters(),
                               lr=lr,
                               betas=(beta1, beta2),
                               weight_decay=alpha)
        scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, int(T0))

        for epoch in range(201):
            model.train()
            batch_idxs = torch.split(torch.randperm(data.X_valid.size(0)), 64)
            train_loss = 0
            for batch in batch_idxs:
                X = data.X_valid_gpu[batch, :]
                y = data.y_valid_gpu[batch]

                optimizer.zero_grad()
                loss = loss_fn(model(X)[:, 0], y)
                loss.backward()
                train_loss += loss.item()
                optimizer.step()
                scheduler.step(X.size(0))
            if epoch % 10 == 0 and verbose:
                model.eval()
                with torch.no_grad():
                    valid_loss = loss_fn(
                        model(data.X_valid_valid.to(device))[:, 0],
                        data.y_valid_valid.to(device))
                print(
                    f'=======> Epoch: {epoch} Train loss: {train_loss / len(batch_idxs)} '
                    f'Valid loss: {valid_loss}')

        model.eval()
        with torch.no_grad():
            scores = model(data.X_valid_gpu)[:, 0].reshape(-1).cpu().numpy()

        best_thresh, _ = get_best_thresh(scores,
                                         np.linspace(0, 1, 1001),
                                         data,
                                         config,
                                         valid=False,
                                         margin=config['fairBO']['margin'])
        return get_valid_objective(scores > best_thresh,
                                   data,
                                   config,
                                   valid=False), model, best_thresh

    space = config['fairBO']['hyperparameters']
    search_space = {}
    bounds_dict = {}
    for var in space:
        search_space[var] = np.arange(space[var]['start'], space[var]['end'],
                                      space[var]['step'])
        bounds_dict[var] = torch.tensor(
            [space[var]['start'], space[var]['end']])
        if space[var]['log_scale']:
            search_space[var] = np.exp(np.log(10) * search_space[var])
            bounds_dict[var] = torch.exp(float(np.log(10)) * bounds_dict[var])

    def sample_space():
        return {
            var: np.random.choice(rng)
            for var, rng in search_space.items()
        }

    X_hyp = []
    y_hyp = []
    best_model = [None, -math.inf, -1]
    for it in range(config['fairBO']['initial_budget']):
        X_hyp.append(sample_space())
        logger.info(
            f'(Iteration {it}) Evaluating fairBO with sample {X_hyp[-1]}')
        y_eval, model_candidate, thresh = evaluate(**X_hyp[-1])
        logger.info(f'Result: {y_eval}')
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh
        y_hyp.append(y_eval)

    X_df = pd.DataFrame(X_hyp)
    X = torch.tensor(X_df.to_numpy())
    y = torch.tensor(pd.DataFrame(y_hyp)[['performance', 'bias']].to_numpy())

    for it in range(config['fairBO']['total_budget'] -
                    config['fairBO']['initial_budget']):
        xscaler = StandardScaler()
        gp = SingleTaskGP(torch.tensor(xscaler.fit_transform(X)), y)
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        fit_gpytorch_model(mll)

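        # EI on outcome 0 (performance), subject to outcome 1 (bias) lying in [-0.05, 0.05]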
        cEI = ConstrainedExpectedImprovement(gp, y[:, 0].max().item(), 0,
                                             {1: (-0.05, 0.05)})
        bounds = torch.stack([bounds_dict[x] for x in X_df.columns])
        candidate, _ = optimize_acqf(cEI, bounds.T, 1, 100, 1024)
        inv_candidate = xscaler.inverse_transform(candidate)

        hyp = {k: v.item() for k, v in zip(X_df.columns, inv_candidate[0])}
        logger.info(
            f'(Iteration {it+config["fairBO"]["initial_budget"]}) Evaluating fairBO with sample {hyp}'
        )

        X = torch.cat((X, torch.tensor(inv_candidate)))  # keep X in the original (unscaled) space

        y_eval, model_candidate, thresh = evaluate(**hyp)
        logger.info(f'Result: {y_eval}')
        if y_eval['objective'] > best_model[1]:
            best_model[0] = copy.deepcopy(model_candidate)
            best_model[1] = y_eval['objective']
            best_model[2] = thresh
        y = torch.cat(
            (y, torch.tensor([[y_eval['performance'], y_eval['bias']]])))

    logger.info('Evaluating best fairBO debiased model.')
    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_valid_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_valid = get_valid_objective(y_pred, data, config)
    logger.info(f'Results: {results_valid}')

    best_model[0].eval()
    with torch.no_grad():
        y_pred = (best_model[0](data.X_test_gpu)[:, 0] >
                  best_model[2]).reshape(-1).cpu().numpy()
    results_test = get_test_objective(y_pred, data, config)
    return results_valid, results_test
Ejemplo n.º 22
0
class ParametricArm:
    """
    the class of an Arm
    """

    def __init__(
        self,
        function: SyntheticTestFunction,
        num_init_samples: int = 10,
        retrain_gp: bool = False,
        num_restarts: int = 10,
        raw_samples: int = 1000,
    ):
        """
        Initialize the Arm

        :param function: the function of the arm to sample from
        :param num_init_samples: number of samples to initialize with
        :param retrain_gp: retrain the model after each sample if True
        :param num_restarts: number of random restarts for acquisition function optimization
        :param raw_samples: number of raw samples for acquisition function optimization
        """
        self.function = function
        self.dim = function.dim
        self.bounds = Tensor(function._bounds).t()
        self.scale = self.bounds[1] - self.bounds[0]
        self.l_bounds = self.bounds[0]
        self.num_restarts = num_restarts
        self.raw_samples = raw_samples
        self._initialize_model(num_init_samples)
        self._update_current_best()
        self._maximize_kg()
        self.retrain_gp = retrain_gp
        self.num_samples = num_init_samples

    def _maximize_kg(self) -> None:
        """
        maximizes the KG acquisition function and stores the resulting value and
        the candidate
        """
        acq_func = qKnowledgeGradient(
            model=self.model, current_value=self.current_best_val
        )
        # acq_func = qExpectedImprovement(model=self.model, best_f=self.current_best_val)
        # acq_func = ExpectedImprovement(model=self.model, best_f=self.current_best_val)
        self.next_candidate, self.kg_value = optimize_acqf(
            acq_func,
            Tensor([[0], [1]]).repeat(1, self.dim),
            q=1,
            num_restarts=self.num_restarts,
            raw_samples=self.raw_samples,
        )

    def _update_current_best(self) -> None:
        """
        Updates the current best solution and corresponding value
        """
        pm = PosteriorMean(self.model)
        self.current_best_sol, self.current_best_val = optimize_acqf(
            pm,
            Tensor([[0], [1]]).repeat(1, self.dim),
            q=1,
            num_restarts=self.num_restarts,
            raw_samples=self.raw_samples,
        )

    def _function_call(self, X: Tensor) -> Tensor:
        """
        Scales the solutions to the function domain and returns the function value.
        :param X: Solutions from the relative scale of [0, 1]
        :return: function value
        """
        shape = list(X.size())
        shape[-1] = 1
        X = X * self.scale.repeat(shape) + self.l_bounds.repeat(shape)
        # TODO: adjust for minimization
        return -self.function(X).unsqueeze(1)

    def _initialize_model(self, num_init_samples: int) -> None:
        """
        initialize the GP model with num_init_samples of initial samples
        """
        self.train_X = torch.rand((num_init_samples, self.dim))
        self.train_Y = self._function_call(self.train_X)
        self.model = SingleTaskGP(
            self.train_X, self.train_Y, outcome_transform=Standardize(m=1)
        )
        mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
        fit_gpytorch_model(mll)

    def _update_model(self, new_sample: Tensor, new_observation: Tensor) -> None:
        """
        Update the GP model with the new observation(s)
        :param new_sample: sampled point
        :param new_observation: observed function value
        """
        self.train_X = torch.cat((self.train_X, new_sample), 0)
        self.train_Y = torch.cat((self.train_Y, new_observation), 0)
        self.model = self.model.condition_on_observations(new_sample, new_observation)
        if self.retrain_gp:
            mll = ExactMarginalLogLikelihood(self.model.likelihood, self.model)
            fit_gpytorch_model(mll)

    def sample_next(self):
        """
        sample the next point, i.e. the point that maximizes KG
        update the model and retrain if needed
        update the relevant values
        """
        Y = self._function_call(self.next_candidate)
        self._update_model(self.next_candidate, Y)
        self._update_current_best()
        self._maximize_kg()
Ejemplo n.º 23
0
 def test_model_list_to_batched(self):
     for dtype in (torch.float, torch.double):
         # basic test
         train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
         train_Y1 = train_X.sum(dim=-1, keepdim=True)
         train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
         gp1 = SingleTaskGP(train_X, train_Y1)
         gp2 = SingleTaskGP(train_X, train_Y2)
         list_gp = ModelListGP(gp1, gp2)
         batch_gp = model_list_to_batched(list_gp)
         self.assertIsInstance(batch_gp, SingleTaskGP)
         # test degenerate (single model)
         batch_gp = model_list_to_batched(ModelListGP(gp1))
         self.assertEqual(batch_gp._num_outputs, 1)
         # test different model classes
         gp2 = FixedNoiseGP(train_X, train_Y1, torch.ones_like(train_Y1))
         with self.assertRaises(UnsupportedError):
             model_list_to_batched(ModelListGP(gp1, gp2))
         # test non-batched models
         gp1_ = SimpleGPyTorchModel(train_X, train_Y1)
         gp2_ = SimpleGPyTorchModel(train_X, train_Y2)
         with self.assertRaises(UnsupportedError):
             model_list_to_batched(ModelListGP(gp1_, gp2_))
         # test list of multi-output models
         train_Y = torch.cat([train_Y1, train_Y2], dim=-1)
         gp2 = SingleTaskGP(train_X, train_Y)
         with self.assertRaises(UnsupportedError):
             model_list_to_batched(ModelListGP(gp1, gp2))
         # test different training inputs
         gp2 = SingleTaskGP(2 * train_X, train_Y2)
         with self.assertRaises(UnsupportedError):
             model_list_to_batched(ModelListGP(gp1, gp2))
          # mismatched scalar hyperparameters (here, the noise prior rate) should raise
         gp2 = SingleTaskGP(train_X, train_Y2)
         gp2.likelihood.noise_covar.noise_prior.rate.fill_(1.0)
         with self.assertRaises(UnsupportedError):
             model_list_to_batched(ModelListGP(gp1, gp2))
         # check tensor shape agreement
         gp2 = SingleTaskGP(train_X, train_Y2)
         gp2.covar_module.raw_outputscale = torch.nn.Parameter(
             torch.tensor([0.0], device=self.device, dtype=dtype)
         )
         with self.assertRaises(UnsupportedError):
             model_list_to_batched(ModelListGP(gp1, gp2))
         # test HeteroskedasticSingleTaskGP
         gp2 = HeteroskedasticSingleTaskGP(
             train_X, train_Y1, torch.ones_like(train_Y1)
         )
         with self.assertRaises(NotImplementedError):
             model_list_to_batched(ModelListGP(gp2))
         # test custom likelihood
         gp2 = SingleTaskGP(train_X, train_Y2, likelihood=GaussianLikelihood())
         with self.assertRaises(NotImplementedError):
             model_list_to_batched(ModelListGP(gp2))
         # test FixedNoiseGP
         train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
         train_Y1 = train_X.sum(dim=-1, keepdim=True)
         train_Y2 = (train_X[:, 0] - train_X[:, 1]).unsqueeze(-1)
         gp1_ = FixedNoiseGP(train_X, train_Y1, torch.rand_like(train_Y1))
         gp2_ = FixedNoiseGP(train_X, train_Y2, torch.rand_like(train_Y2))
         list_gp = ModelListGP(gp1_, gp2_)
         batch_gp = model_list_to_batched(list_gp)
Ejemplo n.º 24
0
    def test_qMS(self):
        d = 2
        q = 1
        num_data = 3
        q_batch_sizes = [1, 1, 1]
        num_fantasies = [2, 2, 1]
        t_batch_size = [2]
        for dtype in (torch.float, torch.double):
            bounds = torch.tensor([[0], [1]], device=self.device, dtype=dtype)
            bounds = bounds.repeat(1, d)
            train_X = torch.rand(num_data, d, device=self.device, dtype=dtype)
            train_Y = torch.rand(num_data, 1, device=self.device, dtype=dtype)
            model = SingleTaskGP(train_X, train_Y)

            # default evaluation tests
            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=[1, 1, 1],
                num_fantasies=num_fantasies,
            )
            q_prime = qMS.get_augmented_q_batch_size(q)
            eval_X = torch.rand(t_batch_size + [q_prime, d])
            result = qMS(eval_X)
            self.assertEqual(result.shape, torch.Size(t_batch_size))

            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[qExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
            )
            result = qMS(eval_X)
            self.assertEqual(result.shape, torch.Size(t_batch_size))

            # get induced fantasy model, with collapse_fantasy_base_samples
            fant_model = qMS.get_induced_fantasy_model(eval_X)
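            # fantasy batch dims are prepended in reverse stage order, hence num_fantasies[::-1]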
            self.assertEqual(
                fant_model.train_inputs[0].shape,
                torch.Size(num_fantasies[::-1] + t_batch_size +
                           [num_data + sum(q_batch_sizes), d]),
            )

            # without collapsing fantasy base samples
            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[qExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
                collapse_fantasy_base_samples=False,
            )
            q_prime = qMS.get_augmented_q_batch_size(q)
            eval_X = torch.rand(t_batch_size + [q_prime, d])
            result = qMS(eval_X)
            self.assertEqual(result.shape, torch.Size(t_batch_size))
            self.assertEqual(qMS.samplers[0].batch_range, (-3, -2))

            # get induced fantasy model, without collapse_fantasy_base_samples
            fant_model = qMS.get_induced_fantasy_model(eval_X)
            self.assertEqual(
                fant_model.train_inputs[0].shape,
                torch.Size(num_fantasies[::-1] + t_batch_size +
                           [num_data + sum(q_batch_sizes), d]),
            )

            # X_pending
            X_pending = torch.rand(5, d)
            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[qExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                num_fantasies=num_fantasies,
                inner_mc_samples=[2] * 4,
                X_pending=X_pending,
            )
            q_prime = qMS.get_augmented_q_batch_size(q)
            eval_X = torch.rand(t_batch_size + [q_prime, d])
            result = qMS(eval_X)
            self.assertEqual(result.shape, torch.Size(t_batch_size))

            # add dummy base_weights to samplers
            samplers = [
                SobolQMCNormalSampler(num_samples=nf,
                                      resample=False,
                                      collapse_batch_dims=True)
                for nf in num_fantasies
            ]
            for s in samplers:
                s.base_weights = torch.ones(s.sample_shape[0],
                                            1,
                                            device=self.device,
                                            dtype=dtype)

            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=[1, 1, 1],
                samplers=samplers,
            )
            q_prime = qMS.get_augmented_q_batch_size(q)
            eval_X = torch.rand(t_batch_size + [q_prime, d])
            result = qMS(eval_X)
            self.assertEqual(result.shape, torch.Size(t_batch_size))

            # extract candidates
            cand = qMS.extract_candidates(eval_X)
            self.assertEqual(cand.shape, torch.Size(t_batch_size + [q, d]))
Ejemplo n.º 25
0
    def step(self, snapshot_mode: str = 'latest', meta_info: dict = None):
        # Save snapshot to save the correct iteration count
        self.save_snapshot()

        if self.curr_checkpoint == -2:
            # Train the initial policies in the source domain
            self.train_init_policies()
            self.reached_checkpoint()  # setting counter to -1

        if self.curr_checkpoint == -1:
            # Evaluate the initial policies in the target domain
            self.eval_init_policies()
            self.reached_checkpoint()  # setting counter to 0

        if self.curr_checkpoint == 0:
            # Normalize the input data and standardize the output data
            cands_norm = self.ddp_projector.project_to(self.cands)
            cands_values_stdized = standardize(self.cands_values).unsqueeze(1)

            # Create and fit the GP model
            gp = SingleTaskGP(cands_norm, cands_values_stdized)
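            # floor the likelihood noise at 1e-5 to keep the fit numerically stable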
            gp.likelihood.noise_covar.register_constraint('raw_noise', GreaterThan(1e-5))
            mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
            fit_gpytorch_model(mll)
            print_cbt('Fitted the GP.', 'g')

            # Acquisition functions
            if self.acq_fcn_type == 'UCB':
                acq_fcn = UpperConfidenceBound(gp, beta=self.acq_param.get('beta', 0.1), maximize=True)
            elif self.acq_fcn_type == 'EI':
                acq_fcn = ExpectedImprovement(gp, best_f=cands_values_stdized.max().item(), maximize=True)
            elif self.acq_fcn_type == 'PI':
                acq_fcn = ProbabilityOfImprovement(gp, best_f=cands_values_stdized.max().item(), maximize=True)
            else:
                raise pyrado.ValueErr(given=self.acq_fcn_type, eq_constraint="'UCB', 'EI', 'PI'")

            # Optimize acquisition function and get new candidate point
            cand_norm, acq_value = optimize_acqf(
                acq_function=acq_fcn,
                bounds=to.stack([to.zeros(self.ddp_space.flat_dim), to.ones(self.ddp_space.flat_dim)]),
                q=1,
                num_restarts=self.acq_restarts,
                raw_samples=self.acq_samples
            )
            next_cand = self.ddp_projector.project_back(cand_norm)
            print_cbt(f'Found the next candidate: {next_cand.numpy()}', 'g')
            self.cands = to.cat([self.cands, next_cand], dim=0)
            pyrado.save(self.cands, 'candidates', 'pt', self.save_dir, meta_info)
            self.reached_checkpoint()  # setting counter to 1

        if self.curr_checkpoint == 1:
            # Train and evaluate a new policy, repeat if the resulting policy did not exceed the success threshold
            wrapped_trn_fcn = until_thold_exceeded(
                self.thold_succ_subrtn.item(), self.max_subrtn_rep
            )(self.train_policy_sim)
            wrapped_trn_fcn(self.cands[-1, :], prefix=f'iter_{self._curr_iter}')
            self.reached_checkpoint()  # setting counter to 2

        if self.curr_checkpoint == 2:
            # Evaluate the current policy in the target domain
            policy = pyrado.load(self.policy, 'policy', 'pt', self.save_dir,
                                        meta_info=dict(prefix=f'iter_{self._curr_iter}'))
            self.curr_cand_value = self.eval_policy(
                self.save_dir, self._env_real, policy, self.mc_estimator, f'iter_{self._curr_iter}',
                self.num_eval_rollouts_real
            )
            self.cands_values = to.cat([self.cands_values, self.curr_cand_value.view(1)], dim=0)
            pyrado.save(self.cands_values, 'candidates_values', 'pt', self.save_dir, meta_info)

            # Store the argmax after training and evaluating
            curr_argmax_cand = BayRn.argmax_posterior_mean(
                self.cands, self.cands_values.unsqueeze(1), self.ddp_space, self.acq_restarts, self.acq_samples
            )
            self.argmax_cand = to.cat([self.argmax_cand, curr_argmax_cand], dim=0)
            pyrado.save(self.argmax_cand, 'candidates_argmax', 'pt', self.save_dir, meta_info)
            self.reached_checkpoint()  # setting counter to 0
Ejemplo n.º 26
0
    def test_qMS_init(self):
        d = 2
        q = 1
        num_data = 3
        q_batch_sizes = [1, 1, 1]
        num_fantasies = [2, 2, 1]
        t_batch_size = [2]
        for dtype in (torch.float, torch.double):
            bounds = torch.tensor([[0], [1]], device=self.device, dtype=dtype)
            bounds = bounds.repeat(1, d)
            train_X = torch.rand(num_data, d, device=self.device, dtype=dtype)
            train_Y = torch.rand(num_data, 1, device=self.device, dtype=dtype)
            model = SingleTaskGP(train_X, train_Y)

            # exactly one of samplers or num_fantasies
            with self.assertRaises(UnsupportedError):
                qMultiStepLookahead(
                    model=model,
                    batch_sizes=q_batch_sizes,
                    valfunc_cls=[qExpectedImprovement] * 4,
                    valfunc_argfacs=[make_best_f] * 4,
                    inner_mc_samples=[2] * 4,
                )

            # cannot use qMS as its own valfunc_cls
            with self.assertRaises(UnsupportedError):
                qMultiStepLookahead(
                    model=model,
                    batch_sizes=q_batch_sizes,
                    valfunc_cls=[qMultiStepLookahead] * 4,
                    valfunc_argfacs=[make_best_f] * 4,
                    num_fantasies=num_fantasies,
                    inner_mc_samples=[2] * 4,
                )

            # construct using samplers
            samplers = [
                SobolQMCNormalSampler(num_samples=nf,
                                      resample=False,
                                      collapse_batch_dims=True)
                for nf in num_fantasies
            ]
            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                valfunc_cls=[qExpectedImprovement] * 4,
                valfunc_argfacs=[make_best_f] * 4,
                inner_mc_samples=[2] * 4,
                samplers=samplers,
            )
            self.assertEqual(qMS.num_fantasies, num_fantasies)

            # use default valfunc_cls, valfunc_argfacs, inner_mc_samples
            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                samplers=samplers,
            )
            self.assertEqual(len(qMS._valfunc_cls), 4)
            self.assertEqual(len(qMS.inner_samplers), 4)
            self.assertEqual(len(qMS._valfunc_argfacs), 4)

            # _construct_inner_samplers error catching tests below
            # AnalyticAcquisitionFunction with MCAcquisitionObjective
            with self.assertRaises(UnsupportedError):
                qMultiStepLookahead(
                    model=model,
                    objective=IdentityMCObjective(),
                    batch_sizes=q_batch_sizes,
                    valfunc_cls=[ExpectedImprovement] * 4,
                    valfunc_argfacs=[make_best_f] * 4,
                    num_fantasies=num_fantasies,
                )
            # AnalyticAcquisitionFunction and q > 1
            with self.assertRaises(UnsupportedError):
                qMultiStepLookahead(
                    model=model,
                    batch_sizes=[2, 2, 2],
                    valfunc_cls=[ExpectedImprovement] * 4,
                    valfunc_argfacs=[make_best_f] * 4,
                    num_fantasies=num_fantasies,
                    inner_mc_samples=[2] * 4,
                )
            # AnalyticAcquisitionFunction and inner_mc_samples
            with self.assertWarns(Warning):
                qMultiStepLookahead(
                    model=model,
                    batch_sizes=q_batch_sizes,
                    valfunc_cls=[ExpectedImprovement] * 4,
                    valfunc_argfacs=[make_best_f] * 4,
                    num_fantasies=num_fantasies,
                    inner_mc_samples=[2] * 4,
                )
            # MCAcquisitionFunction and non MCAcquisitionObjective
            with self.assertRaises(UnsupportedError):
                qMultiStepLookahead(
                    model=model,
                    objective=ScalarizedObjective(weights=torch.tensor([1.0])),
                    batch_sizes=[2, 2, 2],
                    valfunc_cls=[qExpectedImprovement] * 4,
                    valfunc_argfacs=[make_best_f] * 4,
                    num_fantasies=num_fantasies,
                    inner_mc_samples=[2] * 4,
                )

            # test warmstarting
            qMS = qMultiStepLookahead(
                model=model,
                batch_sizes=q_batch_sizes,
                samplers=samplers,
            )
            q_prime = qMS.get_augmented_q_batch_size(q)
            eval_X = torch.rand(t_batch_size + [q_prime, d])
            warmstarted_X = warmstart_multistep(
                acq_function=qMS,
                bounds=bounds,
                num_restarts=5,
                raw_samples=10,
                full_optimizer=eval_X,
            )
            self.assertEqual(warmstarted_X.shape, torch.Size([5, q_prime, d]))
Ejemplo n.º 27
0
    def test_batched_multi_output_to_single_output(self):
        for dtype in (torch.float, torch.double):
            # basic test
            train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
            train_Y = torch.stack(
                [
                    train_X.sum(dim=-1),
                    (train_X[:, 0] - train_X[:, 1]),
                ],
                dim=1,
            )
            batched_mo_model = SingleTaskGP(train_X, train_Y)
            batched_so_model = batched_multi_output_to_single_output(
                batched_mo_model)
            self.assertIsInstance(batched_so_model, SingleTaskGP)
            self.assertEqual(batched_so_model.num_outputs, 1)
            # test non-batched models
            non_batch_model = SimpleGPyTorchModel(train_X, train_Y[:, :1])
            with self.assertRaises(UnsupportedError):
                batched_multi_output_to_single_output(non_batch_model)
            gp2 = HeteroskedasticSingleTaskGP(train_X, train_Y,
                                              torch.ones_like(train_Y))
            with self.assertRaises(NotImplementedError):
                batched_multi_output_to_single_output(gp2)
            # test custom likelihood
            gp2 = SingleTaskGP(train_X,
                               train_Y,
                               likelihood=GaussianLikelihood())
            with self.assertRaises(NotImplementedError):
                batched_multi_output_to_single_output(gp2)
            # test FixedNoiseGP
            train_X = torch.rand(10, 2, device=self.device, dtype=dtype)
            batched_mo_model = FixedNoiseGP(train_X, train_Y,
                                            torch.rand_like(train_Y))
            batched_so_model = batched_multi_output_to_single_output(
                batched_mo_model)
            self.assertIsInstance(batched_so_model, FixedNoiseGP)
            self.assertEqual(batched_so_model.num_outputs, 1)
            # test SingleTaskMultiFidelityGP
            batched_mo_model = SingleTaskMultiFidelityGP(train_X,
                                                         train_Y,
                                                         iteration_fidelity=1)
            batched_so_model = batched_multi_output_to_single_output(
                batched_mo_model)
            self.assertIsInstance(batched_so_model, SingleTaskMultiFidelityGP)
            self.assertEqual(batched_so_model.num_outputs, 1)
            # test input transform
            input_tf = Normalize(
                d=2,
                bounds=torch.tensor([[0.0, 0.0], [1.0, 1.0]],
                                    device=self.device,
                                    dtype=dtype),
            )
            batched_mo_model = SingleTaskGP(train_X,
                                            train_Y,
                                            input_transform=input_tf)
            batch_so_model = batched_multi_output_to_single_output(
                batched_mo_model)
            self.assertIsInstance(batch_so_model.input_transform, Normalize)
            self.assertTrue(
                torch.equal(batch_so_model.input_transform.bounds,
                            input_tf.bounds))

            # test batched input transform
            input_tf2 = Normalize(
                d=2,
                bounds=torch.tensor([[-1.0, -1.0], [1.0, 1.0]],
                                    device=self.device,
                                    dtype=dtype),
                batch_shape=torch.Size([2]),
            )
            batched_mo_model = SingleTaskGP(train_X,
                                            train_Y,
                                            input_transform=input_tf2)
            batched_so_model = batched_multi_output_to_single_output(
                batched_mo_model)
            self.assertIsInstance(batched_so_model.input_transform, Normalize)
            self.assertTrue(
                torch.equal(batched_so_model.input_transform.bounds,
                            input_tf2.bounds))
            # test outcome transform
            batched_mo_model = SingleTaskGP(train_X,
                                            train_Y,
                                            outcome_transform=Standardize(m=2))
            with self.assertRaises(NotImplementedError):
                batched_multi_output_to_single_output(batched_mo_model)
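
Outside of a test, the converter takes a batched multi-output model and returns a batch-mode single-output model whose batch dimension indexes the outputs. A minimal sketch, using toy data (illustrative values only):

import torch
from botorch.models import SingleTaskGP
from botorch.models.converter import batched_multi_output_to_single_output

# toy two-output training data (illustrative values only)
train_X = torch.rand(10, 2, dtype=torch.double)
train_Y = torch.stack(
    [train_X.sum(dim=-1), train_X[:, 0] - train_X[:, 1]], dim=1
)

# a SingleTaskGP with two outputs is represented internally as a batched model
mo_model = SingleTaskGP(train_X, train_Y)

# convert; the result is a batch-mode, single-output SingleTaskGP
so_model = batched_multi_output_to_single_output(mo_model)
assert so_model.num_outputs == 1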
Example No. 28
def EI_run(seed,
           alpha,
           rho,
           x0=5,
           n0=100,
           iter_count=1000,
           mu_1=2,
           mu_2=5,
           sigma_1=1,
           sigma_2=1,
           SAA_seed=None):
    """
    Does a single run of the Expected Improvement algorithm for the simple normal problem, without derivatives
    :param seed: random seed
    :param alpha: risk level
    :param rho: risk measure
    :param x0: Ignored! Just to keep the same arglist as others
    :param n0: outer sample starting size
    :param iter_count: number of iterations
    :param kwargs: passed to estimator
    :param SAA_seed: if given, an SAA version is run with this seed.
    :return:
    """
    np.random.seed(seed)
    begin = datetime.datetime.now()
    args = (n0, alpha, rho, mu_1, mu_2, sigma_1, sigma_2, SAA_seed)

    points = torch.empty(iter_count, 1)
    values = torch.empty(points.shape)
    points[:4] = draw_sobol_samples(torch.tensor([[-5.], [5.]]), n=4,
                                    q=1).reshape(-1, 1)
    for i in range(4):
        values[i] = estimate_no_grad(points[i], *args)

    for i in range(4, iter_count):
        # fit the GP
        # map the points from [-5, 5] to the unit cube - BoTorch priors work best there
        transformed_points = points / 10. + 0.5
        model = SingleTaskGP(transformed_points[:i],
                             values[:i],
                             outcome_transform=Standardize(m=1))
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_model(mll)

        # optimize EI to get the candidate
        # only the first i entries of values have been populated
        acqf = ExpectedImprovement(model,
                                   best_f=values[:i].min(),
                                   maximize=False)
        best_p, _ = optimize_acqf(acqf,
                                  bounds=torch.tensor([[0.], [1.]]),
                                  q=1,
                                  num_restarts=10,
                                  raw_samples=50)
        # transform it back to original domain
        best_p = best_p.detach() * 10. - 5.
        points[i] = best_p
        values[i] = estimate_no_grad(points[i], *args)

    best_list = torch.empty(points.shape)
    for i in range(1, iter_count + 1):
        # pick the arg min of the history to return
        best_ind = torch.argmin(values[:i], dim=0)
        best_list[i - 1] = points[best_ind]

    x_list = best_list
    now = datetime.datetime.now()
    print('done time: %s' % (now - begin))
    print('call count: %d' % call_count)
    # np.save("sa_out/normal/EI_" + rho + "_" + str(alpha) + "_iter_" + str(iter_count) + "_x.npy", x_list)
    return x_list
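
A minimal invocation might look like the following sketch; the argument values are hypothetical, and `estimate_no_grad` and `call_count` are assumed to be defined in the surrounding module:

# hypothetical call: CVaR at risk level 0.7, 100 iterations, fixed SAA seed
x_list = EI_run(seed=0, alpha=0.7, rho="CVaR", n0=100, iter_count=100, SAA_seed=6)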
Example No. 29
def qehvi_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Hypervolume Improvement (qEHVI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is three or less.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    n_objectives = train_obj.size(-1)

    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        constraints = []
        n_constraints = train_con.size(1)

        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        additional_qehvi_kwargs = {
            "objective": IdentityMCMultiOutputObjective(
                outcomes=list(range(n_objectives))
            ),
            "constraints": constraints,
        }
    else:
        train_y = train_obj

        train_obj_feas = train_obj

        additional_qehvi_kwargs = {}

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.shape[-1]))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    # Approximate box decomposition similar to Ax when the number of objectives is large.
    # https://github.com/facebook/Ax/blob/master/ax/models/torch/botorch_moo_defaults
    if n_objectives > 2:
        # e.g. alpha = 1e-5 for three objectives, 1e-4 for four
        alpha = 10**(-8 + n_objectives)
    else:
        alpha = 0.0
    partitioning = NondominatedPartitioning(num_outcomes=n_objectives,
                                            Y=train_obj_feas,
                                            alpha=alpha)

    ref_point = train_obj.min(dim=0).values - 1e-8
    ref_point_list = ref_point.tolist()

    acqf = qExpectedHypervolumeImprovement(
        model=model,
        ref_point=ref_point_list,
        partitioning=partitioning,
        sampler=SobolQMCNormalSampler(num_samples=256),
        **additional_qehvi_kwargs,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options={
            "batch_limit": 5,
            "maxiter": 200,
            "nonnegative": True
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
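
Because this function matches the ``candidates_func`` signature, it can be handed to the sampler directly. A sketch with a hypothetical two-objective study (the objective values are illustrative):

import optuna
from optuna.integration import BoTorchSampler

def objective(trial):
    x = trial.suggest_float("x", 0.0, 1.0)
    y = trial.suggest_float("y", 0.0, 1.0)
    return x + y, x - y  # two toy objectives

sampler = BoTorchSampler(candidates_func=qehvi_candidates_func,
                         n_startup_trials=10)
study = optuna.create_study(directions=["minimize", "minimize"],
                            sampler=sampler)
study.optimize(objective, n_trials=32)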
Example No. 30
    def sample_arch(self, START_BO, g, steps, hyperparams, og_flops, full_val_loss, target_flops=0):
        if args.slim:
            if target_flops == 0:
                parameterization = hyperparams.random_sample()
                layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
            else:
                parameterization = np.ones(hyperparams.get_dim()) * args.lower_channel
                layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        else:
            # random sample to warmup history for MOBO
            if g < START_BO:
                if target_flops == 0:
                    f = np.random.rand(1) * (args.upper_channel-args.lower_channel) + args.lower_channel
                else:
                    f = args.lower_channel
                parameterization = np.ones(hyperparams.get_dim()) * f
                layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
            # put the largest model into the history
            elif g == START_BO:
                if target_flops == 0:
                    parameterization = np.ones(hyperparams.get_dim())
                else:
                    f = args.lower_channel
                    parameterization = np.ones(hyperparams.get_dim()) * f
                layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
            # MOBO
            else:
                # this is the scalarization (lambda_{FLOPs})
                rand = torch.rand(1).cuda()

                # standardize data for building Gaussian Processes
                train_X = torch.FloatTensor(self.X).cuda()
                train_Y_loss = torch.FloatTensor(np.array(self.Y)[:, 0].reshape(-1, 1)).cuda()
                train_Y_loss = standardize(train_Y_loss)

                train_Y_cost = torch.FloatTensor(np.array(self.Y)[:, 1].reshape(-1, 1)).cuda()
                train_Y_cost = standardize(train_Y_cost)

                new_train_X = train_X
                # GP for the cross entropy loss
                gp_loss = SingleTaskGP(new_train_X, train_Y_loss)
                mll = ExactMarginalLogLikelihood(gp_loss.likelihood, gp_loss)
                mll = mll.to('cuda')
                fit_gpytorch_model(mll)


                # GP for FLOPs
                # we use an additive GP since FLOPs have an (approximately) additive structure
                # the parameters for ScaleKernel and MaternKernel simply follow the defaults
                covar_module = AdditiveStructureKernel(
                    ScaleKernel(
                        MaternKernel(
                            nu=2.5,
                            lengthscale_prior=GammaPrior(3.0, 6.0),
                            num_dims=1
                        ),
                        outputscale_prior=GammaPrior(2.0, 0.15),
                    ),
                    num_dims=train_X.shape[1]
                )
                gp_cost = SingleTaskGP(new_train_X, train_Y_cost, covar_module=covar_module)
                mll = ExactMarginalLogLikelihood(gp_cost.likelihood, gp_cost)
                mll = mll.to('cuda')
                fit_gpytorch_model(mll)

                # Build acquisition functions
                UCB_loss = UpperConfidenceBound(gp_loss, beta=0.1).cuda()
                UCB_cost = UpperConfidenceBound(gp_cost, beta=0.1).cuda()

                # Combine them via augmented Tchebyshev scalarization
                self.mobo_obj = RandAcquisition(UCB_loss).cuda()
                self.mobo_obj.setup(UCB_loss, UCB_cost, rand)

                # Bounds for the optimization variable (alpha)
                lower = torch.ones(new_train_X.shape[1])*args.lower_channel
                upper = torch.ones(new_train_X.shape[1])*args.upper_channel
                self.mobo_bounds = torch.stack([lower, upper]).cuda()

                # Pareto-aware sampling
                if args.pas:
                    # Generate approximate Pareto front first
                    costs = []
                    for i in range(len(self.population_data)):
                        costs.append([self.population_data[i]['loss'], self.population_data[i]['ratio']])
                    costs = np.array(costs)
                    efficient_mask = is_pareto_efficient(costs)
                    costs = costs[efficient_mask]
                    loss = costs[:, 0]
                    flops = costs[:, 1]
                    sorted_idx = np.argsort(flops)
                    loss = loss[sorted_idx]
                    flops = flops[sorted_idx]
                    if flops[0] > args.lower_flops:
                        flops = np.concatenate([[args.lower_flops], flops.reshape(-1)])
                        loss = np.concatenate([[8], loss.reshape(-1)])
                    else:
                        flops = flops.reshape(-1)
                        loss = loss.reshape(-1)

                    if flops[-1] < args.upper_flops and (loss[-1] > full_val_loss):
                        flops = np.concatenate([flops.reshape(-1), [args.upper_flops]])
                        loss = np.concatenate([loss.reshape(-1), [full_val_loss]])
                    else:
                        flops = flops.reshape(-1)
                        loss = loss.reshape(-1)

                    # Equation (4) in paper
                    areas = (flops[1:]-flops[:-1])*(loss[:-1]-loss[1:])

                    # Quantize into 50 bins to sample from multinomial
                    self.sampling_weights = np.zeros(50)
                    k = 0
                    while k < len(flops) and flops[k] < args.lower_flops:
                        k+=1
                    for i in range(50):
                        lower = i/50.
                        upper = (i+1)/50.
                        if upper < args.lower_flops or lower > args.upper_flops or lower < args.lower_flops:
                            continue
                        cnt = 1
                        while ((k+1) < len(flops)) and upper > flops[k+1]:
                            self.sampling_weights[i] += areas[k]
                            cnt += 1
                            k += 1
                        if k < len(areas):
                            self.sampling_weights[i] += areas[k]
                        self.sampling_weights[i] /= cnt
                    if np.sum(self.sampling_weights) == 0:
                        self.sampling_weights = np.ones(50)
                        
                    if target_flops == 0:
                        val = np.arange(0.01, 1, 0.02)
                        chosen_target_flops = np.random.choice(val, p=(self.sampling_weights/np.sum(self.sampling_weights)))
                    else:
                        chosen_target_flops = target_flops
                    
                    # Binary search is here
                    lower_bnd, upper_bnd = 0, 1
                    lmda = 0.5
                    for i in range(10):
                        self.mobo_obj.rand = lmda

                        parameterization, acq_value = optimize_acqf(
                            self.mobo_obj, bounds=self.mobo_bounds, q=1, num_restarts=5, raw_samples=1000,
                        )

                        parameterization = parameterization[0].cpu().numpy()
                        layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
                        sim_flops = self.mask_pruner.simulate_and_count_flops(layer_budget)
                        ratio = sim_flops/og_flops

                        if np.abs(ratio - chosen_target_flops) <= 0.02:
                            break
                        if args.baseline > 0:
                            if ratio < chosen_target_flops:
                                lower_bnd = lmda
                                lmda = (lmda + upper_bnd) / 2
                            elif ratio > chosen_target_flops:
                                upper_bnd = lmda
                                lmda = (lmda + lower_bnd) / 2
                        else:
                            if ratio < chosen_target_flops:
                                upper_bnd = lmda
                                lmda = (lmda + lower_bnd) / 2
                            elif ratio > chosen_target_flops:
                                lower_bnd = lmda
                                lmda = (lmda + upper_bnd) / 2
                    rand[0] = lmda
                    writer.add_scalar('Binary search trials', i, steps)

                else:
                    parameterization, acq_value = optimize_acqf(
                        self.mobo_obj, bounds=self.mobo_bounds, q=1, num_restarts=5, raw_samples=1000,
                    )
                    parameterization = parameterization[0].cpu().numpy()

                layer_budget = hyperparams.get_layer_budget_from_parameterization(parameterization, self.mask_pruner)
        return layer_budget, parameterization, self.sampling_weights/np.sum(self.sampling_weights)
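
`is_pareto_efficient` is referenced above but not shown; a common NumPy implementation, assuming every column of `costs` is to be minimized, is sketched below:

import numpy as np

def is_pareto_efficient(costs):
    """Return a boolean mask of Pareto-efficient rows (lower is better)."""
    is_efficient = np.ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        if is_efficient[i]:
            # keep only points that are strictly better than c in some objective
            is_efficient[is_efficient] = np.any(costs[is_efficient] < c, axis=1)
            is_efficient[i] = True  # keep the point itself
    return is_efficient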
Example No. 31
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives. Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second rows correspond to the
            lower and upper bounds for each parameter respectively.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.

    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")
    if train_con is not None:
        train_y = torch.cat([train_obj, train_con], dim=-1)

        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            best_f = torch.zeros(())
        else:
            best_f = train_obj_feas.max()

        constraints = []
        n_constraints = train_con.size(1)
        for i in range(n_constraints):
            constraints.append(lambda Z, i=i: Z[..., -n_constraints + i])
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
        sequential=True,
    )

    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
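
For a standalone sanity check, the function can also be called directly on toy tensors (the data below is illustrative; in normal use Optuna supplies these arguments):

import torch

train_x = torch.rand(20, 3, dtype=torch.double)    # 20 trials, 3 params
train_obj = train_x.sum(dim=-1, keepdim=True)      # a single toy objective
bounds = torch.tensor([[0.0] * 3, [1.0] * 3],
                      dtype=torch.double)          # shape (2, n_params)

candidate = qei_candidates_func(train_x, train_obj, None, bounds)
print(candidate.shape)  # torch.Size([1, 3])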
import torch
from botorch.test_functions import Branin
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_model
from botorch.models.transforms import Standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from parametric_bandit.discrete_KG import DiscreteKGAlg

torch.manual_seed(0)

# generate input
n = 10
noise_std = 0.1
function = Branin(noise_std=noise_std)
dim = function.dim
train_X = torch.rand((n, dim))
train_Y = function(train_X).unsqueeze(-1)

# fit model
gp = SingleTaskGP(train_X, train_Y, outcome_transform=Standardize(m=1))
mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
fit_gpytorch_model(mll)

# get mu and Sigma from a single posterior evaluation
posterior = gp.posterior(train_X)
mu = posterior.mean
Sigma = posterior.mvn.covariance_matrix

# initiate the algorithm for testing
dkg = DiscreteKGAlg(M=n, error=noise_std**2, mu_0=mu, Sigma_0=Sigma)
print(dkg.find_maximizer())