Beispiel #1
0
    def test_q_expected_improvement(self, cuda=False):
        """Check qExpectedImprovement values and sampler base-sample handling.

        The model is mocked to return all-zero posterior samples; the asserted
        acquisition values are 0.0 for ``best_f=0`` and 1.0 for ``best_f=-1``.
        For each further sampler configuration the sampler's base samples are
        compared before and after a second evaluation.
        """
        device = torch.device("cuda" if cuda else "cpu")

        def check_base_samples(model, inputs, sampler, expect_unchanged):
            # Evaluate twice with `sampler` and compare the base samples
            # stored on the sampler before and after the second call.
            acqf = qExpectedImprovement(model=model, best_f=0, sampler=sampler)
            self.assertEqual(acqf(inputs).item(), 0.0)
            self.assertEqual(
                acqf.sampler.base_samples.shape, torch.Size([2, 1, 1, 1])
            )
            snapshot = acqf.sampler.base_samples.clone()
            acqf(inputs)
            unchanged = torch.equal(acqf.sampler.base_samples, snapshot)
            if expect_unchanged:
                self.assertTrue(unchanged)
            else:
                self.assertFalse(unchanged)

        for dtype in (torch.float, torch.double):
            tensor_kwargs = {"device": device, "dtype": dtype}
            # mocked posterior samples, event shape `b x q x t` = 1 x 1 x 1
            zero_samples = torch.zeros(1, 1, 1, **tensor_kwargs)
            mock_model = MockModel(MockPosterior(samples=zero_samples))
            # X is `q x d` = 1 x 1; it is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, **tensor_kwargs)

            # basic test, followed by the same sampler with a shifted best_f
            iid_sampler = IIDNormalSampler(num_samples=2)
            for best_f, expected in ((0, 0.0), (-1, 1.0)):
                acqf = qExpectedImprovement(
                    model=mock_model, best_f=best_f, sampler=iid_sampler
                )
                self.assertEqual(acqf(X).item(), expected)

            # basic test, no resample
            check_base_samples(
                mock_model,
                X,
                IIDNormalSampler(num_samples=2, seed=12345),
                expect_unchanged=True,
            )

            # basic test, qmc, no resample
            check_base_samples(
                mock_model,
                X,
                SobolQMCNormalSampler(num_samples=2),
                expect_unchanged=True,
            )

            # basic test, qmc, resample
            check_base_samples(
                mock_model,
                X,
                SobolQMCNormalSampler(num_samples=2, resample=True),
                expect_unchanged=False,
            )
Beispiel #2
0
    def test_q_expected_improvement(self, cuda=False):
        """Verify qExpectedImprovement on a mocked, all-zero posterior.

        Asserts the acquisition value for ``best_f=0`` (0.0) and ``best_f=-1``
        (1.0), then, per sampler configuration, whether the sampler's base
        samples are identical or different after a second evaluation.
        """
        device = torch.device("cuda" if cuda else "cpu")
        base_shape = torch.Size([2, 1, 1, 1])

        def run_scenario(model, inputs, sampler, keeps_base_samples):
            # One scenario: value check, base-sample shape check, and a
            # before/after comparison of the base samples around a re-run.
            acqf = qExpectedImprovement(model=model, best_f=0, sampler=sampler)
            value = acqf(inputs)
            self.assertEqual(value.item(), 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, base_shape)
            before = acqf.sampler.base_samples.clone()
            acqf(inputs)
            same = torch.equal(acqf.sampler.base_samples, before)
            if keeps_base_samples:
                self.assertTrue(same)
            else:
                self.assertFalse(same)

        for dtype in (torch.float, torch.double):
            # the event shape is `b x q x t` = 1 x 1 x 1
            posterior = MockPosterior(
                samples=torch.zeros(1, 1, 1, device=device, dtype=dtype)
            )
            mm = MockModel(posterior)
            # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, device=device, dtype=dtype)

            # basic test
            plain_sampler = IIDNormalSampler(num_samples=2)
            acqf = qExpectedImprovement(model=mm, best_f=0, sampler=plain_sampler)
            self.assertEqual(acqf(X).item(), 0.0)

            # test shifting best_f value
            acqf = qExpectedImprovement(model=mm, best_f=-1, sampler=plain_sampler)
            self.assertEqual(acqf(X).item(), 1.0)

            # basic test, no resample
            run_scenario(mm, X, IIDNormalSampler(num_samples=2, seed=12345), True)

            # basic test, qmc, no resample
            run_scenario(mm, X, SobolQMCNormalSampler(num_samples=2), True)

            # basic test, qmc, resample
            run_scenario(
                mm, X, SobolQMCNormalSampler(num_samples=2, resample=True), False
            )
Beispiel #3
0
def optimize_qparego_and_get_observation(model, train_obj, sampler):
    """Optimize a list of randomly scalarized qEI functions and observe the result.

    One acquisition function is built per batch slot (``BATCH_SIZE`` in total).
    Each draws its own weights from the simplex, turns them into a Chebyshev
    scalarization of ``train_obj``, and uses the best scalarized training value
    as ``best_f``. The list is optimized with ``optimize_acqf_list`` and the
    resulting candidates are unnormalized to ``problem.bounds`` and evaluated.

    Returns:
        Tuple ``(new_x, new_obj)``: the unnormalized candidates and the value
        of ``problem`` at those candidates.
    """

    def _build_acq_func():
        # Fresh random weights for every call, hence a different scalarization.
        simplex_weights = sample_simplex(problem.num_objectives, **tkwargs).squeeze()
        scalarized_objective = GenericMCObjective(
            get_chebyshev_scalarization(weights=simplex_weights, Y=train_obj)
        )
        return qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=scalarized_objective,
            best_f=scalarized_objective(train_obj).max(),
            sampler=sampler,
        )

    acq_func_list = [_build_acq_func() for _ in range(BATCH_SIZE)]

    # optimize
    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf_list(
        acq_function_list=acq_func_list,
        bounds=standard_bounds,
        num_restarts=NUM_RESTARTS,
        raw_samples=RAW_SAMPLES,  # used for initialization heuristic
        options=optimizer_options,
    )

    # observe new values
    new_x = unnormalize(candidates.detach(), bounds=problem.bounds)
    return new_x, problem(new_x)
Beispiel #4
0
    def select_query_point(self, batch_size=1):
        """
        Choose the next point(s) to evaluate.

        While fewer than ``self.initial_random_samples`` entries are stored in
        ``self.X``, a single point is drawn uniformly inside
        ``self.boundaries``. Otherwise a qExpectedImprovement acquisition
        function is optimized via ``global_optimization``.

        :param
            batch_size (int): number of query points requested from the
                acquisition optimizer (the random branch does not use it)
        :return:
            torch tensor; ``1 x d`` in the random branch, and whatever
            ``global_optimization`` returns otherwise (presumably
            ``batch_size x d`` -- confirm against that helper)
        """

        # TODO: Make the random initialization its own function so it can be done separately from the acquisition argmin
        still_sampling_randomly = len(self.X) < self.initial_random_samples

        if not still_sampling_randomly:
            # Query by maximizing the acquisition function
            print("---------------------")
            print('querying')

            print("self.X.shape: {}".format(self.X.shape))
            print("self.y.shape: {}".format(self.y.shape))

            # The model is only created on the first acquisition-based query,
            # i.e. when exactly `initial_random_samples` points are stored.
            if len(self.X) == self.initial_random_samples:
                self.model = ExactGaussianProcess(
                    train_x=self.X.float(),
                    train_y=self.y.float(),
                )

            # Acquisition function (qUpperConfidenceBound with beta=2.0 was
            # previously noted here as an alternative).
            incumbent = torch.max(self.y).item()
            qEI = qExpectedImprovement(model=self.model, best_f=incumbent)

            print("batch_size: {}".format(batch_size))

            # Optimize for a (batch_size x d_embedding) tensor query point
            search_box = torch.from_numpy(self.boundaries).float()
            X_query = global_optimization(
                objective_function=qEI,
                boundaries=search_box,
                batch_size=batch_size,  # number of query points to suggest
            )

            print("batched X_query: {}".format(X_query))
            print("batched X_query.shape: {}".format(X_query.shape))
        else:
            # Initialize with a random point: scale a unit-cube draw to the
            # box described by the (lower, upper) columns of the boundaries.
            unit_draw = self.rng.uniform(size=self.boundaries.shape[0])
            lower = self.boundaries[:, 0]
            upper = self.boundaries[:, 1]
            scaled_draw = unit_draw * (upper - lower) + lower
            X_query = torch.from_numpy(scaled_draw).unsqueeze(0)

        print("X concatenated: {}".format(self.X.shape))

        return X_query
Beispiel #5
0
 def test_fixed_features(self, cuda=False):
     """Compare qExpectedImprovement with its fixed-feature wrapper.

     For several input layouts the wrapped acquisition function, evaluated
     on the remaining columns, must match the unwrapped one evaluated on the
     full input. Gradients w.r.t. the free columns are compared as well, and
     passing a full-width input to the wrapper must raise ``ValueError``.
     """
     device = torch.device("cuda" if cuda else "cpu")
     train_X = torch.rand(5, 3, device=device)
     train_Y = train_X.norm(dim=-1)
     model = SingleTaskGP(train_X, train_Y).to(device=device).eval()
     qEI = qExpectedImprovement(model, best_f=0.0)

     def fix(columns, values):
         # Wrap qEI so the given columns of the 3-column input are held fixed.
         return FixedFeatureAcquisitionFunction(
             qEI, d=3, columns=columns, values=values
         )

     # test single point
     test_X = torch.rand(1, 3, device=device)
     wrapped = fix([2], test_X[..., -1:])
     full_value = qEI(test_X)
     self.assertTrue(torch.allclose(full_value, wrapped(test_X[..., :-1])))

     # test list input (only checks that evaluation runs)
     wrapped = fix([2], [0.5])
     wrapped(test_X[..., :-1])

     # test q-batch
     test_X = torch.rand(2, 3, device=device)
     wrapped = fix([1], test_X[..., [1]])
     full_value = qEI(test_X)
     self.assertTrue(torch.allclose(full_value, wrapped(test_X[..., [0, 2]])))

     # test t-batch with broadcasting
     test_X = torch.rand(2, 3, device=device).expand(4, 2, 3)
     wrapped = fix([2], test_X[0, :, -1:])
     full_value = qEI(test_X)
     self.assertTrue(torch.allclose(full_value, wrapped(test_X[..., :-1])))

     # test gradient
     test_X = torch.rand(1, 3, device=device, requires_grad=True)
     free_X = test_X[..., :-1].detach().clone().requires_grad_(True)
     full_value = qEI(test_X)
     wrapped = fix([2], test_X[..., [2]].detach())
     wrapped_value = wrapped(free_X)
     self.assertTrue(torch.allclose(full_value, wrapped_value))
     full_value.backward()
     wrapped_value.backward()
     self.assertTrue(torch.allclose(test_X.grad[..., :-1], free_X.grad))

     # test error b/c of incompatible input shapes
     with self.assertRaises(ValueError):
         wrapped(test_X)
Beispiel #6
0
    def test_penalized_acquisition_function(self):
        """Check PenalizedAcquisitionFunction values and X_pending handling.

        First the penalized value is compared with
        ``raw - regularization_parameter * penalty`` for an analytic
        ExpectedImprovement, and setting ``X_pending`` on it must raise
        ``UnsupportedError``. Then, for a qExpectedImprovement base function,
        ``set_X_pending`` must store the given tensor.
        """
        lmbda = 0.1
        for dtype in (torch.float, torch.double):
            tkwargs = {"device": self.device, "dtype": dtype}
            posterior = MockPosterior(
                mean=torch.tensor([1.0]), variance=torch.tensor([1.0])
            )
            mock_model = MockModel(posterior)

            # analytic EI penalized by a group lasso around `init_point`
            init_point = torch.tensor([0.5, 0.5, 0.5], **tkwargs)
            raw_acqf = ExpectedImprovement(model=mock_model, best_f=1.0)
            penalty = GroupLassoPenalty(init_point=init_point, groups=[[0, 2], [1]])
            acqf = PenalizedAcquisitionFunction(
                raw_acqf=raw_acqf,
                penalty_func=penalty,
                regularization_parameter=lmbda,
            )

            sample_point = torch.tensor([[1.0, 2.0, 3.0]], **tkwargs)
            raw_value = raw_acqf(sample_point)
            penalty_value = penalty(sample_point)
            expected_value = raw_value - lmbda * penalty_value
            self.assertTrue(torch.equal(expected_value, acqf(sample_point)))

            # testing X_pending for analytic raw_acqfn (EI)
            X_pending = torch.tensor([0.1, 0.2, 0.3], **tkwargs)
            with self.assertRaises(UnsupportedError):
                acqf.set_X_pending(X_pending)

            # testing X_pending for non-analytic raw_acqfn (qEI)
            mc_acqf = qExpectedImprovement(
                model=mock_model,
                best_f=0,
                sampler=IIDNormalSampler(num_samples=2),
            )
            init_point = torch.tensor([1.0, 1.0, 1.0], **tkwargs)
            penalized_mc_acqf = PenalizedAcquisitionFunction(
                raw_acqf=mc_acqf,
                penalty_func=L2Penalty(init_point=init_point),
                regularization_parameter=lmbda,
            )

            X_pending = torch.tensor([0.1, 0.2, 0.3], **tkwargs)
            penalized_mc_acqf.set_X_pending(X_pending)
            self.assertTrue(torch.equal(penalized_mc_acqf.X_pending, X_pending))
def optimize_qparego_and_get_observation(model, train_obj, train_con, sampler, obj_func, time_list, global_start_time):
    """Optimize one constrained, randomly scalarized qEI and evaluate the candidates.

    A single acquisition function is built from random simplex weights, a
    Chebyshev scalarization of ``train_obj`` and the constrained objective
    returned by ``get_constrained_mc_objective``. After optimization every
    candidate is passed to ``obj_func``; the ``'objs'`` and ``'constraints'``
    entries of each result are collected, and the time elapsed since
    ``global_start_time`` is appended to ``time_list`` after each evaluation.

    Returns:
        Tuple ``(new_x, new_obj, new_con)``: the detached candidates and the
        collected objectives / constraints, each reshaped to one row per
        candidate.
    """
    # A single set of random weights -> a single acquisition function.
    simplex_weights = sample_simplex(problem.num_objs, **tkwargs).squeeze()
    # construct augmented Chebyshev scalarization
    chebyshev = get_chebyshev_scalarization(weights=simplex_weights, Y=train_obj)
    # initialize ConstrainedMCObjective
    constrained_objective = get_constrained_mc_objective(
        train_obj=train_obj,
        train_con=train_con,
        scalarization=chebyshev,
    )
    # objectives and constraints side by side, as the constrained objective's input
    stacked_outcomes = torch.cat([train_obj, train_con], dim=-1)
    acq_func_list = [
        qExpectedImprovement(  # pyre-ignore: [28]
            model=model,
            objective=constrained_objective,
            best_f=constrained_objective(stacked_outcomes).max(),
            sampler=sampler,
        )
    ]

    # optimize
    candidates, _ = optimize_acqf_list(
        acq_function_list=acq_func_list,
        bounds=standard_bounds,
        num_restarts=20,
        raw_samples=1024,  # used for initialization heuristic
        options={"batch_limit": 5, "maxiter": 200},
    )

    # observe new values
    new_x = candidates.detach()
    n_candidates = new_x.shape[0]
    observed_objs, observed_cons = [], []
    for candidate in new_x:
        outcome = obj_func(candidate)
        objs, cons = outcome['objs'], outcome['constraints']
        observed_objs.append(objs)
        observed_cons.append(cons)
        # wall-clock time since the global start, recorded per evaluation
        time_list.append(time.time() - global_start_time)

    new_obj = torch.tensor(observed_objs, **tkwargs).reshape(n_candidates, -1)
    new_con = torch.tensor(observed_cons, **tkwargs).reshape(n_candidates, -1)
    print(f'evaluate {new_x.shape[0]} configs on real objective')
    return new_x, new_obj, new_con
Beispiel #8
0
    def test_acquisition_functions(self):
        """Check output shapes of several acquisition functions on a fitted model.

        After a short NUTS fit, each acquisition function is evaluated on
        random inputs of shape ``batch_shape x 1 x 4`` and its output shape
        must equal ``batch_shape``.
        """
        tkwargs = {"device": self.device, "dtype": torch.double}
        train_X, train_Y, _, model = self._get_data_and_model(
            infer_noise=True, **tkwargs
        )
        fit_fully_bayesian_model_nuts(
            model, warmup_steps=8, num_samples=5, thinning=2, disable_progbar=True
        )
        sampler = IIDNormalSampler(num_samples=2)

        # analytic acquisition functions
        analytic = [
            ExpectedImprovement(model=model, best_f=train_Y.max()),
            ProbabilityOfImprovement(model=model, best_f=train_Y.max()),
            PosteriorMean(model=model),
            UpperConfidenceBound(model=model, beta=4),
        ]
        # sampler-based single-objective acquisition functions
        monte_carlo = [
            qExpectedImprovement(model=model, best_f=train_Y.max(), sampler=sampler),
            qNoisyExpectedImprovement(model=model, X_baseline=train_X, sampler=sampler),
            qProbabilityOfImprovement(
                model=model, best_f=train_Y.max(), sampler=sampler
            ),
            qSimpleRegret(model=model, sampler=sampler),
            qUpperConfidenceBound(model=model, beta=4, sampler=sampler),
        ]
        # hypervolume-based functions on a two-output model list built from
        # the same model twice
        multi_objective = [
            qNoisyExpectedHypervolumeImprovement(
                model=ModelListGP(model, model),
                X_baseline=train_X,
                ref_point=torch.zeros(2, **tkwargs),
                sampler=sampler,
            ),
            qExpectedHypervolumeImprovement(
                model=ModelListGP(model, model),
                ref_point=torch.zeros(2, **tkwargs),
                sampler=sampler,
                partitioning=NondominatedPartitioning(
                    ref_point=torch.zeros(2, **tkwargs), Y=train_Y.repeat([1, 2])
                ),
            ),
        ]

        batch_shapes = ([5], [6, 5, 2])
        for acqf in analytic + monte_carlo + multi_objective:
            for batch_shape in batch_shapes:
                test_X = torch.rand(*batch_shape, 1, 4, **tkwargs)
                output = acqf(test_X)
                self.assertEqual(output.shape, torch.Size(batch_shape))
Beispiel #9
0
    def test_q_expected_improvement_batch(self):
        """Check qExpectedImprovement in batch mode on a mocked posterior.

        The mocked samples have event shape ``2 x 2 x 1`` with a single entry
        set to 1.0, so the two batch entries are asserted to be (1.0, 0.0) for
        ``best_f=0`` and (2.0, 1.0) for ``best_f=-1``. For each further
        sampler configuration the sampler's base samples are compared before
        and after a second evaluation.
        """
        base_shape = torch.Size([2, 1, 2, 1])

        def check_values(acqf, inputs, first, second):
            # Evaluate once and compare both batch entries.
            out = acqf(inputs)
            self.assertEqual(out[0].item(), first)
            self.assertEqual(out[1].item(), second)

        def check_base_samples(acqf, inputs, expect_unchanged):
            # Value check, base-sample shape check, then a before/after
            # comparison of the base samples around a second evaluation.
            check_values(acqf, inputs, 1.0, 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape, base_shape)
            snapshot = acqf.sampler.base_samples.clone()
            acqf(inputs)
            unchanged = torch.equal(acqf.sampler.base_samples, snapshot)
            if expect_unchanged:
                self.assertTrue(unchanged)
            else:
                self.assertFalse(unchanged)

        for dtype in (torch.float, torch.double):
            # the event shape is `b x q x t` = 2 x 2 x 1
            samples = torch.zeros(2, 2, 1, device=self.device, dtype=dtype)
            samples[0, 0, 0] = 1.0
            mock_model = MockModel(MockPosterior(samples=samples))

            # X is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, 1, device=self.device, dtype=dtype)

            # test batch mode
            iid_sampler = IIDNormalSampler(num_samples=2)
            acqf = qExpectedImprovement(
                model=mock_model, best_f=0, sampler=iid_sampler
            )
            check_values(acqf, X, 1.0, 0.0)

            # test shifting best_f value
            acqf = qExpectedImprovement(
                model=mock_model, best_f=-1, sampler=iid_sampler
            )
            check_values(acqf, X, 2.0, 1.0)

            # test batch mode, no resample
            acqf = qExpectedImprovement(
                model=mock_model,
                best_f=0,
                sampler=IIDNormalSampler(num_samples=2, seed=12345),
            )
            check_base_samples(acqf, X, expect_unchanged=True)  # 1-dim batch
            # 2-dim batch; the base samples should have the batch dim collapsed
            check_base_samples(acqf, X.expand(2, 1, 1), expect_unchanged=True)

            # test batch mode, qmc, no resample
            acqf = qExpectedImprovement(
                model=mock_model,
                best_f=0,
                sampler=SobolQMCNormalSampler(num_samples=2),
            )
            check_base_samples(acqf, X, expect_unchanged=True)

            # test batch mode, qmc, resample
            acqf = qExpectedImprovement(
                model=mock_model,
                best_f=0,
                sampler=SobolQMCNormalSampler(num_samples=2, resample=True),
            )
            check_base_samples(acqf, X, expect_unchanged=False)  # 1-dim batch
            # 2-dim batch; the base samples should have the batch dim collapsed
            check_base_samples(acqf, X.expand(2, 1, 1), expect_unchanged=False)
Beispiel #10
0
def optimize():
    """Run repeated Bayesian-optimization trials comparing qEI, qNEI and random search.

    For each of ``N_TRIALS`` trials, initial data is generated once and shared
    as the starting point of all three strategies. Then ``N_BATCH`` rounds are
    run: both GP models are fitted, the qEI and qNEI acquisition functions are
    rebuilt and optimized, a random observation is drawn, the training sets are
    extended, and the best value seen so far is recorded for each strategy.

    After all trials the per-trial histories are optionally written to an
    ``.npz`` file (``SAVE_RESULTS``) and plotted with confidence bands
    (``PLOT_RESULTS``). The function takes no arguments, returns nothing, and
    relies on module-level configuration and helpers.
    """

    # NOTE(review): hard-coded; the `else` branch of the progress print below
    # is therefore never taken.
    verbose = True

    # per-trial histories, one list entry per trial
    best_observed_all_ei, best_observed_all_nei, best_random_all = [], [], []
    train_x_all_ei, train_x_all_nei, train_x_all_random = [], [], []
    train_y_all_ei, train_y_all_nei, train_y_all_random = [], [], []

    # statistics over multiple trials
    for trial in range(1, N_TRIALS + 1):

        print('\nTrial {} of {}'.format(trial, N_TRIALS))
        best_observed_ei, best_observed_nei = [], []
        best_random = []

        # generate initial training data and initialize model
        print('\nGenerating {} random samples'.format(N_INITIAL_SAMPLES))
        train_x_ei, train_y_ei, best_y_ei, mean_y, std_y = generate_initial_data(
            n_samples=N_INITIAL_SAMPLES)
        # Undo the scaling by std_y / mean_y and flip the sign; presumably the
        # models are trained on negated, standardized lap times -- confirm
        # against generate_initial_data.
        denormalize = lambda x: -(x * std_y + mean_y)
        mll_ei, model_ei = initialize_model(train_x_ei, train_y_ei)

        # qNEI starts from the same initial data but gets its own model
        train_x_nei, train_y_nei, best_y_nei = train_x_ei, train_y_ei, best_y_ei
        mll_nei, model_nei = initialize_model(train_x_nei, train_y_nei)

        # the random baseline also starts from the same initial data
        train_x_random, train_y_random, best_y_random = train_x_ei, train_y_ei, best_y_ei

        best_observed_ei.append(denormalize(best_y_ei))
        best_observed_nei.append(denormalize(best_y_nei))
        best_random.append(denormalize(best_y_random))

        # run N_BATCH rounds of BayesOpt after the initial random batch
        for iteration in range(1, N_BATCH + 1):

            print('\nBatch {} of {}\n'.format(iteration, N_BATCH))
            t0 = time.time()

            # fit the models
            fit_gpytorch_model(mll_ei)
            fit_gpytorch_model(mll_nei)

            # update acquisition functions; both share the module-level
            # `qmc_sampler`
            qEI = qExpectedImprovement(
                model=model_ei,
                best_f=train_y_ei.max(),
                sampler=qmc_sampler,
            )

            qNEI = qNoisyExpectedImprovement(
                model=model_nei,
                X_baseline=train_x_nei,
                sampler=qmc_sampler,
            )

            # optimize acquisition function and evaluate new sample
            # NOTE(review): `.numpy()` is called without `.cpu()` in the three
            # prints below, unlike the `.cpu().numpy()` calls at the end of the
            # trial -- confirm these tensors always live on the CPU.
            new_x_ei, new_y_ei = optimize_acqf_and_get_observation(
                qEI, mean_y=mean_y, std_y=std_y)
            print('EI: time to traverse is {:.4f}s'.format(
                -(new_y_ei.numpy().ravel()[0] * std_y + mean_y)))
            new_x_nei, new_y_nei = optimize_acqf_and_get_observation(
                qNEI, mean_y=mean_y, std_y=std_y)
            print('NEI: time to traverse is {:.4f}s'.format(
                -(new_y_nei.numpy().ravel()[0] * std_y + mean_y)))
            new_x_random, new_y_random = sample_random_observations(
                mean_y=mean_y, std_y=std_y)
            print('Random: time to traverse is {:.4f}s'.format(
                -(new_y_random.numpy().ravel()[0] * std_y + mean_y)))

            # update training points
            train_x_ei = torch.cat([train_x_ei, new_x_ei])
            train_y_ei = torch.cat([train_y_ei, new_y_ei])

            train_x_nei = torch.cat([train_x_nei, new_x_nei])
            train_y_nei = torch.cat([train_y_nei, new_y_nei])

            train_x_random = torch.cat([train_x_random, new_x_random])
            train_y_random = torch.cat([train_y_random, new_y_random])

            # update progress: the maximum of the stored values, mapped back
            # through `denormalize`
            best_value_ei = denormalize(train_y_ei.max().item())
            best_value_nei = denormalize(train_y_nei.max().item())
            best_value_random = denormalize(train_y_random.max().item())

            best_observed_ei.append(best_value_ei)
            best_observed_nei.append(best_value_nei)
            best_random.append(best_value_random)

            # reinitialize the models so they are ready for fitting on next iteration
            # use the current state dict to speed up fitting
            mll_ei, model_ei = initialize_model(
                train_x_ei,
                train_y_ei,
                model_ei.state_dict(),
            )
            mll_nei, model_nei = initialize_model(
                train_x_nei,
                train_y_nei,
                model_nei.state_dict(),
            )
            t1 = time.time()

            if verbose:
                print(
                    'best lap time (random, qEI, qNEI) = {:.2f}, {:.2f}, {:.2f}, time to compute = {:.2f}s'
                    .format(best_value_random, best_value_ei, best_value_nei,
                            t1 - t0))
            else:
                print(".")

        # store this trial's histories
        best_observed_all_ei.append(best_observed_ei)
        best_observed_all_nei.append(best_observed_nei)
        best_random_all.append(best_random)

        train_x_all_ei.append(train_x_ei.cpu().numpy())
        train_x_all_nei.append(train_x_nei.cpu().numpy())
        train_x_all_random.append(train_x_random.cpu().numpy())

        train_y_all_ei.append(denormalize(train_y_ei.cpu().numpy()))
        train_y_all_nei.append(denormalize(train_y_nei.cpu().numpy()))
        train_y_all_random.append(denormalize(train_y_random.cpu().numpy()))

    # x-axis for the plot: one entry for the initial data plus one per batch,
    # scaled by BATCH_SIZE
    iters = np.arange(N_BATCH + 1) * BATCH_SIZE
    y_ei = np.asarray(best_observed_all_ei)
    y_nei = np.asarray(best_observed_all_nei)
    y_rnd = np.asarray(best_random_all)
    # timestamp used in both output file names
    savestr = time.strftime('%Y%m%d%H%M%S')

    #####################################################################
    # save results
    # NOTE(review): the file names below are tagged 'UCB' although this
    # function runs qEI / qNEI -- confirm whether the label is intentional.

    if SAVE_RESULTS:

        np.savez(
            'results/{}_raceline_data-{}.npz'.format('UCB', savestr),
            y_ei=y_ei,
            y_nei=y_nei,
            y_rnd=y_rnd,
            iters=iters,
            train_x_all_ei=np.asarray(train_x_all_ei),
            train_x_all_nei=np.asarray(train_x_all_nei),
            train_x_all_random=np.asarray(train_x_all_random),
            train_y_all_ei=np.asarray(train_y_all_ei),
            train_y_all_nei=np.asarray(train_y_all_nei),
            train_y_all_random=np.asarray(train_y_all_random),
            SEED=SEED,
        )

    #####################################################################
    # plot results

    if PLOT_RESULTS:

        def ci(y):
            # Half-width of the band: 1.96 standard errors across trials
            # (the usual normal-approximation 95% interval).
            return 1.96 * y.std(axis=0) / np.sqrt(N_TRIALS)

        plt.figure()
        # reset the color cycle so the mean lines and the bands drawn below
        # use the same colors in the same order
        plt.gca().set_prop_cycle(None)
        plt.plot(iters, y_rnd.mean(axis=0), linewidth=1.5)
        plt.plot(iters, y_ei.mean(axis=0), linewidth=1.5)
        plt.plot(iters, y_nei.mean(axis=0), linewidth=1.5)
        plt.gca().set_prop_cycle(None)
        plt.fill_between(iters,
                         y_rnd.mean(axis=0) - ci(y_rnd),
                         y_rnd.mean(axis=0) + ci(y_rnd),
                         label='random',
                         alpha=0.2)
        plt.fill_between(iters,
                         y_ei.mean(axis=0) - ci(y_ei),
                         y_ei.mean(axis=0) + ci(y_ei),
                         label='qEI',
                         alpha=0.2)
        plt.fill_between(iters,
                         y_nei.mean(axis=0) - ci(y_nei),
                         y_nei.mean(axis=0) + ci(y_nei),
                         label='qNEI',
                         alpha=0.2)
        plt.xlabel('number of observations (beyond initial points)')
        plt.ylabel('best lap times')
        plt.grid(True)
        plt.legend(loc=0)
        plt.savefig('results/{}_laptimes-{}.png'.format('UCB', savestr),
                    dpi=600)
        plt.show()
Beispiel #11
0
def bo_qei(config):
    """Optimizes over designs x in an offline optimization problem
    using Bayesian optimization with the qExpectedImprovement
    acquisition function

    The procedure (1) optionally trains a VAE and maps discrete designs
    into a standardized latent space, (2) trains a bootstrap ensemble of
    forward models, (3) seeds a GP with the best designs in the dataset,
    (4) runs several rounds of qEI optimization, scoring candidates with
    either the ground truth or the learned ensemble, and (5) saves and
    scores the best designs that were found

    Args:

    config: dict
        a dictionary of hyper parameters such as the learning rate
    """

    # create the training task and logger
    logger = Logger(config['logging_dir'])
    task = StaticGraphTask(config['task'], **config['task_kwargs'])

    if config['normalize_ys']:
        task.map_normalize_y()
    if task.is_discrete and not config["use_vae"]:
        task.map_to_logits()
    if config['normalize_xs']:
        task.map_normalize_x()

    x = task.x
    y = task.y

    if task.is_discrete and config["use_vae"]:

        vae_model = SequentialVAE(task,
                                  hidden_size=config['vae_hidden_size'],
                                  latent_size=config['vae_latent_size'],
                                  activation=config['vae_activation'],
                                  kernel_size=config['vae_kernel_size'],
                                  num_blocks=config['vae_num_blocks'])

        vae_trainer = VAETrainer(vae_model,
                                 vae_optim=tf.keras.optimizers.Adam,
                                 vae_lr=config['vae_lr'],
                                 beta=config['vae_beta'])

        # build the training and validation data for the vae
        train_data, val_data = build_pipeline(
            x=x,
            y=y,
            batch_size=config['vae_batch_size'],
            val_size=config['val_size'])

        # train the vae for the configured number of epochs
        vae_trainer.launch(train_data, val_data, logger, config['vae_epochs'])

        # map the x values to latent space
        x = vae_model.encoder_cnn.predict(x)[0]

        # standardize the latent codes; mean and standard_dev are reused
        # below to map latent points back before decoding
        mean = np.mean(x, axis=0, keepdims=True)
        standard_dev = np.std(x - mean, axis=0, keepdims=True)
        x = (x - mean) / standard_dev

    input_shape = x.shape[1:]
    input_size = np.prod(input_shape)

    # build the training and validation data for the ensemble
    train_data, val_data = build_pipeline(
        x=x,
        y=y,
        bootstraps=config['bootstraps'],
        batch_size=config['ensemble_batch_size'],
        val_size=config['val_size'])

    # make one forward model per bootstrap
    forward_models = [
        ForwardModel(input_shape,
                     hidden_size=config['hidden_size'],
                     num_layers=config['num_layers'],
                     initial_max_std=config['initial_max_std'],
                     initial_min_std=config['initial_min_std'])
        for b in range(config['bootstraps'])
    ]

    # create a trainer for the ensemble of forward models
    ensemble = Ensemble(forward_models,
                        forward_model_optim=tf.keras.optimizers.Adam,
                        forward_model_lr=config['ensemble_lr'])

    # train the ensemble for the configured number of epochs
    ensemble.launch(train_data, val_data, logger, config['ensemble_epochs'])

    # select the top `bo_gp_samples` initial designs from the dataset
    indices = tf.math.top_k(y[:, 0], k=config['bo_gp_samples'])[1]
    initial_x = tf.gather(x, indices, axis=0)
    initial_y = tf.gather(y, indices, axis=0)

    from botorch.models import FixedNoiseGP, ModelListGP
    from gpytorch.mlls.sum_marginal_log_likelihood import SumMarginalLogLikelihood
    from botorch.acquisition.objective import GenericMCObjective
    from botorch.optim import optimize_acqf
    from botorch import fit_gpytorch_model
    from botorch.acquisition.monte_carlo import qExpectedImprovement
    from botorch.sampling.samplers import SobolQMCNormalSampler
    from botorch.exceptions import BadInitialCandidatesWarning

    import torch
    import time
    import warnings

    warnings.filterwarnings('ignore', category=BadInitialCandidatesWarning)
    warnings.filterwarnings('ignore', category=RuntimeWarning)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.float32

    def objective(input_x):
        """Scores a torch tensor of flattened designs

        The last axis of input_x holds one flattened design and all
        leading axes are batch axes; the result has shape
        batch_shape + [1] and lives on `device` with type `dtype`
        """
        original_x = input_x
        # convert the tensor into numpy before using a TF model
        # (.cpu() is a no-op for tensors that are already on the cpu)
        input_x = input_x.detach().cpu().numpy()
        batch_shape = input_x.shape[:-1]
        # pass the input into a TF model
        input_x = tf.reshape(input_x, [-1, *input_shape])

        # optimize the ground truth or the learned model
        if config["optimize_ground_truth"]:
            if task.is_discrete and config["use_vae"]:
                # undo the latent standardization and decode to tokens
                input_x = tf.argmax(
                    vae_model.decoder_cnn.predict(input_x * standard_dev +
                                                  mean),
                    axis=2,
                    output_type=tf.int32)
            value = task.predict(input_x)
        else:
            value = ensemble.get_distribution(input_x).mean()

        ys = value.numpy()

        # restore the batch axes; numpy's reshape is not in-place, so
        # the result has to be assigned back to take effect
        ys = ys.reshape(list(batch_shape) + [1])
        # convert the scores back to pytorch tensors
        return torch.tensor(ys).type_as(original_x).to(device, dtype=dtype)

    NOISE_SE = config['bo_noise_se']
    train_yvar = torch.tensor(NOISE_SE**2, device=device, dtype=dtype)

    def initialize_model(train_x, train_obj, state_dict=None):
        """Builds a fixed-noise GP on the observations and its mll"""
        # define models for objective
        model_obj = FixedNoiseGP(train_x, train_obj,
                                 train_yvar.expand_as(train_obj)).to(train_x)
        # wrap the single objective model into a model list
        model = ModelListGP(model_obj)
        mll = SumMarginalLogLikelihood(model.likelihood, model)
        # load state dict if it is passed
        if state_dict is not None:
            model.load_state_dict(state_dict)
        return mll, model

    def obj_callable(Z):
        # the first model output is the quantity being maximized
        return Z[..., 0]

    # define the MC objective that is applied to posterior samples
    obj = GenericMCObjective(obj_callable)

    BATCH_SIZE = config['bo_batch_size']
    # per-dimension min and max of the dataset bound the search space
    bounds = torch.tensor([
        np.min(x, axis=0).reshape([input_size]).tolist(),
        np.max(x, axis=0).reshape([input_size]).tolist()
    ],
                          device=device,
                          dtype=dtype)

    def optimize_acqf_and_get_observation(acq_func):
        """Optimizes the acquisition function, and returns
        a new candidate and a noisy observation, or None if the
        optimizer raised a RuntimeError."""
        # optimize
        try:
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=bounds,
                q=BATCH_SIZE,
                num_restarts=config['bo_num_restarts'],
                raw_samples=config[
                    'bo_raw_samples'],  # used for initialization heuristic
                options={
                    "batch_limit": config['bo_batch_limit'],
                    "maxiter": config['bo_maxiter']
                })
        except RuntimeError:
            # the caller treats None as a signal to stop early
            return None
        # observe new values
        new_x = candidates.detach()
        exact_obj = objective(candidates)
        new_obj = exact_obj + NOISE_SE * torch.randn_like(exact_obj)
        return new_x, new_obj

    N_BATCH = config['bo_iterations']
    MC_SAMPLES = config['bo_mc_samples']

    best_observed_ei = []

    # flatten the initial designs and scores into GP training data
    train_x_ei = initial_x.numpy().reshape([initial_x.shape[0], input_size])
    train_x_ei = torch.tensor(train_x_ei).to(device, dtype=dtype)

    train_obj_ei = initial_y.numpy().reshape([initial_y.shape[0], 1])
    train_obj_ei = torch.tensor(train_obj_ei).to(device, dtype=dtype)

    best_observed_value_ei = train_obj_ei.max().item()
    mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)
    best_observed_ei.append(best_observed_value_ei)

    # run N_BATCH rounds of BayesOpt after the initial batch
    for iteration in range(1, N_BATCH + 1):

        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)

        # define the qEI acquisition module using a QMC sampler
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation
        qEI = qExpectedImprovement(model=model_ei,
                                   best_f=train_obj_ei.max(),
                                   sampler=qmc_sampler,
                                   objective=obj)

        # optimize and get new observation
        result = optimize_acqf_and_get_observation(qEI)
        if result is None:
            print("RuntimeError was encountered, most likely a "
                  "'symeig_cpu: the algorithm failed to converge'")
            break
        new_x_ei, new_obj_ei = result

        # update training points
        train_x_ei = torch.cat([train_x_ei, new_x_ei])
        train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

        # update progress: track the best observed objective value, the
        # same quantity used for the initial entry of best_observed_ei
        # (indexing the designs themselves would report an input feature)
        best_value_ei = train_obj_ei.max().item()
        best_observed_ei.append(best_value_ei)

        # reinitialize the models so they are ready for fitting on next iteration
        # use the current state dict to speed up fitting
        mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei,
                                            model_ei.state_dict())

        t1 = time.time()
        print(
            f"Batch {iteration:>2}: best_value = "
            f"({best_value_ei:>4.2f}), "
            f"time = {t1 - t0:>4.2f}.",
            end="")

    # move everything that was evaluated back to numpy
    x_sol = train_x_ei.detach().cpu().numpy()
    y_sol = train_obj_ei.detach().cpu().numpy()

    # select the top `solver_samples` designs among all evaluated points
    indices = tf.math.top_k(y_sol[:, 0], k=config['solver_samples'])[1]
    solution = tf.gather(x_sol, indices, axis=0)
    solution = tf.reshape(solution, [-1, *input_shape])

    if task.is_discrete and config["use_vae"]:
        # undo the latent standardization and decode to tokens
        solution = solution * standard_dev + mean
        logits = vae_model.decoder_cnn.predict(solution)
        solution = tf.argmax(logits, axis=2, output_type=tf.int32)

    # save the current solution to the disk
    np.save(os.path.join(config["logging_dir"], "solution.npy"),
            solution.numpy())

    # evaluate the found solution and log its score
    score = task.predict(solution)
    if task.is_normalized_y:
        score = task.denormalize_y(score)
    logger.record("score", score, N_BATCH, percentile=True)
Beispiel #12
0
    def test_q_expected_improvement(self):
        """Exercise qExpectedImprovement on a mocked single-point posterior.

        Checks the acquisition value for two `best_f` settings, whether
        the sampler's base samples are kept or redrawn between
        evaluations, and the behavior of `set_X_pending`.
        """
        # (sampler class, constructor kwargs, base samples must stay equal
        # after a second evaluation)
        sampler_cases = (
            (IIDNormalSampler, {"num_samples": 2, "seed": 12345}, True),
            (SobolQMCNormalSampler, {"num_samples": 2}, True),
            (SobolQMCNormalSampler, {"num_samples": 2, "resample": True},
             False),
        )
        for dtype in (torch.float, torch.double):
            tkwargs = {"device": self.device, "dtype": dtype}
            # the event shape is `b x q x t` = 1 x 1 x 1
            zero_samples = torch.zeros(1, 1, 1, **tkwargs)
            mock_model = MockModel(MockPosterior(samples=zero_samples))
            # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, **tkwargs)

            # basic test and shifted best_f, both with the same sampler
            shared_sampler = IIDNormalSampler(num_samples=2)
            for best_f, expected in ((0, 0.0), (-1, 1.0)):
                acqf = qExpectedImprovement(model=mock_model,
                                            best_f=best_f,
                                            sampler=shared_sampler)
                self.assertEqual(acqf(X).item(), expected)

            # TODO: Test batched best_f, batched model, batched evaluation

            # iid / qmc samplers, with and without resampling
            for sampler_cls, sampler_kwargs, keeps_samples in sampler_cases:
                acqf = qExpectedImprovement(
                    model=mock_model,
                    best_f=0,
                    sampler=sampler_cls(**sampler_kwargs))
                self.assertEqual(acqf(X).item(), 0.0)
                self.assertEqual(acqf.sampler.base_samples.shape,
                                 torch.Size([2, 1, 1, 1]))
                snapshot = acqf.sampler.base_samples.clone()
                acqf(X)
                same = torch.equal(acqf.sampler.base_samples, snapshot)
                check = self.assertTrue if keeps_samples else self.assertFalse
                check(same)

            # basic test for X_pending and warning; this reuses the
            # acquisition function built last in the loop above
            acqf.set_X_pending()
            self.assertIsNone(acqf.X_pending)
            acqf.set_X_pending(None)
            self.assertIsNone(acqf.X_pending)
            acqf.set_X_pending(X)
            self.assertEqual(acqf.X_pending, X)
            # the mocked posterior now has q = 2 (candidate plus pending)
            mock_model._posterior._samples = torch.zeros(1, 2, 1, **tkwargs)
            acqf(X)
            X_with_grad = torch.zeros(1, 1, 1, **tkwargs, requires_grad=True)
            with warnings.catch_warnings(
                    record=True) as caught, settings.debug(True):
                acqf.set_X_pending(X_with_grad)
                self.assertEqual(acqf.X_pending, X_with_grad)
                # exactly one BotorchWarning is expected for this input
                self.assertEqual(len(caught), 1)
                self.assertTrue(
                    issubclass(caught[-1].category, BotorchWarning))
Beispiel #13
0
    def test_q_expected_improvement_batch(self, cuda=False):
        """Exercise qExpectedImprovement on a mocked two-element batch.

        Checks the per-batch acquisition values for two `best_f`
        settings and whether the sampler's base samples (expected shape
        2 x 1 x 2 x 1) are kept or redrawn between evaluations.
        """
        device = torch.device("cuda" if cuda else "cpu")
        base_sample_shape = torch.Size([2, 1, 2, 1])

        def check_values(values, first, second):
            # compare the two batch entries against their expected values
            self.assertEqual(values[0].item(), first)
            self.assertEqual(values[1].item(), second)

        def check_base_samples(acqf, X_in, keeps_samples):
            # evaluate once, verify values and base sample shape, then
            # evaluate again and compare the base samples to the snapshot
            check_values(acqf(X_in), 1.0, 0.0)
            self.assertEqual(acqf.sampler.base_samples.shape,
                             base_sample_shape)
            snapshot = acqf.sampler.base_samples.clone()
            acqf(X_in)
            same = torch.equal(acqf.sampler.base_samples, snapshot)
            check = self.assertTrue if keeps_samples else self.assertFalse
            check(same)

        # (sampler class, constructor kwargs, also evaluate on X expanded
        # to 2 x 1 x 1, base samples must stay equal between evaluations)
        sampler_cases = (
            (IIDNormalSampler, {"num_samples": 2, "seed": 12345}, True, True),
            (SobolQMCNormalSampler, {"num_samples": 2}, False, True),
            (SobolQMCNormalSampler, {"num_samples": 2, "resample": True},
             True, False),
        )

        for dtype in (torch.float, torch.double):
            tkwargs = {"device": device, "dtype": dtype}
            # the event shape is `b x q x t` = 2 x 2 x 1
            samples = torch.zeros(2, 2, 1, **tkwargs)
            samples[0, 0, 0] = 1.0
            mock_model = MockModel(MockPosterior(samples=samples))

            # X is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, 1, **tkwargs)

            # batch mode and shifted best_f, both with the same sampler
            shared_sampler = IIDNormalSampler(num_samples=2)
            for best_f, first, second in ((0, 1.0, 0.0), (-1, 2.0, 1.0)):
                acqf = qExpectedImprovement(model=mock_model,
                                            best_f=best_f,
                                            sampler=shared_sampler)
                check_values(acqf(X), first, second)

            # iid / qmc samplers, with and without resampling
            for sampler_cls, sampler_kwargs, with_expanded, keeps_samples \
                    in sampler_cases:
                acqf = qExpectedImprovement(
                    model=mock_model,
                    best_f=0,
                    sampler=sampler_cls(**sampler_kwargs))
                check_base_samples(acqf, X, keeps_samples)
                if with_expanded:
                    # the base samples should have the batch dim collapsed
                    check_base_samples(acqf, X.expand(2, 1, 1),
                                       keeps_samples)
Beispiel #14
0
    # run N_BATCH rounds of BayesOpt after the initial random batch
    for iteration in range(1, N_BATCH + 1):

        # wall-clock start of this round (its use is outside this snippet)
        t0 = time.time()

        # fit the models
        fit_gpytorch_model(mll_ei)
        fit_gpytorch_model(mll_nei)

        # define the qEI and qNEI acquisition modules using a QMC sampler
        # (the same sampler instance is shared by both acquisition modules)
        qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # for best_f, we use the best observed noisy values as an approximation;
        # observations whose constraint value is > 0 are multiplied by 0 first,
        # so an infeasible point contributes 0 to the max rather than its value
        qEI = qExpectedImprovement(
            model=model_ei,
            best_f=(train_obj_ei * (train_con_ei <= 0).to(train_obj_ei)).max(),
            sampler=qmc_sampler,
            objective=constrained_obj,
        )

        # qNEI takes the previously evaluated points as X_baseline
        # instead of an explicit best_f
        qNEI = qNoisyExpectedImprovement(
            model=model_nei,
            X_baseline=train_x_nei,
            sampler=qmc_sampler,
            objective=constrained_obj,
        )

        # optimize and get new observation
        # (each call returns a candidate, its objective and its constraint)
        new_x_ei, new_obj_ei, new_con_ei = optimize_acqf_and_get_observation(
            qEI)
        new_x_nei, new_obj_nei, new_con_nei = optimize_acqf_and_get_observation(
            qNEI)
Beispiel #15
0
    def test_proximal(self):
        """Exercise ProximalAcquisitionFunction against a reference weighting.

        The wrapped acquisition value is compared with the unwrapped
        value times a Gaussian weight centered on the last training
        point; gradients and the error cases of the constructor are
        checked afterwards.
        """

        def reference_weight(points, center, weights):
            # density at `points` relative to the density at `center`
            # under a normal with covariance diag(weights)
            mv_normal = MultivariateNormal(center, torch.diag(weights))
            numerator = torch.exp(mv_normal.log_prob(points))
            return numerator / torch.exp(mv_normal.log_prob(center))

        for dtype in (torch.float, torch.double):
            tkwargs = {"device": self.device, "dtype": dtype}
            train_X = torch.rand(5, 3, **tkwargs)
            train_Y = train_X.norm(dim=-1, keepdim=True)
            model = SingleTaskGP(train_X, train_Y).to(**tkwargs).eval()
            EI = ExpectedImprovement(model, best_f=0.0)

            # test single point
            proximal_weights = torch.ones(3, **tkwargs)
            test_X = torch.rand(1, 3, **tkwargs)
            EI_prox = ProximalAcquisitionFunction(
                EI, proximal_weights=proximal_weights)

            plain = EI(test_X)
            weight = reference_weight(test_X, train_X[-1], proximal_weights)
            weighted = EI_prox(test_X)
            self.assertTrue(torch.allclose(weighted, plain * weight))
            self.assertTrue(weighted.shape == torch.Size([1]))

            # test t-batch with broadcasting
            test_X = torch.rand(4, 1, 3, **tkwargs)
            plain = EI(test_X)
            weight = reference_weight(test_X, train_X[-1], proximal_weights)
            weighted = EI_prox(test_X)
            self.assertTrue(
                torch.allclose(weighted, plain * weight.flatten()))
            self.assertTrue(weighted.shape == torch.Size([4]))

            # test MC acquisition function
            qEI = qExpectedImprovement(model, best_f=0.0)
            test_X = torch.rand(4, 1, 3, **tkwargs)
            qEI_prox = ProximalAcquisitionFunction(
                qEI, proximal_weights=proximal_weights)

            plain = qEI(test_X)
            weight = reference_weight(test_X, train_X[-1], proximal_weights)
            weighted = qEI_prox(test_X)
            self.assertTrue(
                torch.allclose(weighted, plain * weight.flatten()))
            self.assertTrue(weighted.shape == torch.Size([4]))

            # test gradient
            test_X = torch.rand(1, 3, requires_grad=True, **tkwargs)
            EI_prox(test_X).backward()

            # test model without train_inputs
            with self.assertRaises(UnsupportedError):
                ProximalAcquisitionFunction(
                    ExpectedImprovement(DummyModel(), 0.0), proximal_weights)

            # test proximal weights that do not match training_inputs
            train_X = torch.rand(5, 1, 3, **tkwargs)
            train_Y = train_X.norm(dim=-1, keepdim=True)
            model = SingleTaskGP(train_X,
                                 train_Y).to(device=self.device).eval()
            mismatched_weights = (
                lambda: proximal_weights[:1],
                lambda: torch.rand(3, 3, **tkwargs),
            )
            for make_weights in mismatched_weights:
                with self.assertRaises(ValueError):
                    ProximalAcquisitionFunction(
                        ExpectedImprovement(model, 0.0), make_weights())

            # test for x_pending points
            pending_acq = DummyAcquisitionFunction(model)
            pending_acq.set_X_pending(torch.rand(3, 3, **tkwargs))
            with self.assertRaises(UnsupportedError):
                ProximalAcquisitionFunction(pending_acq, proximal_weights)

            # test model with multi-batch training inputs
            train_X = torch.rand(5, 2, 3, **tkwargs)
            train_Y = train_X.norm(dim=-1, keepdim=True)
            multi_batch_model = SingleTaskGP(
                train_X, train_Y).to(device=self.device).eval()
            with self.assertRaises(UnsupportedError):
                ProximalAcquisitionFunction(
                    ExpectedImprovement(multi_batch_model, 0.0),
                    proximal_weights)
Beispiel #16
0
def qei_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based batch Expected Improvement (qEI).

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with single-objective optimization.

    Args:
        train_x:
            Previous parameter configurations. A ``torch.Tensor`` of shape
            ``(n_trials, n_params)``. ``n_trials`` is the number of already observed trials
            and ``n_params`` is the number of parameters. ``n_params`` may be larger than the
            actual number of parameters if categorical parameters are included in the search
            space, since these parameters are one-hot encoded.
            Values are not normalized.
        train_obj:
            Previously observed objectives. A ``torch.Tensor`` of shape
            ``(n_trials, n_objectives)``. ``n_trials`` is identical to that of ``train_x``.
            ``n_objectives`` is the number of objectives and must be 1 for qEI.
            Observations are not normalized.
        train_con:
            Objective constraints. A ``torch.Tensor`` of shape ``(n_trials, n_constraints)``.
            ``n_trials`` is identical to that of ``train_x``. ``n_constraints`` is the number of
            constraints. A constraint is violated if strictly larger than 0. If no constraints are
            involved in the optimization, this argument will be :obj:`None`.
        bounds:
            Search space bounds. A ``torch.Tensor`` of shape ``(2, n_params)``. ``n_params`` is
            identical to that of ``train_x``. The first and the second rows correspond to the
            lower and upper bounds for each parameter respectively.

    Returns:
        Next set of candidates. Usually the return value of BoTorch's ``optimize_acqf``.

    Raises:
        ValueError:
            If ``train_obj`` contains more than one objective.

    """

    if train_obj.size(-1) != 1:
        raise ValueError("Objective may only contain single values with qEI.")
    if train_con is not None:
        # The GP models the objective and all constraints jointly.
        train_y = torch.cat([train_obj, train_con], dim=-1)

        # A trial is feasible only if every constraint is satisfied.
        is_feas = (train_con <= 0).all(dim=-1)
        train_obj_feas = train_obj[is_feas]

        if train_obj_feas.numel() == 0:
            # TODO(hvy): Do not use 0 as the best observation.
            _logger.warning(
                "No objective values are feasible. Using 0 as the best objective in qEI."
            )
            # Keep the fallback on the same dtype and device as the observations.
            best_f = train_obj.new_zeros(())
        else:
            best_f = train_obj_feas.max()

        n_constraints = train_con.size(1)
        # ``i=i`` binds the index at definition time; the constraints occupy the
        # trailing ``n_constraints`` outputs of the model.
        constraints = [
            lambda Z, i=i: Z[..., -n_constraints + i]
            for i in range(n_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: Z[..., 0],
            constraints=constraints,
        )
    else:
        train_y = train_obj

        best_f = train_obj.max()

        objective = None  # Using the default identity objective.

    # The model is fit on parameters normalized by ``bounds``.
    train_x = normalize(train_x, bounds=bounds)

    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=train_y.size(-1)))
    mll = ExactMarginalLogLikelihood(model.likelihood, model)
    fit_gpytorch_model(mll)

    acqf = qExpectedImprovement(
        model=model,
        best_f=best_f,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # Optimize over bounds with lower row 0 and upper row 1, matching the
    # normalized inputs.
    standard_bounds = torch.zeros_like(bounds)
    standard_bounds[1] = 1

    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=standard_bounds,
        q=1,
        num_restarts=10,
        raw_samples=512,
        options={
            "batch_limit": 5,
            "maxiter": 200
        },
        sequential=True,
    )

    # Map the candidates back to the original parameter scale.
    candidates = unnormalize(candidates.detach(), bounds=bounds)

    return candidates
def evaluate(mth, run_i, seed):
    """Run one seeded, constrained qEI Bayesian-optimization trial.

    Args:
        mth: Method label; only echoed in the start banner.
        run_i: Run index; only echoed in the start banner.
        seed: Seed applied to both NumPy and PyTorch before any data is drawn.

    Returns:
        A tuple ``(X, perf_list, time_list)``: the unnormalized evaluated
        points as a float64 array, the objective values with their sign
        flipped back (infeasible points overwritten with
        ``INFEASIBLE_OBJ_VALUE``), and the timing list that was handed to
        the data-generation and optimization helpers.
    """
    print(mth, run_i, seed, '===== start =====', flush=True)

    def objective_function(x: torch.Tensor):
        # Inputs arrive normalized; map them back to the problem's own
        # bounds and hand the problem a float64 NumPy array.
        raw_point = unnormalize(x, bounds=problem_bounds)
        raw_point = raw_point.cpu().numpy().astype(np.float64)
        outcome = problem.evaluate(raw_point)
        # Objectives are negated because the acquisition function maximizes.
        outcome['objs'] = [-value for value in outcome['objs']]
        # Constraint values are left as-is: negative means feasible in botorch.
        return outcome

    time_list = []
    global_start_time = time.time()

    # Seed both generators so the run is repeatable.
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Initial design plus the first model.
    train_x, train_obj, train_con = generate_initial_data(
        initial_runs, objective_function, time_list, global_start_time)
    mll, model = initialize_model(train_x, train_obj, train_con)

    # One new observation per round until max_runs evaluations exist.
    for iteration in range(initial_runs + 1, max_runs + 1):
        round_start = time.time()
        fit_gpytorch_model(mll)
        sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

        # Incumbent: best feasible objective so far, or the penalty value
        # when every observed point violates at least one constraint.
        every_point_violates = (train_con > 0).any(dim=-1).all()
        if every_point_violates:
            incumbent = -INFEASIBLE_OBJ_VALUE
        else:
            feasible_rows = (train_con <= 0).all(dim=-1)
            incumbent = train_obj[feasible_rows].max()

        acquisition = qExpectedImprovement(
            model=model,
            best_f=incumbent,
            sampler=sampler,
            objective=constrained_obj,
        )
        new_x, new_obj, new_con = optimize_acqf_and_get_observation(
            acquisition, objective_function, time_list, global_start_time)

        # Grow the training set with the new observation.
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        train_con = torch.cat([train_con, new_con])

        # Build a fresh model, passing the previous state dict along so the
        # next fit can reuse it.
        previous_state = model.state_dict()
        mll, model = initialize_model(
            train_x, train_obj, train_con, previous_state)

        elapsed = time.time() - round_start
        report = (iteration, unnormalize(new_x, bounds=problem_bounds),
                  -new_obj, new_con, elapsed, time_list[-1])
        print("Iter %d: x=%s, perf=%s, con=%s, time=%.2f, global_time=%.2f" %
              report,
              flush=True)

    # Assemble the results in the problem's original scale and sign.
    X = unnormalize(train_x, bounds=problem_bounds)
    X = X.cpu().numpy().astype(np.float64)
    infeasible_rows = (train_con > 0).any(dim=-1)
    train_obj[infeasible_rows] = -INFEASIBLE_OBJ_VALUE
    flat_obj = train_obj.reshape(-1).cpu().numpy().astype(np.float64)
    perf_list = (-1 * flat_obj).tolist()
    return X, perf_list, time_list
Beispiel #18
0
    def test_sample_points_around_best(self):
        """Exercise ``sample_points_around_best`` across acquisition types.

        Covers NEI with ``X_baseline``, a model with a batched posterior
        mean, EI with and without ``train_inputs``, ``PosteriorMean`` with
        ``maximize=False``, an acquisition function without a model,
        constrained NEHVI, and the 21-dimensional case in which
        ``sample_perturbed_subset_dims`` is expected to be called.
        """
        tkwargs = {"device": self.device}
        _bounds = torch.ones(2, 2)
        _bounds[1] = 2
        for dtype in (torch.float, torch.double):
            tkwargs["dtype"] = dtype
            bounds = _bounds.to(**tkwargs)
            X_train = 1 + torch.rand(20, 2, **tkwargs)
            model = MockModel(
                MockPosterior(mean=(2 * X_train + 1).sum(dim=-1, keepdim=True))
            )
            # test NEI with X_baseline
            acqf = qNoisyExpectedImprovement(model, X_baseline=X_train)
            with mock.patch(
                "botorch.optim.initializers.sample_perturbed_subset_dims"
            ) as mock_subset_dims:
                X_rnd = sample_points_around_best(
                    acq_function=acqf,
                    n_discrete_points=4,
                    sigma=1e-3,
                    bounds=bounds,
                )
                mock_subset_dims.assert_not_called()
            # assertEqual, not assertTrue: assertTrue would treat the expected
            # shape as the failure message and always pass.
            self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
            self.assertTrue((X_rnd >= 1).all())
            self.assertTrue((X_rnd <= 2).all())
            # test model that returns a batched mean
            model = MockModel(
                MockPosterior(
                    mean=(2 * X_train + 1).sum(dim=-1, keepdim=True).unsqueeze(0)
                )
            )
            acqf = qNoisyExpectedImprovement(model, X_baseline=X_train)
            X_rnd = sample_points_around_best(
                acq_function=acqf,
                n_discrete_points=4,
                sigma=1e-3,
                bounds=bounds,
            )
            self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
            self.assertTrue((X_rnd >= 1).all())
            self.assertTrue((X_rnd <= 2).all())

            # test EI without X_baseline
            acqf = qExpectedImprovement(model, best_f=0.0)

            with warnings.catch_warnings(record=True) as w, settings.debug(True):

                X_rnd = sample_points_around_best(
                    acq_function=acqf,
                    n_discrete_points=4,
                    sigma=1e-3,
                    bounds=bounds,
                )
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, BotorchWarning))
                self.assertIsNone(X_rnd)

            # set train inputs
            model.train_inputs = (X_train,)
            X_rnd = sample_points_around_best(
                acq_function=acqf,
                n_discrete_points=4,
                sigma=1e-3,
                bounds=bounds,
            )
            self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
            self.assertTrue((X_rnd >= 1).all())
            self.assertTrue((X_rnd <= 2).all())

            # test an acquisition function that has objective=None
            # and maximize=False
            pm = PosteriorMean(model, maximize=False)
            self.assertIsNone(pm.objective)
            self.assertFalse(pm.maximize)
            X_rnd = sample_points_around_best(
                acq_function=pm,
                n_discrete_points=4,
                sigma=0,
                bounds=bounds,
                best_pct=1e-8,  # ensures that we only use best value
            )
            idx = (-model.posterior(X_train).mean).argmax()
            self.assertTrue((X_rnd == X_train[idx : idx + 1]).all(dim=-1).all())

            # test acquisition function that has no model
            ff = FixedFeatureAcquisitionFunction(pm, d=2, columns=[0], values=[0])
            # set X_baseline for testing purposes
            ff.X_baseline = X_train
            with warnings.catch_warnings(record=True) as w, settings.debug(True):
                X_rnd = sample_points_around_best(
                    acq_function=ff,
                    n_discrete_points=4,
                    sigma=1e-3,
                    bounds=bounds,
                )
                self.assertEqual(len(w), 1)
                self.assertTrue(issubclass(w[-1].category, BotorchWarning))
                self.assertIsNone(X_rnd)

            # test constraints with NEHVI
            constraints = [lambda Y: Y[..., 0]]
            ref_point = torch.zeros(2, **tkwargs)
            # test cases when there are and are not any feasible points
            for any_feas in (True, False):
                Y_train = torch.stack(
                    [
                        torch.linspace(-0.5, 0.5, X_train.shape[0], **tkwargs)
                        if any_feas
                        else torch.ones(X_train.shape[0], **tkwargs),
                        X_train.sum(dim=-1),
                    ],
                    dim=-1,
                )
                moo_model = MockModel(MockPosterior(mean=Y_train, samples=Y_train))
                acqf = qNoisyExpectedHypervolumeImprovement(
                    moo_model,
                    ref_point=ref_point,
                    X_baseline=X_train,
                    constraints=constraints,
                    cache_root=False,
                )
                X_rnd = sample_points_around_best(
                    acq_function=acqf,
                    n_discrete_points=4,
                    sigma=0.0,
                    bounds=bounds,
                )
                self.assertEqual(X_rnd.shape, torch.Size([4, 2]))
                # this should be true since sigma=0
                # and we should only be returning feasible points
                violation = constraints[0](Y_train)
                neg_violation = -violation.clamp_min(0.0)
                feas = neg_violation == 0
                # n_train x n_rnd tensor of booleans marking which training
                # point each returned point coincides with
                eq_mask = (X_train.unsqueeze(1) == X_rnd.unsqueeze(0)).all(dim=-1)
                if feas.any():
                    # check that all X_rnd correspond to feasible points
                    self.assertEqual(eq_mask[feas].sum(), 4)
                else:
                    # no feasible points: expect the two least-violating ones
                    idcs = torch.topk(neg_violation, k=2).indices
                    self.assertEqual(eq_mask[idcs].sum(), 4)
                self.assertTrue((X_rnd >= 1).all())
                self.assertTrue((X_rnd <= 2).all())
            # test that subset_dims is called if d>=21
            X_train = 1 + torch.rand(20, 21, **tkwargs)
            model = MockModel(
                MockPosterior(mean=(2 * X_train + 1).sum(dim=-1, keepdim=True))
            )
            bounds = torch.ones(2, 21, **tkwargs)
            bounds[1] = 2
            # test NEI with X_baseline
            acqf = qNoisyExpectedImprovement(model, X_baseline=X_train)
            with mock.patch(
                "botorch.optim.initializers.sample_perturbed_subset_dims",
                wraps=sample_perturbed_subset_dims,
            ) as mock_subset_dims:
                X_rnd = sample_points_around_best(
                    acq_function=acqf, n_discrete_points=5, sigma=1e-3, bounds=bounds
                )
            # the search space is 21-dimensional here, so the 5 returned
            # points have 21 columns
            self.assertEqual(X_rnd.shape, torch.Size([5, 21]))
            self.assertTrue((X_rnd >= 1).all())
            self.assertTrue((X_rnd <= 2).all())
            mock_subset_dims.assert_called_once()
            # test tiny prob_perturb to make sure we perturb at least one dimension
            X_rnd = sample_points_around_best(
                acq_function=acqf,
                n_discrete_points=5,
                sigma=1e-3,
                bounds=bounds,
                prob_perturb=1e-8,
            )
            self.assertTrue(
                ((X_rnd.unsqueeze(0) == X_train.unsqueeze(1)).all(dim=-1)).sum() == 0
            )
Beispiel #19
0
def main(argv):
    """Run repeated qEI Bayesian-optimization trials, writing one CSV per trial.

    For each of ``N_TRIALS`` trials the function draws initial data, runs
    ``N_BATCH`` rounds of fit / optimize-qEI / observe, then writes two rows
    to a CSV file: the maximizer and maximum of the posterior mean, and the
    best observed point with its objective value.

    Args:
        argv: Command-line arguments without the program name. ``-d N`` or
            ``--dataset=N`` selects the dataset id (default ``1``); ``-h``
            prints a usage line and exits.

    Raises:
        SystemExit: On ``-h`` (status 0) and on malformed options or a
            non-integer dataset id (status 2).
    """
    usage = 'random parallel with input dataset'
    dataset = 1

    try:
        opts, _args = getopt.getopt(argv, "hd:", ["dataset="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-d", "--dataset"):
            try:
                dataset = int(arg)
            except ValueError:
                # Treat a non-integer id like any other malformed option.
                print(usage)
                sys.exit(2)

    # Needed only for the final posterior-mean optimization; imported once
    # here instead of once per trial.
    from botorch.acquisition import PosteriorMean

    # average over multiple trials
    for trial in range(1, N_TRIALS + 1):

        print(f"\nTrial {trial:>2} of {N_TRIALS} ", end="")
        best_observed_ei = []

        # call helper functions to generate initial training data and initialize model
        train_x_ei, train_obj_ei, best_observed_value_ei, _ = generate_initial_data(
            dataset)
        mll_ei, model_ei = initialize_model(train_x_ei, train_obj_ei)

        best_observed_ei.append(best_observed_value_ei)

        # run N_BATCH rounds of BayesOpt after the initial random batch
        for iteration in range(1, N_BATCH + 1):

            # fit the models
            fit_gpytorch_model(mll_ei)

            # define the qEI acquisition module using a QMC sampler
            qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES)

            # for best_f, we use the best observed noisy values as an approximation
            qEI = qExpectedImprovement(
                model=model_ei,
                best_f=train_obj_ei.max(),
                sampler=qmc_sampler,
            )
            # optimize and get new observation
            new_x_ei, new_obj_ei = optimize_acqf_and_get_observation(
                qEI, dataset)

            # update training points
            train_x_ei = torch.cat([train_x_ei, new_x_ei])
            train_obj_ei = torch.cat([train_obj_ei, new_obj_ei])

            # update progress
            best_value_ei = train_obj_ei.max().item()
            best_observed_ei.append(best_value_ei)

            # reinitialize the models so they are ready for fitting on next iteration
            # use the current state dict to speed up fitting
            mll_ei, model_ei = initialize_model(
                train_x_ei,
                train_obj_ei,
                model_ei.state_dict(),
            )

        # best observed configuration and its objective value
        best_tensor_ei, indices_ei = torch.max(train_obj_ei, 0)
        train_best_x_ei = train_x_ei[indices_ei].cpu().numpy()

        # maximizer of the posterior mean of the final model
        argmax_pmean_ei, max_pmean_ei = optimize_acqf(
            acq_function=PosteriorMean(model_ei),
            bounds=bounds,
            q=1,
            num_restarts=20,
            raw_samples=2048,
        )

        csv_file_name = '/home/junjie/modes/botorch/' + folder_name + '/modes-i/hp-gp-qei-dataset-' + str(
            dataset) + '-trail' + str(trial) + '.csv'

        # newline='' lets the csv module control line endings itself; the
        # with-block closes the file, so no explicit close() is needed.
        with open(csv_file_name, 'w', newline='') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerow([
                str(argmax_pmean_ei.cpu().numpy()),
                str(max_pmean_ei.cpu().numpy())
            ])  # ei prediction
            writer.writerow(
                [str(train_best_x_ei),
                 str(best_tensor_ei.cpu().numpy())])  # ei observation
Beispiel #20
0
    def test_get_X_baseline(self):
        """Check ``get_X_baseline`` against several acquisition-function kinds.

        Cases: NEI and NEHVI carrying an explicit ``X_baseline``; EI with and
        without ``train_inputs`` on the model; an empty ``X_baseline``
        buffer; a wrapper acquisition function with neither baseline nor
        model; qEHVI on a ``ModelListGP``; MESMO; and a model whose input
        transform alters ``train_inputs`` in eval mode.
        """

        def assert_same_tensor(actual, expected):
            self.assertTrue(torch.equal(actual, expected))

        def assert_warns_and_returns_none(acq_function):
            # Exactly one BotorchWarning must be emitted and no baseline returned.
            with warnings.catch_warnings(record=True) as caught, settings.debug(True):
                result = get_X_baseline(acq_function=acq_function)
                self.assertEqual(len(caught), 1)
                self.assertTrue(issubclass(caught[-1].category, BotorchWarning))
                self.assertIsNone(result)

        for dtype in (torch.float, torch.double):
            tkwargs = {"device": self.device, "dtype": dtype}
            X_train = torch.rand(20, 2, **tkwargs)
            mock_mean = (2 * X_train + 1).sum(dim=-1, keepdim=True)
            model = MockModel(MockPosterior(mean=mock_mean))

            # NEI carries its own X_baseline
            acqf = qNoisyExpectedImprovement(model, X_baseline=X_train[:2])
            assert_same_tensor(get_X_baseline(acq_function=acqf), acqf.X_baseline)

            # EI has no X_baseline and the mock model has no train_inputs yet
            acqf = qExpectedImprovement(model, best_f=0.0)
            assert_warns_and_returns_none(acqf)

            # once train_inputs exist they are used
            model.train_inputs = (X_train,)
            assert_same_tensor(get_X_baseline(acq_function=acqf), X_train)

            # an empty X_baseline falls back to train_inputs
            acqf.register_buffer("X_baseline", X_train[:0])
            assert_same_tensor(get_X_baseline(acq_function=acqf), X_train)

            # acquisition function without X_baseline or model
            acqf = FixedFeatureAcquisitionFunction(
                acqf, d=2, columns=[0], values=[0]
            )
            assert_warns_and_returns_none(acqf)

            Y_train = 2 * X_train[:2] + 1
            moo_model = MockModel(MockPosterior(mean=Y_train, samples=Y_train))
            ref_point = torch.zeros(2, **tkwargs)

            # NEHVI with X_baseline
            acqf = qNoisyExpectedHypervolumeImprovement(
                moo_model,
                ref_point=ref_point,
                X_baseline=X_train[:2],
                cache_root=False,
            )
            assert_same_tensor(get_X_baseline(acq_function=acqf), acqf.X_baseline)

            # qEHVI without train_inputs (constructed only; nothing is
            # asserted on this instance)
            acqf = qExpectedHypervolumeImprovement(
                moo_model,
                ref_point=ref_point,
                partitioning=FastNondominatedPartitioning(
                    ref_point=ref_point, Y=Y_train
                ),
            )

            # train_inputs are extracted from a model list GP
            model_list = ModelListGP(
                SingleTaskGP(X_train, Y_train[:, :1]),
                SingleTaskGP(X_train, Y_train[:, 1:]),
            )
            acqf = qExpectedHypervolumeImprovement(
                model_list,
                ref_point=ref_point,
                partitioning=FastNondominatedPartitioning(
                    ref_point=ref_point, Y=Y_train
                ),
            )
            assert_same_tensor(get_X_baseline(acq_function=acqf), X_train)

            # MESMO, for which `acqf.mo_model` has to be used
            batched_mo_model = SingleTaskGP(X_train, Y_train)
            acqf = qMultiObjectiveMaxValueEntropy(
                batched_mo_model,
                sample_pareto_frontiers=lambda model: torch.rand(10, 2, **tkwargs),
            )
            assert_same_tensor(get_X_baseline(acq_function=acqf), X_train)

            # with an input transform that rewrites train_inputs in eval
            # mode, the untransformed train_inputs must be returned
            model = SingleTaskGP(
                X_train, Y_train[:, :1], input_transform=Warp(indices=[0, 1])
            )
            model.eval()
            self.assertFalse(torch.equal(model.train_inputs[0], X_train))
            acqf = qExpectedImprovement(model, best_f=0.0)
            assert_same_tensor(get_X_baseline(acq_function=acqf), X_train)
    def observe(self, X, y):
        """Record evaluated suggestions and rebuild the qEI acquisition function.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated

        Notes
        -----
        Any exception raised while processing (including the length check)
        is caught and reported with ``print``; nothing is re-raised.
        """
        try:
            assert len(X) == len(y)
            slot = 0

            for point, value in zip(X, y):
                # The archive keeps every solution; its fitness is stored
                # negated because BoTorch solves a maximization problem.
                self.archive.append(point)
                self.arc_fitness.append(-value)

                if self.iter == 1:
                    # First iteration: the population is simply filled.
                    self.population.append(point)
                    self.fitness.append(value)
                else:
                    # Later iterations: replace the member in this slot when
                    # the new value is no worse than the stored one.
                    if value <= self.fitness[slot]:
                        self.population[slot] = point
                        self.fitness[slot] = value

                    slot += 1

                # Non-finite observations are not forwarded to _observe.
                if np.isfinite(value):
                    self._observe(point, value)

            # Turn everything archived so far into training tensors.
            warped_archive = self.search_space.warp(self.archive)
            train_x = normalize(torch.from_numpy(warped_archive),
                                bounds=self.torch_bounds)
            n_archived = len(self.arc_fitness)
            targets = np.array(self.arc_fitness).reshape(n_archived, 1)
            train_y = standardize(torch.from_numpy(targets))

            # Build the GP; after the first iteration the previous model's
            # state dict is passed along as well.
            if self.iter == 1:
                built = self.make_model(train_x, train_y)
            else:
                built = self.make_model(train_x, train_y,
                                        self.model.state_dict())
            self.model, mll = built

            fit_gpytorch_model(mll)

            # qEI over the refitted model, with the best standardized target
            # as the incumbent.
            qmc_sampler = SobolQMCNormalSampler(num_samples=512)
            self.acquisition = qExpectedImprovement(model=self.model,
                                                    best_f=train_y.max(),
                                                    sampler=qmc_sampler)

        except Exception as e:
            print('Error: {} in observe()'.format(e))
Beispiel #22
0
    def test_q_expected_improvement(self):
        """Check qExpectedImprovement values, base-sample reuse and X_pending.

        With an all-zero mocked posterior the acquisition value equals
        ``-best_f`` for ``best_f <= 0``. The test also checks that base
        samples are kept between calls unless the sampler resamples, that a
        non-scalar ``best_f`` is rejected, that ``set_X_pending`` warns for
        inputs requiring gradients, and that an unsupported objective type
        raises.
        """
        expected_base_shape = torch.Size([2, 1, 1, 1])

        def check_base_samples(model, X, sampler, resampled):
            # Evaluate twice and compare the sampler's base samples between calls.
            acq = qExpectedImprovement(model=model, best_f=0, sampler=sampler)
            self.assertEqual(acq(X).item(), 0.0)
            self.assertEqual(acq.sampler.base_samples.shape, expected_base_shape)
            snapshot = acq.sampler.base_samples.clone()
            acq(X)
            identical = torch.equal(acq.sampler.base_samples, snapshot)
            if resampled:
                self.assertFalse(identical)
            else:
                self.assertTrue(identical)
            return acq

        for dtype in (torch.float, torch.double):
            # the event shape is `b x q x t` = 1 x 1 x 1
            samples = torch.zeros(1, 1, 1, device=self.device, dtype=dtype)
            mm = MockModel(MockPosterior(samples=samples))
            # X is `q x d` = 1 x 1; it is a dummy because the model is mocked
            X = torch.zeros(1, 1, device=self.device, dtype=dtype)

            # plain evaluation, then the same with a shifted best_f
            sampler = IIDNormalSampler(num_samples=2)
            for best_f, expected in ((0, 0.0), (-1, 1.0)):
                acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler)
                self.assertEqual(acqf(X).item(), expected)

            # best_f with more than one element is rejected
            with self.assertRaises(ValueError):
                qExpectedImprovement(
                    model=mm, best_f=torch.zeros(2, device=self.device, dtype=dtype)
                )

            # seeded iid sampler: no resampling
            sampler = IIDNormalSampler(num_samples=2, seed=12345)
            check_base_samples(mm, X, sampler, resampled=False)

            # qmc sampler: no resampling
            sampler = SobolQMCNormalSampler(num_samples=2)
            check_base_samples(mm, X, sampler, resampled=False)

            # qmc sampler with resample=True: base samples change
            sampler = SobolQMCNormalSampler(num_samples=2, resample=True)
            acqf = check_base_samples(mm, X, sampler, resampled=True)

            # X_pending handling and the warning for inputs requiring grad
            acqf.set_X_pending()
            self.assertIsNone(acqf.X_pending)
            acqf.set_X_pending(None)
            self.assertIsNone(acqf.X_pending)
            acqf.set_X_pending(X)
            self.assertEqual(acqf.X_pending, X)
            acqf(X)
            X2 = torch.zeros(
                1, 1, 1, device=self.device, dtype=dtype, requires_grad=True
            )
            with warnings.catch_warnings(record=True) as ws, settings.debug(True):
                acqf.set_X_pending(X2)
                self.assertEqual(acqf.X_pending, X2)
                self.assertEqual(len(ws), 1)
                self.assertTrue(issubclass(ws[-1].category, BotorchWarning))

        # an objective of an unsupported type is rejected; this reuses the
        # model, sampler and dtype left over from the last loop iteration
        obj = ScalarizedObjective(
            weights=torch.rand(2, device=self.device, dtype=dtype)
        )
        with self.assertRaises(UnsupportedError):
            qExpectedImprovement(model=mm, best_f=0, sampler=sampler, objective=obj)
Beispiel #23
0
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    constraints: Optional[List[Callable[[Tensor], Tensor]]] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Convenience function for initializing botorch acquisition functions.

    Args:
        acquisition_function_name: Name of the acquisition function. One of
            "qEI", "qPI", "qNEI", "qSR", "qUCB" or "qEHVI".
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        constraints: A list of callables, each mapping a Tensor of dimension
            `sample_shape x batch-shape x q x m` to a Tensor of dimension
            `sample_shape x batch-shape x q`, where negative values imply
            feasibility. Used when constraint_transforms are not passed
            as part of the objective. Only consulted for "qEHVI".
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).
        **kwargs: Acquisition-function specific options:
            `tau` (qPI, default 1e-3), `prune_baseline` (qNEI, default
            False), `beta` (qUCB, required), `ref_point` and `Y` (qEHVI,
            both required) and `alpha` (qEHVI, default 0.0).

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If a required keyword argument (`beta` for qUCB,
            `ref_point` or `Y` for qEHVI) is missing.
        NotImplementedError: If `acquisition_function_name` is not one of
            the supported names.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # initialize the sampler
    if qmc:
        sampler = SobolQMCNormalSampler(num_samples=mc_samples, seed=seed)
    else:
        sampler = IIDNormalSampler(num_samples=mc_samples, seed=seed)
    # instantiate and return the requested acquisition function
    if acquisition_function_name == "qEI":
        # incumbent: best objective value of the posterior mean at X_observed
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qExpectedImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qPI":
        best_f = objective(model.posterior(X_observed).mean).max().item()
        return monte_carlo.qProbabilityOfImprovement(
            model=model,
            best_f=best_f,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            tau=kwargs.get("tau", 1e-3),
        )
    elif acquisition_function_name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            model=model,
            X_baseline=X_observed,
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
            prune_baseline=kwargs.get("prune_baseline", False),
        )
    elif acquisition_function_name == "qSR":
        return monte_carlo.qSimpleRegret(model=model,
                                         sampler=sampler,
                                         objective=objective,
                                         X_pending=X_pending)
    elif acquisition_function_name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(
            model=model,
            beta=kwargs["beta"],
            sampler=sampler,
            objective=objective,
            X_pending=X_pending,
        )
    elif acquisition_function_name == "qEHVI":
        # Membership checks (as for qUCB) rather than catching KeyError, so
        # the ValueError is not reported as raised "during handling of" a
        # KeyError.
        if "ref_point" not in kwargs:
            raise ValueError(
                "`ref_point` must be specified in kwargs for qEHVI")
        if "Y" not in kwargs:
            raise ValueError("`Y` must be specified in kwargs for qEHVI")
        ref_point = kwargs["ref_point"]
        Y = kwargs["Y"]
        # keep only the points that satisfy every constraint
        if constraints is not None:
            feas = torch.stack([c(Y) <= 0 for c in constraints],
                               dim=-1).all(dim=-1)
            Y = Y[feas]
        obj = objective(Y)
        partitioning = NondominatedPartitioning(
            ref_point=torch.as_tensor(ref_point,
                                      dtype=Y.dtype,
                                      device=Y.device),
            Y=obj,
            alpha=kwargs.get("alpha", 0.0),
        )
        return moo_monte_carlo.qExpectedHypervolumeImprovement(
            model=model,
            ref_point=ref_point,
            partitioning=partitioning,
            sampler=sampler,
            objective=objective,
            constraints=constraints,
            X_pending=X_pending,
        )
    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}")
    def test_fixed_features(self):
        """Compare FixedFeatureAcquisitionFunction with the wrapped qEI.

        Evaluating the wrapper on the free columns must match evaluating qEI
        on the full input, for tensor, float and mixed fixed values, for
        t-batched inputs, and for gradients. A full-width input passed to
        the wrapper must raise ``ValueError``.
        """
        train_X = torch.rand(5, 3, device=self.device)
        train_Y = train_X.norm(dim=-1, keepdim=True)
        model = SingleTaskGP(train_X, train_Y).to(device=self.device).eval()
        base_acqf = qExpectedImprovement(model, best_f=0.0)

        def fix(columns, values):
            # Wrap the shared qEI with the given columns held fixed.
            return FixedFeatureAcquisitionFunction(
                base_acqf, d=3, columns=columns, values=values
            )

        def assert_close(lhs, rhs):
            self.assertTrue(torch.allclose(lhs, rhs))

        for q in (1, 2):
            # last column fixed to the values of the test point itself
            test_X = torch.rand(q, 3, device=self.device)
            wrapped = fix([2], test_X[..., -1:])
            full_val = base_acqf(test_X)
            fixed_val = wrapped(test_X[..., :-1])
            assert_close(full_val, fixed_val)

            # last column fixed to a float given in a list
            wrapped = fix([2], [0.5])
            fixed_val = wrapped(test_X[..., :-1])
            half_X = test_X.clone()
            half_X[..., 2] = 0.5
            full_val = base_acqf(half_X)
            assert_close(full_val, fixed_val)

            # first column fixed to a Tensor, last column to a float
            wrapped = fix([0, 2], [test_X[..., [0]], 0.5])
            fixed_val = wrapped(test_X[..., [1]])
            assert_close(full_val, fixed_val)

            # t-batched input, fixed tensor values broadcast over the batch
            test_X = torch.rand(q, 3, device=self.device).expand(4, q, 3)
            wrapped = fix([2], test_X[0, :, -1:])
            full_val = base_acqf(test_X)
            fixed_val = wrapped(test_X[..., :-1])
            assert_close(full_val, fixed_val)

            # t-batched input with a Tensor and a float as fixed values
            wrapped = fix([0, 2], [test_X[0, :, [0]], 0.5])
            half_X = test_X.clone()
            half_X[..., 2] = 0.5
            full_val = base_acqf(half_X)
            fixed_val = wrapped(test_X[..., [1]])
            assert_close(full_val, fixed_val)

        # gradients w.r.t. the free columns, last column fixed
        test_X = torch.rand(1, 3, device=self.device, requires_grad=True)
        free_X = test_X[..., :-1].detach().clone().requires_grad_(True)
        full_val = base_acqf(test_X)
        wrapped = fix([2], test_X[..., [2]].detach())
        fixed_val = wrapped(free_X)
        assert_close(full_val, fixed_val)
        full_val.backward()
        fixed_val.backward()
        assert_close(test_X.grad[..., :-1], free_X.grad)

        # gradients with the first and last columns fixed
        test_X = test_X.detach().clone()
        free_X = test_X[..., [1]].detach().clone().requires_grad_(True)
        test_X[..., 2] = 0.5
        test_X.requires_grad_(True)
        full_val = base_acqf(test_X)
        wrapped = fix([0, 2], [test_X[..., [0]].detach(), 0.5])
        fixed_val = wrapped(free_X)
        full_val.backward()
        fixed_val.backward()
        assert_close(test_X.grad[..., [1]], free_X.grad)

        # a full-width input does not fit the wrapper's free columns
        with self.assertRaises(ValueError):
            wrapped(test_X)
Beispiel #25
0
def main(
        benchmark_name,
        dataset_name,
        dimensions,
        method_name,
        num_runs,
        run_start,
        num_iterations,
        acquisition_name,
        # acquisition_optimizer_name,
        gamma,
        num_random_init,
        mc_samples,
        batch_size,
        num_fantasies,
        num_restarts,
        raw_samples,
        noise_variance_init,
        # use_ard,
        # use_input_warping,
        standardize_targets,
        input_dir,
        output_dir):
    """Run batched Bayesian optimisation on a benchmark and log each run.

    The run options are dumped to ``options.yaml`` and every run is written
    to ``<run_id:03d>.csv`` inside ``output_dir/<name>/<method_name>``.

    For each run id in ``range(run_start, num_runs)`` the loop performs
    ``num_iterations`` batches of ``batch_size`` evaluations. While fewer than
    ``num_random_init`` targets have been collected, candidates are drawn with
    ``torch.rand``; afterwards a ``FixedNoiseGP`` is fitted on all data and
    the acquisition function selected by ``acquisition_name`` is optimised
    with ``optimize_acqf``.

    Args:
        benchmark_name, dataset_name, dimensions, input_dir: Forwarded to
            ``make_benchmark`` (and, except ``input_dir``, to ``make_name``).
        method_name: Sub-directory name for the outputs.
        num_runs, run_start: Run ids iterate over
            ``range(run_start, num_runs)``.
        num_iterations: Number of batches per run.
        acquisition_name: ``"q-KG"`` or ``"q-EI"``.
        gamma: The EI incumbent is the ``1 - gamma`` quantile of the
            (optionally standardised) targets.
        num_random_init: Number of observations gathered from random
            candidates before the model is used.
        mc_samples: Sobol QMC sample count for ``"q-EI"``.
        batch_size: Number of candidates per batch (``q``).
        num_fantasies: Fantasy count for ``"q-KG"``.
        num_restarts, raw_samples: Forwarded to ``optimize_acqf``.
        noise_variance_init: Fixed observation-noise variance given to the GP.
        standardize_targets: Whether to apply ``standardize`` to the targets
            before fitting.
        output_dir: Root directory for the outputs.

    Returns:
        ``0`` once all runs have completed.

    Raises:
        ValueError: If a model-based batch is required and
            ``acquisition_name`` is neither ``"q-KG"`` nor ``"q-EI"``.
        AssertionError: If the sample/fantasy count needed by the selected
            acquisition function is missing or not positive.
    """

    # TODO(LT): Turn into options
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dtype = torch.double

    benchmark = make_benchmark(benchmark_name,
                               dimensions=dimensions,
                               dataset_name=dataset_name,
                               input_dir=input_dir)
    name = make_name(benchmark_name,
                     dimensions=dimensions,
                     dataset_name=dataset_name)

    output_path = Path(output_dir).joinpath(name, method_name)
    output_path.mkdir(parents=True, exist_ok=True)

    # persist the settings next to the results
    options = dict(gamma=gamma,
                   num_random_init=num_random_init,
                   acquisition_name=acquisition_name,
                   mc_samples=mc_samples,
                   batch_size=batch_size,
                   num_restarts=num_restarts,
                   raw_samples=raw_samples,
                   num_fantasies=num_fantasies,
                   noise_variance_init=noise_variance_init,
                   standardize_targets=standardize_targets)
    with output_path.joinpath("options.yaml").open('w') as f:
        yaml.dump(options, f)

    config_space = DenseConfigurationSpace(benchmark.get_config_space())
    bounds = create_bounds(config_space.get_bounds(),
                           device=device,
                           dtype=dtype)
    input_dim = config_space.get_dimensions()

    def func(tensor, *args, **kwargs):
        """
        Wrapper that receives and returns torch.Tensor
        """
        config = dict_from_tensor(tensor, cs=config_space)
        # turn into maximization problem
        res = -benchmark.evaluate(config).value
        return torch.tensor(res, device=device, dtype=dtype)

    for run_id in trange(run_start, num_runs, unit="run"):

        # The "*_adj" timestamps live on an adjusted clock: every evaluation
        # in a batch is offset from the same batch start and the batch ends
        # at the latest of them (presumably emulating parallel evaluation of
        # the batch -- confirm with the analysis code that reads "finished").
        run_begin_t = batch_end_t_adj = batch_end_t = datetime.now()

        frames = []

        features = []
        targets = []

        noise_variance = torch.tensor(noise_variance_init,
                                      device=device,
                                      dtype=dtype)
        # hyper-parameters of the previous fit, used to warm-start the next
        state_dict = None

        with trange(num_iterations) as iterations:

            for batch in iterations:

                if len(targets) < num_random_init:
                    # TODO(LT): support random seed
                    X_batch = torch.rand(size=(batch_size, input_dim),
                                         device=device,
                                         dtype=dtype)
                else:

                    # construct dataset
                    X = torch.vstack(features)
                    y = torch.hstack(targets).unsqueeze(axis=-1)
                    y = standardize(y) if standardize_targets else y

                    # construct model
                    model = FixedNoiseGP(X,
                                         y,
                                         noise_variance.expand_as(y),
                                         input_transform=None).to(X)
                    mll = ExactMarginalLogLikelihood(model.likelihood, model)

                    if state_dict is not None:
                        model.load_state_dict(state_dict)

                    # update model
                    fit_gpytorch_model(mll)

                    # construct acquisition function
                    tau = torch.quantile(y, q=1 - gamma)
                    iterations.set_postfix(tau=tau.item())

                    if acquisition_name == "q-KG":
                        assert num_fantasies is not None and num_fantasies > 0
                        acq = qKnowledgeGradient(model,
                                                 num_fantasies=num_fantasies)
                    elif acquisition_name == "q-EI":
                        assert mc_samples is not None and mc_samples > 0
                        qmc_sampler = SobolQMCNormalSampler(
                            num_samples=mc_samples)
                        acq = qExpectedImprovement(model=model,
                                                   best_f=tau,
                                                   sampler=qmc_sampler)
                    else:
                        # Without this branch `acq` would be unbound and the
                        # failure would surface as an UnboundLocalError below.
                        raise ValueError(
                            f"Unsupported acquisition_name "
                            f"{acquisition_name!r}; expected 'q-KG' or "
                            f"'q-EI'.")

                    # optimize acquisition function (the returned acquisition
                    # value is not needed)
                    X_batch, _ = optimize_acqf(acq_function=acq,
                                               bounds=bounds,
                                               q=batch_size,
                                               num_restarts=num_restarts,
                                               raw_samples=raw_samples,
                                               options=dict(batch_limit=5,
                                                            maxiter=200))

                    state_dict = model.state_dict()

                # begin batch evaluation
                batch_begin_t = datetime.now()
                # wall-clock time spent choosing this batch
                decision_duration = batch_begin_t - batch_end_t
                batch_begin_t_adj = batch_end_t_adj + decision_duration

                eval_end_times = []

                # TODO(LT): Deliberately not doing broadcasting for now since
                # batch sizes are so small anyway. Can revisit later if there
                # is a compelling reason to do it.
                rows = []
                for x_next in X_batch:

                    # eval begin time
                    eval_begin_t = datetime.now()

                    # evaluate blackbox objective
                    y_next = func(x_next)

                    # eval end time
                    eval_end_t = datetime.now()

                    # eval duration
                    eval_duration = eval_end_t - eval_begin_t

                    # adjusted eval end time is the duration added to the
                    # time at which batch eval was started
                    eval_end_t_adj = batch_begin_t_adj + eval_duration

                    eval_end_times.append(eval_end_t_adj)
                    elapsed = eval_end_t_adj - run_begin_t

                    # update dataset
                    features.append(x_next)
                    targets.append(y_next)

                    row = dict_from_tensor(x_next, cs=config_space)
                    # undo the sign flip applied in `func`
                    row["loss"] = -y_next.item()
                    row["cost_eval"] = eval_duration.total_seconds()
                    row["finished"] = elapsed.total_seconds()
                    rows.append(row)

                batch_end_t = datetime.now()
                batch_end_t_adj = max(eval_end_times)

                frame = pd.DataFrame(data=rows) \
                          .assign(batch=batch,
                                  cost_decision=decision_duration.total_seconds())
                frames.append(frame)

        data = pd.concat(frames, axis="index", ignore_index=True)
        data.to_csv(output_path.joinpath(f"{run_id:03d}.csv"))

    return 0
Beispiel #26
0
    def select_query_point(self, batch_size=1):
        """Propose the next query point(s) in the original and embedded space.

        While ``self.X`` holds fewer than ``self.initial_random_samples``
        points, a single point is drawn uniformly inside the embedding
        boundaries (``batch_size`` is not consulted on this path). Afterwards
        a ``qExpectedImprovement`` acquisition is handed to
        ``global_optimization`` together with ``batch_size``.

        :param
            batch_size (int): number of query points requested from the
                acquisition optimiser
        :return:
            tuple ``(X_query, X_query_embedded)``: the output of
            ``self._manifold_to_dataspace`` clipped to
            ``self.original_boundaries``, and the embedded-space tensor it
            was computed from
        :raises NotImplementedError:
            if ``self.embedding_boundaries_setting`` is neither ``'auto'``
            nor ``'constant'``
        """
        setting = self.embedding_boundaries_setting
        if setting not in ("auto", "constant"):
            raise NotImplementedError("embedding_boundaries_setting must be "
                                      "'auto' or 'constant'.")

        # Either way the result is a (d_embedding, 2) array of [low, high]
        if setting == "auto":
            # Approximate boundaries derived from the original-space ones
            bounds_emb = self._compute_boundaries_embedding(
                self.original_boundaries)
        else:
            # As described in the original paper. This is default.
            half_width = np.sqrt(self.d_embedding)
            bounds_emb = np.array(
                [[-half_width, half_width]] * self.d_embedding)

        # TODO: Make the random initialization its own function so it can be done separately from the acquisition argmin
        still_initializing = len(self.X) < self.initial_random_samples
        if still_initializing:
            # Uniform draw rescaled into [low, high] per embedded dimension
            unit_draw = self.rng.uniform(size=bounds_emb.shape[0])
            low = bounds_emb[:, 0]
            width = bounds_emb[:, 1] - bounds_emb[:, 0]
            scaled_draw = unit_draw * width + low
            X_query_embedded = torch.from_numpy(scaled_draw).unsqueeze(0)

            print("X_query_embedded.shape: {}".format(X_query_embedded.shape))
        else:
            # Query by maximizing the acquisition function
            print("---------------------")
            print('querying')

            print("self.X_embedded.shape: {}".format(self.X_embedded.shape))
            print("self.y.shape: {}".format(self.y.shape))

            # The model is only (re)created on the first model-based query
            if len(self.X) == self.initial_random_samples:
                self.model = ExactGaussianProcess(
                    train_x=self.X_embedded.float(),
                    train_y=self.y.float(),
                )

            incumbent = torch.max(self.y).item()
            acquisition = qExpectedImprovement(model=self.model,
                                               best_f=incumbent)

            print("batch_size: {}".format(batch_size))

            # Optimize for a (batch_size x d_embedding) tensor query point
            search_box = torch.from_numpy(bounds_emb).float()
            X_query_embedded = global_optimization(
                objective_function=acquisition,
                boundaries=search_box,
                batch_size=batch_size,
            )

            print("batched X_query_embedded: {}".format(X_query_embedded))
            print("batched X_query_embedded.shape: {}".format(
                X_query_embedded.shape))

        print("X_embedded concatenated: {}".format(self.X_embedded.shape))

        # Map to higher dimensional space and clip to the hard boundaries
        lifted = self._manifold_to_dataspace(X_query_embedded.numpy())
        X_query = np.clip(a=lifted,
                          a_min=self.original_boundaries[:, 0],
                          a_max=self.original_boundaries[:, 1])

        return X_query, X_query_embedded
Beispiel #27
0
def qparego_candidates_func(
    train_x: "torch.Tensor",
    train_obj: "torch.Tensor",
    train_con: Optional["torch.Tensor"],
    bounds: "torch.Tensor",
) -> "torch.Tensor":
    """Quasi MC-based extended ParEGO (qParEGO) for constrained multi-objective optimization.

    The objectives are collapsed into one value by a Chebyshev scalarization
    with weights sampled from the simplex; ``qExpectedImprovement`` on a
    ``SingleTaskGP`` is then optimised over the unit cube and the resulting
    candidate is mapped back to ``bounds``.

    The default value of ``candidates_func`` in :class:`~optuna.integration.BoTorchSampler`
    with multi-objective optimization when the number of objectives is larger than three.

    .. seealso::
        :func:`~optuna.integration.botorch.qei_candidates_func` for argument and return value
        descriptions.
    """

    num_objectives = train_obj.size(-1)

    simplex_weights = sample_simplex(num_objectives).squeeze()
    scalarization = get_chebyshev_scalarization(weights=simplex_weights,
                                                Y=train_obj)

    if train_con is None:
        # Unconstrained: the model only has to predict the objectives.
        train_y = train_obj
        objective = GenericMCObjective(scalarization)
    else:
        # Constraint outcomes are modelled as trailing output columns.
        train_y = torch.cat([train_obj, train_con], dim=-1)
        num_constraints = train_con.size(1)

        # `i=i` pins the column index per lambda (avoids late binding).
        constraint_fns = [
            lambda Z, i=i: Z[..., -num_constraints + i]
            for i in range(num_constraints)
        ]
        objective = ConstrainedMCObjective(
            objective=lambda Z: scalarization(Z[..., :num_objectives]),
            constraints=constraint_fns,
        )

    # Fit the surrogate on inputs rescaled with `bounds`.
    train_x = normalize(train_x, bounds=bounds)
    output_dim = train_y.size(-1)
    model = SingleTaskGP(train_x,
                         train_y,
                         outcome_transform=Standardize(m=output_dim))
    fit_gpytorch_model(ExactMarginalLogLikelihood(model.likelihood, model))

    incumbent = objective(train_y).max()
    acqf = qExpectedImprovement(
        model=model,
        best_f=incumbent,
        sampler=SobolQMCNormalSampler(num_samples=256),
        objective=objective,
    )

    # Same shape as `bounds`, with row 0 all zeros and row 1 all ones.
    unit_cube = torch.zeros_like(bounds)
    unit_cube[1] = 1

    optimizer_options = {"batch_limit": 5, "maxiter": 200}
    candidates, _ = optimize_acqf(
        acq_function=acqf,
        bounds=unit_cube,
        q=1,
        num_restarts=20,
        raw_samples=1024,
        options=optimizer_options,
        sequential=True,
    )

    return unnormalize(candidates.detach(), bounds=bounds)
Beispiel #28
0
    def test_q_expected_improvement(self, cuda=False):
        """Exercise qExpectedImprovement against a mocked all-zero posterior.

        Covers the returned value for two ``best_f`` settings, whether the
        sampler's base samples are kept or redrawn between calls, and the
        ``set_X_pending`` behaviour including the warning raised for a
        pending tensor that requires grad.
        """
        device = torch.device("cuda" if cuda else "cpu")
        base_shape = torch.Size([2, 1, 1, 1])

        for dtype in (torch.float, torch.double):
            tkwargs = {"device": device, "dtype": dtype}
            # the event shape is `b x q x t` = 1 x 1 x 1
            mm = MockModel(MockPosterior(samples=torch.zeros(1, 1, 1, **tkwargs)))
            # X is `q x d` = 1 x 1. X is a dummy and unused b/c of mocking
            X = torch.zeros(1, 1, **tkwargs)

            # basic test, then the same sampler with a shifted best_f
            sampler = IIDNormalSampler(num_samples=2)
            for best_f, expected_value in ((0, 0.0), (-1, 1.0)):
                acqf = qExpectedImprovement(model=mm, best_f=best_f, sampler=sampler)
                self.assertEqual(acqf(X).item(), expected_value)

            # Each case: (sampler factory, assertion on "base samples unchanged").
            # Factories keep sampler construction lazy, in the original order.
            resampling_cases = (
                # iid with a seed, no resample
                (lambda: IIDNormalSampler(num_samples=2, seed=12345),
                 self.assertTrue),
                # qmc, no resample
                (lambda: SobolQMCNormalSampler(num_samples=2),
                 self.assertTrue),
                # qmc, resample
                (lambda: SobolQMCNormalSampler(num_samples=2, resample=True),
                 self.assertFalse),
            )
            for make_sampler, check_unchanged in resampling_cases:
                acqf = qExpectedImprovement(model=mm, best_f=0, sampler=make_sampler())
                self.assertEqual(acqf(X).item(), 0.0)
                self.assertEqual(acqf.sampler.base_samples.shape, base_shape)
                before = acqf.sampler.base_samples.clone()
                acqf(X)
                check_unchanged(torch.equal(acqf.sampler.base_samples, before))

            # basic test for X_pending and warning
            # (`acqf` is the resampling qmc instance left over from the loop)
            acqf.set_X_pending()
            self.assertIsNone(acqf.X_pending)
            acqf.set_X_pending(None)
            self.assertIsNone(acqf.X_pending)
            acqf.set_X_pending(X)
            self.assertEqual(acqf.X_pending, X)
            acqf(X)
            X2 = torch.zeros(1, 1, 1, requires_grad=True, **tkwargs)
            with warnings.catch_warnings(record=True) as caught:
                acqf.set_X_pending(X2)
                self.assertEqual(acqf.X_pending, X2)
                self.assertEqual(len(caught), 1)
                self.assertTrue(issubclass(caught[-1].category, BotorchWarning))
Beispiel #29
0
def get_acquisition_function(
    acquisition_function_name: str,
    model: Model,
    objective: MCAcquisitionObjective,
    X_observed: Tensor,
    X_pending: Optional[Tensor] = None,
    mc_samples: int = 500,
    qmc: bool = True,
    seed: Optional[int] = None,
    **kwargs,
) -> monte_carlo.MCAcquisitionFunction:
    r"""Build a botorch MC acquisition function from its short name.

    Args:
        acquisition_function_name: One of `qEI`, `qPI`, `qNEI`, `qSR`, `qUCB`.
        model: A fitted model.
        objective: A MCAcquisitionObjective.
        X_observed: A `m1 x d`-dim Tensor of `m1` design points that have
            already been observed.
        X_pending: A `m2 x d`-dim Tensor of `m2` design points whose evaluation
            is pending.
        mc_samples: The number of samples to use for (q)MC evaluation of the
            acquisition function.
        qmc: If True, use quasi-Monte-Carlo sampling (instead of iid).
        seed: If provided, perform deterministic optimization (i.e. the
            function to optimize is fixed and not stochastic).
        **kwargs: Name-specific options: `tau` for `qPI` (default `1e-3`),
            `prune_baseline` for `qNEI` (default `False`) and the mandatory
            `beta` for `qUCB`.

    Returns:
        The requested acquisition function.

    Raises:
        ValueError: If `qUCB` is requested without `beta` in kwargs.
        NotImplementedError: If the name is not one of the supported ones.

    Example:
        >>> model = SingleTaskGP(train_X, train_Y)
        >>> obj = LinearMCObjective(weights=torch.tensor([1.0, 2.0]))
        >>> acqf = get_acquisition_function("qEI", model, obj, train_X)
    """
    # The sampler is always built first, whatever name was requested.
    sampler_cls = SobolQMCNormalSampler if qmc else IIDNormalSampler
    sampler = sampler_cls(num_samples=mc_samples, seed=seed)

    # Arguments accepted by every acquisition function constructed below.
    shared = {
        "model": model,
        "sampler": sampler,
        "objective": objective,
        "X_pending": X_pending,
    }

    if acquisition_function_name in ("qEI", "qPI"):
        # Both improvement-based criteria use the best objective value of the
        # posterior mean at the observed points as incumbent.
        best_f = objective(model.posterior(X_observed).mean).max().item()
        if acquisition_function_name == "qEI":
            return monte_carlo.qExpectedImprovement(best_f=best_f, **shared)
        return monte_carlo.qProbabilityOfImprovement(
            best_f=best_f,
            tau=kwargs.get("tau", 1e-3),
            **shared,
        )

    if acquisition_function_name == "qNEI":
        return monte_carlo.qNoisyExpectedImprovement(
            X_baseline=X_observed,
            prune_baseline=kwargs.get("prune_baseline", False),
            **shared,
        )

    if acquisition_function_name == "qSR":
        return monte_carlo.qSimpleRegret(**shared)

    if acquisition_function_name == "qUCB":
        if "beta" not in kwargs:
            raise ValueError("`beta` must be specified in kwargs for qUCB.")
        return monte_carlo.qUpperConfidenceBound(beta=kwargs["beta"], **shared)

    raise NotImplementedError(
        f"Unknown acquisition function {acquisition_function_name}")
Beispiel #30
0
    def train_loop(self):
        """Search three hyper-parameters with GP-based Bayesian optimisation.

        Starting from one hand-picked configuration, the loop runs
        ``N_BATCH`` iterations. Each iteration fits a ``SingleTaskGP`` on the
        normalised inputs and standardised objectives, maximises
        ``qExpectedImprovement`` over the unit cube for a single candidate,
        maps the candidate back to ``bounds`` and scores it with
        ``self.train``. Progress and the list of best objective values seen
        so far are printed; nothing is returned.

        NOTE(review): the acquisition *maximises* the value returned by
        ``self.train`` while the log messages call it an "Error" -- confirm
        that ``self.train`` returns a quantity for which larger is better.
        """
        from botorch.models import SingleTaskGP
        from botorch.fit import fit_gpytorch_model
        from gpytorch.mlls import ExactMarginalLogLikelihood
        from botorch.optim import optimize_acqf
        from botorch.acquisition.monte_carlo import qExpectedImprovement
        from botorch.sampling.samplers import SobolQMCNormalSampler

        seed = 1
        torch.manual_seed(seed)
        dt, d = torch.float32, 3
        # lower / upper limits of the three tuned hyper-parameters
        lb, ub = [1e-4, 0.1, 0.1], [3e-3, 1 - 1e-3, 1 - 1e-3]
        bounds = torch.tensor([lb, ub], dtype=dt)

        def gen_initial_data():
            """Evaluate the fixed starting configuration; return (x, obj)."""
            # auto
            # x = unnormalize(torch.rand(1, 3, dtype=dt), bounds=bounds)
            # manual
            x = torch.tensor([[1e-3, 0.9, 0.999]])
            print('BO Initialization: \n')
            print('Initial Hyper-parameter: ' + str(x))
            # presumably self.train returns a 1-element tensor -- TODO confirm
            obj = self.train(x.view(-1))
            print('Initial Error: ' + str(obj))
            return x, obj.unsqueeze(1)

        def get_fitted_model(x, obj, state_dict=None):
            """Fit a SingleTaskGP, optionally warm-started from state_dict."""
            fitted_model = SingleTaskGP(train_X=x, train_Y=obj)
            if state_dict is not None:
                fitted_model.load_state_dict(state_dict)
            mll = ExactMarginalLogLikelihood(fitted_model.likelihood,
                                             fitted_model)
            mll.to(x)
            fit_gpytorch_model(mll)
            return fitted_model

        def optimize_acqf_and_get_observation(acq_func):
            """Optimizes the acquisition function,
            and returns a new candidate and its observation"""
            # the model lives on normalised inputs, so search the unit cube
            candidates, _ = optimize_acqf(
                acq_function=acq_func,
                bounds=torch.stack([
                    torch.zeros(d, dtype=dt),
                    torch.ones(d, dtype=dt),
                ]),
                q=1,
                num_restarts=10,
                raw_samples=200,
            )

            x = unnormalize(candidates.detach(), bounds=bounds)
            print('Hyper-parameter: ' + str(x))
            obj = self.train(x.view(-1)).unsqueeze(-1)
            # a single print: the nested print(print(...)) also emitted "None"
            print('Error: ' + str(obj))
            return x, obj

        N_BATCH = 500
        MC_SAMPLES = 2000
        best_observed = []
        train_x, train_obj = gen_initial_data()  # (1,3), (1,1)
        # stored as a float so every entry of best_observed has the same type
        best_observed.append(train_obj.max().item())

        print("\nRunning BO......\n ", end='')
        state_dict = None
        for iteration in range(N_BATCH):
            # fit the model
            model = get_fitted_model(
                normalize(train_x, bounds=bounds),
                standardize(train_obj),
                state_dict=state_dict,
            )

            # define the qEI acquisition module using a QMC sampler
            qmc_sampler = SobolQMCNormalSampler(num_samples=MC_SAMPLES,
                                                seed=seed)
            qEI = qExpectedImprovement(model=model,
                                       sampler=qmc_sampler,
                                       best_f=standardize(train_obj).max())

            # optimize and get new observation
            new_x, new_obj = optimize_acqf_and_get_observation(qEI)

            # update training points
            train_x = torch.cat((train_x, new_x))
            train_obj = torch.cat((train_obj, new_obj))

            # update progress
            best_value = train_obj.max().item()
            best_observed.append(best_value)

            # reuse the fitted hyper-parameters as the next starting point
            state_dict = model.state_dict()
            print(".", end='')

        print(best_observed)