def construct_single_training_data(
    Xs: List[Tensor], Ys: List[Tensor], Yvars: List[Tensor]
) -> TrainingData:
    """Construct a `TrainingData` object for a single-outcome model or a
    batched multi-output model.

    **This function assumes that a single `TrainingData` is expected (so if
    all Xs are equal, it will produce `TrainingData` for a batched
    multi-output model).**

    NOTE: All four outputs are organized as lists over outcomes. E.g. if there
    are two outcomes, 'x' and 'y', the Xs are formatted like so:
    `[Xs_x_ndarray, Xs_y_ndarray]`. We specifically do not assume that every
    point is observed for every outcome. This means that the array for each of
    those outcomes may be different, and in particular could have a different
    length (e.g. if a particular arm was observed only for half of the
    outcomes, it would be present in half of the arrays in the list but not
    the other half.)

    Returns:
        A `TrainingData` object with training data for single outcome or with
        batched multi-output training data if appropriate for given model and
        if all X inputs in Xs are equal.

    Raises:
        ValueError: If Xs, Ys, and Yvars have unequal lengths, or if the data
            is in an unexpected (non-batchable multi-outcome) format.
    """
    # Validate lengths upfront, so the single-outcome branch below cannot hit
    # an uninformative `IndexError` on a too-short `Yvars` list.
    if not len(Xs) == len(Ys) == len(Yvars):  # pragma: no cover
        raise ValueError("Xs, Ys, and Yvars must have equal lengths.")
    if len(Xs) == 1:
        # Just one outcome, can use single model.
        return TrainingData(X=Xs[0], Y=Ys[0], Yvar=Yvars[0])
    elif all(torch.equal(Xs[0], X) for X in Xs[1:]):
        # All Xs are the same and model supports batched multioutput.
        return TrainingData(
            X=Xs[0], Y=torch.cat(Ys, dim=-1), Yvar=torch.cat(Yvars, dim=-1)
        )
    raise ValueError(
        "Unexpected training data format. Use `construct_training_data_list` if "
        "constructing training data for multiple outcomes (and not using batched "
        "multi-output)."
    )
def test_construct_single_training_data(self):
    """Cover single-outcome, batched multi-output, and unequal-Xs cases."""
    # Single outcome: len(Xs) == len(Ys) == len(Yvars) == 1.
    single = construct_single_training_data(
        Xs=self.Xs, Ys=self.Ys, Yvars=self.Yvars
    )
    self.assertEqual(
        single, TrainingData(X=self.Xs[0], Y=self.Ys[0], Yvar=self.Yvars[0])
    )
    # Two outcomes sharing identical Xs: batched multi-output.
    doubled_Ys = self.Ys * 2
    doubled_Yvars = self.Yvars * 2
    td = construct_single_training_data(
        Xs=self.Xs * 2, Ys=doubled_Ys, Yvars=doubled_Yvars
    )
    self.assertTrue(torch.equal(td.X, self.Xs[0]))
    self.assertTrue(torch.equal(td.Y, torch.cat(doubled_Ys, dim=-1)))
    self.assertTrue(torch.equal(td.Yvar, torch.cat(doubled_Yvars, dim=-1)))
    # Unequal Xs are not supported by this helper; callers should use
    # `construct_training_data_list` for that case instead.
    with self.assertRaisesRegex(ValueError, "Unexpected training data format"):
        construct_single_training_data(
            Xs=self.Xs + self.Xs2,  # Unequal Xs.
            Ys=doubled_Ys,
            Yvars=doubled_Yvars,
        )
def test_construct_inputs(self):
    """`construct_inputs` forwards train_X/Y/Yvar and requires Yvar."""
    for batch_shape, dtype in itertools.product(
        (torch.Size(), torch.Size([2])), (torch.float, torch.double)
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        model, model_kwargs = self._get_model_and_data(
            batch_shape=batch_shape, m=2, **tkwargs
        )
        training_data = TrainingData(
            X=model_kwargs["train_X"],
            Y=model_kwargs["train_Y"],
            Yvar=model_kwargs["train_Yvar"],
        )
        data_dict = model.construct_inputs(training_data)
        self.assertIn("train_Yvar", data_dict)
        # All three training tensors must pass through unchanged.
        for key in ("train_X", "train_Y", "train_Yvar"):
            self.assertTrue(torch.equal(data_dict[key], model_kwargs[key]))
        # Omitting Yvar must raise.
        incomplete = TrainingData(
            X=model_kwargs["train_X"], Y=model_kwargs["train_Y"]
        )
        with self.assertRaises(ValueError):
            model.construct_inputs(incomplete)
def test_construct_training_data(self):
    """Single-outcome, batched multi-output, and unsupported-model cases."""
    # One outcome: plain single-model training data.
    self.assertEqual(
        construct_training_data(
            Xs=self.Xs, Ys=self.Ys, Yvars=self.Yvars, model_class=SingleTaskGP
        ),
        TrainingData(X=self.Xs[0], Y=self.Ys[0], Yvar=self.Yvars[0]),
    )
    # Multiple outcomes with equal Xs: batched multi-output training data.
    td = construct_training_data(
        Xs=self.Xs * 2,
        Ys=self.Ys * 2,
        Yvars=self.Yvars * 2,
        model_class=SingleTaskGP,
    )
    self.assertTrue(torch.equal(td.X, self.Xs[0]))
    self.assertTrue(torch.equal(td.Y, torch.cat(self.Ys * 2, dim=-1)))
    self.assertTrue(torch.equal(td.Yvar, torch.cat(self.Yvars * 2, dim=-1)))
    # `Model` is not a `BatchedMultiOutputGPyTorchModel` subclass, so
    # multi-outcome data in this format must be rejected.
    with self.assertRaisesRegex(ValueError, "Unexpected training data format"):
        construct_training_data(
            Xs=self.Xs * 2, Ys=self.Ys * 2, Yvars=self.Yvars * 2, model_class=Model
        )
def setUp(self):
    """Create block-design and non-block-design `TrainingData` fixtures."""
    n, d = 3, 2
    X = torch.rand(n, d)
    # Block design: one shared X, for single- and two-outcome Y.
    self.bd_td = TrainingData.from_block_design(X=X, Y=torch.rand(n, 1))
    self.bd_td_mo = TrainingData.from_block_design(X=X, Y=torch.rand(n, 2))
    # Non-block design: per-outcome Xs/Ys lists.
    self.nbd_td = TrainingData(
        Xs=[torch.rand(2, 2), torch.rand(2, 2)],
        Ys=[torch.rand(2, 1), torch.rand(2, 1)],
    )
    self.bounds = [(0.0, 1.0)] * d
def test_construct_training_data_list(self):
    """Produces one `TrainingData` per outcome, preserving input order."""
    td_list = construct_training_data_list(
        Xs=self.Xs + self.Xs2,
        Ys=self.Ys + self.Ys2,
        Yvars=self.Yvars + self.Yvars2,
    )
    expected = [
        TrainingData(X=self.Xs[0], Y=self.Ys[0], Yvar=self.Yvars[0]),
        TrainingData(X=self.Xs2[0], Y=self.Ys2[0], Yvar=self.Yvars2[0]),
    ]
    self.assertEqual(len(td_list), 2)
    for actual_td, expected_td in zip(td_list, expected):
        self.assertEqual(actual_td, expected_td)
def test_TrainingData(self):
    """Xs/Ys/Yvars round-trip through `TrainingData`; Yvars is optional."""
    values = [[-1.0, 0.0, 0.0], [0.0, 1.0, 1.0]]
    Xs = torch.tensor(values)
    Ys = torch.tensor(values)
    Yvars = torch.tensor(values)
    # Without Yvars, the attribute defaults to None.
    without_yvars = TrainingData(Xs, Ys)
    self.assertTrue(torch.equal(without_yvars.Xs, Xs))
    self.assertTrue(torch.equal(without_yvars.Ys, Ys))
    self.assertIsNone(without_yvars.Yvars)
    # With Yvars, all three tensors are stored as given.
    with_yvars = TrainingData(Xs, Ys, Yvars)
    self.assertTrue(torch.equal(with_yvars.Xs, Xs))
    self.assertTrue(torch.equal(with_yvars.Ys, Ys))
    self.assertTrue(torch.equal(with_yvars.Yvars, Yvars))
def test_TrainingData(self):
    """X/Y/Yvar round-trip through `TrainingData`; Yvar is optional."""
    values = [[-1.0, 0.0, 0.0], [0.0, 1.0, 1.0]]
    X = torch.tensor(values)
    Y = torch.tensor(values)
    Yvar = torch.tensor(values)
    # Without Yvar, the attribute defaults to None.
    td = TrainingData(X, Y)
    self.assertTrue(torch.equal(td.X, X))
    self.assertTrue(torch.equal(td.Y, Y))
    self.assertIsNone(td.Yvar)
    # With Yvar, all three tensors are stored as given.
    td = TrainingData(X, Y, Yvar)
    self.assertTrue(torch.equal(td.X, X))
    self.assertTrue(torch.equal(td.Y, Y))
    self.assertTrue(torch.equal(td.Yvar, Yvar))
def update(
    self,
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    search_space_digest: SearchSpaceDigest,
    metric_names: List[str],
    candidate_metadata: Optional[List[List[TCandidateMetadata]]] = None,
) -> None:
    """Update the underlying surrogate with new training data.

    Args:
        Xs: Input tensors, one entry per outcome.
        Ys: Observation tensors, one entry per outcome.
        Yvars: Observation-noise variance tensors, one entry per outcome.
        search_space_digest: Search-space metadata; stored on this model for
            later use (e.g. during generation).
        metric_names: Outcome names, in the same order as `Ys`.
        candidate_metadata: Optional metadata for generated candidates,
            forwarded to `Surrogate.update`.

    Raises:
        ValueError: If this model has not been fitted yet.
    """
    if not self._surrogate:
        raise ValueError("Cannot update model that has not been fitted.")

    # store search space info for later use (e.g. during generation)
    self._search_space_digest = search_space_digest

    # Sometimes the model fit should be restarted from scratch on update, for
    # models that are prone to overfitting. In those cases,
    # `self.warm_start_refit` should be false and `Surrogate.update` will not
    # receive a state dict and will not pass it to the underlying
    # `Surrogate.fit`.
    state_dict = (
        None
        if self.refit_on_update and not self.warm_start_refit
        else self.surrogate.model.state_dict()
    )
    self.surrogate.update(
        training_data=TrainingData(Xs=Xs, Ys=Ys, Yvars=Yvars),
        search_space_digest=search_space_digest,
        metric_names=metric_names,
        candidate_metadata=candidate_metadata,
        state_dict=state_dict,
        refit=self.refit_on_update,
    )
def setUp(self):
    # Fixture: a `Surrogate` over `SingleTaskGP`, built from 2-point
    # block-design training data with three features, where feature index 2
    # is a fidelity feature.
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    self.Y = torch.tensor([[3.0], [4.0]])
    self.Yvar = torch.tensor([[0.0], [2.0]])
    self.training_data = TrainingData.from_block_design(
        X=self.X, Y=self.Y, Yvar=self.Yvar
    )
    self.fidelity_features = [2]
    # Construct the surrogate model up front so tests can use it directly.
    self.surrogate.construct(
        training_data=self.training_data, fidelity_features=self.fidelity_features
    )
    self.acquisition_options = {Keys.NUM_FANTASIES: 64}
    self.search_space_digest = SearchSpaceDigest(
        feature_names=["a", "b", "c"],
        bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
        target_fidelities={2: 1.0},
    )
    self.objective_weights = torch.tensor([1.0])
    self.pending_observations = [
        torch.tensor([[1.0, 3.0, 4.0]]),
        torch.tensor([[2.0, 6.0, 8.0]]),
    ]
    # One outcome constraint: 1.0 * y <= 0.5.
    self.outcome_constraints = (torch.tensor([[1.0]]), torch.tensor([[0.5]]))
    self.linear_constraints = None
    self.fixed_features = {1: 2.0}
    self.options = {
        Keys.FIDELITY_WEIGHTS: {2: 1.0},
        Keys.COST_INTERCEPT: 1.0,
        Keys.NUM_TRACE_OBSERVATIONS: 0,
    }
def setUp(self):
    # Fixture: a `Surrogate` over `SingleTaskGP` with an exact-MLL objective,
    # plus the tensors/options shared by the tests in this case.
    self.botorch_model_class = SingleTaskGP
    self.mll_class = ExactMarginalLogLikelihood
    self.device = torch.device("cpu")
    self.dtype = torch.float
    self.Xs, self.Ys, self.Yvars, self.bounds, _, _, _ = get_torch_test_data(
        dtype=self.dtype
    )
    # Single-outcome training data built from the first test outcome.
    self.training_data = TrainingData(
        X=self.Xs[0], Y=self.Ys[0], Yvar=self.Yvars[0]
    )
    self.surrogate_kwargs = self.botorch_model_class.construct_inputs(
        self.training_data
    )
    self.surrogate = Surrogate(
        botorch_model_class=self.botorch_model_class, mll_class=self.mll_class
    )
    self.task_features = []
    self.feature_names = ["x1", "x2"]
    self.metric_names = ["y"]
    self.fidelity_features = []
    self.target_fidelities = {1: 1.0}
    self.fixed_features = {1: 2.0}
    self.refit = True
    self.objective_weights = torch.tensor(
        [-1.0, 1.0], dtype=self.dtype, device=self.device
    )
    # One outcome constraint: 1.0 * y <= 0.5.
    self.outcome_constraints = (torch.tensor([[1.0]]), torch.tensor([[0.5]]))
    self.linear_constraints = (
        torch.tensor([[0.0, 0.0, 0.0], [0.0, 1.0, 0.0]]),
        torch.tensor([[0.5], [1.0]]),
    )
    self.options = {}
def test_update(self, mock_fit_gpytorch, mock_MLL, mock_state_dict): self.surrogate.construct( training_data=self.training_data, fidelity_features=self.search_space_digest.fidelity_features, ) # Check that correct arguments are passed to `fit`. with patch(f"{SURROGATE_PATH}.Surrogate.fit") as mock_fit: # Call `fit` by default self.surrogate.update( training_data=self.training_data, search_space_digest=self.search_space_digest, metric_names=self.metric_names, refit=self.refit, state_dict={"key": "val"}, ) mock_fit.assert_called_with( training_data=self.training_data, search_space_digest=self.search_space_digest, metric_names=self.metric_names, candidate_metadata=None, refit=self.refit, state_dict={"key": "val"}, ) # Check that the training data is correctly passed through to the # BoTorch `Model`. Xs, Ys, Yvars, bounds, _, _, _ = get_torch_test_data( dtype=self.dtype, offset=1.0 ) training_data = TrainingData(X=Xs[0], Y=Ys[0], Yvar=Yvars[0]) surrogate_kwargs = self.botorch_model_class.construct_inputs(training_data) self.surrogate.update( training_data=training_data, search_space_digest=self.search_space_digest, metric_names=self.metric_names, refit=self.refit, state_dict={"key": "val"}, ) self.assertTrue( torch.equal( self.surrogate.model.train_inputs[0], surrogate_kwargs.get("train_X"), ) ) self.assertTrue( torch.equal( self.surrogate.model.train_targets, surrogate_kwargs.get("train_Y").squeeze(1), ) ) # If should not be reconstructed, check that error is raised. self.surrogate._constructed_manually = True with self.assertRaisesRegex(NotImplementedError, ".* constructed manually"): self.surrogate.update( training_data=self.training_data, search_space_digest=self.search_space_digest, metric_names=self.metric_names, refit=self.refit, )
def test_construct_inputs(self):
    """`construct_inputs` with list-based (`Xs`/`Ys`/`Yvars`) training data:
    single outcome, equal-Xs multi-outcome, unequal-Xs error, and the fact
    that `Yvars` is not surfaced as `train_Yvar` for this model."""
    for batch_shape, dtype in itertools.product(
        (torch.Size(), torch.Size([2])), (torch.float, torch.double)
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        model, model_kwargs = self._get_model_and_data(
            batch_shape=batch_shape, m=2, **tkwargs
        )
        # len(Xs) == len(Ys) == 1
        training_data = TrainingData(
            Xs=[model_kwargs["train_X"][0]], Ys=[model_kwargs["train_Y"][0]]
        )
        data_dict = model.construct_inputs(training_data)
        self.assertTrue(
            torch.equal(data_dict["train_X"], model_kwargs["train_X"][0])
        )
        self.assertTrue(
            torch.equal(data_dict["train_Y"], model_kwargs["train_Y"][0])
        )
        # all X's are equal
        training_data = TrainingData(
            Xs=[model_kwargs["train_X"], model_kwargs["train_X"]],
            Ys=[model_kwargs["train_Y"], model_kwargs["train_Y"]],
        )
        data_dict = model.construct_inputs(training_data)
        self.assertTrue(torch.equal(data_dict["train_X"], model_kwargs["train_X"]))
        self.assertTrue(
            torch.equal(
                data_dict["train_Y"],
                torch.cat(
                    [model_kwargs["train_Y"], model_kwargs["train_Y"]], dim=-1
                ),
            )
        )
        # unexpected data format
        training_data = TrainingData(
            Xs=[model_kwargs["train_X"], torch.add(model_kwargs["train_X"], 1)],
            Ys=[model_kwargs["train_Y"], model_kwargs["train_Y"]],
        )
        with self.assertRaises(ValueError):
            model.construct_inputs(training_data)
        # make sure Yvar is not added to dict
        training_data = TrainingData(
            Xs=[model_kwargs["train_X"]],
            Ys=[model_kwargs["train_Y"]],
            Yvars=[torch.full_like(model_kwargs["train_Y"], 0.01)],
        )
        data_dict = model.construct_inputs(training_data)
        self.assertTrue("train_Yvar" not in data_dict)
def test_construct_inputs(self):
    """`construct_inputs` for multi-fidelity models: both Yvar and fidelity
    features are required, and training tensors pass through unchanged."""
    for (iteration_fidelity, data_fidelity) in self.FIDELITY_TEST_PAIRS:
        for batch_shape, m, dtype, lin_trunc in itertools.product(
            (torch.Size(), torch.Size([2])),
            (1, 2),
            (torch.float, torch.double),
            (False, True),
        ):
            tkwargs = {"device": self.device, "dtype": dtype}
            model, model_kwargs = self._get_model_and_data(
                iteration_fidelity=iteration_fidelity,
                data_fidelity=data_fidelity,
                batch_shape=batch_shape,
                m=m,
                lin_truncated=lin_trunc,
                **tkwargs,
            )
            training_data = TrainingData.from_block_design(
                X=model_kwargs["train_X"], Y=model_kwargs["train_Y"]
            )
            # missing Yvars
            with self.assertRaises(ValueError):
                model.construct_inputs(training_data, fidelity_features=[1])
            # len(Xs) == len(Ys) == 1
            training_data = TrainingData.from_block_design(
                X=model_kwargs["train_X"],
                Y=model_kwargs["train_Y"],
                Yvar=torch.full_like(model_kwargs["train_Y"], 0.01),
            )
            # missing fidelity features
            with self.assertRaises(ValueError):
                model.construct_inputs(training_data)
            data_dict = model.construct_inputs(training_data, fidelity_features=[1])
            self.assertTrue("train_Yvar" in data_dict)
            self.assertTrue("data_fidelity" in data_dict)
            self.assertEqual(data_dict["data_fidelity"], 1)
            # NOTE(review): this second identical call looks redundant —
            # possibly left over from an edit; confirm before removing.
            data_dict = model.construct_inputs(training_data, fidelity_features=[1])
            self.assertTrue(
                torch.equal(data_dict["train_X"], model_kwargs["train_X"])
            )
            self.assertTrue(
                torch.equal(data_dict["train_Y"], model_kwargs["train_Y"])
            )
def test_MultiTaskGP_construct_inputs(self):
    """`construct_inputs` for `MultiTaskGP`: task features are required, only
    the LKJ prior config (with a numeric eta) is supported, and `prior`/
    `prior_config` are mutually exclusive."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        model, train_X, train_Y = _get_model_and_training_data(**tkwargs)
        training_data = TrainingData.from_block_design(X=train_X, Y=train_Y)
        # Test that task features are required.
        with self.assertRaisesRegex(ValueError, "`task_features` required"):
            model.construct_inputs(training_data)
        # Validate prior config.
        with self.assertRaisesRegex(
            ValueError, ".* only config for LKJ prior is supported"
        ):
            data_dict = model.construct_inputs(
                training_data,
                task_features=[0],
                prior_config={"use_LKJ_prior": False},
            )
        # Validate eta.
        with self.assertRaisesRegex(ValueError, "eta must be a real number"):
            data_dict = model.construct_inputs(
                training_data,
                task_features=[0],
                prior_config={"use_LKJ_prior": True, "eta": "not_number"},
            )
        # Test that presence of `prior` and `prior_config` kwargs at the
        # same time causes error.
        with self.assertRaisesRegex(ValueError, ".* one of `prior` and `prior_"):
            data_dict = model.construct_inputs(
                training_data,
                task_features=[0],
                task_covar_prior=1,
                prior_config={"use_LKJ_prior": True, "eta": "not_number"},
            )
        # Valid config: data and LKJ prior flow into the kwarg dict.
        data_dict = model.construct_inputs(
            training_data,
            task_features=[0],
            prior_config={"use_LKJ_prior": True, "eta": 0.6},
        )
        self.assertTrue(torch.equal(data_dict["train_X"], train_X))
        self.assertTrue(torch.equal(data_dict["train_Y"], train_Y))
        self.assertEqual(data_dict["task_feature"], 0)
        self.assertIsInstance(data_dict["task_covar_prior"], LKJCovariancePrior)
def setUp(self):
    # Wrap the registered qNEI input constructor in a MagicMock and register
    # it for `DummyACQFClass`, so tests can count calls / inspect arguments.
    qNEI_input_constructor = get_acqf_input_constructor(qNoisyExpectedImprovement)
    self.mock_input_constructor = mock.MagicMock(
        qNEI_input_constructor, side_effect=qNEI_input_constructor
    )
    # Adding wrapping here to be able to count calls and inspect arguments.
    _register_acqf_input_constructor(
        acqf_cls=DummyACQFClass,
        input_constructor=self.mock_input_constructor,
    )
    # Surrogate over `SingleTaskGP`, built from 2-point block-design data
    # with three features; feature index 2 is a fidelity feature.
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    self.Y = torch.tensor([[3.0], [4.0]])
    self.Yvar = torch.tensor([[0.0], [2.0]])
    self.training_data = TrainingData.from_block_design(
        X=self.X, Y=self.Y, Yvar=self.Yvar
    )
    self.fidelity_features = [2]
    self.surrogate.construct(
        training_data=self.training_data, fidelity_features=self.fidelity_features
    )
    self.search_space_digest = SearchSpaceDigest(
        feature_names=["a", "b", "c"],
        bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
        target_fidelities={2: 1.0},
    )
    self.botorch_acqf_class = DummyACQFClass
    self.objective_weights = torch.tensor([1.0])
    self.objective_thresholds = None
    self.pending_observations = [torch.tensor([[1.0, 3.0, 4.0]])]
    # One outcome constraint: 1.0 * y <= 0.5.
    self.outcome_constraints = (torch.tensor([[1.0]]), torch.tensor([[0.5]]))
    self.linear_constraints = None
    self.fixed_features = {1: 2.0}
    self.options = {"best_f": 0.0}
    self.acquisition = Acquisition(
        botorch_acqf_class=self.botorch_acqf_class,
        surrogate=self.surrogate,
        search_space_digest=self.search_space_digest,
        objective_weights=self.objective_weights,
        objective_thresholds=self.objective_thresholds,
        pending_observations=self.pending_observations,
        outcome_constraints=self.outcome_constraints,
        linear_constraints=self.linear_constraints,
        fixed_features=self.fixed_features,
        options=self.options,
    )
    self.inequality_constraints = [
        (torch.tensor([0, 1]), torch.tensor([-1.0, 1.0]), 1)
    ]
    self.rounding_func = lambda x: x
    self.optimizer_options = {Keys.NUM_RESTARTS: 40, Keys.RAW_SAMPLES: 1024}
def test_FixedNoiseMultiTaskGP_construct_inputs(self):
    """`construct_inputs` for `FixedNoiseMultiTaskGP`: Yvar and task features
    are both required; training tensors and the LKJ prior flow into the
    kwarg dict."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        (
            model,
            train_X,
            train_Y,
            train_Yvar,
        ) = _get_fixed_noise_model_and_training_data(**tkwargs)
        td_no_Yvar = TrainingData.from_block_design(X=train_X, Y=train_Y)
        # Test that Yvar is required.
        with self.assertRaisesRegex(ValueError, "Yvar required"):
            model.construct_inputs(td_no_Yvar)
        training_data = TrainingData.from_block_design(
            X=train_X, Y=train_Y, Yvar=train_Yvar
        )
        # Test that task features are required.
        with self.assertRaisesRegex(ValueError, "`task_features` required"):
            model.construct_inputs(training_data)
        # Validate prior config.
        with self.assertRaisesRegex(
            ValueError, ".* only config for LKJ prior is supported"
        ):
            data_dict = model.construct_inputs(
                training_data,
                task_features=[0],
                prior_config={"use_LKJ_prior": False},
            )
        # Valid config: data and LKJ prior flow into the kwarg dict.
        data_dict = model.construct_inputs(
            training_data,
            task_features=[0],
            prior_config={"use_LKJ_prior": True, "eta": 0.6},
        )
        self.assertTrue(torch.equal(data_dict["train_X"], train_X))
        self.assertTrue(torch.equal(data_dict["train_Y"], train_Y))
        self.assertTrue(torch.equal(data_dict["train_Yvar"], train_Yvar))
        self.assertEqual(data_dict["task_feature"], 0)
        self.assertIsInstance(data_dict["task_covar_prior"], LKJCovariancePrior)
def test_MultiTaskGP_construct_inputs(self):
    """`task_features` is mandatory; training tensors pass through as-is."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        model, train_X, train_Y = _get_model_and_training_data(**tkwargs)
        training_data = TrainingData(X=train_X, Y=train_Y)
        # Missing `task_features` must raise.
        with self.assertRaises(ValueError):
            model.construct_inputs(training_data)
        data_dict = model.construct_inputs(training_data, task_features=[0])
        for key, expected in (("train_X", train_X), ("train_Y", train_Y)):
            self.assertTrue(torch.equal(data_dict[key], expected))
def test_FixedNoiseMultiTaskGP_construct_inputs(self):
    """Both task features and Yvar are required; data passes through."""
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        (
            model,
            train_X,
            train_Y,
            train_Yvar,
        ) = _get_fixed_noise_model_and_training_data(**tkwargs)
        td_no_Yvar = TrainingData(X=train_X, Y=train_Y)
        # No task features -> error.
        with self.assertRaisesRegex(ValueError, "task features required"):
            model.construct_inputs(td_no_Yvar)
        # Task features given but Yvar missing -> error.
        with self.assertRaisesRegex(ValueError, "Yvar required"):
            model.construct_inputs(td_no_Yvar, task_features=[0])
        complete_td = TrainingData(X=train_X, Y=train_Y, Yvar=train_Yvar)
        data_dict = model.construct_inputs(complete_td, task_features=[0])
        for key, expected in (("train_X", train_X), ("train_Y", train_Y)):
            self.assertTrue(torch.equal(data_dict[key], expected))
def test_construct_inputs(self):
    """Training tensors are forwarded unchanged into the input dict."""
    for batch_shape, dtype in itertools.product(
        (torch.Size(), torch.Size([2])), (torch.float, torch.double)
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        model, model_kwargs = self._get_model_and_data(
            batch_shape=batch_shape, m=2, **tkwargs
        )
        td = TrainingData(
            X=model_kwargs["train_X"], Y=model_kwargs["train_Y"]
        )
        data_dict = model.construct_inputs(td)
        for key in ("train_X", "train_Y"):
            self.assertTrue(torch.equal(data_dict[key], model_kwargs[key]))
def setUp(self):
    # Multi-objective acquisition fixture: a 3-outcome `SingleTaskGP`
    # surrogate plus the weights/thresholds/constraints used across tests.
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    self.Y = torch.tensor([[3.0, 4.0, 2.0], [4.0, 3.0, 1.0]])
    self.Yvar = torch.tensor([[0.0, 2.0, 1.0], [2.0, 0.0, 1.0]])
    self.training_data = TrainingData(X=self.X, Y=self.Y, Yvar=self.Yvar)
    self.fidelity_features = [2]
    self.surrogate.construct(training_data=self.training_data)
    self.bounds = [(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)]
    self.botorch_acqf_class = DummyACQFClass
    # Mixed-sign weights over the three outcomes; the third weight is zero
    # and its objective threshold is NaN.
    self.objective_weights = torch.tensor([1.0, -1.0, 0.0])
    self.objective_thresholds = torch.tensor([2.0, 1.0, float("nan")])
    self.pending_observations = [
        torch.tensor([[1.0, 3.0, 4.0]]),
        torch.tensor([[1.0, 3.0, 4.0]]),
        torch.tensor([[1.0, 3.0, 4.0]]),
    ]
    # One outcome constraint over all three outcomes, bound 0.5.
    self.outcome_constraints = (
        torch.tensor([[1.0, 0.5, 0.5]]),
        torch.tensor([[0.5]]),
    )
    self.con_tfs = get_outcome_constraint_transforms(self.outcome_constraints)
    self.linear_constraints = None
    self.fixed_features = {1: 2.0}
    self.target_fidelities = {2: 1.0}
    self.options = {}
    self.acquisition = MOOAcquisition(
        surrogate=self.surrogate,
        bounds=self.bounds,
        objective_weights=self.objective_weights,
        objective_thresholds=self.objective_thresholds,
        botorch_acqf_class=self.botorch_acqf_class,
        pending_observations=self.pending_observations,
        outcome_constraints=self.outcome_constraints,
        linear_constraints=self.linear_constraints,
        fixed_features=self.fixed_features,
        target_fidelities=self.target_fidelities,
        options=self.options,
    )
    self.inequality_constraints = [
        (torch.tensor([0, 1]), torch.tensor([-1.0, 1.0]), 1)
    ]
    self.rounding_func = lambda x: x
    self.optimizer_options = {Keys.NUM_RESTARTS: 40, Keys.RAW_SAMPLES: 1024}
def construct_training_data(
    Xs: List[Tensor], Ys: List[Tensor], Yvars: List[Tensor], model_class: Type[Model]
) -> TrainingData:
    """Construct a `TrainingData` object based on sizes of Xs, Ys, and Yvars,
    and the type of model, for which the training data is intended.

    NOTE: All four outputs are organized as lists over outcomes. E.g. if there
    are two outcomes, 'x' and 'y', the Xs are formatted like so:
    `[Xs_x_ndarray, Xs_y_ndarray]`. We specifically do not assume that every
    point is observed for every outcome. This means that the array for each of
    those outcomes may be different, and in particular could have a different
    length (e.g. if a particular arm was observed only for half of the
    outcomes, it would be present in half of the arrays in the list but not
    the other half.)

    Returns:
        A `TrainingData` object with training data for single outcome or with
        batched multi-output training data if appropriate for given model and
        if all X inputs in Xs are equal.

    Raises:
        ValueError: If `model_class` is not a class, if the input lists have
            unequal lengths, or if the data format is not supported for the
            given model class.
        NotImplementedError: For `ModelListGP` (not yet supported here).
    """
    if not isclass(model_class):  # pragma: no cover
        raise ValueError(
            f"Expected `Type[Model]`, got: {model_class} "
            f"(type: {type(model_class)})."
        )
    # Validate lengths upfront, so the single-outcome branch below cannot hit
    # an uninformative `IndexError` on a too-short `Yvars` list.
    if not len(Xs) == len(Ys) == len(Yvars):  # pragma: no cover
        raise ValueError("Xs, Ys, and Yvars must have equal lengths.")
    if len(Xs) == 1:
        # Just one outcome, can use single model.
        return TrainingData(X=Xs[0], Y=Ys[0], Yvar=Yvars[0])
    elif issubclass(model_class, BatchedMultiOutputGPyTorchModel) and all(
        torch.equal(Xs[0], X) for X in Xs[1:]
    ):
        # All Xs are the same and model supports batched multioutput.
        return TrainingData(
            X=Xs[0], Y=torch.cat(Ys, dim=-1), Yvar=torch.cat(Yvars, dim=-1)
        )
    elif model_class is ModelListGP:  # pragma: no cover
        # TODO: This will be case for `ListSurrogate`.
        raise NotImplementedError("`ModelListGP` not yet supported.")
    raise ValueError(f"Unexpected training data format for {model_class}.")
def setUp(self):
    # Fixture: a `BoTorchModel` with a `KnowledgeGradient` acquisition over a
    # `SingleTaskGP` surrogate, plus two batches of test data (the second
    # offset by 1.0 so its Xs differ from the first).
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.acquisition_class = KnowledgeGradient
    self.botorch_acqf_class = qKnowledgeGradient
    self.acquisition_options = {Keys.NUM_FANTASIES: 64}
    self.model = BoTorchModel(
        surrogate=self.surrogate,
        acquisition_class=self.acquisition_class,
        acquisition_options=self.acquisition_options,
    )
    self.dtype = torch.float
    Xs1, Ys1, Yvars1, self.bounds, _, _, _ = get_torch_test_data(
        dtype=self.dtype
    )
    Xs2, Ys2, Yvars2, _, _, _, _ = get_torch_test_data(
        dtype=self.dtype, offset=1.0
    )
    # Combined two-outcome lists, plus single-outcome convenience handles.
    self.Xs = Xs1 + Xs2
    self.Ys = Ys1 + Ys2
    self.Yvars = Yvars1 + Yvars2
    self.X = Xs1[0]
    self.Y = Ys1[0]
    self.Yvar = Yvars1[0]
    self.X2 = Xs2[0]
    self.training_data = TrainingData(X=self.X, Y=self.Y, Yvar=self.Yvar)
    self.search_space_digest = SearchSpaceDigest(
        feature_names=["x1", "x2", "x3"],
        bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
        task_features=[],
        fidelity_features=[2],
        target_fidelities={1: 1.0},
    )
    self.metric_names = ["y"]
    self.metric_names_for_list_surrogate = ["y1", "y2"]
    self.candidate_metadata = []
    self.optimizer_options = {Keys.NUM_RESTARTS: 40, Keys.RAW_SAMPLES: 1024}
    self.model_gen_options = {Keys.OPTIMIZER_KWARGS: self.optimizer_options}
    self.objective_weights = torch.tensor([1.0])
    self.objective_thresholds = None
    self.outcome_constraints = None
    self.linear_constraints = None
    self.fixed_features = None
    self.pending_observations = None
    self.rounding_func = "func"
def test_construct_inputs(self):
    """Categorical dims and training tensors flow into the kwarg dict."""
    d, m = 3, 1
    for batch_shape, ncat, dtype in itertools.product(
        (torch.Size(), torch.Size([2])), (1, 2), (torch.float, torch.double)
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        train_X, train_Y = _get_random_data(
            batch_shape=batch_shape, m=m, d=d, **tkwargs
        )
        cat_dims = list(range(ncat))
        kwarg_dict = MixedSingleTaskGP.construct_inputs(
            TrainingData(X=train_X, Y=train_Y), categorical_features=cat_dims
        )
        self.assertTrue(torch.equal(kwarg_dict["train_X"], train_X))
        self.assertTrue(torch.equal(kwarg_dict["train_Y"], train_Y))
        self.assertEqual(kwarg_dict["cat_dims"], cat_dims)
        self.assertIsNone(kwarg_dict["likelihood"])
def test_construct_inputs(self):
    """`train_Yvar` appears in the dict only when noise is not inferred."""
    for infer_noise, dtype in itertools.product(
        (True, False), (torch.float, torch.double)
    ):
        tkwargs = {"device": self.device, "dtype": dtype}
        train_X, train_Y, train_Yvar, model = self._get_data_and_model(
            infer_noise=infer_noise, **tkwargs
        )
        td = TrainingData.from_block_design(
            X=train_X, Y=train_Y, Yvar=train_Yvar
        )
        data_dict = model.construct_inputs(td)
        if infer_noise:
            self.assertNotIn("train_Yvar", data_dict)
        else:
            self.assertTrue(torch.equal(data_dict["train_Yvar"], train_Yvar))
        self.assertTrue(torch.equal(data_dict["train_X"], train_X))
        self.assertTrue(torch.equal(data_dict["train_Y"], train_Y))
def setUp(self):
    # Acquisition fixture: `DummyACQFClass` over a multi-fidelity
    # `SingleTaskGP` surrogate, with single-objective weights and one
    # outcome constraint.
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    self.Y = torch.tensor([[3.0], [4.0]])
    self.Yvar = torch.tensor([[0.0], [2.0]])
    self.training_data = TrainingData(X=self.X, Y=self.Y, Yvar=self.Yvar)
    self.fidelity_features = [2]
    self.surrogate.construct(
        training_data=self.training_data, fidelity_features=self.fidelity_features
    )
    self.search_space_digest = SearchSpaceDigest(
        feature_names=["a", "b", "c"],
        bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
        target_fidelities={2: 1.0},
    )
    self.botorch_acqf_class = DummyACQFClass
    self.objective_weights = torch.tensor([1.0])
    self.objective_thresholds = None
    self.pending_observations = [torch.tensor([[1.0, 3.0, 4.0]])]
    # One outcome constraint: 1.0 * y <= 0.5.
    self.outcome_constraints = (torch.tensor([[1.0]]), torch.tensor([[0.5]]))
    self.linear_constraints = None
    self.fixed_features = {1: 2.0}
    self.options = {"best_f": 0.0}
    self.acquisition = Acquisition(
        surrogate=self.surrogate,
        search_space_digest=self.search_space_digest,
        objective_weights=self.objective_weights,
        objective_thresholds=self.objective_thresholds,
        botorch_acqf_class=self.botorch_acqf_class,
        pending_observations=self.pending_observations,
        outcome_constraints=self.outcome_constraints,
        linear_constraints=self.linear_constraints,
        fixed_features=self.fixed_features,
        options=self.options,
    )
    self.inequality_constraints = [
        (torch.tensor([0, 1]), torch.tensor([-1.0, 1.0]), 1)
    ]
    self.rounding_func = lambda x: x
    self.optimizer_options = {Keys.NUM_RESTARTS: 40, Keys.RAW_SAMPLES: 1024}
def setUp(self):
    # Multi-fidelity acquisition fixture. The underlying BoTorch acqf is not
    # actually instantiated: its `__init__` is patched to a no-op below.
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    self.Y = torch.tensor([[3.0], [4.0]])
    self.Yvar = torch.tensor([[0.0], [2.0]])
    self.training_data = TrainingData(X=self.X, Y=self.Y, Yvar=self.Yvar)
    self.fidelity_features = [2]
    self.surrogate.construct(
        training_data=self.training_data, fidelity_features=self.fidelity_features
    )
    self.acquisition_options = {Keys.NUM_FANTASIES: 64}
    self.search_space_digest = SearchSpaceDigest(
        feature_names=["a", "b", "c"],
        bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
        target_fidelities={2: 1.0},
    )
    self.objective_weights = torch.tensor([1.0])
    self.pending_observations = [
        torch.tensor([[1.0, 3.0, 4.0]]),
        torch.tensor([[2.0, 6.0, 8.0]]),
    ]
    # One outcome constraint: 1.0 * y <= 0.5.
    self.outcome_constraints = (torch.tensor([[1.0]]), torch.tensor([[0.5]]))
    self.linear_constraints = None
    self.fixed_features = {1: 2.0}
    self.options = {
        Keys.FIDELITY_WEIGHTS: {2: 1.0},
        Keys.COST_INTERCEPT: 1.0,
        Keys.NUM_TRACE_OBSERVATIONS: 0,
    }
    with patch(f"{MFKG_PATH}.__init__", return_value=None):
        # We don't actually need to instantiate the BoTorch acqf in these tests.
        self.acquisition = MultiFidelityAcquisition(
            surrogate=self.surrogate,
            search_space_digest=self.search_space_digest,
            objective_weights=self.objective_weights,
            botorch_acqf_class=qMultiFidelityKnowledgeGradient,
        )
def construct_training_data_list(
    Xs: List[Tensor], Ys: List[Tensor], Yvars: List[Tensor]
) -> List[TrainingData]:
    """Construct a list of `TrainingData` objects, for use in `ListSurrogate`
    and `ModelListGP`. Each `TrainingData` corresponds to an outcome.

    NOTE: All four outputs are organized as lists over outcomes. E.g. if there
    are two outcomes, 'x' and 'y', the Xs are formatted like so:
    `[Xs_x_ndarray, Xs_y_ndarray]`. We specifically do not assume that every
    point is observed for every outcome. This means that the array for each of
    those outcomes may be different, and in particular could have a different
    length (e.g. if a particular arm was observed only for half of the
    outcomes, it would be present in half of the arrays in the list but not
    the other half.)

    Returns:
        A list of `TrainingData` for all outcomes, preserves the order of Xs.
    """
    if not len(Xs) == len(Ys) == len(Yvars):  # pragma: no cover
        raise ValueError("Xs, Ys, and Yvars must have equal lengths.")
    # One `TrainingData` per outcome, in input order.
    training_data_list = []
    for outcome_X, outcome_Y, outcome_Yvar in zip(Xs, Ys, Yvars):
        training_data_list.append(
            TrainingData(X=outcome_X, Y=outcome_Y, Yvar=outcome_Yvar)
        )
    return training_data_list
def setUp(self):
    # Max-value entropy (`qMaxValueEntropy`) acquisition fixture over a
    # multi-fidelity `SingleTaskGP` surrogate; includes MES-specific
    # optimizer options (e.g. `FRAC_RANDOM`).
    self.botorch_model_class = SingleTaskGP
    self.surrogate = Surrogate(botorch_model_class=self.botorch_model_class)
    self.X = torch.tensor([[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]])
    self.Y = torch.tensor([[3.0], [4.0]])
    self.Yvar = torch.tensor([[0.0], [2.0]])
    self.training_data = TrainingData(X=self.X, Y=self.Y, Yvar=self.Yvar)
    self.fidelity_features = [2]
    self.surrogate.construct(
        training_data=self.training_data, fidelity_features=self.fidelity_features
    )
    self.search_space_digest = SearchSpaceDigest(
        feature_names=["a", "b", "c"],
        bounds=[(0.0, 10.0), (0.0, 10.0), (0.0, 10.0)],
        target_fidelities={2: 1.0},
    )
    self.botorch_acqf_class = qMaxValueEntropy
    self.objective_weights = torch.tensor([1.0])
    self.pending_observations = [
        torch.tensor([[1.0, 3.0, 4.0]]),
        torch.tensor([[2.0, 6.0, 8.0]]),
    ]
    # One outcome constraint: 1.0 * y <= 0.5.
    self.outcome_constraints = (torch.tensor([[1.0]]), torch.tensor([[0.5]]))
    self.linear_constraints = None
    self.fixed_features = {1: 2.0}
    self.options = {
        Keys.FIDELITY_WEIGHTS: {2: 1.0},
        Keys.COST_INTERCEPT: 1.0,
        Keys.NUM_TRACE_OBSERVATIONS: 0,
    }
    self.optimizer_options = {
        Keys.NUM_RESTARTS: 40,
        Keys.RAW_SAMPLES: 1024,
        Keys.FRAC_RANDOM: 0.2,
    }
    self.inequality_constraints = [
        (torch.tensor([0, 1]), torch.tensor([-1.0, 1.0]), 1)
    ]
def fit(
    self,
    Xs: List[Tensor],
    Ys: List[Tensor],
    Yvars: List[Tensor],
    search_space_digest: SearchSpaceDigest,
    metric_names: List[str],
    target_fidelities: Optional[Dict[int, float]] = None,
    candidate_metadata: Optional[List[List[TCandidateMetadata]]] = None,
    state_dict: Optional[Dict[str, Tensor]] = None,
    refit: bool = True,
) -> None:
    """Fit the (auto-selected, if needed) surrogate to the training data.

    Args:
        Xs: Input tensors, one entry per outcome.
        Ys: Observation tensors, one entry per outcome.
        Yvars: Observation-noise variance tensors, one entry per outcome.
        search_space_digest: Search-space metadata; stored on this model for
            later use (e.g. during generation).
        metric_names: Outcome names, in the same order as `Ys`.
        target_fidelities: Optional target fidelities.
            NOTE(review): currently unused in this method's body — confirm
            whether it should be forwarded or removed.
        candidate_metadata: Optional metadata for generated candidates,
            forwarded to `Surrogate.fit`.
        state_dict: Optional model state dict to warm-start fitting from.
        refit: Whether to refit the model; forwarded to `Surrogate.fit`.
    """
    # Ensure that parts of data all have equal lengths.
    validate_data_format(Xs=Xs, Ys=Ys, Yvars=Yvars, metric_names=metric_names)

    # store search space info for later use (e.g. during generation)
    self._search_space_digest = search_space_digest

    # Choose `Surrogate` and underlying `Model` based on properties of data.
    if not self._surrogate:
        self._autoset_surrogate(
            Xs=Xs,
            Ys=Ys,
            Yvars=Yvars,
            search_space_digest=search_space_digest,
            metric_names=metric_names,
        )
    self.surrogate.fit(
        training_data=TrainingData(Xs=Xs, Ys=Ys, Yvars=Yvars),
        search_space_digest=search_space_digest,
        metric_names=metric_names,
        candidate_metadata=candidate_metadata,
        state_dict=state_dict,
        refit=refit,
    )