def test_exclude(self):
    """Verify that `module_to_array` leaves out parameters named in `exclude`."""
    # Parameters expected to survive the exclusion, with their shapes.
    expected_shapes = {
        "likelihood.noise_covar.raw_noise": torch.Size([1]),
        "model.covar_module.raw_lengthscale": torch.Size([1, 3]),
    }
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        # Assemble a small exact GP (ARD RBF kernel, constant mean) as fixture.
        inputs = torch.tensor([[1.0, 2.0, 3.0]], **tkwargs)
        targets = torch.tensor([4.0], **tkwargs)
        likelihood = GaussianLikelihood()
        model = ExactGP(inputs, targets, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(**tkwargs)
        mll = ExactMarginalLogLikelihood(likelihood, model)

        # Convert to an array while skipping the mean constant.
        x, pdict, bounds = module_to_array(
            module=mll, exclude={"model.mean_module.constant"}
        )

        # Four entries remain (1 noise + 3 lengthscales), all expected to be 0.
        self.assertTrue(np.array_equal(x, np.zeros(4)))
        self.assertEqual(set(pdict), set(expected_shapes))
        for name, attr in pdict.items():
            self.assertEqual(attr.dtype, dtype)
            self.assertEqual(attr.shape, expected_shapes[name])
            self.assertEqual(attr.device.type, self.device.type)
        # No bounds were supplied, so none should be returned.
        self.assertIsNone(bounds)
def test_manual_bounds(self, cuda=False):
    """Verify that bounds passed to `module_to_array` land on the right entries.

    Only the raw lengthscale gets a lower bound (0.1); every other entry is
    expected to be unbounded below, and all entries unbounded above.
    """
    device = torch.device("cuda" if cuda else "cpu")
    expected_shapes = {
        "likelihood.noise_covar.raw_noise": torch.Size([1]),
        "model.covar_module.raw_lengthscale": torch.Size([1, 3]),
        "model.mean_module.constant": torch.Size([1]),
    }
    # Parameters for which no bound is supplied.
    unbounded_names = (
        "likelihood.noise_covar.raw_noise",
        "model.mean_module.constant",
    )
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": device, "dtype": dtype}
        # Assemble a small exact GP (ARD RBF kernel, constant mean) as fixture.
        inputs = torch.tensor([[1.0, 2.0, 3.0]], **tkwargs)
        targets = torch.tensor([4.0], **tkwargs)
        likelihood = GaussianLikelihood()
        model = ExactGP(inputs, targets, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(**tkwargs)
        mll = ExactMarginalLogLikelihood(likelihood, model)

        # Convert with a manual lower bound on the lengthscale only.
        x, pdict, bounds = module_to_array(
            module=mll,
            bounds={"model.covar_module.raw_lengthscale": (0.1, None)},
        )

        self.assertTrue(np.array_equal(x, np.zeros(5)))
        self.assertEqual(set(pdict), set(expected_shapes))
        for name, attr in pdict.items():
            self.assertEqual(attr.dtype, dtype)
            self.assertEqual(attr.shape, expected_shapes[name])
            self.assertEqual(attr.device.type, device.type)

        # Start from 0.1 everywhere, then open up the unbounded parameters.
        expected_lower = np.full_like(x, 0.1)
        for name in unbounded_names:
            expected_lower[_get_index(pdict, name)] = -np.inf
        expected_upper = np.full_like(x, np.inf)
        self.assertTrue(np.equal(bounds[0], expected_lower).all())
        self.assertTrue(np.equal(bounds[1], expected_upper).all())
def laplace_sample_U(
    mll: ExactMarginalLogLikelihood, nsamp: int
) -> Tuple[Tensor, Tensor, Tensor]:
    """Draw posterior samples of kernel hyperparameters using Laplace approximation.

    Only the Mahalanobis distance matrix is sampled.

    The diagonal of the Hessian is estimated using finite differences of the
    autograd gradients. The Laplace approximation is then N(p_map, inv(-H)).
    We construct a set of nsamp kernel hyperparameters by drawing nsamp-1
    values from this distribution, and prepending as the first sample the
    MAP parameters.

    Note that `mll` is switched to train mode as a side effect, and that the
    samples are drawn from NumPy's global random state.

    Args:
        mll: MLL object of MAP ALEBO GP.
        nsamp: Number of samples to return.

    Returns: Batch tensors of the kernel hyperparameters Uvec, mean constant,
        and output scale.
    """
    # Estimate diagonal of the Hessian
    mll.train()
    x0, property_dict, _ = module_to_array(module=mll)
    x0 = x0.astype(np.float64)  # This is the MAP parameters
    H = np.zeros((len(x0), len(x0)))
    epsilon = 1e-4 + 1e-3 * np.abs(x0)
    for i, x_i in enumerate(x0):
        # Compute gradient of df/dx_i wrt x_i. The index is bound as a default
        # argument so the closure never depends on the loop variable's later
        # values.
        def f(x, i=i):
            x_all = x0.copy()
            x_all[i] = x[0]
            return -_scipy_objective_and_grad(x_all, mll, property_dict)[1][i]

        second_deriv = approx_fprime(  # pyre-ignore
            np.array([x_i]), f, epsilon=epsilon[i]
        )
        # approx_fprime returns an array; pull out the single element
        # explicitly rather than relying on NumPy's deprecated implicit
        # conversion of size-1 arrays to scalars on item assignment.
        H[i, i] = np.asarray(second_deriv).item()

    # Sample only Uvec; leave mean and output scale fixed.
    # The hard-coded offset of 2 below relies on exactly this parameter order.
    assert list(property_dict.keys()) == [
        "model.mean_module.constant",
        "model.covar_module.raw_outputscale",
        "model.covar_module.base_kernel.Uvec",
    ]
    H = H[2:, 2:]
    H += np.diag(-1e-3 * np.ones(H.shape[0]))  # Add a nugget for inverse stability
    Sigma = np.linalg.inv(-H)
    samples = np.random.multivariate_normal(mean=x0[2:], cov=Sigma, size=(nsamp - 1))
    # Include the MAP estimate
    samples = np.vstack((x0[2:], samples))
    # Reshape
    attrs = property_dict["model.covar_module.base_kernel.Uvec"]
    Uvec_batch = torch.tensor(samples, dtype=attrs.dtype, device=attrs.device).reshape(
        nsamp, *attrs.shape
    )
    # Get the other properties into batch mode
    mean_constant_batch = mll.model.mean_module.constant.repeat(nsamp, 1)
    output_scale_batch = mll.model.covar_module.raw_outputscale.repeat(nsamp)
    return Uvec_batch, mean_constant_batch, output_scale_batch
def get_params_numpy(
    self,
    model: Any,
) -> Tuple[np.ndarray, Dict[str, TorchAttr], Optional[np.ndarray]]:
    """
    Syntactic sugar for `botorch.optim.numpy_converter.module_to_array`.

    Parameters
    ----------
    model : Any.
        An instance of nn.Module or one of its subclasses.

    Returns
    -------
    param_array : 1D np.ndarray
        Model parameter values.
    param_dict : dict.
        Keys are string representations of parameter names; values are
        TorchAttr objects containing shape, dtype, and device information
        about the corresponding pytorch tensors.
    bounds : optional, np.ndarray or None.
        If at least one parameter has bounds, then these are returned as a
        2D ndarray representing the bounds for each parameter. Otherwise
        None.
    """
    # Pure delegation: the converter's result is handed back untouched.
    converted = numpy_converter.module_to_array(model)
    return converted
def test_set_parameters(self):
    """Round-trip values through `set_params_with_array` and `module_to_array`."""
    new_values = [1.0, 2.0, 3.0, 4.0, 5.0]
    # Expected per-parameter contents after the flat array has been written back.
    expected_by_name = {
        "likelihood.noise_covar.raw_noise": [1.0],
        "model.covar_module.raw_lengthscale": [[2.0, 3.0, 4.0]],
        "model.mean_module.constant": [5.0],
    }
    for dtype in (torch.float, torch.double):
        tkwargs = {"device": self.device, "dtype": dtype}
        # Assemble a small exact GP (ARD RBF kernel, constant mean) as fixture.
        inputs = torch.tensor([[1.0, 2.0, 3.0]], **tkwargs)
        targets = torch.tensor([4.0], **tkwargs)
        likelihood = GaussianLikelihood()
        model = ExactGP(inputs, targets, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(**tkwargs)
        mll = ExactMarginalLogLikelihood(likelihood, model)

        # Only the property dict is needed to write parameters back.
        _, pdict, _ = module_to_array(module=mll)
        mll = set_params_with_array(mll, np.array(new_values), pdict)

        # Each named parameter must now hold its slice of the flat array.
        params = dict(mll.named_parameters())
        for name, expected in expected_by_name.items():
            target = torch.tensor(expected, **tkwargs)
            self.assertTrue(torch.equal(params[name], target))

        # Extracting again must reproduce the values that were just set.
        x2, _, _ = module_to_array(module=mll)
        self.assertTrue(np.array_equal(x2, np.array(new_values)))
def test_set_parameters(self, cuda=False):
    """Check that `set_params_with_array` writes a flat array into the module.

    The values are then read back with `module_to_array` and compared to
    what was written.
    """
    device = torch.device("cuda" if cuda else "cpu")
    flat_values = [1.0, 2.0, 3.0, 4.0, 5.0]
    # (parameter name, expected contents) in the order they are checked.
    expectations = (
        ("likelihood.noise_covar.raw_noise", [1.0]),
        ("model.covar_module.raw_lengthscale", [[2.0, 3.0, 4.0]]),
        ("model.mean_module.constant", [5.0]),
    )
    for dtype in (torch.float, torch.double):
        # Assemble a small exact GP (ARD RBF kernel, constant mean) as fixture.
        inputs = torch.tensor([[1.0, 2.0, 3.0]], device=device, dtype=dtype)
        targets = torch.tensor([4.0], device=device, dtype=dtype)
        likelihood = GaussianLikelihood()
        model = ExactGP(inputs, targets, likelihood)
        model.covar_module = RBFKernel(ard_num_dims=3)
        model.mean_module = ConstantMean()
        model.to(device=device, dtype=dtype)
        mll = ExactMarginalLogLikelihood(likelihood, model)

        # Fetch the property dict, then overwrite all parameters.
        _, pdict, _ = module_to_array(module=mll)
        mll = set_params_with_array(mll, np.array(flat_values), pdict)

        named = dict(mll.named_parameters())
        for pname, contents in expectations:
            wanted = torch.tensor(contents, device=device, dtype=dtype)
            self.assertTrue(torch.equal(named[pname], wanted))

        # Extract again and compare against the written values.
        x2, _, _ = module_to_array(module=mll)
        self.assertTrue(np.array_equal(x2, np.array(flat_values)))
def fit_gpytorch_scipy(
    mll: MarginalLogLikelihood,
    bounds: Optional[ParameterBounds] = None,
    method: str = "L-BFGS-B",
    options: Optional[Dict[str, Any]] = None,
    track_iterations: bool = True,
) -> Tuple[MarginalLogLikelihood, Dict[str, Union[float, List[OptimizationIteration]]]]:
    r"""Fit a gpytorch model by maximizing MLL with a scipy optimizer.

    The model and likelihood in mll must already be in train mode.
    Note: this method requires that the model has `train_inputs` and `train_targets`.

    Args:
        mll: MarginalLogLikelihood to be maximized.
        bounds: A dictionary mapping parameter names to tuples of lower and upper
            bounds.
        method: Solver type, passed along to scipy.minimize.
        options: Dictionary of solver options, passed along to scipy.minimize.
            An "exclude" entry, if present, is not forwarded to the solver;
            it is passed to `module_to_array` as its `exclude` argument
            instead. The dictionary supplied by the caller is not modified.
        track_iterations: Track the function values and wall time for each
            iteration.

    Returns:
        2-element tuple containing

        - MarginalLogLikelihood with parameters optimized in-place.
        - Dictionary with the following key/values:
          "fopt": Best mll value.
          "wall_time": Wall time of fitting.
          "iterations": List of OptimizationIteration objects with information on each
          iteration. If track_iterations is False, will be empty.

    Example:
        >>> gp = SingleTaskGP(train_X, train_Y)
        >>> mll = ExactMarginalLogLikelihood(gp.likelihood, gp)
        >>> mll.train()
        >>> fit_gpytorch_scipy(mll)
        >>> mll.eval()
    """
    # Work on a shallow copy: popping "exclude" from the caller's own dict
    # would silently drop that entry for any later call reusing the dict.
    options = dict(options) if options else {}
    exclude = options.pop("exclude", None)
    x0, property_dict, bounds = module_to_array(
        module=mll, bounds=bounds, exclude=exclude
    )
    x0 = x0.astype(np.float64)
    if bounds is not None:
        bounds = Bounds(lb=bounds[0], ub=bounds[1], keep_feasible=True)

    xs = []
    ts = []
    t1 = time.time()

    def store_iteration(xk):
        # Copy the iterate, since the optimizer may reuse the array it passes in.
        xs.append(xk.copy())
        ts.append(time.time() - t1)

    cb = store_iteration if track_iterations else None

    res = minimize(
        _scipy_objective_and_grad,
        x0,
        args=(mll, property_dict),
        bounds=bounds,
        method=method,
        jac=True,
        options=options,
        callback=cb,
    )
    iterations = []
    if track_iterations:
        # The callback only stores iterates, so re-evaluate the objective at
        # each stored point to recover the per-iteration function values.
        for i, xk in enumerate(xs):
            obj, _ = _scipy_objective_and_grad(
                x=xk, mll=mll, property_dict=property_dict
            )
            iterations.append(OptimizationIteration(i, obj, ts[i]))
    # Construct info dict
    info_dict = {
        "fopt": float(res.fun),
        "wall_time": time.time() - t1,
        "iterations": iterations,
    }
    if not res.success:
        try:
            # Some res.message are bytes
            msg = res.message.decode("ascii")
        except AttributeError:
            # Others are str
            msg = res.message
        warnings.warn(
            f"Fitting failed with the optimizer reporting '{msg}'",
            OptimizationWarning,
        )
    # Set to optimum
    mll = set_params_with_array(mll, res.x, property_dict)
    return mll, info_dict