# Imports assumed by this class; RewardModel is a project-local base class,
# and rospy is used only for logging.
import numpy as np
import rospy
from GPy.kern import RBF, Matern32
from GPy.models import GPRegression


class GaussianProcessRewardModel(RewardModel):
    """Models rewards with a Gaussian process (GP) regressor, implemented on
    top of GPy's GPRegression class.

    The GP is updated online as samples are added. Hyperparameters are fit in
    batch once a threshold number of samples has been collected, and are then
    refined as more samples arrive, either periodically or when the average
    marginal log-likelihood degrades. Limiting how often refinement runs helps
    avoid its O(N^3) computational cost in the number of samples.

    Parameters
    ----------
    min_samples : integer (default 10)
        The number of samples after which initial batch hyperparameter
        fitting is performed.
    batch_retries : integer (default 19)
        The number of random restarts for the initial hyperparameter fit.
    enable_refine : bool (default True)
        Whether hyperparameters are refined after the initial fit.
    refine_period : integer (default 0)
        Hyperparameters are refined after this many samples have been added
        since the last refinement.
    refine_ll_delta : numeric (default 1.0)
        Hyperparameters are refined after the average GP marginal
        log-likelihood decreases by this much since the last refinement.
    refine_retries : integer (default 0)
        The number of additional random restarts for each refinement.
    kernel_type : string (default 'rbf')
        The kernel to use: 'rbf' or 'matern'.
    verbose : bool (default False)
        Whether to log progress messages.

    Other Keyword Parameters
    ------------------------
    Refer to GPy.models.GPRegression's __init__.
    """

    def __init__(self, min_samples=10, batch_retries=19, enable_refine=True,
                 refine_period=0, refine_ll_delta=1.0, refine_retries=0,
                 kernel_type='rbf', verbose=False, **kwargs):
        self.min_samples = min_samples
        self.hp_batch_retries = batch_retries
        self.enable_refine = enable_refine
        self.hp_refine_ll_delta = float(refine_ll_delta)
        self.hp_refine_retries = refine_retries
        self.hp_refine_period = refine_period
        self.last_refine_iter = 0
        self.hp_init = False
        self.last_ll = None
        self.kwargs = kwargs
        self.verbose = bool(verbose)

        if kernel_type.lower() == 'rbf':
            self.kernel_class = RBF
        elif kernel_type.lower() == 'matern':
            self.kernel_class = Matern32
        else:
            raise ValueError('Unknown kernel_type: ' + kernel_type)

        self.kernel = None
        self.gp = None  # Initialized lazily once enough data is available
        self.inputs = []
        self.outputs = []

    def _initialize(self):
        x = np.asarray(self.inputs)
        y = np.asarray(self.outputs).reshape(-1, 1)
        self.kernel = self.kernel_class(input_dim=x.shape[1], ARD=True)
        self.gp = GPRegression(x, y, kernel=self.kernel, **self.kwargs)

    @property
    def num_samples(self):
        return len(self.inputs)

    def average_log_likelihood(self):
        # NOTE For some reason this returns the negative log-likelihood
        if self.gp is None or self.num_samples < self.min_samples:
            return None
        return -self.gp.log_likelihood() / self.num_samples

    def report_sample(self, x, reward):
        self.inputs.append(x)
        self.outputs.append(reward)
        if self.gp is None:
            self.batch_optimize()
        else:
            X = np.asarray(self.inputs)
            Y = np.asarray(self.outputs).reshape(-1, 1)
            self.gp.set_XY(X, Y)

        # Wait until we've initialized
        if not self.hp_init:
            return

        current_ll = self.average_log_likelihood()
        if self.verbose:
            rospy.loginfo('Prev LL: %f Curr LL: %f', self.last_ll, current_ll)
        self.check_refine(current_ll)

    def check_refine(self, current_ll):
        if not self.enable_refine:
            return

        if current_ll > self.last_ll:
            self.last_ll = current_ll

        # Refine if the LL has decreased by refine_ll_delta...
        delta_achieved = current_ll < self.last_ll - self.hp_refine_ll_delta
        # ...or if it has been refine_period samples since the last refinement
        period_achieved = self.num_samples > self.last_refine_iter + self.hp_refine_period
        if delta_achieved or period_achieved:
            self.batch_optimize(self.hp_refine_retries + 1)
            self.last_refine_iter = self.num_samples

    def batch_optimize(self, n_restarts=None):
        if self.num_samples < self.min_samples:
            return

        if n_restarts is None:
            n_restarts = self.hp_batch_retries + 1

        # NOTE Warm-restarting seems to get stuck in local optima, possibly from mean?
        if self.gp is None:
            self._initialize()

        if self.verbose:
            rospy.loginfo('Batch optimizing with %d restarts...', n_restarts)

        self.gp.optimize_restarts(optimizer='bfgs', messages=False,
                                  num_restarts=n_restarts)

        if self.verbose:
            rospy.loginfo('Optimization complete. Model:\n%s\n Kernel:\n%s',
                          str(self.gp), str(self.kernel.lengthscale))

        self.hp_init = True
        self.last_ll = self.average_log_likelihood()

    def predict(self, x, return_std=False):
        if self.gp is None:
            # raise RuntimeError('Model is not fitted yet!')
            pred_mean = 0
            pred_std = float('inf')
        else:
            x = np.asarray(x)
            if len(x.shape) == 1:
                x = x.reshape(1, -1)
            pred_mean, pred_var = self.gp.predict_noiseless(x)
            # Round negative variances (numerical artifacts) up to 0
            if np.any(pred_var < 0):
                rospy.logwarn('Negative variance %s rounding to 0', str(pred_var))
                pred_var = np.clip(pred_var, 0, None)
            pred_std = np.sqrt(pred_var)

        if return_std:
            return np.squeeze(pred_mean), np.squeeze(pred_std)
        else:
            return np.squeeze(pred_mean)

    def clear(self):
        self.inputs = []
        self.outputs = []
        self.kernel = None
        self.gp = None

    def fit(self, X, y):
        """Initialize the model from lists of inputs and corresponding rewards.

        Parameters
        ----------
        X : Iterable of inputs
        y : Iterable of corresponding rewards
        """
        if len(X) != len(y):
            raise RuntimeError('X and y lengths must be the same!')

        self.inputs = list(X)
        self.outputs = list(y)
        self._initialize()
        self.batch_optimize(self.hp_batch_retries)

    @property
    def model(self):
        return self.gp
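# Example usage: a minimal sketch, not part of the original module. It streams
# noisy evaluations of a made-up 2-D quadratic reward into the model and then
# queries the posterior. Assumes the project-local RewardModel base needs no
# constructor arguments and that GPy, numpy, and rospy are importable.
def _demo_gp_reward_model():
    rng = np.random.RandomState(0)
    # refine_period=10 keeps the demo cheap by refitting every 10 samples
    model = GaussianProcessRewardModel(min_samples=10, refine_period=10)
    for _ in range(30):
        x = rng.uniform(-1.0, 1.0, size=2)
        reward = -np.sum(x ** 2) + 0.01 * rng.randn()  # toy reward function
        model.report_sample(x, reward)
    mean, std = model.predict([0.0, 0.0], return_std=True)
    print('predicted reward at origin: %.3f +/- %.3f' % (mean, std))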
# Imports assumed by this class; Covariance, HyperpriorMap, and set_priors
# are project-local helpers.
from typing import Optional

import numpy as np
from GPy.models import GPRegression


class KernelKernelGPModel:

    def __init__(self,
                 kernel_kernel: Optional[Covariance] = None,
                 noise_var: Optional[float] = None,
                 exact_f_eval: bool = False,
                 optimizer: Optional[str] = 'lbfgsb',
                 max_iters: int = 1000,
                 optimize_restarts: int = 5,
                 verbose: bool = True,
                 kernel_kernel_hyperpriors: Optional[HyperpriorMap] = None):
        """
        :param kernel_kernel: covariance over kernels; its raw_kernel is used
        :param noise_var: Gaussian noise variance; defaults to 1% of the
            output variance when None
        :param exact_f_eval: whether objective evaluations are noise-free
        :param optimizer: name of the GPy optimizer to use
        :param max_iters: maximum number of optimization iterations
        :param optimize_restarts: number of random restarts for optimization
        :param verbose: whether to print optimization progress
        :param kernel_kernel_hyperpriors: map of hyperpriors for the GP
            likelihood ('GP') and kernel ('SE') parameters
        """
        self.noise_var = noise_var
        self.exact_f_eval = exact_f_eval
        self.optimize_restarts = optimize_restarts
        self.optimizer = optimizer
        self.max_iters = max_iters
        self.verbose = verbose
        self.covariance = kernel_kernel
        self.kernel_hyperpriors = kernel_kernel_hyperpriors
        self.model = None

    def train(self):
        """Train (optimize) the model."""
        if self.max_iters > 0:
            # Update the model, maximizing the marginal likelihood.
            if self.optimize_restarts == 1:
                self.model.optimize(optimizer=self.optimizer,
                                    max_iters=self.max_iters,
                                    messages=False,
                                    ipython_notebook=False)
            else:
                self.model.optimize_restarts(num_restarts=self.optimize_restarts,
                                             optimizer=self.optimizer,
                                             max_iters=self.max_iters,
                                             ipython_notebook=False,
                                             verbose=self.verbose,
                                             robust=True,
                                             messages=False)

    def _create_model(self, x: np.ndarray, y: np.ndarray):
        """Create the model given input data X and output data Y.

        :param x: 2d array of indices of distance builder
        :param y: model fitness scores
        """
        # Make sure input data consists only of non-negative integers.
        assert np.issubdtype(x.dtype, np.integer) and x.min() >= 0

        # Define kernel
        self.input_dim = x.shape[1]
        # TODO: figure out default kernel initialization
        if self.covariance is None:
            raise ValueError('kernel_kernel must be provided; no default '
                             'kernel is implemented yet.')
            # kern = GPy.kern.RBF(self.input_dim, variance=1.)
        else:
            kern = self.covariance.raw_kernel
            self.covariance = None

        # Define model
        noise_var = y.var() * 0.01 if self.noise_var is None else self.noise_var
        normalize = x.size > 1  # only normalize if more than 1 observation.
        self.model = GPRegression(x, y, kern, noise_var=noise_var,
                                  normalizer=normalize)

        # Set hyperpriors
        if self.kernel_hyperpriors is not None:
            if 'GP' in self.kernel_hyperpriors:
                # Set likelihood hyperpriors.
                likelihood_hyperprior = self.kernel_hyperpriors['GP']
                set_priors(self.model.likelihood, likelihood_hyperprior,
                           in_place=True)
            if 'SE' in self.kernel_hyperpriors:
                # Set kernel hyperpriors.
                se_hyperprior = self.kernel_hyperpriors['SE']
                set_priors(self.model.kern, se_hyperprior, in_place=True)

        # Restrict variance if exact evaluations of the objective.
        if self.exact_f_eval:
            self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
        else:
            # --- We make sure we do not get ridiculously small residual noise variance
            if self.model.priors.size > 0:
                # FIXME: shouldn't need this case, but GPy doesn't have log
                # Jacobian implemented for Logistic
                self.model.Gaussian_noise.constrain_positive(warning=False)
            else:
                self.model.Gaussian_noise.constrain_bounded(1e-9, 1e6,
                                                            warning=False)

    def update(self, x_all, y_all, x_new, y_new):
        """Update the model with new observations.

        x_new and y_new are unused; the full data set is passed in each time.
        """
        if self.model is None:
            self._create_model(x_all, y_all)
        else:
            self.model.set_XY(x_all, y_all)
        self.train()

    def _predict(self, x: np.ndarray, full_cov: bool, include_likelihood: bool):
        if x.ndim == 1:
            x = x[None, :]
        m, v = self.model.predict(x, full_cov=full_cov,
                                  include_likelihood=include_likelihood)
        v = np.clip(v, 1e-10, np.inf)
        return m, v

    def predict(self, x: np.ndarray, with_noise: bool = True):
        m, v = self._predict(x, False, with_noise)
        # With full_cov=False, v holds per-point variances, so an elementwise
        # square root yields standard deviations.
        return m, np.sqrt(v)

    def get_f_max(self):
        """Return the maximum value the posterior mean takes at the observed
        inputs."""
        return self.model.predict(self.model.X)[0].max()

    def plot(self, **plot_kwargs):
        import matplotlib.pyplot as plt
        self.model.plot(plot_limits=(0, self.model.kern.n_models - 1),
                        resolution=self.model.kern.n_models,
                        **plot_kwargs)
        plt.show()
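# Example usage: a minimal sketch, not part of the original module. Covariance
# is project-local, so a tiny stand-in exposing the raw_kernel attribute that
# _create_model reads is used here; that interface is an assumption.
def _demo_kernel_kernel_gp():
    import GPy

    class _StubCovariance(object):  # hypothetical stand-in for Covariance
        def __init__(self, kern):
            self.raw_kernel = kern

    x = np.arange(6).reshape(-1, 1)    # integer indices, as the model expects
    y = np.sin(x.astype(float))        # made-up fitness scores
    m = KernelKernelGPModel(kernel_kernel=_StubCovariance(GPy.kern.RBF(1)),
                            optimize_restarts=2, verbose=False)
    m.update(x, y, None, None)         # x_new/y_new are ignored
    mean, std = m.predict(np.array([[3]]))
    print('posterior at index 3:', mean.ravel(), '+/-', std.ravel())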
# Imports assumed by this class; `opt` (hyperparameter namespace) and
# `clean_x` are project-local.
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from scipy.stats import norm
from GPy.models import GPRegression


class Kernel(object):
    def __init__(self, x0, y0, cons=None, alpha=opt.ke_alpha, beta=opt.ke_beta,
                 input_size=opt.ke_input_size, hidden_size=opt.ke_hidden_size,
                 num_layers=opt.ke_num_layers, bidirectional=opt.ke_bidirectional,
                 lr=opt.ke_lr, weight_decay=opt.ke_weight_decay):
        super(Kernel, self).__init__()
        self.alpha = alpha
        self.beta = beta
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, bidirectional=bidirectional)
        self.lstm = self.lstm.to(opt.device)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.bi = 2 if bidirectional else 1
        self.x = [x0]
        self.y = torch.tensor([y0], dtype=torch.float, device=opt.device,
                              requires_grad=False)
        self.cons = [cons]

        # Auxiliary GP over constraint values, used for the probability of
        # feasibility in the constrained acquisition.
        inp, out = clean_x(self.x, self.cons)
        self.model = GPRegression(inp, out)
        self.model.Gaussian_noise.constrain_fixed(1e-6, warning=False)
        self.model.optimize()

        self.x_best = x0
        self.y_best = y0
        self.i_best = 0
        self.n = 1
        self.E = self.embedding(x0).view(1, -1)
        self.K = self.kernel(self.E[0], self.E[0]).view(1, 1)
        self.K_inv = torch.inverse(self.K + self.beta * torch.eye(self.n, device=opt.device))
        self.optimizer = optim.Adam(self.lstm.parameters(), lr=lr,
                                    weight_decay=weight_decay)

    def embedding(self, xi):
        # Embed a sequence by mean-pooling the LSTM outputs and normalizing.
        inputs = xi.view(-1, 1, self.input_size)
        outputs, (hn, cn) = self.lstm(inputs)
        outputs = torch.mean(outputs.squeeze(1), dim=0)
        outputs = outputs / torch.norm(outputs)
        return outputs

    def kernel(self, ei, ej):
        # RBF kernel on embeddings with bandwidth alpha.
        d = ei - ej
        d = torch.sum(d * d)
        k = torch.exp(-d / (2 * self.alpha))
        return k

    def kernel_batch(self, en):
        n = self.n
        k = torch.zeros(n, device=opt.device)
        for i in range(n):
            k[i] = self.kernel(self.E[i], en)
        return k

    def predict(self, xn):
        # GP posterior with noise beta:
        #   mu    = k^T (K + beta I)^{-1} y
        #   sigma = sqrt(k(x, x) - k^T (K + beta I)^{-1} k + beta)
        n = self.n
        en = self.embedding(xn)
        k = self.kernel_batch(en)
        kn = self.kernel(en, en)
        t = torch.mm(k.view(1, n), self.K_inv)
        mu = torch.mm(t, self.y.view(n, 1))
        sigma = kn - torch.mm(t, k.view(n, 1))
        sigma = torch.sqrt(sigma + self.beta)
        return mu, sigma

    def acquisition_cons(self, xn):
        # Expected improvement weighted by the probability of feasibility
        # predicted by the constraint GP.
        with torch.no_grad():
            xn_ = np.array([xn.cpu().numpy().flatten()])
            mu_cons, sigma_cons = self.model.predict(xn_)
            sigma_cons = np.sqrt(sigma_cons)
            PoF = norm.cdf(0, mu_cons, sigma_cons)
            mu, sigma = self.predict(xn)
            mu = mu.item()
            sigma = sigma.item()
            y_best = self.y_best
            z = (mu - y_best) / sigma
            ei = (mu - y_best) * norm.cdf(z) + sigma * norm.pdf(z)
            return ei * PoF

    def acquisition(self, xn):
        # Expected improvement over the current best observation.
        with torch.no_grad():
            mu, sigma = self.predict(xn)
            mu = mu.item()
            sigma = sigma.item()
            y_best = self.y_best
            z = (mu - y_best) / sigma
            ei = (mu - y_best) * norm.cdf(z) + sigma * norm.pdf(z)
            return ei

    def kernel_batch_ex(self, t):
        # Kernel vector between sample t and all other samples (t excluded).
        n = self.n
        k = torch.zeros(n - 1, device=opt.device)
        for i in range(t):
            k[i] = self.kernel(self.E[i], self.E[t])
        for i in range(t + 1, n):
            k[i - 1] = self.kernel(self.E[t], self.E[i])
        return k

    def predict_ex(self, t):
        # Leave-one-out posterior for sample t, using the kernel matrix with
        # row and column t removed.
        n = self.n
        k = self.kernel_batch_ex(t)
        kt = self.kernel(self.E[t], self.E[t])
        indices = list(range(t)) + list(range(t + 1, n))
        indices = torch.tensor(indices, dtype=torch.long, device=opt.device)
        K = self.K
        K = torch.index_select(K, 0, indices)
        K = torch.index_select(K, 1, indices)
        K_inv = torch.inverse(K + self.beta * torch.eye(n - 1, device=opt.device))
        y = torch.index_select(self.y, 0, indices)
        v = torch.mm(k.view(1, n - 1), K_inv)
        mu = torch.mm(v, y.view(n - 1, 1))
        sigma = kt - torch.mm(v, k.view(n - 1, 1))
        sigma = torch.sqrt(sigma + self.beta)
        return mu, sigma

    def add_sample(self, xn, yn, consn):
        self.x.append(xn)
        self.y = torch.cat((self.y, torch.tensor([yn], dtype=torch.float,
                                                 device=opt.device,
                                                 requires_grad=False)))
        self.cons.append(consn)
        inp, out = clean_x(self.x, self.cons)
        self.model.set_XY(inp, out)
        self.model.optimize()
        n = self.n
        if consn > 0:
            if yn > self.y_best:
                self.x_best = xn
                self.y_best = yn
                self.i_best = n
        # Grow K by one row/column rather than rebuilding it.
        en = self.embedding(xn)
        k = self.kernel_batch(en)
        kn = self.kernel(en, en)
        self.E = torch.cat((self.E, en.view(1, -1)), 0)
        self.K = torch.cat(
            (torch.cat((self.K, k.view(n, 1)), 1),
             torch.cat((k.view(1, n), kn.view(1, 1)), 1)), 0)
        self.n += 1
        self.K_inv = torch.inverse(self.K + self.beta * torch.eye(self.n, device=opt.device))

    def add_batch(self, x, y, cons):
        self.x.extend(x)
        self.y = torch.cat((self.y, y))
        self.cons.extend(cons)
        inp, out = clean_x(self.x, self.cons)
        self.model.set_XY(inp, out)
        self.model.optimize()
        m = len(x)
        for i in range(m):
            n = self.n
            # NOTE Index into the new batch, not self.cons, which also holds
            # the previously added samples.
            if cons[i] > 0:
                if y[i].item() > self.y_best:
                    self.x_best = x[i]
                    self.y_best = y[i].item()
                    self.i_best = n
            en = self.embedding(x[i])
            k = self.kernel_batch(en)
            kn = self.kernel(en, en)
            self.E = torch.cat((self.E, en.view(1, -1)), 0)
            self.K = torch.cat(
                (torch.cat((self.K, k.view(n, 1)), 1),
                 torch.cat((k.view(1, n), kn.view(1, 1)), 1)), 0)
            self.n += 1
        self.K_inv = torch.inverse(self.K + self.beta * torch.eye(self.n, device=opt.device))

    def update_EK(self):
        # Recompute all embeddings and the kernel matrix after an LSTM update.
        n = self.n
        E_ = torch.zeros((n, self.E.size(1)), device=opt.device)
        for i in range(n):
            E_[i] = self.embedding(self.x[i])
        self.E = E_
        K_ = torch.zeros((n, n), device=opt.device)
        for i in range(n):
            for j in range(i, n):
                k = self.kernel(self.E[i], self.E[j])
                K_[i, j] = k
                K_[j, i] = k
        self.K = K_
        self.K_inv = torch.inverse(self.K + self.beta * torch.eye(self.n, device=opt.device))

    def loss(self):
        # Mean negative leave-one-out log predictive density;
        # 0.918939 ~= 0.5 * log(2 * pi), the Gaussian log-density constant.
        n = self.n
        ll = torch.zeros(n, device=opt.device)
        for i in range(n):
            mu, sigma = self.predict_ex(i)
            d = self.y[i] - mu
            ll[i] = -(0.918939 + torch.log(sigma) + d * d / (2 * sigma * sigma))
        return -torch.mean(ll)

    def opt_step(self):
        # One gradient step on the LSTM embedding parameters; returns the
        # mean leave-one-out log-likelihood before the step.
        if self.n < 2:
            return 0.0
        self.optimizer.zero_grad()
        loss = self.loss()
        ll = -loss.item()
        loss.backward()
        self.optimizer.step()
        self.update_EK()
        return ll

    def save(self, save_path):
        path = os.path.dirname(save_path)
        if path and not os.path.exists(path):
            os.makedirs(path)
        torch.save(self, save_path)
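# Example usage: a sketch only. `opt` and `clean_x` are project-local, so the
# hyperparameter names (opt.ke_input_size, opt.device, ...) and the clean_x
# interface are taken from how the class uses them, not verified here.
def _demo_lstm_kernel():
    x0 = torch.randn(5, opt.ke_input_size, device=opt.device)  # one candidate sequence
    model = Kernel(x0, y0=0.0, cons=1.0)
    xn = torch.randn(5, opt.ke_input_size, device=opt.device)  # a new candidate
    print('EI of candidate:', model.acquisition(xn))
    model.add_sample(xn, yn=0.5, consn=1.0)
    print('LOO log-likelihood after one LSTM step:', model.opt_step())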