def grad_optimize_ei(self, cand, comp, vals, durs, compute_grad=True):
    """Evaluate negated expected improvement per second and its gradient.

    Two GP predictions are combined: one for the objective (hyperparameters
    ``self.amp2``, ``self.ls``, ``self.noise``, ``self.mean``) and one for
    the run time (``self.time_amp2``, ``self.time_ls``, ``self.time_noise``,
    ``self.time_mean``).  Expected improvement at each candidate is divided
    by the predicted time, summed over candidates and negated.

    Args:
        cand: Candidate point(s); reshaped to ``(-1, comp.shape[1])``.
        comp: Completed (already evaluated) inputs, one row per point.
        vals: Objective values observed at ``comp``.
        durs: Durations observed at ``comp`` -- presumably in the log
            domain, since the predicted mean is exponentiated; confirm
            against the caller.
        compute_grad: When false, skip all gradient work.

    Returns:
        ``ei_per_s`` alone when ``compute_grad`` is false, otherwise the
        tuple ``(ei_per_s, grad)`` where ``grad`` is flattened.  Both modes
        report the same objective value (earlier revisions returned the raw,
        un-normalised per-candidate EI array in the no-gradient mode).
    """
    best = np.min(vals)
    cand = np.reshape(cand, (-1, comp.shape[1]))
    n_comp = comp.shape[0]

    # ---- Duration model: predictive mean of the run time ----------------
    comp_time_cov = self.cov(self.time_amp2, self.time_ls, comp)
    cand_time_cross = self.cov(self.time_amp2, self.time_ls, comp, cand)

    obsv_time_cov = comp_time_cov + self.time_noise*np.eye(n_comp)
    obsv_time_chol = spla.cholesky(obsv_time_cov, lower=True)

    t_alpha = spla.cho_solve((obsv_time_chol, True), durs - self.time_mean)

    # Only the mean is needed; the time variance is never used below.
    func_time_m = np.dot(cand_time_cross.T, t_alpha) + self.time_mean

    # Bring time out of the log domain.
    func_time_m = np.exp(func_time_m)

    # ---- Objective model: predictive mean and variance ------------------
    comp_cov = self.cov(self.amp2, self.ls, comp)
    cand_cross = self.cov(self.amp2, self.ls, comp, cand)

    obsv_cov = comp_cov + self.noise*np.eye(n_comp)
    obsv_chol = spla.cholesky(obsv_cov, lower=True)

    alpha = spla.cho_solve((obsv_chol, True), vals - self.mean)
    beta = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

    func_m = np.dot(cand_cross.T, alpha) + self.mean
    # The (1 + 1e-6) factor adds a small jitter to the prior variance.
    func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

    # ---- Expected improvement -------------------------------------------
    func_s = np.sqrt(func_v)
    u = (best - func_m) / func_s
    ncdf = sps.norm.cdf(u)
    npdf = sps.norm.pdf(u)
    ei = func_s*(u*ncdf + npdf)

    # Negated so that a minimiser maximises EI per second.
    ei_per_s = -np.sum(ei/func_time_m)
    if not compute_grad:
        # Return the same objective as the gradient path does, so callers
        # see a consistent value whichever mode they use.
        return ei_per_s

    # ---- Gradients (only evaluated when requested) -----------------------
    # The kernel-gradient helper is looked up by the covariance's name.
    cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)

    # Duration model: derivative of the predicted mean w.r.t. the candidate.
    grad_cross_t = np.squeeze(cov_grad_func(self.time_ls, comp, cand))
    grad_time_xp_m = np.dot(t_alpha.transpose(), grad_cross_t)

    # Gradients of ei w.r.t. mean and variance.
    g_ei_m = -ncdf
    g_ei_s2 = 0.5*npdf / func_s

    # Objective model: derivatives of predictive mean and variance.
    grad_cross = np.squeeze(cov_grad_func(self.ls, comp, cand))
    grad_xp_m = np.dot(alpha.transpose(), grad_cross)
    grad_xp_v = np.dot(
        -2*spla.cho_solve((obsv_chol, True), cand_cross).transpose(),
        grad_cross)

    # NOTE(review): the 0.5*amp2 scaling and the overall sign presumably
    # match the conventions of the gp.grad_* helpers -- confirm there; the
    # arithmetic is kept exactly as it was.
    grad_xp = 0.5*self.amp2*(grad_xp_m*g_ei_m + grad_xp_v*g_ei_s2)
    grad_time_xp_m = 0.5*self.time_amp2*grad_time_xp_m*func_time_m

    # Quotient rule for ei / time.
    grad_xp = (func_time_m*grad_xp - ei*grad_time_xp_m)/(func_time_m**2)

    return ei_per_s, grad_xp.flatten()