Example #1
    def grad_optimize_ei(self, cand, comp, vals, durs, compute_grad=True):
        # Here we have to compute the gradient of EI per second.
        # This means differentiating through both kernels: the one predicting
        # time and the one predicting EI.
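        # Assumed module aliases from the enclosing file (not shown in this
        # snippet): numpy as np, scipy.linalg as spla, scipy.stats as sps, and
        # a gp helper module providing grad_dist2 and the covariance gradients.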
        best = np.min(vals)
        cand = np.reshape(cand, (-1, comp.shape[1]))

        # First we make predictions for the durations
        # Compute covariances
        comp_time_cov   = self.cov(self.time_amp2, self.time_ls, comp)
        cand_time_cross = self.cov(self.time_amp2, self.time_ls, comp, cand)

        # Cholesky decompositions
        obsv_time_cov  = comp_time_cov + self.time_noise*np.eye(comp.shape[0])
        obsv_time_chol = spla.cholesky( obsv_time_cov, lower=True )

        # Linear systems
        t_alpha  = spla.cho_solve((obsv_time_chol, True), durs - self.time_mean)

        # Predict marginal mean times and (possibly) variances
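        # (i.e. the GP posterior mean k_*^T K^{-1} (durs - time_mean) + time_mean,
        # with K^{-1} (durs - time_mean) already available as t_alpha)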
        func_time_m = np.dot(cand_time_cross.T, t_alpha) + self.time_mean

        # We don't really need the time variances now
        #func_time_v = self.time_amp2*(1+1e-6) - np.sum(t_beta**2, axis=0)

        # Bring time out of the log domain
        func_time_m = np.exp(func_time_m)

        # Compute derivative of cross-distances (note: grad_cross_r is not
        # used further below).
        grad_cross_r = gp.grad_dist2(self.time_ls, comp, cand)

        # Apply covariance function
        cov_grad_func = getattr(gp, 'grad_' + self.cov_func.__name__)
        cand_cross_grad = cov_grad_func(self.time_ls, comp, cand)
        grad_cross_t = np.squeeze(cand_cross_grad)

        # Now compute the gradients w.r.t. ei
        # The primary covariances for prediction.
        comp_cov   = self.cov(self.amp2, self.ls, comp)
        cand_cross = self.cov(self.amp2, self.ls, comp, cand)

        # Compute the required Cholesky.
        obsv_cov  = comp_cov + self.noise*np.eye(comp.shape[0])
        obsv_chol = spla.cholesky( obsv_cov, lower=True )

        cand_cross_grad = cov_grad_func(self.ls, comp, cand)

        # Predictive things.
        # Solve the linear systems.
        alpha  = spla.cho_solve((obsv_chol, True), vals - self.mean)
        beta   = spla.solve_triangular(obsv_chol, cand_cross, lower=True)

        # Predict the marginal means and variances at candidates.
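        # Mean: k_*^T K^{-1} (vals - mean) + mean.  Variance: the prior amp2
        # minus k_*^T K^{-1} k_*, where the reduction term equals
        # np.sum(beta**2, axis=0) because beta = L^{-1} k_* and K = L L^T.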
        func_m = np.dot(cand_cross.T, alpha) + self.mean
        func_v = self.amp2*(1+1e-6) - np.sum(beta**2, axis=0)

        # Expected improvement
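        # Closed form for minimization: EI = s * (u * Phi(u) + phi(u)),
        # with u = (best - m) / s.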
        func_s = np.sqrt(func_v)
        u      = (best - func_m) / func_s
        ncdf   = sps.norm.cdf(u)
        npdf   = sps.norm.pdf(u)
        ei     = func_s*(u*ncdf + npdf)

        ei_per_s = -np.sum(ei/func_time_m)
        if not compute_grad:
            return ei_per_s

        grad_time_xp_m = np.dot(t_alpha.transpose(), grad_cross_t)

        # Gradients of ei w.r.t. mean and variance
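        # dEI/dm = -Phi(u) and dEI/ds^2 = phi(u) / (2*s).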
        g_ei_m = -ncdf
        g_ei_s2 = 0.5*npdf / func_s

        # Apply covariance function
        grad_cross = np.squeeze(cand_cross_grad)

        grad_xp_m = np.dot(alpha.transpose(), grad_cross)
        grad_xp_v = np.dot(-2*spla.cho_solve((obsv_chol, True),
                                             cand_cross).transpose(), grad_cross)

        grad_xp = 0.5*self.amp2*(grad_xp_m*g_ei_m + grad_xp_v*g_ei_s2)
        grad_time_xp_m = 0.5*self.time_amp2*grad_time_xp_m*func_time_m
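        # Combine via the quotient rule: d(EI/t)/dx = (t*dEI/dx - EI*dt/dx) / t**2.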
        grad_xp = (func_time_m*grad_xp - ei*grad_time_xp_m)/(func_time_m**2)

        return ei_per_s, grad_xp.flatten()
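
A hedged usage sketch, not part of the example above: because the method returns the negated EI-per-second value together with its gradient, it is typically handed to a gradient-based optimizer. The names chooser, cand, comp, vals, and durs are assumed to already exist with the shapes the method expects.

    import scipy.optimize as spo

    # Refine a candidate by minimizing the negated EI per second;
    # grad_optimize_ei returns both the objective value and its gradient,
    # so no separate fprime argument is needed.
    result = spo.fmin_l_bfgs_b(chooser.grad_optimize_ei,
                               cand.flatten(),
                               args=(comp, vals, durs))
    refined_cand = result[0]  # optimized candidate location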