def computeHessianLogZ(self):
    """ Hessian of log(Z). """
    if 'hess_logZ' in self.__dict__:
        return
    L = self.traj.L
    self.computeLogZ()
    self.computeGradientLogZ()
    intermediate_log_hessians = []
    # Inner recursive function.
    # The stack should be large enough for our purpose.
    def inner(indices):
        """ Inner working function. """
        L_ = len(indices)
        if L_ == L:
            v1 = np.zeros_like(self.theta)
            for l in range(L):
                v1 += self.traj.features_np[l][indices[l]]
            h = (dot(v1, self.theta), outer(v1, v1))
            intermediate_log_hessians.append(h)
        else:
            i = indices[-1]
            if i in self.traj.connections_forward[L_-1]:
                for j in self.traj.connections_forward[L_-1][i]:
                    inner(indices + [j])
    for i in range(self.traj.num_choices[0]):
        inner([i])
    self.log_hess_Z = lse_vec(intermediate_log_hessians)
    (grad_log_scale, grad_vec) = self.log_gradZ
    (hess_log_scale, hess_vec) = self.log_hess_Z
    (lhess_log_scale, lhess_vec) = \
        lse_vec([(hess_log_scale - self.logZ, hess_vec),
                 (2 * grad_log_scale - 2 * self.logZ,
                  -outer(grad_vec, grad_vec))])
    self.hess_logZ = exp(lhess_log_scale) * lhess_vec
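# The closing lse_vec call above combines exp(hess_log_scale) * hess_vec = H
# and exp(2 * grad_log_scale) * outer(grad_vec, grad_vec) = g g^T into the
# standard exponential-family identity hess(log Z) = H / Z - (g / Z)(g / Z)^T,
# i.e. the covariance of the path features. A minimal standalone sketch of
# that identity on hypothetical toy data (names below are illustrative only,
# not from this module):
import numpy as np

def toy_hess_logZ(theta, path_features):
    """ Brute-force hess(log Z) as the covariance of path features. """
    ws = np.array([np.exp(np.dot(v, theta)) for v in path_features])
    Z = ws.sum()
    g = sum(w * v for (w, v) in zip(ws, path_features)) / Z
    H = sum(w * np.outer(v, v) for (w, v) in zip(ws, path_features)) / Z
    return H - np.outer(g, g)

theta = np.array([0.2, -0.5])
paths = [np.array([1.0, 0.0]), np.array([0.0, 1.0]), np.array([1.0, 1.0])]
hess = toy_hess_logZ(theta, paths)
# A covariance matrix is symmetric positive semi-definite.
assert np.allclose(hess, hess.T)
assert np.linalg.eigvalsh(hess).min() >= -1e-12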
def test_lse_vec_2():
    ''' test_lse_vec_2 '''
    # A single term with a -inf log-scale must collapse to the zero vector.
    xs = [(MINUS_INF, np.ones(2))]
    (u, v) = lse_vec(xs)
    assert u == MINUS_INF, u
    assert np.abs(v).max() == 0
def computeGradientLogZ(self):
    """ Computes the gradient of the logarithm of Z. """
    if 'grad_logZ' in self.__dict__:
        return
    self.computeLogZ()
    # List of the log-weighted gradients of all the possible trajectories.
    intermediate_log_grads = []
    L = self.traj.L
    def inner(indices):
        """ Inner work function. """
        L_ = len(indices)
        if L_ == L:
            v1 = np.zeros_like(self.theta)
            for l in range(L):
                v1 += self.traj.features_np[l][indices[l]]
            log_g = (dot(v1, self.theta), v1)
            intermediate_log_grads.append(log_g)
        else:
            i = indices[-1]
            if i in self.traj.connections_forward[L_-1]:
                for j in self.traj.connections_forward[L_-1][i]:
                    inner(indices + [j])
    for i in range(self.traj.num_choices[0]):
        inner([i])
    (log_scale, v) = lse_vec(intermediate_log_grads)
    self.grad_logZ = exp(log_scale - self.logZ) * v
    self.log_gradZ = (log_scale, v)
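# The enumeration above materializes every path's feature sum v and weight
# exp(<v, theta>), so grad(log Z) reduces to the expected feature sum under
# the Gibbs distribution over paths. A standalone finite-difference check of
# that identity on hypothetical toy data (all names below are illustrative):
import numpy as np

def toy_logZ_and_grad(theta, path_features):
    """ Brute-force log Z and grad(log Z) = E[v] over enumerated paths. """
    ws = np.array([np.exp(np.dot(v, theta)) for v in path_features])
    Z = ws.sum()
    grad = sum(w * v for (w, v) in zip(ws, path_features)) / Z
    return np.log(Z), grad

theta = np.array([0.3, -0.7])
paths = [np.array([1.0, 0.5]), np.array([0.0, 2.0]), np.array([1.5, -1.0])]
logZ, grad = toy_logZ_and_grad(theta, paths)
eps = 1e-6
for k in range(len(theta)):
    d = np.zeros_like(theta)
    d[k] = eps
    fd = (toy_logZ_and_grad(theta + d, paths)[0] - logZ) / eps
    assert abs(fd - grad[k]) < 1e-4, (k, fd, grad[k])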
def test_2():
    """ test_2 """
    inf = float("inf")
    xs = [(log(3.0), np.ones(2)), (2, np.zeros(2)),
          (-inf, np.ones(2)), (-inf, np.zeros(2))]
    (norm, x) = lse_vec(xs)
    assert abs(norm - log(3)) < 1e-6, (norm, log(3), x)
    assert np.abs(x - 1).max() < 1e-6, x
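# Together, the two tests pin down the contract of lse_vec: it takes terms
# exp(log_scale) * vec as (log_scale, vec) pairs, sums them stably in log
# space, and renormalizes so the returned vector has unit max-absolute entry
# (with (MINUS_INF, 0) for an all-vanishing sum). A minimal sketch consistent
# with these tests; the module's actual implementation may differ in details:
import numpy as np

MINUS_INF = -float('inf')

def lse_vec_sketch(xs):
    """ Stable sum of (log_scale, vec) terms, renormalized to unit max-abs. """
    m = max(log_scale for (log_scale, _) in xs)
    if m == MINUS_INF:
        # Every term vanishes: zero vector with a -inf scale.
        return (MINUS_INF, np.zeros_like(xs[0][1]))
    acc = np.zeros_like(xs[0][1], dtype=float)
    for (log_scale, vec) in xs:
        if log_scale > MINUS_INF:
            acc += np.exp(log_scale - m) * vec
    peak = np.abs(acc).max()
    if peak == 0.0:
        return (MINUS_INF, acc)
    return (m + np.log(peak), acc / peak)

# Reproduces test_2: 3 * ones + e^2 * zeros = 3 * ones -> (log 3, ones).
(norm, x) = lse_vec_sketch([(np.log(3.0), np.ones(2)), (2, np.zeros(2)),
                            (MINUS_INF, np.ones(2)), (MINUS_INF, np.zeros(2))])
assert abs(norm - np.log(3)) < 1e-6 and np.abs(x - 1).max() < 1e-6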
def computeHessianLogZ(self):
    """ Hessian of log(Z). """
    if 'hess_logZ' in self.__dict__:
        return
    inf = float('inf')
    self.computeLogZ()
    self.computeGradientLogZ()
    N = len(self.theta)
    L = self.traj.L
    # The initial values:
    N_0 = self.traj.num_choices[0]
    log_hess_Zs0 = []
    for i in range(N_0):
        T_i_0 = self.traj.features_np[0][i]
        log_hess_Zs0.append((dot(T_i_0, self.theta), outer(T_i_0, T_i_0)))
    self.log_hess_Zs = [log_hess_Zs0]
    # Recursion:
    for l in range(1, L):
        N_l = self.traj.num_choices[l]
        l_vec = [(-inf, np.zeros((N, N))) for i in range(N_l)]
        conns_back = self.traj.connections_backward[l]
        w = dot(self.traj.features_np[l], self.theta)
        for i in range(N_l):
            T_i_l = self.traj.features_np[l][i]
            vs = [(self.logZs[l][i], outer(T_i_l, T_i_l))]
            if i in conns_back:
                assert conns_back[i], (i, conns_back)
                for j in conns_back[i]:
                    (l_norm, h) = self.log_hess_Zs[l-1][j]
                    vs.append((w[i] + l_norm, h))
                log_g_vec = lse_vec([self.log_grad_Zs[l-1][j]
                                     for j in conns_back[i]])
                (l_norm, u_g_vec) = log_g_vec
                vs.append((w[i] + l_norm, outer(u_g_vec, T_i_l)))
                vs.append((w[i] + l_norm, outer(T_i_l, u_g_vec)))
            l_vec[i] = lse_vec(vs)
        self.log_hess_Zs.append(l_vec)
    assert len(self.log_hess_Zs) == L
    self.log_hess_Z = lse_vec(self.log_hess_Zs[-1])
    (l_norm, h) = self.log_hess_Z
    if -100 < l_norm < 100:
        self.hess_Z = exp(l_norm) * h
    (l_norm_g, g) = self.log_grad_Z
    self.hess_logZ = exp(l_norm - self.logZ) * h \
        - exp(2 * l_norm_g - 2 * self.logZ) * outer(g, g)
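# The recursion above implements, in log space,
#   H_l[i] = T_i T_i^T Z_l[i]
#            + w_i * sum_j (H_{l-1}[j] + T_i g_{l-1}[j]^T + g_{l-1}[j] T_i^T),
# where w_i = exp(<T_i, theta>). A standalone plain-space check of this
# recursion against brute-force path enumeration on a tiny fully connected
# two-step toy chain (hypothetical data, not from this module):
import numpy as np
from itertools import product

theta = np.array([0.4, -0.3])
T = [np.array([[1.0, 0.0], [0.0, 1.0]]),   # step-0 features, 2 choices
     np.array([[0.5, 0.5], [2.0, -1.0]])]  # step-1 features, 2 choices

# Brute force over all 4 paths.
paths = [T[0][i] + T[1][j] for (i, j) in product(range(2), range(2))]
ws = np.array([np.exp(np.dot(v, theta)) for v in paths])
H_bf = sum(w * np.outer(v, v) for (w, v) in zip(ws, paths))

# Forward DP: one (Z, g, H) triple per choice, then the step-1 update.
Z0 = [np.exp(np.dot(t, theta)) for t in T[0]]
g0 = [z * t for (z, t) in zip(Z0, T[0])]
H0 = [z * np.outer(t, t) for (z, t) in zip(Z0, T[0])]
H1 = []
for t in T[1]:
    w = np.exp(np.dot(t, theta))
    Z_i = w * sum(Z0)
    g_sum = sum(g0)
    H1.append(Z_i * np.outer(t, t)
              + w * (sum(H0) + np.outer(t, g_sum) + np.outer(g_sum, t)))
assert np.allclose(sum(H1), H_bf)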
def computeGradientLogZ(self):
    """ Computes the gradient of log(Z) by forward dynamic programming. """
    if 'grad_logZ' in self.__dict__:
        return
    self.computeLogZ()
    inf = float('inf')
    N = len(self.theta)
    L = self.traj.L
    # The initial values:
    N_0 = self.traj.num_choices[0]
    assert not np.isnan(self.theta).any()
    log_grad_Zs0 = [(dot(self.traj.features_np[0][i], self.theta),
                     self.traj.features_np[0][i]) for i in range(N_0)]
    self.log_grad_Zs = [log_grad_Zs0]
    # Recursion:
    for l in range(1, L):
        N_l = self.traj.num_choices[l]
        l_vec = [(-inf, np.zeros(N)) for i in range(N_l)]
        conns_back = self.traj.connections_backward[l]
        w = dot(self.traj.features_np[l], self.theta)
        assert not np.isnan(w).any()
        for i in range(N_l):
            vs = [(self.logZs[l][i], self.traj.features_np[l][i])]
            assert not np.isnan(vs[0][0]).any()
            assert not np.isnan(vs[0][1]).any()
            if i in conns_back:
                for j in conns_back[i]:
                    (l_norm, v) = self.log_grad_Zs[l-1][j]
                    assert not np.isnan(v).any()
                    assert not np.isnan(l_norm).any()
                    vs.append((w[i] + l_norm, v))
            l_vec[i] = lse_vec(vs)
            assert not np.isnan(l_vec[i][0]).any(), (l_vec[i], vs)
            assert not np.isnan(l_vec[i][1]).any(), (l_vec[i], vs)
        self.log_grad_Zs.append(l_vec)
    assert len(self.log_grad_Zs) == L
    self.log_grad_Z = lse_vec(self.log_grad_Zs[L-1])
    (l_norm, v) = self.log_grad_Z
    if -100 < l_norm < 100:
        self.grad_Z = exp(l_norm) * v
    self.grad_logZ = exp(l_norm - self.logZ) * v
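# The |l_norm| < 100 guard above materializes grad_Z = exp(l_norm) * v only
# when the scale is safely representable in float64; grad_logZ is always
# computed, since l_norm - logZ stays moderate even when both terms are huge.
# A toy illustration of why (hypothetical numbers):
import numpy as np
with np.errstate(over='ignore'):
    big = 800.0
    print(np.exp(big))              # inf: the plain-space value overflows
print(np.exp(big - (big - 1.5)))    # exp(1.5): the log-space difference is fine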