def alpha_nr(self, maxit=20, init_alpha=[]):
    """
    Newton-Raphson procedure for updating the Dirichlet hyperparameter alpha

    :param maxit: maximum number of iterations
    :param init_alpha: initial guess of alphas
    :return: the updated alpha
    """
    # Assumes numpy as np, scipy.special's psi and polygamma (as pg), and a
    # module-level SMALL_NUMBER constant (e.g. 1e-100) are available.
    old_alpha = self.alpha.copy()
    try:
        M, K = self.gamma_matrix.shape
        if not len(init_alpha) > 0:
            init_alpha = self.gamma_matrix.mean(axis=0) / K
        alpha = init_alpha.copy()

        # Sufficient statistic: sum over documents of E_q[log theta_d]
        g_term = (psi(self.gamma_matrix) - psi(self.gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)

        for it in range(maxit):
            grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term

            # The Hessian is diag(h) + z * ones, so the Newton direction
            # (grad - c) / h can be computed without forming the matrix
            z = M * pg(1, alpha.sum())
            h = -M * pg(1, alpha)
            c = ((grad / h).sum()) / ((1.0 / z) + (1.0 / h).sum())
            alpha_change = (grad - c) / h

            # Halve the step until no component would be driven negative
            n_bad = (alpha_change > alpha).sum()
            while n_bad > 0:
                alpha_change /= 2.0
                n_bad = (alpha_change > alpha).sum()

            alpha_new = alpha - alpha_change
            alpha_new[alpha_new <= SMALL_NUMBER] = SMALL_NUMBER

            diff = np.sum(np.abs(alpha - alpha_new))
            alpha = alpha_new
            if diff < 1e-6 and it > 1:
                return alpha
    except Exception:
        # Keep the previous value if the update fails numerically
        alpha = old_alpha
    return alpha
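Why this step avoids forming the Hessian: for the Dirichlet likelihood the Hessian is a diagonal matrix plus a constant matrix, so the Newton direction has a closed form that costs O(K) instead of O(K^3). This is the "Hessian with special structure" trick from Appendix A.2 of Blei et al.'s LDA paper (see also Minka's "Estimating a Dirichlet distribution"). In the notation of the code:

\[
H = \operatorname{diag}(h) + z\,\mathbf{1}\mathbf{1}^{\top},
\qquad h_k = -M\,\psi'(\alpha_k),
\qquad z = M\,\psi'\Big(\sum\nolimits_j \alpha_j\Big),
\]
\[
(H^{-1}\,\mathrm{grad})_k = \frac{\mathrm{grad}_k - c}{h_k},
\qquad c = \frac{\sum_j \mathrm{grad}_j / h_j}{1/z + \sum_j 1/h_j}.
\]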
import numpy as np
from scipy.special import psi, polygamma as pg


def alpha_nr(g_term, M, maxit=100, init_alpha=[]):
    """
    Standalone Newton-Raphson update for the Dirichlet hyperparameter alpha.

    :param g_term: length-K vector, the sum over documents of
        E_q[log theta_d] under the variational posterior
    :param M: number of documents
    :param maxit: maximum number of iterations
    :param init_alpha: initial guess of alphas
    :return: the updated alpha
    """
    SMALL_NUMBER = 1e-100
    K = len(g_term)
    if len(init_alpha) == 0:
        init_alpha = np.ones_like(g_term) / K
    alpha = init_alpha.copy()

    for it in range(maxit):
        grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term

        # O(K) Newton direction for the diagonal-plus-constant Hessian
        z = M * pg(1, alpha.sum())
        h = -M * pg(1, alpha)
        c = ((grad / h).sum()) / ((1.0 / z) + (1.0 / h).sum())
        alpha_change = (grad - c) / h

        # Halve the step until no component would be driven negative
        n_bad = (alpha_change > alpha).sum()
        while n_bad > 0:
            alpha_change /= 2.0
            n_bad = (alpha_change > alpha).sum()

        alpha_new = alpha - alpha_change
        alpha_new[alpha_new <= SMALL_NUMBER] = SMALL_NUMBER

        diff = np.sum(np.abs(alpha - alpha_new))
        alpha = alpha_new
        if diff < 1e-6 and it > 10:
            return alpha
    return alpha
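A minimal usage sketch for the standalone variant above; the gamma matrix is synthetic and the sizes are purely illustrative:

import numpy as np
from scipy.special import psi

# Synthetic (M x K) matrix of per-document variational Dirichlet parameters
rng = np.random.default_rng(0)
M, K = 100, 10
gamma_matrix = rng.uniform(0.5, 5.0, size=(M, K))

# g_term[k] = sum over documents of E_q[log theta_dk]
g_term = (psi(gamma_matrix) - psi(gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)

alpha = alpha_nr(g_term, M)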
def alpha_nr(self, maxit=20, init_alpha=[]):
    # Dense-Hessian variant of the Newton update. Assumes numpy as np and
    # scipy.special's psi and polygamma (as pg) are available.
    M, K = self.gamma_matrix.shape
    if not len(init_alpha) > 0:
        init_alpha = self.gamma_matrix.mean(axis=0) / K
    alpha = init_alpha.copy()

    # Sufficient statistic: sum over documents of E_q[log theta_d]
    g_term = (psi(self.gamma_matrix) - psi(self.gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)

    for it in range(maxit):
        grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term
        # Full K x K Hessian: diagonal plus a constant matrix
        H = -M * np.diag(pg(1, alpha)) + M * pg(1, alpha.sum())
        # Solving is cheaper and more stable than forming the inverse
        alpha_new = alpha - np.linalg.solve(H, grad)

        # If the step left the positive orthant, restart from a smaller guess
        if (alpha_new < 0).sum() > 0:
            init_alpha /= 10.0
            return self.alpha_nr(maxit=maxit, init_alpha=init_alpha)

        diff = np.sum(np.abs(alpha - alpha_new))
        alpha = alpha_new
        if diff < 1e-6 and it > 1:
            return alpha
    return alpha
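The dense solve above and the O(K) closed form used in the other variants compute the same direction up to floating point. A quick standalone check, with illustrative random values:

import numpy as np
from scipy.special import polygamma as pg

rng = np.random.default_rng(0)
M, K = 50, 8
alpha = rng.uniform(0.1, 2.0, size=K)
grad = rng.normal(size=K)

# Dense Hessian, as formed in this variant
H = -M * np.diag(pg(1, alpha)) + M * pg(1, alpha.sum())
dense_step = np.linalg.solve(H, grad)

# Closed-form direction from the diagonal-plus-constant structure
z = M * pg(1, alpha.sum())
h = -M * pg(1, alpha)
c = (grad / h).sum() / (1.0 / z + (1.0 / h).sum())
fast_step = (grad - c) / h

assert np.allclose(dense_step, fast_step)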
def tell(self, X, fit):
    """Updates the model parameters with the function values."""
    # Assumes numpy as np and scipy.special's polygamma (as pg) are available.
    fit = self.utility(fit)  # Map raw function values to utility values

    # Digamma (pg(0, .)) and trigamma (pg(1, .)) terms
    p0a = pg(0, self.a)
    p0b = pg(0, self.b)
    p0ab = pg(0, self.a + self.b)
    p1a = pg(1, self.a)
    p1b = pg(1, self.b)
    p1ab = pg(1, self.a + self.b)

    # Information differentials (utility-weighted score averages)
    N = len(X)
    dA = p0ab - p0a + sum(f * np.log(x) for x, f in zip(X, fit)) / N
    dB = p0ab - p0b + sum(f * np.log(1 - x) for x, f in zip(X, fit)) / N

    # Compute the Riemannian metric and raise the derivatives
    gdet = p1a * p1b - p1ab * (p1a + p1b)
    gA = ((p1b - p1ab) * dA + p1ab * dB) / (self.a * gdet)
    gB = ((p1a - p1ab) * dB + p1ab * dA) / (self.b * gdet)

    # Update parameters with the exponential map (keeps a, b positive)
    self.a *= np.exp(self.step * gA)
    self.b *= np.exp(self.step * gB)
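The gdet/gA/gB algebra above is the inverse of the 2x2 Fisher information matrix of Beta(a, b) applied to the differentials (dA, dB); the extra division by a and b turns the raised gradient into a step in log-parameter space, which the exponential-map update then applies multiplicatively:

\[
G(a,b) = \begin{pmatrix} \psi'(a) - \psi'(a+b) & -\psi'(a+b) \\ -\psi'(a+b) & \psi'(b) - \psi'(a+b) \end{pmatrix},
\qquad \det G = \psi'(a)\,\psi'(b) - \psi'(a+b)\,\big(\psi'(a) + \psi'(b)\big),
\]
\[
G^{-1} \begin{pmatrix} d_A \\ d_B \end{pmatrix}
= \frac{1}{\det G}
\begin{pmatrix} \big(\psi'(b) - \psi'(a+b)\big)\, d_A + \psi'(a+b)\, d_B \\ \big(\psi'(a) - \psi'(a+b)\big)\, d_B + \psi'(a+b)\, d_A \end{pmatrix}.
\]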
def _train_m_step(self):
    # Assumes numpy as np and scipy.special's polygamma (as pg) are
    # available, and that EPS is a small positive module-level constant.
    self.obj.pull()

    # Update beta: clip away zeros, then normalize each column
    self.new_beta[:, :] = np.maximum(self.new_beta, EPS)
    self.beta[:, :] = self.new_beta / np.sum(self.new_beta, axis=0)[None, :]
    self.new_beta[:, :] = 0

    # Update alpha with one Newton step; the Hessian is diagonal plus a
    # constant matrix, so the direction (gvec - c_0) / hvec costs O(K)
    alpha_sum = np.sum(self.alpha)
    gvec = np.sum(self.grad_alpha, axis=0)
    gvec += self.num_docs * (pg(0, alpha_sum) - pg(0, self.alpha))
    hvec = self.num_docs * pg(1, self.alpha)
    z_0 = pg(1, alpha_sum)
    c_nume = np.sum(gvec / hvec)
    c_deno = 1 / z_0 + np.sum(1 / hvec)
    c_0 = c_nume / c_deno
    delta = (gvec - c_0) / hvec
    self.alpha -= delta
    self.alpha[:] = np.maximum(self.alpha, EPS)

    self.grad_alpha[:, :] = 0
    self.obj.push()