Example #1
0
    def alpha_nr(self, maxit=20, init_alpha=None):
        """Newton-Raphson update of the Dirichlet hyperparameter alpha.

        Exploits the diagonal-plus-constant structure of the Hessian so each
        Newton step is computed in linear time, with no matrix inversion.

        :param maxit: maximum number of Newton iterations
        :param init_alpha: optional initial guess; when None or empty, the
            column means of ``self.gamma_matrix`` divided by K are used
        :return: the updated alpha vector; on numerical failure the previous
            ``self.alpha`` is returned unchanged
        """
        old_alpha = self.alpha.copy()
        try:
            M, K = self.gamma_matrix.shape
            # None replaces the old mutable-default-argument idiom; an empty
            # sequence is still accepted for backward compatibility.
            if init_alpha is None or not len(init_alpha) > 0:
                init_alpha = self.gamma_matrix.mean(axis=0) / K
            alpha = init_alpha.copy()
            # Sufficient statistic: sum_d (psi(gamma_dk) - psi(sum_k gamma_dk)).
            g_term = (psi(self.gamma_matrix) -
                      psi(self.gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)
            for it in range(maxit):
                grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term

                # Hessian = diag(h) + z; apply its inverse to grad in O(K).
                z = M * pg(1, alpha.sum())
                h = -M * pg(1, alpha)
                c = ((grad / h).sum()) / ((1.0 / z) + (1.0 / h).sum())
                alpha_change = (grad - c) / h

                # Halve the step until no component would be driven negative.
                while (alpha_change > alpha).sum() > 0:
                    alpha_change /= 2.0

                alpha_new = alpha - alpha_change

                # Clamp so alpha stays strictly positive.
                alpha_new[alpha_new <= SMALL_NUMBER] = SMALL_NUMBER

                diff = np.sum(np.abs(alpha - alpha_new))
                alpha = alpha_new
                if diff < 1e-6 and it > 1:
                    return alpha
        except Exception:
            # Numerical failure (e.g. overflow in psi/polygamma): keep the
            # previous alpha rather than crashing the variational loop.
            alpha = old_alpha
        return alpha
def alpha_nr(g_term, M, maxit=100, init_alpha=None):
    """Newton-Raphson optimisation of a Dirichlet hyperparameter vector.

    Given the precomputed expected-log statistics ``g_term``, takes Newton
    steps on the Dirichlet part of the variational bound.  The Hessian is
    diagonal-plus-constant, so each step is computed in linear time without
    forming or inverting a matrix.

    :param g_term: length-K vector, sum_d (psi(gamma_dk) - psi(sum_k gamma_dk))
    :param M: number of documents summed into ``g_term``
    :param maxit: maximum number of Newton iterations
    :param init_alpha: optional length-K initial guess; when None or empty,
        a uniform 1/K vector is used
    :return: the optimised alpha vector
    """
    SMALL_NUMBER = 1e-100
    K = len(g_term)
    # None replaces the old mutable-default-argument idiom; an empty
    # sequence is still accepted for backward compatibility.
    if init_alpha is None or len(init_alpha) == 0:
        init_alpha = np.ones_like(g_term) / K
    alpha = init_alpha.copy()

    for it in range(maxit):
        grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term

        # Hessian = diag(h) + z; apply its inverse to grad in O(K).
        z = M * pg(1, alpha.sum())
        h = -M * pg(1, alpha)
        c = ((grad / h).sum()) / ((1.0 / z) + (1.0 / h).sum())
        alpha_change = (grad - c) / h

        # Halve the step until no component would be driven negative.
        while (alpha_change > alpha).sum() > 0:
            alpha_change /= 2.0

        alpha_new = alpha - alpha_change

        # Clamp so alpha stays strictly positive.
        alpha_new[alpha_new <= SMALL_NUMBER] = SMALL_NUMBER

        diff = np.sum(np.abs(alpha - alpha_new))
        alpha = alpha_new

        if diff < 1e-6 and it > 10:
            return alpha
    return alpha
Example #3
0
	def alpha_nr(self, maxit=20, init_alpha=None):
		"""Newton-Raphson update of the Dirichlet hyperparameter alpha.

		Uses the diagonal-plus-constant structure of the Hessian so each
		Newton step costs O(K) — no matrix inversion is needed.

		:param maxit: maximum number of Newton iterations
		:param init_alpha: optional initial guess; when None or empty, the
			column means of ``self.gamma_matrix`` divided by K are used
		:return: the updated alpha; on numerical failure the previous
			``self.alpha`` is returned unchanged
		"""
		old_alpha = self.alpha.copy()
		try:
			M, K = self.gamma_matrix.shape
			# None replaces the old mutable-default-argument idiom; an
			# empty sequence is still accepted for backward compatibility.
			if init_alpha is None or not len(init_alpha) > 0:
				init_alpha = self.gamma_matrix.mean(axis=0) / K
			alpha = init_alpha.copy()
			# Sufficient statistic per topic k:
			# sum_d (psi(gamma_dk) - psi(sum_k gamma_dk)).
			g_term = (psi(self.gamma_matrix) - psi(self.gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)
			for it in range(maxit):
				grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term

				# Hessian = diag(h) + z; apply its inverse analytically.
				z = M * pg(1, alpha.sum())
				h = -M * pg(1, alpha)
				c = ((grad / h).sum()) / ((1.0 / z) + (1.0 / h).sum())
				alpha_change = (grad - c) / h

				# Halve the step until no component would go negative.
				while (alpha_change > alpha).sum() > 0:
					alpha_change /= 2.0

				alpha_new = alpha - alpha_change

				# Clamp so alpha stays strictly positive.
				alpha_new[alpha_new <= SMALL_NUMBER] = SMALL_NUMBER

				diff = np.sum(np.abs(alpha - alpha_new))
				alpha = alpha_new
				if diff < 1e-6 and it > 1:
					return alpha
		except Exception:
			# Numerical failure: fall back to the previous alpha rather
			# than crashing the variational loop.
			alpha = old_alpha
		return alpha
Example #4
0
    def alpha_nr(self, maxit=20, init_alpha=None):
        """Newton-Raphson update of the Dirichlet hyperparameter alpha.

        Uses the diagonal-plus-constant structure of the Hessian so each
        Newton step costs O(K) — no matrix inversion is needed.

        :param maxit: maximum number of Newton iterations
        :param init_alpha: optional initial guess; when None or empty, the
            column means of ``self.gamma_matrix`` divided by K are used
        :return: the updated alpha; on numerical failure the previous
            ``self.alpha`` is returned unchanged
        """
        old_alpha = self.alpha.copy()
        try:
            M, K = self.gamma_matrix.shape
            # None replaces the old mutable-default-argument idiom; an
            # empty sequence is still accepted for backward compatibility.
            if init_alpha is None or not len(init_alpha) > 0:
                init_alpha = self.gamma_matrix.mean(axis=0) / K
            alpha = init_alpha.copy()
            # Sufficient statistic per topic k:
            # sum_d (psi(gamma_dk) - psi(sum_k gamma_dk)).
            g_term = (psi(self.gamma_matrix) - psi(self.gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)
            for it in range(maxit):
                grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term

                # Hessian = diag(h) + z; apply its inverse analytically.
                z = M * pg(1, alpha.sum())
                h = -M * pg(1, alpha)
                c = ((grad / h).sum()) / ((1.0 / z) + (1.0 / h).sum())
                alpha_change = (grad - c) / h

                # Halve the step until no component would go negative.
                while (alpha_change > alpha).sum() > 0:
                    alpha_change /= 2.0

                alpha_new = alpha - alpha_change

                # Clamp so alpha stays strictly positive.
                alpha_new[alpha_new <= SMALL_NUMBER] = SMALL_NUMBER

                diff = np.sum(np.abs(alpha - alpha_new))
                alpha = alpha_new
                if diff < 1e-6 and it > 1:
                    return alpha
        except Exception:
            # Numerical failure: fall back to the previous alpha rather
            # than crashing the variational loop.
            alpha = old_alpha
        return alpha
Example #5
0
File: lda_vb.py  Project: sdrogers/MS2LDA
	def alpha_nr(self, maxit=20, init_alpha=None):
	    """Newton-Raphson update of the Dirichlet hyperparameter alpha.

	    This variant forms the full Hessian and inverts it; if a step would
	    drive any component negative, it restarts recursively from a
	    shrunken initial guess.

	    :param maxit: maximum number of Newton iterations
	    :param init_alpha: optional initial guess; when None or empty, the
	        column means of ``self.gamma_matrix`` divided by K are used
	    :return: the updated alpha vector
	    """
	    M, K = self.gamma_matrix.shape
	    # None replaces the old mutable-default-argument idiom; an empty
	    # sequence is still accepted for backward compatibility.
	    if init_alpha is None or not len(init_alpha) > 0:
	        init_alpha = self.gamma_matrix.mean(axis=0) / K
	    alpha = init_alpha.copy()
	    # Sufficient statistic: sum_d (psi(gamma_dk) - psi(sum_k gamma_dk)).
	    g_term = (psi(self.gamma_matrix) - psi(self.gamma_matrix.sum(axis=1))[:, None]).sum(axis=0)
	    for it in range(maxit):
	        grad = M * (psi(alpha.sum()) - psi(alpha)) + g_term
	        H = -M * np.diag(pg(1, alpha)) + M * pg(1, alpha.sum())
	        alpha_new = alpha - np.dot(np.linalg.inv(H), grad)
	        if (alpha_new < 0).sum() > 0:
	            # Restart from a smaller starting point.  Build a NEW array
	            # (the original `init_alpha /= 10.0` mutated the caller's
	            # buffer — and, previously, the shared default list — in
	            # place before recursing).
	            return self.alpha_nr(maxit=maxit, init_alpha=init_alpha / 10.0)

	        diff = np.sum(np.abs(alpha - alpha_new))
	        alpha = alpha_new
	        if diff < 1e-6 and it > 1:
	            return alpha
	    return alpha
Example #6
0
    def tell(self, X, fit):
        '''Updates the model parameters with the function values.

        Maps the raw fitness values through ``self.utility``, computes the
        derivatives of the expected utility w.r.t. the Beta parameters,
        raises them with the inverse Fisher metric, and applies the step
        via the exponential map so ``a`` and ``b`` stay positive.
        '''
        utilities = self.utility(fit)  # Map to utility values

        # Digamma (order 0) and trigamma (order 1) terms of the Beta density.
        psi_a = pg(0, self.a)
        psi_b = pg(0, self.b)
        psi_ab = pg(0, self.a + self.b)
        tri_a = pg(1, self.a)
        tri_b = pg(1, self.b)
        tri_ab = pg(1, self.a + self.b)

        # Utility-weighted information differentials.
        n = len(X)
        d_a = psi_ab - psi_a + sum(u * np.log(x) for x, u in zip(X, utilities)) / n
        d_b = psi_ab - psi_b + sum(u * np.log(1 - x) for x, u in zip(X, utilities)) / n

        # Raise the derivatives with the inverse Riemannian metric.
        det = tri_a * tri_b - tri_ab * (tri_a + tri_b)
        nat_a = ((tri_b - tri_ab) * d_a + tri_ab * d_b) / (self.a * det)
        nat_b = ((tri_a - tri_ab) * d_b + tri_ab * d_a) / (self.b * det)

        # Exponential-map update of the (positive) parameters.
        self.a *= np.exp(self.step * nat_a)
        self.b *= np.exp(self.step * nat_b)
Example #7
0
    def _train_m_step(self):
        """Variational M-step: normalise the accumulated topic-word counts
        into ``beta`` and take one Newton-Raphson step on the Dirichlet
        prior ``alpha``, then reset the E-step accumulators.
        """
        # NOTE(review): `obj.pull`/`obj.push` sync state with an external
        # buffer object defined elsewhere — presumably refreshing
        # new_beta/grad_alpha before use; confirm against its definition.
        self.obj.pull()

        # update beta
        # Clamp to EPS so the column normalisation never divides by zero,
        # then reset the accumulator for the next E-step.
        self.new_beta[:, :] = np.maximum(self.new_beta, EPS)
        self.beta[:, :] = self.new_beta / np.sum(self.new_beta,
                                                 axis=0)[None, :]
        self.new_beta[:, :] = 0

        # update alpha
        alpha_sum = np.sum(self.alpha)
        # Gradient w.r.t. alpha: accumulated per-document psi(gamma) terms
        # plus the prior's own digamma contribution.
        gvec = np.sum(self.grad_alpha, axis=0)
        gvec += self.num_docs * (pg(0, alpha_sum) - pg(0, self.alpha))
        # The Hessian has diagonal-plus-constant structure (hvec and z_0),
        # so the Newton step H^-1 grad is computed in closed form in O(K).
        hvec = self.num_docs * pg(1, self.alpha)
        z_0 = pg(1, alpha_sum)
        c_nume = np.sum(gvec / hvec)
        c_deno = 1 / z_0 + np.sum(1 / hvec)
        c_0 = c_nume / c_deno
        delta = (gvec - c_0) / hvec
        self.alpha -= delta
        # Clamp so alpha stays strictly positive, then reset the accumulator.
        self.alpha[:] = np.maximum(self.alpha, EPS)
        self.grad_alpha[:, :] = 0

        self.obj.push()