import numpy as np
import mpmath
from math import log, exp, e
from numpy import ndarray
from scipy.special import gamma as gammaFunc, polygamma


def __generalizedBessel(self, y, M, lambda_, tau, alpha):
    # M is the truncation index (sup of the frequency index); y, lambda_, tau
    # must be 1d arrays.
    W = np.array([(lambda_**j * (y / tau)**(j * alpha)) /
                  (gammaFunc(j + 1) * gammaFunc(j * alpha))
                  for j in range(1, M + 1)])
    # Summation over j = 1..M; the stacked (M, nsample) array collapses to
    # shape (nsample,).
    W = W.sum(axis=0)
    return W
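# Illustrative sketch (not part of the original class): a standalone copy of
# the truncated series above, without `self`, to show how the sum behaves as
# the truncation index M grows. Since Gamma(j + 1) * Gamma(j * alpha) grows
# super-exponentially in j, successive truncations should agree once M is
# moderately large. The test values for y, lambda_, tau, alpha are arbitrary.
def _generalized_bessel(y, M, lambda_, tau, alpha):
    W = np.array([(lambda_**j * (y / tau)**(j * alpha)) /
                  (gammaFunc(j + 1) * gammaFunc(j * alpha))
                  for j in range(1, M + 1)])
    return W.sum(axis=0)

# Example (commented out so nothing runs on import):
# y = np.array([0.5, 1.0, 2.0])
# for M in (5, 10, 20):
#     print(M, _generalized_bessel(y, M, lambda_=1.2, tau=1.0, alpha=0.8))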
def loggammaFunc(num):
    # log Gamma(num). gammaFunc overflows a double for num >= ~171, so fall
    # back on the recurrence log Gamma(num) = log(num - 1) + log Gamma(num - 1).
    if num < 170:
        result = log(gammaFunc(num), e)
    else:
        result = log(num - 1, e) + loggammaFunc(num - 1)
    return result
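# Illustrative check (assumption: scipy is installed, as it already supplies
# polygamma above). scipy.special.gammaln evaluates log Gamma directly, so it
# can sanity-check the recursion in loggammaFunc, including arguments beyond
# the ~170 overflow point of gammaFunc on doubles.
from scipy.special import gammaln

def _check_loggamma():
    for num in (5, 169.5, 500):
        assert abs(loggammaFunc(num) - gammaln(num)) < 1e-6 * max(1.0, gammaln(num))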
def GammaDist(self, gamma):
    # Gamma density with shape self.al and rate self.be:
    # be^al / Gamma(al) * gamma^(al - 1) * exp(-be * gamma)
    return ((self.be**self.al) / gammaFunc(self.al)) * \
        (gamma**(self.al - 1)) * mpmath.exp(-self.be * gamma)
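# Illustrative check (assumption: self.al and self.be are the Gamma shape and
# rate, as the formula above suggests). scipy.stats.gamma parameterizes by
# shape and *scale*, so scale = 1 / rate when comparing against GammaDist.
from scipy.stats import gamma as gamma_dist

def _check_gamma_pdf(al=2.0, be=3.0, x=1.5):
    pdf_manual = (be**al / gammaFunc(al)) * x**(al - 1) * mpmath.exp(-be * x)
    pdf_scipy = gamma_dist.pdf(x, al, scale=1.0 / be)
    assert abs(float(pdf_manual) - pdf_scipy) < 1e-10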
def performLDA(self):
    for iteration in range(self.numIterations):
        self.logLDA()
        self.logOut('Variational Iteration : ' + str(iteration))

        # E-Step : learn phi and gamma
        # Initialize the variational parameters:
        #   phi[d][n][k] = 1/K, gamma[d][k] = alpha_k + V/K
        phi = []
        gamma = ndarray(shape=(self.intNumDoc, self.intNumTopic), dtype=float)
        for d in range(self.intNumDoc):
            phi.append(
                ndarray(shape=(self.numWordPerDoc[d], self.intNumTopic),
                        dtype=float))
            for n in range(self.numWordPerDoc[d]):
                for k in range(self.intNumTopic):
                    phi[d][n][k] = 1.0 / float(self.intNumTopic)
        for k in range(self.intNumTopic):
            for d in range(self.intNumDoc):
                gamma[d][k] = self.alpha[k] + \
                    float(self.intUniqueWord) / float(self.intNumTopic)

        for d in range(self.intNumDoc):
            for iterationInternal in range(self.numInternalIterations):
                # Learn phi: phi_dnk is proportional to
                # beta_{k, w_dn} * exp(psi(gamma_dk)), then normalized over k
                for n in range(self.numWordPerDoc[d]):
                    for k in range(self.intNumTopic):
                        phi[d][n][k] = self.beta[k][self.corpusList[d][n]] * \
                            exp(polygamma(0, gamma[d][k]))
                    normalizeConstantPhi = sum(phi[d][n])
                    for k in range(self.intNumTopic):
                        phi[d][n][k] = phi[d][n][k] / normalizeConstantPhi
                # Learn gamma: gamma_dk = alpha_k + sum_n phi_dnk
                for k in range(self.intNumTopic):
                    gamma[d][k] = self.alpha[k]
                    for n in range(self.numWordPerDoc[d]):
                        gamma[d][k] = gamma[d][k] + phi[d][n][k]

        # M-Step : learn alpha and beta
        # Learn beta: accumulate phi_dnk into beta_{k, w_dn}, then normalize
        for k in range(self.intNumTopic):
            for d in range(self.intNumDoc):
                for n in range(self.numWordPerDoc[d]):
                    self.beta[k][self.corpusList[d][n]] = \
                        self.beta[k][self.corpusList[d][n]] + phi[d][n][k]
            normalizeConstantBeta = sum(self.beta[k])
            for v in range(self.intUniqueWord):
                self.beta[k][v] = self.beta[k][v] / normalizeConstantBeta

        # Learn alpha
        # ELBO terms that depend on the current alpha
        ELBOMax = 0
        for d in range(self.intNumDoc):
            ELBOMax = ELBOMax + log(gammaFunc(sum(self.alpha)), e)
            for k in range(self.intNumTopic):
                ELBOMax = ELBOMax - log(gammaFunc(self.alpha[k]), e)
                ELBOMax = ELBOMax + (self.alpha[k] - 1) * \
                    (polygamma(0, gamma[d][k]) - polygamma(0, sum(gamma[d])))

        tempAlpha = ndarray(shape=(self.intNumTopic), dtype=float)
        bestAlpha = ndarray(shape=(self.intNumTopic), dtype=float)
        for k in range(self.intNumTopic):
            bestAlpha[k] = self.alpha[k]
            tempAlpha[k] = self.alpha[k]
        self.logOut('Newton-Raphson Itr - ELBO : ' + str(ELBOMax) +
                    ' | alpha : ' + str(tempAlpha))

        # Newton-Raphson optimization of alpha
        for itr in range(self.numNewtonIteration):
            # Build the Hessian matrix and the gradient vector
            H = ndarray(shape=(self.intNumTopic, self.intNumTopic),
                        dtype=float)
            g = ndarray(shape=(self.intNumTopic), dtype=float)
            for k1 in range(self.intNumTopic):
                g[k1] = float(self.intNumDoc) * \
                    (polygamma(0, sum(tempAlpha)) - polygamma(0, tempAlpha[k1]))
                for d in range(self.intNumDoc):
                    g[k1] = g[k1] + (polygamma(0, gamma[d][k1]) -
                                     polygamma(0, sum(gamma[d])))
                for k2 in range(self.intNumTopic):
                    H[k1][k2] = 0
                    if k1 == k2:
                        H[k1][k2] = H[k1][k2] - \
                            float(self.intNumDoc) * polygamma(1, tempAlpha[k1])
                    H[k1][k2] = H[k1][k2] + \
                        float(self.intNumDoc) * polygamma(1, sum(tempAlpha))

            # Update alpha in the log domain to keep it positive
            deltaAlpha = np.dot(np.linalg.inv(H), g)
            for k in range(self.intNumTopic):
                logAlphaK = log(tempAlpha[k], e)
                logAlphaK = logAlphaK - deltaAlpha[k]
                tempAlpha[k] = exp(logAlphaK)
                if tempAlpha[k] < 0.00001:
                    tempAlpha[k] = 0.00001

            # ELBO terms with respect to the new alpha
            ELBOAfter = 0
            for d in range(self.intNumDoc):
                ELBOAfter = ELBOAfter + log(gammaFunc(sum(tempAlpha)), e)
                for k in range(self.intNumTopic):
                    ELBOAfter = ELBOAfter - log(gammaFunc(tempAlpha[k]), e)
                    ELBOAfter = ELBOAfter + (tempAlpha[k] - 1) * \
                        (polygamma(0, gamma[d][k]) -
                         polygamma(0, sum(gamma[d])))
            self.logOut('Newton-Raphson Itr - ELBO : ' + str(ELBOAfter) +
                        ' | alpha : ' + str(tempAlpha))
            # Keep the best alpha seen so far
            if ELBOMax <= ELBOAfter:
                ELBOMax = ELBOAfter
                for k in range(self.intNumTopic):
                    bestAlpha[k] = tempAlpha[k]
        self.alpha = bestAlpha
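# Minimal runnable sketch (not part of the original class) of the alpha
# Newton-Raphson step implemented loop-by-loop above, vectorized and run on
# synthetic sufficient statistics. It follows the standard variational LDA
# update (Blei, Ng & Jordan, 2003):
#   g_k  = D*(psi(sum_j a_j) - psi(a_k)) + sum_d (psi(gamma_dk) - psi(sum_j gamma_dj))
#   H_kj = D*psi'(sum_j a_j) - 1[k = j] * D * psi'(a_k)
# All numbers below are made up purely to exercise the update.
def _demo_alpha_newton(D=4, K=3, steps=5):
    rng = np.random.default_rng(0)
    gamma_stats = rng.gamma(2.0, 1.0, size=(D, K)) + 1.0  # stand-in for gamma[d][k]
    alpha = np.ones(K)
    for _ in range(steps):
        g = D * (polygamma(0, alpha.sum()) - polygamma(0, alpha))
        g += (polygamma(0, gamma_stats) -
              polygamma(0, gamma_stats.sum(axis=1, keepdims=True))).sum(axis=0)
        H = np.full((K, K), D * polygamma(1, alpha.sum()))
        H -= np.diag(D * polygamma(1, alpha))
        # The log-domain step keeps alpha positive, mirroring the loop above
        alpha = np.exp(np.log(alpha) - np.linalg.inv(H) @ g)
        alpha = np.maximum(alpha, 1e-5)
    return alpha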