def runDeltaLDA(data, numsamp, randseed, binarizedOnly):
    """Run deltaLDA on 'data' and gather the outputs in a dictionary.

    ARGUMENTS:
    * data - mapping read with the keys 'docs', 'bindocs', 'alpha',
      'beta' and 'Fvector' ('docs' is only read when binarizedOnly
      is false)
    * numsamp - specifies how many samples to take from the Gibbs sampler
    * randseed - is used to initialize the Gibbs sampler random number
      generator
    * binarizedOnly - when true, only the run over data['bindocs'] is done

    RETURNS:
    A dict holding 'phi_bin', 'theta_bin', 'sample_bin', 'prz_bin' and
    'prz_w_bin'; when binarizedOnly is false it also holds the same five
    entries without the '_bin' suffix, computed from data['docs'].
    """
    def summarize(theta, phi):
        # Column sums of theta, kept as a column vector.
        topic_mass = numpy.sum(theta, 0)
        topic_mass = topic_mass.reshape((topic_mass.shape[0], 1))

        # Scale each row of phi by its topic mass, then flip the axes.
        tiled_mass = topic_mass.repeat(phi.shape[1], axis=1)
        weighted = numpy.multiply(phi, tiled_mass).transpose()

        # Divide every row of 'weighted' by its own total
        # (only handles 2D arrays).
        row_totals = numpy.sum(weighted, 1)
        row_totals = row_totals.reshape((row_totals.shape[0], 1))
        row_totals = row_totals.repeat(weighted.shape[1], axis=1)

        return (topic_mass / numpy.sum(topic_mass),
                numpy.divide(weighted, row_totals))

    result = {}
    full_run = not binarizedOnly

    if full_run:
        phi, theta, sample = deltaLDA(data['docs'], data['alpha'],
                                      data['beta'], numsamp, randseed,
                                      f=data['Fvector'])
        result['phi'] = phi
        result['theta'] = theta
        result['sample'] = sample

    phi_bin, theta_bin, sample_bin = deltaLDA(data['bindocs'], data['alpha'],
                                              data['beta'], numsamp, randseed,
                                              f=data['Fvector'])
    result['phi_bin'] = phi_bin
    result['theta_bin'] = theta_bin
    result['sample_bin'] = sample_bin

    if full_run:
        prz, prz_w = summarize(result['theta'], result['phi'])
        result['prz'] = prz
        result['prz_w'] = prz_w

    prz_bin, prz_w_bin = summarize(result['theta_bin'], result['phi_bin'])
    result['prz_bin'] = prz_bin
    result['prz_w_bin'] = prz_w_bin

    return result
def testDelta(self):
    """
    Test DeltaLDA with base data/params + f-values

    Docs 4 and 5 get f=1 and therefore use the second alpha row, the
    only one that gives topic 2 a non-zero weight.
    """
    our_f = [0, 0, 0, 0, 1, 1]
    alpha = array([[.1, .1, 0], [.1, .1, .1]])
    (phi, theta, sample) = deltaLDA(self.docs, alpha, self.beta,
                                    self.numsamp, self.randseed, f=our_f)

    # theta should assign special topic to docs [4,5]
    maxtheta = argmax(theta, axis=1)
    self.assertEqual(maxtheta[4], 2)
    self.assertEqual(maxtheta[5], 2)
    # theta valid prob matrix
    self.assertTrue(self.matProb(theta))
    # theta rows should sum to 1
    # (a list, not a generator: 'all' may be numpy's version, which
    # treats a generator object as a single truthy value)
    self.assertTrue(all([abs(val - float(1)) < self.tol
                         for val in theta.sum(axis=1)]))

    # phi for special topic should emph [0]
    maxphi = argmax(phi, axis=1)
    self.assertEqual(maxphi[2], 0)
    # phi valid prob matrix
    self.assertTrue(self.matProb(phi))
def testDelta(self):
    """
    Test DeltaLDA with base data/params + f-values

    Docs 4 and 5 get f=1 and therefore use the second alpha row, the
    only one that gives topic 2 a non-zero weight.
    """
    our_f = [0, 0, 0, 0, 1, 1]
    alpha = array([[.1, .1, 0], [.1, .1, .1]])
    (phi, theta, sample) = deltaLDA(self.docs, alpha, self.beta,
                                    self.numsamp, self.randseed, f=our_f)

    # theta should assign special topic to docs [4,5]
    maxtheta = argmax(theta, axis=1)
    self.assertEqual(maxtheta[4], 2)
    self.assertEqual(maxtheta[5], 2)
    # theta valid prob matrix
    self.assertTrue(self.matProb(theta))
    # theta rows should sum to 1
    # (a list, not a generator: 'all' may be numpy's version, which
    # treats a generator object as a single truthy value)
    self.assertTrue(all([abs(val - float(1)) < self.tol
                         for val in theta.sum(axis=1)]))

    # phi for special topic should emph [0]
    maxphi = argmax(phi, axis=1)
    self.assertEqual(maxphi[2], 0)
    # phi valid prob matrix
    self.assertTrue(self.matProb(phi))
def testInit(self):
    """
    Test standard LDA with init from previous sample
    (this doesn't test how the init could affect behavior,
    just checks that using an init doesn't fail completely...)
    """
    # Give stupid init
    (phi, theta, sample) = deltaLDA(self.docs, self.alpha, self.beta,
                                    self.numsamp, self.randseed,
                                    init=self.init)

    # theta should clust docs [0,1], [2,3], [4,5]
    maxtheta = argmax(theta, axis=1)
    self.assertEqual(maxtheta[0], maxtheta[1])
    self.assertEqual(maxtheta[2], maxtheta[3])
    self.assertEqual(maxtheta[4], maxtheta[5])

    # corresponding phi should emph [1,2], [3,4], [0]
    maxphi = argmax(phi, axis=1)
    self.assertEqual(maxphi[maxtheta[0]], 1)
    self.assertEqual(maxphi[maxtheta[2]], 3)
    self.assertEqual(maxphi[maxtheta[4]], 0)
def testSanity1(self):
    """
    Test no constraints vs deltaLDA implementation

    Both samplers are run on the same random corpus with the same
    number of samples and seed; their phi and theta must agree.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # Randomly generated docs
    docs = [[random.randint(self.W) for i in range(1000)]
            for j in range(100)]

    # Set beta for standard LDA
    ldabeta = self.df.beta * ones((self.T, self.W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with empty constraint set
    df = DF.DirichletForest(self.alpha, self.beta, self.eta,
                            self.T, self.W)
    df.inference(docs, self.numsamp, self.randseed)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def testSanity1(self):
    """
    Test no constraints vs deltaLDA implementation

    Both samplers are run on the same random corpus with the same
    number of samples and seed; their phi and theta must agree.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # Randomly generated docs
    docs = [[random.randint(self.W) for i in range(1000)]
            for j in range(100)]

    # Set beta for standard LDA
    ldabeta = self.df.beta * ones((self.T, self.W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with empty constraint set
    df = DF.DirichletForest(self.alpha, self.beta, self.eta,
                            self.T, self.W)
    df.inference(docs, self.numsamp, self.randseed)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def testSanity(self):
    """
    Sanity check online Gibbs init scheme against deltaLDA

    Builds an initial gamma from the phi/theta returned by deltaLDA,
    runs cvbLDA once with that gamma and once with only the random
    seed, and checks that both runs give agreeing phi and theta.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    randseed = 194582

    # Use 1 'online' Gibbs sample to build initial gamma
    gibbs_docs = [[1, 1, 2],
                  [1, 1, 1, 1, 2],
                  [3, 3, 3, 4],
                  [3, 3, 3, 3, 4, 4],
                  [0, 0, 0, 0, 0],
                  [0, 0, 0, 0]]
    numsamp = 0
    (phi, theta, sample) = deltaLDA(gibbs_docs, self.alpha, self.beta,
                                    numsamp, randseed)
    gamma_init = []
    for (di, d) in enumerate(self.docs_w):
        gamma = zeros((self.T, len(d)))
        for (i, w) in enumerate(d):
            gamma[:, i] = theta[di, :] * phi[:, w]
            # normalize
            gamma[:, i] = gamma[:, i] / gamma[:, i].sum()
        # save
        gamma_init.append(gamma)

    # Run cvbLDA with this gamma
    (gphi, gtheta, gamma) = cvbLDA(self.docs_w, self.docs_c,
                                   self.alpha, self.beta,
                                   gamma_init=gamma_init,
                                   maxiter=self.maxiter,
                                   convtol=self.convtol)

    # Run cvbLDA no init gamma, same randseed
    (phi, theta, gamma) = cvbLDA(self.docs_w, self.docs_c,
                                 self.alpha, self.beta,
                                 randseed=randseed,
                                 maxiter=self.maxiter,
                                 convtol=self.convtol)

    self.assertTrue(self.matAgree(phi, gphi))
    self.assertTrue(self.matProb(phi))
    self.assertTrue(self.matProb(gphi))

    self.assertTrue(self.matAgree(theta, gtheta))
    self.assertTrue(self.matProb(theta))
    self.assertTrue(self.matProb(gtheta))
def testStandard(self):
    """
    Test no constraints mode

    Checks that DirichletForest with no constraints recovers the
    expected topics and agrees with deltaLDA on the same inputs.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # Temporarily shrink vocab
    W = 5

    # Set beta for standard LDA
    ldabeta = self.beta * ones((self.T, W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(self.docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with empty constraint set
    df = DF.DirichletForest(self.alpha, self.beta, self.eta,
                            self.T, W)
    df.inference(self.docs, self.numsamp, self.randseed)

    #
    # First, validate correctness of recovered topics
    #

    # theta should clust docs [0,1], [2,3], [4,5]
    maxtheta = argmax(df.theta, axis=1)
    self.assertEqual(maxtheta[0], maxtheta[1])
    self.assertEqual(maxtheta[2], maxtheta[3])
    self.assertEqual(maxtheta[4], maxtheta[5])

    # corresponding phi should emph [1,2], [3,4], [0]
    maxphi = argmax(df.phi, axis=1)
    self.assertEqual(maxphi[maxtheta[0]], 1)
    self.assertEqual(maxphi[maxtheta[2]], 3)
    self.assertEqual(maxphi[maxtheta[4]], 0)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def testStandard(self):
    """
    Test no constraints mode

    Checks that DirichletForest with no constraints recovers the
    expected topics and agrees with deltaLDA on the same inputs.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # Temporarily shrink vocab
    W = 5

    # Set beta for standard LDA
    ldabeta = self.beta * ones((self.T, W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(self.docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with empty constraint set
    df = DF.DirichletForest(self.alpha, self.beta, self.eta,
                            self.T, W)
    df.inference(self.docs, self.numsamp, self.randseed)

    #
    # First, validate correctness of recovered topics
    #

    # theta should clust docs [0,1], [2,3], [4,5]
    maxtheta = argmax(df.theta, axis=1)
    self.assertEqual(maxtheta[0], maxtheta[1])
    self.assertEqual(maxtheta[2], maxtheta[3])
    self.assertEqual(maxtheta[4], maxtheta[5])

    # corresponding phi should emph [1,2], [3,4], [0]
    maxphi = argmax(df.phi, axis=1)
    self.assertEqual(maxphi[maxtheta[0]], 1)
    self.assertEqual(maxphi[maxtheta[2]], 3)
    self.assertEqual(maxphi[maxtheta[4]], 0)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def start_delta_lda(good_doc_list, bad_doc_list, next_index,
                    numsamp=200, randseed=194582):
    """
    Run deltaLDA over the good documents followed by the bad ones.

    Good documents get f=0 and bad documents f=1, so only the bad
    documents use the alpha row that gives the third topic a non-zero
    weight.

    ARGUMENTS:
    * good_doc_list - list of documents labelled 0
    * bad_doc_list - list of documents labelled 1
    * next_index - number of columns of the all-ones beta matrix
      (presumably the vocabulary size; confirm against the caller)
    * numsamp - how many Gibbs samples to take (default 200)
    * randseed - seed for the sampler (default 194582)

    RETURNS:
    The (phi, theta, sample) tuple produced by deltaLDA.
    """
    docs = good_doc_list + bad_doc_list

    # One label per document, in the same order as 'docs'
    delta_f = [0] * len(good_doc_list) + [1] * len(bad_doc_list)

    delta_alpha = array([[.1, .1, 0], [.1, .1, .1]])
    beta = ones((3, next_index))

    (phi, theta, sample) = deltaLDA(docs, delta_alpha, beta,
                                    numsamp, randseed, f=delta_f)
    return phi, theta, sample
def testInit(self):
    """
    Test standard LDA with init from previous sample
    (this doesn't test how the init could affect behavior,
    just checks that using an init doesn't fail completely...)
    """
    # Give stupid init
    (phi, theta, sample) = deltaLDA(self.docs, self.alpha, self.beta,
                                    self.numsamp, self.randseed,
                                    init=self.init)

    # theta should clust docs [0,1], [2,3], [4,5]
    maxtheta = argmax(theta, axis=1)
    self.assertEqual(maxtheta[0], maxtheta[1])
    self.assertEqual(maxtheta[2], maxtheta[3])
    self.assertEqual(maxtheta[4], maxtheta[5])

    # corresponding phi should emph [1,2], [3,4], [0]
    maxphi = argmax(phi, axis=1)
    self.assertEqual(maxphi[maxtheta[0]], 1)
    self.assertEqual(maxphi[maxtheta[2]], 3)
    self.assertEqual(maxphi[maxtheta[4]], 0)
def testStandard(self):
    """
    Test standard LDA with base data/params
    """
    (phi, theta, sample) = deltaLDA(self.docs, self.alpha, self.beta,
                                    self.numsamp, self.randseed)

    # theta should clust docs [0,1], [2,3], [4,5]
    maxtheta = argmax(theta, axis=1)
    self.assertEqual(maxtheta[0], maxtheta[1])
    self.assertEqual(maxtheta[2], maxtheta[3])
    self.assertEqual(maxtheta[4], maxtheta[5])
    # theta valid prob matrix
    self.assertTrue(self.matProb(theta))

    # corresponding phi should emph [1,2], [3,4], [0]
    maxphi = argmax(phi, axis=1)
    self.assertEqual(maxphi[maxtheta[0]], 1)
    self.assertEqual(maxphi[maxtheta[2]], 3)
    self.assertEqual(maxphi[maxtheta[4]], 0)
    # phi valid prob matrix
    self.assertTrue(self.matProb(phi))
def testStandard(self):
    """
    Test standard LDA with base data/params
    """
    (phi, theta, sample) = deltaLDA(self.docs, self.alpha, self.beta,
                                    self.numsamp, self.randseed)

    # theta should clust docs [0,1], [2,3], [4,5]
    maxtheta = argmax(theta, axis=1)
    self.assertEqual(maxtheta[0], maxtheta[1])
    self.assertEqual(maxtheta[2], maxtheta[3])
    self.assertEqual(maxtheta[4], maxtheta[5])
    # theta valid prob matrix
    self.assertTrue(self.matProb(theta))

    # corresponding phi should emph [1,2], [3,4], [0]
    maxphi = argmax(phi, axis=1)
    self.assertEqual(maxphi[maxtheta[0]], 1)
    self.assertEqual(maxphi[maxtheta[2]], 3)
    self.assertEqual(maxphi[maxtheta[4]], 0)
    # phi valid prob matrix
    self.assertTrue(self.matProb(phi))
def testSanity3(self):
    """
    Test beta*eta=X vs deltaLDA with beta=X

    A two-word vocabulary is merged under DirichletForest with beta=1
    and eta=X; the result must agree with deltaLDA run with beta=X.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # 'magic' X parameter
    X = 50

    # Randomly generated docs
    W = 2
    docs = [[random.randint(W) for i in range(1000)]
            for j in range(100)]

    # Set beta for standard LDA
    ldabeta = X * ones((self.T, W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with the baseline (full-tree) constraints,
    # but temporarily set constraint strength to 1 to build tree
    eta = X
    beta = 1
    df = DF.DirichletForest(self.alpha, beta, eta,
                            self.T, W)
    df.merge([0], [1])
    df.inference(docs, self.numsamp, self.randseed)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def testSanity2(self):
    """
    Test eta=1 against deltaLDA implementation

    Constraints are added to the DirichletForest with eta=1; the
    test expects its phi and theta to still agree with deltaLDA.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # Randomly generated docs
    docs = [[random.randint(self.W) for i in range(1000)]
            for j in range(100)]

    # Set beta for standard LDA
    ldabeta = self.beta * ones((self.T, self.W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with the baseline (full-tree) constraints,
    # but temporarily set constraint strength to 1 to build tree
    eta = 1
    df = DF.DirichletForest(self.alpha, self.beta, eta,
                            self.T, self.W)
    df.split([1, 2], [3])
    df.split([0], [3])
    df.merge([4], [5])
    df.inference(docs, self.numsamp, self.randseed)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def start_delta_lda(good_doc_list, bad_doc_list, next_index,
                    numsamp=200, randseed=194582):
    """
    Run deltaLDA over the good documents followed by the bad ones.

    Good documents get f=0 and bad documents f=1, so only the bad
    documents use the alpha row that gives the third topic a non-zero
    weight.

    ARGUMENTS:
    * good_doc_list - list of documents labelled 0
    * bad_doc_list - list of documents labelled 1
    * next_index - number of columns of the all-ones beta matrix
      (presumably the vocabulary size; confirm against the caller)
    * numsamp - how many Gibbs samples to take (default 200)
    * randseed - seed for the sampler (default 194582)

    RETURNS:
    The (phi, theta, sample) tuple produced by deltaLDA.
    """
    docs = good_doc_list + bad_doc_list

    # One label per document, in the same order as 'docs'
    delta_f = [0] * len(good_doc_list) + [1] * len(bad_doc_list)

    delta_alpha = array([[.1, .1, 0], [.1, .1, .1]])
    beta = ones((3, next_index))

    (phi, theta, sample) = deltaLDA(docs, delta_alpha, beta,
                                    numsamp, randseed, f=delta_f)
    return phi, theta, sample
def testSanity2(self):
    """
    Test eta=1 against deltaLDA implementation

    Constraints are added to the DirichletForest with eta=1; the
    test expects its phi and theta to still agree with deltaLDA.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # Randomly generated docs
    docs = [[random.randint(self.W) for i in range(1000)]
            for j in range(100)]

    # Set beta for standard LDA
    ldabeta = self.beta * ones((self.T, self.W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with the baseline (full-tree) constraints,
    # but temporarily set constraint strength to 1 to build tree
    eta = 1
    df = DF.DirichletForest(self.alpha, self.beta, eta,
                            self.T, self.W)
    df.split([1, 2], [3])
    df.split([0], [3])
    df.merge([4], [5])
    df.inference(docs, self.numsamp, self.randseed)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
def testSanity3(self):
    """
    Test beta*eta=X vs deltaLDA with beta=X

    A two-word vocabulary is merged under DirichletForest with beta=1
    and eta=X; the result must agree with deltaLDA run with beta=X.
    """
    # Don't even try unless deltaLDA module present
    if not hasDelta:
        return

    # 'magic' X parameter
    X = 50

    # Randomly generated docs
    W = 2
    docs = [[random.randint(W) for i in range(1000)]
            for j in range(100)]

    # Set beta for standard LDA
    ldabeta = X * ones((self.T, W))

    # Run standard LDA
    (sphi, stheta, ssample) = deltaLDA(docs, self.ldaalpha, ldabeta,
                                       self.numsamp, self.randseed)

    # Run Interactive LDA with the baseline (full-tree) constraints,
    # but temporarily set constraint strength to 1 to build tree
    eta = X
    beta = 1
    df = DF.DirichletForest(self.alpha, beta, eta,
                            self.T, W)
    df.merge([0], [1])
    df.inference(docs, self.numsamp, self.randseed)

    # Assert matrix agreement, valid prob dists
    self.assertTrue(self.matAgree(df.phi, sphi))
    self.assertTrue(self.matProb(df.phi))
    self.assertTrue(self.matProb(sphi))

    self.assertTrue(self.matAgree(df.theta, stheta))
    self.assertTrue(self.matProb(df.theta))
    self.assertTrue(self.matProb(stheta))
# This command will initialize the Gibbs sampler from a user-supplied sample
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,init=sample)

# This command will run standard LDA, but show Gibbs sampler output
# ("Gibbs sample X of Y")
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,verbose=1)

# These commands will run deltaLDA
# (use different alpha vectors for different docs, depending on value of f)
#
delta_f = [0, 1]
delta_alpha = array([[.1, .1, 0], [.1, .1, .1]])
(phi, theta, sample) = deltaLDA(docs, delta_alpha, beta,
                                numsamp, randseed, f=delta_f)

# theta is the matrix of document-topic probabilities
# (estimated from final sample)
#
# theta = D x T
# theta[di,zj] = P(z=zj | d=di)
#
# Single-argument print(...) calls give the same output on Python 2
# and also parse on Python 3.
print('')
print('Theta - P(z|d)')
print(str(theta))
print('')
def lda(documents_dist, topic_local_to_universal, alpha, beta):
    """
    Runs LDA over a set of documents, saving results over a set of
    predefined topics

    ARGUMENTS:
    * documents_dist - iterable of objects exposing '.distribution'
      (comma-separated hexadecimal values with one trailing character
      that is dropped) and '.document.id'
    * topic_local_to_universal - maps local topic index (0..T-1) to the
      topic id that is stored; its length sets the number of topics
    * alpha - scalar used to fill the 1 x T alpha matrix
    * beta - scalar used to fill the T x W beta matrix

    RETURNS:
    True once the document/topic rows have been inserted and the
    topic/word file has been written and handed to load_data_in_file().

    RAISES:
    Exception if no document yields a word list.
    """
    cursor = connection.cursor()

    n_topics = len(topic_local_to_universal)
    # Filled in by word_mapper as it sees new words
    word_local_to_universal = {}
    word_universal_to_local = {}

    print("Getting document matrix...")
    dic = [word_mapper([int(str(x), 16)
                        for x in document_dist.distribution[:-1].split(',')],
                       word_local_to_universal,
                       word_universal_to_local)
           for document_dist in documents_dist]
    document_local_to_universal = dict(
        enumerate([document_dist.document.id
                   for document_dist in documents_dist]))

    n_documents = len(dic)
    n_words = len(word_local_to_universal)
    print("Numero de documentos: "+str(n_documents))
    print("Numero de palabras: "+str(n_words))

    if n_documents == 0:
        raise Exception('LDAmodel has no documents assigned or the documents had only irrelevant words. No document matrix founded.')

    f_label = 1
    numsamp = 50
    randseed = 194582

    alpha_vector = alpha * ones((f_label, n_topics))
    beta_vector = beta * ones((n_topics, n_words))

    print("Calculating LDA using...")
    print("   beta: "+str(beta))
    print("   alpha: "+str(alpha))
    print("   ntopics: "+str(n_topics))

    (phi, theta, sample) = deltaLDA(dic, alpha_vector, beta_vector,
                                    numsamp, randseed)

    print("Saving Results...")

    ########################
    #    document_topic
    ########################

    print("Saving Document and topic correlation...")
    goal = 0
    current = 0
    theta_len = len(theta)
    for document_local_id, d in enumerate(theta):
        goal, current = avance(current, theta_len, goal)
        # One multi-row INSERT per document. Every value is a number
        # produced here or an id from the supplied mappings, not
        # free-form user text.
        rows = ",".join(
            ["(" + str(document_local_to_universal[document_local_id])
             + "," + str(topic_local_to_universal[topic_local_id])
             + "," + str(document_weight) + ")"
             for topic_local_id, document_weight in enumerate(d)])
        st = ("INSERT INTO application_documenttopic (document_id, topic_id, value) VALUES "
              + rows + ";")
        cursor.execute(st)
        cursor.execute("COMMIT")

    #####################
    #    topic_word
    #####################

    print("Saving topics and word correlation to file")
    goal = 0
    current = 0
    phi_len = len(phi)

    # Create the file and open up its permissions before writing.
    # NOTE(review): presumably so another process can read it during
    # load_data_in_file(); confirm before tightening the mode.
    os.system("touch /tmp/application_topicword.txt")
    os.system("chmod 777 /tmp/application_topicword.txt")
    FILE = '/tmp/application_topicword.txt'
    print('Opening %s' % FILE)
    # 'with' guarantees the handle is closed even if a write fails
    with open(FILE, 'w') as fw:
        for topic_local_id, t in enumerate(phi):
            goal, current = avance(current, phi_len, goal)
            for word_local_id, word_weight in enumerate(t):
                fw.write(str(topic_local_to_universal[topic_local_id])+';'+str(word_local_to_universal[word_local_id])+';'+str(word_weight)+'\n')

    load_data_in_file()
    return True
# Toy corpus: one inner list of word indices per document
docs = [
    [1, 1, 2],
    [1, 1, 1, 1, 2],
    [3, 3, 3, 4],
    [3, 3, 4, 4, 3, 3],
    [0, 0, 0, 0, 0],
    [0, 0, 0, 0],
]

# Number of samples to take from the Gibbs sampler
numsamp = 50
# Seed used to initialize the Gibbs sampler random number generator
randseed = 194582

# Run the standard LDA model
#
phi, theta, sample = deltaLDA(docs, alpha, beta, numsamp, randseed)

# Alternative: initialize the Gibbs sampler from a user-supplied sample
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,init=sample)

# Alternative: run standard LDA, but show Gibbs sampler output
# ("Gibbs sample X of Y")
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,verbose=1)

# Alternative: run deltaLDA
# (use different alpha vectors for different docs, depending on value of f)
#
#delta_f = [0, 0, 0, 0, 1, 1]
#delta_alpha = array([[.1, .1, 0],[.1, .1, .1]])
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,init=sample)

# This command will run standard LDA, but show Gibbs sampler output
# ("Gibbs sample X of Y")
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,verbose=1)

# These commands will run deltaLDA
# (use different alpha vectors for different docs, depending on value of f)
#
delta_f = [0, 1]
delta_alpha = array([[.1, .1, 0], [.1, .1, .1]])
(phi, theta, sample) = deltaLDA(docs, delta_alpha, beta,
                                numsamp, randseed, f=delta_f)

# theta is the matrix of document-topic probabilities
# (estimated from final sample)
#
# theta = D x T
# theta[di,zj] = P(z=zj | d=di)
#
# Single-argument print(...) calls give the same output on Python 2
# and also parse on Python 3.
print('')
print('Theta - P(z|d)')
print(str(theta))
print('')

# phi is the matrix of topic-word probabilities
# Read the corpus from file 'd': one document per line, written as
# whitespace-separated integer word indices.
with open('d', 'r') as f:
    content = f.readlines()

docs = []
for line in content:
    # split() with no argument tolerates repeated spaces, tabs and the
    # trailing newline; blank lines are skipped instead of raising
    # ValueError on int('').
    items = line.split()
    if not items:
        continue
    docs.append([int(item) for item in items])

# numsamp specifies how many samples to take from the Gibbs sampler
numsamp = 50
# randseed is used to initialize the Gibbs sampler random number generator
randseed = 194582

# This command will run the standard LDA model
#
(phi, theta, sample) = deltaLDA(docs, alpha, beta, numsamp, randseed)

# This command will initialize the Gibbs sampler from a user-supplied sample
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,init=sample)

# This command will run standard LDA, but show Gibbs sampler output
# ("Gibbs sample X of Y")
#
#(phi,theta,sample) = deltaLDA(docs,alpha,beta,numsamp,randseed,verbose=1)

# These commands will run deltaLDA
# (use different alpha vectors for different docs, depending on value of f)
#
#delta_f = [0, 0, 0, 0, 1, 1]
#delta_alpha = array([[.1, .1, 0],[.1, .1, .1]])