Python LLDA, iir Examples

Programming Language: Python

Class/Type: LLDA

Examples at hotexamples.com: 2

Python LLDA - 2 examples found. These are the top-rated real-world Python examples of the LLDA class from the iir package, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

term_to_id(1)

theta(1)

Example #1

Show file

File: get_topics2.py Project: vdavid70619/Prediction-of-Derpression-using-NLP

    def __init__(self, n_topics=2, alpha=0.001, beta=0.001, max_iter=100, method='LLDA'):
        '''
        Store the hyper-parameters and build the underlying LLDA model.

        n_topics, alpha, beta: kept on the instance and passed, in that
            order, to the LLDA constructor.
        max_iter: kept as ``self.iteration``.
        method: compared case-insensitively; anything other than 'llda'
            trips the assertion below.
        '''
        self.method = method.lower()
        self.n_topics, self.alpha, self.beta = n_topics, alpha, beta
        self.iteration = max_iter

        # The check runs after the attributes are stored, before the model is built.
        assert self.method == 'llda', 'Only Labeled LDA method is implemented in thi library'

        self.model = LLDA(n_topics, alpha, beta)

Example #2

Show file

File: get_topics2.py Project: vdavid70619/Prediction-of-Derpression-using-NLP

class get_topics2(object):
    '''
    Convenience wrapper around an ``LLDA`` model: fit it on a labelled
    corpus, pickle it to / from disk, encode documents as a topic histogram
    and print the top terms per topic.
    '''

    def __init__(self, n_topics=2, alpha=0.001, beta=0.001, max_iter=100, method='LLDA'):
        '''
        Store the hyper-parameters and build the underlying LLDA model.

        n_topics, alpha, beta: kept on the instance and passed, in that
            order, to the LLDA constructor.
        max_iter: number of inference sweeps run by ``fit``.
        method: compared case-insensitively; only 'llda' is accepted.
        '''
        self.method = method.lower()
        self.n_topics = n_topics
        self.alpha = alpha
        self.beta = beta
        self.iteration = max_iter
        assert self.method=='llda', 'Only Labeled LDA method is implemented in thi library'

        self.model = LLDA(self.n_topics, self.alpha, self.beta)


    def fit(self, X, y=None, verbose=False):
        '''
        Train the model on a labelled corpus.

        X: Gensim corpora form, i.e. chunks of lists [[doc1], [doc2], ...]
        y: labels in the same chunked form [[label1], [label2], ...].
           Required, although the signature defaults it to None.
        verbose: when true, print the perplexity before every sweep and once
           more after the last one.

        Raises TypeError when y is None, and AssertionError when X or y is
        not in the chunked-list form.
        '''

        assert isinstance(X[0], list), 'Not chuncks of lists for docs. Require [[doc1],[doc2],...] as gensim chunk format'
        if y is None:
            # The label set below cannot be built without labels; fail with a
            # clear message instead of an opaque error from the flattening step.
            raise TypeError('y (labels) is required to fit a Labeled LDA model')
        assert isinstance(y[0], list), 'Not chuncks of lists for labels. Require [[label1],[label2],...] as gensim chunk format'

        # Distinct labels across all documents. A set comprehension avoids the
        # quadratic list concatenation and the Python-2-only builtin `reduce`.
        self.labelset = list({label for labels in y for label in labels})
        self.model.set_corpus(self.labelset, X, y)

        for i in range(self.iteration):
            if verbose:
                print("-- %d : %.4f" % (i, self.model.perplexity()))
            self.model.inference()

        if verbose:
            print("Final perplexity : %.4f" % self.model.perplexity())


    def load(self, filename):
        '''
        Replace ``self.model`` with the object pickled in ``filename``.

        NOTE: unpickling can execute arbitrary code; only load files that
        come from a trusted source.
        '''
        with open(filename, 'rb') as handle:
            self.model = pickle.load(handle)


    def save(self, filename):
        '''
        Pickle ``self.model`` to ``filename`` with the highest protocol.
        '''
        with open(filename, 'wb+') as handle:
            ## save a class object to a file using pickle
            pickle.dump(self.model, handle, pickle.HIGHEST_PROTOCOL)


    def encode(self, X, topk=20, normalize=True):
        '''
        Accumulate a single topic histogram over all documents in X.

        X: Gensim chunk form [[term, ...], [term, ...], ...]
        topk: number of largest entries to keep; all others are set to 0.
        normalize: when true, divide the histogram by its sum (plus 1e-6)
            before the top-k cut.

        Returns a 1-D array of length ``self.n_topics``.

        NOTE(review): documents are paired positionally with the rows of
        ``self.model.theta()`` and extra items on either side are dropped;
        presumably X is expected to line up with the fitted corpus - confirm.
        '''

        assert isinstance(X[0], list), 'Not chuncks of lists. Require [[],[],...] as gensim chunk format'
        docs = [[self.model.term_to_id(term) for term in doc] for doc in X]

        phi = self.model.phi()
        thetas = self.model.theta()

        hist = np.zeros(self.n_topics)

        for doc, theta in zip(docs, thetas):
            for w in doc:
                # column w of phi, weighted element-wise by this document's theta
                hist += phi[:,w] * theta

        if normalize:
            # 1e-6 guards against division by zero for an all-zero histogram
            hist = hist/(np.sum(hist, axis=0) + 1e-6)

        sort_ind = np.argsort(hist)[::-1]  # reverse index sequence after argsort
        hist[sort_ind[topk:]] = 0
        return hist


    def summary(self, n_topics=-1):
        '''
        Print, after a leading blank line, one line per row of
        ``self.model.phi()`` with that row's 20 highest-weighted terms.

        n_topics: accepted for compatibility but currently unused.
        '''
        phi = self.model.phi()
        print('')
        for k in range(np.size(phi, axis=0)):
            # argsort of the negated row gives indices by descending weight
            terms = ["+ %.4f*%s" % (phi[k,w], self.model.vocas[w])
                     for w in np.argsort(-phi[k])[:20]]
            print("%d: %s" % (k, " ".join(terms)))