Python Nmf.print_topics Examples

Programming Language: Python

Namespace/Package Name: gensim.models.nmf

Class/Type: Nmf

Method/Function: print_topics

Examples at hotexamples.com: 2

Python Nmf.print_topics - 2 examples found. These are the top-rated real-world Python examples of gensim.models.nmf.Nmf.print_topics, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Nmf(14)

load(3)

print_topics(2)

save(2)

transform(2)

update(1)

Example #1

Show file

File: get_topic_model_concordance_nnmf.py Project: toth12/mining_testimonial_fragments_of_the_holocaust

def _topic_token_ids(topic_repr):
    """Return the integer token ids contained in one formatted topic string.

    The string is expected to be a ``+``-separated list of terms, each of
    the form ``weight*"id"`` (for example ``0.12*"3" + 0.08*"17"``).
    """
    token_ids = []
    for term in topic_repr.split('+'):
        # The part after '*' is the double-quoted id, possibly padded
        # with spaces.
        quoted_id = term.split('*')[1].strip()
        token_ids.append(int(quoted_id[1:-1]))
    return token_ids


def main(query, output_filename, window=50, topicn=50):
    """Train an NMF topic model on search results and print its topics.

    Runs ``query`` through ``blacklab.search_blacklab``, applies the saved
    phrase model to every match, builds a dictionary and bag-of-words
    corpus, trains ``Nmf`` on it and prints the words of each topic, one
    topic per line. Execution then stops at a ``pdb`` breakpoint.

    Args:
        query: Query passed to ``blacklab.search_blacklab``.
        output_filename: Currently unused by this function.
        window: Context window forwarded to the search.
        topicn: Number of topics to train and to print.

    Raises:
        ValueError: If the search yields no documents.
    """
    print('Training nmf model began')

    # Snapshot of the call arguments. Nothing below reads it; it is kept so
    # it can be inspected at the breakpoint at the end of the function.
    frame = inspect.currentframe()
    args, _, _, values = inspect.getargvalues(frame)
    query_parameters = [(i, values[i]) for i in args]

    document_collection_original = blacklab.search_blacklab(
        query, window=window, lemma=True, include_match=False)
    print("Search finished")

    # Use the phraser model on every match.
    phraser_model = Phraser(
        Phrases.load(constants.OUTPUT_FOLDER + 'phrase_model'))
    document_collection = [
        ' '.join(phraser_model[match['complete_match'].strip().split()])
        for match in document_collection_original
    ]
    print("Phraser model done")

    # Stop word filtering is currently disabled: documents pass through
    # unchanged.
    document_collection_filtered = document_collection
    print("Filtering done")

    if not document_collection_filtered:
        raise ValueError(
            'No documents matched the query; cannot train a topic model')

    # Tokenise once and reuse the tokens for both the dictionary and the
    # bag-of-words corpus.
    tokenized_documents = [
        text.split() for text in document_collection_filtered]

    # Build the dictionary: the first document creates it, the rest are
    # added to it.
    for i, tokens in enumerate(tokenized_documents):
        print(i)
        if i == 0:
            dct = gensim_utils.initialize_gensim_dictionary([tokens])
        else:
            gensim_utils.add_documents_to_gensim_dictionary(dct, [tokens])

    # Filter it here.
    dct.filter_extremes(no_below=10, no_above=0.95)

    gensim_corpus = [dct.doc2bow(tokens) for tokens in tokenized_documents]

    nmf = Nmf(gensim_corpus, num_topics=topicn)

    # Explicit id -> word map, so the lookup does not rely on the key order
    # of token2id matching the numeric ids.
    id_to_word = {
        token_id: word for word, token_id in dct.token2id.items()}

    # The model is trained without an id-to-word mapping, so each topic
    # term is expected to be a token id rendered as a string.
    for topic in nmf.print_topics(topicn):
        print(' '.join(
            id_to_word[token_id] for token_id in _topic_token_ids(topic[1])))

    # Hints for the interactive session below:
    #   nmf.get_document_topics(gensim_corpus[0])  -> topics of a document
    #   nmf.show_topic(10)
    # NOTE(review): this breakpoint blocks non-interactive runs; remove it
    # once interactive inspection is no longer needed.
    pdb.set_trace()

Example #2

Show file

File: TopicModel.py Project: andrewmsilva/InsightOverflow

class TopicModel(object):
    """Wrapper that builds, evaluates, saves and loads a topic model.

    Two model kinds are supported, selected by name: ``'lda'``
    (``LdaMulticore``) and ``'nmf'`` (``Nmf``). The trained model is
    persisted to a fixed file, ``results/model.bin``.
    """

    # Model names accepted by build() and load().
    _SUPPORTED_MODELS = ('lda', 'nmf')

    def __init__(self):
        """Create an empty wrapper with no corpus and no model."""
        self.__corpus = None
        self.__modelName = None

        self.__model = None
        self.__modelFile = 'results/model.bin'

        self.__coherenceModel = None

    def __checkModelName(self, model_name):
        """Raise ValueError unless ``model_name`` is a supported model."""
        if model_name not in self._SUPPORTED_MODELS:
            raise ValueError(
                "Unsupported model name {!r}; expected one of {}".format(
                    model_name, ', '.join(self._SUPPORTED_MODELS)))

    def setCorpus(self, corpus):
        """Set the corpus used by subsequent builds."""
        self.__corpus = corpus

    def getCoherence(self):
        """Return the coherence score of the coherence model.

        The coherence model is only created by ``build``.
        """
        return self.__coherenceModel.get_coherence()

    def getDocumentTopics(self, document, threshold=None):
        """Return the model's topics for ``document``.

        ``threshold`` is forwarded unchanged as the second positional
        argument of the model's ``get_document_topics``.
        """
        return self.__model.get_document_topics(document, threshold)

    def build(self, model_name, num_topics, chunksize, passes, corpus=None):
        """Train the named model and its coherence model.

        Args:
            model_name: ``'lda'`` or ``'nmf'``.
            num_topics: Number of topics to train.
            chunksize: Chunk size forwarded to the model.
            passes: Number of passes forwarded to the model.
            corpus: Optional replacement corpus; it is adopted only when
                it is a ``Corpus`` instance.

        Raises:
            ValueError: If ``model_name`` is not supported.
        """
        # Validate first so an unknown name cannot leave a stale model
        # paired with a new name, or build a coherence model over nothing.
        self.__checkModelName(model_name)
        self.__modelName = model_name
        # Update corpus if necessary
        if isinstance(corpus, Corpus):
            self.__corpus = corpus
        # Build topic model
        if model_name == 'lda':
            self.__buildLDA(num_topics, chunksize, passes)
        else:
            self.__buildNMF(num_topics, chunksize, passes)
        # Build coherence model
        self.__buildCoherenceModel()

    def __buildLDA(self, num_topics, chunksize, passes):
        """Train an ``LdaMulticore`` model on the current corpus."""
        self.__model = LdaMulticore(self.__corpus,
                                    id2word=self.__corpus.getDictionary(),
                                    num_topics=num_topics,
                                    chunksize=chunksize,
                                    passes=passes,
                                    eval_every=None,
                                    workers=40,
                                    random_state=10)

    def __buildNMF(self, num_topics, chunksize, passes):
        """Train an ``Nmf`` model on the current corpus."""
        self.__model = Nmf(self.__corpus,
                           id2word=self.__corpus.getDictionary(),
                           num_topics=num_topics,
                           chunksize=chunksize,
                           passes=passes,
                           eval_every=None,
                           random_state=10)

    def __buildCoherenceModel(self):
        """Create a ``c_v`` coherence model for the current model."""
        self.__coherenceModel = CoherenceModel(model=self.__model,
                                               texts=self.__corpus.getTexts(),
                                               coherence='c_v',
                                               processes=7)

    def __printTopics(self):
        """Print every topic of the current model, one per line."""
        print('  Topics')
        # NOTE(review): -1 is presumably "all topics" - confirm against the
        # gensim print_topics documentation.
        for idx, topic in self.__model.print_topics(-1):
            print('    {}: {}'.format(idx, topic))

    def save(self):
        """Save the current model to the model file."""
        self.__model.save(self.__modelFile)

    def load(self, model_name):
        """Load a previously saved model of the given kind.

        Only the topic model is restored; no coherence model is built.

        Args:
            model_name: ``'lda'`` or ``'nmf'``.

        Raises:
            ValueError: If ``model_name`` is not supported.
        """
        # Validate first so an unknown name is reported instead of being
        # silently ignored.
        self.__checkModelName(model_name)
        self.__modelName = model_name

        if model_name == 'lda':
            self.__model = LdaMulticore.load(self.__modelFile)
        else:
            self.__model = Nmf.load(self.__modelFile)