def __init__(self, corpus, dictionary, num_topics, outfile):
    """Set up the LDA model and immediately begin training in the background.

    corpus     -- iterable of documents fed to the LDA update step
    dictionary -- gensim dictionary used both for tokenizing and the model
    num_topics -- number of latent topics to learn
    outfile    -- path the trained model is saved to when training finishes

    NOTE(review): training starts as a side effect of construction — the
    spawned thread runs ``self.train`` until completion or a requested stop.
    """
    self.corpus = corpus
    self.outfile = outfile
    self.tokenizer = Tokenizer(dictionary)
    self.lda = StoppableLdaModel(dictionary, num_topics)
    # Kick off training asynchronously so the constructor returns at once.
    self.training_thread = threading.Thread(target=self.train)
    self.training_thread.start()
def __init__(self, corpus, dictionary, num_topics, outfile):
    """Construct the model wrapper and launch background training.

    The corpus and output path are stashed for the training thread; the
    tokenizer and the stoppable LDA model share the same dictionary.
    """
    self.outfile = outfile
    self.corpus = corpus
    self.lda = StoppableLdaModel(dictionary, num_topics)
    self.tokenizer = Tokenizer(dictionary)
    # Training runs on its own thread starting now; callers can query the
    # partially-trained model while it converges.
    worker = threading.Thread(target=self.train)
    self.training_thread = worker
    worker.start()
class Model(object):
    """An LDA topic model that trains itself on a background thread.

    Construction spawns a training thread over the supplied corpus; query
    methods (``topics``, ``show_topic``, ``print_topic``) may be called
    while training proceeds, and ``request_stop`` asks the underlying
    stoppable model to halt early.
    """

    def __init__(self, corpus, dictionary, num_topics, outfile):
        """Store training inputs and start the training thread immediately."""
        self.corpus = corpus
        self.outfile = outfile
        self.tokenizer = Tokenizer(dictionary)
        self.lda = StoppableLdaModel(dictionary, num_topics)
        self.training_thread = threading.Thread(target=self.train)
        self.training_thread.start()

    def train(self):
        """Run one full update over the corpus, then persist the model."""
        logging.info('starting model training...')
        self.lda.update(self.corpus)
        self.lda.save(self.outfile)

    def topics(self, text):
        """Return the topic distribution for raw ``text`` as a string."""
        # Tokenize straight into bag-of-words form, then query the model.
        return str(self.lda[self.tokenizer.text2bow(text)])

    def show_topic(self, topic, topn=10):
        """Return the top ``topn`` terms of ``topic`` as a string."""
        terms = self.lda.show_topic(topic, topn=topn)
        return str(terms)

    def print_topic(self, topic, topn=10):
        """Return gensim's formatted rendering of ``topic``."""
        return self.lda.print_topic(topic, topn=topn)

    def request_stop(self):
        """Ask the training loop to stop early; returns an acknowledgement."""
        self.lda.request_stop()
        return 'stopping'
class Model(object):
    """Self-training LDA wrapper.

    Instantiating the class immediately kicks off LDA training over
    ``corpus`` on a daemonless worker thread; the query helpers below can
    be used concurrently, and training can be cancelled via
    ``request_stop``.
    """

    def __init__(self, corpus, dictionary, num_topics, outfile):
        # Keep what the training thread needs, then launch it right away.
        self.corpus = corpus
        self.outfile = outfile
        self.tokenizer = Tokenizer(dictionary)
        self.lda = StoppableLdaModel(dictionary, num_topics)
        self.training_thread = threading.Thread(target=self.train)
        self.training_thread.start()

    def train(self):
        # Single pass: update on the whole corpus, then save to disk.
        logging.info('starting model training...')
        self.lda.update(self.corpus)
        self.lda.save(self.outfile)

    def topics(self, text):
        # Convert free text to bag-of-words before asking the model.
        bow = self.tokenizer.text2bow(text)
        inferred = self.lda[bow]
        return str(inferred)

    def show_topic(self, topic, topn=10):
        # Stringified (term, weight) pairs for the requested topic.
        return str(self.lda.show_topic(topic, topn=topn))

    def print_topic(self, topic, topn=10):
        # gensim already returns a formatted string here.
        return self.lda.print_topic(topic, topn=topn)

    def request_stop(self):
        # Signal the stoppable model; the caller gets a simple ack.
        self.lda.request_stop()
        return 'stopping'
from gensim.corpora.dictionary import Dictionary from gensim.models.ldamodel import LdaModel from news.document import Tokenizer if len(sys.argv) != 2: print 'Usage: {0} rcv1_data_dir'.format(sys.argv[0]) raise SystemExit(1) data_dir = sys.argv[1] dictionary_file = data_dir+'/id_token_df' model_file = data_dir+'/lda_model' print 'creating tokenizer...' dictionary = Dictionary.load_from_text(dictionary_file) tok = Tokenizer(dictionary) print 'loading model...' lda = LdaModel.load(model_file) while True: text = raw_input('enter text (q to quit): ') if text == 'q': print 'bye!' break doc = tok.text2bow(text) topics = lda[doc] for topic,weight in sorted(topics,key=itemgetter(1),reverse=True): print weight,lda.show_topic(topic,topn=4)
from gensim.corpora.dictionary import Dictionary from gensim.models.ldamodel import LdaModel from news.document import Tokenizer if len(sys.argv) != 2: print 'Usage: {0} rcv1_data_dir'.format(sys.argv[0]) raise SystemExit(1) data_dir = sys.argv[1] dictionary_file = data_dir + '/id_token_df' model_file = data_dir + '/lda_model' print 'creating tokenizer...' dictionary = Dictionary.load_from_text(dictionary_file) tok = Tokenizer(dictionary) print 'loading model...' lda = LdaModel.load(model_file) while True: text = raw_input('enter text (q to quit): ') if text == 'q': print 'bye!' break doc = tok.text2bow(text) topics = lda[doc] for topic, weight in sorted(topics, key=itemgetter(1), reverse=True): print weight, lda.show_topic(topic, topn=4)