def demo_LdaCgsSeq(doc_len=500, V=100000, n_docs=100, K=20, n_iterations=5,
                   corpus_seed=None, model_seed=None):
    """Train an ``LdaCgsSeq`` model on a randomly generated corpus.

    The run configuration is echoed to stdout before any work is done.

    Parameters
    ----------
    doc_len : int
        Words per document.
    V : int
        Words in the vocabulary.
    n_docs : int
        Documents in the corpus.
    K : int
        Number of topics.
    n_iterations : int
        Iterations passed to ``train``.
    corpus_seed : optional
        Forwarded to ``random_corpus`` as ``seed``.
    model_seed : optional
        Forwarded to ``LdaCgsSeq`` as ``seed``.

    Returns
    -------
    The ``LdaCgsSeq`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Echo the configuration, one "label value" pair per line.
    banner = (
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
    )
    for label, value in banner:
        print(label, value)

    # The corpus holds n_docs * doc_len tokens in total; the last two
    # positional arguments are presumably the document-length bounds
    # (doc_len, doc_len + 1) -- confirm against random_corpus.
    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1,
                           seed=corpus_seed)

    model = LdaCgsSeq(corpus, 'document', K=K, seed=model_seed)
    model.train(n_iterations=n_iterations, verbose=2)
    return model
def setUp(self):
    """Build and train the ``BeagleEnvironment`` fixture stored on ``self.m``.

    A random corpus is generated with ``random_corpus(1000, 100, 0, 20)``
    and handed to ``BeagleEnvironment`` with ``n_cols=100``.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Positional arguments are presumably (corpus length, vocabulary size,
    # min document length, max document length) -- confirm against
    # random_corpus.
    corpus = random_corpus(1000, 100, 0, 20)

    # Attach the model before training so the attribute exists even if
    # train() raises.
    environment = BeagleEnvironment(corpus, n_cols=100)
    self.m = environment
    self.m.train()
def demo_LdaCgsSeq(doc_len=500, V=100000, n_docs=100, K=20, n_iterations=5,
                   corpus_seed=None, model_seed=None):
    """Generate a random corpus and train an ``LdaCgsSeq`` model on it.

    The configuration is printed to stdout first. ``print`` is called as a
    function so the block parses on Python 3 and matches the other demo
    functions in this file.

    Parameters
    ----------
    doc_len : int
        Words per document.
    V : int
        Words in the vocabulary.
    n_docs : int
        Documents in the corpus.
    K : int
        Number of topics.
    n_iterations : int
        Iterations passed to ``train``.
    corpus_seed : optional
        Passed to ``random_corpus`` as ``seed``.
    model_seed : optional
        Passed to ``LdaCgsSeq`` as ``seed``.

    Returns
    -------
    The ``LdaCgsSeq`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    print('Words per document:', doc_len)
    print('Words in vocabulary:', V)
    print('Documents in corpus:', n_docs)
    print('Number of topics:', K)
    print('Iterations:', n_iterations)

    # Total corpus size is n_docs * doc_len tokens; the trailing
    # (doc_len, doc_len + 1) pair presumably bounds the document length --
    # confirm against random_corpus.
    c = random_corpus(n_docs * doc_len, V, doc_len, doc_len + 1,
                      seed=corpus_seed)

    m = LdaCgsSeq(c, 'document', K=K, seed=model_seed)
    m.train(n_iterations=n_iterations, verbose=2)
    return m
def demo_LdaCgsMulti(doc_len=500, V=100000, n_docs=100, K=20, n_iterations=5,
                     n_proc=2, corpus_seed=None, model_seeds=None):
    """Train an ``LdaCgsMulti`` model on a randomly generated corpus.

    The run configuration is echoed to stdout before any work is done.

    Parameters
    ----------
    doc_len : int
        Words per document.
    V : int
        Words in the vocabulary.
    n_docs : int
        Documents in the corpus.
    K : int
        Number of topics.
    n_iterations : int
        Iterations passed to ``train``.
    n_proc : int
        Number of processors; forwarded to ``LdaCgsMulti`` as ``n_proc``.
    corpus_seed : optional
        Forwarded to ``random_corpus`` as ``seed``.
    model_seeds : optional
        Forwarded to ``LdaCgsMulti`` as ``seeds``.

    Returns
    -------
    The ``LdaCgsMulti`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Echo the configuration, one "label value" pair per line.
    report = [
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
        ('Number of processors:', n_proc),
    ]
    for entry in report:
        print(*entry)

    # Last two positional arguments are presumably the document-length
    # bounds -- confirm against random_corpus.
    n_words = n_docs * doc_len
    corpus = random_corpus(n_words, V, doc_len, doc_len + 1,
                           seed=corpus_seed)

    model = LdaCgsMulti(corpus, 'document', K=K, n_proc=n_proc,
                        seeds=model_seeds)
    model.train(n_iterations=n_iterations, verbose=2)
    return model
def demo_LdaCgs(doc_len=500, V=100000, n_docs=100, K=20, n_iterations=5,
                n_threads=1):
    """Train an ``LdaCgs`` model on a randomly generated corpus.

    The configuration is echoed to stdout, followed by a progress message
    after the corpus is generated and another just before training starts.
    No seed is passed to ``random_corpus`` or to ``LdaCgs``.

    Parameters
    ----------
    doc_len : int
        Words per document.
    V : int
        Words in the vocabulary.
    n_docs : int
        Documents in the corpus.
    K : int
        Number of topics.
    n_iterations : int
        Iterations passed to ``train``.
    n_threads : int
        Forwarded to ``train`` as ``n_threads``.

    Returns
    -------
    The ``LdaCgs`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Echo the configuration, one "label value" pair per line.
    summary = (
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
    )
    for label, value in summary:
        print(label, value)

    # Last two positional arguments are presumably the document-length
    # bounds -- confirm against random_corpus.
    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1)
    print('Random corpus generated. Initializing model.')

    model = LdaCgs(corpus, 'document', K=K)
    print('Begin estimation.')

    model.train(n_iterations=n_iterations, n_threads=n_threads)
    return model
def demo_LdaCgs(doc_len=500, V=100000, n_docs=100, K=20, n_iterations=5,
                n_threads=1):
    """Generate a random corpus and train an ``LdaCgs`` model on it.

    The configuration and two progress messages are printed to stdout.
    ``print`` is called as a function so the block parses on Python 3 and
    matches the other demo functions in this file.

    Parameters
    ----------
    doc_len : int
        Words per document.
    V : int
        Words in the vocabulary.
    n_docs : int
        Documents in the corpus.
    K : int
        Number of topics.
    n_iterations : int
        Iterations passed to ``train``.
    n_threads : int
        Passed to ``train`` as ``n_threads``.

    Returns
    -------
    The ``LdaCgs`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    print('Words per document:', doc_len)
    print('Words in vocabulary:', V)
    print('Documents in corpus:', n_docs)
    print('Number of topics:', K)
    print('Iterations:', n_iterations)

    # Total corpus size is n_docs * doc_len tokens; the trailing
    # (doc_len, doc_len + 1) pair presumably bounds the document length --
    # confirm against random_corpus.
    c = random_corpus(n_docs * doc_len, V, doc_len, doc_len + 1)
    print('Random corpus generated. Initializing model.')

    m = LdaCgs(c, 'document', K=K)
    print('Begin estimation.')

    m.train(n_iterations=n_iterations, n_threads=n_threads)
    return m