コード例 #1
0
ファイル: ldacgsseq.py プロジェクト: Echo-Ji/vsm
def demo_LdaCgsSeq(doc_len=500,
                   V=100000,
                   n_docs=100,
                   K=20,
                   n_iterations=5,
                   corpus_seed=None,
                   model_seed=None):
    """Train an ``LdaCgsSeq`` model on a randomly generated corpus.

    The run settings are echoed to stdout before the corpus is built.

    Parameters
    ----------
    doc_len : words per document.
    V : words in the vocabulary.
    n_docs : documents in the corpus.
    K : number of topics.
    n_iterations : training iterations.
    corpus_seed : forwarded to ``random_corpus`` as ``seed``.
    model_seed : forwarded to ``LdaCgsSeq`` as ``seed``.

    Returns
    -------
    The ``LdaCgsSeq`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    settings = (
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
    )
    for label, value in settings:
        print(label, value)

    # NOTE(review): the 3rd/4th positional arguments look like lower and
    # upper bounds on document length -- confirm against random_corpus.
    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1,
                           seed=corpus_seed)

    model = LdaCgsSeq(corpus, 'document', K=K, seed=model_seed)
    model.train(n_iterations=n_iterations, verbose=2)
    return model
コード例 #2
0
    def setUp(self):
        """Train a ``BeagleEnvironment`` on a random corpus as ``self.m``."""
        from vsm.extensions.corpusbuilders import random_corpus

        # Small random corpus used as the shared fixture for the tests.
        corpus = random_corpus(1000, 100, 0, 20)

        model = BeagleEnvironment(corpus, n_cols=100)
        self.m = model
        self.m.train()
コード例 #3
0
    def setUp(self):
        """Prepare the fixture: a trained ``BeagleEnvironment`` in ``self.m``."""
        from vsm.extensions.corpusbuilders import random_corpus

        # The model is built from a freshly generated random corpus.
        random_fixture = random_corpus(1000, 100, 0, 20)

        env = BeagleEnvironment(random_fixture, n_cols=100)
        self.m = env
        self.m.train()
コード例 #4
0
def demo_LdaCgsSeq(doc_len=500, V=100000, n_docs=100,
                   K=20, n_iterations=5,
                   corpus_seed=None, model_seed=None):
    """Train an ``LdaCgsSeq`` model on a randomly generated corpus.

    The run settings are printed to stdout before the corpus is built.

    Parameters
    ----------
    doc_len : words per document.
    V : words in the vocabulary.
    n_docs : documents in the corpus.
    K : number of topics.
    n_iterations : training iterations.
    corpus_seed : forwarded to ``random_corpus`` as ``seed``.
    model_seed : forwarded to ``LdaCgsSeq`` as ``seed``.

    Returns
    -------
    The ``LdaCgsSeq`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Each line is formatted into a single string so the output is the same
    # whether ``print`` is a statement (Python 2) or a function (Python 3);
    # the former bare ``print`` statements were a SyntaxError on Python 3.
    print('Words per document: {0}'.format(doc_len))
    print('Words in vocabulary: {0}'.format(V))
    print('Documents in corpus: {0}'.format(n_docs))
    print('Number of topics: {0}'.format(K))
    print('Iterations: {0}'.format(n_iterations))

    # NOTE(review): the 3rd/4th positional arguments look like lower and
    # upper bounds on document length -- confirm against random_corpus.
    c = random_corpus(n_docs * doc_len, V, doc_len, doc_len + 1,
                      seed=corpus_seed)
    m = LdaCgsSeq(c, 'document', K=K, seed=model_seed)
    m.train(n_iterations=n_iterations, verbose=2)

    return m
コード例 #5
0
ファイル: ldacgsmulti.py プロジェクト: inpho/vsm
def demo_LdaCgsMulti(doc_len=500, V=100000, n_docs=100,
                     K=20, n_iterations=5, n_proc=2,
                     corpus_seed=None, model_seeds=None):
    """Train an ``LdaCgsMulti`` model on a randomly generated corpus.

    The run settings are echoed to stdout before the corpus is built.

    Parameters
    ----------
    doc_len : words per document.
    V : words in the vocabulary.
    n_docs : documents in the corpus.
    K : number of topics.
    n_iterations : training iterations.
    n_proc : number of processors, forwarded to ``LdaCgsMulti``.
    corpus_seed : forwarded to ``random_corpus`` as ``seed``.
    model_seeds : forwarded to ``LdaCgsMulti`` as ``seeds``.

    Returns
    -------
    The ``LdaCgsMulti`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    settings = (
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
        ('Number of processors:', n_proc),
    )
    for label, value in settings:
        print(label, value)

    # NOTE(review): the 3rd/4th positional arguments look like lower and
    # upper bounds on document length -- confirm against random_corpus.
    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1,
                           seed=corpus_seed)

    model = LdaCgsMulti(corpus, 'document', K=K, n_proc=n_proc,
                        seeds=model_seeds)
    model.train(n_iterations=n_iterations, verbose=2)
    return model
コード例 #6
0
ファイル: ldacgs.py プロジェクト: inpho/vsm
def demo_LdaCgs(doc_len=500, V=100000, n_docs=100,
                K=20, n_iterations=5, n_threads=1):
    """Train an ``LdaCgs`` model on a randomly generated corpus.

    The run settings and a progress message for each stage are printed to
    stdout.

    Parameters
    ----------
    doc_len : words per document.
    V : words in the vocabulary.
    n_docs : documents in the corpus.
    K : number of topics.
    n_iterations : training iterations.
    n_threads : forwarded to ``LdaCgs.train``.

    Returns
    -------
    The ``LdaCgs`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    settings = (
        ('Words per document:', doc_len),
        ('Words in vocabulary:', V),
        ('Documents in corpus:', n_docs),
        ('Number of topics:', K),
        ('Iterations:', n_iterations),
    )
    for label, value in settings:
        print(label, value)

    # NOTE(review): the 3rd/4th positional arguments look like lower and
    # upper bounds on document length -- confirm against random_corpus.
    total_words = n_docs * doc_len
    corpus = random_corpus(total_words, V, doc_len, doc_len + 1)
    print('Random corpus generated. Initializing model.')

    model = LdaCgs(corpus, 'document', K=K)
    print('Begin estimation.')

    model.train(n_iterations=n_iterations, n_threads=n_threads)
    return model
コード例 #7
0
ファイル: ldacgs.py プロジェクト: xiayanchen/vsm
def demo_LdaCgs(doc_len=500, V=100000, n_docs=100,
                K=20, n_iterations=5, n_threads=1):
    """Train an ``LdaCgs`` model on a randomly generated corpus.

    The run settings and a progress message for each stage are printed to
    stdout.

    Parameters
    ----------
    doc_len : words per document.
    V : words in the vocabulary.
    n_docs : documents in the corpus.
    K : number of topics.
    n_iterations : training iterations.
    n_threads : forwarded to ``LdaCgs.train``.

    Returns
    -------
    The ``LdaCgs`` instance after ``train`` has been called on it.
    """
    from vsm.extensions.corpusbuilders import random_corpus

    # Each line is formatted into a single string so the output is the same
    # whether ``print`` is a statement (Python 2) or a function (Python 3);
    # the former bare ``print`` statements were a SyntaxError on Python 3.
    print('Words per document: {0}'.format(doc_len))
    print('Words in vocabulary: {0}'.format(V))
    print('Documents in corpus: {0}'.format(n_docs))
    print('Number of topics: {0}'.format(K))
    print('Iterations: {0}'.format(n_iterations))

    # NOTE(review): the 3rd/4th positional arguments look like lower and
    # upper bounds on document length -- confirm against random_corpus.
    c = random_corpus(n_docs * doc_len, V, doc_len, doc_len + 1)

    print('Random corpus generated. Initializing model.')
    m = LdaCgs(c, 'document', K=K)

    print('Begin estimation.')
    m.train(n_iterations=n_iterations, n_threads=n_threads)

    return m