Example #1
def test():
    # stdlib modules used below; in the original module these imports
    # presumably live at file scope alongside MGLDA and mglda_learning
    import sys
    import time
    import logging

    data_file = sys.argv[1]   # path to the input corpus
    model_file = sys.argv[2]  # path where the learned model is written
#    import nltk.corpus
    logging.info("begin to run")
    t1 = time.time()
    import vocabulary_for_mglda as vocabulary
    
    #corpus = vocabulary.load_corpus_each_sentence("pushed_words.dat")
    corpus = vocabulary.load_corpus_each_sentence(data_file)
    t2 = time.time()
    logging.info("load corpus succeed. cost:%d s", t2-t1)

    # docs[doc_idx][sentence_idx][word_idx]: word ids per sentence per document
    voca = vocabulary.Vocabulary(True)
    docs = [voca.doc_to_ids_each_sentence(doc) for doc in corpus]
    t3 = time.time()
    logging.info("doc_to_id succeed. cost:%d s", t3-t2)

    # MG-LDA hyperparameters: K_gl/K_loc = numbers of global/local topics,
    # gamma/alpha_*/beta_* = smoothing priors, T = sliding-window size,
    # W = vocabulary size
    K_gl, K_loc, T, W = 50, 10, 3, voca.size()
    gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc = 0.1, 0.1, 0.1, 0.1, 0.1
    beta_gl, beta_loc = 0.1, 0.1
    mglda = MGLDA(K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc, T, docs, W)
    logging.info("corpus=%d, words=%d, K_gl=%d, K_loc=%d, gamma=%f, alpha_gl=%f, alpha_loc=%f, alpha_mix_gl=%f, alpha_mix_loc=%f, beta_gl=%f, beta_loc=%f" % (len(corpus), len(voca.vocas), K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc))
    t4 = time.time()
    logging.info("initialize succeed. cost:%d s", t4-t3)
    logging.info("begin to learn")
    
    out = open(model_file, 'wb')
    iteration = 1000
    mglda_learning(mglda, iteration, voca, out)
    out.close()
    logging.info("learn succeed. cost:%d s", time.time()-t4)
Example #2
def test():
#    import nltk.corpus
    import vocabulary_for_mglda as vocabulary
    
    corpus = vocabulary.load_corpus_each_sentence("0:2000")

    # docs[doc_idx][sentence_idx][word_idx]: word ids per sentence per document
    voca = vocabulary.Vocabulary(True)
    docs = [voca.doc_to_ids_each_sentence(doc) for doc in corpus]
    K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc, T, docs, W = 50, 10, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 3, docs, voca.size()
    mglda = MGLDA(K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc, T, docs, W)
    print "corpus=%d, words=%d, K_gl=%d, K_loc=%d, gamma=%f, alpha_gl=%f, alpha_loc=%f, alpha_mix_gl=%f, alpha_mix_loc=%f, beta_gl=%f, beta_loc=%f" % (len(corpus), len(voca.vocas), K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc)
    
    iteration = 1000
    mglda_learning(mglda, iteration, voca)
Example #3
def test():
    #    import nltk.corpus
    import vocabulary_for_mglda as vocabulary

    corpus = vocabulary.load_corpus_each_sentence("0:2000")

    # docs[doc_idx][sentence_idx][word_idx]: word ids per sentence per document
    voca = vocabulary.Vocabulary(True)
    docs = [voca.doc_to_ids_each_sentence(doc) for doc in corpus]
    K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc, T, docs, W = 50, 10, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 3, docs, voca.size()
    mglda = MGLDA(K_gl, K_loc, gamma, alpha_gl, alpha_loc, alpha_mix_gl,
                  alpha_mix_loc, beta_gl, beta_loc, T, docs, W)
    print "corpus=%d, words=%d, K_gl=%d, K_loc=%d, gamma=%f, alpha_gl=%f, alpha_loc=%f, alpha_mix_gl=%f, alpha_mix_loc=%f, beta_gl=%f, beta_loc=%f" % (
        len(corpus), len(voca.vocas), K_gl, K_loc, gamma, alpha_gl, alpha_loc,
        alpha_mix_gl, alpha_mix_loc, beta_gl, beta_loc)

    iteration = 1000
    mglda_learning(mglda, iteration, voca)
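All three examples feed MGLDA the same nested docs structure: a list of documents, each a list of sentences, each a list of integer word ids produced by voca.doc_to_ids_each_sentence. The sketch below illustrates that layout; the shape is inferred from the list comprehension and the docs[...] comment, and the ids are made up for illustration only.

# Hypothetical illustration of the docs layout passed to MGLDA:
# docs[doc_idx][sentence_idx][word_idx] is an integer id into voca.vocas
docs = [
    [[0, 1, 2], [3, 1]],       # document 0: two sentences
    [[4, 5], [0, 6, 6], [2]],  # document 1: three sentences
]
W = 7  # vocabulary size reported by voca.size(); every id lies in range(W)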