def train_glove(sentences, emb_dim=50):
    """Train GloVe vectors on ``sentences`` and checkpoint periodically.

    The vocabulary and co-occurrence data (window size 10) are built with
    the ``glove`` module, then training runs for 3000 iterations. Whenever
    the iteration index handed to the callback is a multiple of 100 and at
    least 100, the callback passes the matrix through
    ``evaluate.merge_main_context``, saves the result to
    ``log/glove_<emb_dim>_iter<i>.model`` and prints the
    ``evaluate.most_similar`` output for a few probe words.

    Args:
        sentences: Corpus handed to ``glove.build_vocab`` and
            ``glove.build_cooccur``.
        emb_dim: Vector size passed to ``glove.train_glove``; also embedded
            in the checkpoint file name.

    Returns:
        The value returned by ``glove.train_glove`` (presumably the trained
        matrix -- confirm against the ``glove`` module). Earlier versions of
        this function discarded it and returned ``None``.
    """
    glove.logger.setLevel(logging.INFO)
    vocab = glove.build_vocab(sentences)
    cooccur = glove.build_cooccur(vocab, sentences, window_size=10)
    id2word = evaluate.make_id2word(vocab)

    def evaluate_word(W):
        # Print the most_similar result for each probe word.
        words = ['good', 'movie', 'bad', 'worth', 'dog']
        for word in words:
            # Call form of print: same output for a single argument on
            # Python 2, and valid syntax on Python 3.
            print(evaluate.most_similar(W, vocab, id2word, word))

    def save_per(W, i):
        # Only act on iteration indices 100, 200, 300, ...
        if i % 100 == 0 and i >= 100:
            filename = "log/glove_%d_iter%d.model" % (emb_dim, i)
            # Rebinds the local name only; the caller's reference is not
            # reassigned by this line.
            W = evaluate.merge_main_context(W)
            glove.save_model(W, filename)
            evaluate_word(W)

    # Hand the training result back to the caller instead of dropping it.
    return glove.train_glove(vocab, cooccur, vector_size=emb_dim,
                             iterations=3000, iter_callback=save_per)
Ejemplo n.º 2
0
def train_glove(sentences, emb_dim=50):
    """Train GloVe vectors on ``sentences`` and checkpoint periodically.

    The vocabulary and co-occurrence data (window size 10) are built with
    the ``glove`` module, then training runs for 3000 iterations. Whenever
    the iteration index handed to the callback is a multiple of 100 and at
    least 100, the callback passes the matrix through
    ``evaluate.merge_main_context``, saves the result to
    ``log/glove_<emb_dim>_iter<i>.model`` and prints the
    ``evaluate.most_similar`` output for a few probe words.

    Args:
        sentences: Corpus handed to ``glove.build_vocab`` and
            ``glove.build_cooccur``.
        emb_dim: Vector size passed to ``glove.train_glove``; also embedded
            in the checkpoint file name.

    Returns:
        The value returned by ``glove.train_glove`` (presumably the trained
        matrix -- confirm against the ``glove`` module). Earlier versions of
        this function discarded it and returned ``None``.
    """
    glove.logger.setLevel(logging.INFO)
    vocab = glove.build_vocab(sentences)
    cooccur = glove.build_cooccur(vocab, sentences, window_size=10)
    id2word = evaluate.make_id2word(vocab)

    def evaluate_word(W):
        # Print the most_similar result for each probe word.
        words = ['good', 'movie', 'bad', 'worth', 'dog']
        for word in words:
            # Call form of print: same output for a single argument on
            # Python 2, and valid syntax on Python 3.
            print(evaluate.most_similar(W, vocab, id2word, word))

    def save_per(W, i):
        # Only act on iteration indices 100, 200, 300, ...
        if i % 100 == 0 and i >= 100:
            filename = "log/glove_%d_iter%d.model" % (emb_dim, i)
            # Rebinds the local name only; the caller's reference is not
            # reassigned by this line.
            W = evaluate.merge_main_context(W)
            glove.save_model(W, filename)
            evaluate_word(W)

    # Hand the training result back to the caller instead of dropping it.
    return glove.train_glove(vocab,
                             cooccur,
                             vector_size=emb_dim,
                             iterations=3000,
                             iter_callback=save_per)
Ejemplo n.º 3
0
# Small in-memory corpus: one document per line of the literal below.
test_corpus = """human interface computer
survey user computer system response time
eps user interface system
system human system eps
user response time
trees
graph trees
graph minors trees
graph minors survey
I like graph and stuff
I like trees and stuff
Sometimes I build a graph
Sometimes I build trees""".splitlines()

# Only let ERROR-level (and above) records through the glove logger.
glove.logger.setLevel(logging.ERROR)

# Shared fixtures used by the tests in this module.
vocab = glove.build_vocab(test_corpus)
cooccur = glove.build_cooccur(vocab, test_corpus, window_size=10)
id2word = evaluate.make_id2word(vocab)

# Train, then merge and normalize word vectors in a single step.
W = evaluate.merge_main_context(
    glove.train_glove(vocab, cooccur, vector_size=10, iterations=500)
)


def test_similarity():
    """Check that the first ``most_similar`` result for 'graph' is 'trees'.

    Uses the module-level ``W``, ``vocab`` and ``id2word`` fixtures and logs
    the full result at DEBUG level before asserting.
    """
    neighbours = evaluate.most_similar(W, vocab, id2word, 'graph')
    logging.debug(neighbours)

    top_hit = neighbours[0]
    assert_equal('trees', top_hit)
Ejemplo n.º 4
0
# Small in-memory corpus: one document per line of the literal below.
test_corpus = """human interface computer
survey user computer system response time
eps user interface system
system human system eps
user response time
trees
graph trees
graph minors trees
graph minors survey
I like graph and stuff
I like trees and stuff
Sometimes I build a graph
Sometimes I build trees""".splitlines()

# Only let ERROR-level (and above) records through the glove logger.
glove.logger.setLevel(logging.ERROR)

# Shared fixtures used by the tests in this module.
vocab = glove.build_vocab(test_corpus)
cooccur = glove.build_cooccur(vocab, test_corpus, window_size=10)
id2word = evaluate.make_id2word(vocab)

# Train, then merge and normalize word vectors in a single step.
W = evaluate.merge_main_context(
    glove.train_glove(vocab, cooccur, vector_size=10, iterations=500)
)


def test_similarity():
    """Check that the first ``most_similar`` result for 'graph' is 'trees'.

    Uses the module-level ``W``, ``vocab`` and ``id2word`` fixtures and logs
    the full result at DEBUG level before asserting.
    """
    neighbours = evaluate.most_similar(W, vocab, id2word, 'graph')
    logging.debug(neighbours)

    top_hit = neighbours[0]
    assert_equal('trees', top_hit)