else:
        sentences, word2idx = get_wikipedia_data(n_files=n_files, n_vocab=2000)
        with open(w2i_file, 'w') as f:
            json.dump(word2idx, f)

    V = len(word2idx)
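    # Glove(D, V, context_sz): D=80 is the embedding dimensionality, V the vocabulary
    # size, and 10 the context window size (argument meanings assumed; not documented here).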
    model = Glove(80, V, 10)
    # model.fit(sentences, cc_matrix=cc_matrix, epochs=20) # coordinate descent
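    # Full-batch gradient descent on the weighted least-squares GloVe objective;
    # note that learning_rate 3*10e-5 equals 3e-4, and reg is assumed to be an
    # L2 penalty on the factor matrices.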
    model.fit(
        sentences,
        cc_matrix=cc_matrix,
        learning_rate=3*10e-5,
        reg=0.01,
        epochs=2000,
        gd=True,
        use_theano=False
    ) # gradient descent
    model.save(we_file)


if __name__ == '__main__':
    we = 'glove_model_50.npz'
    w2i = 'glove_word2idx_50.json'
    main(we, w2i)
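    # concat toggles how the two learned embedding matrices are combined for the
    # analogy lookup (concatenated vs. averaged); this is assumed behaviour of
    # find_analogies, not shown in this snippet.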
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
Example 2
            word2idx = json.load(f)
        sentences = []  # dummy - we won't actually use it
    else:
        sentences, word2idx = get_wikipedia_data(n_files=n_files, n_vocab=2000)
        with open(w2i_file, 'w') as f:
            json.dump(word2idx, f)

    V = len(word2idx)
    model = Glove(80, V, 10)
    # model.fit(sentences, cc_matrix=cc_matrix, epochs=20) # coordinate descent
    model.fit(sentences,
              cc_matrix=cc_matrix,
              learning_rate=3 * 10e-5,
              reg=0.01,
              epochs=2000,
              gd=True,
              use_theano=False)  # gradient descent
    model.save(we_file)


if __name__ == '__main__':
    we = 'glove_model_50.npz'
    w2i = 'glove_word2idx_50.json'
    main(we, w2i)
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
Example 3
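    # Same gradient-descent fit as above, but with use_tensorflow=True to select
    # the TensorFlow backend (flag semantics assumed from its name).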
    model.fit(
        sentences,
        cc_matrix=cc_matrix,
        learning_rate=3 * 10e-5,
        reg=0.1,
        epochs=10,
        gd=True,
        use_theano=False,
        use_tensorflow=True,
    )
    model.save(we_file)


if __name__ == '__main__':
    # we = 'glove_model_50.npz'
    # w2i = 'glove_word2idx_50.json'
    we = 'glove_model_brown.npz'
    w2i = 'glove_word2idx_brown.json'
    main(we, w2i, use_brown=True)
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
        find_analogies('france', 'french', 'english', concat, we, w2i)
        find_analogies('japan', 'japanese', 'chinese', concat, we, w2i)
        find_analogies('japan', 'japanese', 'italian', concat, we, w2i)
        find_analogies('japan', 'japanese', 'australian', concat, we, w2i)
        find_analogies('december', 'november', 'june', concat, we, w2i)
Example 4
    model.fit(
        sentences,
        cc_matrix=cc_matrix,
        learning_rate=3*10e-5,
        reg=0.01,
        epochs=500,
        gd=True,
        use_theano=False,
        use_tensorflow=True,
    )
    model.save(we_file)


if __name__ == '__main__':
    # we = 'glove_model_50.npz'
    # w2i = 'glove_word2idx_50.json'
    we = 'glove_model_brown.npz'
    w2i = 'glove_word2idx_brown.json'
    main(we, w2i, use_brown=True)
    for concat in (True, False):
        print("** concat:", concat)
        find_analogies('king', 'man', 'woman', concat, we, w2i)
        find_analogies('france', 'paris', 'london', concat, we, w2i)
        find_analogies('france', 'paris', 'rome', concat, we, w2i)
        find_analogies('paris', 'france', 'italy', concat, we, w2i)
        find_analogies('france', 'french', 'english', concat, we, w2i)
        find_analogies('japan', 'japanese', 'chinese', concat, we, w2i)
        find_analogies('japan', 'japanese', 'italian', concat, we, w2i)
        find_analogies('japan', 'japanese', 'australian', concat, we, w2i)
        find_analogies('december', 'november', 'june', concat, we, w2i)
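
All of the examples above lean on find_analogies. Below is a minimal sketch of that kind of analogy lookup, written against assumptions rather than the repo's actual code: it assumes the saved .npz holds the two GloVe factor matrices as its first two arrays, that word2idx maps each word to a row index, and that concat means concatenating the two matrices rather than averaging them. load_embeddings and analogy are illustrative names, not the repo's API.

import json
import numpy as np

def load_embeddings(we_file, w2i_file, concat=True):
    # Hypothetical loader: assumes the .npz stores two factor matrices
    # (the actual key names/layout in glove_model_*.npz may differ).
    npz = np.load(we_file)
    W1, W2 = npz[npz.files[0]], npz[npz.files[1]]
    if W2.shape != W1.shape:
        W2 = W2.T
    # concat=True: concatenate the two matrices; otherwise average them.
    We = np.hstack([W1, W2]) if concat else (W1 + W2) / 2
    with open(w2i_file) as f:
        word2idx = json.load(f)
    return We, word2idx

def analogy(w1, w2, w3, We, word2idx):
    # Nearest cosine neighbour to vec(w1) - vec(w2) + vec(w3),
    # e.g. king - man + woman ~ queen.
    Wn = We / np.linalg.norm(We, axis=1, keepdims=True)
    target = Wn[word2idx[w1]] - Wn[word2idx[w2]] + Wn[word2idx[w3]]
    target = target / np.linalg.norm(target)
    sims = Wn.dot(target)
    idx2word = {i: w for w, i in word2idx.items()}
    # Return the best match that is not one of the query words.
    for idx in (-sims).argsort():
        if idx2word[idx] not in (w1, w2, w3):
            return idx2word[idx]

Example use, with the file names from the runs above:

We, word2idx = load_embeddings('glove_model_50.npz', 'glove_word2idx_50.json', concat=True)
print(analogy('king', 'man', 'woman', We, word2idx))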