Ejemplo n.º 1
0
def test_fitting():
    """
    Check that fitting lowers the squared reconstruction error.

    A model fitted for zero epochs must have a summed squared error
    above 30000.0 against the log co-occurrence matrix, while a model
    fitted for 500 epochs must come in below 1500.0.
    """

    sentence_count = 5000
    random_seed = 10
    model_kwargs = dict(no_components=100, learning_rate=0.05)

    corpus = Corpus()
    training_sentences = generate_training_corpus(
        sentence_count, vocabulary_size=50, seed=random_seed
    )
    corpus.fit(training_sentences)

    def squared_error(model, target):
        # Summed squared difference between the matrix rebuilt from the
        # model and the dense target matrix.
        return ((_reproduce_input_matrix(model) - target) ** 2).sum()

    # Baseline: zero epochs, so no training passes are requested.
    untrained = Glove(**model_kwargs)
    untrained.fit(corpus.matrix, epochs=0, no_threads=2)

    # Dense target: natural log applied to the stored co-occurrence
    # values of a copy, so corpus.matrix itself is left untouched.
    sparse_log = corpus.matrix.copy()
    sparse_log.data = np.log(sparse_log.data)
    target = np.asarray(sparse_log.todense())

    assert squared_error(untrained, target) > 30000.0

    # A fresh model with identical hyperparameters, trained this time.
    trained = Glove(**model_kwargs)
    trained.fit(corpus.matrix, epochs=500, no_threads=2)

    assert squared_error(trained, target) < 1500.0
Ejemplo n.º 2
0
def test_large_corpus_construction():
    """
    Compare the Corpus co-occurrence matrix with a reference build.

    The same generated corpus (same size and seed) is fed to
    ``Corpus.fit`` and to ``build_coocurrence_matrix``; row indices,
    column indices and values must agree, and all values must be
    strictly positive.
    """

    sentence_count = 5000
    random_seed = 10

    corpus = Corpus()
    corpus.fit(generate_training_corpus(sentence_count, seed=random_seed))

    # Round-trip through CSR before going back to COO -- presumably to
    # normalise entry ordering for the element-wise checks below
    # (NOTE(review): confirm against the reference builder's ordering).
    actual = corpus.matrix.tocsr().tocoo()

    # Regenerate the corpus with the same arguments for the reference.
    reference_sentences = generate_training_corpus(sentence_count,
                                                   seed=random_seed)
    expected = build_coocurrence_matrix(reference_sentences)

    rows_match = (actual.row == expected.row).all()
    assert rows_match

    cols_match = (actual.col == expected.col).all()
    assert cols_match

    assert np.allclose(actual.data, expected.data)

    all_positive = (actual.data > 0).all()
    assert all_positive
Ejemplo n.º 3
0
def test_large_corpus_construction_wo_weighting_symmetric():
    """
    Compare the Corpus matrix with a reference build, fitting with
    ``distance_weighting=False`` and ``symmetric=True``.

    The reference comes from
    ``build_coocurrence_matrix_wo_weighting_symmetric`` on a corpus
    generated with the same size and seed; row indices, column indices
    and values must agree, and all values must be strictly positive.
    """

    sentence_count = 5000
    random_seed = 10
    fit_options = dict(distance_weighting=False, symmetric=True)

    corpus = Corpus()
    corpus.fit(generate_training_corpus(sentence_count, seed=random_seed),
               **fit_options)

    # Round-trip through CSR before going back to COO -- presumably to
    # normalise entry ordering for the element-wise checks below
    # (NOTE(review): confirm against the reference builder's ordering).
    actual = corpus.matrix.tocsr().tocoo()

    # Regenerate the corpus with the same arguments for the reference.
    reference_sentences = generate_training_corpus(sentence_count,
                                                   seed=random_seed)
    expected = build_coocurrence_matrix_wo_weighting_symmetric(
        reference_sentences)

    rows_match = (actual.row == expected.row).all()
    assert rows_match

    cols_match = (actual.col == expected.col).all()
    assert cols_match

    assert np.allclose(actual.data, expected.data)

    all_positive = (actual.data > 0).all()
    assert all_positive
Ejemplo n.º 4
0
def test_fitting():
    """
    Verify that training reduces the squared reconstruction error.

    The summed squared difference between the matrix returned by
    ``_reproduce_input_matrix`` and the log co-occurrence matrix must
    exceed 30000.0 after zero epochs and fall below 1500.0 after 500.
    """

    n_sentences = 5000
    rng_seed = 10

    corpus = Corpus()
    sentences = generate_training_corpus(n_sentences,
                                         vocabulary_size=50,
                                         seed=rng_seed)
    corpus.fit(sentences)

    # Stage 1: a model fitted for zero epochs serves as the baseline.
    baseline_model = Glove(no_components=100, learning_rate=0.05)
    baseline_model.fit(corpus.matrix, epochs=0, no_threads=2)

    # Take the log of the stored entries on a copy, then densify.
    log_sparse = corpus.matrix.copy()
    log_sparse.data = np.log(log_sparse.data)
    dense_log_cooc = np.asarray(log_sparse.todense())

    baseline_residual = _reproduce_input_matrix(baseline_model) - dense_log_cooc
    baseline_error = (baseline_residual ** 2).sum()
    assert baseline_error > 30000.0

    # Stage 2: same hyperparameters, now fitted for 500 epochs.
    fitted_model = Glove(no_components=100, learning_rate=0.05)
    fitted_model.fit(corpus.matrix, epochs=500, no_threads=2)

    fitted_residual = _reproduce_input_matrix(fitted_model) - dense_log_cooc
    fitted_error = (fitted_residual ** 2).sum()
    assert fitted_error < 1500.0