Esempio n. 1
0
def test_from_entries_and_from_matrix():
    """Exercise AssocSpace.from_entries() under several parameter settings.

    from_matrix() has no dedicated test; it is only covered implicitly
    through the from_entries() calls made here.
    """
    # Neither an empty entry list nor a single entry yields a space.
    for too_few in ([], [(1, 'apple', 'red')]):
        assert AssocSpace.from_entries(too_few, k=1) is None

    # Mostly-default construction: check the size, the leading sigma and a
    # few term-to-term associations.
    assoc_default = AssocSpace.from_entries(ENTRIES, k=4)
    eq_(assoc_default.k, 4)
    eq_(assoc_default.sigma[0], 1.0)
    for term, floor in (('apple', 0.5), ('red', 0.999)):
        assert assoc_default.assoc_between_two_terms(term, 'red') > floor
    assert assoc_default.assoc_between_two_terms('lemon', 'red') < 0.2

    # With strip_a0=False there are negative eigenvalues, so an eigenvalue
    # from the middle is dropped to make room for a0.
    assoc_no_strip = AssocSpace.from_entries(ENTRIES, k=4, strip_a0=False)
    eq_(assoc_no_strip.k, 4)
    sigma_ratio = assoc_no_strip.sigma[-1] / assoc_no_strip.sigma[1]
    assert np.allclose(sigma_ratio, assoc_default.sigma[-1])

    # Axis 1 of the unstripped space must equal axis 0 of the default space,
    # up to sign.
    unstripped_axis = assoc_no_strip.u[:, 1]
    default_axis = assoc_default.u[:, 0]
    if not np.allclose(unstripped_axis, default_axis):
        assert np.allclose(unstripped_axis, -default_axis)

    # Turning off normalize_gm must still give the requested number of axes.
    assoc_no_norm = AssocSpace.from_entries(ENTRIES, k=4, normalize_gm=False)
    eq_(assoc_no_norm.k, 4)
def test_from_entries_and_from_matrix():
    """Build AssocSpaces via from_entries() and check their basic properties.

    from_matrix() is not tested separately; it is covered implicitly here.
    """
    # Too little input (no entries, or just one) must be rejected with None.
    rejected_inputs = ([], [(1, 'apple', 'red')])
    for entries_too_small in rejected_inputs:
        assert AssocSpace.from_entries(entries_too_small, k=1) is None

    # Default-ish parameters.
    assoc_default = AssocSpace.from_entries(ENTRIES, k=4)
    eq_(assoc_default.k, 4)
    eq_(assoc_default.sigma[0], 1.0)
    between = assoc_default.assoc_between_two_terms
    assert between('apple', 'red') > 0.5
    assert between('red', 'red') > 0.999
    assert between('lemon', 'red') < 0.2

    # strip_a0=False: negative eigenvalues are present in this case, so one
    # eigenvalue from the middle is lost to make room for a0.
    assoc_no_strip = AssocSpace.from_entries(ENTRIES, k=4, strip_a0=False)
    eq_(assoc_no_strip.k, 4)
    rescaled_last_sigma = assoc_no_strip.sigma[-1] / assoc_no_strip.sigma[1]
    assert np.allclose(rescaled_last_sigma, assoc_default.sigma[-1])

    # The axes agree only up to sign, so accept either orientation.
    kept_axis = assoc_no_strip.u[:, 1]
    reference_axis = assoc_default.u[:, 0]
    same_orientation = np.allclose(kept_axis, reference_axis)
    assert same_orientation or np.allclose(kept_axis, -reference_axis)

    # normalize_gm=False
    assoc_no_norm = AssocSpace.from_entries(ENTRIES, k=4, normalize_gm=False)
    eq_(assoc_no_norm.k, 4)
def test_vectorizing_and_similar_terms():
    """Check vector_from_terms() and the similar-term lookups built on it."""
    assoc = AssocSpace.from_entries(ENTRIES, k=3)
    apple = assoc.row_named('apple')
    banana = assoc.row_named('banana')
    vector = assoc.vector_from_terms(
        [('apple', 5), ('banana', 22), ('not a term', 17)]
    )

    # A term is (approximately) perfectly associated with itself...
    self_similarity = assoc.assoc_between_two_terms('apple', 'apple')
    assert abs(self_similarity - 1.0) < 1e-3

    # ...while 'apple' and 'banana' are at least 10% less similar to each
    # other than to themselves.
    assert assoc.assoc_between_two_terms('apple', 'banana') < 0.9

    # The vector should lie in the span of apple and banana. Project it onto
    # an orthonormal basis of that span (Gram-Schmidt) and subtract the
    # projections; nothing should be left over.
    unit_apple = normalize(apple)
    unit_banana_perp = normalize(
        banana - unit_apple * unit_apple.dot(banana)
    )
    leftover = vector - unit_apple * unit_apple.dot(vector)
    leftover -= unit_banana_perp * unit_banana_perp.dot(leftover)
    assert norm(leftover) < 1e-3

    # Similar terms must come back sorted by descending score.
    ranked = assoc.terms_similar_to_vector(vector)
    labels, scores = zip(*ranked)
    eq_(list(scores), sorted(scores, reverse=True))

    # The single best match must agree with the head of the ranked list.
    top = assoc.most_similar_to_vector(vector)
    eq_(top[0], labels[0])
    eq_(top[1], scores[0])

    # Expected relative ranking of a few labels.
    expected_order = (
        ('banana', 'apple'),
        ('apple', 'green'),
        ('apple', 'celery'),
    )
    for earlier, later in expected_order:
        assert labels.index(earlier) < labels.index(later)
Esempio n. 4
0
def test_vectorizing_and_similar_terms():
    """Vectorize weighted terms, then look up the terms most similar to them."""
    assoc = AssocSpace.from_entries(ENTRIES, k=3)
    term_weights = [('apple', 5), ('banana', 22), ('not a term', 17)]
    apple = assoc.row_named('apple')
    banana = assoc.row_named('banana')
    vector = assoc.vector_from_terms(term_weights)

    between = assoc.assoc_between_two_terms

    # Self-similarity is approximately 1.
    assert abs(between('apple', 'apple') - 1.0) < 1e-3

    # Two different terms are at least 10% less similar to each other than
    # each is to itself.
    assert between('apple', 'banana') < 0.9

    # Remove the apple component, then the part of banana orthogonal to
    # apple. If the vector is a linear combination of the two rows, the
    # remainder is (numerically) zero.
    apple_dir = normalize(apple)
    banana_dir = normalize(banana - apple_dir * apple_dir.dot(banana))
    remainder = vector - apple_dir * apple_dir.dot(vector)
    remainder -= banana_dir * banana_dir.dot(remainder)
    assert norm(remainder) < 1e-3

    # Scores of similar terms are non-increasing.
    labels, scores = zip(*assoc.terms_similar_to_vector(vector))
    eq_(list(scores), sorted(scores, reverse=True))

    # most_similar_to_vector() returns the first (label, score) of that list.
    best = assoc.most_similar_to_vector(vector)
    eq_(best[0], labels[0])
    eq_(best[1], scores[0])

    # banana ranks ahead of apple, which ranks ahead of green and celery.
    assert labels.index('banana') < labels.index('apple')
    for trailing in ('green', 'celery'):
        assert labels.index('apple') < labels.index(trailing)
def test_truncation():
    """truncated_to(2) keeps the first two axes and leaves labels untouched."""
    full = AssocSpace.from_entries(ENTRIES, k=3)
    shortened = full.truncated_to(2)

    # u and sigma are plain prefixes of the originals.
    assert np.allclose(shortened.u, full.u[:, :2])
    assert np.allclose(shortened.sigma, full.sigma[:2])
    eq_(shortened.labels, full.labels)

    # The first row of the truncated assoc matrix has norm just under 1.
    first_row_norm = norm(shortened.assoc[0])
    assert 0.999 < first_row_norm < 1.0
Esempio n. 6
0
def test_truncation():
    """Truncating a k=3 space to 2 axes keeps the leading axes and labels."""
    keep = 2
    assoc = AssocSpace.from_entries(ENTRIES, k=3)
    truncated = assoc.truncated_to(keep)

    expected_u = assoc.u[:, :keep]
    expected_sigma = assoc.sigma[:keep]
    assert np.allclose(truncated.u, expected_u)
    assert np.allclose(truncated.sigma, expected_sigma)
    eq_(truncated.labels, assoc.labels)

    # Norm of the first assoc row must fall strictly between 0.999 and 1.0.
    row_norm = norm(truncated.assoc[0])
    assert 0.999 < row_norm < 1.0
def test_strip_a0():
    """When stripping a0, AssocSpace uses axes [1,k] instead of [0,k-1]."""
    unstripped = AssocSpace.from_entries(entries, 3, strip_a0=False)
    stripped = AssocSpace.from_entries(entries, 3, strip_a0=True)

    # The unstripped space still has the requested number of axes.
    eq_(unstripped.u.shape[1], 3)

    # Axis 1 of the unstripped space matches axis 0 of the stripped one,
    # ignoring sign.
    assert np.allclose(
        np.abs(unstripped.u[:, 1]), np.abs(stripped.u[:, 0])
    )

    # Stripping preserves the ratio between consecutive sigma values.
    assert np.allclose(
        unstripped.sigma[1] / unstripped.sigma[2],
        stripped.sigma[0] / stripped.sigma[1],
    )

    # Dropping the first axis after the fact must satisfy the same two
    # relations.
    dropped = AssocSpace.from_entries(entries, 3).with_first_axis_dropped()
    assert np.allclose(
        np.abs(unstripped.u[:, 1]), np.abs(dropped.u[:, 0])
    )
    assert np.allclose(
        unstripped.sigma[1] / unstripped.sigma[2],
        dropped.sigma[0] / dropped.sigma[1],
    )
Esempio n. 8
0
def test_merging():
    """Spot-check that AssocSpace.merged_with() behaves reasonably.

    The merging math itself is tested in test_eigenmath; this only verifies
    that AssocSpace uses it sensibly.
    """
    assoc1 = AssocSpace.from_entries(ENTRIES, k=4)
    assoc2 = AssocSpace.from_entries(MORE_ENTRIES, k=4)

    # Without an explicit k, the merged space has k=8 for two k=4 inputs.
    merged_default = assoc1.merged_with(assoc2)
    eq_(merged_default.k, 8)

    # An explicit k is honoured.
    merged = assoc1.merged_with(assoc2, k=4)
    eq_(merged.k, 4)

    eq_(' '.join(merged.labels),
        'apple red green celery orange banana yellow lemon blue tasty ferret')
    assert merged.assoc_between_two_terms('ferret', 'yellow') > 0.5

    # The merged apple/red association lies strictly between the values in
    # the two source spaces.
    in_second = assoc2.assoc_between_two_terms('apple', 'red')
    in_merged = merged.assoc_between_two_terms('apple', 'red')
    assert in_second < in_merged
    in_first = assoc1.assoc_between_two_terms('apple', 'red')
    assert in_merged < in_first
def test_merging():
    """Merge two AssocSpaces and check size, labels and a few associations.

    Only a spot-check: the underlying merge math is covered by
    test_eigenmath.
    """
    # Build the two spaces to merge.
    left = AssocSpace.from_entries(ENTRIES, k=4)
    right = AssocSpace.from_entries(MORE_ENTRIES, k=4)

    # Merging without k gives a space with k=8.
    eq_(left.merged_with(right).k, 8)

    # Merging with k=4 gives a space with k=4.
    merged = left.merged_with(right, k=4)
    eq_(merged.k, 4)

    expected_labels = (
        'apple red green celery orange banana yellow lemon blue tasty ferret'
    )
    eq_(' '.join(merged.labels), expected_labels)
    assert merged.assoc_between_two_terms('ferret', 'yellow') > 0.5

    # apple/red: right < merged < left.
    right_score = right.assoc_between_two_terms('apple', 'red')
    merged_score = merged.assoc_between_two_terms('apple', 'red')
    assert right_score < merged_score
    left_score = left.assoc_between_two_terms('apple', 'red')
    assert merged_score < left_score
Esempio n. 10
0
def test_filter():
    """Filter an AssocSpace with _filter and check the resulting space."""
    assoc = AssocSpace.from_entries(ENTRIES, k=5)
    filtered = assoc.filter(_filter)

    # Same number of axes, reduced label set.
    eq_(filtered.k, 5)
    eq_(' '.join(filtered.labels), 'red green celery banana lemon')

    # A unit-norm axis shows the filtered space was redecomposed.
    axis_norm = norm(filtered.u[:, 1])
    assert np.allclose(axis_norm, 1.0)

    # Redecomposition can be kind of weird, but this result is intuitive:
    # filtered red/banana sits strictly between the unfiltered red/banana and
    # the unfiltered yellow/banana associations.
    red_banana_before = assoc.assoc_between_two_terms('red', 'banana')
    red_banana_after = filtered.assoc_between_two_terms('red', 'banana')
    assert red_banana_before < red_banana_after
    yellow_banana_before = assoc.assoc_between_two_terms('yellow', 'banana')
    assert red_banana_after < yellow_banana_before
Esempio n. 11
0
def test_filter():
    """Build a k=5 space, apply _filter, and sanity-check the outcome."""
    unfiltered = AssocSpace.from_entries(ENTRIES, k=5)
    filtered = unfiltered.filter(_filter)

    # Simple properties of the filtered space.
    eq_(filtered.k, 5)
    surviving_labels = ' '.join(filtered.labels)
    eq_(surviving_labels, 'red green celery banana lemon')

    # Redecomposition happened: axis 1 has unit norm.
    assert np.allclose(norm(filtered.u[:, 1]), 1.0)

    # Redecomposition can be kind of weird, but this ordering is intuitive.
    lower = unfiltered.assoc_between_two_terms('red', 'banana')
    middle = filtered.assoc_between_two_terms('red', 'banana')
    assert lower < middle
    upper = unfiltered.assoc_between_two_terms('yellow', 'banana')
    assert middle < upper
def run():
    """Build a small k=4 AssocSpace and save it as test input data.

    The space is written to a fixed path relative to the current working
    directory.
    """
    output_dir = '../conceptnet5/support_data/testdata/input/assoc_space'

    # (weight, term, term) triples, passed to from_entries() as-is.
    weighted_pairs = [
        (4, '/c/en/apple', '/c/en/red'),
        (1, '/c/en/apple', '/c/en/green'),
        (3, '/c/en/apple', '/c/en/orange'),
        (3, '/c/en/banana', '/c/en/orange'),
        (1, '/c/en/banana', '/c/en/yellow'),
        (0.5, '/c/en/lemon', '/c/en/yellow'),
        (1.5, '/c/en/orange', '/c/en/lemon'),
        (0.1, '/c/en/apple', '/c/en/lemon'),
        (0.2, '/c/en/banana', '/c/en/lemon'),
        (0.5, '/c/en/ideas', '/c/en/colorless'),
        (0.5, '/c/en/ideas', '/c/en/green'),
        (1, '/c/en/example', '/c/en/green'),
    ]

    AssocSpace.from_entries(weighted_pairs, k=4).save_dir(output_dir)
Esempio n. 13
0
def test_association_calculations():
    """Self-association is ~1; apple/banana association is below 0.9."""
    space = AssocSpace.from_entries(entries, 3)

    apple_with_itself = space.assoc_between_two_terms('apple', 'apple')
    assert abs(apple_with_itself - 1.0) < 1e-3

    apple_with_banana = space.assoc_between_two_terms('apple', 'banana')
    assert apple_with_banana < 0.9
Esempio n. 14
0
def test_pickle_round_trip():
    """An AssocSpace survives a round-trip to pickle format and back."""
    original = AssocSpace.from_entries(entries, 3)
    # Serialise and immediately deserialise; the copy must compare equal.
    restored = pickle.loads(pickle.dumps(original))
    eq_(original, restored)
Esempio n. 15
0
def test_dir_round_trip():
    """An AssocSpace survives being saved to a directory and loaded back.

    The space is written to a fresh, uniquely named scratch directory rather
    than a fixed path under /tmp. This way concurrent test runs cannot
    clobber each other, stale files from an earlier run cannot be loaded by
    mistake, and nothing is left behind afterwards.
    """
    # Local imports keep this test self-contained.
    import os
    import shutil
    import tempfile

    assoc = AssocSpace.from_entries(ENTRIES, k=3)
    scratch = tempfile.mkdtemp(prefix='assoc_test_')
    try:
        # Save into a not-yet-existing subdirectory, which is the situation
        # save_dir() faced on the first run with the old fixed path.
        target = os.path.join(scratch, 'assoc_test')
        assoc.save_dir(target)
        assoc2 = AssocSpace.load_dir(target)
        eq_(assoc, assoc2)
    finally:
        # Best-effort cleanup; a cleanup failure must not mask a test result.
        shutil.rmtree(scratch, ignore_errors=True)
Esempio n. 16
0
def test_pickle_round_trip():
    """An AssocSpace survives a round-trip to pickle format and back."""
    before = AssocSpace.from_entries(ENTRIES, k=3)
    payload = pickle.dumps(before)
    after = pickle.loads(payload)
    # Equality is whatever AssocSpace defines for ==, as used by eq_.
    eq_(before, after)
Esempio n. 17
0
def test_dir_round_trip():
    """An AssocSpace survives being saved to a directory and loaded back.

    A unique scratch directory replaces the previous fixed /tmp path. That
    keeps the test portable, stops parallel runs from interfering with each
    other or picking up stale data, and ensures the directory is removed.
    """
    # Local imports keep this test self-contained.
    import os
    import shutil
    import tempfile

    assoc = AssocSpace.from_entries(entries, 3)
    scratch = tempfile.mkdtemp(prefix='assoc_test_')
    try:
        # Save into a not-yet-existing subdirectory, which is the situation
        # save_dir() faced on the first run with the old fixed path.
        target = os.path.join(scratch, 'assoc_test')
        assoc.save_dir(target)
        assoc2 = AssocSpace.load_dir(target)
        eq_(assoc, assoc2)
    finally:
        # Best-effort cleanup; a cleanup failure must not mask a test result.
        shutil.rmtree(scratch, ignore_errors=True)