def test_vectorizing_and_similar_terms():
    # Part 1: turn a weighted list of terms into a single vector.
    # NOTE(review): 'not a term' is presumably absent from ENTRIES; the
    # linear-combination check below only passes if it contributes nothing.
    space = AssocSpace.from_entries(ENTRIES, k=3)
    term_weights = [('apple', 5), ('banana', 22), ('not a term', 17)]
    apple_row = space.row_named('apple')
    banana_row = space.row_named('banana')
    combined = space.vector_from_terms(term_weights)

    # A term compared with itself should score within 1e-3 of 1.
    self_similarity = space.assoc_between_two_terms('apple', 'apple')
    assert abs(self_similarity - 1.0) < 1e-3

    # Two distinct terms should score below 0.9, i.e. at least 10% less
    # similar to each other than each is to itself.
    cross_similarity = space.assoc_between_two_terms('apple', 'banana')
    assert cross_similarity < 0.9

    # The combined vector should lie in the span of the apple and banana rows.
    # Check this by projecting out the apple direction, then the part of the
    # banana direction that is perpendicular to apple; nothing should remain.
    apple_unit = normalize(apple_row)
    banana_along_apple = apple_unit * apple_unit.dot(banana_row)
    banana_unit = normalize(banana_row - banana_along_apple)
    leftover = combined - apple_unit * apple_unit.dot(combined)
    leftover -= banana_unit * banana_unit.dot(leftover)
    assert norm(leftover) < 1e-3

    # Part 2: look up the terms most similar to that vector.
    ranked = space.terms_similar_to_vector(combined)
    labels, scores = zip(*ranked)

    # Scores must already come back in descending order.
    eq_(list(scores), sorted(scores, reverse=True))

    # The single best match must agree with the head of the ranked list.
    best = space.most_similar_to_vector(combined)
    eq_(best[0], labels[0])
    eq_(best[1], scores[0])

    # 'banana' carries the larger weight, so it ranks ahead of 'apple', and
    # 'apple' in turn ranks ahead of 'green' and 'celery'.
    position = labels.index
    assert position('banana') < position('apple')
    assert position('apple') < position('green')
    assert position('apple') < position('celery')
def test_truncation():
    # Truncating a k=3 space to 2 dimensions should keep the leading columns
    # of u and the leading entries of sigma, and leave the labels untouched.
    kept = 2
    full = AssocSpace.from_entries(ENTRIES, k=3)
    reduced = full.truncated_to(kept)

    assert np.allclose(reduced.u, full.u[:, :kept])
    assert np.allclose(reduced.sigma, full.sigma[:kept])
    eq_(reduced.labels, full.labels)

    # The first row of the truncated association matrix should have a norm
    # just under 1: strictly between 0.999 and 1.0.
    first_row_norm = norm(reduced.assoc[0])
    assert 0.999 < first_row_norm < 1.0
def test_filter():
    # Start from a 5-dimensional space and apply the module's _filter to it.
    original = AssocSpace.from_entries(ENTRIES, k=5)
    reduced = original.filter(_filter)

    # The filtered space keeps k=5 and exactly these labels, in this order.
    eq_(reduced.k, 5)
    eq_(' '.join(reduced.labels), 'red green celery banana lemon')

    # A unit-norm second column of u shows that redecomposition happened.
    second_column = reduced.u[:, 1]
    assert np.allclose(norm(second_column), 1.0)

    # Redecomposition can be kind of weird, but this result is intuitive:
    # after filtering, red/banana is more associated than it was before, yet
    # still less than yellow/banana was in the original space.
    red_banana_before = original.assoc_between_two_terms('red', 'banana')
    red_banana_after = reduced.assoc_between_two_terms('red', 'banana')
    assert (
        red_banana_before
        < red_banana_after
        < original.assoc_between_two_terms('yellow', 'banana')
    )
def test_filter():
    # NOTE(review): this is the second function named test_filter in this
    # module, so it replaces the earlier definition when the module is loaded.

    # Build an assoc space with k=5, then filter it with the module's _filter.
    unfiltered = AssocSpace.from_entries(ENTRIES, k=5)
    kept = unfiltered.filter(_filter)

    # Simple properties of the result: same k, and this exact label order.
    eq_(kept.k, 5)
    eq_(' '.join(kept.labels), 'red green celery banana lemon')

    # The second column of u having unit norm indicates a redecomposition.
    column_norm = norm(kept.u[:, 1])
    assert np.allclose(column_norm, 1.0)

    # Redecomposition can be kind of weird, but this ordering is intuitive:
    # red/banana rises after filtering while staying below the original
    # yellow/banana association.
    lower_bound = unfiltered.assoc_between_two_terms('red', 'banana')
    filtered_score = kept.assoc_between_two_terms('red', 'banana')
    assert (
        lower_bound
        < filtered_score
        < unfiltered.assoc_between_two_terms('yellow', 'banana')
    )
def test_norm_and_normalize():
    # 8^2 + 9^2 + 12^2 == 17^2, so this vector has length exactly 17.
    expected_length = 17.0
    sample = np.asarray([8.0, 9.0, 12.0])

    assert np.allclose(norm(sample), expected_length)
    assert np.allclose(normalize(sample), sample / expected_length)

    # We normalize the zero vector to itself rather than raising an error.
    # A fresh zero array is used as the reference so the comparison does not
    # depend on whether normalize() touches its argument.
    normalized_zero = normalize(np.zeros(5))
    assert (np.zeros(5) == normalized_zero).all()