def test_cosine_distance():
    """Check the cosine distance measure on a handful of string pairs.

    Calls ``string_distance_measures.distance_cosine_measure`` and asserts:

    * identical strings give a distance of 0,
    * the sample pair "string1"/"string2" gives a distance of 1,
    * two near-identical sentences give the approximate distances noted
      next to each assertion,
    * two empty strings give a distance of 0.
    """
    s1 = "string1"
    s2 = "string2"

    # print(...) with a single argument produces the same output on Python 2
    # and Python 3; the bare ``print x`` statement is a SyntaxError on 3.
    dist = string_distance_measures.distance_cosine_measure(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_cosine_measure(s1, s2)
    print(dist)
    assert dist == 1

    # One word replaced ("a" -> "another").
    s3 = "mary had a little lamb"
    s4 = "mary had another little lamb"
    dist = string_distance_measures.distance_cosine_measure(s3, s4)
    print("distance: %s" % (dist,))
    assert 0.19 < dist < 0.21  # approx. 0.2

    # One word removed ("a").
    s3 = "mary had a little lamb"
    s4 = "mary had little lamb"
    dist = string_distance_measures.distance_cosine_measure(s3, s4)
    print("distance: %s" % (dist,))
    assert 0.1 < dist < 0.11  # approx. 0.105572809

    # Edge case: both inputs empty.
    dist = string_distance_measures.distance_cosine_measure("", "")
    print("distance: %s" % (dist,))
    assert dist == 0
# Example #2
# 0
def test_cosine_distance():
    """Check the cosine distance measure on a handful of string pairs.

    Calls ``string_distance_measures.distance_cosine_measure`` and asserts:

    * identical strings give a distance of 0,
    * the sample pair "string1"/"string2" gives a distance of 1,
    * two near-identical sentences give the approximate distances noted
      next to each assertion,
    * two empty strings give a distance of 0.
    """
    s1 = "string1"
    s2 = "string2"

    # print(...) with a single argument produces the same output on Python 2
    # and Python 3; the bare ``print x`` statement is a SyntaxError on 3.
    dist = string_distance_measures.distance_cosine_measure(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_cosine_measure(s1, s2)
    print(dist)
    assert dist == 1

    # One word replaced ("a" -> "another").
    s3 = "mary had a little lamb"
    s4 = "mary had another little lamb"
    dist = string_distance_measures.distance_cosine_measure(s3, s4)
    print("distance: %s" % (dist,))
    assert 0.19 < dist < 0.21  # approx. 0.2

    # One word removed ("a").
    s3 = "mary had a little lamb"
    s4 = "mary had little lamb"
    dist = string_distance_measures.distance_cosine_measure(s3, s4)
    print("distance: %s" % (dist,))
    assert 0.1 < dist < 0.11  # approx. 0.105572809

    # Edge case: both inputs empty.
    dist = string_distance_measures.distance_cosine_measure("", "")
    print("distance: %s" % (dist,))
    assert dist == 0
def test_calls_work():
    """Smoke-test the call signatures of the distance functions.

    Each function in ``string_distance_measures`` is called once with an
    identical pair of arguments (asserting a distance of 0) and once with
    the differing sample pair "string1"/"string2".
    """
    s1 = "string1"
    s2 = "string2"
    assert string_distance_measures.distance_levenshtein_distance(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_distance(s1, s2) > 0

    assert string_distance_measures.distance_levenshtein_jaro_winkler(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_jaro_winkler(s1, s2) > 0

    assert string_distance_measures.distance_levenshtein_ratio(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_ratio(s1, s2) > 0

    assert string_distance_measures.distance_title_len(s1, s1) == 0
    # Both sample strings are 7 characters long, hence 0 is asserted here too.
    assert string_distance_measures.distance_title_len(s1, s2) == 0

    # From here on s1/s2 hold the result of make_terms_from_string rather
    # than the raw strings.
    s1 = string_distance_measures.make_terms_from_string(s1)
    s2 = string_distance_measures.make_terms_from_string(s2)
    assert string_distance_measures.distance_nbr_title_terms(s1, s1) == 0
    # Original note: "same length strings" -- presumably both inputs yield
    # the same number of terms; verify against make_terms_from_string.
    assert string_distance_measures.distance_nbr_title_terms(s1, s2) == 0

    # NOTE(review): the cosine measure receives the term objects here, while
    # test_cosine_distance passes raw strings -- confirm both are supported.
    # print(...) with a single argument produces the same output on Python 2
    # and Python 3; the bare ``print x`` statement is a SyntaxError on 3.
    dist = string_distance_measures.distance_cosine_measure(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_cosine_measure(s1, s2)
    print(dist)
    assert dist == 1
# Example #4
# 0
def test_calls_work():
    """Smoke-test the call signatures of the distance functions.

    Each function in ``string_distance_measures`` is called once with an
    identical pair of arguments (asserting a distance of 0) and once with
    the differing sample pair "string1"/"string2".
    """
    s1 = "string1"
    s2 = "string2"
    assert string_distance_measures.distance_levenshtein_distance(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_distance(s1, s2) > 0

    assert string_distance_measures.distance_levenshtein_jaro_winkler(s1,
                                                                      s1) == 0
    assert string_distance_measures.distance_levenshtein_jaro_winkler(s1,
                                                                      s2) > 0

    assert string_distance_measures.distance_levenshtein_ratio(s1, s1) == 0
    assert string_distance_measures.distance_levenshtein_ratio(s1, s2) > 0

    assert string_distance_measures.distance_title_len(s1, s1) == 0
    # Both sample strings are 7 characters long, hence 0 is asserted here too.
    assert string_distance_measures.distance_title_len(
        s1, s2) == 0

    # From here on s1/s2 hold the result of make_terms_from_string rather
    # than the raw strings.
    s1 = string_distance_measures.make_terms_from_string(s1)
    s2 = string_distance_measures.make_terms_from_string(s2)
    assert string_distance_measures.distance_nbr_title_terms(s1, s1) == 0
    # Original note: "same length strings" -- presumably both inputs yield
    # the same number of terms; verify against make_terms_from_string.
    assert string_distance_measures.distance_nbr_title_terms(
        s1, s2) == 0

    # NOTE(review): the cosine measure receives the term objects here, while
    # test_cosine_distance passes raw strings -- confirm both are supported.
    # print(...) with a single argument produces the same output on Python 2
    # and Python 3; the bare ``print x`` statement is a SyntaxError on 3.
    dist = string_distance_measures.distance_cosine_measure(s1, s1)
    print(dist)
    assert dist == 0
    dist = string_distance_measures.distance_cosine_measure(s1, s2)
    print(dist)
    assert dist == 1