Esempio n. 1
0
def compare(string1, string2, larger_tol):
    """Cross-check ``strcmp95.strcmp95`` against ``all_metrics`` for one pair.

    Both implementations are run on the same pair of strings and their
    results must be equal, except that the first two entries of the results
    are allowed to appear in swapped order.  The last five entries of the
    ``all_metrics`` result are then checked against the individual ``jaro``
    metric functions.

    Args:
        string1: First string; passed untrimmed to ``strcmp95.strcmp95``.
        string2: Second string; passed untrimmed to ``strcmp95.strcmp95``.
        larger_tol: Flag forwarded to ``strcmp95.strcmp95`` and, as
            ``longer_prob``, to ``all_metrics``.

    Returns:
        None.  Returns early, without comparing anything, when both strings
        are empty after trimming.

    Raises:
        AssertionError: If any of the cross-checks fails.
    """
    # strcmp95 always trims input, so we have to do the same for our tests
    s1, s2 = string1.strip(), string2.strip()
    if s1 == s2 == '':
        return

    ans1 = strcmp95.strcmp95(string1, string2, larger_tol, to_upper=0, debug=0)
    ans2 = all_metrics(s1, s2, longer_prob=larger_tol)

    # Tolerate results whose first two entries are merely swapped; in that
    # case only the remaining entries have to agree.
    rearrange = (ans1[:2] != ans2[:2]
                 and ans1[0] == ans2[1]
                 and ans1[1] == ans2[0])
    if rearrange:
        check = ans1[2:] == ans2[2:]
    else:
        check = ans1 == ans2

    if not check:
        # Single-argument print(...) calls give the same output on Python 2
        # and Python 3 (the former print statements were Python 2 only).
        print(rearrange)
        for a1, a2 in zip(ans1, ans2):
            print('%s %s %s' % (str(a1 == a2).ljust(5), a1, a2))
        print(ans1)
        print(ans2)
    assert check, 'strcmp95 and all_metrics results differ'

    # The trailing five entries of the all_metrics result are the weights.
    (weight_jaro, weight_typo, weight_winkler,
     weight_winkler_typo, weight_longer) = ans2[-5:]

    assert weight_jaro == jaro.metric_jaro(s1, s2)
    assert weight_winkler == jaro.metric_jaro_winkler(s1, s2)

    check_original = jaro.metric_original(s1, s2)
    if larger_tol:
        assert weight_longer == check_original
    else:
        # Without the flag, the last weight must equal the winkler-typo one.
        assert weight_longer == weight_winkler_typo
Esempio n. 2
0
def test():
    """Check the ``jaro`` metrics against every row of ``jaro_tests``.

    Each row starts with the two strings to compare; the remaining entries
    are the expected number of matches, the expected half-transposition
    count and the three expected weights (jaro, jaro-winkler, original)
    already formatted with ``'%7.5f'``.

    For every row that agrees, one aligned summary line is printed.

    Raises:
        AssertionError: On the first row whose computed values differ from
            the expected ones (after printing both for inspection).
    """
    import jaro

    # The loop variable is deliberately not called ``test`` so that it does
    # not shadow this function's own name.
    for case in jaro_tests:
        s1, s2 = case[:2]

        (len1, len2, num_matches, half_transposes,
         typo_score, pre_matches, adjust_long) = jaro.string_metrics(s1, s2)

        weight_jaro = jaro.metric_jaro(s1, s2)
        weight_winkler = jaro.metric_jaro_winkler(s1, s2)
        weight_original = jaro.metric_original(s1, s2)
        # TODO: Test for the custom function?

        weights = [weight_jaro, weight_winkler, weight_original]

        # Weights are compared as formatted text, i.e. to five decimals.
        check = [num_matches, half_transposes]
        check.extend('%7.5f' % w for w in weights)

        if check != list(case[2:]):
            # Single-argument print(...) calls behave identically on
            # Python 2 and Python 3.
            print('')
            print('%s %s' % (s1, s2))
            print(check)
            print(case[2:])
            raise AssertionError(
                'jaro metrics mismatch for %r and %r' % (s1, s2))

        strings = []
        for s in [s1, s2]:
            if s.strip() == '':
                # Make blank strings visible: one dash more than their length.
                s = '-'*(len(s)+1)
            strings.append(s.ljust(12))
        strings.extend(str(n).rjust(2) for n in (num_matches, half_transposes))
        strings.extend(' %7.5f' % w for w in weights)

        print(' '.join(strings))
Esempio n. 3
0
def test():
    """Run every ``jaro_tests`` row through the ``jaro`` metric functions.

    A row holds the two input strings followed by the expected values: the
    match count, the half-transposition count and the jaro, jaro-winkler and
    original weights as ``'%7.5f'``-formatted strings.

    A formatted summary line is printed for each row that passes.

    Raises:
        AssertionError: As soon as a row's computed values do not equal the
            expected ones; the strings and both value lists are printed
            first.
    """
    import jaro

    # ``row`` rather than ``test`` so the function name is not shadowed.
    for row in jaro_tests:
        s1, s2 = row[:2]
        expected = list(row[2:])

        string_metrics = jaro.string_metrics(s1, s2)
        (len1, len2, num_matches, half_transposes, typo_score, pre_matches,
         adjust_long) = string_metrics

        weights = [
            jaro.metric_jaro(s1, s2),
            jaro.metric_jaro_winkler(s1, s2),
            jaro.metric_original(s1, s2),
        ]
        # TODO: Test for the custom function?

        counts = [num_matches, half_transposes]
        # Comparing formatted weights limits the check to five decimals.
        check = counts + ['%7.5f' % w for w in weights]

        if check != expected:
            # print(...) with a single argument is valid, and prints the
            # same text, on both Python 2 and Python 3.
            print('')
            print('%s %s' % (s1, s2))
            print(check)
            print(row[2:])
            raise AssertionError(
                'jaro metrics mismatch for %r and %r' % (s1, s2))

        strings = []
        for s in (s1, s2):
            if s.strip() == '':
                # Blank input is shown as dashes (length + 1) so it is visible.
                s = '-' * (len(s) + 1)
            strings.append(s.ljust(12))
        for n in counts:
            strings.append(str(n).rjust(2))
        for w in weights:
            strings.append(' %7.5f' % w)

        print(' '.join(strings))
Esempio n. 4
0
def compare(string1, string2, larger_tol):
    """Verify that ``strcmp95.strcmp95`` and ``all_metrics`` agree.

    The two results have to be equal; the only tolerated difference is that
    their first two entries may be exchanged.  Afterwards the five trailing
    entries of the ``all_metrics`` result are validated against the
    stand-alone ``jaro`` metric functions.

    Args:
        string1: First string, handed to ``strcmp95.strcmp95`` as given.
        string2: Second string, handed to ``strcmp95.strcmp95`` as given.
        larger_tol: Flag passed to ``strcmp95.strcmp95`` and to
            ``all_metrics`` (as ``longer_prob``).

    Returns:
        None.  Nothing is compared when both strings are blank once trimmed.

    Raises:
        AssertionError: When one of the consistency checks does not hold.
    """
    # strcmp95 always trims input, so we have to do the same for our tests
    s1 = string1.strip()
    s2 = string2.strip()
    if s1 == s2 == '':
        return

    ans1 = strcmp95.strcmp95(string1, string2, larger_tol, to_upper=0, debug=0)
    ans2 = all_metrics(s1, s2, longer_prob=larger_tol)

    # "Rearranged" means: the leading pair differs only by being swapped.
    swapped_pair = ans1[0] == ans2[1] and ans1[1] == ans2[0]
    rearrange = ans1[:2] != ans2[:2] and swapped_pair
    # With a swapped leading pair only the tails must match; otherwise the
    # complete results must be equal.
    check = (ans1[2:] == ans2[2:]) if rearrange else (ans1 == ans2)

    if not check:
        # Each print(...) takes exactly one argument so the output is the
        # same under Python 2 and Python 3.
        print(rearrange)
        for a1, a2 in zip(ans1, ans2):
            print('%s %s %s' % (str(a1 == a2).ljust(5), a1, a2))
        print(ans1)
        print(ans2)
    assert check, 'strcmp95 and all_metrics results differ'

    # Last five entries of the all_metrics result are the individual weights.
    (weight_jaro, weight_typo, weight_winkler, weight_winkler_typo,
     weight_longer) = ans2[-5:]

    assert weight_jaro == jaro.metric_jaro(s1, s2)
    assert weight_winkler == jaro.metric_jaro_winkler(s1, s2)

    check_original = jaro.metric_original(s1, s2)
    if larger_tol:
        assert weight_longer == check_original
    else:
        # Flag off: the final weight must coincide with the winkler-typo one.
        assert weight_longer == weight_winkler_typo
Esempio n. 5
0
def original_metric(string1, string2):
    """Return ``jaro.metric_original`` evaluated on the two given strings."""
    score = jaro.metric_original(string1, string2)
    return score
Esempio n. 6
0
def original_metric(string1, string2):
    """Delegate to ``jaro.metric_original`` and pass its result through."""
    result = jaro.metric_original(string1, string2)
    return result