コード例 #1
0
def compare(string1, string2, larger_tol):
    """Cross-check strcmp95.strcmp95 against all_metrics for one string pair.

    Both implementations are run on the same pair and their result sequences
    must be equal, except that the first two entries may appear in swapped
    order.  The last five entries of the all_metrics result are then checked
    against the individual ``jaro`` metric functions.

    :param string1: first string; handed to strcmp95 untrimmed
    :param string2: second string; handed to strcmp95 untrimmed
    :param larger_tol: flag forwarded to strcmp95 and, as ``longer_prob``, to
        all_metrics; it also selects which value the last weight must equal
    :raises AssertionError: when any of the comparisons fails
    """
    # strcmp95 always trims its input, so the other metrics get trimmed text
    left = string1.strip()
    right = string2.strip()
    if not (left or right):
        # both inputs are blank after trimming: nothing to compare
        return

    reference = strcmp95.strcmp95(string1, string2, larger_tol,
                                  to_upper=0, debug=0)
    candidate = all_metrics(left, right, longer_prob=larger_tol)

    # The two leading entries are allowed to come back in swapped order.
    swapped = (reference[:2] != candidate[:2]
               and reference[0] == candidate[1]
               and reference[1] == candidate[0])
    if swapped:
        matches = reference[2:] == candidate[2:]
    else:
        matches = reference == candidate

    if not matches:
        # Dump an entry-by-entry comparison before failing the assertion.
        print(swapped)
        for ref_item, cand_item in zip(reference, candidate):
            print('%s %s %s' % (str(ref_item == cand_item).ljust(5),
                                ref_item, cand_item))
        print(reference)
        print(candidate)
    assert matches

    jaro_w, _typo_w, winkler_w, winkler_typo_w, longer_w = candidate[-5:]

    assert jaro_w == jaro.metric_jaro(left, right)
    assert winkler_w == jaro.metric_jaro_winkler(left, right)

    original_w = jaro.metric_original(left, right)
    expected_longer = original_w if larger_tol else winkler_typo_w
    assert longer_w == expected_longer
コード例 #2
0
def compare(string1, string2, larger_tol, to_upper):
    """Check that strcmp95.strcmp95 prints the same text as the oracle.

    The oracle output is obtained from ``run_oracle``; the Python output is
    whatever ``strcmp95.strcmp95`` writes to ``sys.stdout`` while it is
    temporarily redirected into an in-memory buffer.  On a mismatch a
    line-by-line comparison is printed and AssertionError is raised.

    :param string1: first string to compare
    :param string2: second string to compare
    :param larger_tol: flag; encoded as 0/1 in the oracle's flag string
    :param to_upper: flag; encoded as 0/1 in the oracle's flag string
    :raises AssertionError: when the two outputs differ
    """
    flag_str = ''.join([str(int(f)) for f in [larger_tol, to_upper]])

    old_stdout = sys.stdout
    cout = run_oracle(string1, string2, flag_str)

    new_stdout = cStringIO.StringIO()
    sys.stdout = new_stdout
    try:
        # NOTE(review): both flags are negated for the Python call but not
        # in flag_str; presumably the two sides use opposite polarity --
        # confirm against strcmp95's signature.
        strcmp95.strcmp95(string1, string2, not larger_tol, not to_upper)
        pyout = new_stdout.getvalue()
    finally:
        # Always give stdout back, even when strcmp95 raises; otherwise
        # every later print in the process lands in a dead buffer.
        sys.stdout = old_stdout
        new_stdout.close()

    if cout == pyout:
        return

    print('')
    print('Mismatch!')
    cout_lines = cout.split('\n')
    pyout_lines = pyout.split('\n')

    # Pad the shorter side with empty lines so both can be walked together.
    height = max(len(cout_lines), len(pyout_lines))
    padded_c = cout_lines + [''] * (height - len(cout_lines))
    padded_py = pyout_lines + [''] * (height - len(pyout_lines))
    for c_line, py_line in zip(padded_c, padded_py):
        print('%s %s %s' % (str(c_line == py_line).ljust(6),
                            c_line.ljust(47), py_line))
    print('')
    print(repr(cout_lines))
    print(repr(pyout_lines))
    raise AssertionError(
        'strcmp95 output differs from oracle for %r / %r (flags %s)'
        % (string1, string2, flag_str))
コード例 #3
0
def compare(string1, string2, larger_tol, to_upper):
    """Verify that strcmp95.strcmp95 and the oracle emit identical output.

    ``run_oracle`` supplies the expected text.  The text produced by
    ``strcmp95.strcmp95`` is captured by swapping ``sys.stdout`` for an
    in-memory buffer for the duration of the call.  If the two texts differ,
    a side-by-side line report is printed and AssertionError is raised.

    :param string1: first string to compare
    :param string2: second string to compare
    :param larger_tol: flag; rendered as 0/1 in the oracle's flag string
    :param to_upper: flag; rendered as 0/1 in the oracle's flag string
    :raises AssertionError: when the captured output and the oracle differ
    """
    flag_str = ''.join([str(int(f)) for f in [larger_tol, to_upper]])

    old_stdout = sys.stdout
    cout = run_oracle(string1, string2, flag_str)

    new_stdout = cStringIO.StringIO()
    sys.stdout = new_stdout
    try:
        # NOTE(review): the Python call receives the negated flags while
        # flag_str carries them as given; presumably intentional -- confirm.
        strcmp95.strcmp95(string1, string2, not larger_tol, not to_upper)
        pyout = new_stdout.getvalue()
    finally:
        # Restore stdout and release the buffer on every exit path so an
        # exception inside strcmp95 cannot leave stdout redirected.
        sys.stdout = old_stdout
        new_stdout.close()

    if cout != pyout:
        print('')
        print('Mismatch!')
        cout_lines = cout.split('\n')
        pyout_lines = pyout.split('\n')
        len1 = len(cout_lines)
        len2 = len(pyout_lines)
        # Walk as many rows as the longer output has; a missing row on the
        # shorter side is shown as an empty line.
        for i in range(max(len1, len2)):
            s1 = cout_lines[i] if i < len1 else ''
            s2 = pyout_lines[i] if i < len2 else ''
            print('%s %s %s' % (str(s1 == s2).ljust(6), s1.ljust(47), s2))
        print('')
        print(repr(cout_lines))
        print(repr(pyout_lines))
        raise AssertionError(
            'strcmp95 output differs from oracle for %r / %r (flags %s)'
            % (string1, string2, flag_str))
コード例 #4
0
def compare(string1, string2, larger_tol):
    """Assert that strcmp95.strcmp95 and all_metrics agree on a string pair.

    The result sequences of the two implementations must match entry for
    entry; the only tolerated difference is the first two entries being
    exchanged.  Afterwards the trailing five entries of the all_metrics
    result are validated against the stand-alone ``jaro`` metrics.

    :param string1: first string; passed to strcmp95 as given
    :param string2: second string; passed to strcmp95 as given
    :param larger_tol: flag given to strcmp95 and, as ``longer_prob``, to
        all_metrics; also decides what the final weight is compared with
    :raises AssertionError: if the implementations disagree
    """
    # strcmp95 always trims input, so we have to do the same for our tests
    trimmed = [text.strip() for text in (string1, string2)]
    first, second = trimmed
    if first == '' and second == '':
        return

    old_result = strcmp95.strcmp95(
        string1, string2, larger_tol, to_upper=0, debug=0)
    new_result = all_metrics(first, second, longer_prob=larger_tol)

    # True when the two leading entries differ only by being exchanged.
    exchanged = (old_result[:2] != new_result[:2]
                 and old_result[0] == new_result[1]
                 and old_result[1] == new_result[0])
    if exchanged:
        agree = old_result[2:] == new_result[2:]
    else:
        agree = old_result == new_result

    if not agree:
        # Show each pair of entries with an equality marker in front.
        print(exchanged)
        for old_item, new_item in zip(old_result, new_result):
            marker = str(old_item == new_item).ljust(5)
            print('%s %s %s' % (marker, old_item, new_item))
        print(old_result)
        print(new_result)
    assert agree

    tail = new_result[-5:]
    (w_jaro, _w_typo, w_winkler, w_winkler_typo, w_longer) = tail

    assert w_jaro == jaro.metric_jaro(first, second)
    assert w_winkler == jaro.metric_jaro_winkler(first, second)

    w_original = jaro.metric_original(first, second)
    if not larger_tol:
        assert w_longer == w_winkler_typo
    else:
        assert w_longer == w_original