def compare(string1, string2, larger_tol): # strcmp95 always trims input, so we have to do the same for our tests s1, s2 = [s.strip() for s in [string1, string2]] if s1 == s2 == '': return ans1 = strcmp95.strcmp95(string1, string2, larger_tol, to_upper=0, debug=0) ans2 = all_metrics(s1, s2, longer_prob=larger_tol) weights = ans2[-5:] rearrange = ans1[:2]!=ans2[:2] and ans1[0]==ans2[1] and ans1[1]==ans2[0] check = ((rearrange and ans1[2:] == ans2[2:]) or (not rearrange and ans1 == ans2)) # print ('-->', s1, s2, larger_tol, rearrange, check) if not check: print rearrange for a1, a2 in zip(ans1, ans2): print str(a1==a2).ljust(5), a1, a2 print ans1 print ans2 assert check (weight_jaro, weight_typo, weight_winkler, weight_winkler_typo, weight_longer) = weights assert weight_jaro == jaro.metric_jaro(s1, s2) assert weight_winkler == jaro.metric_jaro_winkler(s1, s2) check_original = jaro.metric_original(s1, s2) if larger_tol: assert weight_longer == check_original else: assert weight_longer == weight_winkler_typo
def compare(string1, string2, larger_tol, to_upper): flag_str = ''.join([str(int(f)) for f in [larger_tol, to_upper]]) old_stdout = sys.stdout cout = run_oracle(string1, string2, flag_str) new_stdout = cStringIO.StringIO() sys.stdout = new_stdout ans1 = strcmp95.strcmp95(string1, string2, not(larger_tol), not(to_upper)) pyout = new_stdout.getvalue() new_stdout.close() sys.stdout = old_stdout if cout != pyout: print print 'Mismatch!' cout = cout.split('\n') pyout = pyout.split('\n') len1 = len(cout) len2 = len(pyout) for i in xrange(max(len1, len2)): s1 = cout[i] if i < len1 else '' s2 = pyout[i] if i < len2 else '' print str(s1==s2).ljust(6), s1.ljust(47), s2 print print repr(cout) print repr(pyout) raise AssertionError
def compare(string1, string2, larger_tol, to_upper): flag_str = ''.join([str(int(f)) for f in [larger_tol, to_upper]]) old_stdout = sys.stdout cout = run_oracle(string1, string2, flag_str) new_stdout = cStringIO.StringIO() sys.stdout = new_stdout ans1 = strcmp95.strcmp95(string1, string2, not (larger_tol), not (to_upper)) pyout = new_stdout.getvalue() new_stdout.close() sys.stdout = old_stdout if cout != pyout: print print 'Mismatch!' cout = cout.split('\n') pyout = pyout.split('\n') len1 = len(cout) len2 = len(pyout) for i in xrange(max(len1, len2)): s1 = cout[i] if i < len1 else '' s2 = pyout[i] if i < len2 else '' print str(s1 == s2).ljust(6), s1.ljust(47), s2 print print repr(cout) print repr(pyout) raise AssertionError
def compare(string1, string2, larger_tol): # strcmp95 always trims input, so we have to do the same for our tests s1, s2 = [s.strip() for s in [string1, string2]] if s1 == s2 == '': return ans1 = strcmp95.strcmp95(string1, string2, larger_tol, to_upper=0, debug=0) ans2 = all_metrics(s1, s2, longer_prob=larger_tol) weights = ans2[-5:] rearrange = ans1[:2] != ans2[:2] and ans1[0] == ans2[1] and ans1[ 1] == ans2[0] check = ((rearrange and ans1[2:] == ans2[2:]) or (not rearrange and ans1 == ans2)) # print ('-->', s1, s2, larger_tol, rearrange, check) if not check: print rearrange for a1, a2 in zip(ans1, ans2): print str(a1 == a2).ljust(5), a1, a2 print ans1 print ans2 assert check (weight_jaro, weight_typo, weight_winkler, weight_winkler_typo, weight_longer) = weights assert weight_jaro == jaro.metric_jaro(s1, s2) assert weight_winkler == jaro.metric_jaro_winkler(s1, s2) check_original = jaro.metric_original(s1, s2) if larger_tol: assert weight_longer == check_original else: assert weight_longer == weight_winkler_typo