def compare(string1, string2, larger_tol):
    """Cross-check ``all_metrics`` against ``strcmp95.strcmp95`` for one pair.

    Both implementations are run on the same pair of strings and their
    results are required to agree, either exactly or with the first two
    fields swapped.  The last five fields of the ``all_metrics`` result are
    then checked against the individual ``jaro.metric_*`` functions.

    Args:
        string1: First string; passed untrimmed to ``strcmp95.strcmp95``.
        string2: Second string; passed untrimmed to ``strcmp95.strcmp95``.
        larger_tol: Flag forwarded to ``strcmp95.strcmp95`` and, as
            ``longer_prob``, to ``all_metrics``.  When truthy the last weight
            must equal ``jaro.metric_original``; otherwise it must equal the
            Winkler-with-typo weight.

    Returns:
        None.  Returns immediately (without checking anything) when both
        strings are empty after stripping.

    Raises:
        AssertionError: If any of the comparisons fails.
    """
    # strcmp95 always trims input, so we have to do the same for our tests
    s1, s2 = string1.strip(), string2.strip()
    if s1 == s2 == '':
        return

    ans1 = strcmp95.strcmp95(string1, string2, larger_tol, to_upper=0, debug=0)
    ans2 = all_metrics(s1, s2, longer_prob=larger_tol)

    # The two implementations may report their first two fields in opposite
    # order (presumably per-string values such as lengths -- TODO confirm);
    # in that case only the remaining fields have to match.
    rearrange = (ans1[:2] != ans2[:2]
                 and ans1[0] == ans2[1]
                 and ans1[1] == ans2[0])
    if rearrange:
        check = ans1[2:] == ans2[2:]
    else:
        check = ans1 == ans2

    if not check:
        # Dump a field-by-field diff before failing.  Every print takes a
        # single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print(rearrange)
        for a1, a2 in zip(ans1, ans2):
            print('%s %s %s' % (str(a1 == a2).ljust(5), a1, a2))
        print(ans1)
        print(ans2)
    assert check, 'strcmp95 and all_metrics disagree for %r / %r' % (s1, s2)

    # The trailing five fields of all_metrics are the individual weights.
    (weight_jaro, _weight_typo, weight_winkler,
     weight_winkler_typo, weight_longer) = ans2[-5:]

    assert weight_jaro == jaro.metric_jaro(s1, s2)
    assert weight_winkler == jaro.metric_jaro_winkler(s1, s2)

    check_original = jaro.metric_original(s1, s2)
    if larger_tol:
        assert weight_longer == check_original
    else:
        assert weight_longer == weight_winkler_typo
def test():
    """Check the ``jaro`` metrics against every entry of ``jaro_tests``.

    Each entry is laid out as ``(s1, s2, num_matches, half_transposes,
    jaro, winkler, original)`` where the three weights are strings
    formatted with ``'%7.5f'``.  For every entry that matches, one aligned
    row with the strings, counts and weights is printed.

    Raises:
        AssertionError: On the first entry whose computed values differ from
            the expected ones (after printing both for comparison).
    """
    import jaro

    for case in jaro_tests:
        s1, s2 = case[:2]

        # Only the match and transposition counts are checked here; the
        # remaining fields are unpacked to keep the 7-field shape check.
        (_len1, _len2, num_matches, half_transposes,
         _typo_score, _pre_matches, _adjust_long) = jaro.string_metrics(s1, s2)

        # TODO: Test for the custom function?
        weights = [
            jaro.metric_jaro(s1, s2),
            jaro.metric_jaro_winkler(s1, s2),
            jaro.metric_original(s1, s2),
        ]

        check = [num_matches, half_transposes]
        check.extend('%7.5f' % w for w in weights)

        if check != list(case[2:]):
            # Single-argument print(...) gives the same output whether print
            # is a statement (Python 2) or a function (Python 3).
            print('')
            print('%s %s' % (s1, s2))
            print(check)
            print(case[2:])
            raise AssertionError(
                'jaro metrics mismatch for %r / %r' % (s1, s2))

        columns = []
        for s in (s1, s2):
            # Blank strings would be invisible in the table, so show them as
            # a run of dashes one longer than the original string.
            if s.strip() == '':
                s = '-' * (len(s) + 1)
            columns.append(s.ljust(12))
        columns.extend(str(n).rjust(2) for n in (num_matches, half_transposes))
        columns.extend(' %7.5f' % w for w in weights)
        print(' '.join(columns))
def test():
    """Check the ``jaro`` metrics against every entry of ``jaro_tests``.

    Each entry is laid out as ``(s1, s2, num_matches, half_transposes,
    jaro, winkler, original)`` where the three weights are strings
    formatted with ``'%7.5f'``.  For every entry that matches, one aligned
    row with the strings, counts and weights is printed.

    Raises:
        AssertionError: On the first entry whose computed values differ from
            the expected ones (after printing both for comparison).
    """
    import jaro

    for case in jaro_tests:
        s1, s2 = case[:2]

        # Only the match and transposition counts are checked here; the
        # remaining fields are unpacked to keep the 7-field shape check.
        (_len1, _len2, num_matches, half_transposes,
         _typo_score, _pre_matches, _adjust_long) = jaro.string_metrics(s1, s2)

        # TODO: Test for the custom function?
        weights = [
            jaro.metric_jaro(s1, s2),
            jaro.metric_jaro_winkler(s1, s2),
            jaro.metric_original(s1, s2),
        ]

        check = [num_matches, half_transposes]
        check.extend('%7.5f' % w for w in weights)

        if check != list(case[2:]):
            # Single-argument print(...) gives the same output whether print
            # is a statement (Python 2) or a function (Python 3).
            print('')
            print('%s %s' % (s1, s2))
            print(check)
            print(case[2:])
            raise AssertionError(
                'jaro metrics mismatch for %r / %r' % (s1, s2))

        columns = []
        for s in (s1, s2):
            # Blank strings would be invisible in the table, so show them as
            # a run of dashes one longer than the original string.
            if s.strip() == '':
                s = '-' * (len(s) + 1)
            columns.append(s.ljust(12))
        columns.extend(str(n).rjust(2) for n in (num_matches, half_transposes))
        columns.extend(' %7.5f' % w for w in weights)
        print(' '.join(columns))
def compare(string1, string2, larger_tol):
    """Cross-check ``all_metrics`` against ``strcmp95.strcmp95`` for one pair.

    Both implementations are run on the same pair of strings and their
    results are required to agree, either exactly or with the first two
    fields swapped.  The last five fields of the ``all_metrics`` result are
    then checked against the individual ``jaro.metric_*`` functions.

    Args:
        string1: First string; passed untrimmed to ``strcmp95.strcmp95``.
        string2: Second string; passed untrimmed to ``strcmp95.strcmp95``.
        larger_tol: Flag forwarded to ``strcmp95.strcmp95`` and, as
            ``longer_prob``, to ``all_metrics``.  When truthy the last weight
            must equal ``jaro.metric_original``; otherwise it must equal the
            Winkler-with-typo weight.

    Returns:
        None.  Returns immediately (without checking anything) when both
        strings are empty after stripping.

    Raises:
        AssertionError: If any of the comparisons fails.
    """
    # strcmp95 always trims input, so we have to do the same for our tests
    s1, s2 = string1.strip(), string2.strip()
    if s1 == s2 == '':
        return

    ans1 = strcmp95.strcmp95(string1, string2, larger_tol, to_upper=0, debug=0)
    ans2 = all_metrics(s1, s2, longer_prob=larger_tol)

    # The two implementations may report their first two fields in opposite
    # order (presumably per-string values such as lengths -- TODO confirm);
    # in that case only the remaining fields have to match.
    rearrange = (ans1[:2] != ans2[:2]
                 and ans1[0] == ans2[1]
                 and ans1[1] == ans2[0])
    if rearrange:
        check = ans1[2:] == ans2[2:]
    else:
        check = ans1 == ans2

    if not check:
        # Dump a field-by-field diff before failing.  Every print takes a
        # single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print(rearrange)
        for a1, a2 in zip(ans1, ans2):
            print('%s %s %s' % (str(a1 == a2).ljust(5), a1, a2))
        print(ans1)
        print(ans2)
    assert check, 'strcmp95 and all_metrics disagree for %r / %r' % (s1, s2)

    # The trailing five fields of all_metrics are the individual weights.
    (weight_jaro, _weight_typo, weight_winkler,
     weight_winkler_typo, weight_longer) = ans2[-5:]

    assert weight_jaro == jaro.metric_jaro(s1, s2)
    assert weight_winkler == jaro.metric_jaro_winkler(s1, s2)

    check_original = jaro.metric_original(s1, s2)
    if larger_tol:
        assert weight_longer == check_original
    else:
        assert weight_longer == weight_winkler_typo
def jaro_metric(string1, string2):
    """Return ``jaro.metric_jaro`` evaluated on the two given strings."""
    score = jaro.metric_jaro(string1, string2)
    return score