def test4():
    """Jaccard score of wildcard-augmented shingle sets stays in (0.412, 0.413).

    Both words are shingled with k=8, each set is merged with the result of
    ``ks.wildcard_shinglesets(..., n_max_wildcards=1)`` computed from it, and
    the two merged sets are then compared with ``ks.jaccard``.
    """
    shingle_k = 8

    left = ks.shingleset_k("hamsterkäufe", shingle_k)
    left = left.union(ks.wildcard_shinglesets(left, n_max_wildcards=1))

    right = ks.shingleset_k("hamsterkauf", shingle_k)
    right = right.union(ks.wildcard_shinglesets(right, n_max_wildcards=1))

    similarity = ks.jaccard(left, right)
    # Expected value is pinned to a narrow open interval rather than an
    # exact float.
    assert similarity > 0.412
    assert similarity < 0.413
def test2():
    """The three shingle-set builders produce the same set for "abc".

    ``shingleset_k`` with k=3, ``shingleset_range`` with bounds 1 and 3, and
    ``shingleset_list`` with [1, 2, 3] must all yield every substring of
    "abc" of length 1 to 3.
    """
    expected = {"a", "b", "c", "ab", "bc", "abc"}

    by_k = ks.shingleset_k("abc", k=3)
    by_range = ks.shingleset_range("abc", 1, 3)
    by_list = ks.shingleset_list("abc", [1, 2, 3])

    assert by_k == expected
    assert by_range == by_k
    assert by_list == by_range
def test3():
    """Jaccard score of the k=8 shingle sets of the two words is in (0.448, 0.449)."""
    shingle_k = 8
    left = ks.shingleset_k("hamsterkäufe", shingle_k)
    right = ks.shingleset_k("hamsterkauf", shingle_k)

    similarity = ks.jaccard(left, right)
    # Expected value is pinned to a narrow open interval rather than an
    # exact float.
    assert similarity > 0.448
    assert similarity < 0.449
def test_jaccard_k1():
    """Jaccard score of the k=1 shingle sets of the two words is in (0.909, 0.910).

    This test previously reused the name ``test2``, which is also used by
    another test in this module. A later ``def`` with the same name rebinds
    the module attribute, so only one of the two tests was ever collected
    and run. The unique name lets both execute.
    """
    k = 1
    A = ks.shingleset_k("hamsterkäufe", k)
    B = ks.shingleset_k("hamsterkauf", k)
    score = ks.jaccard(A, B)
    # Expected value is pinned to a narrow open interval rather than an
    # exact float.
    assert 0.909 < score < 0.910
def test1():
    """``shingleset_k("abc", k=3)`` returns every substring of length 1 to 3."""
    expected = {"a", "b", "c", "ab", "bc", "abc"}
    result = ks.shingleset_k("abc", k=3)
    assert result == expected