예제 #1
0
    def test_with_fuzzy_string_transpose(self):
        """Intersect exact tokens with the fuzzy query built from ("bar", 1).

        "abr" and "bra" each differ from "bar" by one swap of adjacent
        characters and are expected to come back from the intersection;
        "foo" is expected to produce an empty list.
        """
        # Build every exact token set first, then the fuzzy query.
        cases = [
            (TokenSet.from_string(word), expected)
            for word, expected in (
                ("abr", ["abr"]),
                ("bra", ["bra"]),
                ("foo", []),
            )
        ]
        fuzzy_query = TokenSet.from_fuzzy_string("bar", 1)

        for token_set, expected in cases:
            assert token_set.intersect(fuzzy_query).to_list() == expected
예제 #2
0
    def test_with_fuzzy_string_deletion(self):
        """Intersect exact tokens with the fuzzy query built from ("bar", 1).

        "ar", "br" and "ba" are "bar" with one character removed; together
        with the exact word "bar" they are expected to match, while "foo"
        is expected to produce an empty list.
        """
        # Build every exact token set first, then the fuzzy query.
        cases = [
            (TokenSet.from_string(word), expected)
            for word, expected in (
                ("ar", ["ar"]),
                ("br", ["br"]),
                ("ba", ["ba"]),
                ("bar", ["bar"]),
                ("foo", []),
            )
        ]
        fuzzy_query = TokenSet.from_fuzzy_string("bar", 1)

        for token_set, expected in cases:
            assert token_set.intersect(fuzzy_query).to_list() == expected
예제 #3
0
    def test_with_fuzzy_string_substitution(self):
        """Intersect exact tokens with the fuzzy query built from ("car", 1).

        "bar", "cur" and "cat" each replace a single character of "car";
        together with the exact word "car" they are expected to match,
        while "foo" is expected to produce an empty list.
        """
        # Build every exact token set first, then the fuzzy query.
        cases = [
            (TokenSet.from_string(word), expected)
            for word, expected in (
                ("bar", ["bar"]),
                ("cur", ["cur"]),
                ("cat", ["cat"]),
                ("car", ["car"]),
                ("foo", []),
            )
        ]
        fuzzy_query = TokenSet.from_fuzzy_string("car", 1)

        for token_set, expected in cases:
            assert token_set.intersect(fuzzy_query).to_list() == expected
예제 #4
0
    def test_with_fuzzy_string_insertion(self):
        """Intersect exact tokens with the fuzzy query built from ("bar", 1).

        "bbar", "baar", "barr" and "bara" are "bar" with one extra
        character; they, the exact word "bar", and the shorter "ba" are
        expected to match, while "foo" is expected to produce an empty list.
        """
        # Build every exact token set first, then the fuzzy query. The case
        # order mirrors the order in which the assertions are evaluated.
        cases = [
            (TokenSet.from_string(word), expected)
            for word, expected in (
                ("bbar", ["bbar"]),
                ("baar", ["baar"]),
                ("barr", ["barr"]),
                ("bar", ["bar"]),
                ("ba", ["ba"]),
                ("foo", []),
                ("bara", ["bara"]),
            )
        ]
        fuzzy_query = TokenSet.from_fuzzy_string("bar", 1)

        for token_set, expected in cases:
            assert token_set.intersect(fuzzy_query).to_list() == expected
예제 #5
0
    def test_catastrophic_backtracking_with_leading_characters(self):
        """Intersect a run of 100 "f" characters with the pattern "*f".

        Exactly one result is expected. Per the test name, this input is
        meant to guard against catastrophic backtracking.
        """
        long_run = TokenSet.from_string("f" * 100)
        pattern = TokenSet.from_string("*f")
        matches = long_run.intersect(pattern).to_list()

        assert len(matches) == 1
예제 #6
0
    def test_contained_wildcard_backtracking_no_intersection(self):
        """Expect no match between "ababc" and the pattern "a*ac".

        The word starts with "a" and ends with "c" but never contains
        "ac", so the intersection is expected to be an empty list.
        """
        word = TokenSet.from_string("ababc")
        pattern = TokenSet.from_string("a*ac")
        matches = word.intersect(pattern).to_list()

        assert matches == []
예제 #7
0
    def test_leading_wildcard_backtracking_no_intersection(self):
        """Expect no match between "aaacbab" and the pattern "*abc".

        The word does not end in "abc", so the intersection is expected
        to be an empty list.
        """
        word = TokenSet.from_string("aaacbab")
        pattern = TokenSet.from_string("*abc")
        matches = word.intersect(pattern).to_list()

        assert matches == []
예제 #8
0
    def test_fuzzy_string_transpose(self):
        """Expect "bca" to match the fuzzy query built from ("abc", 2)."""
        word = TokenSet.from_string("bca")
        fuzzy_query = TokenSet.from_fuzzy_string("abc", 2)
        matches = word.intersect(fuzzy_query).to_list()

        assert matches == ["bca"]
예제 #9
0
    def test_leading_wildcard_intersection(self):
        """Expect "cat" to match the leading-wildcard pattern "*t"."""
        word = TokenSet.from_string("cat")
        pattern = TokenSet.from_string("*t")
        matches = word.intersect(pattern).to_list()

        # Compared as a set, so the order of the results is irrelevant.
        assert set(matches) == {"cat"}
예제 #10
0
    def test_no_intersection(self):
        """Expect an empty result when intersecting "cat" with "bar"."""
        left = TokenSet.from_string("cat")
        right = TokenSet.from_string("bar")
        matches = left.intersect(right).to_list()

        assert len(matches) == 0
예제 #11
0
    def test_from_string_with_trailing_wildcard(self):
        """Check the node structure built from the pattern "a*".

        The node reached via edges "a" then "*" is expected to compare
        equal to its own "*" edge target and to be marked final.
        """
        root = TokenSet.from_string("a*")
        after_a = root.edges["a"]
        wildcard_node = after_a.edges["*"]

        assert wildcard_node == wildcard_node.edges["*"]
        assert wildcard_node.final
예제 #12
0
    def test_wildcard_zero_or_more_characters(self):
        """Expect "foo" to match "foo*", i.e. the wildcard may match nothing."""
        word = TokenSet.from_string("foo")
        pattern = TokenSet.from_string("foo*")
        matches = word.intersect(pattern).to_list()

        # Compared as a set, so the order of the results is irrelevant.
        assert set(matches) == {"foo"}
예제 #13
0
    def test_contained_wildcard_no_intersection(self):
        """Expect an empty result when intersecting "foo" with "b*r"."""
        word = TokenSet.from_string("foo")
        pattern = TokenSet.from_string("b*r")
        matches = word.intersect(pattern).to_list()

        assert len(matches) == 0
예제 #14
0
    def test_contained_wildcard_intersection(self):
        """Expect "foo" to match the pattern "f*o" (wildcard in the middle)."""
        word = TokenSet.from_string("foo")
        pattern = TokenSet.from_string("f*o")
        matches = word.intersect(pattern).to_list()

        # Compared as a set, so the order of the results is irrelevant.
        assert set(matches) == {"foo"}
예제 #15
0
    def test_leading_wildcard_no_intersection(self):
        """Expect an empty result when intersecting "cat" with "*r"."""
        word = TokenSet.from_string("cat")
        pattern = TokenSet.from_string("*r")
        matches = word.intersect(pattern).to_list()

        assert len(matches) == 0
예제 #16
0
    def test_leading_atrailing_wildcard_backtracking_intersection(self):
        """Expect "acbaabab" to match the two-wildcard pattern "a*ba*b"."""
        # NOTE(review): "atrailing" in the test name looks like a typo; it is
        # left unchanged so selecting the test by name keeps working.
        word = TokenSet.from_string("acbaabab")
        pattern = TokenSet.from_string("a*ba*b")
        matches = word.intersect(pattern).to_list()

        assert matches == ["acbaabab"]
예제 #17
0
    def test_from_string_without_wildcard(self):
        """Check the string form and final flag of the set built from "a".

        With ``TokenSet._next_id`` set to 1 beforehand, ``str()`` of the
        set is expected to be "0a2" and the target of the "a" edge is
        expected to be final.
        """
        # Presumably makes the ids embedded in str() deterministic.
        # NOTE(review): this class attribute is not restored afterwards, so
        # the assignment stays visible to whatever runs next - confirm that
        # is intended.
        TokenSet._next_id = 1
        token_set = TokenSet.from_string("a")
        rendered = str(token_set)

        assert rendered == "0a2"
        assert token_set.edges["a"].final
예제 #18
0
    def test_fuzzy_string_substitution(self):
        """Expect "axx" to match the fuzzy query built from ("abc", 2).

        "axx" differs from "abc" in its last two characters.
        """
        word = TokenSet.from_string("axx")
        fuzzy_query = TokenSet.from_fuzzy_string("abc", 2)
        matches = word.intersect(fuzzy_query).to_list()

        assert matches == ["axx"]
예제 #19
0
    def test_to_list_includes_single_words(self):
        """Expect ``to_list()`` of a set built from "bat" to hold only "bat"."""
        word = "bat"
        listed = TokenSet.from_string(word).to_list()

        # Compared as a set, so the order of the results is irrelevant.
        assert set(listed) == {word}
예제 #20
0
    def test_fuzzy_string_deletion(self):
        """Expect "a" to match the fuzzy query built from ("abc", 2).

        "a" is "abc" with its last two characters removed.
        """
        word = TokenSet.from_string("a")
        fuzzy_query = TokenSet.from_fuzzy_string("abc", 2)
        matches = word.intersect(fuzzy_query).to_list()

        assert matches == ["a"]
예제 #21
0
 def test_builds_a_token_set_for_the_corpus(self):
     """Expect "test" among the matches from the builder's token set.

     Intersects ``self.builder.token_set`` with a set built from "test";
     the builder is assumed to be prepared by fixture code not shown here.
     """
     query = TokenSet.from_string("test")
     matches = self.builder.token_set.intersect(query).to_list()
     assert "test" in matches
예제 #22
0
    def test_simple_intersection(self):
        """Expect intersecting two sets built from "cat" to yield "cat"."""
        left = TokenSet.from_string("cat")
        right = TokenSet.from_string("cat")
        matches = left.intersect(right).to_list()

        # Compared as a set, so the order of the results is irrelevant.
        assert set(matches) == {"cat"}