def test_http():
    """A URL is returned whole when the HTTP handler keeps its results."""
    searcher = TextSearch("ignore", "norm")
    searcher.add_http_handler(keep_result=True)

    found = searcher.findall("http://google.com")

    assert found == ["http://google.com"]
def test_add_dict():
    """Adding a dict makes each key report its mapped value in the results."""
    mapping = {"hi": "greeting", "bye": "bye", "goodbye": "bye"}
    searcher = TextSearch("smart", "norm")
    searcher.add(mapping)

    found = searcher.findall("hi bye goodbye")

    assert found == ["greeting", "bye", "bye"]
def test_serializable():
    """Results built with ``dict`` as the handler are non-empty and JSON-dumpable."""
    searcher = TextSearch("sensitive", dict)
    searcher.add("hi")

    matches = searcher.findall("hi")

    # Something must be found before serialization is worth checking.
    assert matches
    # json.dumps raises on non-serializable objects, so this doubles as a type check.
    assert json.dumps(matches)
def test_ignore_match():
    """With "ignore" + "match", both "hi" and "HI" yield "hi"; unrelated text yields nothing."""
    searcher = TextSearch("ignore", "match")
    searcher.add("hi")

    cases = [
        ("hi", ["hi"]),
        ("HI", ["hi"]),
        ("asdf", []),
    ]
    for text, expected in cases:
        assert searcher.findall(text) == expected
def test_add_list():
    """Every word added via a list is found, in the order it appears in the text."""
    words = ["hi", "bye", "hello"]
    searcher = TextSearch("smart", "match")
    searcher.add(words)

    found = searcher.findall("hi bye hello")

    assert found == ["hi", "bye", "hello"]
def test_fast_no_bounds():
    """With two empty sets as extra arguments, "hi" is found inside a longer word."""
    # NOTE(review): the two empty sets are presumably the left/right boundary
    # character sets -- confirm against the TextSearch signature.
    searcher = TextSearch("sensitive", "match", set(), set())
    searcher.add("hi")

    found = searcher.findall("asdfhiadsfs")

    assert found
def test_sensitive_match():
    """In "sensitive" mode, "hi" matches itself but not its upper-case form."""
    searcher = TextSearch("sensitive", "object")
    searcher.add("hi")

    same_case = searcher.findall("hi")
    assert same_case

    other_case = searcher.findall("HI")
    assert not other_case
def test_insensitive_object():
    """The first "object" result for "HI" reports an ``end`` of 2."""
    searcher = TextSearch("insensitive", "object")
    searcher.add("hi")

    first = searcher.findall("HI")[0]

    assert first.end == 2
def test_regex_norm():
    """A regex handler on the word "last " plus a digit keeps "last 5" as the result."""
    words = ["last "]
    searcher = TextSearch("insensitive", "norm")
    searcher.add_regex_handler(words, r"\d", keep_result=True)

    found = searcher.findall("last 5")

    assert found == ["last 5"]
def test_not_overlap_3():
    """With "a" and "a a" both added, "a a a" yields "a a" followed by "a"."""
    searcher = TextSearch("ignore", "norm")
    for word in ("a", "a a"):
        searcher.add(word)

    found = searcher.findall("a a a")

    assert found == ["a a", "a"]
def test_postfix_regex():
    """With ``prefix=False``, the regex part is matched before the word "products"."""
    words = ["products"]
    searcher = TextSearch("ignore", "norm")
    searcher.add_regex_handler(words, r"\d+ ", keep_result=True, prefix=False)

    found = searcher.findall("90 products")

    assert found == ["90 products"]
def test_not_overlap():
    """A plain "http://" entry plus the HTTP handler still yields one result for a URL."""
    searcher = TextSearch("ignore", "norm")
    searcher.add("http://")
    searcher.add_http_handler(True)

    matches = searcher.findall("https://vks.ai")

    assert len(matches) == 1
def test_insensitive_match():
    """With "insensitive" + "match", the result is the text as written in the input."""
    searcher = TextSearch("insensitive", "match")
    searcher.add("hi")

    found = searcher.findall("HI")

    assert found == ["HI"]
def test_right_bounds():
    """By default, "hi" is not reported when it is followed directly by more letters."""
    searcher = TextSearch("sensitive", "match")
    searcher.add("hi")

    found = searcher.findall("hiasf")

    assert not found
def test_http_no_keep():
    """With ``keep_result=False``, a URL yields nothing, not even the "google" inside it."""
    searcher = TextSearch("ignore", "norm")
    searcher.add_http_handler(keep_result=False)
    searcher.add("google")

    found = searcher.findall("http://google.com")

    assert found == []
def test_regex_overlap():
    """With both a regex handler and plain "last" added, the first result's norm is "last 5"."""
    words = ["last "]
    searcher = TextSearch("insensitive", "object")
    searcher.add_regex_handler(words, r"\d", keep_result=True)
    searcher.add("last")

    first = searcher.findall("last 5")[0]

    assert first.norm == "last 5"
def test_twitter():
    """The Twitter handler returns both "@hello" and "#hello" tokens whole."""
    searcher = TextSearch("ignore", "norm")
    searcher.add_twitter_handler(keep_result=True)

    for token in ("@hello", "#hello"):
        assert searcher.findall(token) == [token]
def test_ignore_norm():
    """In "norm" mode the second argument to ``add`` is what a match returns."""
    searcher = TextSearch("ignore", "norm")
    searcher.add("hi", "HI")

    cases = [
        ("hi", ["HI"]),
        ("asdf", []),
    ]
    for text, expected in cases:
        assert searcher.findall(text) == expected