def tag(string):
    """
    Tags a string.

    Tags are tried in the order given by ``regex_segmenting_order``; for each
    tag, every compiled pattern in ``compiled_regex_segmenting_dict[tag]`` is
    searched in the output of ``remove_accents(string)``. The first pattern
    that matches decides the result.

    Parameters
    ----------
    string : str,
        Text to tag. It is returned unchanged; only the accent-free copy is
        used for matching.

    Returns
    -------
    tuples : list of tuples and boolean
        ``([(string, tag)], True)`` when a pattern matched and the sentence
        is tagged, ``(string, False)`` otherwise.
    """
    sentence_with_no_accent = remove_accents(string)
    # Named `label` rather than `tag` so the loop variable does not shadow
    # this function's own name.
    for label in regex_segmenting_order:
        for compiled_regex in compiled_regex_segmenting_dict[label]:
            if not compiled_regex.search(sentence_with_no_accent):
                continue
            # For these three tags, also look for words of the flag list:
            # per the original author's note, such words mean the sentence
            # contains information that belongs to the body, so it is left
            # untagged. Only the keys (patterns) of regex_flags_dict are used.
            if label in ("HELLO", "GREETINGS", "THANKS") and any(
                re.search(
                    pattern=flag_regex,
                    string=sentence_with_no_accent,
                    flags=re.IGNORECASE,
                )
                for flag_regex in regex_flags_dict
            ):
                return string, False
            return [(string, label)], True

    return string, False
def tag(string):
    """
    Tags a string.

    Walks ``compiled_regex_segmenting_dict`` in its iteration order and
    searches each compiled pattern in the output of
    ``remove_accents(string)``; the first key with a matching pattern is
    used as the tag.

    Parameters
    ----------
    string : str,
        Text to tag. It is returned unchanged in both outcomes.

    Returns
    -------
    tuples : list of tuples and boolean
        ``([(string, key)], True)`` on the first match,
        ``(string, False)`` when no pattern matches.
    """
    labelled_patterns = compiled_regex_segmenting_dict.items()
    unaccented = remove_accents(string)
    for label, patterns in labelled_patterns:
        # any() stops at the first pattern that matches, like the nested loop.
        if any(pattern.search(unaccented) for pattern in patterns):
            return [(string, label)], True
    return string, False
def tag(string):
    """
    Tags a string.

    Walks ``regex_segmenting_dict`` in its iteration order. Each raw pattern
    has its spaces replaced by ``regex_tag`` and is then searched,
    case-insensitively, in the output of ``remove_accents(string)``; the
    first key with a matching pattern is used as the tag.

    Parameters
    ----------
    string : str,
        Text to tag. It is returned unchanged in both outcomes.

    Returns
    -------
    tuples : list of tuples and boolean
        ``([(string, key)], True)`` on the first match,
        ``(string, False)`` when no pattern matches.
    """
    labelled_patterns = regex_segmenting_dict.items()
    unaccented = remove_accents(string)
    for label, raw_patterns in labelled_patterns:
        for raw_pattern in raw_patterns:
            # Every literal space in the pattern is swapped for regex_tag
            # before the search.
            pattern = raw_pattern.replace(" ", regex_tag)
            if re.search(pattern, unaccented, re.IGNORECASE) is None:
                continue
            return [(string, label)], True
    return string, False
def test_remove_accents():
    """Check that remove_accents turns "éèëêàù" into "eeeeau"."""
    accented = "éèëêàù"
    stripped = remove_accents(accented)
    np.testing.assert_string_equal(stripped, "eeeeau")