def test_confusable_regex__basic_ascii_regex_with_padding(self):
    # With character padding enabled, the pattern built for 'bore' is
    # expected to match the look-alike letters even though they are
    # separated by dots in the sentence.
    sentence = 'Sometimes people say that life can be a ь.𝞂.ř.ɜ, but I don\'t agree'
    matcher = re.compile(
        confusable_regex('bore', include_character_padding=True))
    self.assertTrue(matcher.search(sentence))
def check_if_message_infringes(self, message):
    """Tell whether ``message`` matches the pattern for ``self.banned_word``.

    The pattern is the output of ``confusable_regex`` for the banned word,
    wrapped on both sides by a group that accepts start of string, end of
    string, whitespace, or a word boundary.

    Returns:
        True when ``re.search`` finds the pattern in ``message``,
        False otherwise.
    """
    boundary = '(?:^|$|\\s|\\b)'
    # NOTE(review): the positional True is presumably
    # include_character_padding -- confirm against confusable_regex.
    banned_core = confusable_regex(self.banned_word, True)
    full_pattern = boundary + banned_core + boundary
    return re.search(full_pattern, message) is not None
def test_confusable_regex__dont_treat_pipe_as_wildcard(self):
    # A run of four pipe characters must not be matched by the pattern
    # built for 'bore'.
    sentence = 'Sometimes people say that life can be a ||||, but I don\'t agree'
    matcher = re.compile(confusable_regex('bore'))
    self.assertFalse(matcher.search(sentence))
def test_confusable_regex__match_subwords(self):
    # With match_subword=True the pattern for 'bore' is expected to match
    # both when the look-alike word is followed by an extra letter and
    # when it stands on its own.
    matcher = re.compile(confusable_regex('bore', match_subword=True))
    sentences = (
        'Sometimes people say that life can be a ь𝞂řɜd, but I don\'t agree',
        'Sometimes people say that life can be a ь𝞂řɜ, but I don\'t agree',
    )
    for sentence in sentences:
        self.assertTrue(matcher.search(sentence))
def test_confusable_regex__basic_ascii_regex_without_padding(self):
    # Without character padding, dot-separated look-alike letters must not
    # match the pattern for 'bore', while the contiguous look-alike word
    # must.
    dotted = 'Sometimes people say that life can be a ь.𝞂.ř.ɜ, but I don\'t agree'
    contiguous = 'Sometimes people say that life can be a ь𝞂řɜ, but I don\'t agree'
    matcher = re.compile(confusable_regex('bore'))
    self.assertFalse(matcher.search(dotted))
    self.assertTrue(matcher.search(contiguous))
def test_confusable_regex__match_multi_character_confusion(self):
    # The single character '‷' is expected to be matched by three
    # consecutive apostrophes, but not by one apostrophe alone.
    single_quote = 'Sometimes people say that life can be \' , but I don\'t agree'
    triple_quote = 'Sometimes people say that life can be \'\'\' , but I don\'t agree'
    matcher = re.compile(confusable_regex('‷'))
    self.assertFalse(matcher.search(single_quote))
    self.assertTrue(matcher.search(triple_quote))
def msg_contains_forbidden(msg: str, forbidden_words: List[str]) -> bool:
    """Return True if msg contains, or is confusable with, a forbidden word.

    Each entry of ``forbidden_words`` is turned into a pattern with
    ``confusable_regex`` (character padding enabled) and searched for in
    ``msg``. Checking stops at the first word that matches.

    Args:
        msg: The text to inspect.
        forbidden_words: Words whose plain or confusable spellings are
            not allowed in ``msg``.

    Returns:
        True if any word's pattern is found in ``msg``; False otherwise,
        including when ``forbidden_words`` is empty.
    """
    # any() turns the Match/None search results into a real bool, so the
    # return value honours the declared ``-> bool`` contract instead of
    # leaking a re.Match object (or None) to the caller.
    return any(
        re.search(confusable_regex(word, include_character_padding=True), msg)
        for word in forbidden_words
    )
def confusables(data, line):
    """Add the configured tags to a line that has a confusable spelling.

    NOTE(review): presumably registered as a WeeChat line-hook callback;
    confirm against the hook registration elsewhere in the script.

    ``OPTIONS['list_of_words']`` is split on commas. For each word a
    pattern is built with ``confusable_regex`` (character padding enabled)
    and every match in the message is inspected. A match counts only when
    its text differs, case-insensitively, from the word itself, i.e. the
    plain spelling of the word is not tagged.

    Args:
        data: Unused by this function.
        line: Mapping that provides at least ``'message'`` and ``'tags'``.

    Returns:
        ``{"tags": ...}`` with ``OPTIONS['tags']`` appended to the line's
        tags when a confusable spelling is found, otherwise
        ``weechat.WEECHAT_RC_OK``.
    """
    message = line['message']
    tags = line['tags']

    if OPTIONS['list_of_words'] == "":
        # No words configured, so there is nothing to look for.
        return weechat.WEECHAT_RC_OK

    for word in OPTIONS['list_of_words'].split(','):
        regex = re.compile(
            confusable_regex(word, include_character_padding=True))
        plain = word.lower()
        # Look at every match, not just the first one: a plain occurrence
        # of the word earlier in the message must not hide a confusable
        # spelling that appears later.
        if any(found.group().lower() != plain
               for found in regex.finditer(message)):
            return {"tags": tags + ',' + OPTIONS['tags']}

    return weechat.WEECHAT_RC_OK
def detect_banned_word(message, banned_word):
    """Tell whether ``message`` matches the pattern for ``banned_word``.

    The pattern is the output of ``confusable_regex`` for ``banned_word``
    with a ``\\b`` word-boundary assertion placed before and after it.

    Returns:
        True when ``re.search`` finds the pattern in ``message``,
        False otherwise.
    """
    boundary = '\\b'
    # NOTE(review): the positional True is presumably
    # include_character_padding -- confirm against confusable_regex.
    banned_core = confusable_regex(banned_word, True)
    full_pattern = boundary + banned_core + boundary
    return re.search(full_pattern, message) is not None
def test_confusable_regex__regex_does_not_match_if_only_subset_of_word(self):
    # By default the pattern for 'bore' must not match when 'bore' only
    # appears as the leading part of a longer word ('bored').
    sentence = 'Hopefully you don\'t get bored easily'
    matcher = re.compile(confusable_regex('bore'))
    self.assertFalse(matcher.search(sentence))
def test_confusable_regex__regex_special_characters_are_escaped(self):
    # The '|' in 'e|mo' is expected to act as a literal character (which
    # the look-alike 'l' in 'elmo' satisfies) rather than as regex
    # alternation, so plain 'emo' must not match.
    matcher = re.compile(confusable_regex('e|mo'))
    self.assertTrue(matcher.search('elmo'))
    self.assertFalse(matcher.search('emo'))