def should_parse(self, message: discord.Message) -> bool:
    """
    Decide whether `message` qualifies for parsing.

    Every one of these conditions must hold (checked in this order, stopping at the first failure):

    1. The author is not a bot
    2. `is_valid_channel` accepts the channel the message was sent in
    3. `has_lines` accepts the content for `constants.CodeBlock.minimum_lines`
    4. `TokenRemover.find_token_in_message` finds nothing in the message
    5. `WEBHOOK_URL_RE` does not match the content
    """
    # Bots never qualify, so bail out before doing any further work.
    if message.author.bot:
        return False

    return (
        self.is_valid_channel(message.channel)
        and has_lines(message.content, constants.CodeBlock.minimum_lines)
        and not (
            TokenRemover.find_token_in_message(message)
            or WEBHOOK_URL_RE.search(message.content)
        )
    )
def test_extract_user_id_invalid(self):
    """`extract_user_id` should return `None` for non-digit, non-ASCII, and non-Base64 IDs."""
    # Maps a description of each case (used as the subtest label) to the ID under test.
    invalid_ids = {
        "non-digit ASCII": "SGVsbG8gd29ybGQ",
        "cyrillic text": "0J_RgNC40LLQtdGCINC80LjRgA",
        "Unicode digits": "4pO14p6L4p6C4pG34p264pGl8J-EiOKSj-KCieKBsA",
        "Unicode numerals": "4oaA4oaB4oWh4oWi4Lyz4Lyq4Lyr4LG9",
        "Unicode decimals": "8J2fjvCdn5nwnZ-k8J2fr_Cdn7rgravvvJngr6c",
        "ASCII invalid Base64": "{hello}[world]&(bye!)",
        "Unicode invalid Base64": "Þíß-ï§-ňøẗ-våłìÐ",
    }

    for description, encoded_id in invalid_ids.items():
        with self.subTest(msg=description):
            self.assertIsNone(TokenRemover.extract_user_id(encoded_id))
def test_format_log_message(self, log_message):
    """The log message template should be filled in with details of the message and the token."""
    token = Token("NDcyMjY1OTQzMDYyNDEzMzMy", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4")
    log_message.format.return_value = "Howdy"

    formatted = TokenRemover.format_log_message(self.msg, token)

    # Whatever the template's `format` produced must be passed through untouched.
    self.assertEqual(formatted, log_message.format.return_value)

    # Only the trailing "jf4" of the HMAC is expected in clear text.
    expected_kwargs = {
        "author": format_user(self.msg.author),
        "channel": self.msg.channel.mention,
        "user_id": token.user_id,
        "timestamp": token.timestamp,
        "hmac": "xxxxxxxxxxxxxxxxxxxxxxxxjf4",
    }
    log_message.format.assert_called_once_with(**expected_kwargs)
def test_format_userid_log_message_bot(self, known_user_log_message):
    """The user ID portion should be formatted as a known bot when the ID resolves to a bot member."""
    token = Token("NDcyMjY1OTQzMDYyNDEzMzMy", "XsySD_", "s45jqDV_Iisn-symw0yDRrk_jf4")
    known_user_log_message.format.return_value = " Partner"

    # Make the guild lookup resolve to a bot member named "Sam".
    msg = MockMessage(id=555, content="hello world")
    member = msg.guild.get_member.return_value
    member.__str__.return_value = "Sam"
    member.bot = True

    result = TokenRemover.format_userid_log_message(msg, token)

    expected = (known_user_log_message.format.return_value, False)
    self.assertEqual(result, expected)

    expected_kwargs = {
        "user_id": 472265943062413332,
        "user_name": "Sam",
        "kind": "BOT",
    }
    known_user_log_message.format.assert_called_once_with(**expected_kwargs)
async def relay_message(self, msg: Message) -> None:
    """
    Relays the message to the relevant watch channel.

    The message history is reset and a header is sent through `send_header` whenever the author
    or channel differs from the recorded history, or once the recorded message count reaches
    `BigBrotherConfig.header_message_limit`.

    Content in which a bot or webhook token is detected is replaced by a censorship notice.
    Otherwise, every non-media URL in the cleaned content is wrapped in backticks.
    """
    limit = BigBrotherConfig.header_message_limit
    history = self.message_history

    header_needed = (
        msg.author.id != history.last_author
        or msg.channel.id != history.last_channel
        or history.message_count >= limit
    )
    if header_needed:
        self.message_history = MessageHistory(last_author=msg.author.id, last_channel=msg.channel.id)
        await self.send_header(msg)

    contains_secret = TokenRemover.find_token_in_message(msg) or WEBHOOK_URL_RE.search(msg.content)
    if contains_secret:
        cleaned_content = "Content is censored because it contains a bot or webhook token."
    else:
        cleaned_content = msg.clean_content
        if cleaned_content:
            # Wrapping a URL in an inline code block stops it from generating an embed.
            # URLs of image/video embeds are left alone.
            media_urls = {embed.url for embed in msg.embeds if embed.type in ("image", "video")}
            non_media_urls = [url for url in URL_RE.findall(cleaned_content) if url not in media_urls]
            for url in non_media_urls:
                cleaned_content = cleaned_content.replace(url, f"`{url}`")
def test_find_token_valid_match(self, token_re, token_cls, is_valid_id, is_valid_timestamp):
    """`find_token_in_message` should return the `Token` of the first match with a valid user ID and timestamp."""
    def make_pair(spec):
        """Create two spec-locked autospec instances of `spec`."""
        return [mock.create_autospec(spec, spec_set=True, instance=True) for _ in range(2)]

    matches = make_pair(Match)
    tokens = make_pair(Token)

    token_re.finditer.return_value = matches
    token_cls.side_effect = tokens
    # Reject the user ID of the first match and accept that of the second.
    is_valid_id.side_effect = (False, True)
    is_valid_timestamp.return_value = True

    found = TokenRemover.find_token_in_message(self.msg)

    self.assertEqual(tokens[1], found)
    token_re.finditer.assert_called_once_with(self.msg.content)
async def test_take_action(self, format_log_message, format_userid_log_message, logger, mod_log_property):
    """Taking action should delete the message, notify the channel, and send a mod log."""
    cog = TokenRemover(self.bot)

    log_msg = "testing123"
    userid_log_message = "userid-log-message"

    token = mock.create_autospec(Token, spec_set=True, instance=True)
    token.user_id = "no-id"

    mod_log = mock.create_autospec(ModLog, spec_set=True, instance=True)
    mod_log_property.return_value = mod_log

    format_log_message.return_value = log_msg
    format_userid_log_message.return_value = (userid_log_message, True)

    await cog.take_action(self.msg, token)

    # The offending message is removed and the author is told why.
    self.msg.delete.assert_called_once_with()
    expected_notice = token_remover.DELETION_MESSAGE_TEMPLATE.format(mention=self.msg.author.mention)
    self.msg.channel.send.assert_called_once_with(expected_notice)

    # Both log fragments are built from the same message/token pair.
    format_log_message.assert_called_once_with(self.msg, token)
    format_userid_log_message.assert_called_once_with(self.msg, token)
    logger.debug.assert_called_with(log_msg)
    self.bot.stats.incr.assert_called_once_with("tokens.removed_tokens")

    # The mod log is told to ignore the deletion of this message and an alert is sent.
    mod_log.ignore.assert_called_once_with(constants.Event.message_delete, self.msg.id)
    expected_alert = {
        "icon_url": constants.Icons.token_removed,
        "colour": Colour(constants.Colours.soft_red),
        "title": "Token removed!",
        "text": f"{log_msg}\n{userid_log_message}",
        "thumbnail": self.msg.author.avatar_url_as.return_value,
        "channel_id": constants.Channels.mod_alerts,
        "ping_everyone": True,
    }
    mod_log.send_log_message.assert_called_once_with(**expected_alert)
def test_find_token_invalid_matches(
    self,
    token_re,
    token_cls,
    extract_user_id,
    is_valid_timestamp,
    is_maybe_valid_hmac,
):
    """`None` should be returned when no match has a valid user ID, HMAC, and timestamp."""
    lone_match = mock.create_autospec(Match, spec_set=True, instance=True)
    token_re.finditer.return_value = [lone_match]
    token_cls.return_value = mock.create_autospec(Token, spec_set=True, instance=True)

    # Make every validation step reject the single candidate.
    extract_user_id.return_value = None
    for validator in (is_valid_timestamp, is_maybe_valid_hmac):
        validator.return_value = False

    found = TokenRemover.find_token_in_message(self.msg)

    self.assertIsNone(found)
    token_re.finditer.assert_called_once_with(self.msg.content)
async def on_message(self, msg: Message) -> None:
    """
    Detect poorly formatted Python code in new messages.

    If poorly formatted code is detected, send the user a helpful message explaining how to do
    properly formatted Python syntax highlighting codeblocks.

    A message is only inspected when all of the following hold:

    * its channel is in the `help_available` or `help_in_use` category, or its channel ID is in
      `self.channel_cooldowns` or `self.channel_whitelist`
    * its author is not a bot
    * its content has more than 3 lines
    * `TokenRemover.find_token_in_message` finds nothing and `WEBHOOK_URL_RE` does not match

    Inspection is skipped while the channel's last recorded timestamp is less than 300 seconds
    old, unless `DEBUG_MODE` is truthy.

    A `SyntaxError` raised while handling the message is caught and only logged at trace level.
    """
    # `getattr` guards against channel objects that have no `category` attribute.
    is_help_channel = (getattr(msg.channel, "category", None)
                       and msg.channel.category.id in (Categories.help_available, Categories.help_in_use))
    parse_codeblock = ((is_help_channel or msg.channel.id in self.channel_cooldowns
                        or msg.channel.id in self.channel_whitelist)
                       and not msg.author.bot
                       and len(msg.content.splitlines()) > 3
                       and not TokenRemover.find_token_in_message(msg)
                       and not WEBHOOK_URL_RE.search(msg.content))

    if parse_codeblock:  # no token in the msg
        # A channel without a recorded timestamp falls back to 0 for the comparison.
        on_cooldown = (time.time() - self.channel_cooldowns.get(msg.channel.id, 0)) < 300
        if not on_cooldown or DEBUG_MODE:
            try:
                if self.has_bad_ticks(msg):
                    # Remember the 3 characters the user typed in place of the backticks.
                    ticks = msg.content[:3]
                    # Swap the first and last 3 characters for real backticks before stripping.
                    content = self.codeblock_stripping(f"```{msg.content[3:-3]}```", True)
                    if content is None:
                        return

                    # NOTE(review): the result is unpacked as (sequence of code strings, REPL flag);
                    # confirm against `codeblock_stripping`.
                    content, repl_code = content

                    # From here on `content` is a single string: the second element when there are
                    # exactly two, otherwise the first.
                    if len(content) == 2:
                        content = content[1]
                    else:
                        content = content[0]

                    # Shorten the code to 10 lines and/or 204 characters: keep whole lines while
                    # they fit, then append a "#..." marker.
                    space_left = 204
                    if len(content) >= space_left:
                        current_length = 0
                        lines_walked = 0
                        for line in content.splitlines(keepends=True):
                            if current_length + len(line) > space_left or lines_walked == 10:
                                break
                            current_length += len(line)
                            lines_walked += 1
                        content = content[:current_length] + "#..."
                    # Prefix group 1 of every `RE_MARKDOWN` match with a backslash
                    # (presumably so the example is displayed literally; confirm against `RE_MARKDOWN`).
                    content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
                    howto = (
                        "It looks like you are trying to paste code into this channel.\n\n"
                        "You seem to be using the wrong symbols to indicate where the codeblock should start. "
                        f"The correct symbols would be \\`\\`\\`, not `{ticks}`.\n\n"
                        "**Here is an example of how it should look:**\n"
                        f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n"
                        "**This will result in the following:**\n"
                        f"```python\n{content}\n```"
                    )

                else:
                    # An empty `howto` means "nothing to send"; it is only filled in below when
                    # the content is judged to be code.
                    howto = ""
                    content = self.codeblock_stripping(msg.content, False)
                    if content is None:
                        return

                    # NOTE(review): same (sequence of code strings, REPL flag) shape as above;
                    # confirm against `codeblock_stripping`.
                    content, repl_code = content
                    # Attempts to parse the message into an AST node.
                    # Invalid Python code will raise a SyntaxError.
                    tree = ast.parse(content[0])

                    # Multiple lines of single words could be interpreted as expressions.
                    # This check is to avoid all nodes being parsed as expressions.
                    # (e.g. words over multiple lines)
                    if not all(isinstance(node, ast.Expr) for node in tree.body) or repl_code:
                        # Shorten the code to 10 lines and/or 204 characters.
                        space_left = 204
                        # With a truthy REPL flag the second element is used, otherwise the first.
                        if content and repl_code:
                            content = content[1]
                        else:
                            content = content[0]

                        if len(content) >= space_left:
                            current_length = 0
                            lines_walked = 0
                            for line in content.splitlines(keepends=True):
                                if current_length + len(line) > space_left or lines_walked == 10:
                                    break
                                current_length += len(line)
                                lines_walked += 1
                            content = content[:current_length] + "#..."
                        # Prefix group 1 of every `RE_MARKDOWN` match with a backslash.
                        content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
                        howto += (
                            "It looks like you're trying to paste code into this channel.\n\n"
                            "Discord has support for Markdown, which allows you to post code with full "
                            "syntax highlighting. Please use these whenever you paste code, as this "
                            "helps improve the legibility and makes it easier for us to help you.\n\n"
                            f"**To do this, use the following method:**\n"
                            f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n"
                            "**This will result in the following:**\n"
                            f"```python\n{content}\n```"
                        )

                        log.debug(
                            f"{msg.author} posted something that needed to be put inside python code "
                            "blocks. Sending the user some instructions."
                        )
                    else:
                        log.trace("The code consists only of expressions, not sending instructions")

                if howto != "":
                    # Increase amount of codeblock correction in stats
                    self.bot.stats.incr("codeblock_corrections")
                    howto_embed = Embed(description=howto)
                    bot_message = await msg.channel.send(f"Hey {msg.author.mention}!", embed=howto_embed)
                    # Record which bot message belongs to which user message.
                    self.codeblock_message_ids[msg.id] = bot_message.id

                    # Scheduled as a background task so this handler does not wait on it.
                    # NOTE(review): presumably lets the author delete the instructions; confirm
                    # against `wait_for_deletion`.
                    self.bot.loop.create_task(
                        wait_for_deletion(bot_message, (msg.author.id, ), self.bot)
                    )
                else:
                    # Nothing was sent, so leave the channel's cooldown untouched.
                    return

                # Whitelisted channels never get a cooldown timestamp recorded.
                if msg.channel.id not in self.channel_whitelist:
                    self.channel_cooldowns[msg.channel.id] = time.time()

            except SyntaxError:
                # The content could not be parsed as Python; this is only logged, not re-raised.
                log.trace(
                    f"{msg.author} posted in a help channel, and when we tried to parse it as Python code, "
                    "ast.parse raised a SyntaxError. This probably just means it wasn't Python code. "
                    f"The message that was posted was:\n\n{msg.content}\n\n"
                )