Exemple #1
0
    def should_parse(self, message: discord.Message) -> bool:
        """
        Decide whether `message` is a candidate for parsing.

        Every one of these conditions must hold:

        * The author is not a bot
        * The channel is accepted by `is_valid_channel`
        * The content satisfies `has_lines` for `constants.CodeBlock.minimum_lines`
        * `TokenRemover.find_token_in_message` finds nothing and `WEBHOOK_URL_RE` does not match
        """
        # Bots are rejected up front; none of the other checks run for them.
        if message.author.bot:
            return False

        # The remaining checks short-circuit left to right, so the token and
        # webhook searches only run for messages that pass the cheaper checks.
        return (
            self.is_valid_channel(message.channel)
            and has_lines(message.content, constants.CodeBlock.minimum_lines)
            and not (
                TokenRemover.find_token_in_message(message)
                or WEBHOOK_URL_RE.search(message.content)
            )
        )
Exemple #2
0
    def test_extract_user_id_invalid(self):
        """Non-digit, non-ASCII, and invalid Base64 IDs should make `extract_user_id` return `None`."""
        # Maps a description of each case (used as the subtest message) to the ID under test.
        invalid_ids = {
            "non-digit ASCII": "SGVsbG8gd29ybGQ",
            "cyrillic text": "0J_RgNC40LLQtdGCINC80LjRgA",
            "Unicode digits": "4pO14p6L4p6C4pG34p264pGl8J-EiOKSj-KCieKBsA",
            "Unicode numerals": "4oaA4oaB4oWh4oWi4Lyz4Lyq4Lyr4LG9",
            "Unicode decimals": "8J2fjvCdn5nwnZ-k8J2fr_Cdn7rgravvvJngr6c",
            "ASCII invalid Base64": "{hello}[world]&(bye!)",
            "Unicode invalid Base64": "Þíß-ï§-ňøẗ-våłìÐ",
        }

        for description, encoded_id in invalid_ids.items():
            with self.subTest(msg=description):
                self.assertIsNone(TokenRemover.extract_user_id(encoded_id))
Exemple #3
0
    def test_format_log_message(self, log_message):
        """The log message template should be formatted with details of the message and the token."""
        token = Token(
            "NDcyMjY1OTQzMDYyNDEzMzMy",
            "XsySD_",
            "s45jqDV_Iisn-symw0yDRrk_jf4",
        )
        log_message.format.return_value = "Howdy"

        formatted = TokenRemover.format_log_message(self.msg, token)

        # Whatever the template's `format` produces must be handed back unchanged.
        self.assertEqual(formatted, log_message.format.return_value)

        # The expected HMAC keeps only the last three characters of the token's HMAC.
        expected_fields = {
            "author": format_user(self.msg.author),
            "channel": self.msg.channel.mention,
            "user_id": token.user_id,
            "timestamp": token.timestamp,
            "hmac": "xxxxxxxxxxxxxxxxxxxxxxxxjf4",
        }
        log_message.format.assert_called_once_with(**expected_fields)
Exemple #4
0
    def test_format_userid_log_message_bot(self, known_user_log_message):
        """The user ID portion should be formatted as a known bot when the guild member is a bot."""
        known_user_log_message.format.return_value = " Partner"

        message = MockMessage(id=555, content="hello world")
        # Make the guild lookup return a member named "Sam" that is flagged as a bot.
        member = message.guild.get_member.return_value
        member.__str__.return_value = "Sam"
        member.bot = True

        token = Token(
            "NDcyMjY1OTQzMDYyNDEzMzMy",
            "XsySD_",
            "s45jqDV_Iisn-symw0yDRrk_jf4",
        )

        outcome = TokenRemover.format_userid_log_message(message, token)

        expected_outcome = (known_user_log_message.format.return_value, False)
        self.assertEqual(outcome, expected_outcome)

        known_user_log_message.format.assert_called_once_with(
            user_id=472265943062413332,
            user_name="Sam",
            kind="BOT",
        )
Exemple #5
0
    async def relay_message(self, msg: Message) -> None:
        """
        Relays the message to the relevant watch channel.

        The message history is reset and a new header is sent when the author or channel differs
        from the ones recorded in the history, or when the history's message count has reached
        `BigBrotherConfig.header_message_limit`.

        Content in which a token or webhook URL is found is replaced by a censorship notice.
        Otherwise, every URL that is not the URL of an image or video embed is wrapped in
        backticks to prevent embeds.
        """
        limit = BigBrotherConfig.header_message_limit

        if (
            msg.author.id != self.message_history.last_author
            or msg.channel.id != self.message_history.last_channel
            or self.message_history.message_count >= limit
        ):
            self.message_history = MessageHistory(last_author=msg.author.id, last_channel=msg.channel.id)

            await self.send_header(msg)

        if TokenRemover.find_token_in_message(msg) or WEBHOOK_URL_RE.search(msg.content):
            cleaned_content = "Content is censored because it contains a bot or webhook token."
        elif cleaned_content := msg.clean_content:
            # Put all non-media URLs in a code block to prevent embeds
            media_urls = {embed.url for embed in msg.embeds if embed.type in ("image", "video")}
            # `str.replace` rewrites every occurrence at once, so each distinct URL must only be
            # handled once; otherwise a URL repeated in the message would be wrapped in backticks
            # once per occurrence. `dict.fromkeys` de-duplicates while keeping the original order.
            for url in dict.fromkeys(URL_RE.findall(cleaned_content)):
                if url not in media_urls:
                    cleaned_content = cleaned_content.replace(url, f"`{url}`")
Exemple #6
0
    def test_find_token_valid_match(self, token_re, token_cls, is_valid_id,
                                    is_valid_timestamp):
        """The `Token` for the first match with a valid user ID and timestamp should be returned."""
        regex_matches = [
            mock.create_autospec(Match, spec_set=True, instance=True)
            for _ in range(2)
        ]
        rejected_token, accepted_token = (
            mock.create_autospec(Token, spec_set=True, instance=True)
            for _ in range(2)
        )

        token_re.finditer.return_value = regex_matches
        # One token is constructed per regex match, in order.
        token_cls.side_effect = [rejected_token, accepted_token]
        # The ID check fails for the 1st match and passes for the 2nd.
        is_valid_id.side_effect = (False, True)
        is_valid_timestamp.return_value = True

        found = TokenRemover.find_token_in_message(self.msg)

        self.assertEqual(accepted_token, found)
        token_re.finditer.assert_called_once_with(self.msg.content)
Exemple #7
0
    async def test_take_action(self, format_log_message,
                               format_userid_log_message, logger,
                               mod_log_property):
        """The message should be deleted, the author notified, and a mod log sent."""
        cog = TokenRemover(self.bot)

        fake_mod_log = mock.create_autospec(ModLog, spec_set=True, instance=True)
        mod_log_property.return_value = fake_mod_log

        fake_token = mock.create_autospec(Token, spec_set=True, instance=True)
        fake_token.user_id = "no-id"

        main_text = "testing123"
        userid_text = "userid-log-message"
        format_log_message.return_value = main_text
        # The second tuple element is what the test expects to be forwarded as `ping_everyone`.
        format_userid_log_message.return_value = (userid_text, True)

        await cog.take_action(self.msg, fake_token)

        # The offending message is removed and its author is told why.
        self.msg.delete.assert_called_once_with()
        expected_notice = token_remover.DELETION_MESSAGE_TEMPLATE.format(
            mention=self.msg.author.mention
        )
        self.msg.channel.send.assert_called_once_with(expected_notice)

        # Both formatters receive the message and the token; the result is logged and counted.
        format_log_message.assert_called_once_with(self.msg, fake_token)
        format_userid_log_message.assert_called_once_with(self.msg, fake_token)
        logger.debug.assert_called_with(main_text)
        self.bot.stats.incr.assert_called_once_with("tokens.removed_tokens")

        # The mod log is told to ignore the deletion event and receives the alert.
        fake_mod_log.ignore.assert_called_once_with(
            constants.Event.message_delete,
            self.msg.id,
        )
        expected_alert = {
            "icon_url": constants.Icons.token_removed,
            "colour": Colour(constants.Colours.soft_red),
            "title": "Token removed!",
            "text": main_text + "\n" + userid_text,
            "thumbnail": self.msg.author.avatar_url_as.return_value,
            "channel_id": constants.Channels.mod_alerts,
            "ping_everyone": True,
        }
        fake_mod_log.send_log_message.assert_called_once_with(**expected_alert)
Exemple #8
0
    def test_find_token_invalid_matches(
        self,
        token_re,
        token_cls,
        extract_user_id,
        is_valid_timestamp,
        is_maybe_valid_hmac,
    ):
        """`None` should be returned when no match has a valid user ID, HMAC, and timestamp."""
        only_match = mock.create_autospec(Match, spec_set=True, instance=True)
        token_re.finditer.return_value = [only_match]
        token_cls.return_value = mock.create_autospec(Token, spec_set=True, instance=True)

        # Every validation step reports failure for the single candidate.
        extract_user_id.return_value = None
        is_valid_timestamp.return_value = False
        is_maybe_valid_hmac.return_value = False

        found = TokenRemover.find_token_in_message(self.msg)

        self.assertIsNone(found)
        token_re.finditer.assert_called_once_with(self.msg.content)
Exemple #9
0
    async def on_message(self, msg: Message) -> None:
        """
        Detect poorly formatted Python code in new messages.

        If poorly formatted code is detected, send the user a helpful message explaining how to do
        properly formatted Python syntax highlighting codeblocks.

        A message is only inspected when all of the following hold:

        * It is in a help category channel, a channel with a cooldown entry, or a whitelisted channel
        * Its author is not a bot
        * It has more than 3 lines
        * `TokenRemover.find_token_in_message` finds nothing and `WEBHOOK_URL_RE` does not match

        After instructions are sent in a non-whitelisted channel, that channel is put on a
        300 second cooldown. The cooldown is ignored when `DEBUG_MODE` is set.
        """
        is_help_channel = (getattr(msg.channel, "category", None)
                           and msg.channel.category.id
                           in (Categories.help_available,
                               Categories.help_in_use))
        parse_codeblock = ((is_help_channel
                            or msg.channel.id in self.channel_cooldowns
                            or msg.channel.id in self.channel_whitelist)
                           and not msg.author.bot
                           and len(msg.content.splitlines()) > 3
                           and not TokenRemover.find_token_in_message(msg)
                           and not WEBHOOK_URL_RE.search(msg.content))
        if not parse_codeblock:
            return

        # Channels without a recorded cooldown count as last used at time 0, i.e. never on cooldown.
        on_cooldown = (time.time() - self.channel_cooldowns.get(msg.channel.id, 0)) < 300
        if on_cooldown and not DEBUG_MODE:
            return

        # The `try` deliberately spans the whole flow so any SyntaxError raised while
        # handling the message is treated as "this was not Python code".
        try:
            if self.has_bad_ticks(msg):
                ticks = msg.content[:3]
                # Swap the wrong fence characters for real backticks before stripping.
                stripped = self.codeblock_stripping(f"```{msg.content[3:-3]}```", True)
                if stripped is None:
                    return

                content, _repl_code = stripped
                content = content[1] if len(content) == 2 else content[0]
                content = self._shorten_code(content)

                content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
                howto = (
                    "It looks like you are trying to paste code into this channel.\n\n"
                    "You seem to be using the wrong symbols to indicate where the codeblock should start. "
                    f"The correct symbols would be \\`\\`\\`, not `{ticks}`.\n\n"
                    "**Here is an example of how it should look:**\n"
                    f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n"
                    "**This will result in the following:**\n"
                    f"```python\n{content}\n```")
            else:
                howto = ""
                stripped = self.codeblock_stripping(msg.content, False)
                if stripped is None:
                    return

                content, repl_code = stripped
                # Attempts to parse the message into an AST node.
                # Invalid Python code will raise a SyntaxError.
                tree = ast.parse(content[0])

                # Multiple lines of single words could be interpreted as expressions.
                # This check is to avoid all nodes being parsed as expressions.
                # (e.g. words over multiple lines)
                only_expressions = all(isinstance(node, ast.Expr) for node in tree.body)
                if not only_expressions or repl_code:
                    content = content[1] if content and repl_code else content[0]
                    # Shorten the code to 10 lines and/or 204 characters.
                    content = self._shorten_code(content)

                    content_escaped_markdown = RE_MARKDOWN.sub(r'\\\1', content)
                    howto = (
                        "It looks like you're trying to paste code into this channel.\n\n"
                        "Discord has support for Markdown, which allows you to post code with full "
                        "syntax highlighting. Please use these whenever you paste code, as this "
                        "helps improve the legibility and makes it easier for us to help you.\n\n"
                        "**To do this, use the following method:**\n"
                        f"\\`\\`\\`python\n{content_escaped_markdown}\n\\`\\`\\`\n\n"
                        "**This will result in the following:**\n"
                        f"```python\n{content}\n```")

                    log.debug(
                        f"{msg.author} posted something that needed to be put inside python code "
                        "blocks. Sending the user some instructions.")
                else:
                    log.trace(
                        "The code consists only of expressions, not sending instructions"
                    )

            # Nothing to tell the user; leave the cooldown untouched.
            if howto == "":
                return

            # Increase amount of codeblock correction in stats
            self.bot.stats.incr("codeblock_corrections")
            howto_embed = Embed(description=howto)
            bot_message = await msg.channel.send(
                f"Hey {msg.author.mention}!", embed=howto_embed)
            self.codeblock_message_ids[msg.id] = bot_message.id

            self.bot.loop.create_task(
                wait_for_deletion(bot_message, (msg.author.id, ), self.bot))

            if msg.channel.id not in self.channel_whitelist:
                self.channel_cooldowns[msg.channel.id] = time.time()

        except SyntaxError:
            log.trace(
                f"{msg.author} posted in a help channel, and when we tried to parse it as Python code, "
                "ast.parse raised a SyntaxError. This probably just means it wasn't Python code. "
                f"The message that was posted was:\n\n{msg.content}\n\n"
            )

    @staticmethod
    def _shorten_code(content: str, max_chars: int = 204, max_lines: int = 10) -> str:
        """
        Return `content` cut down to whole lines, with "#..." appended when it was cut.

        Content shorter than `max_chars` characters is returned unchanged, regardless of how
        many lines it has. Otherwise leading lines are kept until adding the next one would
        exceed `max_chars` characters or `max_lines` lines have been kept.
        """
        if len(content) < max_chars:
            return content

        kept_chars = 0
        for lines_kept, line in enumerate(content.splitlines(keepends=True)):
            if kept_chars + len(line) > max_chars or lines_kept == max_lines:
                break
            kept_chars += len(line)
        return content[:kept_chars] + "#..."