Python Scrubber.Scrubberの例

プログラミング言語: Python

名前空間/パッケージ名: kittystore.scrub

クラス/型: Scrubber

メソッド/関数: Scrubber

hotexamples.comのコード掲載数: 10

Python Scrubber.Scrubber - 10件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのkittystore.scrub.Scrubber.Scrubberの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

Scrubber(10)

scrub(10)

よく使われるメソッド

Scrubber (10)

scrub (10)

コード例 #1

ファイルを表示

ファイル: test_scrub.py プロジェクト: marksteward/kittystore

    def test_remove_next_part_from_content(self):
        """The "next part" separator must not appear in scrubbed content."""
        with open(get_test_file("pipermail_nextpart.txt")) as email_file:
            msg = email.message_from_file(email_file, _class=Message)
        scrubber = Scrubber("*****@*****.**", msg)
        contents, attachments = scrubber.scrub()

        # assertNotIn replaces the deprecated failIf alias and reports both
        # operands when the check fails.
        self.assertNotIn("-------------- next part --------------", contents)

コード例 #2

ファイルを表示

ファイル: test_scrub.py プロジェクト: olasd/kittystore

 def test_html_only_email(self):
     """An HTML-only message must still yield unicode scrubbed content."""
     # The message only has an HTML part, so the scrubbed content will be
     # empty; what is checked here is its type (unicode, not str).
     fixture = get_test_file("html-email-2.txt")
     with open(fixture) as handle:
         parsed = email.message_from_file(handle, _class=Message)
     text, _attachments = Scrubber("*****@*****.**", parsed).scrub()
     is_unicode = isinstance(text, unicode)
     self.assertTrue(is_unicode, u"Scrubbed content should always be unicode")

コード例 #3

ファイルを表示

ファイル: test_scrub.py プロジェクト: marksteward/kittystore

 def test_non_ascii_payload(self):
     """Each encoded payload fixture must scrub to the same unicode text."""
     # The expected text does not depend on the fixture's encoding.
     expected = (u'This message contains non-ascii '
                 u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
     for enc in ("utf8", "iso8859"):
         fixture = get_test_file("payload-%s.txt" % enc)
         with open(fixture) as handle:
             parsed = email.message_from_file(handle, _class=Message)
         text, _attachments = Scrubber("*****@*****.**", parsed).scrub()
         self.assertTrue(isinstance(text, unicode))
         self.assertEqual(text, expected)

コード例 #4

ファイルを表示

ファイル: test_scrub.py プロジェクト: olasd/kittystore

 def test_bad_content_type(self):
     """Scrubber must handle unknown content-types"""
     with open(get_test_file("payload-unknown.txt")) as email_file:
         msg = email.message_from_file(email_file, _class=Message)
     scrubber = Scrubber("*****@*****.**", msg)
     try:
         contents, attachments = scrubber.scrub()
     except LookupError as e:
         # "except ... as" and print() are valid on both Python 2.6+ and 3,
         # unlike the comma form and the print statement.
         import traceback
         print(traceback.format_exc())
         self.fail(e)  # codec not found

コード例 #5

ファイルを表示

ファイル: test_scrub.py プロジェクト: marksteward/kittystore

 def test_html_email_1(self):
     """The HTML part becomes an attachment and the text content is kept."""
     with open(get_test_file("html-email-1.txt")) as handle:
         parsed = email.message_from_file(handle, _class=Message)
     text, files = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(files), 1)
     # HTML part: compare its first four fields, then the length of the fifth
     html_part = files[0]
     self.assertEqual(html_part[0:4],
             (2, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 2723)
     # Scrubbed content
     expected_text = (u"This is a test message\r\n"
                      u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n")
     self.assertEqual(text, expected_text)

コード例 #6

ファイルを表示

ファイル: test_scrub.py プロジェクト: olasd/kittystore

 def test_attachment_1(self):
     """A vCard attachment is extracted and the text content is kept."""
     with open(get_test_file("attachment-1.txt")) as email_file:
         msg = email.message_from_file(email_file, _class=Message)
     scrubber = Scrubber("*****@*****.**", msg)
     contents, attachments = scrubber.scrub()
     self.assertEqual(len(attachments), 1)
     # The single attachment is compared as a whole 5-tuple, including its
     # payload as the last field.
     self.assertEqual(attachments[0],
                      (2, 'puntogil.vcf', 'text/x-vcard', "utf-8",
                       'begin:vcard\r\nfn:gil\r\nn:;gil\r\nversion:2.1\r\n'
                       'end:vcard\r\n\r\n'))
     # The expected text ends with the "-- " list footer.
     # NOTE(review): the list address line was mangled by web e-mail
     # obfuscation ("[email protected]") and has been reconstructed --
     # confirm it against the attachment-1.txt fixture.
     self.assertEqual(
         contents, "This is a test message.\r\n\r\n"
         "\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
         "https://admin.fedoraproject.org/mailman/listinfo/devel\n")

コード例 #7

ファイルを表示

ファイル: test_scrub.py プロジェクト: marksteward/kittystore

 def test_attachment_5(self):
     """A text attachment with an accented filename is extracted."""
     with open(get_test_file("attachment-5.txt")) as handle:
         parsed = email.message_from_file(handle, _class=Message)
     text, files = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(files), 1)
     # text attachment
     # NOTE(review): an earlier expectation used the accented name
     # u"todo-d\xe9jeuner.txt"; the generic "attachment.bin" is what is
     # asserted now -- presumably the accented name is not preserved.
     text_part = files[0]
     self.assertEqual(text_part[0:4],
             (2, u"attachment.bin", "text/plain", "utf-8"))
     self.assertEqual(len(text_part[4]), 112)
     # Scrubbed content
     expected_text = (u'This is a test, HTML message with '
                      u'accented letters : \xe9 \xe8 \xe7 \xe0.\r\nAnd an '
                      u'attachment with an accented filename\r\n\r\n\r\n\r\n')
     self.assertEqual(text, expected_text)

コード例 #8

ファイルを表示

ファイル: test_scrub.py プロジェクト: marksteward/kittystore

 def test_attachment_3(self):
     """An HTML part and a JPEG image are both extracted as attachments."""
     with open(get_test_file("attachment-3.txt")) as handle:
         parsed = email.message_from_file(handle, _class=Message)
     text, files = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(files), 2)
     # HTML part
     html_part = files[0]
     self.assertEqual(html_part[0:4],
             (3, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 3134)
     # Image attachment (no charset, hence None in the fourth field)
     image_part = files[1]
     self.assertEqual(image_part[0:4],
             (4, "GeoffreyRoucourt.jpg", "image/jpeg", None))
     self.assertEqual(len(image_part[4]), 282180)
     # Scrubbed content
     expected_text = u"This is a test message\r\n"
     self.assertEqual(text, expected_text)

コード例 #9

ファイルを表示

ファイル: test_scrub.py プロジェクト: marksteward/kittystore

 def test_attachment_2(self):
     """A PGP signature is extracted as an attachment; the text is kept."""
     with open(get_test_file("attachment-2.txt")) as email_file:
         msg = email.message_from_file(email_file, _class=Message)
     scrubber = Scrubber("*****@*****.**", msg)
     contents, attachments = scrubber.scrub()
     self.assertEqual(len(attachments), 1)
     # The signature is compared as a whole 5-tuple, payload included.
     self.assertEqual(attachments[0], (
             3, 'signature.asc', 'application/pgp-signature', None,
             '-----BEGIN PGP SIGNATURE-----\r\nVersion: GnuPG v1.4.12 '
             '(GNU/Linux)\r\nComment: Using GnuPG with Mozilla - '
             'http://www.enigmail.net/\r\n\r\niEYEARECAAYFAlBhm3oACgkQhmBj'
             'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
             'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n'))
     # The expected text ends with the "-- " list footer.
     # NOTE(review): the list address line was mangled by web e-mail
     # obfuscation ("[email protected]") and has been reconstructed --
     # confirm it against the attachment-2.txt fixture.
     self.assertEqual(contents,
             u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
             u"\n-- \ndevel mailing list\ndevel@lists.fedoraproject.org\n"
             u"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
             )

コード例 #10

ファイルを表示

ファイル: store.py プロジェクト: marksteward/kittystore

    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        :param mlist: The mailing-list object, implementing
            mailman.interfaces.mailinglist.IMailingList.
        :param message: An email.message.Message instance containing at
            least a unique Message-ID header.  The message will be given
            an X-Message-ID-Hash header, overriding any existing such
            header.
        :returns: The calculated X-Message-ID-Hash header.
        :raises ValueError: if the message is missing a Message-ID
            header.
            The storage service is also allowed to raise this exception
            if it finds, but disallows, collisions.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        mailing_list = self.db.find(List, List.name == list_name).one()
        if mailing_list is None:
            mailing_list = List(list_name)
            self.db.add(mailing_list)
        # Refresh the stored list properties from the mailing-list object
        mailing_list.display_name = mlist.display_name
        mailing_list.subject_prefix = mlist.subject_prefix
        # Membership test instead of the deprecated has_key()
        if "Message-Id" not in message:
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        if self.is_message_in_list(list_name, email.message_id):
            # Already stored: report it and return the existing hash
            print("Duplicate email from %s: %s" %
                  (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        # the message.as_string() call must be done before scrubbing
        email_full = EmailFull(list_name, msg_id, message.as_string())
        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # Read the offset before the date is converted to naive UTC below
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes; floor division keeps the value an integer even
            # when true division is in effect
            offset_seconds = (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds
            email.timezone = offset_seconds // 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # store the Mailman user
        email.user_id = self._store_mailman_user(email.sender_email)

        # TODO: categorize the message (enum + i18n?), e.g. 'Question' by
        # default and 'Agenda' when the subject contains 'agenda' or
        # 'reminder'.

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(
                Thread,
                And(
                    Thread.list_name == list_name,
                    Thread.thread_id == thread_id,
                )).one()
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        self.db.add(email_full)
        compute_thread_order_and_depth(thread)
        # Each attachment tuple is expanded into add_attachment's arguments
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # search indexing
        if self.search_index is not None:
            self.search_index.add(email)
        return email.message_id_hash