def test_store_and_load_from_pymongo(self):
     """
     Round-trip one message through a facade bound to the configured Mongo URI,
     looking it up again by the unicode form of its content hash
     """
     self.facade.bind(AppConfig.mongo_uri)
     original = EmailMessage(subject='foo', body='bar', sender='baz', recipient='bip', date='2016-07-07')
     self.facade.store(self.email_collection, original.to_dict())
     hash_key = unicode(original.content_hash)
     matches = self.facade.load(self.email_collection, content_hash=hash_key)
     # exactly one stored document may carry this hash
     self.assertEqual(1, len(matches))
     restored = EmailMessage(**matches[0])
     self.assertEqual(original, restored)
 def test_store_and_load_from_flask(self):
     """
     Round-trip one message through a facade bound to the Flask app,
     all inside the app context, looking it up again by its content hash
     """
     with self.app.app_context():
          self.facade.bind_flask(self.app)
          original = EmailMessage(subject='foo', body='bar', sender='baz', recipient='bip', date='2016-07-07')
          self.facade.store(self.email_collection, original.to_dict())
          hash_key = original.content_hash
          matches = self.facade.load(self.email_collection, content_hash=hash_key)
          # exactly one stored document may carry this hash
          self.assertEqual(1, len(matches))
          restored = EmailMessage(**matches[0])
          self.assertEqual(original, restored)
 def test_filter_by_field(self):
     """
     Store messages with subjects foo1foo .. foo99foo, load with subject='1' and
     expect the 19 messages whose subject contains the character '1'
     """
     self.facade.bind(AppConfig.mongo_uri)
     for index in range(1, 100):
          numbered = EmailMessage(subject='foo{}foo'.format(index), body='bar', sender='baz', recipient='bip', date='2016-07-07')
          self.facade.store(self.email_collection, numbered.to_dict())
     matches = self.facade.load(self.email_collection, page_size=60, subject='1')
     # 1, 10-19 and 21, 31, .. 91 are the numbers below 100 containing a '1'
     self.assertEqual(19, len(matches))
     emails = [EmailMessage(**fields) for fields in matches]
     for email in emails:
          self.assertTrue('1' in email.subject)
 def test_store_and_load_a_page(self):
     """
     Store messages with subjects foo1 .. foo999, load page 12 at 6 messages per page and
     expect exactly the six messages foo67 .. foo72, in that order
     """
     self.facade.bind(AppConfig.mongo_uri)
     for i in range(1, 1000):
          message = EmailMessage(subject='foo{}'.format(i), body='bar', sender='baz', recipient='bip', date='2016-07-07')
          self.facade.store(self.email_collection, message.to_dict())
     loaded_messages = self.facade.load(self.email_collection, page=12, page_size=6)
     self.assertEqual(6, len(loaded_messages))
     emails = [EmailMessage(**message) for message in loaded_messages]
     # Number every loaded message starting at 67 so that the whole page,
     # including its last entry, is verified rather than only a fixed-length prefix
     for num, email in enumerate(emails, 67):
          self.assertEqual(email.subject, 'foo{}'.format(num))
 def _process_single_node(self, node):
     """
     Extract the contents of a single XML dump node.
     When use_full_parser() accepts the node text it is parsed as a MIME message; otherwise the
     message is assembled from the node's subject/from/to/receivedat children.
     :param node: The XML node corresponding to a message
     :return: An EmailMessage instance containing the message contents
     """
     text = unicode(node.find('text').text)
     text = text.lstrip(u'>')  # remove leading char that got into the text somehow
     if use_full_parser(text):
          text = fix_broken_hotmail_headers(text)
          mime_message = Parser().parse(StringIO(text))
          return_message = get_nested_payload(mime_message)
     else:
          return_message = EmailMessage()
          subject_node = node.find('subject')
          from_node = node.find('from')
          to_node = node.find('to')
          date_node = node.find('receivedat')
          # A missing <subject> element and one without any text both give an empty subject
          raw_subject = subject_node.text if subject_node is not None else None
          if raw_subject is None:
               subject = ''
          elif isinstance(raw_subject, unicode):
               # already decoded; unicode(value, 'utf-8') would raise TypeError here
               subject = raw_subject
          else:
               subject = unicode(raw_subject, 'utf-8')
          sender = clean_sender('{} <{}>'.format(from_node.find('name').text, from_node.find('email').text))
          recipient = clean_recipient('{} <{}>'.format(to_node.find('name').text, to_node.find('email').text))
          date_string = '{} {}'.format(date_node.find('date').text, date_node.find('time').text)
          return_message.append_body(text)
          return_message.subject = subject
          return_message.sender = sender
          return_message.recipient = recipient
          return_message.date = parse(date_string)
          return_message.date = normalize_to_utc(return_message.date, self._timezone)
     # Both branches record where the message came from
     return_message.source = "XML File {} node {}".format(self._process_path, node.attrib)
     return return_message
Exemple #6
0
 def _process_single_node(self, node):
     """
     Extract the contents of a single XML dump node.
     Node text accepted by use_full_parser() goes through the MIME parser; any other node is
     turned into a message from its subject/from/to/receivedat children.
     :param node: The XML node corresponding to a message
     :return: An EmailMessage instance containing the message contents
     """
     text = unicode(node.find('text').text)
     text = text.lstrip(
         u'>')  # remove leading char that got into the text somehow
     if use_full_parser(text):
          text = fix_broken_hotmail_headers(text)
          parser = Parser()
          mime_message = parser.parse(StringIO(text))
          return_message = get_nested_payload(mime_message)
     else:
          return_message = EmailMessage()
          subject_node = node.find('subject')
          from_node = node.find('from')
          to_node = node.find('to')
          date_node = node.find('receivedat')
          # Default to an empty subject; only decode text that is present and
          # still a byte string, because unicode(value, 'utf-8') raises
          # TypeError for None and for text that is already unicode
          subject = ''
          if subject_node is not None and subject_node.text is not None:
               subject = subject_node.text
               if not isinstance(subject, unicode):
                    subject = unicode(subject, 'utf-8')
          sender = clean_sender('{} <{}>'.format(
              from_node.find('name').text,
              from_node.find('email').text))
          recipient = clean_recipient('{} <{}>'.format(
              to_node.find('name').text,
              to_node.find('email').text))
          date_string = '{} {}'.format(
              date_node.find('date').text,
              date_node.find('time').text)
          return_message.append_body(text)
          return_message.subject = subject
          return_message.sender = sender
          return_message.recipient = recipient
          return_message.date = parse(date_string)
          return_message.date = normalize_to_utc(return_message.date,
                                                 self._timezone)
     # The source is recorded whichever branch built the message
     return_message.source = "XML File {} node {}".format(
         self._process_path, node.attrib)
     return return_message
def get_nested_payload(mime_message):
    """
    Builds a single EmailMessage from a MIME message by walking every part of it.
    Parts without a Content-Disposition header are appended to the body when they are text/plain and
    dropped when their content type is in _ignored_content_types; every other part becomes an attachment.
    :param mime_message: The MIME message to traverse looking for content
    :return: An EmailMessage carrying the subject, sender, recipient, date, body text and attachments
    """
    message = EmailMessage()
    message.subject = mime_message.get('Subject')
    message.sender = clean_sender(mime_message.get('From'))
    message.recipient = clean_recipient(mime_message.get('To'))
    message.date = parse(mime_message.get('Date'))
    for part in mime_message.walk():
        part_type = part.get_content_type()
        disposition = part.get('Content-Disposition')
        if disposition is None:
            if part_type == 'text/plain':
                body_text = unicode(part.get_payload())
                message.append_body(body_text)
                continue
            if part_type in _ignored_content_types:
                continue  # throw away contents we don't want
        # The raw Content-Disposition header value (possibly None) is what gets passed as the filename
        message.add_attachment(part.get_payload(), content_type=part_type, filename=disposition)
    return message