Example #1
0
    def test_link(self):
        """An anchor should become its title followed by the URL in angle brackets."""
        markup = '<a href="http://www.test.com">Test link title</a>'
        expected = 'Test link title <http://www.test.com>'

        converted = convert_html_to_text(markup, keep_linebreaks=True)

        self.assertEqual(converted, expected)
Example #2
0
    def test_list_to_newlines(self):

        html = """
<html>
	<head>
		<style type="text/css">
			body {
				background-image: url(dolfijn.jpg);
				background-position: right center;
				background-repeat: no-repeat;
				font-family: sans;
				font-size: 36pt;
				font-weight: bold;
			}
		</style>
		<title>Dolfijnwoorden</title>
	</head>
	<body>
		<h3>Dolfijnwoorden 26-02-2014</h3>
		<ul>
			<li>Wit</li>
			<li>Badpak</li>
			<li>Bedisputeren</li>
			<li>Dolfijnwoord</li>
			<li>Hogere wiskunde</li>
			<li>Moeder</li>
			<li>Pinpointen</li>
			<li>Redetwisten</li>
			<li>Schildpadwoord</li>
			<li>Sukkelseks</li>
			<li>Vakantie</li>
			<li>Vingerpistool</li>
			<li>Voor jouw beeldvorming</li>
			<li>Never gonna give you up</li>
			<li>Never gonna let you down</li>
		</ul>
	</body>
</html>"""

        result = """
Dolfijnwoorden 26-02-2014

Wit
Badpak
Bedisputeren
Dolfijnwoord
Hogere wiskunde
Moeder
Pinpointen
Redetwisten
Schildpadwoord
Sukkelseks
Vakantie
Vingerpistool
Voor jouw beeldvorming
Never gonna give you up
Never gonna let you down

"""
        self.assertEqual(convert_html_to_text(html, keep_linebreaks=True), result)
Example #3
0
    def test_link(self):
        """Expect a link to be rendered as ``title <url>``."""
        source_html = '<a href="http://www.test.com">Test link title</a>'
        expected_text = 'Test link title <http://www.test.com>'

        actual_text = convert_html_to_text(source_html, keep_linebreaks=True)

        self.assertEqual(actual_text, expected_text)
Example #4
0
    def test_br_to_space(self):
        """With keep_linebreaks=False, each <br> is expected to become a space."""
        markup = 'Hello VoipGRID,<br><br>This is a test'
        expected = 'Hello VoipGRID,  This is a test'

        converted = convert_html_to_text(markup, keep_linebreaks=False)

        self.assertEqual(converted, expected)
Example #5
0
    def test_remove_different_source_of_tags(self):
        html = 'Hello VoipGRID,<br><br><i>This is a test</i><br><br><b><i>dsfg</i></b><br><b>dsfg</b><br><b>ds</b>'

        result = """Hello VoipGRID,

This is a test

dsfg
dsfg
ds"""
        self.assertEqual(convert_html_to_text(html, keep_linebreaks=True), result)
Example #6
0
    def test_remove_different_source_of_tags(self):
        html = 'Hello VoipGRID,<br><br><i>This is a test</i><br><br><b><i>dsfg</i></b><br><b>dsfg</b><br><b>ds</b>'

        result = """Hello VoipGRID,

This is a test

dsfg
dsfg
ds"""
        self.assertEqual(convert_html_to_text(html, keep_linebreaks=True), result)
Example #7
0
    def test_list_to_newlines(self):

        html = """Hi there!
<script>console.log('hello');</script>
<ul><li>1</li> <li>2</li></ul>
Bye!
"""
        result = """Hi there!

1 2
Bye!
"""

        self.assertEqual(convert_html_to_text(html, keep_linebreaks=True), result)
Example #8
0
    def test_list_to_newlines(self):

        html = """Hi there!
<script>console.log('hello');</script>
<ul><li>1</li> <li>2</li></ul>
Bye!
"""
        result = """Hi there!

1 2
Bye!
"""

        self.assertEqual(convert_html_to_text(html, keep_linebreaks=True), result)
Example #9
0
def parse_message(message, remove_tags=None):
    """
    Parse an email.message.Message instance.

    Every part yielded by ``message.walk()`` is first offered to
    ``parse_attachment``; parts it does not claim are passed to ``parse_body``
    and their bodies are concatenated per content type. When both an HTML body
    and inline attachments were found, inline attachments whose cid is not
    referenced by a ``cid:`` image in the HTML are discarded.

    Arguments:
        message (instance): object whose ``walk()`` yields the message parts
        remove_tags (list): forwarded to ``parse_body``; an empty list is used
            when omitted

    Returns:
        text (str): concatenated text/plain bodies, run through
            ``convert_html_to_text`` when non-empty
        html (str): concatenated text/html bodies
        attachments (list): filled by ``parse_attachment``
        inline_attachments (dict): filled by ``parse_attachment``, keyed by cid
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if remove_tags is None:
        remove_tags = []

    text = ''
    html = ''
    attachments = []
    inline_attachments = {}

    for message_part in message.walk():
        if parse_attachment(message_part, attachments, inline_attachments):
            continue

        if message_part is None:
            continue

        content_type, body = parse_body(message_part, remove_tags=remove_tags)
        if content_type == 'text/html':
            html += body
        elif content_type == 'text/plain':
            text += body

    if text:
        text = convert_html_to_text(text)

    if html and inline_attachments:
        soup = BeautifulSoup(html)
        inline_images = soup.findAll('img', {'src': lambda src: src and src.startswith('cid:')})
        # Strip the leading 'cid:' (4 characters) to get the bare content id.
        cids_in_body = {image.get('src')[4:] for image in inline_images}

        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating it directly raises RuntimeError on Python 3.
        for cid in list(inline_attachments):
            if cid not in cids_in_body:
                del inline_attachments[cid]

    return text, html, attachments, inline_attachments
Example #10
0
    def test_convert_nbsp_to_space(self):
        """The &nbsp; entity is expected to come out as a regular space."""
        markup = 'Hello&nbsp;VoipGRID'
        expected = 'Hello VoipGRID'

        converted = convert_html_to_text(markup)

        self.assertEqual(converted, expected)
Example #11
0
    def test_br_to_space(self):
        """Expect two consecutive <br> tags to yield two spaces when line breaks are not kept."""
        source_html = 'Hello VoipGRID,<br><br>This is a test'
        expected_text = 'Hello VoipGRID,  This is a test'

        actual_text = convert_html_to_text(source_html, keep_linebreaks=False)

        self.assertEqual(actual_text, expected_text)
Example #12
0
def create_message_query_string(message, account_id, folder_name):
    """
    Build the UPDATE statements for a message together with their parameters.

    One statement targets ``email_emailmessage`` (flags and bodies), the other
    targets ``messaging_message`` (seen state and sent date). Parameters are
    collected in the order their placeholders appear in the combined string.

    Arguments:
        message (instance): Message object
        account_id (int): id of the account
        folder_name (string): name of the folder on the server

    Returns:
        total_query_string (str): the statements concatenated, each ending in a newline
        param_list (list): parameters for the placeholders in the query string
        query_count (int): number of statements in the query string
    """
    params = []
    statements = []

    # --- email_emailmessage ---
    email_fragments = ['UPDATE email_emailmessage SET is_deleted = FALSE, ']

    flags = message.get_flags()
    if flags:
        email_fragments.append('flags = %s, ')
        params.append(str(flags))

    html_body = message.get_html_body(remove_tags=settings.BLACKLISTED_EMAIL_TAGS)
    text_body = message.get_text_body()

    # Derive a text body from the HTML when the message has no usable text part.
    if not text_body and html_body is not None:
        text_body = convert_html_to_text(html_body, keep_linebreaks=True)

    if html_body is not None:
        email_fragments.append('body_html = %s, ')
        params.append(replace_anchors_in_html(html_body))

    if text_body is not None:
        email_fragments.append('body_text = %s, ')
        params.append(escape(text_body))

    email_query = ''.join(email_fragments)
    if email_query.endswith(', '):
        # Drop the trailing separator before appending the WHERE clause.
        email_query = email_query.rstrip(', ')
        email_query += ' WHERE account_id = %s AND uid = %s AND folder_name = %s;\n'
        params.extend([account_id, message.uid, folder_name])
        statements.append(email_query)

    # --- messaging_message ---
    sent_date = message.get_sent_date()
    message_fragments = ['UPDATE messaging_message SET ']

    if flags:
        message_fragments.append('is_seen = %s, ')
        params.append(SEEN in flags)

    message_fragments.append('sent_date = %s')
    params.append(datetime.strftime(sent_date, '%Y-%m-%d %H:%M:%S%z'))

    message_fragments.append(' WHERE historylistitem_ptr_id = (SELECT message_ptr_id FROM email_emailmessage WHERE account_id = %s AND uid = %s AND folder_name = %s);\n')
    params.extend([account_id, message.uid, folder_name])
    statements.append(''.join(message_fragments))

    return ''.join(statements), params, len(statements)
Example #13
0
def save_email_message(message, account, folder, email_ctype):
    """
    Fetch or create the EmailMessage for a remote message, fill it and save it.

    Arguments:
        message (instance): Message object
        account (instance): The email account instance to which the message is linked
        folder (instance): The remote folder where the message is stored; its
            ``name_on_server`` and ``identifier`` attributes are read
        email_ctype (integer): ctype id of the EmailMessage class

    Returns:
        email_headers (list): List of EmailHeaders, None when the message has no headers
        email_address_headers (list): List of EmailAddressHeaders, None when the message has no headers
        email_attachments (list): List of EmailAttachments, None when there are no attachments
        inline_email_attachments (list): List of inline EmailAttachments, None when there are none
    """
    sent_date = message.get_sent_date()

    email_message, _created = EmailMessage.objects.get_or_create(
        uid=message.uid,
        folder_name=folder.name_on_server,
        account=account,
        sent_date=sent_date,
        tenant=account.tenant,
    )

    flags = message.get_flags()
    if flags:
        email_message.is_seen = SEEN in flags
        email_message.flags = flags

    html_body = message.get_html_body(remove_tags=settings.BLACKLISTED_EMAIL_TAGS)
    text_body = message.get_text_body()

    # Either derive the text from the HTML, or escape the text that was received.
    needs_text_fallback = html_body is not None and not text_body
    if needs_text_fallback:
        text_body = convert_html_to_text(html_body, keep_linebreaks=True)
    elif text_body is not None:
        text_body = escape(text_body)

    # Headers are optional; both header lists stay None without them.
    email_headers = email_address_headers = None
    headers = message.get_headers()
    if headers is not None:
        email_headers, email_address_headers, identifier = get_headers_and_identifier(headers)
        if identifier:
            email_message.message_identifier = identifier

    # Flag messages whose sender address is the account's own address.
    _sender_name, sender_address = message.get_send_from()
    if account.email.email_address == sender_address:
        email_message.sent_from_account = True

    email_message.body_html = replace_anchors_in_html(html_body)
    email_message.body_text = text_body
    email_message.size = message.get_size()
    email_message.folder_identifier = folder.identifier
    email_message.is_private = False
    email_message.tenant = account.tenant
    email_message.polymorphic_ctype = email_ctype
    email_message.save()

    # Regular attachments.
    attachments = message.get_attachments()
    email_attachments = (
        create_email_attachments(attachments, account.tenant_id)
        if len(attachments) else None
    )

    # Inline attachments are handed over as (key, value) pairs.
    inline_attachments = message.get_inline_attachments().items()
    inline_email_attachments = (
        create_email_attachments(inline_attachments, account.tenant_id, inline=True)
        if len(inline_attachments) else None
    )

    return email_headers, email_address_headers, email_attachments, inline_email_attachments
Example #14
0
    def test_list_to_newlines(self):

        html = """
<html>
	<head>
		<style type="text/css">
			body {
				background-image: url(dolfijn.jpg);
				background-position: right center;
				background-repeat: no-repeat;
				font-family: sans;
				font-size: 36pt;
				font-weight: bold;
			}
		</style>
		<title>Dolfijnwoorden</title>
	</head>
	<body>
		<h3>Dolfijnwoorden 26-02-2014</h3>
		<ul>
			<li>Wit</li>
			<li>Badpak</li>
			<li>Bedisputeren</li>
			<li>Dolfijnwoord</li>
			<li>Hogere wiskunde</li>
			<li>Moeder</li>
			<li>Pinpointen</li>
			<li>Redetwisten</li>
			<li>Schildpadwoord</li>
			<li>Sukkelseks</li>
			<li>Vakantie</li>
			<li>Vingerpistool</li>
			<li>Voor jouw beeldvorming</li>
			<li>Never gonna give you up</li>
			<li>Never gonna let you down</li>
		</ul>
	</body>
</html>"""

        result = """
Dolfijnwoorden 26-02-2014

Wit
Badpak
Bedisputeren
Dolfijnwoord
Hogere wiskunde
Moeder
Pinpointen
Redetwisten
Schildpadwoord
Sukkelseks
Vakantie
Vingerpistool
Voor jouw beeldvorming
Never gonna give you up
Never gonna let you down

"""
        self.assertEqual(convert_html_to_text(html, keep_linebreaks=True),
                         result)
Example #15
0
    def test_convert_nbsp_to_space(self):
        """Expect a non-breaking space entity to be converted into a plain space."""
        source_html = 'Hello&nbsp;VoipGRID'
        expected_text = 'Hello VoipGRID'

        actual_text = convert_html_to_text(source_html)

        self.assertEqual(actual_text, expected_text)
Example #16
0
    def test_br_to_newline(self):
        """With keep_linebreaks=True, each <br> is expected to become a newline."""
        markup = 'Hello VoipGRID,<br><br>This is a test'
        expected = 'Hello VoipGRID,\n\nThis is a test'

        converted = convert_html_to_text(markup, keep_linebreaks=True)

        self.assertEqual(converted, expected)
Example #17
0
    def test_br_to_newline(self):
        """Expect two consecutive <br> tags to yield two newlines when line breaks are kept."""
        source_html = 'Hello VoipGRID,<br><br>This is a test'
        expected_text = 'Hello VoipGRID,\n\nThis is a test'

        actual_text = convert_html_to_text(source_html, keep_linebreaks=True)

        self.assertEqual(actual_text, expected_text)