Beispiel #1
0
def extract_body(message: message.Message, remove_quotations: bool=True) -> str:
    """Return the body text of an email message.

    A non-empty "text/plain" part is preferred.  If there is none, a
    non-empty "text/html" part is converted to Markdown instead.

    Args:
        message: The parsed email to extract the body from.
        remove_quotations: When true, talon strips quoted text from the
            chosen part before it is returned.

    Returns:
        The plaintext body, or the HTML body converted to Markdown.

    Raises:
        ZulipEmailForwardUserError: If neither lookup produced non-empty
            content.  The error text differs depending on whether any
            lookup returned a non-None (but empty) value.
    """
    import talon
    global talon_initialized
    # The module-level flag makes sure talon.init() runs at most once.
    if not talon_initialized:
        talon.init()
        talon_initialized = True

    # If the message contains a plaintext version of the body, use
    # that.
    plaintext_content = get_message_part_by_type(message, "text/plain")
    if plaintext_content:
        if remove_quotations:
            return talon.quotations.extract_from_plain(plaintext_content)
        return plaintext_content

    # If we only have an HTML version, try to make that look nice.
    html_content = get_message_part_by_type(message, "text/html")
    if html_content:
        if remove_quotations:
            html_content = talon.quotations.extract_from_html(html_content)
        return convert_html_to_markdown(html_content)

    # A part was found (not None) but it was empty.
    if plaintext_content is not None or html_content is not None:
        raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.")

    # Pass the list as a logging argument so formatting is deferred to
    # the logging framework instead of being done eagerly with "%".
    logging.warning("Content types: %s", [part.get_content_type() for part in message.walk()])
    raise ZulipEmailForwardUserError("Unable to find plaintext or HTML message body")
Beispiel #2
0
def extract_body(message: message.Message) -> str:
    """Return the body of an email with quoted text removed by talon.

    The "text/plain" part is used when it is non-empty.  Otherwise a
    non-empty "text/html" part is stripped of quotations and converted
    to Markdown.

    Raises:
        ZulipEmailForwardError: If neither part yields non-empty content.
    """
    global talon_initialized
    import talon
    if not talon_initialized:
        # Only initialize talon the first time through.
        talon.init()
        talon_initialized = True

    # Plaintext wins whenever the message has a usable plaintext part.
    plain_part = get_message_part_by_type(message, "text/plain")
    if plain_part:
        return talon.quotations.extract_from_plain(plain_part)

    # Otherwise fall back to HTML and render it as Markdown.
    html_part = get_message_part_by_type(message, "text/html")
    if not html_part:
        raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")

    unquoted_html = talon.quotations.extract_from_html(html_part)
    return convert_html_to_markdown(unquoted_html)
def parse_email_quotes(archive_dir="archive/"):
    """
    Run through each file in the archive and add 'clean_body' and 'signature'
    to each email's information.

    Only files whose name ends with ".email.json" are processed, and a file
    is rewritten only when it lacks 'clean_body' or 'signature'.  The current
    file index is printed every 1000 directory entries as a progress marker.

    Args:
        archive_dir: Directory holding the ".email.json" files.  Defaults to
            "archive/", the location that used to be hard-coded.
    """
    talon.init()
    for filenum, filename in enumerate(os.listdir(archive_dir)):
        if filenum % 1000 == 0:
            # Parenthesized single argument prints identically on Python 2 and 3.
            print(filenum)
        if not filename.endswith(".email.json"):
            continue

        full_filename = os.path.join(archive_dir, filename)
        # `with` closes the handle even if parsing raises.
        with open(full_filename, "r") as fh:
            email_data = load(fh)

        if "clean_body" in email_data and "signature" in email_data:
            continue  # already processed

        reply_body = naive_quote_removal(email_data['body'])
        email_data['clean_body'], email_data['signature'] = extract_signature(reply_body)

        # Serialize before opening for writing: opening with "w" truncates
        # the file, so a serialization error must not wipe the original.
        serialized = dumps(email_data)
        with open(full_filename, "w") as fh:
            fh.write(serialized)
Beispiel #4
0
from zerver.lib.actions import decode_email_address, get_email_gateway_message_string_from_address, \
    internal_send_message
from zerver.lib.notifications import convert_html_to_markdown
from zerver.lib.redis_utils import get_redis_client
from zerver.lib.upload import upload_message_image
from zerver.lib.utils import generate_random_token
from zerver.lib.str_utils import force_text
from zerver.models import Stream, Recipient, get_user_profile_by_email, \
    get_user_profile_by_id, get_display_recipient, get_recipient, \
    Message, Realm, UserProfile
from six import text_type, binary_type
import six
import talon
from talon import quotations

# Initialize talon once, when this module is imported.
talon.init()

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

def redact_stream(error_message):
    # type: (text_type) -> text_type
    """Mask the text preceding "@<gateway domain>" in an error message.

    The gateway domain is whatever follows the last "@" in
    settings.EMAIL_GATEWAY_PATTERN.  If the message contains
    "<something>@<domain>", every occurrence of the captured
    "<something>" is replaced by the same number of "X" characters.
    Messages that do not mention the gateway domain are returned
    unchanged.

    NOTE(review): the lazy group starts at the first word boundary from
    which "@<domain>" can be reached without crossing a newline, so text
    before the address on the same line may be captured and masked too.
    That existing behaviour is kept as is.
    """
    domain = settings.EMAIL_GATEWAY_PATTERN.rsplit('@')[-1]
    # Escape the domain so its dots (and any other regex metacharacters)
    # match only themselves rather than arbitrary characters.
    stream_match = re.search(u'\\b(.*?)@' + re.escape(domain), error_message)
    if stream_match is None:
        return error_message
    stream_name = stream_match.group(1)
    return error_message.replace(stream_name, "X" * len(stream_name))

def report_to_zulip(error_message):
    # type: (text_type) -> None
    # Look up the stream named "errors" in the realm whose domain is
    # settings.ADMIN_DOMAIN.
    # NOTE(review): the rest of this function is not visible in this excerpt;
    # presumably error_message is then sent to that stream -- confirm against
    # the full file.
    error_stream = Stream.objects.get(name="errors", realm__domain=settings.ADMIN_DOMAIN)
import logging
import os
import requests
import urllib
import json
from flask import Flask, jsonify, request
from flask_cors import CORS
from HTMLParser import HTMLParser
import re
import talon
from talon import quotations
from nltk.corpus import stopwords
# Initialize talon once, when this module is imported.
talon.init()

# Load the saved replies from disk.  The `with` block closes the file on
# exit, so no explicit close() call is needed.
with open('saved_replies.json') as json_data:
    saved_replies = json.load(json_data)

# Create the Flask application and wrap it with flask_cors.
app = Flask(__name__)
CORS(app)


class MLStripper(HTMLParser):
    def __init__(self):
        """Reset the parser and start with an empty list of text chunks."""
        # Calls reset() directly rather than the base-class __init__.
        self.reset()
        # Text chunks appended by handle_data, in the order received.
        self.fed = []

    def handle_data(self, d):
        """Record the text chunk ``d`` by appending it to ``self.fed``."""
        self.fed.append(d)

    def get_data(self):
Beispiel #6
0
 def handle(self, *args, **options):
     """Initialize talon, then call ``get_messages`` with ``options``.

     Positional ``args`` are accepted but not used here.
     """
     import talon
     # loads machine learning classifiers
     talon.init()
     get_messages(**options)
Beispiel #7
0
 def __init__(self):
     """Initialize talon when the instance is created."""
     talon.init(
     )  # initializes the talon machine-learning model used for parsing signatures