def extract_body(message: message.Message, remove_quotations: bool=True) -> str:
    """Return the body text of an email, preferring its plaintext part.

    The "text/plain" part is used when it is non-empty; otherwise the
    "text/html" part is converted to markdown.  When ``remove_quotations``
    is true the chosen content is first passed through the matching
    ``talon.quotations`` extractor (``extract_from_plain`` or
    ``extract_from_html``); when false it is used as-is.

    Raises:
        ZulipEmailForwardUserError: if neither part yields usable content.
    """
    import talon
    global talon_initialized
    # talon.init() is only run the first time this function is called.
    if not talon_initialized:
        talon.init()
        talon_initialized = True

    # A plaintext rendition of the body wins whenever one is present.
    plain_body = get_message_part_by_type(message, "text/plain")
    if plain_body:
        if not remove_quotations:
            return plain_body
        return talon.quotations.extract_from_plain(plain_body)

    # With only an HTML rendition available, try to make that look nice.
    html_body = get_message_part_by_type(message, "text/html")
    if html_body:
        markup = html_body
        if remove_quotations:
            markup = talon.quotations.extract_from_html(html_body)
        return convert_html_to_markdown(markup)

    # At least one lookup returned something other than None, but neither
    # value was truthy: the parts exist and carry no usable content.
    if not (plain_body is None and html_body is None):
        raise ZulipEmailForwardUserError("Email has no nonempty body sections; ignoring.")

    # Neither lookup found a part; record what the message did contain.
    content_types = [part.get_content_type() for part in message.walk()]
    logging.warning("Content types: %s" % (content_types,))
    raise ZulipEmailForwardUserError("Unable to find plaintext or HTML message body")
def extract_body(message: message.Message) -> str:
    """Return the body text of an email with quoted content removed.

    The "text/plain" part is preferred and passed through
    ``talon.quotations.extract_from_plain``.  If it is missing or empty,
    the "text/html" part is passed through
    ``talon.quotations.extract_from_html`` and converted to markdown.

    Raises:
        ZulipEmailForwardError: if neither part yields usable content.
    """
    import talon
    global talon_initialized
    # talon.init() is only run the first time this function is called.
    if not talon_initialized:
        talon.init()
        talon_initialized = True

    # A plaintext rendition of the body wins whenever one is present.
    plain_body = get_message_part_by_type(message, "text/plain")
    if plain_body:
        return talon.quotations.extract_from_plain(plain_body)

    # With only an HTML rendition available, try to make that look nice.
    html_body = get_message_part_by_type(message, "text/html")
    if not html_body:
        raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")

    unquoted_html = talon.quotations.extract_from_html(html_body)
    return convert_html_to_markdown(unquoted_html)
def parse_email_quotes():
    """
    Run through each file in archive and add 'clean_body' and 'signature' to
    each email's information.

    Only files in "archive/" whose names end in ".email.json" are touched,
    and a file is rewritten only when one of the two keys is missing.  The
    current file index is printed every 1000 directory entries as a progress
    indicator.

    NOTE(review): `load` / `dumps` are presumably `json.load` / `json.dumps`
    imported at module level -- confirm against the file's imports.
    """
    talon.init()
    archive_dir = "archive/"
    for filenum, filename in enumerate(os.listdir(archive_dir)):
        if filenum % 1000 == 0:
            # Single-argument call form prints identically on Python 2 and 3.
            print(filenum)
        if not filename.endswith(".email.json"):
            continue

        full_filename = os.path.join(archive_dir, filename)
        # Context manager guarantees the handle is closed even if load() raises.
        with open(full_filename, "r") as fh:
            email_data = load(fh)

        if "clean_body" in email_data and "signature" in email_data:
            # Already processed; nothing to add.
            continue

        reply_body = naive_quote_removal(email_data['body'])
        email_data['clean_body'], email_data['signature'] = extract_signature(reply_body)

        # Serialize BEFORE opening for writing: opening with "w" truncates the
        # file, so a failure inside dumps() must not wipe the stored email.
        serialized = dumps(email_data)
        with open(full_filename, "w") as fh:
            fh.write(serialized)
from zerver.lib.actions import decode_email_address, get_email_gateway_message_string_from_address, \
    internal_send_message
from zerver.lib.notifications import convert_html_to_markdown
from zerver.lib.redis_utils import get_redis_client
from zerver.lib.upload import upload_message_image
from zerver.lib.utils import generate_random_token
from zerver.lib.str_utils import force_text
from zerver.models import Stream, Recipient, get_user_profile_by_email, \
    get_user_profile_by_id, get_display_recipient, get_recipient, \
    Message, Realm, UserProfile

from six import text_type, binary_type
import six
import talon
from talon import quotations

# Runs once, when this module is imported.
talon.init()

logger = logging.getLogger(__name__)

def redact_stream(error_message):
    # type: (text_type) -> text_type
    """Mask the local part of a gateway email address inside an error message.

    The gateway domain is whatever follows the last '@' in
    settings.EMAIL_GATEWAY_PATTERN.  If the message contains
    ``<something>@<domain>``, every occurrence of ``<something>`` in the
    message is replaced by an equal-length run of "X".  Messages without such
    an address are returned unchanged.
    """
    domain = settings.EMAIL_GATEWAY_PATTERN.rsplit('@')[-1]
    # \S*? keeps the captured group to the address's local part; a plain .*?
    # would start at the first word boundary of the whole message and swallow
    # all the text leading up to the address.  The domain is escaped so that
    # its dots only match literal dots.
    stream_match = re.search(u'\\b(\\S*?)@' + re.escape(domain), error_message)
    if stream_match:
        stream_name = stream_match.groups()[0]
        return error_message.replace(stream_name, "X" * len(stream_name))
    return error_message

def report_to_zulip(error_message):
    # type: (text_type) -> None
    error_stream = Stream.objects.get(name="errors", realm__domain=settings.ADMIN_DOMAIN)
import logging
import os
import requests
import urllib
import json
from flask import Flask, jsonify, request
from flask_cors import CORS
from HTMLParser import HTMLParser
import re
import talon
from talon import quotations
from nltk.corpus import stopwords

# Runs once, when this module is imported.
talon.init()

# Load the saved replies from disk at import time.
with open('saved_replies.json') as json_data:
    saved_replies = json.load(json_data)
    # The enclosing `with` also closes the file when the block exits.
    json_data.close()

# Flask application object, wrapped with CORS.
app = Flask(__name__)
CORS(app)

class MLStripper(HTMLParser):
    """HTMLParser subclass that accumulates the data it is fed in `self.fed`.

    NOTE(review): presumably used to strip markup from HTML (per the class
    name); the remainder of the class is outside this view -- confirm.
    """
    def __init__(self):
        # Calls reset() directly; HTMLParser.__init__ is not invoked here.
        self.reset()
        # Data chunks received through handle_data, in arrival order.
        self.fed = []

    def handle_data(self, d):
        # Record each data chunk handed to this callback.
        self.fed.append(d)

    def get_data(self):
def handle(self, *args, **options):
    """Initialize talon, then call get_messages with the given options.

    Positional arguments are accepted but not used; the keyword options are
    forwarded unchanged to get_messages.
    """
    # talon is imported inside the method rather than at module level.
    import talon

    talon.init()  # loads machine learning classifiers
    get_messages(**options)
def __init__(self):
    """Initialize talon when the instance is constructed."""
    # initiates the talon machine learning model for parsing signatures
    talon.init()