Example #1
0
def test_link_breaks_quotation_markers_sequence():
    """Links interrupting the run of '>' markers must still leave only 'Blah'."""
    # Case 1: the link opens and closes on a single line.
    single_line_link = """Blah

On Thursday, October 25, 2012 at 3:03 PM, life is short. on Bob wrote:

>
> Post a response by replying to this email
>
 (http://example.com/c/YzOTYzMmE) >
> life is short. (http://example.com/c/YzMmE)
>
"""
    first_reply = quotations.extract_from_plain(single_line_link)
    eq_("Blah", first_reply)

    # Case 2: the link begins after some text and is closed on the next line.
    wrapped_link = """Blah

On Monday, 24 September, 2012 at 3:46 PM, bob wrote:

> [Ticket #50] test from bob
>
> View ticket (http://example.com/action
_nonce=3dd518)
>
"""
    second_reply = quotations.extract_from_plain(wrapped_link)
    eq_("Blah", second_reply)
Example #2
0
def test_pattern_original_message():
    """Each '-----Original Message-----' style splitter must leave only 'Test reply'."""
    plain_splitter = """Test reply

-----Original Message-----

Test"""

    indented_splitter = """Test reply

 -----Original Message-----

Test"""

    # German splitter in its quoted-printable encoded form.
    encoded_german_splitter = """Test reply

 -----Urspr=C3=BCngliche Nachricht-----

Test"""

    german_splitter = u"""Test reply

 -----Ursprüngliche Nachricht-----

Test"""

    bodies = (
        plain_splitter,
        indented_splitter,
        encoded_german_splitter,
        german_splitter,
    )
    for body in bodies:
        eq_("Test reply", quotations.extract_from_plain(body))
Example #3
0
def test_appointment():
    """A date-like line inside the reply must not be taken for a quotation splitter.

    Only the 'From:/Sent:/To:/Subject:' block and what follows it is expected
    to be removed.
    """
    email_text = """Response

10/19/2017 @ 9:30 am for physical therapy
Bla
1517 4th Avenue Ste 300
London CA 19129, 555-421-6780

John Doe, FCLS
Mailgun Inc
555-941-0697

From: [email protected] [mailto:[email protected]]
Sent: Wednesday, October 18, 2017 2:05 PM
To: John Doer - SIU <*****@*****.**>
Subject: RE: Claim # 5551188-1

Text"""

    reply_only = """Response

10/19/2017 @ 9:30 am for physical therapy
Bla
1517 4th Avenue Ste 300
London CA 19129, 555-421-6780

John Doe, FCLS
Mailgun Inc
555-941-0697"""
    extracted = quotations.extract_from_plain(email_text)
    eq_(reply_only, extracted)
Example #4
0
def _check_pattern_original_message(original_message_indicator):
    """Assert that a '-----<indicator>-----' line is treated as a quotation splitter.

    The indicator is converted with ``six.text_type`` and substituted into a
    fixed template; only 'Test reply' is expected to survive extraction.
    """
    template = u"""Test reply

-----{}-----

Test"""
    indicator_text = six.text_type(original_message_indicator)
    body = template.format(indicator_text)
    eq_('Test reply', quotations.extract_from_plain(body))
Example #5
0
def test_norwegian_from_line():
    """A Norwegian 'På ... skrev:' line right after the reply ends the reply."""
    body = u"""Lorem
På 14 september 2015 på 02:23:18, Valentino Rudy ([email protected]) skrev:

Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Lorem', extracted)
def test_pattern_original_message():
    """'-----Original Message-----', flush-left or indented, must leave only 'Test reply'."""
    flush_left = """Test reply

-----Original Message-----

Test"""

    indented = """Test reply

 -----Original Message-----

Test"""

    for body in (flush_left, indented):
        eq_("Test reply", quotations.extract_from_plain(body))
Example #7
0
def test_reply_after_quotations():
    """A reply written below the quoted block is what gets extracted."""
    bottom_posted = """On 04/19/2011 07:10 AM, Roman Tkachenko wrote:

>
> Test
Test reply"""
    extracted = quotations.extract_from_plain(bottom_posted)
    eq_("Test reply", extracted)
Example #8
0
def test_too_many_lines():
    """Pins the expected reply for a body with two lines ahead of the splitter."""
    body = """Test reply
Hi
-----Original Message-----

Test"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #9
0
def test_short_quotation():
    """A single quoted line after 'On ... wrote:' is still removed."""
    body = """Hi

On 04/19/2011 07:10 AM, Roman Tkachenko wrote:

> Hello"""
    extracted = quotations.extract_from_plain(body)
    eq_("Hi", extracted)
Example #10
0
def test_french_from_block():
    """A French 'Le ... a écrit:' line marks the start of the quotation."""
    body = u"""Lorem ipsum

Le 23 janv. 2015 à 22:03, Brendan xxx <[email protected]<mailto:[email protected]>> a écrit:

Bonjour!"""
    extracted = quotations.extract_from_plain(body)
    eq_('Lorem ipsum', extracted)
Example #11
0
def test_vietnamese_from_block():
    """A Vietnamese 'Vào ... đã viết:' line marks the start of the quotation."""
    body = u"""Hello

Vào 14:24 8 tháng 6, 2017, Hùng Nguyễn <*****@*****.**> đã viết:

> Xin chào
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Hello', extracted)
Example #12
0
def test_pattern_on_date_wrote_somebody():
    """A Dutch 'Op <date> schreef <person>:' line marks the start of the quotation."""
    body = """Lorem

Op 13-02-2014 3:18 schreef Julius Caesar <*****@*****.**>:
    
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Lorem', extracted)
Example #13
0
def test_dutch_from_block():
    """A Dutch 'Op ... heeft ... het volgende geschreven:' line starts the quotation."""
    body = """Gluten-free culpa lo-fi et nesciunt nostrud. 

Op 17-feb.-2015, om 13:18 heeft Julius Caesar <*****@*****.**> het volgende geschreven:
    
Small batch beard laboris tempor, non listicle hella Tumblr heirloom. 
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', extracted)
Example #14
0
def test_with_indent():
    """A dash-wrapped 'On ... wrote' line marks the start of the quotation."""
    body = """YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.

------On 12/29/1987 17:32 PM, Julius Caesar wrote-----

Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur. 
    """
    wanted = "YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin."
    extracted = quotations.extract_from_plain(body)
    eq_(wanted, extracted)
Example #15
0
def test_from_block_starts_with_date():
    """A header block that opens with 'Date:' instead of 'From:' is still removed."""
    body = """Blah

Date: Wed, 16 May 2012 00:15:02 -0600
To: [email protected]

"""
    extracted = quotations.extract_from_plain(body)
    eq_('Blah', extracted)
Example #16
0
def test_pattern_on_date_somebody_wrote_allows_space_in_front():
    """A leading space before 'On ... wrote:' must not hide the splitter."""
    body = """Thanks Thanmai
 On Mar 8, 2012 9:59 AM, "Example.com" <
*****@*****.**> wrote:


>**
>  Blah-blah-blah"""
    extracted = quotations.extract_from_plain(body)
    eq_("Thanks Thanmai", extracted)
Example #17
0
def test_android_wrote():
    """The '---- <name> wrote ----' line marks the start of the quotation."""
    body = """Test reply

---- John Smith wrote ----

> quoted
> text
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #18
0
def test_weird_date_format_in_date_block():
    """A 'Date:' header containing a quoted-printable '=2C' still starts the header block."""
    body = """Blah
Date: Fri=2C 28 Sep 2012 10:55:48 +0000
From: [email protected]
To: [email protected]
Subject: [Ticket #8] Test

"""
    extracted = quotations.extract_from_plain(body)
    eq_('Blah', extracted)
Example #19
0
def test_polish_from_block():
    """A Polish 'W dniu ... napisał:' splitter spanning two lines starts the quotation."""
    body = u"""Lorem ipsum

W dniu 28 stycznia 2015 01:53 użytkownik Zoe xxx <*****@*****.**>
napisał:

Blah!
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Lorem ipsum', extracted)
Example #20
0
def test_feedback_below_left_unparsed():
    """A dashed 'Enter Feedback Below' banner is not a quotation splitter.

    The whole body is expected to come back unchanged.
    """
    msg_body = """Please enter your feedback below. Thank you.

------------------------------------- Enter Feedback Below -------------------------------------

The user experience was unparallelled. Please continue production. I'm sending payment to ensure
that this line is intact."""

    parsed = quotations.extract_from_plain(msg_body)
    # A Python 3 str has no .decode(), so calling it unconditionally raises
    # AttributeError; decode only when bytes actually came back.
    if isinstance(parsed, bytes):
        parsed = parsed.decode('utf8')
    eq_(msg_body, parsed)
Example #21
0
def test_appointment():
    """'Date:'/'Time:'/'Address:' lines in an invitation are not a quoted header block.

    The whole body is expected to come back unchanged.
    """
    msg_body = """Invitation for an interview:

Date: Wednesday 3, October 2011 
Time: 7 : 00am 
Address: 130 Fox St

Please bring in your ID."""
    parsed = quotations.extract_from_plain(msg_body)
    # A Python 3 str has no .decode(), so calling it unconditionally raises
    # AttributeError; decode only when bytes actually came back.
    if isinstance(parsed, bytes):
        parsed = parsed.decode('utf8')
    eq_(msg_body, parsed)
Example #22
0
def test_date_time_email_splitter():
    """A '<date> <time> GMT+.. <name> <email>:' line wrapped over two lines starts the quotation."""
    body = """Test reply

2014-10-17 11:28 GMT+03:00 Postmaster <
*****@*****.**>:

> First from site
>
    """
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #23
0
def test_link_closed_with_quotation_marker_on_new_line():
    """A link whose closing '>' lands at the start of the next line must not confuse extraction."""
    body = '''8.45am-1pm

From: [email protected]

<http://email.example.com/c/dHJhY2tpbmdfY29kZT1mMDdjYzBmNzM1ZjYzMGIxNT
>  <[email protected] <mailto:[email protected]> >

Requester: '''
    extracted = quotations.extract_from_plain(body)
    eq_('8.45am-1pm', extracted)
Example #24
0
def test_pattern_on_date_somebody_wrote_date_with_dots_german():
    """A German 'Am <dd.mm.yyyy> ... schrieb <person>:' line starts the quotation."""
    body = """Test reply

Am 25.11.2014 14:59 schrieb Roman Tkachenko:

>
> Test.
>
> Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #25
0
def test_pattern_on_date_somebody_wrote_date_with_slashes():
    """An 'On <mm/dd/yyyy> ... wrote:' line starts the quotation."""
    body = """Test reply

On 04/19/2011 07:10 AM, Roman Tkachenko wrote:

>
> Test.
>
> Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #26
0
def test_pattern_on_date_somebody_sent():
    """An 'On <date>, at <time>, <person> sent:' line starts the quotation."""
    body = """Test reply

On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <*****@*****.**> sent:

>
> Test
>
> Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #27
0
def test_english_from_block():
    """An English 'From:/Sent:/To:/Subject:' block starts the quotation."""
    body = """Allo! Follow up MIME!

From: [email protected]
Sent: March-19-11 5:42 PM
To: Somebody
Subject: The manager has commented on your Loop

Blah-blah-blah
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Allo! Follow up MIME!', extracted)
Example #28
0
def test_quotation_separator_takes_3_lines():
    """An 'On ... wrote:' splitter wrapped across three lines is still recognised."""
    body = """Test reply

On Nov 30, 2011, at 12:47 PM, Somebody <
*****@*****.**>
wrote:

Test message
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #29
0
def test_swedish_from_block():
    """A Swedish 'Från:/Skickat:/Till:/Ämne:' block starts the quotation."""
    body = u"""Allo! Follow up MIME!
Från: Anno Sportel [mailto:[email protected]]
Skickat: den 26 augusti 2015 14:45
Till: Isacson Leiff
Ämne: RE: Week 36

Blah-blah-blah
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Allo! Follow up MIME!', extracted)
Example #30
0
def test_bold_from_block():
    """An indented header block with '*From:*' style bold markers starts the quotation."""
    body = """Hi

  *From:* [email protected] [mailto:
  [email protected]]
  *Sent:* Wednesday, June 27, 2012 3:05 PM
  *To:* [email protected]
  *Subject:* Hello

"""
    extracted = quotations.extract_from_plain(body)
    eq_("Hi", extracted)
Example #31
0
def test_french_multiline_from_block():
    """A French 'De :/Envoyé :/À :/Objet :' block starts the quotation."""
    body = u"""Lorem ipsum

De : Brendan xxx [mailto:[email protected]]
Envoyé : vendredi 23 janvier 2015 16:39
À : Camille XXX
Objet : Follow Up

Blah-blah-blah
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Lorem ipsum', extracted)
def test_swedish_from_block():
    """A Swedish 'Från:/Skickat:/Till:/Ämne:' block starts the quotation."""
    swedish_mail = u"""Allo! Follow up MIME!
Från: Anno Sportel [mailto:[email protected]]
Skickat: den 26 augusti 2015 14:45
Till: Isacson Leiff
Ämne: RE: Week 36

Blah-blah-blah
"""
    reply = quotations.extract_from_plain(swedish_mail)
    eq_("Allo! Follow up MIME!", reply)
Example #33
0
def test_pattern_on_date_polymail():
    """An 'On ... wrote:' splitter broken up by a '<mailto:...>' block is still recognised."""
    body = """Test reply

On Tue, Apr 11, 2017 at 10:07 PM John Smith

<
mailto:John Smith <*****@*****.**>
> wrote:
Test quoted data
"""

    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
Example #34
0
def test_german_from_block():
    """A German 'Von:/Gesendet:/An:/Betreff:' block starts the quotation."""
    body = """Allo! Follow up MIME!

Von: [email protected]
Gesendet: Dienstag, 25. November 2014 14:59
An: Somebody
Betreff: The manager has commented on your Loop

Blah-blah-blah
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Allo! Follow up MIME!', extracted)
Example #35
0
def test_english_from_block():
    """An English 'From:/Sent:/To:/Subject:' block starts the quotation."""
    english_mail = """Allo! Follow up MIME!

From: [email protected]
Sent: March-19-11 5:42 PM
To: Somebody
Subject: The manager has commented on your Loop

Blah-blah-blah
"""
    reply = quotations.extract_from_plain(english_mail)
    eq_('Allo! Follow up MIME!', reply)
Example #36
0
def test_danish_from_block():
    """A Danish 'Fra:/Sendt:/Til:/Emne:' block starts the quotation."""
    body = """Allo! Follow up MIME!

Fra: [email protected]
Sendt: 19. march 2011 12:10
Til: Somebody
Emne: The manager has commented on your Loop

Blah-blah-blah
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Allo! Follow up MIME!', extracted)
Example #37
0
def test_dont_parse_quotations_for_forwarded_messages():
    """A forwarded message is expected to come back unchanged."""
    forwarded = """FYI

---------- Forwarded message ----------
From: [email protected]
Date: Tue, Sep 4, 2012 at 1:35 PM
Subject: Two
line subject
To: [email protected]

Text"""
    extracted = quotations.extract_from_plain(forwarded)
    eq_(forwarded, extracted)
Example #38
0
def push_to_api(message):
    """Extract the reply part of an email and POST it to the configured API.

    ``message`` is indexed like a mapping with ``'id'`` and ``'payload'`` keys
    (plus ``'snippet'`` for single-part HTML mail). NOTE(review): this looks
    like a Gmail API message resource -- confirm against the caller.

    The endpoint and the authorization header are read from the
    ``API_ENDPOINT`` and ``API_AUTHORIZATION_HEADER`` environment variables.

    Raises:
        ValueError: if the payload is neither multipart nor a single
            ``text/html`` body accompanied by a snippet.
        Whatever ``response.raise_for_status()`` raises when the API answers
        with an error status.
    """
    email_id = message['id']

    from_header = get_header(message, 'From')
    if '<' in from_header:
        # Split on the last '<' itself rather than on ' <': a header such as
        # '<user@example.com>' has no space before the bracket and used to
        # raise IndexError.
        display_name, _, remainder = from_header.rpartition('<')
        from_email = remainder.split('>')[0]
        from_name = display_name.strip().strip('"')
        if from_name == from_email:
            from_name = None
    else:
        from_email = from_header
        from_name = None

    subject = get_header(message, 'Subject')

    email_payload = message['payload']
    is_single_html_part = (
        'mimeType' in email_payload
        and email_payload['mimeType'] == 'text/html'
        and 'body' in email_payload
        and 'snippet' in message
    )

    if 'parts' in email_payload:
        full_html = get_body_by_mime_type(message, 'text/html')
        html_reply = quotations.extract_from_html(full_html)
        full_text = get_body_by_mime_type(message, 'text/plain')
        text_reply = quotations.extract_from_plain(full_text)
    elif is_single_html_part:
        full_html = decode_base_64_data(email_payload['body']['data'])
        html_reply = quotations.extract_from_html(full_html)
        # No plain-text part here: the snippet stands in for both the full
        # text and the text reply.
        full_text = message['snippet']
        text_reply = message['snippet']
    else:
        # Raising a bare string is a TypeError on Python 3 (and 2.6+); raise a
        # real exception so the intended message reaches the caller.
        raise ValueError('Unsupported email format')

    payload = {
        'emailId': email_id,
        'fromEmail': from_email,
        'fromName': from_name,
        'subject': subject,
        'fullHtml': full_html,
        'htmlReply': html_reply,
        'fullText': full_text,
        'textReply': text_reply,
    }

    headers = {
        'authorization': os.getenv('API_AUTHORIZATION_HEADER'),
        'accept': 'application/vnd.faultfixers.v14+json',
        'content-type': 'application/json',
    }

    response = requests.post(os.getenv('API_ENDPOINT'),
                             headers=headers,
                             json=payload)

    response.raise_for_status()
Example #39
0
def test_feedback_below_left_unparsed():
    """A dashed 'Enter Feedback Below' banner is not a splitter; the body comes back unchanged."""
    feedback_mail = """Please enter your feedback below. Thank you.

------------------------------------- Enter Feedback Below -------------------------------------

The user experience was unparallelled. Please continue production. I'm sending payment to ensure
that this line is intact."""

    result = quotations.extract_from_plain(feedback_mail)
    # Normalise a bytes result to text before comparing.
    result_text = result.decode('utf8') if isinstance(result, bytes) else result
    eq_(feedback_mail, result_text)
Example #40
0
def test_appointment_2():
    """'Date:'/'Time:'/'Address:' lines in an invitation are left alone; the body comes back unchanged."""
    invitation = """Invitation for an interview:

Date: Wednesday 3, October 2011
Time: 7 : 00am
Address: 130 Fox St

Please bring in your ID."""
    result = quotations.extract_from_plain(invitation)
    # Normalise a bytes result to text before comparing.
    result_text = result.decode('utf8') if isinstance(result, bytes) else result
    eq_(invitation, result_text)
Example #41
0
def test_reply_and_quotation_splitter_share_line():
    """Reply text that shares a line with the 'On ... wrote:' splitter is kept."""
    # The reply and the 'On <date> <person> wrote:' splitter sit on one line.
    same_line = """reply On Wed, Apr 4, 2012 at 3:59 PM, [email protected] wrote:
> Hi"""
    eq_('reply', quotations.extract_from_plain(same_line))

    # Same, but with the '--- On <date> <person> wrote:' form of the splitter.
    dashed_same_line = """reply--- On Wed, Apr 4, 2012 at 3:59 PM, [email protected] wrote:
> Hi"""
    eq_('reply', quotations.extract_from_plain(dashed_same_line))

    # The '--- On ...' form again, with '-' characters inside the reply itself.
    dashes_in_reply = """reply
bla-bla - bla--- On Wed, Apr 4, 2012 at 3:59 PM, [email protected] wrote:
> Hi"""
    wanted = """reply
bla-bla - bla"""

    extracted = quotations.extract_from_plain(dashes_in_reply)
    eq_(wanted, extracted)
Example #42
0
def extract_body(message: message.Message) -> str:
    """Return the reply text of ``message`` with quotations removed.

    The ``text/plain`` part is preferred. When only ``text/html`` is present,
    its quotation-stripped HTML is converted to markdown. Raises
    ``ZulipEmailForwardError`` when neither part is found.
    """
    plain_part = get_message_part_by_type(message, "text/plain")
    if plain_part:
        plain_reply = quotations.extract_from_plain(plain_part)
        return plain_reply

    # No plaintext available: fall back to the HTML part and tidy it up.
    html_part = get_message_part_by_type(message, "text/html")
    if not html_part:
        raise ZulipEmailForwardError("Unable to find plaintext or HTML message body")

    html_reply = quotations.extract_from_html(html_part)
    return convert_html_to_markdown(html_reply)
Example #43
0
def get_message_body(message: EmailMessage) -> Optional[str]:
    """
    Return the cleaned core body of an email, or None when it is unusable.

    The project is after unique, unstructured text of a reasonable length, so
    inline (forwarded/replied) mails are removed and bodies that are too
    short or too long are rejected.

    NOTE: As part of the identification of actionable emails, the above is subject to change, but for now the
    presumption is otherwise.

    The Enron dataset used for testing contains quite a number of poorly parsed email bodies.
    Problems such as URLs split incorrectly across lines cause havoc unless the data is cleaned.
    We will have to make do for now; some of the 'strange' parsing seen here is a direct result of that data.

    READING:
        * General discussion on the topic.
        https://en.wikipedia.org/wiki/Posting_style

        * MailGun cited these papers for their 'Talon' project.
        http://www.cs.cmu.edu/~vitor/papers/sigFilePaper_finalversion.pdf
        http://www.cs.cornell.edu/people/tj/publications/joachims_01a.pdf

    :param message: a parsed EmailMessage
    :return: cleaned message body as a string, or None if no usable body was found
    """
    body_part: Optional[EmailMessage] = message.get_body()  # type: ignore
    if not isinstance(body_part, EmailMessage):
        return None

    raw_text: Optional[str] = extract_core_message_body(body_part)
    if not (raw_text and isinstance(raw_text, str)):
        return None

    # Drop inline mails first.
    cleaned: str = remove_inline_message(raw_text)

    # Reject on length before doing any of the more expensive clean-up.
    if not is_valid_length(text=cleaned, minimum=250, maximum=5_000):
        return None

    # HTML bodies still carry markup at this point.
    if "html" in body_part.get_content_subtype():
        cleaned = strip_html_contents(text=cleaned)

    # Let Talon attempt to remove quoted material.
    return str(quotations.extract_from_plain(cleaned))
Example #44
0
    def post(self, request):
        """
        Receive conversation replies via e-mail

        Requests whose HTTP_X_MESSAGESYSTEMS_WEBHOOK_TOKEN header does not
        match settings.SPARKPOST_RELAY_SECRET are answered with 403. For every
        relayed message the reply text is extracted and stored as a
        ConversationMessage authored by the user encoded in the recipient
        address.
        """

        token = request.META.get('HTTP_X_MESSAGESYSTEMS_WEBHOOK_TOKEN')
        if token is None or token != settings.SPARKPOST_RELAY_SECRET:
            forbidden_body = {
                'message':
                'Invalid HTTP_X_MESSAGESYSTEMS_WEBHOOK_TOKEN header'
            }
            return Response(status=status.HTTP_403_FORBIDDEN,
                            data=forbidden_body)

        # Collect every event's messages up front, before any is processed.
        batches = [event['msys'].values() for event in request.data]
        for batch in batches:
            for relayed in batch:
                # 1. get email content and reply-to
                recipient = parseaddr(relayed['rcpt_to'])[1]
                content = relayed['content']

                # 2. the local part of reply-to encodes conversation, user and thread
                #    (the lookups below fail if they don't exist)
                local_part = recipient.split('@')[0]
                parsed_ids = parse_local_part(local_part)
                conversation_id, user_id, thread_id = parsed_ids
                user = get_user_model().objects.get(id=user_id)

                if thread_id is None:
                    thread = None
                    conversation = Conversation.objects.get(id=conversation_id)
                else:
                    thread = ConversationMessage.objects.get(id=thread_id)
                    conversation = thread.conversation

                is_participant = conversation.participants.filter(
                    id=user.id).exists()
                if not is_participant:
                    raise Exception('User not in conversation')

                # 3. extract the email reply text and add it to the conversation
                reply_plain = quotations.extract_from_plain(content['text'])

                ConversationMessage.objects.create(
                    author=user,
                    conversation=conversation,
                    thread=thread,
                    content=reply_plain,
                    received_via='email',
                )

        return Response(status=status.HTTP_200_OK, data={})
Example #45
0
def test_reply_wraps_quotations():
    """Reply text both above and below the quoted block is kept."""
    body = """Test reply

On 04/19/2011 07:10 AM, Roman Tkachenko wrote:

>
> Test

Regards, Roman"""

    wanted = """Test reply

Regards, Roman"""

    extracted = quotations.extract_from_plain(body)
    eq_(wanted, extracted)
Example #46
0
def test_forwarded_message_in_quotations():
    """A forwarded message sitting inside the quoted part is removed with it."""
    body = """Blah

-----Original Message-----

FYI

---------- Forwarded message ----------
From: [email protected]
Date: Tue, Sep 4, 2012 at 1:35 PM
Subject: Two
line subject
To: [email protected]

"""
    extracted = quotations.extract_from_plain(body)
    eq_("Blah", extracted)
Example #47
0
def test_short_quotation_with_newline():
    """Everything from the first 'On ... wrote:' line onwards is removed, nested reply included."""
    body = """Btw blah blah...

On Tue, Jan 27, 2015 at 12:42 PM -0800, "Company" <*****@*****.**> wrote:

Hi Mark,
Blah blah? 
Thanks,Christine 

On Jan 27, 2015, at 11:55 AM, Mark XXX <*****@*****.**> wrote:

Lorem ipsum?
Mark

Sent from Acompli"""
    extracted = quotations.extract_from_plain(body)
    eq_("Btw blah blah...", extracted)
Example #48
0
def test_quotation_separator_takes_2_lines():
    """An 'On ... wrote:' splitter wrapped across two lines is recognised; text after the quote is kept."""
    body = """Test reply

On Fri, May 6, 2011 at 6:03 PM, Roman Tkachenko from Hacker News
<*****@*****.**> wrote:

> Test.
>
> Roman

Regards, Roman"""

    wanted = """Test reply

Regards, Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_(wanted, extracted)
Example #49
0
def test_standard_replies():
    """Yield one eq_ check per '.eml' fixture found in STANDARD_REPLIES.

    The expected reply is read from the sibling '<name>_reply_text' file when
    it exists and defaults to 'Hello' otherwise.
    """
    for entry in os.listdir(STANDARD_REPLIES):
        eml_path = os.path.join(STANDARD_REPLIES, entry)
        is_eml_file = eml_path.endswith('.eml') and not os.path.isdir(eml_path)
        if not is_eml_file:
            continue
        with open(eml_path) as eml_file:
            message = email.message_from_file(eml_file)
            plain_parts = email.iterators.typed_subpart_iterator(
                message, subtype='plain')
            body = next(plain_parts)
            text = ''.join(body_iterator(body, True))

            stripped_text = quotations.extract_from_plain(text)

            reply_text = 'Hello'
            expected_path = eml_path[:-4] + '_reply_text'
            if os.path.isfile(expected_path):
                with open(expected_path) as expected_file:
                    reply_text = expected_file.read().strip()

            details = {'reply': reply_text, 'stripped': stripped_text,
                       'fn': eml_path}
            failure_note = "'%(reply)s' != %(stripped)s for %(fn)s" % details
            yield eq_, reply_text, stripped_text, failure_note
def test_standard_replies():
    """Generate an eq_ check for every '.eml' fixture under STANDARD_REPLIES.

    Each fixture's plain-text part is run through extract_from_plain and
    compared with the contents of '<name>_reply_text', or with 'Hello' when
    that file is absent.
    """
    for listed_name in os.listdir(STANDARD_REPLIES):
        fixture = os.path.join(STANDARD_REPLIES, listed_name)
        if not fixture.endswith(".eml"):
            continue
        if os.path.isdir(fixture):
            continue
        with open(fixture) as fixture_handle:
            message = email.message_from_file(fixture_handle)
            body = next(
                email.iterators.typed_subpart_iterator(message,
                                                       subtype="plain"))
            chunks = body_iterator(body, True)
            text = "".join(chunks)

            stripped_text = quotations.extract_from_plain(text)
            reply_text_fn = fixture[:-4] + "_reply_text"
            if not os.path.isfile(reply_text_fn):
                reply_text = "Hello"
            else:
                with open(reply_text_fn) as reply_handle:
                    reply_text = reply_handle.read().strip()
            context = {
                "reply": reply_text,
                "stripped": stripped_text,
                "fn": fixture,
            }
            yield eq_, reply_text, stripped_text, "'%(reply)s' != %(stripped)s for %(fn)s" % context
Example #51
0
def test_empty_body():
    """An empty body yields an empty reply."""
    extracted = quotations.extract_from_plain('')
    eq_('', extracted)
Example #52
0
    def extractBody(self, s):
        """Return the body of email ``s`` with quotations and signature removed."""
        raw_body = self.extractBodyFromEmail(s)
        without_quotes = quotations.extract_from_plain(raw_body)
        # extract_signature yields a (text, signature) pair; only the text is wanted.
        text, _ = extract_signature(without_quotes)
        return text
Example #53
0
# Demo script: run Talon's quotation and signature extraction on one sample
# email and print both results.
__author__ = 'a_medelyan'
import talon
from talon import quotations
from talon.signature.bruteforce import extract_signature

talon.init()

text = "The price is still 91.87.\n\nKeoni Almeida\nCalifornia Independent System Operator\nphone: 916/608-7053\npager:  916/814-7352\nalpha page:  [email protected]\ne-mail:  <mailto:[email protected]>\n\n\n\n> -----Original Message-----\n> From:\tCRCommunications\n> Sent:\tFriday, June 22, 2001 11:34 AM\n> To:\tISO Market Participants\n> Subject:\tCAISO Notice: Update to June 20 Market Notice\n>\n>  <<MARKET NOTICE 010622_.doc>>\n>\n> Market Participants:\n> Please read the attached explanation of Footnote 14 in the California ISO\n> June 20, 2001, Market Notice.\n>\n> CR Communications\n> Client Relations Communications\n\n - MARKET NOTICE 010622_.doc"

reply = quotations.extract_from_plain(text)
# extract_signature's result is indexed at [1] for the signature; note that it
# is applied to the full text here, not to the extracted reply.
signature = extract_signature(text)[1]

# The Python 2 print statement is a SyntaxError on Python 3. A single-argument
# print(...) call is valid on both and produces the same output; the doubled
# space reproduces the separator the old statement inserted.
print("Reply:  %s" % (reply,))

print("Signature:  %s" % (signature,))
Example #54
0
    # print message_id
    # print csv_signature
    # print csv_authored_content
    # print "-------"

    text = dataset[message_id]

    # find talon signatures
    results = extract_signature(text)
    if results[1]:
        talon_signature = results[1].split('\n')
    else:
        talon_signature = []

    #find talon authored content
    talon_authored_content = quotations.extract_from_plain(text).split('\n')

    # do a comparative scoring of results found
    if len(talon_signature) > 0 or len(csv_signature) > 0:
        required = set(csv_signature)
        signature_lines_total += len(csv_signature)
        for line in talon_signature:
            if len(line) > 0:
                signature_lines_talon += 1.0
                if line in required:
                    signature_lines_correct += 1.0

    if len(talon_authored_content) > 0 or len(csv_authored_content) > 0:
        required = set(csv_authored_content)
        ac_lines_total += len(csv_authored_content)
        for line in talon_authored_content:
Example #55
0
def test_line_starts_with_on():
    """A line that merely begins with 'On' is not a splitter; the body comes back unchanged."""
    body = """Blah-blah-blah
On blah-blah-blah"""
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
Example #56
0
def test_pattern_date_email_with_unicode():
    """A '<date> <name> <email>' splitter with non-ASCII characters in the name is recognised."""
    body = """Replying ok
2011/4/7 Nathan \xd0\xb8ova <*****@*****.**>

>  Cool beans, scro"""
    extracted = quotations.extract_from_plain(body)
    eq_("Replying ok", extracted)
Example #57
0
def test_quotation_marker_false_positive():
    """A decorative '>>> ... <<<' line is not a quotation; the body comes back unchanged."""
    body = """Visit us now for assistance...
>>> >>>  http://www.domain.com <<<
Visit our site by clicking the link above"""
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
Example #58
0
def test_from_block_starts_with_date():
    """A header block opening with 'Date:' is removed even without a trailing newline."""
    body = """Blah

Date: Wed, 16 May 2012 00:15:02 -0600
To: [email protected]"""
    extracted = quotations.extract_from_plain(body)
    eq_('Blah', extracted)
Example #59
0
def test_reply_quotations_share_block():
    """The REPLY_QUOTATIONS_SHARE_BLOCK fixture yields a non-empty reply without 'From'."""
    reply = quotations.extract_from_plain(REPLY_QUOTATIONS_SHARE_BLOCK)
    ok_(reply)
    from_header_gone = 'From' not in reply
    ok_(from_header_gone)
Example #60
0
def test_preprocess_postprocess_2_links():
    """Two angle-bracketed links on one line come back unchanged."""
    two_links = "<http://link1> <http://link2>"
    extracted = quotations.extract_from_plain(two_links)
    eq_(two_links, extracted)