Ejemplo n.º 1
0
def test_long_line_in_signature():
    """Check how an overly long line near the end of a message is treated.

    First scenario: only the final ``Bob`` line is expected as the signature,
    and everything before it is expected as the text.
    Second scenario: a body without a sign-off is expected to produce no
    signature (``None``).
    """
    sender = "*****@*****.**"
    body = """Call me ASAP, please.This is about the last changes you deployed.

Thanks in advance,
some long text here which doesn't seem to be a signature at all
Bob"""

    text, extracted_signature = signature.extract(body, sender)
    eq_('\n'.join(body.splitlines()[:-1]), text)
    eq_('Bob', extracted_signature)

    body = """Thanks David,

    some *long* text here which doesn't seem to be a signature at all
    """
    # Previously the expected/actual pair was built as a bare tuple and thrown
    # away, so this scenario verified nothing. Only the signature half is
    # asserted here: whether extract() hands the body back verbatim (including
    # its trailing whitespace) is not established by this test module.
    _, extracted_signature = signature.extract(body, "*****@*****.**")
    eq_(None, extracted_signature)
Ejemplo n.º 2
0
def test_message_shorter_SIGNATURE_MAX_LINES():
    """A four-line message splits into its first two and its last two lines."""
    body = """Call me ASAP, please.This is about the last changes you deployed.

Thanks in advance,
Bob"""
    sender = "*****@*****.**"

    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-2:])

    text, extracted_signature = signature.extract(body, sender)

    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
Ejemplo n.º 3
0
def test_over_2_text_lines_after_signature():
    """No signature is expected when plain text lines follow the name line."""
    message = """Blah

    Bob,
    If there are more than
    2 non signature lines in the end
    It's not signature
    """
    # Only the signature half of the result matters for this check.
    _text, found_signature = signature.extract(message, "Bob")
    eq_(found_signature, None)
Ejemplo n.º 4
0
def test_text_line_in_signature():
    """The signature is expected to be one solid block of trailing lines.

    The plain text line between the sign-off and the name is expected to be
    kept inside the signature rather than splitting it.
    """
    body = """Call me ASAP, please.This is about the last changes you deployed.

Thanks in advance,
some text which doesn't seem to be a signature at all
Bob"""
    sender = "*****@*****.**"

    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-3:])

    text, extracted_signature = signature.extract(body, sender)

    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
Ejemplo n.º 5
0
def test_messages_longer_SIGNATURE_MAX_LINES():
    """Run extraction over every ``*_body`` fixture found in ``STRIPPED``.

    Each ``<name>_body`` file is paired with a ``<name>_signature`` file that
    holds the expected signature. The expected text is the stripped body with
    as many trailing characters removed as the signature file contains.
    """
    for entry in os.listdir(STRIPPED):
        body_path = os.path.join(STRIPPED, entry)
        if body_path.endswith('_body'):
            sender, body = dataset.parse_msg_sender(body_path)
            text, found_signature = signature.extract(body, sender)
            # A missing signature is compared as an empty string.
            found_signature = found_signature or ''
            signature_path = body_path[:-len('body')] + 'signature'
            with open(signature_path) as handle:
                msg_signature = handle.read()
                eq_(msg_signature.strip(), found_signature.strip())
                trimmed_body = body.strip()
                cut = len(trimmed_body) - len(msg_signature)
                eq_(trimmed_body[:cut].strip(), text.strip())
Ejemplo n.º 6
0
def test_messages_longer_SIGNATURE_MAX_LINES():
    """Run extraction over every ``*_body`` fixture found in ``STRIPPED``.

    Each ``<name>_body`` file is paired with a ``<name>_signature`` file that
    holds the expected signature. The expected text is the stripped body with
    as many trailing characters removed as the signature file contains.
    """
    import sys

    # ``encoding`` is only passed to open() on Python 3.
    kwargs = {"encoding": "utf8"} if sys.version_info > (3, 0) else {}

    for entry in os.listdir(STRIPPED):
        body_path = os.path.join(STRIPPED, entry)
        if body_path.endswith('_body'):
            sender, body = dataset.parse_msg_sender(body_path)
            text, found_signature = extract(body, sender)
            # A missing signature is compared as an empty string.
            found_signature = found_signature or ''
            signature_path = body_path[:-len('body')] + 'signature'
            with open(signature_path, **kwargs) as handle:
                msg_signature = handle.read()
                eq_(msg_signature.strip(), found_signature.strip())
                trimmed_body = body.strip()
                cut = len(trimmed_body) - len(msg_signature)
                eq_(trimmed_body[:cut].strip(), text.strip())
Ejemplo n.º 7
0
    def __init__(self, email_string):
        """
        Parse a raw email string and populate the message attributes.

        Sets ``str``, ``raw``, ``recipients``, ``sender``, ``subject``, ``id``,
        ``date``, ``content_encoding``, ``full_body``, ``body`` and
        ``signature`` on the instance.

        :param email_string: the raw email text handed to ``mime.from_string``.
        """
        self.str = email_string
        self.raw = mime.from_string(self.str)

        to = self.raw.headers['To']
        if to is None:
            self.recipients = []
        else:
            to = to.lower()
            # A comma is taken to mean several addresses; otherwise parse one.
            self.recipients = address.parse_list(to) if ',' in to else [address.parse(to)]

        # It's possible a recipient is None if it is something like
        # 'Undisclosed recipients:;'
        self.recipients = [r for r in self.recipients if r is not None]
        # NOTE(review): a message without a From header would fail on .lower()
        # here - confirm whether such input can reach this constructor.
        self.sender = address.parse(self.raw.headers['From'].lower())

        self.subject = self.raw.subject
        self.id = self.raw.message_id
        self.date = parse(self.raw.headers['Date'])
        self.content_encoding = self.raw.content_encoding[0]

        # Extract plaintext body. Start from an empty body so that a multipart
        # message without a text/plain part (or a content type that is neither
        # single- nor multipart) no longer leaves ``full_body`` unset, which
        # previously surfaced as an AttributeError a few lines below.
        self.full_body = ''
        if self.raw.content_type.is_singlepart():
            self.full_body = self.raw.body
        elif self.raw.content_type.is_multipart():
            for p in self.raw.parts:
                if p.content_type == 'text/plain':
                    # Only the first text/plain part is used.
                    self.full_body = p.body
                    break

        # Try to get signature
        self.body, self.signature = extract_signature(self.full_body)

        # Try ML approach if necessary
        if self.signature is None:
            self.body, self.signature = signature.extract(self.full_body, sender=self.sender)

        # Get replies only, not the quotes
        self.body = quotations.extract_from(self.body, 'text/plain')
Ejemplo n.º 8
0
def test_capitalized():
    """The closing three-line contact block is expected as the signature."""
    expected_signature = """John Smith
Doe Inc
555-531-7967"""

    message = """Hi Mary,

Do you still need a DJ for your wedding? I've included a video demo of one of our DJs available for your wedding date.

DJ Doe 
http://example.com
Password: SUPERPASSWORD

Would you like to check out more?


At your service,

John Smith
Doe Inc
555-531-7967"""

    # Index 1 of the result is the extracted signature.
    outcome = extract(message, 'Doe')
    eq_(expected_signature, outcome[1])
Ejemplo n.º 9
0
def test_signature_extract_crash(has_signature):
    """When ``has_signature`` raises, the body is returned with no signature.

    ``has_signature`` is presumably a mock injected by a patch decorator that
    is not visible in this snippet.
    """
    body = u"Blah\r\n--\r\n\r\nСергей"
    has_signature.side_effect = Exception("Bam!")

    outcome = extract(body, "Сергей")

    eq_((body, None), outcome)
Ejemplo n.º 10
0
def test_signature_extract_crash(has_signature):
    """When ``has_signature`` raises, the body is returned with no signature.

    ``has_signature`` is presumably a mock injected by a patch decorator that
    is not visible in this snippet.
    """
    body = 'Blah\r\n--\r\n\r\nСергей'
    has_signature.side_effect = Exception('Bam!')

    outcome = signature.extract(body, 'Сергей')

    eq_((body, None), outcome)
Ejemplo n.º 11
0
import talon
# Initialise the library before using the signature module:
# init() loads the machine learning classifiers.
talon.init()

from talon import signature

message = """Annette Anderson,
11833 Spring Laurel DR
Charlotte, NC 28215
704-724-4697
 
Client is a referral from our chiro- she is there now about to have her appointment. Please reach out in the next hour!
 
Respectfully,

Lacie N. Johnson
Intake Paralegal 
Law Offices of Shane Smith, PC
263 Hwy 74 N
Peachtree City, GA 30269

770.487.8999 ext. 42
770.631.7667 fax
"""

# Bind the result to its own name so the imported ``signature`` module is not
# overwritten by the extracted string.
text, extracted_signature = signature.extract(message, sender='*****@*****.**')
# The parenthesised form prints the same under Python 2 and is valid Python 3.
print(text)
Ejemplo n.º 12
0
 def signature(self, sender, message):
     """Log the sender, extract the signature from ``message`` and return it.

     Only the second element returned by ``signature.extract`` is handed
     back; the remaining text is discarded. Inside this body ``signature``
     resolves to the module-level name, not to this method.
     """
     logger.info('message from %s', sender)
     _text, found = signature.extract(message, sender)
     logger.info('extracted %s', found)
     return found
Ejemplo n.º 13
0
def test_basic():
    """The text before the ``--`` line is the body; the rest is the signature."""
    message = 'Blah\r\n--\r\n\r\nSergey Obukhov'
    expected = ('Blah', '--\r\n\r\nSergey Obukhov')

    outcome = signature.extract(message, 'Sergey')

    eq_(expected, outcome)
Ejemplo n.º 14
0
def test_no_signature():
    """A one-word body is expected back unchanged with a ``None`` signature."""
    sender = "*****@*****.**"
    body = "Hello"

    outcome = extract(body, sender)

    eq_((body, None), outcome)
Ejemplo n.º 15
0
def receive_email(request):
    """Handle an inbound-email webhook request and store it as Comment or Case.

    For POST requests the signature is stripped from ``body-plain``. When the
    recipient contains ``'newpost'`` the remaining text is saved as a Comment.
    Otherwise a Case is created (with an uploaded file attached when one is
    present), and ``return_email_info`` and ``find_rel_questions_email`` are
    called with the new case id.

    Returns ``HttpResponse('OK')`` on every path that runs to completion;
    non-POST requests are acknowledged without any processing.
    """
    if request.method != 'POST':
        return HttpResponse('OK')

    talon.init()
    from talon import signature

    sender = request.POST.get('sender')
    recipient = request.POST.get('recipient')
    subject = request.POST.get('subject', '')
    body_plain = request.POST.get('body-plain', '')
    # Keep the imported module name intact; only the signature-free text is
    # used below.
    text, _signature = signature.extract(body_plain, sender=sender)
    sender_name = get_object_or_404(User.objects.prefetch_related(),
                                    email=sender)
    raw_sender_name = sender_name.username
    synonyms = nltk_rel_words_email(subject + " " + text)
    print("Synonyms = ", synonyms)

    # A reply to a post creates a comment on that post.
    # NOTE(review): ``recipient`` is None when the field is missing, which
    # makes the ``in`` test raise TypeError - confirm the webhook always
    # sends it.
    if 'newpost' in recipient:
        print('found newhost')
        # NOTE(review): findall returns a list of digit runs and that list is
        # passed as ``case`` below - confirm whether a single id was intended.
        post_id = re.findall(r'([0-9]+)', recipient)
        print("Add comment to post ID ", post_id)
        print("Sender name ", sender_name.id)
        try:
            to_save_comment = Comment(
                case=post_id,
                comment=text,
                commented_by=sender_name,
            )
            to_save_comment.save()
            print("Success!")
            return HttpResponse('OK')
        except Exception as e:
            # Best effort: the failure is printed and OK is still returned.
            print(e)

    else:

        if request.FILES:
            # NOTE(review): ``to_save`` is rebound on every iteration, so only
            # the Case built for the last uploaded file is saved - confirm.
            for key in request.FILES:
                uploaded_file = request.FILES[key]
                attachment_name = 'attachment'
                to_save = Case(state=sender_name.state,
                               county=sender_name.county,
                               title=subject,
                               issue_detail=text,
                               created_by=str(raw_sender_name),
                               issue_area_id=determine_area(recipient),
                               related_document=uploaded_file,
                               related_document_name=attachment_name)
        else:
            to_save = Case(
                state=sender_name.state,
                county=sender_name.county,
                title=subject,
                issue_detail=text,
                created_by=str(raw_sender_name),
                issue_area_id=determine_area(recipient),
            )

        to_save.save()
        print(to_save.id)

        return_email_info(sender, recipient, subject, to_save.id)

        find_rel_questions_email(synonyms, determine_area(recipient),
                                 sender_name.county, to_save.id)

    # Returned text is ignored but HTTP status code matters:
    # Mailgun wants to see 2xx, otherwise it will make another attempt in 5 minutes
    return HttpResponse('OK')
Ejemplo n.º 16
0
 def signature(self, sender, message):
     """Return the signature extracted from ``message``, logging both steps.

     The text part of the extraction result is dropped. Inside this body
     ``signature`` resolves to the module-level name, not to this method.
     """
     logger.info('message from %s', sender)
     extraction = signature.extract(message, sender)
     _body, extracted = extraction
     logger.info('extracted %s', extracted)
     return extracted
Ejemplo n.º 17
0
def remove_signature(message, sender):
    """Return the text part of ``signature.extract(message, sender)``.

    The signature part of the result is discarded.
    """
    body_text, _discarded = signature.extract(message, sender)
    return body_text
Ejemplo n.º 18
0
def test_no_signature():
    """A one-word body is expected back unchanged with a ``None`` signature."""
    sender = "*****@*****.**"
    body = "Hello"

    outcome = signature.extract(body, sender)

    eq_((body, None), outcome)
Ejemplo n.º 19
0
def test_handles_unicode():
    """Extraction on the ``UNICODE_MSG`` fixture must complete without raising.

    Nothing is asserted about the result beyond it unpacking into two values.
    """
    parsed = dataset.parse_msg_sender(UNICODE_MSG)
    sender, body = parsed
    _text, _signature = extract(body, sender)
Ejemplo n.º 20
0
def test_basic():
    """Skipped unconditionally; the check below is currently unreachable."""
    raise SkipTest()

    message = "Blah\r\n--\r\n\r\nSergey Obukhov"
    expected = ("Blah", "--\r\n\r\nSergey Obukhov")
    outcome = extract(message, "Sergey")
    eq_(expected, outcome)
Ejemplo n.º 21
0
def test_signature_extract_crash(has_signature):
    """When ``has_signature`` raises, the body is returned with no signature.

    ``has_signature`` is presumably a mock injected by a patch decorator that
    is not visible in this snippet.
    """
    body = u'Blah\r\n--\r\n\r\nСергей'
    has_signature.side_effect = Exception('Bam!')

    outcome = extract(body, 'Сергей')

    eq_((body, None), outcome)
Ejemplo n.º 22
0
def test_basic():
    """The text before the ``--`` line is the body; the rest is the signature."""
    message = 'Blah\r\n--\r\n\r\nSergey Obukhov'
    expected = ('Blah', '--\r\n\r\nSergey Obukhov')

    outcome = extract(message, 'Sergey')

    eq_(expected, outcome)
Ejemplo n.º 23
0
def test_handles_unicode():
    """Extraction on the ``UNICODE_MSG`` fixture must complete without raising.

    Nothing is asserted about the result beyond it unpacking into two values.
    """
    parsed = dataset.parse_msg_sender(UNICODE_MSG)
    sender, body = parsed
    _text, _signature = signature.extract(body, sender)
def extractSignature_MachineLearning(message, sender):
    """Return ``(body, signature)`` as produced by ``signature.extract``.

    ``sender`` is forwarded as a keyword argument.
    """
    text_part, signature_part = signature.extract(message, sender=sender)
    return text_part, signature_part