예제 #1
0
def test_attachment_reconstruction():
    """Check that EmailV1.restoreAttachments puts a real attachment back in place.

    The raw message below is a two-part multipart/related mail: a text/plain
    part and a ``dummy/dummy`` placeholder part whose filename is ``handle``.
    The test maps ``handle`` to a freshly parsed attachment and expects the
    restored message to still have two parts, the second being that
    attachment.
    """
    # Fixture message; the second part is the placeholder keyed by filename="handle".
    raw_message = """Received: ConsoleMessageDelivery
From: "(Secure) iOS Dev" <*****@*****.**>
Content-Type: multipart/related; boundary="Apple-Mail=_B3D4A2AE-CBE5-47E8-86E4-5052190755A6"; type="text/plain"
Subject:
Message-Id: <*****@*****.**>
Date: Mon, 27 Jun 2016 10:43:03 -0400
To: iOS Dev <*****@*****.**>
Mime-Version: 1.0 (Mac OS X Mail 9.3 \(3124\))

--Apple-Mail=_B3D4A2AE-CBE5-47E8-86E4-5052190755A6
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
charset=us-ascii

blah blah blah

--Apple-Mail=_B3D4A2AE-CBE5-47E8-86E4-5052190755A6
Content-Transfer-Encoding: base64
Content-Disposition: dummy; filename="handle"
Content-Type: dummy/dummy; name="handle"

cGxhY2Vob2xkZXIgZm9yIGFuIGF0dGFjaG1lbnQ=
"""
    # Build the replacement attachment as a standalone MIME entity with random content.
    attachment_content = randUnicode()
    raw_attachment = "Content-Type: text/plain; name=\"test.txt\"\r\nContent-Disposition: attachment; filename=\"test.txt\"\r\n\r\n{}".format(attachment_content)

    message = mime.from_string(raw_message)
    attachment = mime.from_string(raw_attachment)
    # The key matches the placeholder part's filename in raw_message.
    attachments = {"handle": attachment}

    status, restored_message = EmailV1.restoreAttachments(message, attachments)
    assert status
    # Part count is unchanged; the placeholder slot now holds the attachment.
    assert len(restored_message.parts) == 2
    assert restored_message.parts[1] == attachment
예제 #2
0
파일: emailParse.py 프로젝트: abhipec/pec
def emailParse(email, data):
    """Fill the fields of *email* from the raw MIME message in *data*.

    Sets ``sender``, ``subject`` and ``timeStamp`` from the headers, then
    ``textPlain`` / ``textHtml`` from the body parts.  PDF and octet-stream
    parts of a multipart message are written out through
    ``emailUtil.writeFile`` and their filenames are appended
    (comma-terminated) to ``email.attachments``.

    Args:
        email: Mutable record to populate; ``messageId`` and ``attachments``
            are read from it.
        data: Raw message; it is stringified, decoded as UTF-8 and stripped
            of non-ASCII characters before parsing (relies on Python 2
            ``str.decode``).
    """
    msg = mime.from_string(str(data).decode('utf-8').encode('ascii','ignore'))
    sender = msg.headers['From']
    senderEmail = re.search(emailRegex, sender)
    if senderEmail:
        email.sender = senderEmail.group(0)
    else:
        # No address matched: report the header and keep its raw value
        # instead of crashing on ``None.group(0)``.
        print(sender)
        email.sender = sender
    email.subject = msg.headers['Subject']
    date = msg.headers['Date']
    # Drop a trailing parenthesised comment such as "(UTC)" before parsing.
    comment_start = date.find('(')
    if comment_start > -1:
        date = date[:comment_start]
    email.timeStamp = dateParser.parse(date)
    if msg.content_type.is_multipart():
        for part in msg.parts:
            if part.content_type == 'text/plain':
                email.textPlain = cleanText(part.body)
            elif part.content_type == 'text/html':
                email.textHtml = htmlToText(part.body)
            elif part.content_type == 'application/pdf' or part.content_type == 'application/octet-stream':
                # The disposition header is indexed as (value, params).
                filename = part.headers['content-Disposition'][1]['filename']
                emailUtil.writeFile(email.messageId, part.body, filename)
                email.attachments = email.attachments + filename + ','
    elif msg.content_type.is_singlepart():
        if msg.headers['Content-Type'] == 'text/plain':
            email.textPlain = cleanText(msg.body)
        elif msg.headers['Content-Type'] == 'text/html':
            email.textHtml = htmlToText(msg.body)
예제 #3
0
    def _process_message(self, uid, mailbox, download_attachments=DEFAULT_DOWNLOAD_ATTACHMENTS):
        """Fetch message *uid* from *mailbox* and dispatch it as a trigger payload.

        Headers come from a flanker parse of the raw message and are flattened
        before dispatch.  Attachments are downloaded and stored only when the
        message has some and *download_attachments* is truthy.
        """
        msg = mailbox.mail(uid, include_raw=True)
        parsed = mime.from_string(msg.raw)

        text = msg.body
        sender = msg.from_addr
        recipient = msg.to
        title = msg.title
        sent_at = msg.date
        msg_id = msg.message_id
        raw_headers = parsed.headers.items()
        with_attachments = bool(msg.attachments)

        # Flatten the headers so they can be unpickled
        flat_headers = self._flattern_headers(headers=raw_headers)

        payload = {
            'uid': uid,
            'from': sender,
            'to': recipient,
            'headers': flat_headers,
            'date': sent_at,
            'subject': title,
            'message_id': msg_id,
            'body': text,
            'has_attachments': with_attachments,
            'attachments': [],
        }

        if with_attachments and download_attachments:
            self._logger.debug('[IMAPSensor]: Downloading attachments for message {}'.format(uid))
            stored = self._download_and_store_message_attachments(message=msg)
            payload['attachments'] = stored

        self._sensor_service.dispatch(trigger=self._trigger, payload=payload)
예제 #4
0
def process_attachement(attachment, detected_content_type, detected_file_name, origin_domain, passwordlist, sha):
    """Analyse one attachment, recursing into MIME sub-parts, and collect findings.

    The attachment is first parsed as MIME.  If it is multipart, each part
    yielded by ``walk()`` is processed recursively and its indicators, URLs
    and payload results are merged into this call's totals.  The attachment
    itself is then scanned for URLs and handed to ``process_payload``.

    Args:
        attachment: Raw attachment content.
        detected_content_type: Content type reported for the attachment.
        detected_file_name: File name reported for the attachment (may be
            ``None`` or empty).
        origin_domain: Passed through to ``ExtractURL`` and ``process_payload``.
        passwordlist: Candidate passwords.  The file name and its stem are
            appended to this list in place; a filtered copy (entries longer
            than one character) is what ``process_payload`` receives.
        sha: Identifier passed through to the helpers.

    Returns:
        Tuple ``(indicators, suspicious_urls, is_archive, payload_results)``
        where ``suspicious_urls`` is a list and ``is_archive`` is the flag
        ``process_payload`` returned for this attachment.
    """
    indicators = 0
    payload_results = []
    suspicious_urls = set()
    try:
        mpart_attachment = mime.from_string(attachment)
        if mpart_attachment.content_type.is_multipart():
            for p in mpart_attachment.walk():
                # NOTE(review): this rebinds the parameter, so the container is
                # later reported with the content type of its last sub-part.
                # Kept as in the original; confirm whether that is intended.
                detected_content_type = str(p.detected_content_type)
                filename = detected_file_name
                # This function returns four values; the sub-part's is_archive
                # flag is not aggregated, so it is discarded here.  Unpacking
                # into three names raised ValueError on every multipart input.
                ind, s_urls, _, payload_r = process_attachement(
                    p.body, detected_content_type, filename, origin_domain, passwordlist, sha)
                indicators += ind
                suspicious_urls |= set(s_urls)
                payload_results += payload_r
    except DecodingError:
        # Binary attachment: not parseable as MIME, analyse it as-is below.
        pass
    extract_urls = ExtractURL(attachment, origin_domain, sha)
    suspicious_urls |= set(extract_urls.processing())
    indicators += extract_urls.indicators
    content_type = detected_content_type
    filename = detected_file_name
    if filename is not None and len(filename) > 0:
        # The file name and its stem are tried as archive passwords.
        passwordlist.append(filename)
        prefix, suffix = os.path.splitext(filename)
        passwordlist.append(prefix)
    passwordlist = [i for i in passwordlist if len(i) > 1]
    r_indicators, is_archive, r = process_payload(filename, attachment, content_type, origin_domain, passwordlist, sha)
    r['filename'] = filename
    r['content_type'] = content_type
    indicators += r_indicators
    payload_results.append(r)
    return indicators, list(suspicious_urls), is_archive, payload_results
예제 #5
0
def test_reply_quotations_share_block():
    """The HTML part's quotation must be stripped, leaving no 'From' text behind."""
    parsed = mime.from_string(REPLY_QUOTATIONS_SHARE_BLOCK)
    walked = list(parsed.walk())
    html_part = walked[1]
    assert html_part.content_type == 'text/html'

    remaining = quotations.extract_from_html(html_part.body)
    ok_(remaining)
    ok_('From' not in remaining)
예제 #6
0
def _extract_parts(namespace_id, folder_id, body_string):
    """Store the raw message in the blockstore and parse each of its leaf parts.

    The raw body is saved under its SHA-256 digest unless already present.
    Parsing failures, either of the whole message or of any part, are logged
    and end processing; nothing is returned in any case.
    """
    digest = sha256(body_string).hexdigest()
    if not is_in_blockstore(digest):
        save_to_blockstore(digest, body_string)

    try:
        parsed = mime.from_string(body_string)
    except (mime.DecodingError, AttributeError, RuntimeError,
            TypeError) as e:
        log.error('Error parsing message metadata',
                  folder_id=folder_id, namespace_id=namespace_id, error=e)
        return

    if parsed is None:
        return

    # A single-part message is its own (only) part, so include the root then.
    include_root = parsed.content_type.is_singlepart()
    for part in parsed.walk(with_self=include_root):
        try:
            # Multipart containers are skipped.  TODO should we store relations?
            if not part.content_type.is_multipart():
                _parse_mimepart(namespace_id, part)
        except (mime.DecodingError, AttributeError, RuntimeError,
                TypeError, binascii.Error, UnicodeDecodeError):
            log.error('Error parsing message MIME parts',
                      folder_id=folder_id, namespace_id=namespace_id,
                      exc_info=True)
            return
예제 #7
0
def test_standard_replies(filename):
    """Every text/plain part of the mail in *filename* must reduce to the expected reply.

    The expected text is read from a sibling file named after *filename*
    (last four characters replaced by ``_reply_text``) when it exists, and is
    ``'Hello'`` otherwise.  The test fails if no text/plain part is found.
    """
    def check_part(email_part):
        actual = quotations.extract_from_plain(email_part.body)
        expected_path = filename[:-4] + '_reply_text'
        expected = 'Hello'
        if os.path.isfile(expected_path):
            with open(expected_path) as fh:
                expected = fh.read()

        assert actual == expected, 'Parsed text was incorrect for file {0}'.format(
            filename
        )

    with open(filename) as fh:
        message = mime.from_string(fh.read())

        # Either the root itself is the plain-text part, or look among its sub-parts.
        if message.content_type == 'text/plain':
            candidates = iter([message])
        else:
            candidates = (
                part for part in message.walk()
                if part.content_type == 'text/plain'
            )

        checked = 0
        for part in candidates:
            checked += 1
            check_part(part)

        if checked == 0:
            pytest.fail('Could not find text/plain part in email {0}'.format(filename))
예제 #8
0
파일: email.py 프로젝트: nivertech/aleph
    def ingest(self, meta, local_path):
        """Ingest the e-mail stored at *local_path*.

        Non-body parts are handed to ``ingest_attachment``.  The body is
        written to a temporary file and passed to an HTML or plain document
        ingestor depending on the detected body type; the temporary file is
        removed afterwards.  Raises ``IngestorException`` when no body exists.
        """
        with open(local_path, 'rb') as emlfh:
            raw = emlfh.read()
        msg = mime.from_string(raw)
        meta = self.parse_headers(msg, meta)

        body_type = 'text/plain'
        body_part = msg.body

        for part in msg.walk():
            if part.is_body():
                candidate = part.body
                # Keep taking non-empty bodies until an HTML one has been chosen.
                if 'html' not in body_type and \
                        candidate is not None and len(candidate.strip()):
                    body_type = unicode(part.detected_content_type)
                    body_part = candidate
            else:
                self.ingest_attachment(part, meta)

        if body_part is None:
            raise IngestorException("No body in E-Mail: %r" % meta)

        use_html = 'html' in body_type
        out_path = ''
        try:
            out_path = self.write_temp(body_part, 'htm' if use_html else 'txt')
            if use_html:
                delegate = HtmlIngestor(self.source_id)
            else:
                delegate = DocumentIngestor(self.source_id)
            delegate.ingest(meta, out_path)
        finally:
            remove_tempfile(out_path)
예제 #9
0
def test_quoted_printable_encoding_avoided_for_compatibility(
        patch_smtp, api_client):
    """Send a message with a very long body and check the part encodings.

    The sent message must have exactly two parts; the text/html part must be
    base64 encoded and the text/plain part either 7bit or base64.
    """
    # Test that messages with long lines don't get quoted-printable encoded,
    # for maximum server compatibility.
    api_client.post_data(
        '/send',
        {'to': [{'email': '*****@*****.**'}],
         'subject': 'In Catilinam',
         'body': 'Etenim quid est, Catilina, quod iam amplius exspectes, si '
         'neque nox tenebris obscurare coeptus nefarios neque privata domus '
         'parietibus continere voces conjurationis tuae potest? Si '
         'illustrantur, si erumpunt omnia? Muta iam istam mentem, mihi crede! '
         'obliviscere caedis atque incendiorum. Teneris undique: luce sunt '
         'clariora nobis tua consilia omnia; quae iam mecum licet recognoscas.'
         ' Meministine me ante diem duodecimum Kalendas Novembres dicere in '
         'senatu, fore in armis certo die, qui dies futurus esset ante diem '
         'sextum Kalendas Novembres, C. Manlium, audaciae satellitem atque '
         'administrum tuae? Num me fefellit, Catilina, non modo res tanta, tam'
         ' atrox, tamque incredibilis, verum id quod multo magis admirandum, '
         'dies? '})
    # The most recently captured SMTP delivery is a (recipients, raw message) pair.
    _, msg = patch_smtp[-1]
    parsed = mime.from_string(msg)
    assert len(parsed.parts) == 2
    for part in parsed.parts:
        # content_encoding is indexed here; element 0 is the encoding name.
        if part.content_type.value == 'text/html':
            assert part.content_encoding[0] == 'base64'
        elif part.content_type.value == 'text/plain':
            assert part.content_encoding[0] in ('7bit', 'base64')
예제 #10
0
    def get_header(self, header, mid):
        """Return the value of *header* from the stored full body.

        When the message is flagged with a decode error, a warning carrying
        *mid* is logged and ``None`` is returned instead.
        """
        if not self.decode_error:
            message = mime.from_string(self.full_body.data)
            return message.headers.get(header)

        log.warning('Error getting message header', mid=mid)
        return None
예제 #11
0
def test_send_with_event(patch_smtp, api_client, example_draft, event):
    """Sending a draft with an event id must produce plain, html and calendar parts."""
    # Create a draft
    r = api_client.post_data('/drafts', example_draft)
    msgs = patch_smtp
    assert r.status_code == 200
    created = json.loads(r.data)
    draft_public_id = created['id']
    version = created['version']

    # Send the draft along with an event ID to use for invites
    send_request = {
        'draft_id': draft_public_id,
        'version': version,
        'event_id': event.public_id,
    }
    r = api_client.post_data('/send', send_request)
    assert r.status_code == 200

    # Make sure one message was sent
    assert len(msgs) == 1
    recipients, raw_msg = msgs[0]
    msg = mime.from_string(raw_msg)

    # Check the MIME body of the message to make sure the event is there
    include_root = msg.content_type.is_singlepart()
    parts = [
        (mimepart.content_type.format_type, mimepart.content_type.subtype)
        for mimepart in msg.walk(with_self=include_root)
    ]
    assert ('text', 'plain') in parts
    assert ('text', 'html') in parts
    assert ('text', 'calendar') in parts
예제 #12
0
def test_from_mime():
    """EmailV1.fromMime must replace each attachment with a hash-named placeholder.

    A multipart/mixed message with two text parts, two inline images and
    three regular attachments is built and converted.  The resulting body
    must contain one ``DUMMY_CONTENT_TYPE`` part per attachment, in order,
    each using the hex SHA-256 of the serialized attachment as its filename.
    """
    # create email from separated mime and test if it get reconstructed ok
    root_mime = mime.create.multipart("mixed")
    text_1 = mime.create.text("plain", randUnicode(length=3))
    root_mime.append(text_1)
    attachments = []
    for _ in range(2):
        a = mime.create.attachment("image/png", randStr(size=10), randUnicode(), AttachmentType.INLINE)
        attachments.append(a)
        a.to_string()
        root_mime.append(a)

    text_2 = mime.create.text("plain", randUnicode(length=3))
    root_mime.append(text_2)
    for _ in range(3):
        a = mime.create.attachment("video/mp4", randStr(size=15), randUnicode(), AttachmentType.ATTACHMENT)
        attachments.append(a)
        a.to_string()
        root_mime.append(a)
    root_mime.headers["Message-Id"] = u"<{}>".format(EmailHelpers.newMessageId())
    email = EmailV1.fromMime(root_mime.to_string(), [], {"user_id": u"*****@*****.**", "display_name": u"S B"})

    # check if the attachments have been all separated properly
    body_mime = mime.from_string(email.body.content)
    # Real lists are used instead of filter()/map(): on Python 3 those return
    # iterators, so len() raises and == compares the iterator objects.
    dummy_parts = [p for p in body_mime.parts if p.content_type.value == DUMMY_CONTENT_TYPE]
    assert len(attachments) == len(dummy_parts)
    # check att hashes are properly inserted as filenames
    expected_names = [HexEncode(Sha256Sum(a.to_string())) for a in attachments]
    actual_names = [p.content_disposition[1]["filename"] for p in dummy_parts]
    assert expected_names == actual_names
예제 #13
0
def test_reply_headers_set(patch_smtp, api_client, example_draft):
    """A message sent into an existing thread must carry the reply headers."""
    threads = api_client.get_data('/threads')
    thread_id = threads[0]['id']

    send_request = {'to': [{'email': '*****@*****.**'}],
                    'thread_id': thread_id}
    api_client.post_data('/send', send_request)

    _, raw = patch_smtp[-1]
    sent = mime.from_string(raw)
    assert 'In-Reply-To' in sent.headers
    assert 'References' in sent.headers
예제 #14
0
파일: emailParse.py 프로젝트: abhipec/pec
def returnText(email):
    """Return the plain-text body of *email*, or ``None`` when there is none.

    For a multipart message the first direct part typed text/plain wins; a
    single-part message is used only if its Content-Type is text/plain.
    """
    parsed = mime.from_string(str(email))

    if parsed.content_type.is_multipart():
        for candidate in parsed.parts:
            if candidate.content_type == 'text/plain':
                return candidate.body
        return None

    if parsed.content_type.is_singlepart():
        if parsed.headers['Content-Type'] == 'text/plain':
            return parsed.body

    return None
예제 #15
0
def process_headers(sha, store):
    """Examine the headers of the mail stored as ``<store>/<sha>``.

    Returns ``None`` when that file does not exist.  Otherwise returns the
    tuple ``(subject, origin_ip, rbl_listed, rbl_comment, mailfrom, mailto,
    origin_domain, indicators)`` built from ``ExamineHeaders``.
    """
    mailpath = os.path.join(store, sha)
    if not os.path.exists(mailpath):
        return None

    with open(mailpath, 'rb') as handle:
        mail = mime.from_string(handle.read())
        examiner = ExamineHeaders(mail, sha)
        (origin_ip, rbl_listed, rbl_comment,
         mailfrom, mailto, origin_domain) = examiner.processing()
        return (
            mail.subject,
            origin_ip,
            rbl_listed,
            rbl_comment,
            mailfrom,
            mailto,
            origin_domain,
            examiner.indicators,
        )
예제 #16
0
    def parse_message(self, mailfrom, rcpttos, data):
        """Parse raw message *data* into a plain payload dictionary.

        Addressee and sender come from the ``To`` / ``From`` headers when
        present and fall back to the protocol-level *rcpttos* / *mailfrom*.
        A single-part message fills ``body_plain``; for a multipart message
        each direct part fills ``body_plain``, ``body_html`` or, when it is an
        attachment, adds an entry (name, digests, base64 data, encoding, type)
        to ``attachments``.
        """
        message = mime.from_string(data)
        payload = {
            'from': None,
            'to': None,
            'subject': None,
            'date': None,
            'body_plain': None,
            'body_html': None,
            'attachments': [],
            'headers': message.headers.items(),
        }

        # Addressee: prefer the header, else the raw protocol recipients.
        if 'To' in message.headers.keys():
            payload['to'] = message.headers['To']
        else:
            payload['to'] = rcpttos

        # Sender: prefer the header, else the raw protocol sender.
        if 'From' in message.headers.keys():
            payload['from'] = message.headers['From']
        else:
            payload['from'] = mailfrom

        # Optional headers are copied only when present.
        for header_name, payload_key in (('Subject', 'subject'), ('Date', 'date')):
            if header_name in message.headers.keys():
                payload[payload_key] = message.headers[header_name]

        # Body
        if message.content_type.is_singlepart():
            payload['body_plain'] = message.body
            return payload

        if not message.content_type.is_multipart():
            return payload

        for part in message.parts:
            content_type = part.content_type[0]

            if content_type == 'text/plain':
                payload['body_plain'] = part.body
            elif content_type == 'text/html':
                payload['body_html'] = part.body
            elif part.is_attachment():
                payload['attachments'].append({
                    'filename': part.detected_file_name,
                    'md5': hashlib.md5(part.body).hexdigest(),
                    'sha1': hashlib.sha1(part.body).hexdigest(),
                    'data': base64.b64encode(part.body),
                    'encoding': part.content_encoding[0],
                    'type': content_type,
                })

        return payload
예제 #17
0
def test_address_parsing_edge_cases():
    """Check that header parsing can handle a variety of tricky input."""
    # Each case is (raw message text, expected parse of its From header).
    cases = [
        # Extra quotes around display name
        ('From: ""Bob"" <*****@*****.**>',
         [(' Bob ', '*****@*****.**')]),
        # Comments after addr-spec
        ('From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)',
         [('Bob', '*****@*****.**')]),
        ('From: Indiegogo <*****@*****.**> (no reply)',
         [('Indiegogo', '*****@*****.**')]),
        ('From: Anon <*****@*****.**> (GitHub Staff)',
         [('Anon', '*****@*****.**')]),
        # Display name in comment
        ('From: root@gunks (Cron Daemon)',
         [('Cron Daemon', 'root@gunks')]),
        # Missing closing angle bracket
        ('From: Bob <*****@*****.**',
         [('Bob', '*****@*****.**')]),
        # Blank (spammers)
        ('From:  ()',
         []),
        # Missing header
        ('',
         []),
        # Duplicate header
        ('From: [email protected]\r\n'
         'From: [email protected]',
         [('', '*****@*****.**')]),
    ]

    for raw, expected in cases:
        mimepart = mime.from_string(raw)
        parsed = parse_mimepart_address_header(mimepart, 'From')
        assert parsed == expected
예제 #18
0
def test_draft_updates(db, default_account, mock_imapclient):
    """Exercise saving, updating and deleting a draft against a mocked IMAP server.

    After each remote operation the Drafts folder is re-selected and its UID
    count checked: one copy after saving, still one after a no-op update, one
    (with the new Message-Id) after a versioned update, and none after delete.
    """
    # Set up folder list
    mock_imapclient._data['Drafts'] = {}
    mock_imapclient._data['Trash'] = {}
    mock_imapclient.list_folders = lambda: [
        (('\\HasNoChildren', '\\Drafts'), '/', 'Drafts'),
        (('\\HasNoChildren', '\\Trash'), '/', 'Trash')
    ]

    pool = writable_connection_pool(default_account.id)

    draft = create_message_from_json({'subject': 'Test draft'},
                                     default_account.namespace, db.session,
                                     True)

    draft.is_draft = True
    draft.version = 0
    db.session.commit()
    # First save: the draft must show up as a single message in Drafts.
    save_draft(default_account.id, draft.id, {'version': 0})
    with pool.get() as conn:
        conn.select_folder('Drafts', lambda *args: True)
        assert len(conn.all_uids()) == 1

    # Check that draft is not resaved if already synced.
    update_draft(default_account.id, draft.id, {'version': 0})
    with pool.get() as conn:
        conn.select_folder('Drafts', lambda *args: True)
        assert len(conn.all_uids()) == 1

    # Check that an older version is deleted
    draft.version = 4
    sendmail_update_draft(db.session, default_account, draft,
                          from_addr=draft.from_addr, subject='New subject',
                          blocks=[])
    db.session.commit()

    update_draft(default_account.id, draft.id, {'version': 5})
    with pool.get() as conn:
        conn.select_folder('Drafts', lambda *args: True)
        all_uids = conn.all_uids()
        assert len(all_uids) == 1
        data = conn.uids(all_uids)[0]
        parsed = mime.from_string(data.body)
        # The remaining copy must carry the Message-Id built from the draft's
        # current public id and version.
        expected_message_id = '<{}-{}@mailer.nylas.com>'.format(
            draft.public_id, draft.version)
        assert parsed.headers.get('Message-Id') == expected_message_id

    # Deleting the draft must leave the Drafts folder empty.
    delete_draft(default_account.id, draft.id,
                 {'message_id_header': draft.message_id_header,
                  'inbox_uid': draft.inbox_uid, 'version': 5})
    with pool.get() as conn:
        conn.select_folder('Drafts', lambda *args: True)
        all_uids = conn.all_uids()
        assert len(all_uids) == 0
예제 #19
0
def test_sending_from_email_alias(patch_smtp, api_client):
    """An explicit 'from' entry must be rendered as 'name <address>' in the From header."""
    send_request = {
        'to': [{'email': '*****@*****.**'}],
        'from': [{'name': 'admin',
                  'email': '*****@*****.**'}],
        'subject': 'Banalities',
        'body': '<html>Hello there</html>',
    }
    api_client.post_data('/send', send_request)

    _, raw = patch_smtp[-1]
    sent = mime.from_string(raw)
    assert 'From' in sent.headers
    assert sent.headers['From'] == 'admin <*****@*****.**>'
예제 #20
0
    def load_from_stdin(self, logger):
        '''
        Load email from standard input

        Reads everything available on ``sys.stdin``, parses it into
        ``self.raw`` and tags the parsed message with this filter's UUID in
        the ``X-Capkopper-Filter-UUID`` header.
        '''
        logger.info("%s: loading email from stdin" % self.uuid)
        # Join the lines in one pass; repeated += builds a new string per
        # line and is quadratic on large messages.
        email_string = "".join(sys.stdin)
        self.raw = mime.from_string(email_string)

        self.raw.headers.add("X-Capkopper-Filter-UUID", self.uuid)
예제 #21
0
파일: views.py 프로젝트: alifanov/mailburn
 def get(self, request, *args, **kwargs):
     """Proxy a Gmail thread lookup and return it as JSON with per-message details.

     The thread named by ``kwargs["threadId"]`` is fetched with the caller's
     ``access_token`` (plus optional ``maxResults``, ``pageToken`` and ``q``).
     Each message of the thread is then fetched individually and reduced to
     an entry holding its id, an ``opened`` flag from the cache and its
     snippet.  With a ``format`` query parameter the raw message is included,
     optionally (``decode``) after running its bodies through
     ``self.msg_filter``; without it the payload parts go to
     ``self.parse_parts``.  A non-200 thread response is relayed unchanged.
     """
     # Forward only the query parameters the caller actually supplied.
     params = {"access_token": request.GET.get("access_token")}
     if request.GET.get("maxResults"):
         params["maxResults"] = request.GET.get("maxResults")
     if request.GET.get("pageToken"):
         params["pageToken"] = request.GET.get("pageToken")
     if request.GET.get("q"):
         params["q"] = request.GET.get("q")
     # NOTE(review): neither requests.get call passes a timeout.
     r = requests.get(
         "https://www.googleapis.com/gmail/v1/users/me/threads/{}".format(kwargs.get("threadId")), params=params
     )
     if r.status_code == 200:
         ans = r.json()
         msgs = []
         for m in r.json()["messages"]:
             mr = requests.get(
                 "https://www.googleapis.com/gmail/v1/users/me/messages/{}".format(m["id"]),
                 params={"access_token": request.GET.get("access_token"), "format": request.GET.get("format")},
             )
             # Messages whose fetch did not return 200 are left out of the result.
             if mr.status_code == 200:
                 ans_msg = {
                     "id": m["id"],
                     "opened": True if cache.get(m["id"]) else False,
                     "snippet": mr.json()["snippet"],
                 }
                 if request.GET.get("format"):
                     msg_raw = str(mr.json()["raw"])
                     if request.GET.get("decode"):
                         # Decode the base64url payload, filter the bodies, then re-encode.
                         msg = mime.from_string(base64.urlsafe_b64decode(msg_raw))
                         if msg.content_type.is_multipart():
                             # Stops at the first text/plain part; text/html parts
                             # seen before it are filtered as well.
                             for part in msg.parts:
                                 if part.content_type == "text/plain":
                                     part.body = self.msg_filter(part.body)
                                     break
                                 if part.content_type == "text/html":
                                     part.body = self.msg_filter(part.body)
                         else:
                             msg.body = self.msg_filter(msg.body)
                         # "debug" keeps the serialized message; "raw" is the same
                         # serialization, base64url-encoded on the next line.
                         ans_msg["debug"] = mime.python_message_to_string(msg.to_python_message())
                         ans_msg["raw"] = mime.python_message_to_string(msg.to_python_message())
                         ans_msg["raw"] = base64.urlsafe_b64encode(ans_msg["raw"])
                     else:
                         ans_msg["raw"] = msg_raw
                 else:
                     # Without "format", hand the payload parts (or the payload
                     # itself when it has no "parts") to parse_parts.
                     if "parts" in mr.json()["payload"]:
                         self.parse_parts(ans_msg, mr.json()["payload"]["parts"])
                     else:
                         self.parse_parts(ans_msg, [mr.json()["payload"]])
                 msgs.append(ans_msg)
         # Replace the thread's message list with the reduced entries.
         ans["messages"] = msgs
         return HttpResponse(json.dumps(ans), content_type="application/json")
     else:
         return HttpResponse(r.text, content_type="application/json", status=r.status_code)
예제 #22
0
def test_reply_headers_set(db, patch_smtp, api_client, example_draft, thread,
                           message):
    """Replying into a thread whose message has a Message-Id must set the reply headers."""
    message.message_id_header = '<*****@*****.**>'
    db.session.commit()

    threads = api_client.get_data('/threads')
    thread_id = threads[0]['id']

    send_request = {'to': [{'email': '*****@*****.**'}],
                    'thread_id': thread_id}
    api_client.post_data('/send', send_request)

    _, raw = patch_smtp[-1]
    sent = mime.from_string(raw)
    assert 'In-Reply-To' in sent.headers
    assert 'References' in sent.headers
예제 #23
0
 def replaceDummyReferences(message, reference_map):
     """Rewrite placeholder filenames in *message* using *reference_map*.

     Every part (the root included) whose content type equals
     ``DUMMY_CONTENT_TYPE`` gets its Content-Disposition filename replaced
     when that filename is a key of *reference_map*.  Returns
     ``(True, reparsed_message)``, or ``(False, None)`` if *message* is not a
     ``MimePart``.
     """
     if not isinstance(message, mime.message.part.MimePart):
         return False, None

     for part in message.walk(with_self=True):
         if not (part.content_type == DUMMY_CONTENT_TYPE):
             continue
         _, disposition_params = part.content_disposition
         placeholder = disposition_params.get("filename")
         if placeholder in reference_map:
             part.content_disposition.params["filename"] = reference_map[placeholder]
         # HACK: Must be set on the MIMEPart that has been modified
         part.was_changed = types.MethodType(was_changed_always, part)

     message.was_changed = types.MethodType(was_changed_always, message)
     serialized = message.to_string()
     return True, mime.from_string(serialized)
예제 #24
0
    def data(self):
        """Return this block's content, trying to recover it when it is missing.

        An empty block yields ``""``.  Otherwise the content comes from the
        in-memory ``_data`` attribute when present, else from the blockstore,
        and is asserted to match ``data_sha256``.  If nothing is found and
        this is a ``Block`` with parts, the raw message of the first part is
        re-parsed and the sub-part whose hash matches is saved back to the
        blockstore and returned.  Returns ``None`` when recovery fails.
        """
        if self.size == 0:
            log.warning("Block size is 0")
            return ""

        if hasattr(self, "_data"):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = get_from_blockstore(self.data_sha256)

        if value is not None:
            assert self.data_sha256 == sha256(value).hexdigest(), "Returned data doesn't match stored hash!"
            return value

        log.warning("Couldn't find data on S3 for block with hash {}".format(self.data_sha256))

        from inbox.models.block import Block

        if isinstance(self, Block) and self.parts:
            # This block is an attachment of a message that was
            # accidentally deleted. We will attempt to fetch the raw
            # message and parse out the needed attachment.
            message = self.parts[0].message  # only grab one
            raw_mime = get_from_blockstore(message.data_sha256)

            if raw_mime is None:
                log.error("Don't have raw message for hash {}".format(message.data_sha256))
                return None

            parsed = mime.from_string(raw_mime)
            if parsed is not None:
                include_root = parsed.content_type.is_singlepart()
                for mimepart in parsed.walk(with_self=include_root):
                    if mimepart.content_type.is_multipart():
                        continue  # TODO should we store relations?

                    payload = mimepart.body
                    if isinstance(payload, unicode):
                        payload = payload.encode("utf-8", "strict")

                    # Found it!
                    if sha256(payload).hexdigest() == self.data_sha256:
                        log.info("Found subpart with hash {}".format(self.data_sha256))
                        save_to_blockstore(self.data_sha256, payload)
                        return payload

        log.error("No data returned!")
        return value
예제 #25
0
    def toMime(self):
        """Serialize this email into a parsed MIME message.

        Raises ``EmailException`` unless the body and every attachment are
        loaded.  The MIME tree is built by ``createMime``, the extra headers
        in ``other_headers`` are applied, and the result is round-tripped
        through its string form.
        """
        if not self.body.isLoaded():
            raise EmailException(u"EmailV2.toMime: All content must be loaded!")
        if len(self.attachments) > 0:
            not_loaded = [att for att in self.attachments if not att.isLoaded()]
            if not_loaded:
                raise EmailException(u"EmailV2.toMime: All content must be loaded!")

        body = EmailHelpers.deserializeBody(self.body.content)
        if isinstance(self.server_attr, NOT_ASSIGNED):
            server_time = None
        else:
            server_time = self.server_attr.server_time

        raw_mime = createMime(
            body["text"], body["html"], self.attachments, self.message_id,
            server_time, self.subject, self.tos, self.ccs, self.bccs,
            self.reply_tos, self.sender, self.in_reply_to, self.references)

        for header_name, header_value in self.other_headers.iteritems():
            raw_mime.headers[header_name] = header_value

        serialized = raw_mime.to_string()
        return mime.from_string(serialized)
예제 #26
0
    def setMIMEBcc(message, bccs):
        """Set or clear the Bcc header of *message* from the list *bccs*.

        *bccs* must be a list of dicts whose ``user_id`` and ``display_name``
        are unicode strings.  An empty list removes the header.  Returns
        ``(True, reparsed_message)`` on success and ``(False, None)`` when any
        argument fails validation.
        """
        if not isinstance(message, mime.message.part.MimePart) or not isinstance(bccs, list):
            return False, None

        for entry in bccs:
            well_formed = (
                isinstance(entry, dict)
                and isinstance(entry.get("user_id"), unicode)
                and isinstance(entry.get("display_name"), unicode)
            )
            if not well_formed:
                return False, None

        if len(bccs) > 0:
            rendered = [u"{} <{}>".format(bcc["display_name"], bcc["user_id"]) for bcc in bccs]
            message.headers["Bcc"] = u"{}".format(", ".join(rendered))
        else:
            message.remove_headers("Bcc")

        serialized = message.to_string()
        return True, mime.from_string(serialized)
예제 #27
0
def test_bcc_in_recipients_but_stripped_from_headers(patch_smtp, api_client):
    """Bcc addresses must be delivered to but must not appear in the sent headers."""
    send_request = {
        'to': [{'email': '*****@*****.**'}],
        'cc': [{'email': '*****@*****.**'}],
        'bcc': [{'email': '*****@*****.**'}],
        'subject': 'Banalities'
    }
    r = api_client.post_data('/send', send_request)
    assert r.status_code == 200

    recipients, raw = patch_smtp[0]
    assert set(recipients) == {'*****@*****.**', '*****@*****.**',
                               '*****@*****.**'}

    sent = mime.from_string(raw)
    assert 'Bcc' not in sent.headers
    assert sent.headers.get('To') == '*****@*****.**'
    assert sent.headers.get('Cc') == '*****@*****.**'
예제 #28
0
def test_inline_image_send(patch_smtp, api_client, uploaded_file_ids):
    """An image referenced by cid in the body must be sent inline with a matching Content-Id."""
    file_id = uploaded_file_ids[0]
    send_request = {
        'subject': 'Inline image test',
        'body': 'Before image\r\n[cid:{}]\r\nAfter image'.format(file_id),
        'file_ids': [file_id],
        'to': [{'name': 'Foo Bar',
                'email': '*****@*****.**'}]
    }
    r = api_client.post_data('/send', send_request)
    assert r.status_code == 200

    _, raw = patch_smtp[-1]
    sent = mime.from_string(raw)
    jpeg_parts = (
        part for part in sent.walk()
        if part.headers['Content-Type'] == 'image/jpeg'
    )
    for image in jpeg_parts:
        assert image.headers['Content-Id'] == '<{}>'.format(file_id)
        assert image.headers['Content-Disposition'][0] == 'inline'
예제 #29
0
def test_body_construction(patch_smtp, api_client):
    """An HTML body must be sent as two parts: a plain-text rendering and the HTML itself."""
    send_request = {'to': [{'email': '*****@*****.**'}],
                    'subject': 'Banalities',
                    'body': '<html>Hello there</html>'}
    api_client.post_data('/send', send_request)

    _, raw = patch_smtp[-1]
    sent = mime.from_string(raw)
    assert len(sent.parts) == 2

    saw_plain = False
    saw_html = False
    for part in sent.parts:
        if part.content_type.value == 'text/plain':
            saw_plain = True
            assert part.body.strip() == 'Hello there'
        elif part.content_type.value == 'text/html':
            saw_html = True
            assert part.body.strip() == '<html>Hello there</html>'
    assert saw_plain and saw_html
예제 #30
0
def test_send_with_event_and_attachments(patch_smtp, api_client, example_draft,
                                         event, attachments):
    """Send a draft that carries both a file attachment and an event invite.

    The first entry of ``attachments`` is uploaded through ``/files``, its id
    is added to ``example_draft``, and the saved draft is sent together with
    ``event.public_id``. Exactly one message must have been sent, and its MIME
    tree must contain plain-text, HTML and calendar parts plus a JPEG part
    flagged as an attachment.
    """
    msgs = patch_smtp

    # Load and post file for attachment. The handle is opened in a ``with``
    # block so it is closed as soon as the upload request has returned
    # instead of being leaked for the remainder of the test run.
    filename, path = attachments[0]
    with open(path, 'rb') as attachment_file:
        r = api_client.post_raw('/files',
                                data={'file': (attachment_file, filename)})
    assert r.status_code == 200
    attachment_id = json.loads(r.data)[0]['id']

    # Add attachment to the new draft and post the draft
    example_draft['file_ids'] = [attachment_id]
    r = api_client.post_data('/drafts', example_draft)
    assert r.status_code == 200
    returned_draft = json.loads(r.data)
    draft_public_id = returned_draft['id']
    version = returned_draft['version']

    # Send the draft along with an event ID to use for invites
    r = api_client.post_data('/send',
                             {'draft_id': draft_public_id,
                              'version': version,
                              'event_id': event.public_id})
    assert r.status_code == 200

    # Make sure one message was sent
    assert len(msgs) == 1
    recipients, raw_msg = msgs[0]
    msg = mime.from_string(raw_msg)

    # Check the MIME body of the message to make sure both the event and the
    # attachment are there. The root part is only included in the walk when
    # the message is single-part.
    parts = []
    for mimepart in msg.walk(with_self=msg.content_type.is_singlepart()):
        is_attachment = mimepart.is_attachment()
        format_type = mimepart.content_type.format_type
        subtype = mimepart.content_type.subtype
        parts.append((format_type, subtype, is_attachment))

    assert ('text', 'plain', False) in parts
    assert ('text', 'html', False) in parts
    assert ('text', 'calendar', False) in parts
    assert ('image', 'jpeg', True) in parts
예제 #31
0
def test_body_construction(patch_smtp, api_client):
    """Verify the MIME structure generated for an HTML-only send request.

    Posts a message with an HTML body and inspects the last message captured
    by ``patch_smtp``: it must have two parts, including one ``text/plain``
    and one ``text/html`` part whose stripped bodies match the input.
    """
    recipient = {"email": "*****@*****.**"}
    request_body = {
        "to": [recipient],
        "subject": "Banalities",
        "body": "<html>Hello there</html>",
    }
    api_client.post_data("/send", request_body)

    _, raw_message = patch_smtp[-1]
    parsed = mime.from_string(raw_message)
    assert len(parsed.parts) == 2

    saw_plain = False
    saw_html = False
    for part in parsed.parts:
        kind = part.content_type.value
        if kind == "text/plain":
            saw_plain = True
            assert part.body.strip() == "Hello there"
            continue
        if kind == "text/html":
            saw_html = True
            assert part.body.strip() == "<html>Hello there</html>"

    # Both alternatives have to be present.
    assert saw_plain
    assert saw_html
예제 #32
0
    def process_message(source, message_text):
        """Parse an email and apply its metadata to the resulting documents.

        The ``From`` and ``Subject`` headers are recorded under the names of
        the source's ``from_metadata_type`` / ``subject_metadata_type`` when
        those are set. The parsed message is then handed to
        ``EmailBaseModel._process_message``; the metadata it returns is merged
        on top of the header metadata, and the combined dictionary (if not
        empty) is applied to every document whose id was returned.
        """
        from flanker import mime

        message = mime.from_string(force_bytes(message_text))

        # (metadata type configured on the source, header supplying its value)
        header_sources = (
            (source.from_metadata_type, 'From'),
            (source.subject_metadata_type, 'Subject'),
        )
        collected_metadata = {}
        for metadata_type, header_name in header_sources:
            if metadata_type:
                collected_metadata[metadata_type.name] = message.headers.get(
                    header_name)

        document_ids, parts_metadata = EmailBaseModel._process_message(
            source=source, message=message)
        collected_metadata.update(parts_metadata)

        if not collected_metadata:
            return

        for document in Document.objects.filter(id__in=document_ids):
            set_bulk_metadata(document=document,
                              metadata_dictionary=collected_metadata)
예제 #33
0
def test_address_parsing():
    """Check that header parsing can handle a variety of tricky input."""
    # Each case: (raw header text, header to parse, expected result).
    cases = [
        # Extra quotes around display name
        ('From: ""Bob"" <*****@*****.**>', "From",
         [[" Bob ", "*****@*****.**"]]),
        # Comments after addr-spec
        ('From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)',
         "From", [["Bob", "*****@*****.**"]]),
        ("From: Indiegogo <*****@*****.**> (no reply)", "From",
         [["Indiegogo", "*****@*****.**"]]),
        ("From: Anon <*****@*****.**> (GitHub Staff)", "From",
         [["Anon", "*****@*****.**"]]),
        # Display name in comment
        ("From: root@gunks (Cron Daemon)", "From",
         [["Cron Daemon", "root@gunks"]]),
        # Missing closing angle bracket
        ("From: Bob <*****@*****.**", "From", [["Bob", "*****@*****.**"]]),
        # Blank (spammers)
        ("From:  ()", "From", []),
        # Missing header
        ("", "From", []),
        # Duplicate header
        ("From: [email protected]\r\n"
         "From: [email protected]", "From", [["", "*****@*****.**"]]),
        # RFC2047-encoded phrases with commas
        ("From: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>", "From",
         [["Foo, Corp.", "*****@*****.**"]]),
        ("To: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>, "
         "=?utf-8?Q?Support?= <*****@*****.**>", "To",
         [["Foo, Corp.", "*****@*****.**"], ["Support", "*****@*****.**"]]),
        # Multiple header lines
        ("To: [email protected]\nSubject: Hello\nTo: [email protected]", "To",
         [["", "*****@*****.**"], ["", "*****@*****.**"]]),
    ]

    for raw_headers, header_name, expected in cases:
        mimepart = mime.from_string(raw_headers)
        parsed = parse_mimepart_address_header(mimepart, header_name)
        assert parsed == expected
예제 #34
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """
        Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        Parsing failures do not propagate: they are logged, the message is
        flagged through ``_mark_error()``, and the (partially populated)
        Message is still returned.

        Parameters
        ----------
        account
            The owning account; must not be None and must have a non-None
            ``namespace``.

        mid : int
            The account backend-specific message identifier. It is forwarded
            to the parsing helpers and included in error logs.

        folder_name
            Name of the folder the message came from; forwarded to
            ``_parse_metadata`` and included in error logs. Must not be None.

        received_date
            Forwarded unchanged to ``_parse_metadata``.

        body_string : str
            The full message including headers (encoded). Must not be a
            ``unicode`` object.

        Raises
        ------
        ValueError
            If any of ``account``, ``mid``, ``folder_name`` or ``body_string``
            is None.

        """
        _rqd = [account, mid, folder_name, body_string]
        if not all([v is not None for v in _rqd]):
            raise ValueError(
                'Required keyword arguments: account, mid, folder_name, '
                'body_string')
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        # The raw bytes are content-addressed by their SHA-256 digest.
        msg.data_sha256 = sha256(body_string).hexdigest()

        # Persist the raw MIME message to disk/ S3
        save_to_blockstore(msg.data_sha256, body_string)

        # Persist the processed message to the database
        msg.namespace_id = account.namespace.id

        try:
            parsed = mime.from_string(body_string)
            # Non-persisted instance attribute used by EAS.
            msg.parsed_body = parsed
            msg._parse_metadata(parsed, body_string, received_date, account.id,
                                folder_name, mid)
        except (mime.DecodingError, AttributeError, RuntimeError,
                TypeError) as e:
            # A header/metadata failure invalidates the whole parse: drop the
            # parsed tree so the per-part processing below is skipped.
            parsed = None
            # Non-persisted instance attribute used by EAS.
            msg.parsed_body = ''
            log.error('Error parsing message metadata',
                      folder_name=folder_name, account_id=account.id, error=e)
            msg._mark_error()

        if parsed is not None:
            # Accumulators filled in by _parse_mimepart and consumed by
            # calculate_body once every part has been visited.
            plain_parts = []
            html_parts = []
            # The root part is only visited when the message is single-part.
            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                try:
                    if mimepart.content_type.is_multipart():
                        continue  # TODO should we store relations?
                    msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                        html_parts, plain_parts)
                except (mime.DecodingError, AttributeError, RuntimeError,
                        TypeError, binascii.Error, UnicodeDecodeError) as e:
                    # A bad part is recorded but does not stop the remaining
                    # parts from being processed.
                    log.error('Error parsing message MIME parts',
                              folder_name=folder_name, account_id=account.id,
                              error=e)
                    msg._mark_error()
            msg.calculate_body(html_parts, plain_parts)

            # Occasionally people try to send messages to way too many
            # recipients. In such cases, empty the field and treat as a parsing
            # error so that we don't break the entire sync.
            for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references',
                          'reply_to'):
                value = getattr(msg, field)
                if json_field_too_long(value):
                    log.error('Recipient field too long', field=field,
                              account_id=account.id, folder_name=folder_name,
                              mid=mid)
                    setattr(msg, field, [])
                    msg._mark_error()

        return msg
예제 #35
0
    def process_message(source, message_text, message_properties=None):
        """Recursively turn an email message into uploaded documents.

        The message is parsed with flanker. If it has child parts, each part
        is serialized and processed by a recursive call; otherwise the leaf is
        handled as an attachment/inline file or as a body part.

        Parameters:
            source: object providing the metadata types, document type and
                upload handler used below.
            message_text: raw message (or serialized part) to parse.
            message_properties: optional dict carrying ``Subject`` and
                ``From`` down the recursion. It is mutated in place: missing
                keys are filled from this message's headers, so nested parts
                inherit the values of the first message that supplied them.

        Returns:
            None in every branch.
        """
        from flanker import mime

        # NOTE(review): counter is never incremented in this function, so the
        # fallback label below is always 'attachment-1'.
        counter = 1
        message = mime.from_string(force_bytes(message_text))
        metadata_dictionary = {}

        if not message_properties:
            message_properties = {}

        # Values already present (set by an enclosing message) take precedence
        # over this part's own headers.
        message_properties['Subject'] = message_properties.get(
            'Subject', message.headers.get('Subject'))

        message_properties['From'] = message_properties.get(
            'From', message.headers.get('From'))

        if source.subject_metadata_type:
            metadata_dictionary[source.subject_metadata_type.
                                name] = message_properties.get('Subject')

        if source.from_metadata_type:
            metadata_dictionary[source.from_metadata_type.
                                name] = message_properties.get('From')

        # Messages are tree based, do nested processing of message parts until
        # a message with no children is found, then work out way up.
        if message.parts:
            for part in message.parts:
                EmailBaseModel.process_message(
                    source=source,
                    message_text=part.to_string(),
                    message_properties=message_properties)
        else:
            # Treat inlines as attachments, both are extracted and saved as
            # documents
            if message.is_attachment() or message.is_inline():

                # Reject zero length attachments
                if len(message.body) == 0:
                    return

                label = message.detected_file_name or 'attachment-{}'.format(
                    counter)
                with ContentFile(content=message.body,
                                 name=label) as file_object:
                    if label == source.metadata_attachment_name:
                        # The metadata attachment replaces the header-derived
                        # dictionary. It is only bound to this call's local
                        # variable; it is neither returned nor passed on.
                        metadata_dictionary = yaml.load(
                            stream=file_object.read(), Loader=SafeLoader)
                        logger.debug('Got metadata dictionary: %s',
                                     metadata_dictionary)
                    else:
                        documents = source.handle_upload(
                            document_type=source.document_type,
                            file_object=file_object,
                            expand=(source.uncompress ==
                                    SOURCE_UNCOMPRESS_CHOICE_Y))
                        if metadata_dictionary:
                            for document in documents:
                                set_bulk_metadata(
                                    document=document,
                                    metadata_dictionary=metadata_dictionary)
            else:
                # If it is not an attachment then it should be a body message part.
                # Another option is to use message.is_body()
                if message.detected_content_type == 'text/html':
                    label = 'email_body.html'
                else:
                    label = 'email_body.txt'

                if source.store_body:
                    with ContentFile(content=force_bytes(message.body),
                                     name=label) as file_object:
                        # NOTE(review): expand receives the choice constant
                        # itself here, whereas the attachment branch passes a
                        # boolean comparison. Confirm handle_upload treats
                        # this value as "do not expand".
                        documents = source.handle_upload(
                            document_type=source.document_type,
                            expand=SOURCE_UNCOMPRESS_CHOICE_N,
                            file_object=file_object)
                        if metadata_dictionary:
                            for document in documents:
                                set_bulk_metadata(
                                    document=document,
                                    metadata_dictionary=metadata_dictionary)
예제 #36
0
def test_address_parsing():
    """Check that header parsing can handle a variety of tricky input."""

    def addresses(raw_headers, header_name):
        # Parse the raw header text and extract the named address header.
        mimepart = mime.from_string(raw_headers)
        return parse_mimepart_address_header(mimepart, header_name)

    # Extra quotes around display name
    assert addresses('From: ""Bob"" <*****@*****.**>',
                     'From') == [[' Bob ', '*****@*****.**']]

    # Comments after addr-spec
    assert addresses(
        'From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)',
        'From') == [['Bob', '*****@*****.**']]
    assert addresses('From: Indiegogo <*****@*****.**> (no reply)',
                     'From') == [['Indiegogo', '*****@*****.**']]
    assert addresses('From: Anon <*****@*****.**> (GitHub Staff)',
                     'From') == [['Anon', '*****@*****.**']]

    # Display name in comment
    assert addresses('From: root@gunks (Cron Daemon)',
                     'From') == [['Cron Daemon', 'root@gunks']]

    # Missing closing angle bracket
    assert addresses('From: Bob <*****@*****.**',
                     'From') == [['Bob', '*****@*****.**']]

    # Blank (spammers)
    assert addresses('From:  ()', 'From') == []

    # Missing header
    assert addresses('', 'From') == []

    # Duplicate header
    assert addresses('From: [email protected]\r\n'
                     'From: [email protected]',
                     'From') == [['', '*****@*****.**']]

    # RFC2047-encoded phrases with commas
    assert addresses('From: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>',
                     'From') == [['Foo, Corp.', '*****@*****.**']]
    assert addresses(
        'To: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>, '
        '=?utf-8?Q?Support?= <*****@*****.**>',
        'To') == [['Foo, Corp.', '*****@*****.**'],
                  ['Support', '*****@*****.**']]

    # Multiple header lines
    assert addresses(
        'To: [email protected]\nSubject: Hello\nTo: [email protected]',
        'To') == [['', '*****@*****.**'], ['', '*****@*****.**']]
예제 #37
0
def test_draft_updates(db, default_account, mock_imapclient):
    """Exercise saving, updating and deleting a draft against a mocked IMAP.

    The Drafts folder must hold exactly one copy of the draft after each
    save/update, the stored copy must carry the Message-Id of the current
    draft version, and the folder must be empty after deletion.
    """
    # Set up folder list
    for folder in ("Drafts", "Trash", "Sent Mail"):
        mock_imapclient._data[folder] = {}
    mock_imapclient.list_folders = lambda: [
        (("\\HasNoChildren", "\\Drafts"), "/", "Drafts"),
        (("\\HasNoChildren", "\\Trash"), "/", "Trash"),
        (("\\HasNoChildren", "\\Sent"), "/", "Sent Mail"),
    ]

    def drafts_uids(connection):
        # Select the Drafts folder and return the UIDs it currently holds.
        connection.select_folder("Drafts", lambda *args: True)
        return connection.all_uids()

    pool = writable_connection_pool(default_account.id)

    draft = create_message_from_json({"subject": "Test draft"},
                                     default_account.namespace, db.session,
                                     True)
    draft.is_draft = True
    draft.version = 0
    db.session.commit()

    with pool.get() as conn:
        save_draft(conn, default_account.id, draft.id, {"version": 0})
        assert len(drafts_uids(conn)) == 1

        # Check that draft is not resaved if already synced.
        update_draft(conn, default_account.id, draft.id, {"version": 0})
        assert len(drafts_uids(conn)) == 1

        # Check that an older version is deleted
        draft.version = 4
        sendmail_update_draft(db.session, default_account, draft,
                              from_addr=draft.from_addr,
                              subject="New subject", blocks=[])
        db.session.commit()

        update_draft(conn, default_account.id, draft.id, {"version": 5})

        remaining = drafts_uids(conn)
        assert len(remaining) == 1
        data = conn.uids(remaining)[0]
        stored_headers = mime.from_string(data.body).headers
        expected_message_id = "<{}-{}@mailer.nylas.com>".format(
            draft.public_id, draft.version)
        assert stored_headers.get("Message-Id") == expected_message_id

        # We're testing the draft deletion with Gmail here. However,
        # because of a race condition in Gmail's reconciliation algorithm,
        # we need to check if the sent mail has been created in the sent
        # folder. Since we're mocking everything, we have to create it
        # ourselves.
        mock_imapclient.append("Sent Mail", data.body, None, None,
                               x_gm_msgid=4323)

        deletion_args = {
            "message_id_header": draft.message_id_header,
            "nylas_uid": draft.nylas_uid,
            "version": 5,
        }
        delete_draft(conn, default_account.id, draft.id, deletion_args)

        assert len(drafts_uids(conn)) == 0
예제 #38
0
import mailbox
from collections import defaultdict
from flanker import mime
from flanker.addresslib import address

mbox_path = '...'
mbox = mailbox.mbox(mbox_path)

# Number of messages seen per sender domain.
domains = defaultdict(int)

for _key, msg in mbox.iteritems():
    parsed = mime.from_string(msg.as_string())
    from_header = parsed.headers['From']
    # A message may have no usable From header, and flanker's address.parse()
    # returns None for input it cannot parse. Skip those messages instead of
    # crashing on `.hostname` in the middle of the mailbox.
    from_address = address.parse(from_header) if from_header else None
    if from_address is None:
        continue
    domains[from_address.hostname] += 1
예제 #39
0
def test_draft_updates(db, default_account, mock_imapclient):
    """Walk a draft through save, update and delete on a mocked IMAP server.

    After every save/update exactly one message may sit in Drafts; the copy
    left after the version bump must have the Message-Id built from the
    draft's public id and version; after deletion Drafts must be empty.
    """
    # Set up folder list
    mock_imapclient._data['Drafts'] = {}
    mock_imapclient._data['Trash'] = {}
    mock_imapclient._data['Sent Mail'] = {}
    mock_imapclient.list_folders = lambda: [
        (('\\HasNoChildren', '\\Drafts'), '/', 'Drafts'),
        (('\\HasNoChildren', '\\Trash'), '/', 'Trash'),
        (('\\HasNoChildren', '\\Sent'), '/', 'Sent Mail'),
    ]

    pool = writable_connection_pool(default_account.id)

    draft = create_message_from_json(
        {'subject': 'Test draft'}, default_account.namespace, db.session,
        True)
    draft.is_draft = True
    draft.version = 0
    db.session.commit()

    with pool.get() as conn:

        def count_drafts():
            # Re-select Drafts so the count reflects the latest folder state.
            conn.select_folder('Drafts', lambda *args: True)
            return len(conn.all_uids())

        save_draft(conn, default_account.id, draft.id, {'version': 0})
        assert count_drafts() == 1

        # Check that draft is not resaved if already synced.
        update_draft(conn, default_account.id, draft.id, {'version': 0})
        assert count_drafts() == 1

        # Check that an older version is deleted
        draft.version = 4
        sendmail_update_draft(
            db.session,
            default_account,
            draft,
            from_addr=draft.from_addr,
            subject='New subject',
            blocks=[],
        )
        db.session.commit()

        update_draft(conn, default_account.id, draft.id, {'version': 5})

        conn.select_folder('Drafts', lambda *args: True)
        uids_after_update = conn.all_uids()
        assert len(uids_after_update) == 1
        data = conn.uids(uids_after_update)[0]
        parsed = mime.from_string(data.body)
        message_id = parsed.headers.get('Message-Id')
        assert message_id == '<{}-{}@mailer.nylas.com>'.format(
            draft.public_id, draft.version)

        # We're testing the draft deletion with Gmail here. However,
        # because of a race condition in Gmail's reconciliation algorithm,
        # we need to check if the sent mail has been created in the sent
        # folder. Since we're mocking everything, we have to create it
        # ourselves.
        mock_imapclient.append(
            'Sent Mail', data.body, None, None, x_gm_msgid=4323)

        delete_draft(
            conn,
            default_account.id,
            draft.id,
            {
                'message_id_header': draft.message_id_header,
                'nylas_uid': draft.nylas_uid,
                'version': 5,
            },
        )

        assert count_drafts() == 0
예제 #40
0
    def data(self):
        """Return this block's raw bytes, recovering them if necessary.

        Lookup order:

        1. ``''`` when ``self.size`` is 0.
        2. ``self._data`` when present (set temporarily on initial download).
        3. The blockstore entry for ``self.data_sha256``.
        4. If the blockstore has nothing and this is a ``Block`` with parts,
           the parent message's raw MIME (from the blockstore, else from the
           provider) is parsed and searched for a part whose SHA-256 matches
           ``self.data_sha256``; a match is re-saved to the blockstore.

        Returns the data on success and None when it cannot be recovered.
        Data obtained through steps 2 or 3 is asserted to match the stored
        hash before being returned.
        """
        if self.size == 0:
            log.warning('Block size is 0')
            return ''
        elif hasattr(self, '_data'):
            # On initial download we temporarily store data in memory
            value = self._data
        else:
            value = blockstore.get_from_blockstore(self.data_sha256)

        if value is None:
            log.warning("Couldn't find data on S3 for block",
                        sha_hash=self.data_sha256)

            from inbox.models.block import Block
            if isinstance(self, Block):
                if self.parts:
                    # This block is an attachment of a message that was
                    # deleted. We will attempt to fetch the raw
                    # message and parse out the needed attachment.

                    message = self.parts[0].message  # only grab one
                    account = message.namespace.account

                    statsd_string = 'api.direct_fetching.{}.{}'.format(
                        account.provider, account.id)

                    # Try to fetch the message from S3 first.
                    with statsd_client.timer(
                            '{}.blockstore_latency'.format(statsd_string)):
                        raw_mime = blockstore.get_from_blockstore(
                            message.data_sha256)

                    # If it's not there, get it from the provider.
                    if raw_mime is None:
                        statsd_client.incr(
                            '{}.cache_misses'.format(statsd_string))

                        with statsd_client.timer(
                                '{}.provider_latency'.format(statsd_string)):
                            raw_mime = get_raw_from_provider(message)

                        # Only hash and cache when the provider actually
                        # returned something; hashing None would raise a
                        # TypeError and bypass the "give up" branch below.
                        if raw_mime is not None:
                            msg_sha256 = sha256(raw_mime).hexdigest()

                            # Cache the raw message in the blockstore so that
                            # we don't have to fetch it over and over.
                            with statsd_client.timer(
                                    '{}.blockstore_save_latency'.format(
                                        statsd_string)):
                                blockstore.save_to_blockstore(msg_sha256,
                                                              raw_mime)
                    else:
                        # We found it in the blockstore --- report this.
                        statsd_client.incr(
                            '{}.cache_hits'.format(statsd_string))

                    # If we couldn't find it there, give up.
                    if raw_mime is None:
                        log.error("Don't have raw message for hash {}".format(
                            message.data_sha256))
                        return None

                    parsed = mime.from_string(raw_mime)
                    if parsed is not None:
                        for mimepart in parsed.walk(
                                with_self=parsed.content_type.is_singlepart()):
                            if mimepart.content_type.is_multipart():
                                continue  # TODO should we store relations?

                            data = mimepart.body

                            # Hashes are computed over bytes, so text bodies
                            # are encoded before comparison.
                            if isinstance(data, unicode):
                                data = data.encode('utf-8', 'strict')

                            if data is None:
                                continue

                            # Found it!
                            if sha256(data).hexdigest() == self.data_sha256:
                                log.info('Found subpart with hash {}'.format(
                                    self.data_sha256))

                                with statsd_client.timer(
                                        '{}.blockstore_save_latency'.format(
                                            statsd_string)):
                                    blockstore.save_to_blockstore(
                                        self.data_sha256, data)
                                    return data
                    log.error(
                        "Couldn't find the attachment in the raw message",
                        message_id=message.id)

            log.error('No data returned!')
            return value

        assert self.data_sha256 == sha256(value).hexdigest(), \
            "Returned data doesn't match stored hash!"
        return value
예제 #41
0
 def __init__(self, message_string):
     """Keep the raw message text and its flanker-parsed form.

     ``message_string`` is stored as given; the result of
     ``mime.from_string`` on it is stored in ``FKmsg``.
     """
     self.message_string = message_string
     parsed_message = mime.from_string(self.message_string)
     self.FKmsg = parsed_message
예제 #42
0
파일: mail.py 프로젝트: HackGT/faqbot
    def dosync(self):
        print "You\'ve Got Mail."
        did_except = True
        while did_except:
            try:
                _, data = self.mail.search(None, "ALL")
                did_except = False
            except:
                # Attempt reconnect
                did_except = True
                print "Disconnected, attempting reconnect."
                self.mail = imaplib2.IMAP4_SSL(IMAP_SERVER)
                self.mail.login(MAIL_USER, MAIL_PASSWORD)

                self.mail.select("inbox", readonly=True)

        ids = data[0]
        id_list = ids.split()
        new_mail_ids = []

        if id_list[-1] < self.last_id:
            new_mail_ids = []
        else:
            for i in xrange(len(id_list) - 1, 0, -1):
                if id_list[i] == self.last_id:
                    break
                else:
                    new_mail_ids.append(id_list[i])
        self.last_id = id_list[-1]

        for mail_id in new_mail_ids:
            _, data = self.mail.fetch(mail_id, "(RFC822)")

            raw_email = "null"
            for d in data:
                if type(d) is tuple:
                    if "RFC822" in d[0]:
                        raw_email = d[1]

            if raw_email == "null":
                continue

            email_message = email.message_from_string(raw_email)
            flanker_msg = mime.from_string(raw_email)

            body = "null"

            try:
                for part in flanker_msg.parts:
                    pp = part.body.encode('ascii', 'ignore')
                    if start_trigger(pp, TRIGGERS):
                        body = pp
                        break
            except Exception as _:
                pass

            # If body is still null, just look for this stuff
            if body == "null":
                for l in raw_email.split('\n'):
                    if start_trigger(l, TRIGGERS):
                        body = l

            # CR-LF ugh
            body = body.replace('\r', '')

            tos = email_message.get_all('to', [])
            ccs = email_message.get_all('cc', [])
            all_recipients = getaddresses(tos + ccs) + [
                parseaddr(email_message["Reply-To"] or email_message["From"])
            ]

            reply_object = {
                'subject': email_message["Subject"],
                'all_recipients': all_recipients,
                'raw_email': raw_email,
                'msg_id': email_message["Message-ID"]
            }
            if "In-Reply-To" in email_message:
                reply_object["reply_to"] = email_message["In-Reply-To"]

            trigger = start_trigger(body, TRIGGERS)
            if trigger and "From" in email_message and is_whitelisted(
                    raw_email):
                print "Request from {} for subject {}.".format(
                    email_message["From"], email_message["Subject"])
                # Extra parsing since our trigger word can include spaces due to gmail autocomplete
                body = body.replace(trigger, '')
                argv = [x.strip() for x in body.split()]
                argv = [trigger] + argv
                callbacks.triggered_email(body, argv, reply_object)
            else:
                callbacks.raw_email(flanker_msg, raw_email, reply_object)
예제 #43
0
def eml_to_list(my_eml):
    """Extract resume records from the first HTML part of an ``.eml`` file.

    The file at ``my_eml`` is parsed with flanker. For the first part whose
    content type is ``text/html``, the rows of the nested table are flattened
    to text, split into per-resume segments on "最後修改", and each segment
    is turned into a dict of the fields recognised below.

    Returns:
        A list of dicts (one per resume) for the first HTML part, or None
        when the message has no ``text/html`` part.
    """
    with open(my_eml, 'rb') as fhdl:
        raw_email = fhdl.read()
    msg = mime.from_string(raw_email)
    for part in msg.parts:
        if not (part.content_type == "text/html"):
            continue

        soup = BeautifulSoup(part.body, "html.parser")
        rows = soup.table.table.find_all("tr")
        print(type(rows))

        # Strip spaces, ideographic spaces and the block marker from every
        # row, keeping only rows that still contain text.
        row_lines = [
            row.text.replace(" ", "").replace('\u3000', "").replace(
                '▍', "").splitlines()
            for row in rows
        ]
        non_empty_rows = [lines for lines in row_lines if lines]
        non_empty_rows.pop()

        # The first line of each row, minus two leading and one trailing row.
        first_lines = [lines[0] for lines in non_empty_rows]
        first_lines.pop(0)
        first_lines.pop(0)
        first_lines.pop()

        segments = ','.join(first_lines).split("最後修改")
        # Drop short fragments. Built as a new list: removing items from the
        # list while iterating over it skipped the element following each
        # removal, letting some short fragments through.
        segments = [segment for segment in segments if len(segment) >= 100]

        # Everything from "專長" onwards is discarded, as are the first two
        # comma-separated fields of each resume.
        split_resumes = []
        for segment in segments:
            fields = segment.split("專長")[0].split(",")
            fields.pop(0)
            fields.pop(0)
            split_resumes.append(fields)

        coll = []
        for sub_list in split_resumes:
            if not sub_list:
                continue
            d = {}
            # A very short trailing field is dropped before the scan.
            if len(sub_list[-1]) <= 5:
                sub_list.pop()
            for count, record in enumerate(sub_list):
                if "代碼" in record:
                    d["姓名"] = record.split("代碼")[0]
                if ("男" in record) or ("女" in record):
                    gender_and_age = record.split("|")
                    d["性別"] = gender_and_age[0]
                    d["年齡"] = gender_and_age[1]
                if "聯絡電話" in record:
                    d["聯絡電話"] = record.split("聯絡電話")[1]
                if "電子郵件" in record:
                    d["電子郵件"] = record.split("電子郵件")[1]
                if "聯絡地址" in record:
                    d["聯絡地址"] = record.split("聯絡地址")[1]
                if "教育程度" in record:
                    d["教育程度"] = record.split("教育程度")[1]
                if "職務類別" in record:
                    d["求職類別"] = record.split("職務類別")[1]
                if "工作經驗累計年資" in record:
                    d["累計年資"] = record.split("工作經驗累計年資")[1]
                if "累計經驗" in record:
                    d["累計經驗"] = record.split("累計經驗")[1]
                    # All remaining fields are treated as past employers.
                    d["過往公司"] = ",".join(sub_list[count + 1:])
            coll.append(d)
        return coll
예제 #44
0
# Settings are read from the environment, with the defaults given here used
# when a variable is unset.
emails_path = os.getenv('EMAILS_PATH', "/var/mailgunflanker")
elasticsearch_url = os.getenv('ELASTICSEARCH', "http://elasticsearch:9200")
email_index = os.getenv('ELASTICSEARCH_INDEX', "mailgunflanker")

from elasticsearch import Elasticsearch
es = Elasticsearch([elasticsearch_url])
# NOTE(review): ignore=400 presumably tolerates the "index already exists"
# response so the script can be re-run -- confirm against elasticsearch-py.
es.indices.create(index=email_index, ignore=400)

# Running counter of files visited. NOTE: this name shadows the builtin id().
id = 0

for root, dirs, files in os.walk(emails_path):
    for file in files:
        id = id + 1
        filefullname = os.path.join(root, file)
        message_string = open(filefullname, "rb").read()
        msg = mime.from_string(message_string)
        # add email to elasticsearch
        doc = {}
        doc['subject'] = msg.clean_subject
        doc['headers'] = [[header, str(value)]
                          for header, value in msg.headers.items()]

        if (msg.content_type.is_multipart()):
            doc["parts"] = []
            doc["attachments"] = []
            for part in msg.parts:
                if (part.content_type
                        and str(part.content_type).startswith("text/")):
                    doc["parts"].append({
                        "body":
                        part.body,
예제 #45
0
def create_message(db_session, log, account, mid, folder_name, received_date,
                   flags, body_string, created):
    """ Parses message data and builds db metadata and MIME part objects.

    Returns the new Message (a SpoolMessage when `created` is truthy), which
    links to the new Part objects through its `parts` relationship. This
    function never calls anything on `db_session`, so nothing is committed
    here. Returns None when parsing raises mime.DecodingError or
    RuntimeError; the failure is logged instead.

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    db_session
        Not used by this function.

    log
        Logger used for the warnings and errors emitted while parsing.

    account
        Must not be None and must have a non-None `namespace`; `account.id`
        and `account.namespace.id` are read.

    mid : int
        The account backend-specific message identifier; it's only used for
        logging errors.

    folder_name
        Only used for logging errors.

    received_date
        Stored unchanged on the new message.

    flags
        Not used by this function.

    body_string : str
        The full message including headers (encoded). Must not be unicode.

    created
        When truthy a SpoolMessage is built instead of a Message.
    """
    # trickle-down bugs
    assert account is not None and account.namespace is not None
    assert not isinstance(body_string, unicode)

    try:
        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # NOTE: sometimes MIME-Version is set to "1.0 (1.0)", hence the
        # .startswith
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.error('Unexpected MIME-Version: {0}'.format(mime_version))

        new_msg = SpoolMessage() if created else Message()
        new_msg.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        new_msg.subject = parsed.clean_subject
        new_msg.from_addr = parse_email_address_list(
            parsed.headers.get('From'))
        new_msg.sender_addr = parse_email_address_list(
            parsed.headers.get('Sender'))
        new_msg.reply_to = parse_email_address_list(
            parsed.headers.get('Reply-To'))

        new_msg.to_addr = parse_email_address_list(parsed.headers.getall('To'))
        new_msg.cc_addr = parse_email_address_list(parsed.headers.getall('Cc'))
        new_msg.bcc_addr = parse_email_address_list(
            parsed.headers.getall('Bcc'))

        new_msg.in_reply_to = parsed.headers.get('In-Reply-To')
        new_msg.message_id_header = parsed.headers.get('Message-Id')

        new_msg.received_date = received_date

        # Optional mailing list headers
        new_msg.mailing_list_headers = parse_ml_headers(parsed.headers)

        # Custom Inbox header
        new_msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        new_msg.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        new_msg.size = len(body_string)  # includes headers text

        i = 0  # for walk_index

        # Store all message headers as object with index 0
        headers_part = Part()
        headers_part.namespace_id = account.namespace.id
        headers_part.message = new_msg
        headers_part.walk_index = i
        headers_part.data = json.dumps(parsed.headers.items())
        new_msg.parts.append(headers_part)

        for mimepart in parsed.walk(
                with_self=parsed.content_type.is_singlepart()):
            # The index advances for every walked part, including the ones
            # skipped below, so stored walk_index values may have gaps.
            i += 1
            if mimepart.content_type.is_multipart():
                log.warning("multipart sub-part found! on {}"
                            .format(new_msg.g_msgid))
                continue  # TODO should we store relations?

            new_part = Part()
            new_part.namespace_id = account.namespace.id
            new_part.message = new_msg
            new_part.walk_index = i
            new_part.misc_keyval = mimepart.headers.items()  # everything
            new_part.content_type = mimepart.content_type.value
            new_part.filename = trim_filename(
                mimepart.content_type.params.get('name'),
                log=log)
            # TODO maybe also trim other headers?

            if mimepart.content_disposition[0] is not None:
                value, params = mimepart.content_disposition
                if value not in ['inline', 'attachment']:
                    # The template has four fields, so four arguments must
                    # be supplied; passing only three raised IndexError here
                    # instead of logging the bad disposition.
                    errmsg = """
    Unknown Content-Disposition on message {0} found in {1}.
    Bad Content-Disposition was: '{2}'
    Parsed Content-Disposition was: '{3}'""".format(
                        mid, folder_name, mimepart.content_disposition,
                        value)
                    log.error(errmsg)
                    continue
                else:
                    new_part.content_disposition = value
                    if value == 'attachment':
                        # For attachments the Content-Disposition filename
                        # replaces the Content-Type `name` set above.
                        new_part.filename = trim_filename(
                            params.get('filename'),
                            log=log)

            if mimepart.body is None:
                data_to_write = ''
            elif new_part.content_type.startswith('text'):
                data_to_write = mimepart.body.encode('utf-8', 'strict')
                # normalize mac/win/unix newlines
                data_to_write = data_to_write \
                    .replace('\r\n', '\n').replace('\r', '\n')
            else:
                data_to_write = mimepart.body
            if data_to_write is None:
                data_to_write = ''

            new_part.content_id = mimepart.headers.get('Content-Id')
            new_part.data = data_to_write
            new_msg.parts.append(new_part)
    except mime.DecodingError:
        # occasionally iconv will fail via maximum recursion depth
        log_decode_error(account.id, folder_name, mid, body_string)
        log.error('DecodeError, msg logged to {0}'.format(
            get_errfilename(account.id, folder_name, mid)))
        return
    except RuntimeError:
        log_decode_error(account.id, folder_name, mid, body_string)
        log.error('RuntimeError<iconv> msg logged to {0}'.format(
            get_errfilename(account.id, folder_name, mid)))
        return

    new_msg.calculate_sanitized_body()
    return new_msg
예제 #46
0
from flanker import mime

# Email from the sync dump exported to the 'test' db
with open('tests/data/messages/replyto_message.txt', 'r') as f:
    message = f.read()

# Parse the raw text once; the header values below are read from it.
parsed = mime.from_string(message)

message_id = parsed.headers.get('Message-ID')
references = parsed.headers.get('References')

# Message-ID and References header values of the fixture message.
TEST_MSG = {'message-id': message_id, 'references': references}
예제 #47
0
    def __init__(self, account=None, mid=None, folder_name=None,
                 received_date=None, flags=None, body_string=None,
                 *args, **kwargs):
        """ Parses message data and fills in db metadata and MIME parts.

        When none of account, mid, folder_name, flags and body_string is
        truthy (the drafts case) only the base initializer runs. Otherwise
        all five must be non-None; the raw message is parsed, its metadata
        is stored on this object and one Part per stored MIME part is
        appended to `self.parts`.

        If parsing raises mime.DecodingError, AttributeError or
        RuntimeError, the failure is logged and `self.mark_error()` is
        called instead of propagating the exception.

        Threads are not computed here; you gotta do that separately.

        Parameters
        ----------
        account
            `account.id` and `account.namespace.id` are read; the namespace
            must not be None.

        mid : int
            The account backend-specific message identifier; it's only used
            for logging errors.

        folder_name
            Only used for logging errors.

        received_date
            Stored when truthy; otherwise the date is derived from the
            Date and Received headers via get_internaldate().

        flags
            Only checked for presence here.

        body_string : str
            The full message including headers (encoded). Must not be
            unicode.

        Raises
        ------
        ValueError
            If some but not all of the required arguments are supplied.
        """
        _rqd = [account, mid, folder_name, flags, body_string]

        MailSyncBase.__init__(self, *args, **kwargs)

        # for drafts
        if not any(_rqd):
            return

        # At least one argument is truthy past this point, so only the
        # "all supplied" condition still needs checking.
        if not all(v is not None for v in _rqd):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "flags, body_string")

        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        try:
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            self.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            self.subject = parsed.clean_subject
            self.from_addr = parse_email_address_list(
                parsed.headers.get('From'))
            self.sender_addr = parse_email_address_list(
                parsed.headers.get('Sender'))
            self.reply_to = parse_email_address_list(
                parsed.headers.get('Reply-To'))

            self.to_addr = parse_email_address_list(
                parsed.headers.getall('To'))
            self.cc_addr = parse_email_address_list(
                parsed.headers.getall('Cc'))
            self.bcc_addr = parse_email_address_list(
                parsed.headers.getall('Bcc'))

            self.in_reply_to = parsed.headers.get('In-Reply-To')
            self.message_id_header = parsed.headers.get('Message-Id')

            self.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            self.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            self.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            self.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            from inbox.models.block import Part

            # Store all message headers as object with index 0
            headers_part = Part()
            headers_part.namespace_id = account.namespace.id
            headers_part.message = self
            headers_part.walk_index = i
            headers_part.data = json.dumps(parsed.headers.items())
            self.parts.append(headers_part)

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # The index advances for every walked part, including the
                # ones skipped below, so walk_index values may have gaps.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?

                new_part = Part()
                new_part.namespace_id = account.namespace.id
                new_part.message = self
                new_part.walk_index = i
                new_part.content_type = mimepart.content_type.value
                new_part.filename = _trim_filename(
                    mimepart.content_type.params.get('name'),
                    account.id, mid)
                # TODO maybe also trim other headers?

                if mimepart.content_disposition[0] is not None:
                    value, params = mimepart.content_disposition
                    if value not in ['inline', 'attachment']:
                        log.error('Unknown Content-Disposition',
                                  account_id=account.id, mid=mid,
                                  folder_name=folder_name,
                                  bad_content_disposition=
                                  mimepart.content_disposition,
                                  parsed_content_disposition=value)
                        continue
                    else:
                        new_part.content_disposition = value
                        if value == 'attachment':
                            # For attachments the Content-Disposition
                            # filename replaces the Content-Type `name`.
                            new_part.filename = _trim_filename(
                                params.get('filename'), account.id, mid)

                if mimepart.body is None:
                    data_to_write = ''
                elif new_part.content_type.startswith('text'):
                    data_to_write = mimepart.body.encode('utf-8', 'strict')
                    # normalize mac/win/unix newlines
                    data_to_write = data_to_write \
                        .replace('\r\n', '\n').replace('\r', '\n')
                else:
                    data_to_write = mimepart.body
                if data_to_write is None:
                    data_to_write = ''

                new_part.content_id = mimepart.headers.get('Content-Id')
                new_part.data = data_to_write
                self.parts.append(new_part)
            self.calculate_sanitized_body()
        except mime.DecodingError:
            # Occasionally iconv will fail via maximum recursion depth. We
            # still keep the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            log.error('Message parsing DecodeError', account_id=account.id,
                      folder_name=folder_name, err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
        except AttributeError:
            # For EAS messages that are missing Date + Received headers, due
            # to the processing we do in inbox.util.misc.get_internaldate()
            _log_decode_error(account.id, folder_name, mid, body_string)
            log.error('Message parsing AttributeError', account_id=account.id,
                      folder_name=folder_name, err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
        except RuntimeError:
            _log_decode_error(account.id, folder_name, mid, body_string)
            # Pass the context as structured fields like the handlers above;
            # calling .format() on a message without placeholders silently
            # dropped the error file name.
            log.error('Message parsing RuntimeError<iconv>',
                      account_id=account.id, folder_name=folder_name,
                      err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
예제 #48
0
파일: bot.py 프로젝트: ykabusalah/faqbot
    def dosync(self):
        """Answer bot commands found in mail newer than ``self.last_id``.

        Searches the mailbox for all message ids (reconnecting and retrying
        when the search fails), collects the ids newer than
        ``self.last_id`` and fetches each of those messages. A message whose
        body starts with TRIGGER is treated as a command: ``template``
        stores a new command text, ``whitelist`` posts an address to quill
        and replies, any other known command replies with its stored text.

        Side effects: may replace ``self.mail``, updates ``self.last_id``,
        may call ``save_commands`` / ``quill.post_wl`` and sends replies
        over SMTP. Returns None.
        """
        print("You\'ve Got Mail.")
        did_except = True
        while did_except:
            try:
                result, data = self.mail.search(None, "ALL")
                did_except = False
            except Exception:
                # Attempt reconnect. Only ordinary errors are retried, so
                # KeyboardInterrupt / SystemExit can still stop the loop.
                did_except = True
                print("Disconnected, attempting reconnect.")
                self.mail = imaplib2.IMAP4_SSL(IMAP_SERVER)
                self.mail.login(MAIL_USER, MAIL_PASSWORD)

                self.mail.select("inbox", readonly=True)

        ids = data[0]
        id_list = ids.split()
        if not id_list:
            # Empty search result: there is no newest id to index below.
            return
        new_mail_ids = []

        # NOTE(review): ids are compared as returned by split(); if they are
        # strings the ordering is lexicographic ("10" < "9") -- confirm the
        # type of self.last_id.
        if id_list[-1] < self.last_id:
            new_mail_ids = []
        else:
            # Walk from the newest id backwards until the last seen id.
            # NOTE(review): the range stops before index 0, so the oldest
            # id is never collected -- confirm this is intended.
            for i in xrange(len(id_list) - 1, 0, -1):
                if id_list[i] == self.last_id:
                    break
                else:
                    new_mail_ids.append(id_list[i])
        self.last_id = id_list[-1]

        for mail_id in new_mail_ids:
            result, data = self.mail.fetch(mail_id, "(RFC822)")
            # "null" is the sentinel for "no RFC822 payload in the response".
            raw_email = "null"
            for d in data:
                if isinstance(d, tuple):
                    if "RFC822" in d[0]:
                        raw_email = d[1]

            if raw_email == "null":
                continue

            email_message = email.message_from_string(raw_email)
            flanker_msg = mime.from_string(raw_email)

            body = "null"

            try:
                for part in flanker_msg.parts:
                    if part.body.encode('ascii', 'ignore').startswith(TRIGGER):
                        body = part.body.encode('ascii', 'ignore')
                        break
            except Exception:
                # Best effort: the raw-text scan below is the fallback.
                pass

            # If body is still null, just look for this stuff
            if body == "null":
                for l in raw_email.split('\n'):
                    if l.startswith(TRIGGER):
                        body = l

            # CR-LF ugh
            body = body.replace('\r', '')

            COMMANDS = load_commands()

            if body.startswith(TRIGGER) and "From" in email_message:
                if len(body.split(' ')) >= 2:
                    command = body.split(' ')[1].strip()

                    # Ugly custom rule
                    if command.startswith('edu'):
                        command = "edu"
                    if command.startswith('mixed'):
                        command = "mixed"

                    # Hacky
                    for c in COMMANDS.keys():
                        if command.startswith(c):
                            command = c
                            break
                else:
                    command = "faq"

                print("Request from {} for subject {} with command {}.".format(
                    email_message["From"], email_message["Subject"], command))
                tos = email_message.get_all('to', [])
                ccs = email_message.get_all('cc', [])
                all_recipients = getaddresses(tos + ccs) + [
                    parseaddr(email_message["Reply-To"]
                              or email_message["From"])
                ]

                # NOTE(review): the `return` statements below end the whole
                # sync, so later ids in new_mail_ids are skipped even though
                # self.last_id has already advanced -- confirm intended.
                if command.startswith('template'):
                    lines = body.strip().split('\n')
                    new_command = lines[0].split()[2]
                    content = '<br>\n'.join(lines[1:])
                    print('Request from {} for new command {} with body:'.format(
                        email_message["From"], new_command))
                    print(content)
                    COMMANDS[new_command] = content
                    save_commands(COMMANDS)
                    return

                if command.startswith('whitelist'):
                    # Compute the whitelist email
                    wl_email = None
                    for line in body.split('\n'):
                        if line.startswith(TRIGGER):
                            tokens = line.split(' ')
                            if len(tokens) >= 3:
                                wl_email = tokens[2]

                    if not wl_email:
                        return

                    print("Whitelist Email: {}".format(wl_email))

                    # Post to quill
                    quill.post_wl(quill.get_wl() + [wl_email])

                    content = COMMANDS['whitelist'].format(
                        email=wl_email) + FOOTER
                else:
                    if command not in COMMANDS:
                        return
                    content = COMMANDS[command] + FOOTER

                # A message without a Subject header yields None here; treat
                # it as an empty subject instead of crashing on .startswith.
                subject = email_message["Subject"] or ""
                if subject.startswith('Re:'):
                    reply_sujet = subject
                else:
                    reply_sujet = "Re: " + subject
                recipients = []
                for r in all_recipients:
                    recipients.append(r[1])

                # Try to find the initial sender
                recipients += email_finder.get_emails(raw_email)

                # Remove dupes
                recipients = list(set(recipients))

                print(recipients)

                msg = MIMEText(content, 'html')
                msg['Subject'] = reply_sujet
                msg["Message-ID"] = email.utils.make_msgid()
                msg["In-Reply-To"] = email_message["Message-ID"]
                msg["References"] = email_message["Message-ID"]
                msg["To"] = ", ".join(recipients)
                msg["From"] = MAIL_FROM

                s = smtplib.SMTP_SSL(SMTP_SERVER)
                s.login(SEND_MAIL_USER, SEND_MAIL_PASSWORD)
                s.sendmail(MAIL_FROM, recipients, msg.as_string())
                s.quit()
예제 #49
0
"""
import sys
from flanker import mime
from sender import Message
from sender import Mail
import time
# SETTINGS: SMTP connection parameters, to be filled in before use.
smtp_hostname = ""
smtp_port = 25
smtp_username = ""
smtp_password = ""
smtp_security = "SSL"
# END SETTINGS

# Read the whole raw message from stdin and parse it.
# NOTE(review): `input` shadows the builtin of the same name.
input = sys.stdin.read()
text = mime.from_string(input)


# Copy subject, sender, recipient and body from the parsed message into a
# new outgoing Message, stamped with the current time.
msg = Message(text.headers['subject'])
msg.fromaddr = text.headers['from']
msg.to = text.headers['to']
msg.body = text.body
msg.date = time.time()
msg.charset = "utf-8"
# Check SSL or TLS: both flags start False and are set from smtp_security.
smtp_ssl_use = False
smtp_tls_use = False

if smtp_security == "SSL":
	smtp_ssl_use = True
elif smtp_security == "TLS":
예제 #50
0
        logging.basicConfig(level=logging.DEBUG)
    # Both input files must exist before anything is read.
    if not path.exists(args.privatekeyfile):
        sys.exit("Private key file not found.")
    if not path.exists(args.messagefile):
        sys.exit("Message file not found.")
    # On Python 3 the command-line strings are converted to bytes.
    if sys.version_info[0] >= 3:
        args.selector = bytes(args.selector, encoding=UTF8_ENCODING)
        args.domain = bytes(args.domain, encoding=UTF8_ENCODING)
        args.headers = bytes(args.headers, encoding=UTF8_ENCODING)

    # read file contents
    # NOTE(review): neither file object is explicitly closed.
    message = bytes(open(args.messagefile, 'rb').read())
    private_key = bytes(open(args.privatekeyfile, 'rb').read())

    arc_headers_present = False
    # NOTE(review): this rebinds the name `mime` to the parsed message. If
    # this code sits inside a function, the assignment makes `mime` local
    # and this line would raise UnboundLocalError -- confirm the scope.
    mime = mime.from_string(message)
    if len(mime.headers.getall('ARC-Seal')) > 0:
        arc_headers_present = True

    # Prepend the authentication-results header to the raw message, then
    # sign the combined bytes.
    authres_header = get_authres_header(args.srvid, arc_headers_present)
    message_with_authres = bytes(authres_header,
                                 encoding=UTF8_ENCODING) + message
    logging.debug("Message with authres: %s", message_with_authres)
    signature = sign_message(message_with_authres, args.selector, args.domain,
                             private_key, args.headers.split(b':'), 'ARC',
                             bytes(args.srvid, encoding=UTF8_ENCODING))
    if len(signature) == 0:
        sys.exit("Unable to generate arc headers")
    # The first signature entry is decoded and its "ARC-Seal: " prefix is
    # rewritten to use the custom separator.
    separator = "#####"
    signature[0] = signature[0].decode(UTF8_ENCODING).replace(
        "ARC-Seal: ", "ARC-Seal" + separator)
예제 #51
0
def collect_data():
    """Download (template key, quoted original text) training pairs.

    Searches "[Gmail]/All Mail" for messages whose body contains "@faqbot".
    For each message with a text/plain part, the body is split into reply
    fragments; when the first fragment starts with "@faqbot <key>" and
    <key> is a configured template, the quoted lines of the second fragment
    are stored together with the key. The accumulated list is written with
    save_config(..., 'smartreply_data') after every new pair.

    Returns None; the result is only persisted through save_config.
    """
    c = load_config('templates')
    templates = c['templates']

    training_data = []

    mail = imaplib2.IMAP4_SSL(IMAP_SERVER)
    mail.login(MAIL_USER, MAIL_PASSWORD)

    mail.select("[Gmail]/All Mail", readonly=True)

    result, data = mail.search(None, '(BODY "%s")' % ("@faqbot"))

    ids = data[0]
    id_list = ids.split()

    for idx, r_id in enumerate(id_list):
        _, data = mail.fetch(r_id, "(RFC822)")

        # Progress output; a single parenthesized argument prints the same
        # text under Python 2 and Python 3.
        print("%i / %i (%i%%)" % (idx, len(id_list),
                                  int(float(idx) / len(id_list) * 100)))

        # "null" is the sentinel for "no RFC822 payload in the response".
        raw_email = "null"
        for d in data:
            if isinstance(d, tuple):
                if "RFC822" in d[0]:
                    raw_email = d[1]

        if raw_email == "null":
            # Nothing was fetched; do not parse the placeholder as a message.
            continue

        flanker_msg = mime.from_string(raw_email)

        body = "null"

        try:
            for part in flanker_msg.parts:
                if str(part) == "(text/plain)":
                    body = part.body.encode('ascii', 'ignore')
        except Exception:
            # Best effort: a message whose parts cannot be read is skipped
            # by the check below.
            pass

        if body == "null":
            continue

        parsed_body = EmailReplyParser.read(body)

        if len(parsed_body.fragments) < 2:
            continue

        # The reply must read "@faqbot <key> ..."; fewer than two tokens
        # would otherwise raise IndexError.
        tokens = parsed_body.fragments[0].content.split()
        if len(tokens) < 2 or tokens[0] != "@faqbot":
            continue

        key = tokens[1]
        quoted = parsed_body.fragments[1].content

        lines = []
        for quoted_line in quoted.split('\n'):
            if quoted_line.startswith('> '):
                stripped = quoted_line.replace('>', '').strip()
                # Drop empty quote lines and lines starting with "On".
                if stripped != '' and not stripped.startswith('On'):
                    lines.append(quoted_line.replace('>', ''))

        original = '\n'.join(lines)

        # Now that we have this, let's make sure it's
        # valid and stuff and then save it.

        if key in templates:
            training_data.append((key, original))
            save_config(training_data, 'smartreply_data')
예제 #52
0
# --format selects how the parsed message is emitted; "debug" is the default.
opt_parser.add_argument('--format',
                        dest="format",
                        type=str,
                        help='The output format: json, msgpack, debug',
                        default="debug")
args = opt_parser.parse_args()

# A file argument is mandatory and must name an existing file.
if not args.file:
    print("An argument is required (the name of the file)")
    sys.exit(1)

file_path = args.file
if not os.path.isfile(file_path):
    print("The file specified does not exist")
    sys.exit(1)

with open(file_path, 'r') as f:
    file_contents = f.read()

# Parse the raw text into a MIME part, then convert it with the Parser.
mimepart = mime.from_string(file_contents)

parser = Parser()
msg = parser.message_from_mimepart(mimepart)

# Any format other than "json" or "msgpack" falls through to debug output.
if args.format == "json":
    print message_to_json(msg)
elif args.format == "msgpack":
    print message_to_msgpack(msg)
else:
    message_to_debug_out(msg)
예제 #53
0
    return results, indicators


if __name__ == '__main__':
    # Command line: -r selects the input email, -o the output format.
    argParser = argparse.ArgumentParser(description='email_abuse parser')
    argParser.add_argument(
        '-r',
        default='-',
        help='Filename of the raw email to read (default: stdin)')
    argParser.add_argument(
        '-o',
        default='ascii',
        help='Output format: ascii or json (default: ascii)')
    args = argParser.parse_args()
    # '-' means the raw email is read from stdin instead of a file.
    if args.r == '-':
        msg = mime.from_string(sys.stdin.read())
    else:
        # NOTE(review): fp is not closed in the visible code.
        fp = open(args.r, 'rb')
        msg = mime.from_string(fp.read())

    msg_file = init(msg)

    subject = msg.subject
    # NOTE(review): presumably candidate passwords for protected archives
    # -- confirm where passwordlist is consumed.
    passwordlist = [
        "password", "passw0rd", "infected", "qwerty", "malicious", "archive",
        "zip"
    ]
    indicators = 0

    examine_headers = ExamineHeaders(msg)
    origin_ip, rbl_listed, rbl_comment, mailfrom, mailto, origin_domain = examine_headers.processing(
예제 #54
0
def remove_code(msg):
    """Return msg after running the three code-stripping passes in order.

    The passes are remove_content_in_braces, remove_func_and_struct and
    remove_other_code_lines; each one receives the previous pass's result.
    """
    passes = (
        remove_content_in_braces,
        remove_func_and_struct,
        remove_other_code_lines,
    )
    cleaned = msg
    for strip_pass in passes:
        cleaned = strip_pass(cleaned)
    return cleaned

# remove_code(msg)
  
# Accumulates the kept body lines of every processed email.
rt =''
fpath = "a1/*.email"
files = glob.glob(fpath)
# NOTE(review): `file` shadows the Python 2 builtin, and f is never closed.
for file in files :
  f = open(file, "r")
  msg = f.read()
  msg = mime.from_string(msg)
  if msg.content_type.is_singlepart():
      temp = str(msg.body)
      temp = temp.splitlines()
      # Keep every line except those starting with '>' or 'On'.
      for _ in temp:
          if _.startswith('>'):
              continue
          elif _.startswith('On'):
              continue
          else:
              rt+=_+"\n"
  else :
      # Otherwise look at each part whose str() contains "(text/plain)".
      for part in msg.parts :
          if "(text/plain)" in str(part) :
              temp = str(part.body)
              temp = temp.splitlines()
예제 #55
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """ Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        A Message is returned even when parsing fails: mime.DecodingError,
        AttributeError and RuntimeError are logged and the message is marked
        as errored. The same happens, with the field emptied, when to_addr,
        cc_addr, bcc_addr or references is too long.

        Threads are not computed here; you gotta do that separately.

        Parameters
        ----------
        account
            `account.id` and `account.namespace.id` are read; the namespace
            must not be None.

        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        folder_name
            Only used for logging errors.

        received_date
            Stored when truthy; otherwise the date is derived from the
            Date and Received headers via get_internaldate().

        body_string : str
            The full message including headers (encoded). Must not be
            unicode.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        _rqd = [account, mid, folder_name, body_string]
        if any(v is None for v in _rqd):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "body_string")
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        try:
            msg.namespace_id = account.namespace.id
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id,
                            folder_name=folder_name,
                            mid=mid,
                            mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            msg.subject = parsed.clean_subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            from inbox.models.block import Block, Part

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = i
            msg.parts.append(headers_part)

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # The index advances for every walked part, including the
                # skipped multipart containers.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id,
                                folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)

            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name,
                      account_id=account.id,
                      err_filename=err_filename,
                      error=e)
            msg._mark_error()

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            value = getattr(msg, field)
            if json_field_too_long(value):
                _log_decode_error(account.id, folder_name, mid, body_string)
                err_filename = _get_errfilename(account.id, folder_name, mid)
                # Include the error file name, as the parse-error log above
                # does; it was computed here but never reported.
                log.error('Recipient field too long',
                          field=field,
                          account_id=account.id,
                          folder_name=folder_name,
                          mid=mid,
                          err_filename=err_filename)
                setattr(msg, field, [])
                msg._mark_error()

        return msg
예제 #56
0
from flanker import mime

fpath = "3.email"


def _print_unquoted(body):
    """Print each line of body that does not start with '>', then a rule."""
    for line in str(body).splitlines():
        if not line.startswith('>'):
            print("*** " + line)
    print("********************************")


# Read the whole message up front; the context manager closes the file,
# which the previous bare open() never did.
with open(fpath, 'r') as f:
    mailmsg = f.read()
msg = mime.from_string(mailmsg)

print("printing email message!!")
if msg.content_type.is_singlepart():
    _print_unquoted(msg.body)
elif msg.content_type.is_multipart():
    for part in msg.parts:
        # Plain-text parts are picked by matching "(text/plain)" in str(part).
        if "(text/plain)" in str(part):
            _print_unquoted(part.body)