Example #1
0
    def header(self):
        """
        Return the message header, building and caching it on first access.

        When the '__substg1.0_007D' stream is available it is parsed and the
        message date is set on the result under 'date'. Otherwise a header is
        generated from the date, sender, to, cc and message_id attributes.
        """
        if hasattr(self, '_header'):
            return self._header

        raw_header = self._getStringStream('__substg1.0_007D')
        if raw_header is None:
            logger.info(
                'Header is empty or was not found. Header will be generated from other streams.'
            )
            generated = EmailParser().parsestr('')
            for field_name, field_value in (
                    ('Date', self.date),
                    ('From', self.sender),
                    ('To', self.to),
                    ('Cc', self.cc),
                    ('Message-Id', self.message_id),
                    # TODO find authentication results outside of header
                    ('Authentication-Results', None),
            ):
                generated.add_header(field_name, field_value)
            self._header = generated
        else:
            self._header = EmailParser().parsestr(raw_header)
            self._header['date'] = self.date
        return self._header
Example #2
0
def fetch_message_from_bucket(message_id: str) -> Message:
    """
    Download a saved email from the configured S3 bucket and return its text part.

    Args:
        message_id: Key of the stored email object inside the bucket named by
            the 'incoming_email_bucket' config value.

    Returns:
        The parsed message itself when it is not multipart; otherwise the first
        top-level 'text/plain' part, or None when no such part exists.

    Raises:
        Exception: if the 'incoming_email_bucket' config value is not set.
    """
    bucket_name = get_config('incoming_email_bucket')
    if not bucket_name:
        raise Exception('config { "incoming_email_bucket" } must be specified')

    s3_object = boto3.client("s3").get_object(Bucket=bucket_name, Key=message_id)
    # The stored object is read as-is (not compressed) and decoded to text.
    raw_email = s3_object["Body"].read().decode()
    message = EmailParser().parsestr(raw_email)

    # AWS returns old Message format: https://docs.python.org/3.6/library/email.compat32-message.html
    if not message.is_multipart():
        return message
    return next(
        (part for part in message.get_payload()
         if part.get_content_type() == 'text/plain'),
        None,
    )
Example #3
0
 def decode(cls, content, content_type):
     """Build an instance from raw bytes and a Content-Type header value.

     The content type string is parsed as a header so that the bare MIME type
     and the charset parameter can be passed to ``cls`` separately.

     Raises:
         TypeError: if ``content`` is not a bytes instance.
     """
     if isinstance(content, bytes):
         header_probe = EmailParser().parsestr('Content-Type: {}'.format(content_type))
         mime_type = header_probe.get_content_type()
         return cls(content, mime_type, header_probe.get_content_charset())
     raise TypeError("content should be an instance of bytes")
Example #4
0
 def parseHeader(self):
     """Parse string field '007D' into a message header object.

     A missing or empty field is parsed as an empty string.
     """
     raw_text = self.getStringField('007D') or ''
     return EmailParser(policy=default).parsestr(raw_text)
    def get_mail(self, message):
        """
        Fetch the whole email referred to by the given message ID.

        The message is requested from the server with BODY.PEEK[] and the raw
        RFC 822 bytes of the response are parsed into an email message object.
        """
        response = self.server.fetch(str(message), '(BODY.PEEK[])')
        raw_message = response[1][0][1]
        return EmailParser().parsebytes(raw_message)
Example #6
0
    def load_call(self, program, key, files):
        """
        Load a recorded call and restore the files it produced.

        The recording is read from the path returned by
        ``self.get_call_filename`` and parsed as a MIME message. Every
        attachment part is written to each path in ``files`` that ends with the
        attachment's file name. When no path matches, the attachment is written
        into every existing directory listed in ``self.recorded_tempdirs``.

        Returns the decoded payload of the last non-attachment ``text/plain``
        part, or ``None`` when there is no such part.
        """
        fname = self.get_call_filename(program, key, create=False)
        with open(fname, 'rb') as fhl:
            msg = EmailParser().parsestr(fhl.read().decode('utf-8'))

        stdout = None
        for part in msg.walk():
            if 'attachment' in part.get("Content-Disposition", ''):
                # Prefer the explicit Filename header; fall back to the
                # filename parameter of Content-Disposition.
                base_name = part['Filename'] or part.get_filename()
                if not base_name:
                    # Nothing to match against and no name to save under.
                    continue
                # Decode once so that several destinations can be written
                # without touching the part again.
                payload = part.get_payload(decode=True)
                targets = [
                    out_file for out_file in files
                    if out_file.endswith(base_name)
                ]
                if not targets:
                    # Was not caught by any normal outputs, so we will
                    # save the file to EVERY tempdir in the hopes of
                    # hitting one of them.
                    targets = [
                        os.path.join(fdir, base_name)
                        for fdir in self.recorded_tempdirs
                        if os.path.isdir(fdir)
                    ]
                for target in targets:
                    with open(target, 'wb') as fhl:
                        fhl.write(payload)
            elif part.get_content_type() == "text/plain":
                stdout = part.get_payload(decode=True)

        return stdout
Example #7
0
 def header(self):
     """Return the message header, parsing string field "007D" on first use.

     The parsed header is cached on the instance as ``_header``; a missing or
     empty field is parsed as an empty string.
     """
     try:
         return self._header
     except AttributeError:
         raw_text = self.getStringField("007D") or ""
         self._header = EmailParser(policy=default).parsestr(raw_text)
         return self._header
Example #8
0
def get_release_info():
    """Read the project's PKG-INFO metadata and return it as a dict.

    The file is looked up in '<project_name>.egg-info' one directory above
    this module. The file is closed as soon as it has been parsed.
    """
    this_dir = os.path.dirname(__file__)
    egg_path = os.path.abspath(os.path.join(this_dir, '..', '%s.egg-info' % project_name))
    pkg_info_filename = os.path.join(egg_path, 'PKG-INFO')

    # Use a context manager so the file handle is not leaked.
    with open(pkg_info_filename) as pkg_info_fp:
        release_info = dict(EmailParser().parse(pkg_info_fp))
    return release_info
Example #9
0
 def header(self):
     """Return the cached header, parsing it from the message on first use.

     The '__substg1.0_007D' stream is parsed when it exists; otherwise the
     header is cached (and returned) as None.
     """
     try:
         cached = self._header
     except Exception:
         raw_header = self._getStringStream('__substg1.0_007D')
         self._header = (
             None if raw_header is None
             else EmailParser().parsestr(raw_header)
         )
         cached = self._header
     return cached
Example #10
0
 def get_content(self, raw):
     """Decode a URL-safe base64 encoded email into a dict of headers plus body.

     Returns a dict built from the message headers with an extra 'body' key
     holding the payload of the preferred 'plain' body part, or None when the
     message has no such part.
     """
     data = base64.urlsafe_b64decode(raw)
     message = EmailParser(policy=policy.default).parsebytes(data)
     plain = message.get_body(preferencelist=('plain', ))
     # Compare against None explicitly: the truth value of a message object
     # follows its header count, so a body part without headers is falsy.
     body = plain.get_payload() if plain is not None else None
     email_dict = dict(message)
     email_dict['body'] = body
     return email_dict
Example #11
0
def parse(message):
    """Split a raw message string into scrubbed body text and header lines.

    Returns a dict with 'body' (first element of the Scrubber result) and
    'headers' (one "Name: value" line per header key, joined by newlines).
    """
    msgobj = EmailParser().parsestr(message)
    parsed_headers = HeaderParser().parsestr(message)
    header_lines = [
        '{}: {}'.format(name, parsed_headers[name])
        for name in parsed_headers.keys()
    ]
    body = Scrubber(msgobj).scrub()[0]
    return {
        'body': body,
        'headers': '\n'.join(header_lines),
    }
Example #12
0
 def build(self) -> None:
     """Read the distribution name from the PKG-INFO file in the source directory.

     The parsed "name" field is stored on ``self._name``.

     Raises:
         DistributionNotDetected: if PKG-INFO does not exist, or if the name
             field comes back as a ``Header`` object instead of plain text.
     """
     metadata_path = os.path.join(self.path_to_directory, "PKG-INFO")
     try:
         with open(metadata_path) as handle:
             metadata = EmailParser().parse(handle)
     except FileNotFoundError:
         raise DistributionNotDetected(
             f"Could not find PKG-INFO file in {self.path_to_directory}")

     self._name = metadata.get("name")
     if not isinstance(self._name, Header):
         return
     raise DistributionNotDetected(
         "Could not parse source distribution metadata, name detection failed"
     )
Example #13
0
    def extract(self, filename, **kwargs):
        """Return the text/plain content of the email stored in ``filename``.

        Every part whose content type starts with 'text/plain' contributes its
        payload; the payloads are joined with blank lines.
        """
        # TODO: could make option here to omit all non-original content
        # (forwarded content, quoted content in reply, signature, etc),
        # perhaps using https://github.com/zapier/email-reply-parser

        # TODO: could also potentially grab text/html content instead of
        # only grabbing text/plain content

        with open(filename) as stream:
            message = EmailParser().parse(stream)

        return '\n\n'.join(
            part.get_payload()
            for part in message.walk()
            if part.get_content_type().startswith('text/plain')
        )
Example #14
0
    def from_path(cls, path):
        """Build a Mail object from a message file stored under the maildirs root.

        The part of ``path`` below ``conf.mail.maildirs`` must consist of
        exactly three components: mailbox id, subdirectory and mail id.

        Raises:
            ValueError: if ``path`` does not start with the maildirs root, or
                if the remainder does not split into three components.
        """
        if not path.startswith(conf.mail.maildirs):
            raise ValueError('Path "%s" is not in the maildirs path' % path)

        relative = path.replace(conf.mail.maildirs, '', 1)
        relative = relative[1:] if relative.startswith('/') else relative
        mailbox_id, subdir, mail_id = relative.split('/')

        with open(path) as handle:
            msg = EmailParser().parse(handle)

        modified_at = int(os.path.getmtime(path))
        brief = MailBrief.from_message(mailbox_id, mail_id, subdir, msg, modified_at)
        return Mail(brief, [msg.get_payload()])
Example #15
0
 def header(self):
     """Return the cached header, building it on first use.

     When the '__substg1.0_007D' stream exists it is parsed and the message
     date is set on it under 'date'. Otherwise a plain dict with the keys
     'date', 'from', 'to' and 'cc' is built from the instance attributes.
     """
     try:
         cached = self._header
     except Exception:
         raw_header = self._getStringStream('__substg1.0_007D')
         if raw_header is None:
             self._header = {
                 key: getattr(self, attribute)
                 for key, attribute in (
                     ('date', 'date'),
                     ('from', 'sender'),
                     ('to', 'to'),
                     ('cc', 'cc'),
                 )
             }
         else:
             self._header = EmailParser().parsestr(raw_header)
             self._header['date'] = self.date
         cached = self._header
     return cached
Example #16
0
def hydrateMessages(imap, messageIds, additionalFields=None):
    """Fetch headers for the given message ids and wrap them in Msg objects.

    Args:
        imap: client object providing ``fetch(ids, fields)`` and
            ``get_flags(ids)``.
        messageIds: ids to fetch; only the first ``ARGS.num`` are requested.
        additionalFields: extra fetch fields. Each one is copied onto the
            resulting message under its lower-cased name. Defaults to none.

    Returns:
        A list of Msg objects for every fetched message that has a
        'BODY[HEADER]' entry and a discernible date, with ``flags`` set for
        the messages the server reported flags for.
    """
    # Avoid a shared mutable default and accept any iterable of field names.
    additionalFields = list(additionalFields or [])

    info('have %d messages in folder %s' % (len(messageIds), ARGS.mailbox))
    fields = ['BODY[HEADER]'] + additionalFields
    messages = imap.fetch(messageIds[:ARGS.num], fields)
    info('got %d messages' % len(messages))
    parser = EmailParser()
    msgs = []
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    for msgId, data in messages.items():
        rawHeaders = data.get('BODY[HEADER]', None)
        if rawHeaders is None:
            warn('no HEADER field on msg %s, skipping it' % str(msgId))
            continue
        # Second positional argument is headersonly: the body is not parsed.
        headers = parser.parsestr(rawHeaders, True)
        spamHeader = headers.get('X-Spam-Status', None)
        # prefer the "Received" header, because it is server generated and so
        # presumably not forged. But OK to fall back to "Date" header, if present
        dateHeader = dateFromReceivedHeader(headers.get('Received', None))
        if not dateHeader:
            dateHeader = headers.get('Date', None)
        if not dateHeader:
            # not wild about this, but a bunch of other code breaks if a message has no date
            # I believe that the Received: header is generated by receiving SMTP though,
            # so I believe this won't happen
            warn('message %s/"%s" has no discernible date, ingoring' %
                 (msgId, headers.get('Subject', None)))
            continue
        msg = Msg(msgId, spamHeader, dateHeader, headers)
        msg['headers'] = rawHeaders
        for f in additionalFields:
            msg[f.lower()] = data[f]
        msgs.append(msg)

    msgDict = {m.id: m for m in msgs}
    messageIds = [m.id for m in msgs]
    flags = imap.get_flags(messageIds)

    # Attach the server-side flags to the messages that were kept.
    for flaggedId, messageFlags in flags.items():
        msg = msgDict.get(flaggedId, None)
        if msg:
            msg.flags = messageFlags
    return msgs
def test_consume_and_sendmail_success(amqp_publish, smtp_messages,
                                      program_out_smtp, valid_email_message):
    """Publish a valid message and check that it is acked and delivered over SMTP.

    The program output is read until a line containing "Acked:" appears, then
    the first received SMTP message is compared against the published one.
    """
    message = valid_email_message
    amqp_publish(message)

    # any() stops at the first matching line, like the equivalent loop/break.
    acked = any("Acked:" in line for line in program_out_smtp)
    assert acked, "Reached end of output without acking the message"

    nothing_received = object()
    received_smtp = next(iter(smtp_messages), nothing_received)
    assert received_smtp is not nothing_received, "No messages received"

    parser = EmailParser(policy=default_policy)
    received_email = parser.parsebytes(received_smtp.message)
    assert received_smtp.sender == config.smtp_user
    assert received_email["from"] == config.smtp_user
    assert received_smtp.receivers == message["to"]
    assert received_email["to"] == ", ".join(message["to"])
    assert received_smtp.remote_host[0] == SMTP_TEST_SERVER_HOST
    assert received_email["subject"] == message["subject"]
    assert received_email.get_content().strip() == message["content"]
Example #18
0
def parse(message):
    """Split a raw message string into subject, scrubbed body and header lines.

    Returns a dict with:
      'subject' - the decoded Subject header (fragments with a declared
                  charset are re-encoded as UTF-8), or None when absent;
      'body'    - the first element of the Scrubber result;
      'headers' - one "Name: value" line per header key, joined by newlines.
    """
    msgobj = EmailParser().parsestr(message)
    header_parser = HeaderParser()

    subject = None
    if msgobj['Subject'] is not None:
        pieces = []
        for fragment, charset in decode_header(msgobj['Subject']):
            if charset:
                fragment = unicode(fragment, charset).encode('utf8', 'replace')
            pieces.append(fragment)
        subject = ''.join(pieces)

    parsed_headers = header_parser.parsestr(message)
    header_lines = [
        '{}: {}'.format(name, parsed_headers[name])
        for name in parsed_headers.keys()
    ]
    body = Scrubber(msgobj).scrub()[0]
    return {
        'subject': subject,
        'body': body,
        'headers': '\n'.join(header_lines),
    }
Example #19
0
 def parse(self):  # {{{2
     '''
     Parse all mails from the IMAP mailbox and turn each one into an event.

     For every message number returned by ``self.get_list()``:

     * the message is fetched and its text/plain parts (those that declare
       a charset) are concatenated into ``text``;
     * the Subject header is decoded;
     * inside a database savepoint, ``Event.parse_text(text)`` is called,
       revisions are recorded, the mail is moved to 'saved' and an
       acceptance email is sent to the original sender;
     * on ``ValidationError`` / ``IntegrityError`` the savepoint is rolled
       back, the mail is moved to 'errors' and an email describing the
       errors is sent to the original sender and appended to 'IMAP.sent';
     * any other exception rolls the savepoint back and is re-raised.
     '''
     #re_charset = re.compile( r'charset=([^\s]*)', re.IGNORECASE )
     for number in self.get_list():
         typ, data = self.mailbox.fetch(number, '(RFC822 UID BODY[TEXT])')
         # Skip responses that do not carry a (header, body) pair.
         if (data is None) or (len(data) < 1) or (data[0] is None) or \
                 (len(data[0]) < 2):
             continue
         mail = email.message_from_string(data[0][1])
         text = u""
         # NOTE(review): `mail` is already a parsed message object here, yet it
         # is wrapped in StringIO and parsed again - confirm this is intended.
         msgobj = EmailParser().parse(StringIO(mail), False)
         # Only text/plain parts that declare a charset contribute to `text`.
         for part in msgobj.walk():
             if part.get_content_type() == "text/plain":
                 if part.get_content_charset():
                     text += unicode(part.get_payload(decode=True),
                                     part.get_content_charset(), 'replace')
         # extracts subject
         subject = ''
         if msgobj['Subject'] is not None:
             decodefrag = decode_header(msgobj['Subject'])
             subj_fragments = []
             for string, enc in decodefrag:
                 if enc:
                     string = unicode(string, enc, 'replace')
                     #s = unicode( s , enc ).encode( 'utf8', \
                     #                               'replace' )
                 subj_fragments.append(string)
             subject = u''.join(subj_fragments)
         # 'from' field of the email received (TODO: better 'reply-to'?)
         to_email = mail['From']
         # sender of our emails
         from_email = settings.DEFAULT_FROM_EMAIL
         try:
             # NOTE(review): `sid` is assigned inside the try block, so the
             # handlers below assume transaction.savepoint() itself succeeded.
             sid = transaction.savepoint()
             with reversion.create_revision():
                 event, dates_times_list = Event.parse_text(text)
                 reversion.add_meta(RevisionInfo,
                                    as_text=smart_unicode(event.as_text()))
             # One clone of the event is created per entry in dates_times_list.
             for dates_times in dates_times_list:
                 with reversion.create_revision():
                     clone = event.clone(
                         user=None,
                         except_models=[EventDate, EventSession],
                         **dates_times)
                     reversion.add_meta(RevisionInfo,
                                        as_text=smart_unicode(
                                            clone.as_text()))
             assert (type(event) == Event)
             self.mv_mail(number, 'saved')
             self.stdout.write(
                 smart_str(u'Successfully added new event: ' + event.title))
             message = render_to_string(
                 'mail/email_accepted_event.txt', {
                     'site_name': Site.objects.get_current().name,
                     'site_domain': Site.objects.get_current().domain,
                     'event': event,
                     'original_message': text,
                 })
             # The subject is collapsed onto one line before being used as a
             # header. Note that `mail` is rebound to the outgoing message here.
             mail = EmailMessage(''.join(subject.splitlines()), message,
                                 from_email, (to_email, ))
             mail.send(fail_silently=False)
             transaction.savepoint_commit(sid)
         except (ValidationError, IntegrityError) as err:
             transaction.savepoint_rollback(sid)
             # error found, saving the message in the imap folder 'errors'
             self.mv_mail(number, 'errors')
             # sending a notification email to the sender {{{3
             if msgobj['Subject'] is not None:
                 subject = \
                     _( u'Validation error in: %(old_email_subject)s' ) \
                     % { 'old_email_subject': \
                     subject.replace( '\n', ' ' ), }
                 subject = subject.replace('\n', ' ')
             else:
                 subject = _(u'Validation error')
             # insert errors message into the email body; the template context
             # depends on which error attribute the exception exposes
             if hasattr(err, 'message_dict'):
                 # if hasattr(err, 'message_dict'), it looks like:
                 # {'url': [u'Enter a valid value.']}
                 message = render_to_string(
                     'mail/email_parsing_errors.txt', {
                         'site_name': Site.objects.get_current().name,
                         'site_domain': Site.objects.get_current().domain,
                         'original_message': text,
                         'errors_dict': err.message_dict
                     })
                 #TODO: write to an error log file instead of stderr
                 self.stderr.write( smart_str(
                     u"Found errors in message with subject: %s\n\terrors: %s" \
                     % ( mail['Subject'], unicode(err.message_dict))))
             elif hasattr(err, 'messages'):
                 message = render_to_string(
                     'mail/email_parsing_errors.txt', {
                         'site_name': Site.objects.get_current().name,
                         'site_domain': Site.objects.get_current().domain,
                         'original_message': text,
                         'errors_list': err.messages
                     })
                 self.stderr.write( smart_str(
                     u"Found errors in message with subject: %s\n\terrors: %s" \
                     % ( mail['Subject'], unicode(err.messages))))
             elif hasattr(err, 'message'):
                 message = render_to_string(
                     'mail/email_parsing_errors.txt', {
                         'site_name': Site.objects.get_current().name,
                         'site_domain': Site.objects.get_current().domain,
                         'original_message': text,
                         'errors_list': [err.message]
                     })
                 self.stderr.write( smart_str(
                     u"Found errors in message with subject: %s\n\terrors: %s" \
                     % ( mail['Subject'], unicode(err.message))))
             else:
                 message = render_to_string(
                     'mail/email_parsing_errors.txt', {
                         'site_name': Site.objects.get_current().name,
                         'site_domain': Site.objects.get_current().domain,
                         'original_message': text,
                         'errors_list': []
                     })
                 self.stderr.write( smart_str(
                     u"Found errors in message with subject: %s" \
                     % mail['Subject'] ))
             # Send the error report and keep a copy in the 'IMAP.sent' folder.
             if subject and message and from_email:
                 mail = EmailMessage( subject, message, \
                                      from_email, ( to_email, ) )
                 msg = str(mail.message())
                 try:
                     mail.send(fail_silently=False)
                     self.mailbox.append( 'IMAP.sent', None, None, \
                                    msg )
                 except SMTPException:
                     #TODO: write to an error log file instead of stderr
                     self.stderr.write('imap.py:ERR:smtplib.SMTPException')
             else:
                 #TODO: write to an error log file instead of stderr
                 self.stderr.write(
                     'imap.py:ERR:missing info for error email')
         except:
             # Any other failure: undo the partial database work and propagate.
             transaction.savepoint_rollback(sid)
             raise
Example #20
0
def get_attachments(message):
    """Parse a raw message string and return the second element of its Scrubber result."""
    parsed = EmailParser().parsestr(message)
    scrub_result = Scrubber(parsed).scrub()
    return scrub_result[1]
Example #21
0
        # Email Subject
        subject = _getStringStream(email_msg, '__substg1.0_0037')
        if subject is None:
            subject = "[No subject]"
        else:
            subject = "".join(i for i in subject if i not in r'\/:*?"<>|')
        print "subject is: " + subject

        # Email Header
        header = ""
        try:
            header = email_msg._header
        except Exception:
            headerText = _getStringStream(email_msg, '__substg1.0_007D')
            if headerText is not None:
                header = EmailParser().parsestr(headerText)
                #email_msg._header = EmailParser().parsestr(headerText)
            else:
                email_msg._header = None
                header = email_msg._header
        print "header is: " + str(header)

        header_date = header['date']
        date = email.utils.parsedate(header_date)
        dirName = '{0:02d}-{1:02d}-{2:02d}_{3:02d}{4:02d}'.format(*date)
        print "dirName for date is: " + dirName

        # Save the email attachments

        # Message - attachments()
        #         try:
Example #22
0
 def __init__(self, contents):
     """Parse an OLE message container and populate the email fields.

     Every '__substg1' stream is read and stored under the code returned by
     ``OutLookMsg._lookup_code``; attachment streams are grouped per
     attachment number. The collected values are then mapped onto body,
     subject, timestamp, sender, receiver and attachments.

     NOTE(review): ``self.sender``, ``self.receiver`` and ``self.attachments``
     are read before being assigned here, so they are presumably initialised
     by the parent ``__init__`` - confirm.
     """
     super().__init__(contents)
     ole_msg = olefile.OleFileIO(contents)
     # Mapping used by _lookup_code; the file is closed right after loading
     # instead of leaking the handle.
     with open("emailparser/id.json", "r") as tagger_file:
         tagger = json.load(tagger_file)
     out = {
         'attachments': {},
         'body': u"",
     }
     streams = OutLookMsg._streams(ole_msg)
     for stream in streams:
         if '__attach_' in stream and '__substg1' in stream:
             # The attachment number is the integer between '#' and the
             # next '/' of the stream name.
             attachment_number = str(int(
                 stream.split('#')[1].split('/')[0]))
             lookup_code = OutLookMsg._lookup_code(stream, tagger)
             attachment = out['attachments'].setdefault(attachment_number, {})
             raw = ole_msg.openstream(stream).read()
             # Streams whose code contains 'Binary' are kept as raw bytes;
             # everything else goes through _convert.
             if 'Binary' in lookup_code:
                 attachment[lookup_code] = raw
             else:
                 attachment[lookup_code] = OutLookMsg._convert(raw)
         elif '__substg1' in stream:
             lookup_code = OutLookMsg._lookup_code(stream, tagger)
             if ole_msg.openstream(stream):
                 content = OutLookMsg._convert(
                     ole_msg.openstream(stream).read())
                 if content:
                     if lookup_code == 'unknown':
                         out['body'] += content
                     else:
                         out[lookup_code] = out.get(lookup_code, '') + content
     # map to email object (the HTML body is preferred over the plain one)
     if 'PidTagHtml' in out:
         self.body = out['PidTagHtml']
     elif 'PidTagBody' in out:
         self.body = out['PidTagBody']
     if 'PidTagSubject' in out:
         self.subject = out['PidTagSubject']
     # extract data from headers
     if 'PidTagTransportMessageHeaders' in out:
         headers = EmailParser().parsestr(
             out['PidTagTransportMessageHeaders'])
         # date/timestamp
         if 'date' in headers:
             self.timestamp = email.utils.parsedate(headers['date'])
         # sender
         if 'from' in headers and headers['from'] is not None:
             self.sender = email.utils.parseaddr(headers['from'])[1]
         # receiver
         if 'to' in headers and headers['to'] is not None:
             self.receiver = email.utils.parseaddr(headers['to'])[1]
     # backup if headers are missing
     if not self.sender and 'PidTagSenderEmailAddress' in out:
         self.sender = email.utils.parseaddr(
             out['PidTagSenderEmailAddress'])[1]
     if not self.receiver and "PidTagDisplayTo" in out:
         self.receiver = email.utils.parseaddr(out["PidTagDisplayTo"])[1]
     # attachments
     for attachment in out['attachments'].values():
         self.attachments.append(OutlookAttachment(attachment))
Example #23
0
def process_email(txt):
    """Parse an email given as text and return the content of its body part."""
    message = EmailParser(policy=default).parsestr(txt)
    body_part = message.get_body()
    return body_part.get_content()
Example #24
0
def parse_msg(msg, extract_urls):
    """Extract attribute records from a message object.

    Args:
        msg: object exposing ``_getStringStream``, ``_getStream`` and
            ``listdir`` (presumably an Outlook .msg wrapper - see references
            below).
        extract_urls: when truthy, URLs found in the plain body stream are
            added to the results as well.

    Returns:
        ``{'results': [...]}`` where every entry is a dict with 'values' and
        'type' keys (plus 'comment' or 'data' for some types).
    """
    # Reference: https://msdn.microsoft.com/en-us/library/office/ff861332.aspx
    # Reference: http://cerbero-blog.com/?p=1625
    results = []

    # Extract all header information; a missing header stream is treated as
    # empty so that the stream-based fallbacks below can still run.
    headers = msg._getStringStream('__substg1.0_007D') or ''
    # Parse headers for easier data gathering, replace "\n", "\r", "\t"
    trimmed_headers = re.sub('\r|\n|\t', '', headers.strip())
    parsed_headers = EmailParser().parsestr(headers)
    results.append({'values': trimmed_headers, 'type': 'email-header'})

    # Get email targets (the addresses that received the email from the header)
    email_targets = set()
    email_targets_regex = re.compile(r'for\s(.*@.*);', re.I)
    for match in re.finditer(email_targets_regex, headers):
        email_targets.add(match.group(1).strip(' <>'))
    for target in email_targets:
        results.append({
            'values': target,
            'type': 'target-email',
            'comment': 'Extracted from email Received header'
        })

    # E-Mail MIME boundary
    mime_boundary_regex = re.compile(r'boundary\=\".*?(?=\")', re.S | re.I)
    mime_boundary_match = re.search(mime_boundary_regex, headers)
    if mime_boundary_match:
        mime_boundary = mime_boundary_match.group().replace('boundary="', '')
        results.append({
            'values': mime_boundary,
            'type': 'email-mime-boundary'
        })

    # Headers that map one-to-one onto a result type:
    # (header name, result type, strip surrounding whitespace)
    simple_headers = (
        ('reply-to', 'email-reply-to', True),
        # May need to split so can return email-src and email-src-display-name
        ('return-path', 'email-src', True),
        ('x-sender', 'email-src', False),
        ('x-mailer', 'email-x-mailer', False),
        ('user-agent', 'user-agent', False),
        ('thread-index', 'email-thread-index', False),
        ('message-id', 'email-message-id', False),
    )
    for header_name, result_type, strip_value in simple_headers:
        value = parsed_headers[header_name]
        if value:
            results.append({
                'values': value.strip() if strip_value else value,
                'type': result_type
            })

    # Subject
    subject = msg._getStringStream('__substg1.0_0037')
    if subject:
        results.append({'values': subject, 'type': 'email-subject'})

    # Source
    # Try headers first, otherwise parse from streams
    if parsed_headers['from']:
        sender = parsed_headers['from']  # e.g. Display Name <address>
        try:
            sender_email = sender.split('<')[1].strip('>')
            sender_name = sender.split('<')[0].strip('" ')
        except Exception:
            # No "<address>" part (or not a plain string): keep the raw value.
            results.append({'values': sender, 'type': 'email-src'})
        else:
            results.append({'values': sender_email, 'type': 'email-src'})
            results.append({
                'values': sender_name,
                'type': 'email-src-display-name'
            })
    else:
        # "From:" is typically 0C1F, however if this is an Microsoft Exchange
        # email (validate using type 0C1E, either "EX" or "SMTP"), it is not
        # readable directly without resolving using Exchange.
        sender_email = msg._getStringStream(
            '__substg1.0_5D01')  # 0_5D01001F, 0_5D02001F
        sender_name = msg._getStringStream('__substg1.0_0C1A')
        results.append({'values': sender_email, 'type': 'email-src'})
        results.append({
            'values': sender_name,
            'type': 'email-src-display-name'
        })

    # Destinations
    recipDirs = []
    for dir_ in msg.listdir():
        if dir_[0].startswith('__recip') and dir_[0] not in recipDirs:
            recipDirs.append(dir_[0])

    for recipDir in recipDirs:
        recip_email = msg._getStringStream([recipDir,
                                            '__substg1.0_39FE'])  # 0_39FE001F
        results.append({'values': recip_email, 'type': 'email-dst'})

    # Get Attachments
    attachDirs = []
    for dir_ in msg.listdir():
        if dir_[0].startswith('__attach') and dir_[0] not in attachDirs:
            attachDirs.append(dir_[0])

    for attachDir in attachDirs:
        long_filename = msg._getStringStream([attachDir, '__substg1.0_3707'])
        short_filename = msg._getStringStream([attachDir, '__substg1.0_3704'])
        # Get attachment data, path is hardcoded due to issues with _getStringStream returning None
        attachment_data = msg._getStream(attachDir + '/__substg1.0_37010102')

        # Pick the name per attachment so that an unnamed attachment neither
        # reuses the previous attachment's name nor hits an unbound variable.
        filename = long_filename or short_filename
        if filename:
            results.append({'values': filename, 'type': 'email-attachment'})
            results.append({
                'values': filename,
                'data': base64.b64encode(attachment_data).decode(),
                'type': 'malware-sample'
            })

    # Extract URLs from the message body
    if extract_urls:
        body = msg._getStringStream('__substg1.0_1000')
        for url in get_urls_from_plain(body):
            results.append({'values': url, 'type': 'url'})
            # Parse pattern in traffic from URL
            parsed = urlparse(url)
            if parsed.path:
                results.append({
                    "values": parsed.path,
                    "type": 'pattern-in-traffic'
                })

    return {'results': results}
Example #25
0
def handle_bounce_backs(retr_n, recipients, userid, password) -> int:
    '''Report bounce-back notifications found among the newest mailbox messages.

    Logs in to the POP3 server, downloads up to ``retr_n`` of the most recent
    messages, keeps those whose subject looks like a delivery-failure notice,
    and extracts the first e-mail address found in each of them. Addresses
    that are also present in ``recipients`` are printed for the user.

    Args:
        retr_n: maximum number of most recent messages to inspect.
        recipients: collection of addresses that were mailed; only bounced
            addresses contained in it are printed.
        userid: POP3 login name; characters 1-2 must be digits.
        password: POP3 password.

    Returns:
        The number of bounced addresses found (including ones not listed in
        ``recipients``), or 0 when the check is skipped for this account.
    '''
    # Accounts whose number in userid[1:3] is below 9 are skipped; per the
    # original note those mailboxes are served over IMAP instead of POP3.
    if int(userid[1:3]) < 9:
        print(
            "Unable to check bouncebacks, this functionality is currently unavailable for users before 08s."
        )
        return 0

    print("checking for bounce-backs...")
    time.sleep(5)  # wait for bounce back

    # connect to pop3 server; always send QUIT, even if login/list/retr fails
    pop3 = poplib.POP3_SSL('msa.ntu.edu.tw', 995)
    try:
        pop3.user(userid)
        pop3.pass_(password)

        # retrieve last n emails; POP3 message numbers start at 1, so clamp
        # the lower bound when the mailbox holds fewer than retr_n messages
        _, mails, _ = pop3.list()
        newest = len(mails)
        stop = max(newest - retr_n, 0)
        emails = [pop3.retr(i)[1] for i in range(newest, stop, -1)]
    finally:
        pop3.quit()

    email_contents = []
    # Concat message pieces:
    for mssg in emails:
        # some chinese character may not be able to parse,
        # however, we only care about the bounce back notifications,
        # which are always in English
        try:
            email_contents.append(b'\r\n'.join(mssg).decode('utf-8'))
        except UnicodeDecodeError:
            continue

    # Parse message into an email object:
    email_contents = [
        EmailParser().parsestr(content, headersonly=True)
        for content in email_contents
    ]

    subject_re = re.compile(
        r'(Delivery Status Notification)|(Undelivered Mail Returned to Sender)'
    )
    address_re = re.compile(r'[a-z0-9-_\.]+@[a-z0-9-\.]+\.[a-z\.]{2,5}')

    bounced_list = []

    for content in email_contents:
        subject = content['subject']
        # a message without a Subject header cannot be a bounce notification
        if subject is None or not subject_re.match(str(subject)):
            continue

        for part in content.walk():
            payload = part.get_payload(decode=True)
            if payload is None:
                continue

            # Decode the bytes instead of taking their repr(): the repr turns
            # line breaks into literal "\r\n" text, whose trailing letter
            # would be glued onto an address starting a line.
            if isinstance(payload, bytes):
                body = payload.decode('utf-8', errors='replace')
            else:
                body = str(payload)

            # match for email addresses
            bounced = address_re.findall(body)

            if bounced:
                bounced = bounced[0].replace(userid, '')
                if bounced == '':
                    break

                bounced_list.append(bounced)

    if bounced_list:
        print('emails sent to these addresses are bounced back (failed):')
        for address in bounced_list:
            if address in recipients:
                print(f'\t{address},')
        print('Please check these emails.')
    else:
        print('No bounce-backs found, all emails are delivered successfully')

    return len(bounced_list)
Example #26
0
    def decode(cls, message):
        """Build an instance of ``cls`` from a raw CPIM message string.

        The text before the first blank line (``\\r\\n\\r\\n``) is scanned with
        ``cls.headers_re`` for CPIM headers; the text after it is parsed as a
        MIME entity to obtain the content, content type and charset.

        Headers whose namespace prefix has not been declared, or whose name
        contains a ``.``, are ignored. A header whose value raises
        ``ValueError`` while being interpreted is silently dropped.

        Raises:
            CPIMParserError: if the blank-line separator is missing, or if
                the MIME part reports no content type.
        """
        header_block, delimiter, body = message.partition('\r\n\r\n')
        if not delimiter:
            raise CPIMParserError('Invalid CPIM message')

        # Values collected from the standard-namespace headers.
        sender = None
        subject = None
        timestamp = None
        recipients = []
        courtesy_recipients = []
        required = []
        additional_headers = []

        # Subject texts keyed by language tag (None = no language given).
        subjects = {}
        # Known namespaces keyed by prefix; '' is the default namespace.
        namespaces = {'': CPIMNamespace(cls.standard_namespace)}

        for prefix, name, value in cls.headers_re.findall(header_block):
            namespace = namespaces.get(prefix)
            if namespace is None or '.' in name:
                continue

            try:
                #value = value.decode('cpim-header')
                if not namespace == cls.standard_namespace:
                    # Headers from other namespaces are kept verbatim.
                    additional_headers.append(
                        CPIMHeader(name, namespace, value))
                    continue

                if name == 'From':
                    sender = ChatIdentity.parse(value)
                elif name == 'To':
                    recipients.append(ChatIdentity.parse(value))
                elif name == 'cc':
                    courtesy_recipients.append(ChatIdentity.parse(value))
                elif name == 'Subject':
                    subject_match = cls.subject_re.match(value)
                    if subject_match is None:
                        raise ValueError('Illegal Subject header: %r' % value)
                    lang, subject = subject_match.groups()
                    # language tags must be ASCII
                    lang_key = None if lang is None else str(lang)
                    subjects[lang_key] = subject
                elif name == 'DateTime':
                    timestamp = ISOTimestamp(value)
                elif name == 'Required':
                    required_names = re.split(r'\s*,\s*', value)
                    required.extend(required_names)
                elif name == 'NS':
                    ns_match = cls.namespace_re.match(value)
                    if ns_match is None:
                        raise ValueError('Illegal NS header: %r' % value)
                    ns_prefix, ns_uri = ns_match.groups()
                    namespaces[ns_prefix] = CPIMNamespace(ns_uri, ns_prefix)
                else:
                    additional_headers.append(
                        CPIMHeader(name, namespace, value))
            except ValueError:
                # Best effort: a malformed header is skipped, not fatal.
                pass

        # Merge all Subject headers into one multilingual value; the entry
        # without a language tag, when present, becomes the default text.
        if None in subjects:
            default_text = subjects.pop(None)
            subject = MultilingualText(default_text, **subjects)
        elif subjects:
            subject = MultilingualText(**subjects)

        mime_message = EmailParser().parsestr(body)
        content_type = mime_message.get_content_type()
        # NOTE(review): if EmailParser is the stdlib email parser,
        # get_content_type() falls back to a default type rather than
        # returning None, so this check may never trigger - confirm.
        if content_type is None:
            raise CPIMParserError(
                "CPIM message missing Content-Type MIME header")

        return cls(mime_message.get_payload(), content_type,
                   mime_message.get_content_charset(), sender, recipients,
                   courtesy_recipients, subject, timestamp, required,
                   additional_headers)