Ejemplo n.º 1
0
def processEmail((response, num, error)):
    """
    Callback for the IMAP fetch command: process one fetched email.

    The single argument is a ``(response, num, error)`` tuple; only
    ``response`` -- itself a ``(typ, data)`` pair -- is read here. The raw
    message is parsed, checked against ``settings.SCRAPER_GOOGLE_GROUP``, the
    sender is looked up (or created) as a ``User``, and each text/plain body
    (then each text/html body) is cleaned with cleanText()/cleanHtml() and
    handed to processMessage().

    Returns None in every case; it stops early as soon as processMessage()
    returns a non-zero value.
    """
    if response is None:
        return

    typ, data = response
    if typ != "OK":
        return

    message = email.message_from_string(data[0][1])
    if message["X-Google-Group-Id"] != settings.SCRAPER_GOOGLE_GROUP:
        print "Discarding %s" % message["Subject"]
        return

    from_header = message["From"]
    if from_header is None:
        return
    from_header = from_header.strip()

    match = re.match(r"^(.*?)\s*<(.*)>$", from_header)
    if match is not None:
        # From header of the form: Display Name <address>
        from_name = match.group(1)
        from_email = match.group(2)
    else:
        # From header that is a bare address with no display name
        from_name = ""
        from_email = from_header

    from_user, created = User.objects.get_or_create(email=from_email, defaults={"name": from_name, "points": 0})

    # Fall back to "now" when the Date header is missing, unparseable
    # (parsedate() returns None) or holds out-of-range values. The previous
    # ``datetime(...) or datetime.now()`` could never take the fallback
    # because a datetime is always truthy, and it crashed on a None tuple.
    date_header = message["Date"]
    date_tuple = parsedate(date_header) if date_header else None
    try:
        ts = datetime(*date_tuple[0:6]) if date_tuple else datetime.now()
    except ValueError:
        ts = datetime.now()

    # prefer text versions over HTML versions
    # FIXME: we return as soon as we give out a single shanpoint. The working
    # assumption here is that these different content-types are different
    # versions of the same message, which may or may not be the case. This
    # breaks if for example, I have two text/plain's that give points to a
    # different set of people. Only the people in the first text/plain will be
    # processed.
    for msg in typed_subpart_iterator(message, "text", "plain"):
        if processMessage(from_user, ts, cleanText(msg.get_payload())) != 0:
            return

    for msg in typed_subpart_iterator(message, "text", "html"):
        if processMessage(from_user, ts, cleanHtml(msg.get_payload())) != 0:
            return
Ejemplo n.º 2
0
def to_plaintext(msg):
    """Convert the text/html subparts of `msg` to text/plain, in place.

    Each HTML payload is written to its own file inside a scratch directory
    and passed to the command configured in
    ``config.mailman.html_to_plain_text_command`` (``$filename`` is
    substituted with the file's path). When the command succeeds, its output
    replaces the subpart's payload and the subpart is retyped as text/plain.
    A failing command is logged and that subpart is left untouched.

    :param msg: The message whose subparts are converted.
    :return: The number of subparts that were converted.
    """
    converted = 0
    with ExitStack() as stack:
        scratch_dir = tempfile.mkdtemp()
        # Remove the scratch directory however the block is left.
        stack.callback(shutil.rmtree, scratch_dir)
        html_parts = typed_subpart_iterator(msg, 'text', 'html')
        for index, html_part in enumerate(html_parts):
            html_path = os.path.join(scratch_dir, '{}.html'.format(index))
            with open(html_path, 'w', encoding='utf-8') as html_file:
                html_file.write(html_part.get_payload())
            command_template = Template(
                config.mailman.html_to_plain_text_command)
            argv = command_template.safe_substitute(
                filename=html_path).split()
            try:
                text = subprocess.check_output(argv, universal_newlines=True)
            except subprocess.CalledProcessError:
                log.exception('HTML -> text/plain command error')
                continue
            # Swap in the converted text and adjust the content type.
            del html_part['content-transfer-encoding']
            html_part.set_payload(text)
            html_part.set_type('text/plain')
            converted += 1
    return converted
Ejemplo n.º 3
0
def is_message_empty(msg):
    """Return True unless `msg` has a text/plain part with non-blank content.

    Every text/* subpart is visited; the message counts as non-empty as soon
    as one text/plain part has a payload that is not just whitespace.
    """
    return not any(
        part.get_payload().strip()
        for part in typed_subpart_iterator(msg, 'text')
        if part.get_content_subtype() == 'plain')
Ejemplo n.º 4
0
def main():
    """Save every message/rfc822 attachment found in the inbox as its own file.

    For each non-hidden file returned by ``ch_inbox()`` the file is passed
    through ``remove_quote()``, parsed, and each embedded message/rfc822
    payload is written into the ``newmail`` directory. Multipart payloads are
    named after the current time; all others are named after the SHA-1 of
    their serialized text. The inbox file is deleted afterwards.
    """
    inbox_dir = '/home/nakazaki/Mail/inbox'
    newmail_dir = '/home/nakazaki/mailware/newmail'
    for mail_file in ch_inbox(inbox_dir):
        # Skip dot-files.
        if re.search(r"^\.", mail_file):
            continue
        # Keep the per-file path in its own variable: overwriting the inbox
        # path made every later file resolve underneath the previous one.
        mail_path = os.path.join(inbox_dir, mail_file)
        remove_quote(mail_path)
        with open(mail_path) as f:
            mail_s = Parser().parse(f)
            for rfc_message in iterators.typed_subpart_iterator(mail_s, 'message', 'rfc822'):
                # The payload of a message/rfc822 part is a one-element list
                # holding the embedded Message instance.
                rfc_submessage = rfc_message.get_payload()[0]
                serialized = rfc_submessage.as_string()
                # get_content_maintype must be *called*; comparing the bound
                # method itself to a string is always False.
                if rfc_submessage.get_content_maintype() == 'multipart':
                    d = datetime.datetime.today()
                    filename = "%s-%s-%s-%s" % (d.hour, d.minute, d.second, d.microsecond)
                else:
                    filename = hashlib.sha1(serialized).hexdigest()
                with open(os.path.join(newmail_dir, filename), 'wb') as out:
                    out.write(serialized)

        os.remove(mail_path)
Ejemplo n.º 5
0
def to_plaintext(msg):
    """Convert each text/html subpart of `msg` to text/plain, in place.

    The decoded HTML payload is written to a temporary file, which is passed
    to the shell command in ``config.HTML_TO_PLAIN_TEXT_COMMAND`` (formatted
    with a ``filename`` key). The command's output replaces the subpart's
    payload and the subpart is retyped as text/plain. A non-zero exit status
    is logged, but the output is still used.

    :param msg: The message whose subparts are converted.
    :return: True if at least one subpart was converted, otherwise False.
    """
    changedp = False
    for subpart in typed_subpart_iterator(msg, 'text', 'html'):
        # mkstemp() creates and opens the file atomically; mktemp() only
        # handed back a name, leaving a window for another process to claim it.
        fd, filename = tempfile.mkstemp('.html')
        try:
            with os.fdopen(fd, 'w') as fp:
                fp.write(subpart.get_payload(decode=True))
            cmd = os.popen(config.HTML_TO_PLAIN_TEXT_COMMAND %
                           {'filename': filename})
            plaintext = cmd.read()
            rtn = cmd.close()
            if rtn:
                log.error('HTML->text/plain error: %s', rtn)
        finally:
            try:
                os.unlink(filename)
            except OSError as e:
                # An already-missing file is fine; anything else is not.
                if e.errno != errno.ENOENT:
                    raise
        # Now replace the payload of the subpart and twiddle the Content-Type:
        del subpart['content-transfer-encoding']
        subpart.set_payload(plaintext)
        subpart.set_type('text/plain')
        changedp = True
    # The flag was computed but never handed back to the caller.
    return changedp
Ejemplo n.º 6
0
def extract_body(mail, types=None):
    """
    returns a body text string for given mail.
    If types is `None`, 'text/*' is used:
    In case mail has a 'text/html' part, it is prefered over
    'text/plain' parts.

    Parts that are not plain text (or all parts, when plain text is being
    dropped in favour of HTML) are rendered through the external handler
    returned by ``get_mime_handler``; parts without a handler are skipped.
    The rendered pieces are joined with blank lines.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list of str
    """
    html = list(typed_subpart_iterator(mail, 'text', 'html'))

    # if no specific types are given, we favor text/html over text/plain
    drop_plaintext = False
    if html and not types:
        drop_plaintext = True

    body_parts = []
    for part in mail.walk():
        ctype = part.get_content_type()

        if types is not None:
            if ctype not in types:
                continue

        enc = part.get_content_charset() or 'ascii'
        raw_payload = part.get_payload(decode=True)
        if part.get_content_maintype() == 'text':
            raw_payload = string_decode(raw_payload, enc)
        if ctype == 'text/plain' and not drop_plaintext:
            body_parts.append(string_sanitize(raw_payload))
        else:
            #get mime handler
            handler = get_mime_handler(ctype, key='view',
                                       interactive=False)
            if handler:
                #open tempfile. Not all handlers accept stuff from stdin
                tmpfile = tempfile.NamedTemporaryFile(delete=False,
                                                      suffix='.html')
                try:
                    #write payload to tmpfile
                    if part.get_content_maintype() == 'text':
                        tmpfile.write(raw_payload.encode('utf8'))
                    else:
                        tmpfile.write(raw_payload)
                    tmpfile.close()
                    #create and call external command
                    cmd = handler % tmpfile.name
                    cmdlist = shlex.split(cmd.encode('utf-8', errors='ignore'))
                    rendered_payload, errmsg, retval = helper.call_cmd(cmdlist)
                finally:
                    # Always remove the tempfile, even when writing or the
                    # handler command raised; close() on an already closed
                    # file is a no-op.
                    tmpfile.close()
                    os.unlink(tmpfile.name)
                if rendered_payload:  # handler had output
                    body_parts.append(string_sanitize(rendered_payload))
                elif part.get_content_maintype() == 'text':
                    body_parts.append(string_sanitize(raw_payload))
                # else drop
    return '\n\n'.join(body_parts)
Ejemplo n.º 7
0
def extract_body(mail, types=None, field_key='copiousoutput'):
    """Build the text shown as the body of a message.

    When `types` is given, only leaf parts whose content type is listed are
    considered. When `types` is None, the :ref:`prefer_plaintext
    <prefer-plaintext>` setting picks the preferred type (text/plain if it is
    true, text/html otherwise); if the mail contains a part of that type,
    only parts of that type are kept. Parts whose Content-Disposition starts
    with ``attachment`` are always skipped. A non-text/plain part that
    renders to nothing is marked as an attachment instead of being included.

    `field_key` is accepted but not used inside this function.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list[str]
    :returns: The combined text of any parts to be used
    :rtype: str
    """
    if settings.get('prefer_plaintext'):
        preferred = 'text/plain'
    else:
        preferred = 'text/html'

    # Only probe for the preferred type when no explicit types were given;
    # otherwise the flag stays False and never filters anything.
    has_preferred = False
    if types is None:
        maintype, subtype = preferred.split('/')
        has_preferred = list(typed_subpart_iterator(mail, maintype, subtype))

    chunks = []
    for part in mail.walk():
        # Containers carry no displayable content of their own.
        if part.is_multipart():
            continue

        ctype = part.get_content_type()
        if types is not None and ctype not in types:
            continue

        disposition = part.get('Content-Disposition', '')
        if disposition.startswith('attachment'):
            continue

        # Once the mail is known to have the preferred type, keep only it.
        if has_preferred and ctype != preferred:
            continue

        if ctype == 'text/plain':
            text = remove_cte(part, as_string=True)
        else:
            text = render_part(part)
            if not text:
                # The handler produced nothing: flag the part as an
                # attachment rather than adding it to the body.
                if disposition:
                    part.replace_header('Content-Disposition',
                                        'attachment; ' + disposition)
                else:
                    part.add_header('Content-Disposition', 'attachment;')
                continue
        chunks.append(string_sanitize(text))
    return u'\n\n'.join(chunks)
Ejemplo n.º 8
0
def _content_parse(msg):
    if not msg.is_multipart():
        __content = msg.get_payload(decode=True)
    else:
        __content = (m.get_payload(decode=True) for m in \
                        typed_subpart_iterator(msg, 'text', 'plain') \
                        if 'filename' not in m.get('Content-Disposition',''))
        __content = ' '.join(__content)
    return __content
Ejemplo n.º 9
0
    def test_alwaysIncludeTimezones(self):
        """
        For both sample invites (C{inviteTextWithTimezone} and
        C{inviteTextNoTimezone}), the generated email has exactly one
        text/calendar part, and that part contains a VTIMEZONE component
        for America/New_York.
        """
        for inviteText in (inviteTextWithTimezone, inviteTextNoTimezone):
            _ignore, message = self.generateSampleEmail(inviteText)
            calendarParts = tuple(
                typed_subpart_iterator(message, "text", "calendar"))
            self.assertEqual(len(calendarParts), 1)
            calendarText = calendarParts[0].get_payload(decode=True)
            for marker in ("BEGIN:VTIMEZONE", "TZID:America/New_York"):
                self.assertTrue(marker in calendarText)
Ejemplo n.º 10
0
def decodeMail(mailtext):
    """Yield the decoded body of every text/plain part of a raw mail.

    :param mailtext: the complete raw message text (headers and body).
    :return: a generator producing one decoded payload per text/plain part,
        with the Content-Transfer-Encoding already undone.
    """
    mesg = message_from_string(mailtext)
    # note: if we give no params, it defaults to text. but we only
    # want text/html and text/plain, maybe ONLY plain
    # but there are other texts we dont want, e.g. text/x-vcard
    # which some people include
    for part in typed_subpart_iterator(mesg, 'text', 'plain'):
        # decode=True undoes the Content-Transfer-Encoding
        yield part.get_payload(decode=True)
Ejemplo n.º 11
0
 def get_body(self, message):
     """Return the plain-text body of `message` as a stripped unicode string.

     For a multipart message, all text/plain subparts are decoded and joined
     with newlines; each part is decoded with its own charset, falling back
     to the message's charset. A non-multipart message is decoded with the
     message's charset. Undecodable bytes are replaced.
     """
     if message.is_multipart():
         text_parts = [part for part in typed_subpart_iterator(message, 'text', 'plain')]
         body = []
         for part in text_parts:
             charset = self.get_charset(part, self.get_charset(message))
             # decode=True undoes base64/quoted-printable transfer encoding,
             # matching what the non-multipart branch already does.
             body.append(unicode(part.get_payload(decode=True), charset, 'replace'))
         return u'\n'.join(body).strip()
     else:
         body = unicode(message.get_payload(decode=True), self.get_charset(message), 'replace')
         return body.strip()
Ejemplo n.º 12
0
 def __init__(self, msg, msgdata, results):
     """Collect the command lines carried by an email command message.

     Commands are gathered, in order, from: the implicit command implied by
     the ``subaddress`` entry of `msgdata` (join, leave or confirm), the
     decoded Subject header, and the lines of the first text/plain part.
     At most ``config.mailman.email_commands_max_lines`` body lines become
     commands; the remainder go to ``ignored_lines``.

     :param msg: The message to extract commands from.
     :param msgdata: The message metadata; its ``subaddress`` key is read.
     :param results: A file-like object that notices are printed to.
     """
     self.command_lines = []
     self.ignored_lines = []
     self.processed_lines = []
     # Mail sent to the -join, -leave or -confirm address carries an
     # implicit command, as if it had been sent to the -request address.
     subaddress = msgdata.get('subaddress')
     if subaddress == 'join':
         self.command_lines.append('join')
     elif subaddress == 'leave':
         self.command_lines.append('leave')
     elif subaddress == 'confirm':
         confirm_match = re.match(
             config.mta.verp_confirm_regexp, msg.get('to', ''))
         if confirm_match:
             cookie = confirm_match.group('cookie')
             self.command_lines.append('confirm ' + cookie)
     # The Subject header, after RFC 2047 decoding, is a command line too.
     raw_subject = msg.get('subject', '')
     try:
         decoded_subject = unicode(make_header(decode_header(raw_subject)))
         # Mail commands must be ASCII.
         self.command_lines.append(decoded_subject.encode('us-ascii'))
     except (HeaderParseError, UnicodeError, LookupError):
         # Unparseable or non-ASCII subject. A unicode raw subject can
         # still be salvaged by dropping the non-ASCII characters; any
         # other kind of value is unusable.
         if isinstance(raw_subject, unicode):
             self.command_lines.append(
                 raw_subject.encode('us-ascii', 'ignore'))
     # Locate the first text/plain part, if there is one.
     first_plain = next(typed_subpart_iterator(msg, 'text', 'plain'), None)
     if first_plain is None or first_plain is not msg:
         # Either there is no text/plain part at all, or other parts of
         # the message are being skipped.
         print(_('Ignoring non-text/plain MIME parts'), file=results)
     if first_plain is None:
         return
     body = first_plain.get_payload(decode=True)
     # text/plain parts better have string payloads.
     assert isinstance(body, basestring), 'Non-string decoded payload'
     body_lines = body.splitlines()
     # Lines beyond the configured limit are recorded as ignored.
     limit = int(config.mailman.email_commands_max_lines)
     self.command_lines.extend(body_lines[:limit])
     self.ignored_lines.extend(body_lines[limit:])
Ejemplo n.º 13
0
def process(message):
    """
    The function which actually processes a received command email message.

    The message is discarded when its ``X-Loop`` headers contain
    ``DISTRO_TRACKER_CONTROL_EMAIL``. Commands are taken from the subject and
    from the first text/plain part; when there is no such part, or it cannot
    be decoded, a plain-text warning is sent instead. A response is sent only
    if at least one command was processed.

    :param message: The received command email message.
    :type message: ``bytes``
    """
    assert isinstance(message, six.binary_type), 'Message must be given as bytes'
    msg = message_from_bytes(message)
    email = extract_email_address_from_header(msg.get('From', ''))
    logdata = {
        'from': email,
        'msgid': msg.get('Message-ID', 'no-msgid-present@localhost'),
    }
    logger.info("control <= %(from)s %(msgid)s", logdata)
    if 'X-Loop' in msg and DISTRO_TRACKER_CONTROL_EMAIL in msg.get_all('X-Loop'):
        logger.info("control :: discarded %(msgid)s due to X-Loop", logdata)
        return
    # Get the first plain-text part of the message
    plain_text_part = next(typed_subpart_iterator(msg, 'text', 'plain'), None)
    # Compare with None explicitly: a Message's length is its header count,
    # so a text/plain part that has no headers of its own is falsy and would
    # wrongly be treated as missing.
    if plain_text_part is None:
        # There is no plain text in the email
        send_plain_text_warning(msg, logdata)
        return

    # Decode the plain text into a unicode string
    try:
        text = get_decoded_message_payload(plain_text_part)
    except UnicodeDecodeError:
        send_plain_text_warning(msg, logdata)
        return

    lines = extract_command_from_subject(msg) + text.splitlines()
    # Process the commands
    factory = CommandFactory({'email': email})
    confirmation_set = ConfirmationSet()
    processor = CommandProcessor(factory)
    processor.confirmation_set = confirmation_set
    processor.process(lines)

    confirmation_set.ask_confirmation_all()
    # Send a response only if there were some commands processed
    if processor.is_success():
        send_response(msg, processor.get_output(), recipient_email=email,
                      cc=set(confirmation_set.get_emails()))
    else:
        logger.info("control :: no command processed in %(msgid)s", logdata)
Ejemplo n.º 14
0
    def get_body(self):
        """Return the plain-text body of this message as stripped unicode.

        A multipart message contributes all of its text/plain subparts, each
        decoded with its own charset (falling back to the message's charset)
        and joined with newlines. A non-multipart message is decoded with the
        message's charset. Undecodable bytes are replaced.
        """
        if not self.is_multipart():
            # The payload of a non-multipart message is the body itself.
            return unicode(
                self.get_payload(decode=True), get_charset(self), "replace"
            ).strip()

        # Only the plain text version is wanted.
        pieces = []
        for part in list(typed_subpart_iterator(self, "text", "plain")):
            part_charset = get_charset(part, get_charset(self))
            pieces.append(
                unicode(part.get_payload(decode=True), part_charset, "replace"))
        return u"\n".join(pieces).strip()
Ejemplo n.º 15
0
    def get_text_mime_part(self):
        '''
        Generator over the text parts of ``self.msg``.

        Each part is decoded to unicode with UnicodeDammit; parts that cannot
        be decoded or that are blank are skipped.

        :return: < line with text from mime-part in utf8 > ,
                 < Content-Type value >,
                 < lang value >

        NOTE(review): written for Python 2 (``iterkeys``, list-returning
        ``filter``/``map``).
        '''

        # partial support of asian encodings, just to decode in UTF without exceptions
        # and normalize with NFC form: one unicode ch per symbol
        # Each value is a list of regex fragments matched (case-insensitively)
        # against the encoding name detected by UnicodeDammit.
        langs_map = {
                        'russian'   :  ['koi8','windows-1251','cp866', 'ISO_8859-5','Latin-?5'],
                        'french'    :  ['ISO_8859-[19]','Latin-?[19]','CP819', 'windows-1252'],
                        'jis'       :  ['shift_jis','ISO-2022-JP','big5']
        }

        # With no type arguments typed_subpart_iterator yields text/* parts.
        for p in iterators.typed_subpart_iterator(self.msg):
            (decoded_line, decode_flag, dammit_obj) = [None]*3
            # Only ask get_payload() to decode when a transfer encoding is
            # declared; otherwise decode_flag stays None.
            if p.get('Content-Transfer-Encoding'):
                decode_flag=True
            try:
                # best effort: a part that fails to decode is skipped below
                decoded_line = p.get_payload(decode=decode_flag)
                dammit_obj = UnicodeDammit(decoded_line, is_html=False)

            except Exception as err:
                # Any failure here leaves dammit_obj unset, so skip the part.
                if dammit_obj is None:
                    continue

            decoded_line = dammit_obj.unicode_markup
            #logger.debug(decoded_line)
            if decoded_line is None or len(decoded_line.strip()) == 0:
                continue

            lang = self.DEFAULT_LANG
            if dammit_obj.original_encoding:
                for l in langs_map.iterkeys():
                    if filter(lambda ch: re.match(r''+ch, dammit_obj.original_encoding, re.I), langs_map.get(l)):
                        lang = l
                        # NOTE(review): this yields inside the loop and the
                        # code then falls through to the final yield below,
                        # so a part whose encoding matches is emitted more
                        # than once -- confirm whether that is intended.
                        yield(decoded_line, p.get_content_type(), lang)

            # Take the last header named Content-Language or Accept-Language
            # (if any) and use the text before the first '-' of its value as
            # the language, overriding the encoding-based guess.
            l = filter(lambda lang_header: re.match(r'(Content|Accept)-Language', lang_header), map(itemgetter(0),self.msg.items()))[-1:]
            if l:
                lang = ''.join(self.msg.get(''.join(l)).split('-')[:1])

            yield(decoded_line, p.get_content_type(), lang)
Ejemplo n.º 16
0
def process(message):
    """
    The function which actually processes a received command email message.

    The message is silently dropped when its ``X-Loop`` headers contain
    ``PTS_CONTROL_EMAIL``. Commands are taken from the subject and from the
    first text/plain part; when there is no such part, or it cannot be
    decoded, a plain-text warning is sent instead. A response is sent only
    if at least one command was processed.

    :param message: The received command email message.
    :type message: ``bytes``
    """
    assert isinstance(message, six.binary_type), 'Message must be given as bytes'
    msg = message_from_bytes(message)
    if 'X-Loop' in msg and PTS_CONTROL_EMAIL in msg.get_all('X-Loop'):
        return
    # Get the first plain-text part of the message
    plain_text_part = next(typed_subpart_iterator(msg, 'text', 'plain'), None)
    # Compare with None explicitly: a Message's length is its header count,
    # so a text/plain part that has no headers of its own is falsy and would
    # wrongly be treated as missing.
    if plain_text_part is None:
        # There is no plain text in the email
        send_plain_text_warning(msg)
        return

    # Decode the plain text into a unicode string
    try:
        text = get_decoded_message_payload(plain_text_part)
    except UnicodeDecodeError:
        send_plain_text_warning(msg)
        return

    lines = extract_command_from_subject(msg) + text.splitlines()
    # Process the commands
    factory = CommandFactory({
        'email': extract_email_address_from_header(msg['From']),
    })
    confirmation_set = ConfirmationSet()
    processor = CommandProcessor(factory)
    processor.confirmation_set = confirmation_set
    processor.process(lines)

    confirmation_set.ask_confirmation_all()
    # Send a response only if there were some commands processed
    if processor.is_success():
        send_response(
            msg, processor.get_output(), set(confirmation_set.get_emails()))
Ejemplo n.º 17
0
 def check(self, mlist, msg, msgdata):
     """See `IRule`.

     Returns True when the list has the administrivia check enabled and the
     Subject, or one of the leading non-blank lines of the first text/plain
     part, looks like an email command: its first word is a key of
     ``EMAIL_COMMANDS`` and its argument count is within that command's
     min/max range. Returns False otherwise.
     """
     # The list must have the administrivia check enabled.
     if not mlist.administrivia:
         return False
     # First check the Subject text.
     lines_to_check = []
     subject = str(msg.get('subject', ''))
     if subject != '':
         lines_to_check.append(subject)
     # Search only the first text/plain subpart of the message.  There's
     # really no good way to find email commands in any other content type.
     for part in typed_subpart_iterator(msg, 'text', 'plain'):
         payload = part.get_payload(decode=True)
         lines = payload.splitlines()
         # The configured value may be a string; convert it so the limit
         # comparison below is numeric.
         max_lines = int(config.mailman.email_commands_max_lines)
         # Count lines without using enumerate() because blank lines in the
         # payload don't count against the maximum examined.
         lineno = 0
         for line in lines:
             line = line.strip()
             if len(line) == 0:
                 continue
             lineno += 1
             if lineno > max_lines:
                 break
             lines_to_check.append(line)
         # Only look at the first text/plain part.
         break
     # For each line we're checking, split the line into words.  Then see
     # if it looks like a command with the min-to-max number of arguments.
     for line in lines_to_check:
         words = [word.lower() for word in line.split()]
         # A whitespace-only Subject splits into no words at all; it cannot
         # be a command, and indexing it would raise IndexError.
         if not words:
             continue
         if words[0] not in EMAIL_COMMANDS:
             # This is not an administrivia command.
             continue
         minargs, maxargs = EMAIL_COMMANDS[words[0]]
         if minargs <= len(words) - 1 <= maxargs:
             return True
     return False
Ejemplo n.º 18
0
def get_body(message):
    """Return the plain-text body of `message`, decoded as UTF-8 and stripped.

    For a multipart message, every text/plain subpart is decoded and the
    results are joined with newlines. For a non-multipart message, the
    payload itself is decoded. Bytes that are not valid UTF-8 are replaced.
    """
    if not message.is_multipart():
        # The payload of a non-multipart message is the body itself.
        raw = message.get_payload(decode=True)
        return raw.decode('utf-8', "replace").strip()

    # Only the plain text version is wanted.
    decoded_parts = [
        part.get_payload(decode=True).decode('utf-8', "replace")
        for part in typed_subpart_iterator(message, 'text', 'plain')
    ]
    return u"\n".join(decoded_parts).strip()
Ejemplo n.º 19
0
def get_body(message):
    """Return the plain-text body of `message` as a stripped string.

    For a multipart message, every text/plain subpart is decoded with its
    own charset (falling back to the message's charset) and the results are
    joined with newlines. A non-multipart message is decoded with the
    message's charset. Undecodable bytes are replaced.
    """
    if not message.is_multipart():
        # The payload of a non-multipart message is the body itself.
        raw = message.get_payload(decode=True)
        return str(raw, get_charset(message), "replace").strip()

    # Only the plain text version is wanted.
    decoded_parts = []
    for part in list(typed_subpart_iterator(message, 'text', 'plain')):
        part_charset = get_charset(part, get_charset(message))
        raw = part.get_payload(decode=True)
        decoded_parts.append(str(raw, part_charset, "replace"))
    return "\n".join(decoded_parts).strip()
Ejemplo n.º 20
0
 def check(self, mlist, msg, msgdata):
     """See `IRule`.

     Looks for a moderator password, first via ``self._get_password`` and,
     failing that, in the first non-blank line of the first text/plain part.
     A password found in the body is removed from that part and from every
     text/* part in which the same line pattern occurs. Returns False when
     no password is found; otherwise returns whether it verifies against
     ``mlist.moderator_password``.
     """
     # See if the message has an Approved or Approve header with a valid
     # moderator password.  Also look at the first non-whitespace line in
     # the file to see if it looks like an Approved header.
     # Unique sentinel; presumably _get_password() returns it when no
     # approval header is present -- confirm against its definition.
     missing = object()
     password = self._get_password(msg, missing)
     if password is missing:
         # Find the first text/plain part in the message
         part = None
         stripped = False
         payload = None
         for part in typed_subpart_iterator(msg, 'text', 'plain'):
             payload = part.get_payload(decode=True)
             break
         if payload is not None:
             charset = part.get_content_charset('us-ascii')
             payload = payload.decode(charset, 'replace')
             line = ''
             # Keep line endings so the remaining lines can be rejoined
             # unchanged after the password line is removed.
             lines = payload.splitlines(True)
             # After this loop, `line`/`lineno` refer to the first
             # non-blank line (or to the last line if all are blank).
             for lineno, line in enumerate(lines):
                 if line.strip() != '':
                     break
             if ':' in line:
                 header, value = line.split(':', 1)
                 if header.lower() in HEADERS:
                     password = value.strip()
                     # Now strip the first line from the payload so the
                     # password doesn't leak.
                     del lines[lineno]
                     reset_payload(part, EMPTYSTRING.join(lines))
                     stripped = True
         if stripped:
             # Now try all the text parts in case it's
             # multipart/alternative with the approved line in HTML or
             # other text part.  We make a pattern from the Approved line
             # and delete it from all text/* parts in which we find it.  It
             # would be better to just iterate forward, but email
             # compatibility for pre Python 2.2 returns a list, not a true
             # iterator.
             #
             # This will process all the multipart/alternative parts in the
             # message as well as all other text parts.  We shouldn't find
             # the pattern outside the multipart/alternative parts, but if
             # we do, it is probably best to delete it anyway as it does
             # contain the password.
             #
             # Make a pattern to delete.  We can't just delete a line
             # because line of HTML or other fancy text may include
             # additional message text.  This pattern works with HTML.  It
             # may not work with rtf or whatever else is possible.
             pattern = header + ':(\s|&nbsp;)*' + re.escape(password)
             for part in typed_subpart_iterator(msg, 'text'):
                 payload = part.get_payload()
                 if payload is not None:
                     if re.search(pattern, payload):
                         reset_payload(part, re.sub(pattern, '', payload))
     else:
         # The password came from _get_password(); remove every header
         # named in HEADERS from the message.
         for header in HEADERS:
             del msg[header]
     if password is missing:
         return False
     # verify() returns a validity flag and a replacement hash; the
     # replacement is stored only when the password was valid.
     is_valid, new_hash = config.password_context.verify(
         password, mlist.moderator_password)
     if is_valid and new_hash:
         # Hash algorithm migration.
         mlist.moderator_password = new_hash
     return is_valid
Ejemplo n.º 21
0
Archivo: utils.py Proyecto: a3nm/alot
def extract_body(mail, types=None):
    """
    Return a body text string for the given mail.

    Multipart containers and attachments (parts whose Content-Disposition
    starts with ``attachment``) are skipped.  Every remaining part is either
    decoded directly (`text/plain`) or rendered by the external
    ``copiousoutput`` handler that the mailcap lookup returns for its
    content type; parts without such a handler, or whose handler prints
    nothing, are left out.

    If no `types` are given and the mail has a `text/html` part, HTML is
    preferred over plain text: `text/plain` parts are then not used
    directly but handed to the mailcap handler like any other type.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list of str
    :returns: the text of all used parts, separated by blank lines
    """
    html = list(typed_subpart_iterator(mail, 'text', 'html'))

    # if no specific types are given, we favor text/html over text/plain
    drop_plaintext = bool(html) and not types

    body_parts = []
    for part in mail.walk():
        # Containers have no payload of their own: get_payload(decode=True)
        # is None for them, which must never reach the handler below.
        if part.is_multipart():
            continue

        ctype = part.get_content_type()
        if types is not None and ctype not in types:
            continue
        cd = part.get('Content-Disposition', '')
        if cd.startswith('attachment'):
            continue

        enc = part.get_content_charset() or 'ascii'
        raw_payload = part.get_payload(decode=True)
        if ctype == 'text/plain' and not drop_plaintext:
            raw_payload = string_decode(raw_payload, enc)
            body_parts.append(string_sanitize(raw_payload))
            continue

        # get mime handler
        _, entry = settings.mailcap_find_match(ctype, key='copiousoutput')
        if not entry:
            continue

        # open tempfile, respect mailcaps nametemplate
        nametemplate = entry.get('nametemplate', '%s')
        prefix, suffix = parse_mailcap_nametemplate(nametemplate)
        tmpfile = tempfile.NamedTemporaryFile(delete=False,
                                              prefix=prefix,
                                              suffix=suffix)
        try:
            # Write the payload and close the file so that the handler can
            # open it by name.
            with tmpfile:
                tmpfile.write(raw_payload)

            # read parameter, create handler command
            parms = tuple('='.join(p) for p in part.get_params())

            # create and call external command
            cmd = mailcap.subst(entry['view'], ctype,
                                filename=tmpfile.name, plist=parms)
            logging.debug('command: %s', cmd)
            logging.debug('parms: %s', str(parms))
            cmdlist = split_commandstring(cmd)
            # call handler; only its output is of interest here
            rendered_payload, _, _ = helper.call_cmd(cmdlist)
        finally:
            # The file was created with delete=False, so it has to be
            # removed here even when rendering fails.
            os.unlink(tmpfile.name)

        if rendered_payload:  # handler had output
            body_parts.append(string_sanitize(rendered_payload))
    return u'\n\n'.join(body_parts)
Ejemplo n.º 22
0
 def check(self, mlist, msg, msgdata):
     """See `IRule`.

     Return True only when a password found in the message is accepted by
     `config.password_context.verify` against `mlist.moderator_password`.
     The password is taken from `self._get_password(msg, ...)` or, when
     that yields nothing, from a `Header: value` pseudo-header on the first
     non-blank line of the first text/plain part.

     Side effects on `msg`: when `_get_password` found a password, every
     header named in HEADERS is deleted; when the pseudo-header was used,
     that line is removed from the text/plain part and the same
     header/password text is removed from every text/* part.  If
     verification reports a new hash, `mlist.moderator_password` is
     replaced by it.  `msgdata` is not used.
     """
     if mlist.moderator_password is None:
         return False
     # See if the message has an Approved or Approve header with a valid
     # moderator password.  Also look at the first non-whitespace line in
     # the file to see if it looks like an Approved header.
     missing = object()
     password = self._get_password(msg, missing)
     if password is missing:
         # Find the first text/plain part in the message
         part = None
         stripped = False
         payload = None
         for part in typed_subpart_iterator(msg, 'text', 'plain'):
             payload = part.get_payload(decode=True)
             break
         if payload is not None:
             charset = part.get_content_charset('us-ascii')
             try:
                 # Do the decoding inside the try/except so that if the
                 # charset is unknown, we'll just drop back to ascii.
                 payload = payload.decode(charset, 'replace')
             except LookupError:
                 # Unknown or empty charset.
                 payload = payload.decode('us-ascii', 'replace')
             line = ''
             lines = payload.splitlines(True)
             for lineno, line in enumerate(lines):
                 if line.strip() != '':
                     break
             if ':' in line:
                 header, value = line.split(':', 1)
                 if header.lower() in HEADERS:
                     password = value.strip()
                     # Now strip the first line from the payload so the
                     # password doesn't leak.
                     del lines[lineno]
                     reset_payload(part, EMPTYSTRING.join(lines))
                     stripped = True
         if stripped:
             # Now try all the text parts in case it's
             # multipart/alternative with the approved line in HTML or
             # other text part.  We make a pattern from the Approved line
             # and delete it from all text/* parts in which we find it.
             #
             # This will process all the multipart/alternative parts in the
             # message as well as all other text parts.  We shouldn't find
             # the pattern outside the multipart/alternative parts, but if
             # we do, it is probably best to delete it anyway as it does
             # contain the password.
             #
             # Make a pattern to delete.  We can't just delete a line
             # because line of HTML or other fancy text may include
             # additional message text.  This pattern works with HTML.  It
             # may not work with rtf or whatever else is possible.
             #
             # Both the header name and the password come from the message
             # body, so they are escaped and matched literally.
             approved_re = re.compile(
                 re.escape(header) + r':(\s|&nbsp;)*' + re.escape(password))
             for part in typed_subpart_iterator(msg, 'text'):
                 payload = part.get_payload()
                 if payload is None:
                     continue
                 # Only rewrite the parts that actually contained the line.
                 cleaned, count = approved_re.subn('', payload)
                 if count:
                     reset_payload(part, cleaned)
     else:
         for header in HEADERS:
             del msg[header]
     if password is missing:
         return False
     is_valid, new_hash = config.password_context.verify(
         password, mlist.moderator_password)
     if is_valid and new_hash:
         # Hash algorithm migration.
         mlist.moderator_password = new_hash
     return is_valid
Ejemplo n.º 23
0
 def get_mimeparts(msg, maintype="*", subtype="*"):
     """Collect the parts of an email.Message that match a content type.

     Every part yielded by typed_subpart_iterator(msg, maintype, subtype)
     contributes one two-item list: [content type, decoded payload].
     The result is a list of those pairs.
     """
     return [
         [subpart.get_content_type(), subpart.get_payload(decode=True)]
         for subpart in typed_subpart_iterator(msg, maintype, subtype)
     ]
Ejemplo n.º 24
0
 def process(self, msg):
     """See `IBounceDetector`."""
     temporary = []
     permanent = []
     reports = typed_subpart_iterator(msg, 'message', 'delivery-status')
     for report in reports:
         # A usable message/delivery-status part holds a list of Message
         # objects, one per header block; skip anything else.
         if not report.is_multipart():
             continue
         for block in report.get_payload():
             # The Action field picks the bucket.  Only the prefix is
             # tested because some MTAs append comments to the action.
             action = block.get('action', '').lower()
             if action.startswith('delayed'):
                 bucket = temporary
             elif action.startswith('fail'):
                 bucket = permanent
             else:
                 # Neither delayed nor failed, so ignore this block.
                 continue
             # Original-Recipient is optional and Final-Recipient is
             # mandatory but may not match a list address exactly, so both
             # are consulted, in that order.
             candidates = []
             typed = False
             for field in ('original-recipient', 'final-recipient'):
                 for key, _ in block.get_params([], field):
                     if key.lower() == 'rfc822':
                         typed = True
                     else:
                         candidates.append(key)
                 if typed:
                     # Note that the candidates should already be unquoted.
                     bucket.extend(candidates)
                     break
                 # MAS: This is a kludge, but
                 # SMTP-GATEWAY01.intra.home.dk has a final-recipient with
                 # an angle-addr and no address-type parameter at all.
                 # Non-compliant, but ...
                 bucket.extend(
                     candidate[1:-1] for candidate in candidates
                     if candidate.startswith('<') and candidate.endswith('>'))
     # There may be Nones among the collected addresses, so filter those
     # out of both sets.  Also, for Python 3 compatibility, the API
     # requires byte addresses.
     delayed = {
         parseaddr(address)[1].encode('us-ascii')
         for address in temporary if address is not None
     }
     failed = {
         parseaddr(address)[1].encode('us-ascii')
         for address in permanent if address is not None
     }
     # First the delayed (temporary) failures, then the permanent ones.
     return (delayed, failed)
Ejemplo n.º 25
0
def get_zip_passwords(message):
    """ Parse message for possible zip password combinations.

    The candidates are, in order: a fixed list of passwords commonly used
    for malware samples, a fixed list of commonly used passwords, every
    string in the message text that is enclosed in quotes, square brackets
    or parentheses, and finally every whitespace-separated word of the text
    (both as written and with surrounding ``.,;:?!`` stripped).  The
    message text is the decoded text/plain parts, the text extracted from
    the text/html parts and the Subject header.

    Args:
        message (email.message) Email message object to parse.

    Returns:
        list: candidate passwords; the list may contain duplicates.
    """
    possible_passwords = []
    # Passwords commonly used for malware
    malware_passwords = ["infected", "malware"]
    possible_passwords += malware_passwords
    # Commonly used passwords
    common_passwords = [
        "123456", "password", "12345678", "qwerty", "abc123", "123456789",
        "111111", "1234567", "iloveyou", "adobe123", "123123", "sunshine",
        "1234567890", "letmein", "1234", "monkey", "shadow", "sunshine",
        "12345", "password1", "princess", "azerty", "trustno1", "000000"
    ]

    possible_passwords += common_passwords

    # Not checking for multi-part message because by having an
    # encrypted zip file it must be multi-part.
    text_parts = list(typed_subpart_iterator(message, 'text', 'plain'))
    html_parts = list(typed_subpart_iterator(message, 'text', 'html'))
    body = []
    # Get full message character set once
    # Language example reference (using python2)
    # http://ginstrom.com/scribbles/2007/11/19/parsing-multilingual-email-with-python/
    message_charset = get_charset(message)
    for part in text_parts:
        charset = get_charset(part, message_charset)
        body.append(part.get_payload(decode=True).decode(charset))
    for part in html_parts:
        charset = get_charset(part, message_charset)
        html_part = part.get_payload(decode=True).decode(charset)
        html_parser = HTMLTextParser()
        html_parser.feed(html_part)
        body.extend(html_parser.text_data)
    raw_text = "\n".join(body).strip()

    # Add subject to text corpus to parse.  A message without a Subject
    # header yields None from get(); fall back to an empty subject instead
    # of failing on the concatenation.
    subject = " " + message.get('Subject', '')
    raw_text += subject

    # Grab any strings that are marked off by special chars
    marking_chars = [["\'", "\'"], ['"', '"'], ['[', ']'], ['(', ')']]
    for char_set in marking_chars:
        regex = re.compile(r"""\{0}([^\{1}]*)\{1}""".format(
            char_set[0], char_set[1]))
        marked_off = re.findall(regex, raw_text)
        possible_passwords += marked_off

    # Create a list of unique words to test as passwords
    individual_words = re.split(r"\s", raw_text)
    # Also get words with basic punctuation stripped out
    # just in case someone places a password in a proper sentence
    stripped_words = [i.strip('.,;:?!') for i in individual_words]
    unique_words = list(set(individual_words + stripped_words))
    possible_passwords += unique_words

    return possible_passwords
Ejemplo n.º 26
0
 def process(self, msg):
     """See `IBounceDetector`."""
     def as_byte_set(addresses):
         # There may be Nones among the collected addresses, so filter
         # those out.  Also, for Python 3 compatibility, the API requires
         # byte addresses.
         return set(
             parseaddr(entry)[1].encode('us-ascii')
             for entry in addresses if entry is not None)

     soft_bounces = []
     hard_bounces = []
     # Iterate over each message/delivery-status subpart.
     for dsn in typed_subpart_iterator(msg, 'message', 'delivery-status'):
         if not dsn.is_multipart():
             # Huh?
             continue
         # Each message/delivery-status contains a list of Message objects
         # which are the header blocks.  Iterate over those too.
         for fields in dsn.get_payload():
             # Grok out Action first: it decides where the recipients of
             # this block go.  Some MTAs have been observed that put
             # comments on the action, hence the prefix tests.
             outcome = fields.get('action', '').lower()
             if outcome.startswith('delayed'):
                 target = soft_bounces
             elif outcome.startswith('fail'):
                 target = hard_bounces
             else:
                 # Neither delayed nor failed, so ignore this block.
                 continue
             # We try to dig out the Original-Recipient (which is optional)
             # and Final-Recipient (which is mandatory, but may not exactly
             # match an address on our list).
             seen = []
             has_rfc822 = False
             for name in ('original-recipient', 'final-recipient'):
                 for token, _unused in fields.get_params([], name):
                     if token.lower() == 'rfc822':
                         has_rfc822 = True
                     else:
                         seen.append(token)
                 if has_rfc822:
                     # Note that the tokens should already be unquoted.
                     target.extend(seen)
                     break
                 # MAS: This is a kludge, but
                 # SMTP-GATEWAY01.intra.home.dk has a final-recipient with
                 # an angle-addr and no address-type parameter at all.
                 # Non-compliant, but ...
                 for token in seen:
                     if token.startswith('<') and token.endswith('>'):
                         target.append(token[1:-1])
     # First, the delayed, or temporary failures.  And then the failed or
     # permanent failures.
     return (as_byte_set(soft_bounces), as_byte_set(hard_bounces))
Ejemplo n.º 27
0
Archivo: emlx.py Proyecto: mikez/emlx
def find_next_payload_of_type(message, maintype="text", subtype=None):
    """Return the payload of the first subpart of the given type.

    The subparts are taken from typed_subpart_iterator(message, maintype,
    subtype); only the first one is looked at.  Returns None when the
    iterator yields nothing.
    """
    for candidate in typed_subpart_iterator(message, maintype, subtype):
        return candidate.get_payload()
    return None
Ejemplo n.º 28
0
def extract_body(mail, types=None, field_key='copiousoutput'):
    """
    Return a body text string for the given mail.

    Multipart containers and attachments (parts whose Content-Disposition
    starts with ``attachment``) are skipped.  `text/plain` parts are decoded
    directly; every other part is rendered by the external handler that the
    mailcap lookup returns for its content type, and is left out when there
    is no handler or the handler prints nothing.

    If `types` is `None`, the preferred type is `text/plain` when the
    ``prefer_plaintext`` setting is truthy and `text/html` otherwise; if the
    mail has at least one part of the preferred type, only parts of that
    type are used.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list of str
    :param field_key: mailcap field that a handler entry must have to be
                      used for rendering
    :type field_key: str
    :returns: the text of all used parts, separated by blank lines
    """

    preferred = 'text/plain' if settings.get(
        'prefer_plaintext') else 'text/html'
    has_preferred = False

    # see if the mail has our preferred type
    if types is None:
        has_preferred = list(typed_subpart_iterator(
            mail, *preferred.split('/')))

    body_parts = []
    for part in mail.walk():
        # Containers have no payload of their own: get_payload(decode=True)
        # is None for them, which must never reach the handler below.
        if part.is_multipart():
            continue

        ctype = part.get_content_type()

        if types is not None and ctype not in types:
            continue
        cd = part.get('Content-Disposition', '')
        if cd.startswith('attachment'):
            continue
        # if the mail has our preferred type, we only keep this type
        # note that if types != None, has_preferred always stays False
        if has_preferred and ctype != preferred:
            continue

        enc = part.get_content_charset() or 'ascii'
        raw_payload = part.get_payload(decode=True)
        if ctype == 'text/plain':
            raw_payload = string_decode(raw_payload, enc)
            body_parts.append(string_sanitize(raw_payload))
            continue

        # get mime handler
        _, entry = settings.mailcap_find_match(ctype, key=field_key)
        if not entry:
            continue

        view_command = entry['view']
        tempfile_name = None
        stdin = None
        try:
            # in case the mailcap defined command contains no '%s',
            # we pipe the files content to the handling command via stdin
            if '%s' in view_command:
                # open tempfile, respect mailcaps nametemplate
                nametemplate = entry.get('nametemplate', '%s')
                prefix, suffix = parse_mailcap_nametemplate(nametemplate)
                with tempfile.NamedTemporaryFile(
                        delete=False, prefix=prefix, suffix=suffix) \
                        as tmpfile:
                    # remember the name first so that a failed write is
                    # cleaned up as well
                    tempfile_name = tmpfile.name
                    tmpfile.write(raw_payload)
            else:
                stdin = raw_payload

            # read parameter, create handler command
            parms = tuple('='.join(p) for p in part.get_params())

            # create and call external command
            cmd = mailcap.subst(view_command, ctype,
                                filename=tempfile_name, plist=parms)
            logging.debug('command: %s', cmd)
            logging.debug('parms: %s', str(parms))
            cmdlist = split_commandstring(cmd)
            # call handler
            rendered_payload, _, _ = helper.call_cmd(cmdlist, stdin=stdin)
        finally:
            # The file was created with delete=False, so it has to be
            # removed here even when rendering fails.
            if tempfile_name:
                os.unlink(tempfile_name)

        if rendered_payload:  # handler had output
            body_parts.append(string_sanitize(rendered_payload))
    return u'\n\n'.join(body_parts)
Ejemplo n.º 29
0
Archivo: utils.py Proyecto: windo/alot
def extract_body(mail, types=None):
    """
    Return a body text string for the given mail.

    Multipart containers and attachments (parts whose Content-Disposition
    starts with ``attachment``) are skipped.  `text/plain` parts are decoded
    directly; every other part is rendered by the external ``copiousoutput``
    handler that the mailcap lookup returns for its content type, and is
    left out when there is no handler or the handler prints nothing.

    If `types` is `None`, the preferred type is `text/plain` when the
    ``prefer_plaintext`` setting is truthy and `text/html` otherwise; if the
    mail has at least one part of the preferred type, only parts of that
    type are used.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list of str
    :returns: the text of all used parts, separated by blank lines
    """

    if settings.get('prefer_plaintext'):
        preferred = 'text/plain'
    else:
        preferred = 'text/html'
    has_preferred = False

    # see if the mail has our preferred type
    if types is None:
        has_preferred = list(
            typed_subpart_iterator(mail, *preferred.split('/')))

    body_parts = []
    for part in mail.walk():
        # Containers have no payload of their own: get_payload(decode=True)
        # is None for them, which must never reach the handler below.
        if part.is_multipart():
            continue

        ctype = part.get_content_type()

        if types is not None and ctype not in types:
            continue
        cd = part.get('Content-Disposition', '')
        if cd.startswith('attachment'):
            continue
        # if the mail has our preferred type, we only keep this type
        # note that if types != None, has_preferred always stays False
        if has_preferred and ctype != preferred:
            continue

        enc = part.get_content_charset() or 'ascii'
        raw_payload = part.get_payload(decode=True)
        if ctype == 'text/plain':
            raw_payload = string_decode(raw_payload, enc)
            body_parts.append(string_sanitize(raw_payload))
            continue

        # get mime handler
        _, entry = settings.mailcap_find_match(ctype, key='copiousoutput')
        if not entry:
            continue

        view_command = entry['view']
        tempfile_name = None
        stdin = None
        try:
            # in case the mailcap defined command contains no '%s',
            # we pipe the files content to the handling command via stdin
            if '%s' in view_command:
                # open tempfile, respect mailcaps nametemplate
                nametemplate = entry.get('nametemplate', '%s')
                prefix, suffix = parse_mailcap_nametemplate(nametemplate)
                with tempfile.NamedTemporaryFile(delete=False,
                                                 prefix=prefix,
                                                 suffix=suffix) as tmpfile:
                    # remember the name first so that a failed write is
                    # cleaned up as well
                    tempfile_name = tmpfile.name
                    # write payload to tmpfile
                    tmpfile.write(raw_payload)
            else:
                stdin = raw_payload

            # read parameter, create handler command
            parms = tuple('='.join(p) for p in part.get_params())

            # create and call external command
            cmd = mailcap.subst(view_command, ctype,
                                filename=tempfile_name, plist=parms)
            logging.debug('command: %s', cmd)
            logging.debug('parms: %s', str(parms))
            cmdlist = split_commandstring(cmd)
            # call handler; only its output is of interest here
            rendered_payload, _, _ = helper.call_cmd(cmdlist, stdin=stdin)
        finally:
            # The file was created with delete=False, so it has to be
            # removed here even when rendering fails.
            if tempfile_name:
                os.unlink(tempfile_name)

        if rendered_payload:  # handler had output
            body_parts.append(string_sanitize(rendered_payload))
    return u'\n\n'.join(body_parts)
Ejemplo n.º 30
0
Archivo: utils.py Proyecto: vrs/alot
def extract_body(mail, types=None, field_key='copiousoutput'):
    """Returns a string view of a Message.

    If the `types` argument is set then only parts whose content type is
    listed there are used. If `types` is None then
    :ref:`prefer_plaintext <prefer-plaintext>` will be consulted; if it is True
    then text/plain parts will be returned, if it is false then text/html will
    be returned if present or text/plain if there are no text/html parts.

    Multipart containers and parts whose Content-Disposition starts with
    ``attachment`` are skipped. A non-text/plain part is rendered by the
    external handler that the mailcap lookup returns for its content type;
    a part without a handler gets a Content-Disposition header marking it
    as an attachment instead, which modifies `mail`.

    :param mail: the mail to use
    :type mail: :class:`email.Message`
    :param types: mime content types to use for body string
    :type types: list[str]
    :param field_key: mailcap field that a handler entry must have to be
                      used for rendering
    :type field_key: str
    :returns: The combined text of any parts to be used
    :rtype: str
    """

    preferred = 'text/plain' if settings.get(
        'prefer_plaintext') else 'text/html'
    has_preferred = False

    # see if the mail has our preferred type
    if types is None:
        has_preferred = list(
            typed_subpart_iterator(mail, *preferred.split('/')))

    body_parts = []
    for part in mail.walk():
        # skip non-leaf nodes in the mail tree
        if part.is_multipart():
            continue

        ctype = part.get_content_type()

        if types is not None and ctype not in types:
            continue
        cd = part.get('Content-Disposition', '')
        if cd.startswith('attachment'):
            continue
        # if the mail has our preferred type, we only keep this type
        # note that if types != None, has_preferred always stays False
        if has_preferred and ctype != preferred:
            continue

        enc = part.get_content_charset() or 'ascii'
        raw_payload = part.get_payload(decode=True)
        if ctype == 'text/plain':
            raw_payload = string_decode(raw_payload, enc)
            body_parts.append(string_sanitize(raw_payload))
            continue

        # get mime handler
        _, entry = settings.mailcap_find_match(ctype, key=field_key)
        if entry is None:
            # nothing can render this part: mark it as an attachment
            part.add_header('Content-Disposition', 'attachment; ' + cd)
            continue

        view_command = entry['view']
        tempfile_name = None
        stdin = None
        try:
            # in case the mailcap defined command contains no '%s',
            # we pipe the files content to the handling command via stdin
            if '%s' in view_command:
                # open tempfile, respect mailcaps nametemplate
                nametemplate = entry.get('nametemplate', '%s')
                prefix, suffix = parse_mailcap_nametemplate(nametemplate)
                with tempfile.NamedTemporaryFile(
                        delete=False, prefix=prefix, suffix=suffix) \
                        as tmpfile:
                    # remember the name first so that a failed write is
                    # cleaned up as well
                    tempfile_name = tmpfile.name
                    tmpfile.write(raw_payload)
            else:
                stdin = raw_payload

            # read parameter, create handler command
            parms = tuple('='.join(p) for p in part.get_params())

            # create and call external command
            cmd = mailcap.subst(view_command,
                                ctype,
                                filename=tempfile_name,
                                plist=parms)
            logging.debug('command: %s', cmd)
            logging.debug('parms: %s', str(parms))
            cmdlist = split_commandstring(cmd)
            # call handler
            rendered_payload, _, _ = helper.call_cmd(cmdlist, stdin=stdin)
        finally:
            # The file was created with delete=False, so it has to be
            # removed here even when rendering fails.
            if tempfile_name:
                os.unlink(tempfile_name)

        if rendered_payload:  # handler had output
            body_parts.append(string_sanitize(rendered_payload))
    return u'\n\n'.join(body_parts)