def _parsebody(self, container, fp, firstbodyline=None):
    """Parse the message body available from fp and store it on container.

    How the body is handled depends on container's boundary parameter and
    content type:

    * a boundary is defined: the text is split on the boundary lines and
      every piece is parsed with self.parsestr() and attached to container;
    * multipart main type without a boundary: BoundaryError;
    * message/delivery-status: the body is read as consecutive header
      blocks, each stored in its own message object;
    * any other message/* type: the body is parsed as a nested message;
    * everything else: the raw text becomes the payload.

    container     -- message object that receives the payload.
    fp            -- file-like object supplying the body text.
    firstbodyline -- optional text that is put back in front of what
                     fp.read() returns, followed by '\\n'.  Only the
                     multipart and raw-text branches use it.

    Raises Errors.BoundaryError when the starting or terminating boundary
    is missing and self._strict is true, when the terminating boundary is
    missing and the text does not end in a newline, or when a multipart
    container has no boundary at all.
    """
    boundary = container.get_boundary()
    isdigest = (container.get_content_type() == 'multipart/digest')
    # If there's a boundary, split the payload text into its constituent
    # parts and parse each separately.  Otherwise, just parse the rest of
    # the body as a single message.
    if boundary:
        preamble = epilogue = None
        # The first boundary won't always have a leading newline since we
        # are at the start of the body text, and there is not always a
        # preamble before it.
        separator = '--' + boundary
        payload = fp.read()
        if firstbodyline is not None:
            payload = firstbodyline + '\n' + payload
        # A regular expression is used because boundary lines can carry
        # trailing whitespace.
        mo = re.search(
            r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)',
            payload)
        if not mo:
            if self._strict:
                raise Errors.BoundaryError(
                    "Couldn't find starting boundary: %s" % boundary)
            # Lax mode: keep the unsplit text as the payload.
            container.set_payload(payload)
            return
        start = mo.start()
        if start > 0:
            # There's some pre-MIME boundary preamble.
            preamble = payload[0:start]
        # Step over the boundary, its trailing whitespace and the length
        # of the next line ending NLCRE finds.
        start += len(mo.group('sep')) + len(mo.group('ws'))
        mo = NLCRE.search(payload, start)
        if mo:
            start += len(mo.group(0))
        # The pattern is compiled first because a start position has to be
        # passed to search(), which the module-level function does not
        # accept.
        cre = re.compile('(?P<sep>\r\n|\r|\n)' +
                         re.escape(separator) + '--')
        mo = cre.search(payload, start)
        if mo:
            terminator = mo.start()
            linesep = mo.group('sep')
            if mo.end() < len(payload):
                # There's some post-MIME boundary epilogue.
                epilogue = payload[mo.end():]
        elif self._strict:
            raise Errors.BoundaryError(
                "Couldn't find terminating boundary: %s" % boundary)
        else:
            # No trailing boundary.  Take the line separator from the end
            # of the text: prefer a trailing blank line, fall back to a
            # single trailing newline, and give up if there is neither.
            mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload)
            if not mo:
                mo = re.search('(?P<sep>\r\n|\r|\n)$', payload)
                if not mo:
                    raise Errors.BoundaryError(
                        'No terminating boundary and no trailing empty line')
            linesep = mo.group('sep')
            terminator = len(payload)
        # Split the text on the boundary lines, which include the trailing
        # newline.
        parts = re.split(
            linesep + re.escape(separator) + r'[ \t]*' + linesep,
            payload[start:terminator])
        # Same values for every part, so set them once.
        container.preamble = preamble
        container.epilogue = epilogue
        for part in parts:
            if isdigest:
                # Subparts of a multipart/digest default to message/rfc822
                # instead of text/plain: an optional block of MIME headers,
                # then an empty line followed by the message headers.
                if part.startswith(linesep):
                    # There's no header block, so create an empty message
                    # object as the container and lop off the newline so
                    # the sub-subobject can be parsed.
                    msgobj = self._class()
                    part = part[len(linesep):]
                else:
                    # A part may consist of headers only, with no blank
                    # line and no body; treat that as an empty body rather
                    # than failing on the tuple unpacking.
                    pieces = part.split(linesep + linesep, 1)
                    parthdrs = pieces[0]
                    if len(pieces) == 2:
                        part = pieces[1]
                    else:
                        part = ''
                    # msgobj in this case is the "message/rfc822" container
                    msgobj = self.parsestr(parthdrs, headersonly=1)
                msgobj.set_default_type('message/rfc822')
                maintype = msgobj.get_content_maintype()
                if maintype in ('message', 'multipart'):
                    # submsgobj is the message itself
                    submsgobj = self.parsestr(part)
                    msgobj.attach(submsgobj)
                else:
                    msgobj.set_payload(part)
            else:
                msgobj = self.parsestr(part)
            container.attach(msgobj)
    elif container.get_main_type() == 'multipart':
        # Very bad.  A message is a multipart with no boundary!
        raise Errors.BoundaryError(
            'multipart message with no defined boundary')
    elif container.get_type() == 'message/delivery-status':
        # This special kind of type contains blocks of headers separated
        # by a blank line.  Each header block is represented as a separate
        # message object.
        blocks = []
        while True:
            blockmsg = self._class()
            self._parseheaders(blockmsg, fp)
            if not len(blockmsg):
                # No more header blocks left
                break
            blocks.append(blockmsg)
        container.set_payload(blocks)
    elif container.get_main_type() == 'message':
        # Create a container for the payload, but watch out for there not
        # being any headers left.
        try:
            msg = self.parse(fp)
        except Errors.HeaderParseError:
            msg = self._class()
            self._parsebody(msg, fp)
        container.attach(msg)
    else:
        text = fp.read()
        if firstbodyline is not None:
            text = firstbodyline + '\n' + text
        container.set_payload(text)
# Copyright (C) 2001 Python Software Foundation
def _parsebody(self, container, fp):
    # Fill in container's payload from the text remaining in fp.  The
    # container's boundary parameter and content type decide whether the
    # text is split into subparts, read as delivery-status header blocks,
    # parsed as a nested message, or stored as is.
    boundary = container.get_boundary()
    isdigest = (container.get_type() == 'multipart/digest')

    if boundary:
        # Multipart: cut the text up on the boundary lines and parse every
        # piece separately.  The first boundary is searched for without a
        # leading newline because we are at the start of the body text.
        delimiter = '--' + boundary
        text = fp.read()
        first = text.find(delimiter)
        if first < 0:
            raise Errors.BoundaryError(
                "Couldn't find starting boundary: %s" % boundary)
        # Whatever precedes the first boundary is the pre-MIME preamble.
        preamble = None
        if first > 0:
            preamble = text[:first]
        # Skip the boundary plus one character; one more for a digest,
        # whose subparts carry an extra newline before their headers.
        body_start = first + len(delimiter) + 1 + isdigest
        closing = '\n' + delimiter + '--'
        body_end = text.find(closing, body_start)
        if body_end < 0:
            raise Errors.BoundaryError(
                "Couldn't find terminating boundary: %s" % boundary)
        # Whatever follows the closing boundary is the post-MIME epilogue.
        epilogue = None
        tail = body_end + len(closing)
        if tail < len(text):
            epilogue = text[tail:]
        # The split string includes the newline that ends the boundary
        # line; a digest has a second newline there.
        splitter = '\n' + delimiter + '\n'
        if isdigest:
            splitter += '\n'
        for chunk in text[body_start:body_end].split(splitter):
            submsg = self.parsestr(chunk)
            container.preamble = preamble
            container.epilogue = epilogue
            # Make sure the container's payload ends up being a list.
            if isinstance(container.get_payload(), ListType):
                container.add_payload(submsg)
            else:
                container.set_payload([submsg])
        return

    if container.get_type() == 'message/delivery-status':
        # The body is a run of header blocks separated by blank lines;
        # every block gets a message object of its own.
        statuses = []
        while 1:
            block = self._class()
            self._parseheaders(block, fp)
            if len(block) == 0:
                # Nothing was parsed: no header blocks are left.
                break
            statuses.append(block)
        container.set_payload(statuses)
        return

    if container.get_main_type() == 'message':
        # Parse the body as a message in its own right, falling back to a
        # fresh message object when no headers can be parsed.
        try:
            inner = self.parse(fp)
        except Errors.HeaderParseError:
            inner = self._class()
            self._parsebody(inner, fp)
        container.add_payload(inner)
        return

    container.add_payload(fp.read())