コード例 #1
0
 def _parsebody(self, container, fp, firstbodyline=None):
     """Parse the body text available from `fp` into `container`.

     The handling depends on what `container` reports about itself:

     * With a boundary parameter, the text is split on that boundary and
       each subpart is parsed and attached to `container`.  Text before
       the first boundary is stored as the preamble and text after the
       closing boundary as the epilogue.  For multipart/digest containers
       each subpart defaults to message/rfc822.
     * A multipart container without a boundary raises
       Errors.BoundaryError.
     * For message/delivery-status, each block of headers is stored as
       its own message object and the payload is the list of them.
     * For any other container whose main type is 'message', the body is
       parsed as a nested message and attached.
     * Otherwise the text is stored unchanged as the payload.

     `firstbodyline`, when not None, is prepended (followed by a newline)
     to the text read from `fp` in the boundary and plain-text cases.

     Raises Errors.BoundaryError if the starting or terminating boundary
     is missing in strict mode, or if a non-strict body has neither a
     terminating boundary nor a trailing line ending.
     """
     # Parse the body, but first split the payload on the content-type
     # boundary if present.
     boundary = container.get_boundary()
     isdigest = (container.get_content_type() == 'multipart/digest')
     # If there's a boundary, split the payload text into its constituent
     # parts and parse each separately.  Otherwise, just parse the rest of
     # the body as a single message.
     if boundary:
         preamble = epilogue = None
         # Split into subparts.  The first boundary we're looking for won't
         # always have a leading newline since we're at the start of the
         # body text, and there's not always a preamble before the first
         # boundary.
         separator = '--' + boundary
         payload = fp.read()
         if firstbodyline is not None:
             payload = firstbodyline + '\n' + payload
         # We use an RE here because boundaries can have trailing
         # whitespace.
         mo = re.search(
             r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)',
             payload)
         if not mo:
             if self._strict:
                 raise Errors.BoundaryError(
                     "Couldn't find starting boundary: %s" % boundary)
             # Lax mode: keep the unparsed text as the payload.
             container.set_payload(payload)
             return
         start = mo.start()
         if start > 0:
             # there's some pre-MIME boundary preamble
             preamble = payload[0:start]
         # Step over the boundary itself, its trailing whitespace and the
         # line ending that follows it.
         # NOTE(review): NLCRE is defined outside this block; presumably it
         # matches a single line ending.  search() finds the next one at or
         # after `start`, not necessarily right at `start` -- confirm that
         # this is intended for boundary lines with trailing junk.
         start += len(mo.group('sep')) + len(mo.group('ws'))
         mo = NLCRE.search(payload, start)
         if mo:
             start += len(mo.group(0))
         # We create a compiled regexp first because we need to be able to
         # specify the start position, and the module function doesn't
         # support this signature. :(
         cre = re.compile('(?P<sep>\r\n|\r|\n)' + re.escape(separator) +
                          '--')
         mo = cre.search(payload, start)
         if mo:
             terminator = mo.start()
             # The line ending in front of the closing boundary is taken
             # as the line ending used between the parts.
             linesep = mo.group('sep')
             if mo.end() < len(payload):
                 # There's some post-MIME boundary epilogue
                 epilogue = payload[mo.end():]
         elif self._strict:
             raise Errors.BoundaryError(
                 "Couldn't find terminating boundary: %s" % boundary)
         else:
             # Handle the case of no trailing boundary.  Check that it ends
             # in a blank line.  Some cases (spamspamspam) don't even have
             # that!
             mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload)
             if not mo:
                 mo = re.search('(?P<sep>\r\n|\r|\n)$', payload)
                 if not mo:
                     raise Errors.BoundaryError(
                         'No terminating boundary and no trailing empty line'
                     )
             linesep = mo.group('sep')
             terminator = len(payload)
         # We split the textual payload on the boundary separator, which
         # includes the trailing newline. If the container is a
         # multipart/digest then the subparts are by default message/rfc822
         # instead of text/plain.  In that case, they'll have an optional
         # block of MIME headers, then an empty line followed by the
         # message headers.
         parts = re.split(
             linesep + re.escape(separator) + r'[ \t]*' + linesep,
             payload[start:terminator])
         # The preamble and epilogue do not depend on the individual part,
         # so they are stored once instead of on every iteration.
         container.preamble = preamble
         container.epilogue = epilogue
         for part in parts:
             if isdigest:
                 if part.startswith(linesep):
                     # There's no header block so create an empty message
                     # object as the container, and lop off the newline so
                     # we can parse the sub-subobject
                     msgobj = self._class()
                     part = part[len(linesep):]
                 else:
                     # Separate the MIME header block from what follows.
                     # A malformed part may lack the blank line; treat it
                     # as all headers with an empty body rather than
                     # failing to unpack the split result.
                     pieces = part.split(linesep + linesep, 1)
                     parthdrs = pieces[0]
                     if len(pieces) > 1:
                         part = pieces[1]
                     else:
                         part = ''
                     # msgobj in this case is the "message/rfc822" container
                     msgobj = self.parsestr(parthdrs, headersonly=1)
                 # while submsgobj is the message itself
                 msgobj.set_default_type('message/rfc822')
                 maintype = msgobj.get_content_maintype()
                 if maintype in ('message', 'multipart'):
                     submsgobj = self.parsestr(part)
                     msgobj.attach(submsgobj)
                 else:
                     msgobj.set_payload(part)
             else:
                 msgobj = self.parsestr(part)
             container.attach(msgobj)
     elif container.get_main_type() == 'multipart':
         # Very bad.  A message is a multipart with no boundary!
         raise Errors.BoundaryError(
             'multipart message with no defined boundary')
     elif container.get_type() == 'message/delivery-status':
         # This special kind of type contains blocks of headers separated
         # by a blank line.  We'll represent each header block as a
         # separate Message object
         blocks = []
         while True:
             blockmsg = self._class()
             self._parseheaders(blockmsg, fp)
             if not len(blockmsg):
                 # No more header blocks left
                 break
             blocks.append(blockmsg)
         container.set_payload(blocks)
     elif container.get_main_type() == 'message':
         # Create a container for the payload, but watch out for there not
         # being any headers left
         try:
             msg = self.parse(fp)
         except Errors.HeaderParseError:
             # No headers could be parsed: treat what is left as the body
             # of a fresh, empty message object.
             msg = self._class()
             self._parsebody(msg, fp)
         container.attach(msg)
     else:
         text = fp.read()
         if firstbodyline is not None:
             text = firstbodyline + '\n' + text
         container.set_payload(text)
コード例 #2
0
# Copyright (C) 2001 Python Software Foundation
コード例 #3
0
 def _parsebody(self, container, fp):
     """Parse the body text available from `fp` into `container`.

     When `container` has a boundary, the text is split on it, every
     subpart is parsed with parsestr() and stored in the container's list
     payload, and any text before the first boundary / after the closing
     boundary is stored as the preamble / epilogue.  For multipart/digest
     the subparts are expected to be preceded by an extra newline.

     Without a boundary: message/delivery-status bodies become a list of
     header-block messages, other bodies whose main type is 'message' are
     parsed as a nested message, and anything else is added as text.

     Raises Errors.BoundaryError if the starting or the terminating
     boundary cannot be found.
     """
     boundary = container.get_boundary()
     digest = (container.get_type() == 'multipart/digest')

     # Deal with the non-multipart cases first and get out early.
     if not boundary:
         if container.get_type() == 'message/delivery-status':
             # The body is a run of header blocks separated by blank
             # lines; each block is kept as its own message object.  An
             # empty block signals that nothing is left.
             blocks = []
             blockmsg = self._class()
             self._parseheaders(blockmsg, fp)
             while len(blockmsg):
                 blocks.append(blockmsg)
                 blockmsg = self._class()
                 self._parseheaders(blockmsg, fp)
             container.set_payload(blocks)
         elif container.get_main_type() == 'message':
             # Nested message; fall back to a bare message object when
             # there are no headers left to parse.
             try:
                 msg = self.parse(fp)
             except Errors.HeaderParseError:
                 msg = self._class()
                 self._parsebody(msg, fp)
             container.add_payload(msg)
         else:
             container.add_payload(fp.read())
         return

     # Multipart: locate the first boundary.  It has no leading newline
     # requirement because we may be right at the start of the body text.
     delimiter = '--' + boundary
     text = fp.read()
     first = text.find(delimiter)
     if first < 0:
         raise Errors.BoundaryError(
             "Couldn't find starting boundary: %s" % boundary)
     preamble = None
     if first > 0:
         # Text ahead of the first boundary.
         preamble = text[:first]

     # Skip the boundary and its newline (plus the extra newline that a
     # digest puts in front of each subpart).
     body_start = first + len(delimiter) + 1 + digest
     closing = '\n' + delimiter + '--'
     body_end = text.find(closing, body_start)
     if body_end < 0:
         raise Errors.BoundaryError(
             "Couldn't find terminating boundary: %s" % boundary)
     epilogue = None
     epilogue_start = body_end + len(closing)
     if epilogue_start < len(text):
         # Text after the closing boundary.
         epilogue = text[epilogue_start:]

     # The splitter includes the newline that ends the boundary line, and
     # for a digest the extra newline ahead of the subpart's headers.
     splitter = '\n' + delimiter + '\n'
     if digest:
         splitter += '\n'
     for chunk in text[body_start:body_end].split(splitter):
         submsg = self.parsestr(chunk)
         container.preamble = preamble
         container.epilogue = epilogue
         # Make sure the container's payload is a list before adding to it.
         if isinstance(container.get_payload(), ListType):
             container.add_payload(submsg)
         else:
             container.set_payload([submsg])