def _parse_headers(self, lines):
    """Store the raw header ``lines`` of the current message on ``self._cur``.

    A line starting with a space or tab continues the previous header.
    A ``From `` line is the unix-from envelope when it is the first
    line, is pushed back onto ``self._input`` (ending the parse) when it
    is the last line, and is otherwise recorded as a defect and skipped.
    A line without a colon is recorded as a defect and skipped.
    """
    name = ''
    chunks = []
    final_index = len(lines) - 1
    for index, line in enumerate(lines):
        if line[0] in ' \t':
            # Folded continuation of the previous header.
            if name:
                chunks.append(line)
            else:
                # A continuation with nothing to continue is illegal;
                # note it and otherwise ignore the line.
                problem = Errors.FirstHeaderLineIsContinuationDefect(line)
                self._cur.defects.append(problem)
            continue

        if name:
            # A new logical line begins, so flush the pending header.
            # XXX reconsider the joining of folded lines
            folded = EMPTYSTRING.join(chunks)
            self._cur[name] = folded[:-1].rstrip('\r\n')
            name = ''
            chunks = []

        if line.startswith('From '):
            if index == 0:
                # Envelope header: keep it without its line ending.
                eol = NLCRE_eol.search(line)
                if eol:
                    line = line[:-len(eol.group(0))]
                self._cur.set_unixfrom(line)
            elif index == final_index:
                # Probably the first body line; give it back and stop.
                self._input.unreadline(line)
                return
            else:
                # Oddly placed unix-from line: record it and move on.
                problem = Errors.MisplacedEnvelopeHeaderDefect(line)
                self._cur.defects.append(problem)
            continue

        # Field name and value are separated by the first colon.
        colon = line.find(':')
        if colon < 0:
            problem = Errors.MalformedHeaderDefect(line)
            self._cur.defects.append(problem)
            continue
        name = line[:colon]
        chunks = [line[colon + 1:].lstrip()]

    # Flush whatever header was still pending when the lines ran out.
    if name:
        # XXX reconsider the joining of folded lines
        self._cur[name] = EMPTYSTRING.join(chunks).rstrip('\r\n')
def send(self):
    """Deliver this message by piping it to a local sendmail program.

    The envelope sender comes from the ``from`` header (at most one
    address) and the recipients from ``to`` and ``cc``. If there is no
    ``subject`` header, one is added from the joined ``sys.argv``. The
    first existing path in ``self.sm_path_try`` is used as the binary.

    Raises:
        Errors.MessageError: more than one sender address was given.
        ValueError: no recipients were given, or no sendmail was found.

    A failure to start the command, or a non-zero exit status, is
    reported on stderr rather than raised.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    sender = self.getaddrs('from')
    if sender:
        if len(sender) > 1:
            raise Errors.MessageError(
                'More than one sender specified: %s' % (', '.join(sender)))
        sender = sender[0]

    recipients = self.getaddrs('to', 'cc')
    if not recipients:
        raise ValueError('no recipients specified')

    if not self.has_key('subject'):
        self.add_header('subject', ' '.join(sys.argv))

    for sm_path in self.sm_path_try:
        if os.path.exists(sm_path):
            sendmail = sm_path
            break
    else:
        raise ValueError('sendmail not found')

    args = [sendmail]
    if sender:
        args.append('-f' + sender)
    args.extend(recipients)

    # Pass the argument vector directly instead of a shell string, so
    # header-derived addresses are never interpreted by a shell.
    try:
        proc = subprocess.Popen(args, stdin=subprocess.PIPE)
    except OSError:
        sys.stderr.write('command failed: %s\n' % ' '.join(args))
        return
    try:
        proc.stdin.write(self.as_string(unixfrom=False))
    finally:
        # Always close the pipe and reap the child, even if the write
        # failed, and report a non-zero exit status.
        try:
            proc.stdin.close()
        finally:
            if proc.wait():
                sys.stderr.write('command failed: %s\n' % ' '.join(args))
def _parse_headers(self, lines):
    """Store the raw header ``lines`` of the current message on ``self._cur``.

    Lines starting with a space or tab are continuations of the previous
    header. A ``From `` line is recorded with ``set_unixfrom()`` when it
    is the first line, pushed back onto ``self._input`` (ending the
    parse) when it is the last line, and otherwise recorded as a
    ``MisplacedEnvelopeHeaderDefect`` and ignored. A line without a
    colon is recorded as a ``MalformedHeaderDefect`` and ignored.
    """
    lastheader = ''
    lastvalue = []
    for lineno, line in enumerate(lines):
        # Continuation of the previous header?
        if line[0] in ' \t':
            if not lastheader:
                # Nothing to continue: note the defect, drop the line.
                defect = Errors.FirstHeaderLineIsContinuationDefect(line)
                self._cur.defects.append(defect)
                continue
            lastvalue.append(line)
            continue
        if lastheader:
            # A new logical line starts, so store the pending header.
            lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
            self._cur[lastheader] = lhdr
            lastheader = ''
            lastvalue = []
        # Envelope header, i.e. unix-from.
        if line.startswith('From '):
            if lineno == 0:
                # Keep the envelope line without its line ending.
                mo = NLCRE_eol.search(line)
                if mo:
                    line = line[:-len(mo.group(0))]
                self._cur.set_unixfrom(line)
                continue
            elif lineno == len(lines) - 1:
                # Probably the first body line: push it back and stop.
                self._input.unreadline(line)
                return
            else:
                defect = Errors.MisplacedEnvelopeHeaderDefect(line)
                self._cur.defects.append(defect)
                # Skip the line; without this it would fall through and
                # be split on its first colon and stored as a header.
                continue
        # Split on the colon separating field name from value.
        i = line.find(':')
        if i < 0:
            defect = Errors.MalformedHeaderDefect(line)
            self._cur.defects.append(defect)
            continue
        lastheader = line[:i]
        lastvalue = [line[i + 1:].lstrip()]
    # Out of lines: store the header that was still pending, if any.
    if lastheader:
        self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
def close(self):
    """Close the input, finish parsing and return the root message.

    If the root's main content type is ``multipart`` but
    ``is_multipart()`` is false, a ``MultipartInvariantViolationDefect``
    is appended to its defects before it is returned.
    """
    self._input.close()
    self._call_parse()
    root = self._pop_message()
    claims_multipart = root.get_content_maintype() == 'multipart'
    if claims_multipart and not root.is_multipart():
        root.defects.append(Errors.MultipartInvariantViolationDefect())
    return root
def getaddrs(self, *hdrs):
    """Return the list of addresses found in the named headers.

    Every value of each header in ``hdrs`` is run through
    ``Utils.getaddresses()`` and each address is checked with
    ``valid_addr()``. Addresses are returned in header order; with no
    header names the result is an empty list.

    Raises:
        Errors.MessageError: an address failed validation; the message
            is prefixed with the name of the offending header.
    """
    addrs = []
    for hdr in hdrs:
        for _realname, addr in Utils.getaddresses(self.get_all(hdr, [])):
            try:
                valid_addr(addr)
            except Errors.MessageError as e:
                raise Errors.MessageError('%s: %s' % (hdr, e))
            addrs.append(addr)
    # Callers test and index the result, so it must be handed back.
    return addrs
def close(self):
    """Finish parsing the remaining input and return the root message.

    The message stack must be empty once the root has been popped. A
    root whose main content type is ``multipart`` but for which
    ``is_multipart()`` is false gets a
    ``MultipartInvariantViolationDefect`` appended to its defects.
    """
    self._input.close()
    self._call_parse()
    root = self._pop_message()
    assert not self._msgstack
    # Only a message claiming to be multipart can violate the invariant.
    if root.get_content_maintype() != 'multipart':
        return root
    if not root.is_multipart():
        root.defects.append(Errors.MultipartInvariantViolationDefect())
    return root
def add_payload(self, payload):
    """Append ``payload`` to this message's payload (deprecated).

    An unset payload is replaced by ``payload``; a list payload has
    ``payload`` appended; any other existing payload is turned into a
    two-element list, provided the main type is ``multipart`` or
    missing.

    Raises:
        Errors.MultipartConversionError: a scalar payload exists and
            the main content type is neither ``multipart`` nor missing.

    Note: use ``attach()`` instead; calling this emits a
    ``DeprecationWarning``.
    """
    warnings.warn('add_payload() is deprecated, use attach() instead.',
                  DeprecationWarning, 2)
    current = self._payload
    if current is None:
        self._payload = payload
        return
    if isinstance(current, list):
        current.append(payload)
        return
    # A scalar payload may only become a list for multipart (or
    # untyped) messages.
    if self.get_main_type() not in (None, 'multipart'):
        raise Errors.MultipartConversionError(
            'Message main content type must be "multipart" or missing')
    self._payload = [self._payload, payload]
def _parseheaders(self, container, fp): # Parse the headers, returning a list of header/value pairs. None as # the header means the Unix-From header. lastheader = '' lastvalue = [] lineno = 0 firstbodyline = None while True: # Don't strip the line before we test for the end condition, # because whitespace-only header lines are RFC compliant # continuation lines. line = fp.readline() if not line: break line = line.splitlines()[0] if not line: break # Ignore the trailing newline lineno += 1 # Check for initial Unix From_ line if line.startswith('From '): if lineno == 1: container.set_unixfrom(line) continue elif self._strict: raise Errors.HeaderParseError( 'Unix-from in headers after first rfc822 header') else: # ignore the wierdly placed From_ line # XXX: maybe set unixfrom anyway? or only if not already? continue # Header continuation line if line[0] in ' \t': if not lastheader: raise Errors.HeaderParseError( 'Continuation line seen before first header') lastvalue.append(line) continue # Normal, non-continuation header. BAW: this should check to make # sure it's a legal header, e.g. doesn't contain spaces. Also, we # should expose the header matching algorithm in the API, and # allow for a non-strict parsing mode (that ignores the line # instead of raising the exception). i = line.find(':') if i < 0: if self._strict: raise Errors.HeaderParseError( "Not a header, not a continuation: ``%s''" % line) elif lineno == 1 and line.startswith('--'): # allow through duplicate boundary tags. continue else: # There was no separating blank line as mandated by RFC # 2822, but we're in non-strict mode. So just offer up # this current line as the first body line. firstbodyline = line break if lastheader: container[lastheader] = NL.join(lastvalue) lastheader = line[:i] lastvalue = [line[i + 1:].lstrip()] # Make sure we retain the last header if lastheader: container[lastheader] = NL.join(lastvalue) return firstbodyline
def _parsebody(self, container, fp, firstbodyline=None):
    """Read the rest of ``fp`` as the body of ``container``.

    The handling depends on the container's content type:

    * ``multipart`` with a boundary: the text is split on the boundary
      and each part is parsed with ``self.parsestr()`` and attached.
    * ``multipart`` without a boundary: ``Errors.BoundaryError``.
    * ``message/delivery-status``: the payload becomes a list of
      header-only message objects, one per header block.
    * other ``message`` types: the body is parsed as a nested message.
    * anything else: the remaining text becomes the payload.

    ``firstbodyline``, when not ``None``, is prepended (plus a newline)
    to the text read from ``fp`` in the multipart and plain-text cases.

    Raises ``Errors.BoundaryError`` when the start or end boundary is
    missing in strict mode, or, in non-strict mode, when there is no end
    boundary and the payload does not end in a newline.
    """
    # Parse the body, but first split the payload on the content-type
    # boundary if present.
    boundary = container.get_boundary()
    isdigest = (container.get_content_type() == 'multipart/digest')
    # If there's a boundary and the message has a main type of
    # 'multipart', split the payload text into its constituent parts and
    # parse each separately.  Otherwise, just parse the rest of the body
    # as a single message.  Note: any exceptions raised in the recursive
    # parse need to have their line numbers coerced.
    if container.get_content_maintype() == 'multipart' and boundary:
        preamble = epilogue = None
        # Split into subparts.  The first boundary we're looking for won't
        # always have a leading newline since we're at the start of the
        # body text, and there's not always a preamble before the first
        # boundary.
        separator = '--' + boundary
        payload = fp.read()
        if firstbodyline is not None:
            payload = firstbodyline + '\n' + payload
        # We use an RE here because boundaries can have trailing
        # whitespace.
        mo = re.search(
            r'(?P<sep>' + re.escape(separator) + r')(?P<ws>[ \t]*)',
            payload)
        if not mo:
            if self._strict:
                raise Errors.BoundaryError(
                    "Couldn't find starting boundary: %s" % boundary)
            # Non-strict: keep the whole text as an unsplit payload.
            container.set_payload(payload)
            return
        start = mo.start()
        if start > 0:
            # There's some pre-MIME boundary preamble
            preamble = payload[0:start]
        # Find out what kind of line endings we're using
        start += len(mo.group('sep')) + len(mo.group('ws'))
        mo = NLCRE.search(payload, start)
        if mo:
            start += len(mo.group(0))
        # We create a compiled regexp first because we need to be able to
        # specify the start position, and the module function doesn't
        # support this signature.
        cre = re.compile('(?P<sep>\r\n|\r|\n)' +
                         re.escape(separator) + '--')
        mo = cre.search(payload, start)
        if mo:
            terminator = mo.start()
            linesep = mo.group('sep')
            if mo.end() < len(payload):
                # There's some post-MIME boundary epilogue
                epilogue = payload[mo.end():]
        elif self._strict:
            raise Errors.BoundaryError(
                "Couldn't find terminating boundary: %s" % boundary)
        else:
            # Handle the case of no trailing boundary.  Check that it ends
            # in a blank line.  Some cases (spamspamspam) don't even have
            # that, so fall back to a single trailing newline.
            mo = re.search('(?P<sep>\r\n|\r|\n){2}$', payload)
            if not mo:
                mo = re.search('(?P<sep>\r\n|\r|\n)$', payload)
                if not mo:
                    raise Errors.BoundaryError(
                        'No terminating boundary and no trailing empty line'
                    )
            linesep = mo.group('sep')
            terminator = len(payload)
        # We split the textual payload on the boundary separator, which
        # includes the trailing newline.  If the container is a
        # multipart/digest then the subparts are by default message/rfc822
        # instead of text/plain.  In that case, they'll have an optional
        # block of MIME headers, then an empty line followed by the
        # message headers.
        parts = re.split(
            linesep + re.escape(separator) + r'[ \t]*' + linesep,
            payload[start:terminator])
        for part in parts:
            if isdigest:
                if part.startswith(linesep):
                    # There's no header block so create an empty message
                    # object as the container, and lop off the newline so
                    # we can parse the sub-subobject
                    msgobj = self._class()
                    part = part[len(linesep):]
                else:
                    # NOTE(review): a part with no blank line makes this
                    # unpacking raise ValueError -- confirm intended.
                    parthdrs, part = part.split(linesep + linesep, 1)
                    # msgobj in this case is the "message/rfc822" container
                    msgobj = self.parsestr(parthdrs, headersonly=1)
                # while submsgobj is the message itself
                msgobj.set_default_type('message/rfc822')
                maintype = msgobj.get_content_maintype()
                if maintype in ('message', 'multipart'):
                    submsgobj = self.parsestr(part)
                    msgobj.attach(submsgobj)
                else:
                    msgobj.set_payload(part)
            else:
                msgobj = self.parsestr(part)
            # preamble/epilogue are re-assigned on every iteration with
            # the same values computed above.
            container.preamble = preamble
            container.epilogue = epilogue
            container.attach(msgobj)
    elif container.get_main_type() == 'multipart':
        # Very bad.  A message is a multipart with no boundary!
        raise Errors.BoundaryError(
            'multipart message with no defined boundary')
    elif container.get_type() == 'message/delivery-status':
        # This special kind of type contains blocks of headers separated
        # by a blank line.  We'll represent each header block as a
        # separate Message object
        blocks = []
        while True:
            blockmsg = self._class()
            # The return value (a possible first body line) is ignored.
            self._parseheaders(blockmsg, fp)
            if not len(blockmsg):
                # No more header blocks left
                break
            blocks.append(blockmsg)
        container.set_payload(blocks)
    elif container.get_main_type() == 'message':
        # Create a container for the payload, but watch out for there not
        # being any headers left
        try:
            msg = self.parse(fp)
        except Errors.HeaderParseError:
            msg = self._class()
            self._parsebody(msg, fp)
        container.attach(msg)
    else:
        # Any other type: the remaining text is the payload as-is.
        text = fp.read()
        if firstbodyline is not None:
            text = firstbodyline + '\n' + text
        container.set_payload(text)
def _parsegen(self):
    """Generator that parses one message from ``self._input``.

    A new message is started, its header lines are collected and handed
    to ``_parse_headers()``, and the body is then consumed according to
    the content type of ``self._cur`` (headers-only mode,
    ``message/delivery-status``, other ``message/*``, ``multipart/*``,
    or anything else). Nested messages are parsed by recursing into this
    generator.

    ``NeedMoreData`` is yielded every time the input produces it; the
    generator simply finishes when the message is complete. Structural
    problems are recorded on the message's ``defects`` list.
    """
    # Create a new message and start by parsing headers.
    self._new_message()
    headers = []
    # Collect the headers, searching for a line that doesn't match the RFC
    # 2822 header or continuation pattern (including an empty line).
    for line in self._input:
        if line is NeedMoreData:
            yield NeedMoreData
            continue
        if not headerRE.match(line):
            # If we saw the RFC defined header/body separator
            # (i.e. newline), just throw it away.  Otherwise the line is
            # part of the body so push it back.
            if not NLCRE.match(line):
                self._input.unreadline(line)
            break
        headers.append(line)
    # Done with the headers, so parse them and figure out what we're
    # supposed to see in the body of the message.
    self._parse_headers(headers)
    # Headers-only parsing is a backwards compatibility hack, which was
    # necessary in the older parser, which could throw errors.  All
    # remaining lines in the input are thrown into the message body.
    if self._headersonly:
        lines = []
        while True:
            line = self._input.readline()
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if line == '':
                break
            lines.append(line)
        self._cur.set_payload(EMPTYSTRING.join(lines))
        return
    if self._cur.get_content_type() == 'message/delivery-status':
        # message/delivery-status contains blocks of headers separated by
        # a blank line.  We'll represent each header block as a separate
        # nested message object, but the processing is a bit different
        # than standard message/* types because there is no body for the
        # nested messages.  A blank line separates the subparts.
        while True:
            self._input.push_eof_matcher(NLCRE.match)
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            # The popped message is not used further in this method.
            msg = self._pop_message()
            # We need to pop the EOF matcher in order to tell if we're at
            # the end of the current file, not the end of the last block
            # of message headers.
            self._input.pop_eof_matcher()
            # The input stream must be sitting at the newline or at the
            # EOF.  We want to see if we're at the end of this subpart, so
            # first consume the blank line, then test the next line to see
            # if we're at this subpart's EOF.
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            if line == '':
                break
            # Not at EOF so this is a line we're going to need.
            self._input.unreadline(line)
        return
    if self._cur.get_content_maintype() == 'message':
        # The message claims to be a message/* type, then what follows is
        # another RFC 2822 message.
        for retval in self._parsegen():
            if retval is NeedMoreData:
                yield NeedMoreData
                continue
            break
        self._pop_message()
        return
    if self._cur.get_content_maintype() == 'multipart':
        boundary = self._cur.get_boundary()
        if boundary is None:
            # The message /claims/ to be a multipart but it has not
            # defined a boundary.  That's a problem which we'll handle by
            # reading everything until the EOF and marking the message as
            # defective.
            self._cur.defects.append(Errors.NoBoundaryInMultipartDefect())
            lines = []
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                lines.append(line)
            self._cur.set_payload(EMPTYSTRING.join(lines))
            return
        # Create a line match predicate which matches the inter-part
        # boundary as well as the end-of-multipart boundary.  Don't push
        # this onto the input stream until we've scanned past the
        # preamble.
        separator = '--' + boundary
        boundaryre = re.compile(
            '(?P<sep>' + re.escape(separator) +
            r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
        capturing_preamble = True
        preamble = []
        linesep = False
        while True:
            line = self._input.readline()
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if line == '':
                break
            mo = boundaryre.match(line)
            if mo:
                # If we're looking at the end boundary, we're done with
                # this multipart.  If there was a newline at the end of
                # the closing boundary, then we need to initialize the
                # epilogue with the empty string (see below).
                if mo.group('end'):
                    linesep = mo.group('linesep')
                    break
                # We saw an inter-part boundary.  Were we in the preamble?
                if capturing_preamble:
                    if preamble:
                        # According to RFC 2046, the last newline belongs
                        # to the boundary.
                        lastline = preamble[-1]
                        eolmo = NLCRE_eol.search(lastline)
                        if eolmo:
                            preamble[-1] = lastline[:-len(eolmo.group(0))]
                        self._cur.preamble = EMPTYSTRING.join(preamble)
                    capturing_preamble = False
                    # Push the boundary back so the next pass handles it
                    # as a part separator.
                    self._input.unreadline(line)
                    continue
                # We saw a boundary separating two parts.  Consume any
                # multiple boundary lines that may be following.  Our
                # interpretation of RFC 2046 BNF grammar does not produce
                # body parts within such double boundaries.
                while True:
                    line = self._input.readline()
                    if line is NeedMoreData:
                        yield NeedMoreData
                        continue
                    mo = boundaryre.match(line)
                    if not mo:
                        self._input.unreadline(line)
                        break
                # Recurse to parse this subpart; the input stream points
                # at the subpart's first line.
                self._input.push_eof_matcher(boundaryre.match)
                for retval in self._parsegen():
                    if retval is NeedMoreData:
                        yield NeedMoreData
                        continue
                    break
                # Because of RFC 2046, the newline preceding the boundary
                # separator actually belongs to the boundary, not the
                # previous subpart's payload (or epilogue if the previous
                # part is a multipart).
                if self._last.get_content_maintype() == 'multipart':
                    epilogue = self._last.epilogue
                    if epilogue == '':
                        self._last.epilogue = None
                    elif epilogue is not None:
                        mo = NLCRE_eol.search(epilogue)
                        if mo:
                            end = len(mo.group(0))
                            self._last.epilogue = epilogue[:-end]
                else:
                    payload = self._last.get_payload()
                    if isinstance(payload, basestring):
                        mo = NLCRE_eol.search(payload)
                        if mo:
                            payload = payload[:-len(mo.group(0))]
                            self._last.set_payload(payload)
                self._input.pop_eof_matcher()
                self._pop_message()
                # Set the multipart up for newline cleansing, which will
                # happen if we're in a nested multipart.
                self._last = self._cur
            else:
                # I think we must be in the preamble
                assert capturing_preamble
                preamble.append(line)
        # We've seen either the EOF or the end boundary.  If we're still
        # capturing the preamble, we never saw the start boundary.  Note
        # that as a defect and store the captured text as the payload.
        if capturing_preamble:
            self._cur.defects.append(Errors.StartBoundaryNotFoundDefect())
            self._cur.set_payload(EMPTYSTRING.join(preamble))
            epilogue = []
            # NOTE(review): this loop only drains the input; nothing is
            # appended, so the epilogue set below is always the empty
            # string -- confirm that discarding these lines is intended.
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
            self._cur.epilogue = EMPTYSTRING.join(epilogue)
            return
        # If the end boundary ended in a newline, we'll need to make sure
        # the epilogue isn't None
        if linesep:
            epilogue = ['']
        else:
            epilogue = []
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            epilogue.append(line)
        # Any CRLF at the front of the epilogue is not technically part of
        # the epilogue.  Also, watch out for an empty string epilogue,
        # which means a single newline.
        if epilogue:
            firstline = epilogue[0]
            bolmo = NLCRE_bol.match(firstline)
            if bolmo:
                epilogue[0] = firstline[len(bolmo.group(0)):]
        self._cur.epilogue = EMPTYSTRING.join(epilogue)
        return
    # Otherwise, it's some non-multipart type, so the entire rest of the
    # file contents becomes the payload.
    lines = []
    for line in self._input:
        if line is NeedMoreData:
            yield NeedMoreData
            continue
        lines.append(line)
    self._cur.set_payload(EMPTYSTRING.join(lines))
def attach(self, payload):
    """Always refuse to attach ``payload``.

    The public API prohibits attaching subparts to MIMEBase-derived
    subtypes that are not of content type multipart/*, so this
    unconditionally raises ``Errors.MultipartConversionError``.
    """
    reason = 'Cannot attach additional subparts to non-multipart/*'
    raise Errors.MultipartConversionError(reason)
def _parsegen(self):
    """Generator that parses one message from ``self._input``.

    A new message is started, its header lines are collected and passed
    to ``_parse_headers()``, and the body is consumed according to the
    content type of ``self._cur``. Nested messages are handled by
    recursing into this generator.

    ``NeedMoreData`` is yielded whenever the input produces it. The
    generator ends with a bare ``return``: a ``return`` with a value is
    a syntax error inside a generator on Python 2. Structural problems
    are recorded on the message's ``defects`` list.
    """
    self._new_message()
    headers = []
    # Collect header lines up to the first line that is not a header or
    # continuation.
    for line in self._input:
        if line is NeedMoreData:
            yield NeedMoreData
            continue
        if not headerRE.match(line):
            # A bare newline is the header/body separator and is dropped;
            # anything else belongs to the body, so push it back.
            if not NLCRE.match(line):
                self._input.unreadline(line)
            break
        headers.append(line)
    self._parse_headers(headers)

    if self._headersonly:
        # Headers-only mode: everything that remains is the payload.
        lines = []
        while True:
            line = self._input.readline()
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if line == '':
                break
            lines.append(line)
        self._cur.set_payload(EMPTYSTRING.join(lines))
        return

    if self._cur.get_content_type() == 'message/delivery-status':
        # Blocks of headers separated by blank lines; each block is
        # parsed as a nested message.
        while True:
            self._input.push_eof_matcher(NLCRE.match)
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            self._pop_message()
            # Pop the matcher so a real EOF can be told apart from the
            # end of the header block just parsed.
            self._input.pop_eof_matcher()
            # Two reads: the first consumes the blank separator line, the
            # second is tested for EOF.
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            if line == '':
                break
            # Not at EOF, so this line starts the next block.
            self._input.unreadline(line)
        return

    if self._cur.get_content_maintype() == 'message':
        # What follows is itself a message.
        for retval in self._parsegen():
            if retval is NeedMoreData:
                yield NeedMoreData
                continue
            break
        self._pop_message()
        return

    if self._cur.get_content_maintype() == 'multipart':
        boundary = self._cur.get_boundary()
        if boundary is None:
            # Multipart without a boundary: flag the defect and keep the
            # rest of the input as a plain payload.
            self._cur.defects.append(Errors.NoBoundaryInMultipartDefect())
            lines = []
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                lines.append(line)
            self._cur.set_payload(EMPTYSTRING.join(lines))
            return

        # Matches both the inter-part and the closing boundary line.
        separator = '--' + boundary
        boundaryre = re.compile(
            '(?P<sep>' + re.escape(separator) +
            ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
        capturing_preamble = True
        preamble = []
        linesep = False
        while True:
            line = self._input.readline()
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            if line == '':
                break
            mo = boundaryre.match(line)
            if not mo:
                # Text before the first boundary is preamble.
                preamble.append(line)
                continue
            if mo.group('end'):
                # Closing boundary: remember whether it ended in a
                # newline, which decides how the epilogue is initialised.
                linesep = mo.group('linesep')
                break
            if capturing_preamble:
                if preamble:
                    # The newline before a boundary belongs to the
                    # boundary, not to the preamble.
                    lastline = preamble[-1]
                    eolmo = NLCRE_eol.search(lastline)
                    if eolmo:
                        preamble[-1] = lastline[:-len(eolmo.group(0))]
                    self._cur.preamble = EMPTYSTRING.join(preamble)
                capturing_preamble = False
                # Push the boundary back so the next pass treats it as a
                # part separator.
                self._input.unreadline(line)
                continue
            # Skip any run of repeated boundary lines; no body parts are
            # produced between them.
            while True:
                line = self._input.readline()
                if line is NeedMoreData:
                    yield NeedMoreData
                    continue
                mo = boundaryre.match(line)
                if not mo:
                    self._input.unreadline(line)
                    break
            # Parse the subpart; it ends at the next boundary line.
            self._input.push_eof_matcher(boundaryre.match)
            for retval in self._parsegen():
                if retval is NeedMoreData:
                    yield NeedMoreData
                    continue
                break
            # The newline before the boundary belongs to the boundary, so
            # trim it from the previous part's epilogue or payload.
            if self._last.get_content_maintype() == 'multipart':
                epilogue = self._last.epilogue
                if epilogue == '':
                    self._last.epilogue = None
                elif epilogue is not None:
                    mo = NLCRE_eol.search(epilogue)
                    if mo:
                        end = len(mo.group(0))
                        self._last.epilogue = epilogue[:-end]
            else:
                payload = self._last.get_payload()
                if isinstance(payload, basestring):
                    mo = NLCRE_eol.search(payload)
                    if mo:
                        payload = payload[:-len(mo.group(0))]
                        self._last.set_payload(payload)
            self._input.pop_eof_matcher()
            self._pop_message()
            # Lets an enclosing multipart trim this one's epilogue.
            self._last = self._cur

        if capturing_preamble:
            # The start boundary was never seen: flag the defect and keep
            # the captured text as the payload.
            self._cur.defects.append(Errors.StartBoundaryNotFoundDefect())
            self._cur.set_payload(EMPTYSTRING.join(preamble))
            epilogue = []
            # The rest of the input is drained but not stored, so the
            # epilogue set below is the empty string.
            for line in self._input:
                if line is NeedMoreData:
                    yield NeedMoreData
            self._cur.epilogue = EMPTYSTRING.join(epilogue)
            return

        # If the closing boundary ended in a newline the epilogue must
        # not end up as None, so seed it with an empty string.
        if linesep:
            epilogue = ['']
        else:
            epilogue = []
        for line in self._input:
            if line is NeedMoreData:
                yield NeedMoreData
                continue
            epilogue.append(line)
        # A line break at the very front is not part of the epilogue.
        if epilogue:
            firstline = epilogue[0]
            bolmo = NLCRE_bol.match(firstline)
            if bolmo:
                epilogue[0] = firstline[len(bolmo.group(0)):]
        self._cur.epilogue = EMPTYSTRING.join(epilogue)
        return

    # Any other type: the rest of the input is the payload.
    lines = []
    for line in self._input:
        if line is NeedMoreData:
            yield NeedMoreData
            continue
        lines.append(line)
    self._cur.set_payload(EMPTYSTRING.join(lines))
def valid_addr(addr):
    """Check ``addr`` against ``value_addr_re``.

    Returns ``None`` when ``value_addr_re.match(addr)`` succeeds.

    Raises:
        Errors.MessageError: the address does not match.
    """
    if value_addr_re.match(addr):
        return
    raise Errors.MessageError('invalid address %r' % addr)