def _unique_addresses(message, fields):
    """Return the bare addresses found in the header ``fields`` of ``message``.

    Duplicates are removed on the (display name, address) pair and the
    first-seen order is kept, so the result is a real, re-iterable list with
    a deterministic order.
    """
    values = [value for field in fields for value in message.get_all(field, [])]
    seen = set()
    addresses = []
    for pair in email.utils.getaddresses(values):
        if pair not in seen:
            seen.add(pair)
            # Only the address part is kept; the display name is discarded.
            addresses.append(pair[1])
    return addresses


def parsemail(mail, logger='none'):
    """Parse a raw mail string into a dictionary of selected metadata.

    :param mail: the complete raw message (headers and body) as a string.
    :param logger: name of the logger that receives the info messages.
    :returns: a dict with the keys ``type`` (always ``'mail'``), ``from``
        and ``to`` (lists of bare addresses), ``upload_date`` (current UTC
        time), ``labels`` and, only when a ``Date`` header is present,
        ``date`` (that header converted to UTC).  Dates are formatted as
        ``YYYY-MM-DD hh:mm:ss``.
    :raises MailParserException: if ``mail`` is empty, the parser reports
        defects, or the ``Date`` header cannot be converted.
    """
    log = logging.getLogger(logger)

    if not mail:
        raise MailParserException('Empty mail')

    data = {
        'type': 'mail',
    }

    # parse mail
    try:
        message = email.parser.Parser().parsestr(mail)
    except UnicodeEncodeError:
        # Legacy fallback for unicode input that cannot be encoded implicitly.
        message = email.parser.Parser().parsestr(mail.encode('latin_1'))

    # test defects
    if message.defects:
        raise MailParserException(
            "Parser signaled defect:\n %s" % (str(message.defects)))

    # Encoded words are not decoded here, because they should only appear in
    # the display name, which is discarded anyway.

    # parse from and sender addresses
    data['from'] = _unique_addresses(message, ('from', 'sender'))
    log.info("From: %s", ' '.join(data['from']))

    # parse recipient addresses
    data['to'] = _unique_addresses(message, ('to', 'cc'))
    log.info("To: %s", ' '.join(data['to']))

    # parse date and convert it to standard format in UTC
    date_header = message.get('Date', None)
    if date_header:
        try:
            # guesses format and parses 10-tuple; a None result (unparsable
            # date) fails on the subscript below and is reported like any
            # other conversion error
            parsedtime = email.utils.parsedate_tz(date_header)
            # seconds since epoch
            utc_timestamp = calendar.timegm(parsedtime[0:9]) - parsedtime[9]
            # formatted
            data['date'] = time.strftime(
                "%Y-%m-%d %H:%M:%S", time.gmtime(utc_timestamp))
            log.info("Date: %s", data['date'])
        except Exception as e:
            raise MailParserException(
                "Could not convert %s to YYYY-MM-DD hh:mm:ss\n %s"
                % (date_header, str(e)))

    # format current UTC time
    data['upload_date'] = time.strftime(
        "%Y-%m-%d %H:%M:%S", time.gmtime(time.time()))
    log.info("upload-date: %s", data['upload_date'])

    # add labels: a mail counts as unread unless its Status header has 'R'
    data['labels'] = []
    status = message.get('Status', None)
    if not status or 'R' not in status:
        data['labels'].append('unread')

    if config.autolabels:
        # presumably the labeller adds further entries to data['labels'] --
        # verify against labels.Labeller
        labeller = labels.Labeller(path=config.autolabels)
        labeller.check(data)
    log.info("Labels: %s", ' '.join(data['labels']))

    return data
def get_receivers(self):
    """
    Return the bare email addresses of every receiver of ``self.message``.

    The TO, CC and BCC headers are read in that order; display names are
    dropped and only the address parts are returned.
    """
    msg = self.message
    header_values = [
        str(value)
        for header in ('to', 'cc', 'bcc')
        for value in msg.get_all(header, [])
    ]
    # getaddresses yields (display name, address) pairs; keep the address
    return [address for _name, address in email.utils.getaddresses(header_values)]
def extract_sender(
    message: Union[email.message.EmailMessage, email.message.Message]
) -> Optional[str]:
    """
    Return the first address of the message's sender header, if any.

    When the message carries a "Resent-Date" header, the "Resent-Sender" /
    "Resent-From" headers are consulted instead of "Sender" / "From".
    Returns None when the chosen header is absent.

    Raises ValueError if more than one "Resent-Date" header is present.
    """
    resent_dates = message.get_all("Resent-Date")

    if resent_dates is not None and len(resent_dates) > 1:
        raise ValueError("Message has more than one 'Resent-' header block")

    if resent_dates:
        sender_field, from_field = "Resent-Sender", "Resent-From"
    else:
        sender_field, from_field = "Sender", "From"

    # Prefer the sender field per RFC 2822:3.6.2.
    chosen_field = sender_field if sender_field in message else from_field
    header_value = message[chosen_field]

    if header_value is None:
        return None

    return extract_addresses(header_value)[0]
def extract_recipients(message: email.message.Message) -> List[str]:
    """
    Collect the addresses from every recipient header of the message.

    When the message carries a "Resent-Date" header, the "Resent-To",
    "Resent-Cc" and "Resent-Bcc" headers are read; otherwise "To", "Cc" and
    "Bcc". Addresses are returned in header order.

    Raises ValueError if more than one "Resent-Date" header is present.
    """
    resent_dates = message.get_all("Resent-Date")

    if resent_dates is not None and len(resent_dates) > 1:
        raise ValueError("Message has more than one 'Resent-' header block")

    if resent_dates:
        header_names = ("Resent-To", "Resent-Cc", "Resent-Bcc")
    else:
        header_names = ("To", "Cc", "Bcc")

    return [
        address
        for name in header_names
        for value in message.get_all(name, failobj=[])
        for address in extract_addresses(value)
    ]
def get_mail_addresses(message, header_name):
    """
    Retrieve all email addresses from one message header.

    Every occurrence of the header is passed through C{_friendly_header}
    and parsed with C{email.utils.getaddresses}. For each parsed entry:

      - if there is an address but no name, the address is also used as
        the name;
      - the address is replaced by C{''} unless it is US-ASCII and matches
        C{email_address_re};
      - the name is passed through C{decode_mail_header}.

    @type message: email.message.Message
    @param message: the email message
    @type header_name: str
    @param header_name: the name of the header, can be 'from', 'to', 'cc'
    or any other header containing one or more email addresses
    @rtype: list
    @returns: a list of C{(name, address)} tuples, one per parsed entry;
    an empty list when the header is absent

    Example (addresses redacted), for a message with the headers
    C{From: Me <*****@*****.**>} and
    C{To: A <*****@*****.**>, B <*****@*****.**>}::

        get_mail_addresses(msg, 'from')
            -> [(u'Me', '*****@*****.**')]
        get_mail_addresses(msg, 'to')
            -> [(u'A', '*****@*****.**'), (u'B', '*****@*****.**')]
    """
    header_values = [
        _friendly_header(value) for value in message.get_all(header_name, [])
    ]

    cleaned = []
    for display_name, address in email.utils.getaddresses(header_values):
        if not display_name and address:
            # only one string! Is it the address or the address name ?
            # use the same for both and see later
            display_name = address

        # address must be ascii only and must match address regex
        if not (is_usascii(address) and email_address_re.match(address)):
            address = ''

        cleaned.append((decode_mail_header(display_name), address))

    return cleaned
def find_emailgateway_recipient(message: message.Message) -> str:
    """
    Return the first recipient address that matches the email gateway pattern.

    The regex is built from settings.EMAIL_GATEWAY_PATTERN by escaping the
    literal parts around each '%s' and joining them with a non-greedy
    wildcard. Raises ZulipEmailForwardError when no address matches.
    """
    # We can't use Delivered-To; if there is a X-Gm-Original-To
    # it is more accurate, so try to find the most-accurate
    # recipient list in descending priority order
    prioritized_headers = ["X-Gm-Original-To", "Delivered-To",
                           "Resent-To", "Resent-CC", "To", "CC"]

    escaped_parts = map(re.escape, settings.EMAIL_GATEWAY_PATTERN.split('%s'))
    gateway_re = re.compile(".*?".join(escaped_parts))

    raw_values = []
    for header_name in prioritized_headers:
        raw_values.extend(str(value) for value in message.get_all(header_name, []))

    for _display_name, address in getaddresses(raw_values):
        if gateway_re.match(address):
            return address

    raise ZulipEmailForwardError("Missing recipient in mirror email")
def find_emailgateway_recipient(message: message.Message) -> str:
    """
    Return the recipient address that matches the email gateway pattern.

    Only the first header among X-Gm-Original-To, Delivered-To and To that
    is present on the message is considered. Its values are parsed into
    individual addresses, so a value such as ``Name <addr>`` or a
    comma-separated list yields the bare matching address rather than the
    whole raw header string.

    The regex is built from settings.EMAIL_GATEWAY_PATTERN by escaping the
    literal parts around each '%s' and joining them with a non-greedy
    wildcard. Raises ZulipEmailForwardError when no address matches.
    """
    # Local import so this function does not rely on a module-level import.
    from email.utils import getaddresses

    # We can't use Delivered-To; if there is a X-Gm-Original-To
    # it is more accurate, so try to find the most-accurate
    # recipient list in descending priority order
    recipient_headers = ["X-Gm-Original-To", "Delivered-To", "To"]

    recipients = []  # type: List[Union[str, Header]]
    for recipient_header in recipient_headers:
        r = message.get_all(recipient_header, None)
        if r:
            recipients = r
            break

    pattern_parts = [re.escape(part) for part in settings.EMAIL_GATEWAY_PATTERN.split('%s')]
    match_email_re = re.compile(".*?".join(pattern_parts))

    # Match on the parsed address part only, never on the raw header text.
    header_values = [str(recipient) for recipient in recipients]
    for _display_name, address in getaddresses(header_values):
        if match_email_re.match(address):
            return address

    raise ZulipEmailForwardError("Missing recipient in mirror email")
def test_dkim_and_feedback_loop(self):
    # A mailing configured with both DKIM and a feedback loop must produce a
    # message that has a Feedback-ID header and two DKIM signatures, both of
    # which verify.
    privkey = self._get_dkim_privkey()
    dkim_settings = {
        'selector': 'mail',
        'domain': 'unittest.cloud-mailing.net',
        'privkey': privkey,
    }
    # Separate copies, so the two settings never share one dict object.
    mailing = factories.MailingFactory(
        dkim=dict(dkim_settings),
        feedback_loop={'dkim': dict(dkim_settings), 'sender_id': 'CloudMailing'},
    )
    recipient = factories.RecipientFactory(mailing=mailing)

    message_str = self._customize(recipient)
    self.assertNotIn(b"\r\n", message_str)

    parser = email.parser.Parser()
    message = parser.parsestr(message_str, headersonly=False)
    # unittest assertions (not a bare assert) so the check survives -O
    self.assertIsInstance(message, email.message.Message)
    self.assertIn('Feedback-ID', message)
    # Default to [] so a missing header fails the assertion instead of
    # raising TypeError on len(None).
    self.assertEqual(2, len(message.get_all('DKIM-Signature', [])))

    d = dkim.DKIM(message_str)
    self.assertTrue(d.verify(0, dnsfunc=self._get_txt))
    self.assertTrue(d.verify(1, dnsfunc=self._get_txt))