class EmailMessage(object):
    """Read-only convenience wrapper around one raw e-mail message.

    ``raw_data`` is indexed as ``raw_data[0][1]`` to obtain the full message
    text (presumably the response of an IMAP fetch -- confirm with the
    caller).  Headers are parsed once in ``__init__``; the body is parsed on
    demand by :meth:`get_content`.
    """

    # Address matcher used by get_from_email.  The local part accepts "+"
    # (sub-addressing) and the top-level domain has no upper length bound, so
    # "john+tag@example.technology" is returned whole instead of truncated.
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    def __init__(self, raw_data):
        """Store ``raw_data`` and parse the headers of ``raw_data[0][1]``."""
        self.raw_data = raw_data
        self.parsed_data = HeaderParser().parsestr(self.raw_data[0][1])

    def _decode(self, parsed_data):
        """Decode one (possibly RFC 2047 encoded) header value.

        Every ``(text, charset)`` pair produced by
        ``email.header.decode_header`` is converted by the module-level
        ``_decode_token`` helper and the results are joined with single
        spaces.  A missing header (``None``) yields an empty string.
        """
        if parsed_data is None:
            # Message.__getitem__ returns None for absent headers;
            # decode_header() cannot handle that value meaningfully.
            return u""
        tokens = email.header.decode_header(parsed_data)
        return " ".join(map(_decode_token, tokens))

    def get_from(self):
        """Return the decoded ``From`` header ("" when absent)."""
        return self._decode(self.parsed_data['From'])

    def get_from_email(self):
        """Return the first e-mail address found in ``From``, or ``None``."""
        match = self._EMAIL_RE.search(self.get_from())
        if match is None:
            return None
        return match.group(0)

    def get_to(self):
        """Return the decoded ``To`` header ("" when absent)."""
        return self._decode(self.parsed_data['To'])

    def get_subject(self):
        """Return the decoded ``Subject`` header ("" when absent)."""
        return self._decode(self.parsed_data['Subject'])

    def get_reply_to(self):
        """Return the decoded ``Reply-To`` header ("" when absent)."""
        return self._decode(self.parsed_data['Reply-To'])

    def get_received_on(self):
        """Return the timestamp of the ``Received`` header as a datetime.

        The date is the text after the last ``;`` of the header, regardless
        of how the header is folded across lines.  The result is a naive
        ``datetime`` carrying the wall-clock fields written in the header;
        ``email.utils.parsedate`` does not apply the numeric UTC offset.

        Returns ``None`` when the header is missing or holds no parsable
        date.
        """
        received = self.parsed_data['Received']
        if received is None:
            return None
        date_text = received.rpartition(';')[2].strip()
        parsed = email.utils.parsedate(date_text)
        if parsed is None:
            return None
        return datetime.fromtimestamp(time.mktime(parsed))

    def _guess_encoding(self, data):
        """Guess the encoding of ``data`` with chardet.

        Guesses outside ``ENC_GUESS_LIST`` are replaced by ``ENC_DEFAULT``.
        """
        enc = chardet.detect(data)['encoding']
        if enc not in ENC_GUESS_LIST:
            enc = ENC_DEFAULT
        return enc

    def get_content(self, content_type='plain'):
        """Return the decoded text of all parts with the given MIME subtype.

        ``content_type`` is compared against each part's subtype (for
        example ``'plain'`` or ``'html'``).  Each matching part is decoded
        with, in order of preference: the charset declared on the part, the
        charset declared on the message headers, or a chardet guess.  The
        decoded parts are joined with single spaces; an empty string is
        returned when nothing matches.
        """
        mail = email.message_from_string(self.raw_data[0][1])
        message_charset = self.parsed_data.get_content_charset()
        texts = []
        for part in mail.walk():
            # multipart are just containers, so we skip them
            if part.get_content_maintype() == 'multipart':
                continue
            # we are interested only in the given content_type
            if part.get_content_subtype() != content_type:
                continue
            payload = part.get_payload(decode=True)
            # Multipart messages normally declare the charset per part, not
            # on the top-level headers, so the part's own value wins.
            charset = part.get_content_charset() or message_charset
            if charset is None:
                charset = self._guess_encoding(payload)
            texts.append(payload.decode(charset))
        return u" ".join(texts)
class EmailMessage(object):
    """Read-only convenience wrapper around one raw e-mail message.

    ``raw_data`` is indexed as ``raw_data[0][1]`` to obtain the full message
    text (presumably the response of an IMAP fetch -- confirm with the
    caller).  Headers are parsed once in ``__init__``; the body is parsed on
    demand by :meth:`get_content`.
    """

    # Address matcher used by get_from_email.  The local part accepts "+"
    # (sub-addressing) and the top-level domain has no upper length bound, so
    # "john+tag@example.technology" is returned whole instead of truncated.
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    def __init__(self, raw_data):
        """Store ``raw_data`` and parse the headers of ``raw_data[0][1]``."""
        self.raw_data = raw_data
        self.parsed_data = HeaderParser().parsestr(self.raw_data[0][1])

    def _decode(self, parsed_data):
        """Decode one (possibly RFC 2047 encoded) header value.

        Every ``(text, charset)`` pair produced by
        ``email.header.decode_header`` is converted by the module-level
        ``_decode_token`` helper and the results are joined with single
        spaces.  A missing header (``None``) yields an empty string.
        """
        if parsed_data is None:
            # Message.__getitem__ returns None for absent headers;
            # decode_header() cannot handle that value meaningfully.
            return u""
        tokens = email.header.decode_header(parsed_data)
        return " ".join(map(_decode_token, tokens))

    def get_from(self):
        """Return the decoded ``From`` header ("" when absent)."""
        return self._decode(self.parsed_data['From'])

    def get_from_email(self):
        """Return the first e-mail address found in ``From``, or ``None``."""
        match = self._EMAIL_RE.search(self.get_from())
        if match is None:
            return None
        return match.group(0)

    def get_to(self):
        """Return the decoded ``To`` header ("" when absent)."""
        return self._decode(self.parsed_data['To'])

    def get_subject(self):
        """Return the decoded ``Subject`` header ("" when absent)."""
        return self._decode(self.parsed_data['Subject'])

    def get_reply_to(self):
        """Return the decoded ``Reply-To`` header ("" when absent)."""
        return self._decode(self.parsed_data['Reply-To'])

    def get_received_on(self):
        """Return the timestamp of the ``Received`` header as a datetime.

        The date is the text after the last ``;`` of the header, regardless
        of how the header is folded across lines.  The result is a naive
        ``datetime`` carrying the wall-clock fields written in the header;
        ``email.utils.parsedate`` does not apply the numeric UTC offset.

        Returns ``None`` when the header is missing or holds no parsable
        date.
        """
        received = self.parsed_data['Received']
        if received is None:
            return None
        date_text = received.rpartition(';')[2].strip()
        parsed = email.utils.parsedate(date_text)
        if parsed is None:
            return None
        return datetime.fromtimestamp(time.mktime(parsed))

    def _guess_encoding(self, data):
        """Guess the encoding of ``data`` with chardet.

        Guesses outside ``ENC_GUESS_LIST`` are replaced by ``ENC_DEFAULT``.
        """
        enc = chardet.detect(data)['encoding']
        if enc not in ENC_GUESS_LIST:
            enc = ENC_DEFAULT
        return enc

    def get_content(self, content_type='plain'):
        """Return the decoded text of all parts with the given MIME subtype.

        ``content_type`` is compared against each part's subtype (for
        example ``'plain'`` or ``'html'``).  Each matching part is decoded
        with, in order of preference: the charset declared on the part, the
        charset declared on the message headers, or a chardet guess.  The
        decoded parts are joined with single spaces; an empty string is
        returned when nothing matches.
        """
        mail = email.message_from_string(self.raw_data[0][1])
        message_charset = self.parsed_data.get_content_charset()
        texts = []
        for part in mail.walk():
            # multipart are just containers, so we skip them
            if part.get_content_maintype() == 'multipart':
                continue
            # we are interested only in the given content_type
            if part.get_content_subtype() != content_type:
                continue
            payload = part.get_payload(decode=True)
            # Multipart messages normally declare the charset per part, not
            # on the top-level headers, so the part's own value wins.
            charset = part.get_content_charset() or message_charset
            if charset is None:
                charset = self._guess_encoding(payload)
            texts.append(payload.decode(charset))
        return u" ".join(texts)