Exemple #1
0
class EmailMessage(object):
    """Convenience accessors for a single raw e-mail message.

    ``raw_data[0][1]`` is read as the complete message text (this looks like
    the structure returned by an IMAP fetch -- confirm against the caller).
    Headers are parsed once in the constructor; the body is parsed on demand
    by :meth:`get_content`.

    Header getters return an empty string when the header is absent.
    """

    # Matches the first e-mail address in a string.  '+' is accepted in the
    # local part and the top-level domain has no upper length limit, so
    # addresses such as "bob+news@example.online" are returned whole.
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    def __init__(self, raw_data):
        self.raw_data = raw_data
        self.parsed_data = HeaderParser().parsestr(self.raw_data[0][1])

    def _decode(self, parsed_data):
        """Decode a raw (possibly RFC 2047 encoded) header value to text.

        Each chunk produced by ``email.header.decode_header`` is converted by
        the module-level ``_decode_token`` helper and the results are joined
        with single spaces.  A missing header (``None``) yields an empty
        string instead of being handed to ``decode_header``.
        """
        if parsed_data is None:
            return u""
        return " ".join(
            map(_decode_token, email.header.decode_header(parsed_data)))

    def get_from(self):
        """Return the decoded ``From`` header ('' when absent)."""
        return self._decode(self.parsed_data['From'])

    def get_from_email(self):
        """Return the first e-mail address found in ``From``, or ``None``."""
        matches = self._EMAIL_RE.search(self.get_from())
        if matches is not None:
            return matches.group(0)
        return None

    def get_to(self):
        """Return the decoded ``To`` header ('' when absent)."""
        return self._decode(self.parsed_data['To'])

    def get_subject(self):
        """Return the decoded ``Subject`` header ('' when absent)."""
        return self._decode(self.parsed_data['Subject'])

    def get_reply_to(self):
        """Return the decoded ``Reply-To`` header ('' when absent)."""
        return self._decode(self.parsed_data['Reply-To'])

    @staticmethod
    def _parse_date(text):
        """Return the ``parsedate`` time tuple for *text*, or ``None``."""
        text = text.strip()
        if not text:
            # Nothing to parse.
            return None
        return email.utils.parsedate(text)

    def get_received_on(self):
        """Return the date written in the ``Received`` header.

        The result is a naive ``datetime`` built from the date fields as
        they appear in the header; ``parsedate`` does not apply the UTC
        offset.  ``None`` is returned when the header is missing or no date
        can be parsed from it.
        """
        received = self.parsed_data['Received']
        if received is None:
            return None

        # The date-time follows the last ';' of a Received field, however
        # many physical lines the header was folded into.
        parsed = self._parse_date(received.rpartition(';')[2])
        if parsed is None:
            # Fall back to the historical behaviour: date alone on the
            # second physical line of the header.
            lines = received.split("\n")
            if len(lines) > 1:
                parsed = self._parse_date(lines[1])
        if parsed is None:
            return None

        return datetime.fromtimestamp(time.mktime(parsed))

    def _decode_payload(self, payload, charset):
        """Decode the raw bytes of one body part to text.

        The declared *charset* is tried first.  When it is missing, unknown
        to Python, or does not fit the bytes, the encoding is guessed with
        ``chardet``; guesses outside ``ENC_GUESS_LIST`` are replaced by
        ``ENC_DEFAULT``.
        """
        if charset is not None:
            try:
                return payload.decode(charset)
            except (LookupError, UnicodeDecodeError):
                # Declared charset is unusable; guess instead.
                pass

        enc = chardet.detect(payload)['encoding']
        if enc not in ENC_GUESS_LIST:
            enc = ENC_DEFAULT
        return payload.decode(enc)

    def get_content(self, content_type='plain'):
        """Return the text of every body part with the given MIME subtype.

        *content_type* is compared with the part's subtype only (``'plain'``
        selects ``text/plain``, ``'html'`` selects ``text/html``).  Each part
        is decoded with its own declared charset, falling back to the
        charset of the top-level message and then to detection.  The decoded
        parts are joined with single spaces; an empty string is returned
        when no part matches.
        """
        mail = email.message_from_string(self.raw_data[0][1])
        fallback_charset = self.parsed_data.get_content_charset()
        chunks = []

        for part in mail.walk():
            # multipart are just containers, so we skip them
            if part.get_content_maintype() == 'multipart':
                continue
            # we are interested only in the given content_type
            if part.get_content_subtype() != content_type:
                continue

            payload = part.get_payload(decode=True)
            if payload is None:
                continue

            charset = part.get_content_charset() or fallback_charset
            chunks.append(self._decode_payload(payload, charset))

        return u" ".join(chunks)
Exemple #2
0
class EmailMessage(object):
    """Wrapper exposing the headers and body text of one raw e-mail.

    The complete message text is taken from ``raw_data[0][1]`` (apparently
    the shape of an IMAP fetch result -- verify against the caller).  The
    headers are parsed eagerly in ``__init__``; the body is only parsed when
    :meth:`get_content` is called.

    Getters for headers return an empty string if the header is not present.
    """

    # Pattern for the first e-mail address inside a string.  The local part
    # may contain '+', and the top-level domain may be any length >= 2, so
    # "bob+news@example.online" is matched in full.
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')

    def __init__(self, raw_data):
        self.raw_data = raw_data
        self.parsed_data = HeaderParser().parsestr(self.raw_data[0][1])

    def _decode(self, parsed_data):
        """Turn a raw header value (possibly RFC 2047 encoded) into text.

        ``email.header.decode_header`` splits the value into chunks, the
        module-level ``_decode_token`` helper converts each chunk, and the
        pieces are joined with single spaces.  ``None`` (header not present)
        short-circuits to an empty string.
        """
        if parsed_data is None:
            return u""
        return " ".join(map(_decode_token, email.header.decode_header(parsed_data)))

    def get_from(self):
        """Return the decoded ``From`` header, or '' if it is missing."""
        return self._decode(self.parsed_data['From'])

    def get_from_email(self):
        """Return the first address in the ``From`` header, else ``None``."""
        matches = self._EMAIL_RE.search(self.get_from())
        if matches is not None:
            return matches.group(0)
        return None

    def get_to(self):
        """Return the decoded ``To`` header, or '' if it is missing."""
        return self._decode(self.parsed_data['To'])

    def get_subject(self):
        """Return the decoded ``Subject`` header, or '' if it is missing."""
        return self._decode(self.parsed_data['Subject'])

    def get_reply_to(self):
        """Return the decoded ``Reply-To`` header, or '' if it is missing."""
        return self._decode(self.parsed_data['Reply-To'])

    @staticmethod
    def _parse_date(text):
        """Parse *text* with ``email.utils.parsedate``; ``None`` if blank."""
        text = text.strip()
        if not text:
            # Blank input: nothing to hand to the parser.
            return None
        return email.utils.parsedate(text)

    def get_received_on(self):
        """Return the timestamp recorded in the ``Received`` header.

        A naive ``datetime`` is built from the date fields exactly as they
        are written in the header (``parsedate`` ignores the UTC offset).
        Returns ``None`` if the header is absent or holds no parseable date.
        """
        received = self.parsed_data['Received']
        if received is None:
            return None

        # In a Received field the date-time comes after the final ';',
        # regardless of how the header was folded across lines.
        parsed = self._parse_date(received.rpartition(';')[2])
        if parsed is None:
            # Previous behaviour kept as a fallback: the date sits alone on
            # the second physical line.
            lines = received.split("\n")
            if len(lines) > 1:
                parsed = self._parse_date(lines[1])
        if parsed is None:
            return None

        return datetime.fromtimestamp(time.mktime(parsed))

    def _decode_payload(self, payload, charset):
        """Convert the raw bytes of a single body part to text.

        *charset* (the declared one) is attempted first.  If it is ``None``,
        not a codec Python knows, or the bytes do not decode with it, the
        encoding is detected with ``chardet``; any detection result not in
        ``ENC_GUESS_LIST`` is replaced by ``ENC_DEFAULT``.
        """
        if charset is not None:
            try:
                return payload.decode(charset)
            except (LookupError, UnicodeDecodeError):
                # The declared charset cannot be used; detect it instead.
                pass

        enc = chardet.detect(payload)['encoding']
        if enc not in ENC_GUESS_LIST:
            enc = ENC_DEFAULT
        return payload.decode(enc)

    def get_content(self, content_type='plain'):
        """Return the joined text of all body parts of one MIME subtype.

        Only the subtype is compared with *content_type* (``'plain'`` for
        ``text/plain``, ``'html'`` for ``text/html``).  Every matching part
        is decoded using the charset it declares itself, then the top-level
        message charset, then detection.  Parts are joined with a single
        space; the result is '' when nothing matches.
        """
        mail = email.message_from_string(self.raw_data[0][1])
        fallback_charset = self.parsed_data.get_content_charset()
        chunks = []

        for part in mail.walk():
            # multipart are just containers, so we skip them
            if part.get_content_maintype() == 'multipart':
                continue
            # we are interested only in the given content_type
            if part.get_content_subtype() != content_type:
                continue

            payload = part.get_payload(decode=True)
            if payload is None:
                continue

            charset = part.get_content_charset() or fallback_charset
            chunks.append(self._decode_payload(payload, charset))

        return u" ".join(chunks)