Esempio n. 1
0
class MailMessage(object):
    """
    Mail message structure.

    Take a mail in raw RFC 2822 format and parse it to expose its bodies,
    participants, attachments, external references and grouped headers.

    Instance attributes:
        raw: the raw data handed to the constructor.
        mail: the ``Message`` object built from ``raw``.
        warnings: list of parser defects found on ``mail`` (empty if none).
        body_plain, body_html: bodies filled in by ``get_bodies``.
    """

    zope.interface.implements(IMessageParser)

    recipient_headers = ['From', 'To', 'Cc', 'Bcc']
    message_protocol = 'email'
    # Class-level defaults only; instances rebind these names.
    warnings = []
    body_html = ""
    body_plain = ""

    def __init__(self, raw_data):
        """Parse an RFC2822,5322 mail message.

        :param raw_data: raw mail, passed unchanged to ``Message``.
        :raises Exception: whatever ``Message`` raises is logged and
            re-raised unchanged.
        """
        self.raw = raw_data
        try:
            self.mail = Message(raw_data)
        except Exception as exc:
            log.error('Parse message failed %s' % exc)
            # Bare raise keeps the original traceback.
            raise
        self.warnings = []
        if self.mail.defects:
            # XXX what to do ?
            log.warn('Defects on parsed mail %r' % self.mail.defects)
            self.warnings = list(self.mail.defects)
            # Historical (misspelled) attribute, kept for existing readers.
            self.warning = self.mail.defects
        self.get_bodies()

    @staticmethod
    def _decode_text_part(part):
        """Return the decoded payload of ``part`` passed through to_utf8."""
        charset = part.get_param("charset")
        if isinstance(charset, tuple):
            # RFC 2231 encoded parameter: (charset, language, value).
            charset = unicode(charset[2], charset[0] or "us-ascii")
        return to_utf8(part.get_payload(decode=True), charset)

    def _store_body(self, part):
        """Store ``part`` as plain or html body; ignore other types."""
        content_type = part.get_content_type()
        if content_type == "text/plain":
            self.body_plain = self._decode_text_part(part)
        elif content_type == "text/html":
            self.body_html = self._decode_text_part(part)

    def get_bodies(self):
        """Extract body alternatives, if any.

        Fills ``self.body_plain`` and/or ``self.body_html``. Only the top
        level parts are inspected, plus the direct children of the first
        nested multipart container (scanning stops after that container).
        """
        if not self.mail.get("Content-Type", None):
            # No declared content type: keep the decoded payload untouched.
            self.body_plain = self.mail.get_payload(decode=True)
            return

        if not self.mail.is_multipart():
            # Single part: anything that is not HTML is treated as plain.
            text = self._decode_text_part(self.mail)
            if self.mail.get_content_type() == "text/html":
                self.body_html = text
            else:
                self.body_plain = text
            return

        for top_level_part in self.mail.get_payload():
            if top_level_part.get_content_maintype() == "multipart":
                for alternative in top_level_part.get_payload():
                    self._store_body(alternative)
                break
            self._store_body(top_level_part)

    @property
    def subject(self):
        """Mail subject.

        Returns an empty string when the mail has no Subject header.
        NOTE(review): only the first chunk returned by ``decode_header``
        is used; subjects made of several encoded words are truncated.
        """
        raw_subject = self.mail.get('Subject')
        if raw_subject is None:
            return ''
        value, charset = decode_header(raw_subject)[0]
        if charset is None:
            return value
        return value.decode(charset, "replace").encode("utf-8", "replace")

    @property
    def size(self):
        """Get mail size, computed as the length of ``mail.as_string()``."""
        return len(self.mail.as_string())

    @property
    def external_references(self):
        """Return mail references to be used as external references.

        Makes use of these RFC5322 headers:
            message-id
            in-reply-to
            references
        Header strings are pruned to extract the address-like part only.

        :return: dict with keys ``message_id``, ``parent_id`` (both may be
            None) and ``ancestors_ids`` (list, possibly empty).
        """
        ext_id = self.mail.get('Message-Id')
        parent_id = self.mail.get('In-Reply-To')
        ref = self.mail.get_all("References")
        ref_ids = [address[1] for address in getaddresses(ref)] if ref else []
        mid = clean_email_address(ext_id)[1] if ext_id else None
        pid = clean_email_address(parent_id)[1] if parent_id else None
        return {'message_id': mid, 'parent_id': pid, 'ancestors_ids': ref_ids}

    @property
    def date(self):
        """Get the date of a mail message.

        Falls back to the current UTC time when the Date header is missing
        or cannot be parsed.
        NOTE(review): the parsed value is a naive datetime built with
        ``fromtimestamp`` while the fallback is timezone-aware -- confirm
        what callers expect.
        """
        mail_date = self.mail.get('Date')
        if mail_date:
            try:
                tmp_date = parsedate_tz(mail_date)
                return datetime.datetime.fromtimestamp(mktime_tz(tmp_date))
            except (TypeError, ValueError, OverflowError):
                # parsedate_tz returned None or the date is out of range.
                log.error('Invalid date in mail {}'.format(mail_date))
        log.debug('No date on mail using now (UTC)')
        return datetime.datetime.now(tz=pytz.utc)

    @property
    def participants(self):
        """Mail participants.

        Builds one ``MailParticipant`` per address found in the headers
        listed in ``recipient_headers``. When a header holds a comma, it is
        split on commas and pieces without an ``@`` are dropped.
        """
        participants = []
        for header in self.recipient_headers:
            value = self.mail.get(header)
            if not value:
                continue
            if ',' in value:
                addrs = [x for x in value.split(',') if '@' in x]
            else:
                addrs = [value]
            participant_type = header.capitalize()
            for addr in addrs:
                participants.append(MailParticipant(participant_type, addr))
        return participants

    @property
    def hash_participants(self):
        """Create a hash from participants addresses for global lookup.

        Addresses are de-duplicated and sorted first so the digest does not
        depend on header order.
        """
        addresses = sorted(set(x.address for x in self.participants))
        return hashlib.sha256(''.join(addresses)).hexdigest()

    @property
    def attachments(self):
        """Extract parts which we consider as attachments."""
        if not self.mail.is_multipart():
            return []
        return [MailAttachment(p)
                for p in walk_with_boundary(self.mail, "")
                if not p.is_multipart() and MailAttachment.is_attachment(p)]

    @property
    def extra_parameters(self):
        """Mail message extra parameters.

        :return: dict with a ``lists`` key holding the identifiers found
            in the List-ID headers (empty list if none).
        """
        lists = self.mail.get_all("List-ID")
        lists_ids = [address[1] for address in getaddresses(lists)] \
            if lists else []
        return {'lists': lists_ids}

    def lookup_discussion_sequence(self, *args, **kwargs):
        """Return list of (lookup type, value) tuples from a mail message.

        Order: one ``list`` entry per List-ID, then the ``global``
        participants hash, then at most one ``thread`` entry.
        """
        # list lookup first
        seq = [('list', list_id)
               for list_id in self.extra_parameters.get('lists', [])]

        seq.append(('global', self.hash_participants))

        # try to link message to external thread's root message-id
        # (references are parsed once, the property re-parses on each read)
        refs = self.external_references
        if refs["ancestors_ids"]:
            seq.append(("thread", refs["ancestors_ids"][0]))
        elif refs["parent_id"]:
            seq.append(("thread", refs["parent_id"]))
        elif refs["message_id"]:
            seq.append(("thread", refs["message_id"]))

        return seq

    # Others parameters specific for mail message

    @property
    def headers(self):
        """Extract all headers into a dict of lists.

        Duplicate headers may exist: they are grouped by (case-sensitive)
        name, each name mapping to the list of its values.
        """
        def keyfunc(item):
            return item[0]

        # groupby only merges adjacent items, hence the sort by name first.
        data = sorted(self.mail.items(), key=keyfunc)
        return {name: [item[1] for item in group]
                for name, group in groupby(data, key=keyfunc)}
Esempio n. 2
0
class MailMessage(object):

    """
    Mail message structure.

    Take a mail in raw RFC 2822 format and parse it to resolve all
    recipient emails, parts and grouped headers.

    Instance attributes set by the constructor: ``mail``, ``recipients``,
    ``parts``, ``headers``, ``subject``, ``date``, ``external_message_id``,
    ``external_parent_id`` and ``size``.
    """

    recipient_headers = ['From', 'To', 'Cc', 'Bcc']
    message_type = 'mail'

    def __init__(self, raw):
        """Initialize structure from a raw mail.

        :param raw: raw mail, passed unchanged to ``Message``.
        :raises Exception: whatever ``Message`` raises is logged and
            re-raised unchanged.
        """
        try:
            self.mail = Message(raw)
        except Exception as exc:
            log.error('Parse message failed %s' % exc)
            raise
        if self.mail.defects:
            # XXX what to do ?
            log.warn('Defects on parsed mail %r' % self.mail.defects)
        self.recipients = self._extract_recipients()
        self.parts = self._extract_parts()
        self.headers = self._extract_headers()
        self.subject = self.mail.get('Subject')
        self.date = self._parse_date(self.mail.get('Date'))
        self.external_message_id = self.mail.get('Message-Id')
        self.external_parent_id = self.mail.get('In-Reply-To')
        self.size = len(raw)

    @staticmethod
    def _parse_date(value):
        """Convert a Date header value to a naive datetime.

        A missing or unparseable header used to abort the whole parsing;
        the current time is used instead.
        """
        parsed = parsedate_tz(value) if value else None
        if parsed is None:
            log.warn('Missing or invalid Date header %r' % value)
            return datetime.now()
        return datetime.fromtimestamp(mktime_tz(parsed))

    @property
    def text(self):
        """Message all text: data of every indexable part, one per line."""
        # XXX : more complexity ?
        return "\n".join([x.data for x in self.parts if x.can_index])

    def _extract_recipients(self):
        """Map each lowercased recipient header to its cleaned addresses.

        Header values are split on commas; every piece goes through
        ``clean_email_address``. Absent headers map to an empty list.
        """
        recip = {}
        for header in self.recipient_headers:
            value = self.mail.get(header)
            if not value:
                addrs = []
            elif ',' in value:
                addrs = value.split(',')
            else:
                addrs = [value]
            recip[header.lower()] = [clean_email_address(x) for x in addrs]
        return recip

    def _extract_headers(self):
        """
        Extract all headers into a dict of lists.

        Duplicate headers may exist: they are grouped by (case-sensitive)
        name, each name mapping to the list of its values.
        """
        def keyfunc(item):
            return item[0]

        # groupby only merges adjacent items, hence the sort by name first.
        headers = {}
        data = sorted(self.mail.items(), key=keyfunc)
        for name, group in groupby(data, key=keyfunc):
            headers[name] = [item[1] for item in group]
        return headers

    def _extract_parts(self):
        """Walk the message and wrap every non-multipart part."""
        return [self._process_part(p)
                for p in self.mail.walk()
                if not p.is_multipart()]

    def _process_part(self, part):
        """Wrap a message part into a ``MailPart``."""
        return MailPart(part)

    @property
    def transport_privacy_index(self):
        """Evaluate transport privacy index (placeholder: random 0..50)."""
        # XXX : TODO
        return random.randint(0, 50)

    @property
    def content_privacy_index(self):
        """Evaluate content privacy index.

        Placeholder: random 50..100 when a part has content type ``PGP``,
        0.0 otherwise.
        """
        # XXX: real evaluation needed ;)
        if any(x.content_type == 'PGP' for x in self.parts):
            return random.randint(50, 100)
        return 0.0

    @property
    def spam_level(self):
        """Report spam level as a 0..100 value.

        Based on the first ``X-Spam-Score`` header: below 5 gives 0.0,
        from 5 up to 15 gives ten times the score capped at 100.0,
        anything else gives 100.0. A missing or non numeric score counts
        as 0.0.
        """
        try:
            score = float(self.headers.get('X-Spam-Score')[0])
        except (TypeError, ValueError, IndexError):
            # Header absent (None), empty list, or not a number.
            score = 0.0
        if score < 5.0:
            return 0.0
        if 5.0 <= score < 15.0:
            return min(score * 10, 100.0)
        return 100.0

    @property
    def importance_level(self):
        """Return percent estimated importance level of this message."""
        # XXX. real compute needed
        return 0 if self.spam_level else random.randint(50, 100)

    @property
    def lists(self):
        """List related to message: values of the ``List-ID`` headers."""
        return list(self.headers.get('List-ID', []))

    @property
    def from_(self):
        """Get from recipient: second item of the first ``from`` entry."""
        from_ = self.recipients.get('from')
        if from_:
            # XXX should do better
            return from_[0][1]
        return None

    def lookup_sequence(self):
        """Build parameter sequence for lookups.

        :return: list of (type, value) tuples ordered as parent, lists,
            sender.
        """
        seq = []
        # first from parent
        if self.external_parent_id:
            seq.append(('parent', self.external_parent_id))
        # then list lookup
        seq.extend(('list', listname) for listname in self.lists)
        # last try to lookup from sender address
        sender = self.from_
        if sender:
            seq.append(('from', sender))
        return seq

    def to_parameter(self):
        """Transform mail to a NewMessage parameter."""
        msg = NewMessage()
        msg.type = 'email'
        msg.subject = self.subject
        msg.from_ = self.from_
        # XXX need transform to part parameter
        for part in self.parts:
            param = Part()
            param.content_type = part.content_type
            param.data = part.data
            param.size = part.size
            param.filename = part.filename
            param.can_index = part.can_index
            msg.parts.append(param)
        msg.headers = self.headers
        msg.date = self.date
        msg.size = self.size
        msg.text = self.text
        msg.external_parent_id = self.external_parent_id
        msg.external_message_id = self.external_message_id
        # XXX well ....
        msg.privacy_index = (self.transport_privacy_index +
                             self.content_privacy_index) / 2
        msg.importance_level = self.importance_level
        return msg
Esempio n. 3
0
class MailMessage(object):
    """
    Mail message structure.

    Take a mail in raw RFC 2822 format and parse it to expose its bodies,
    participants, attachments, external references and grouped headers.

    Instance attributes:
        raw: the raw data handed to the constructor.
        mail: the ``Message`` object built from ``raw``.
        warnings: list of parser defects found on ``mail`` (empty if none).
        body_plain, body_html: bodies filled in by ``get_bodies``.
    """

    zope.interface.implements(IMessageParser)

    recipient_headers = ['From', 'To', 'Cc', 'Bcc']
    message_protocol = 'email'
    # Class-level defaults only; instances rebind these names.
    warnings = []
    body_html = ""
    body_plain = ""

    def __init__(self, raw_data):
        """Parse an RFC2822,5322 mail message.

        :param raw_data: raw mail, passed unchanged to ``Message``.
        :raises Exception: whatever ``Message`` raises is logged and
            re-raised unchanged.
        """
        self.raw = raw_data
        self._extra_parameters = {}
        try:
            self.mail = Message(raw_data)
        except Exception as exc:
            log.error('Parse message failed %s' % exc)
            # Bare raise keeps the original traceback.
            raise
        self.warnings = []
        if self.mail.defects:
            # XXX what to do ?
            log.warn('Defects on parsed mail %r' % self.mail.defects)
            self.warnings = list(self.mail.defects)
            # Historical (misspelled) attribute, kept for existing readers.
            self.warning = self.mail.defects
        self.get_bodies()

    @staticmethod
    def _decode_text_part(part):
        """Return the decoded payload of ``part`` passed through to_utf8."""
        charset = part.get_param("charset")
        if isinstance(charset, tuple):
            # RFC 2231 encoded parameter: (charset, language, value).
            charset = unicode(charset[2], charset[0] or "us-ascii")
        return to_utf8(part.get_payload(decode=True), charset)

    def _store_body(self, part):
        """Store ``part`` as plain or html body; ignore other types."""
        content_type = part.get_content_type()
        if content_type == "text/plain":
            self.body_plain = self._decode_text_part(part)
        elif content_type == "text/html":
            self.body_html = self._decode_text_part(part)

    def get_bodies(self):
        """Extract body alternatives, if any.

        Fills ``self.body_plain`` and/or ``self.body_html``. For a
        ``multipart/encrypted`` mail with exactly two parts, the raw
        payload of the second part becomes the plain body. Otherwise only
        the top level parts are inspected, plus the direct children of the
        first nested multipart container (scanning stops after it).
        """
        if not self.mail.get("Content-Type", None):
            # No declared content type: keep the decoded payload untouched.
            self.body_plain = self.mail.get_payload(decode=True)
            return

        if not self.mail.is_multipart():
            # Single part: anything that is not HTML is treated as plain.
            text = self._decode_text_part(self.mail)
            if self.mail.get_content_type() == "text/html":
                self.body_html = text
            else:
                self.body_plain = text
            return

        if self.mail.get_content_subtype() == 'encrypted':
            parts = self.mail.get_payload()
            if len(parts) == 2:
                self.body_plain = parts[1].get_payload()
                return
            # Unexpected layout: fall through to the generic scan below.
            log.warn('Encrypted message with invalid parts count')

        for top_level_part in self.mail.get_payload():
            if top_level_part.get_content_maintype() == "multipart":
                for alternative in top_level_part.get_payload():
                    self._store_body(alternative)
                break
            self._store_body(top_level_part)

    @property
    def subject(self):
        """Mail subject.

        Returns an empty string when the mail has no Subject header.
        NOTE(review): only the first chunk returned by ``decode_header``
        is used; subjects made of several encoded words are truncated.
        """
        raw_subject = self.mail.get('Subject')
        if raw_subject is None:
            return ''
        value, charset = decode_header(raw_subject)[0]
        if charset is not None:
            return value.decode(charset, "replace"). \
                encode("utf-8", "replace")
        try:
            return value.decode('utf-8', errors='ignore')
        except UnicodeError:
            log.warn('Invalid subject encoding')
            return value

    @property
    def size(self):
        """Get mail size, computed as the length of ``mail.as_string()``."""
        return len(self.mail.as_string())

    @property
    def external_references(self):
        """Return mail references to be used as external references.

        Makes use of these RFC5322 headers:
            message-id
            in-reply-to
            references
        Header strings are pruned to extract the address-like part only;
        when pruning yields nothing, the raw header value is used instead.

        :return: dict with keys ``message_id``, ``parent_id`` (both may be
            None) and ``ancestors_ids`` (list, possibly empty).
        """
        ext_id = self.mail.get('Message-Id')
        parent_id = self.mail.get('In-Reply-To')
        ref = self.mail.get_all("References")
        ref_ids = [address[1] for address in getaddresses(ref)] if ref else []
        mid = clean_email_address(ext_id)[1] if ext_id else None
        if not mid:
            log.error('Unable to find correct message_id {}'.format(ext_id))
            mid = ext_id
        pid = clean_email_address(parent_id)[1] if parent_id else None
        if not pid:
            pid = parent_id
        return {'message_id': mid, 'parent_id': pid, 'ancestors_ids': ref_ids}

    @property
    def date(self):
        """Get the date of a mail message.

        Falls back to the current UTC time when the Date header is missing
        or cannot be parsed.
        NOTE(review): the parsed value is a naive datetime built with
        ``fromtimestamp`` while the fallback is timezone-aware -- confirm
        what callers expect.
        """
        mail_date = self.mail.get('Date')
        if mail_date:
            try:
                tmp_date = parsedate_tz(mail_date)
                return datetime.datetime.fromtimestamp(mktime_tz(tmp_date))
            except (TypeError, ValueError, OverflowError):
                # parsedate_tz returned None or the date is out of range.
                log.error('Invalid date in mail {}'.format(mail_date))
        log.debug('No date on mail using now (UTC)')
        return datetime.datetime.now(tz=pytz.utc)

    @property
    def participants(self):
        """Mail participants.

        Builds one ``MailParticipant`` per lowercased address found in the
        headers listed in ``recipient_headers``. Pieces without an ``@``
        (for instance ``undisclosed-recipients:;``) are dropped, and
        participants with empty address and label are logged and skipped.

        NOTE(review): values are split on ``'>,'`` so every piece but the
        last loses its closing ``>``, and lists written without angle
        brackets are not split at all.
        """
        participants = []
        for header in self.recipient_headers:
            value = self.mail.get(header)
            if not value:
                continue
            participant_type = header.capitalize()
            for addr in value.split('>,'):
                if '@' not in addr:
                    continue
                participant = MailParticipant(participant_type, addr.lower())
                if participant.address == '' and participant.label == '':
                    log.warn('Invalid email address {}'.format(addr))
                else:
                    participants.append(participant)
        return participants

    @property
    def attachments(self):
        """Extract parts which we consider as attachments.

        Side effect: a ``pgp-encrypted`` part is not returned but recorded
        as ``{'encrypted': 'pgp'}`` in ``self._extra_parameters``, so
        ``extra_parameters`` only reports it once this property was read.
        """
        if not self.mail.is_multipart():
            return []
        attchs = []
        for p in walk_with_boundary(self.mail, ""):
            if p.is_multipart():
                continue
            if p.get_content_subtype() == 'pgp-encrypted':
                # Special consideration. Do not present it as an attachment
                # but set _extra_parameters accordingly
                self._extra_parameters.update({'encrypted': 'pgp'})
                continue
            if MailAttachment.is_attachment(p):
                attchs.append(MailAttachment(p))
        return attchs

    @property
    def extra_parameters(self):
        """Mail message extra parameters.

        :return: the instance-wide ``_extra_parameters`` dict, refreshed
            with a ``lists`` key holding the identifiers found in the
            List-ID headers (empty list if none).
        """
        lists = self.mail.get_all("List-ID")
        lists_ids = [address[1] for address in getaddresses(lists)] \
            if lists else []
        self._extra_parameters.update({'lists': lists_ids})
        return self._extra_parameters

    # Others parameters specific for mail message

    @property
    def headers(self):
        """Extract all headers into a dict of lists.

        Duplicate headers may exist: they are grouped by (case-sensitive)
        name, each name mapping to the list of its values.
        """
        def keyfunc(item):
            return item[0]

        # groupby only merges adjacent items, hence the sort by name first.
        data = sorted(self.mail.items(), key=keyfunc)
        return {name: [item[1] for item in group]
                for name, group in groupby(data, key=keyfunc)}

    @property
    def external_flags(self):
        """
        Get headers added by our fetcher that represent flags or labels
        set by external provider, returned as list of tags.

        Header values are base64 encoded, one flag per CRLF separated
        line. Empty lines and flags listed in ``EXCLUDED_EXT_FLAGS`` are
        skipped.
        """
        tags = []
        for h in ['X-Fetched-Imap-Flags', 'X-Fetched-X-GM-LABELS']:
            enc_flags = self.mail.get(h)
            if not enc_flags:
                continue
            flags_str = base64.decodestring(enc_flags)
            for flag in flags_str.split('\r\n'):
                # A trailing separator yields '', which is not a flag.
                if not flag or flag in EXCLUDED_EXT_FLAGS:
                    continue
                tag = Tag()
                tag.name = flag
                tag.label = flag
                tag.type = 'imported'
                tags.append(tag)
        return tags
Esempio n. 4
0
class MailMessage(object):
    """
    Mail message structure.

    Take a mail in raw RFC 2822 format and parse it to expose its bodies,
    participants, attachments, external references and grouped headers.

    Instance attributes:
        raw: the raw data handed to the constructor.
        mail: the ``Message`` object built from ``raw``.
        warnings: list of parser defects found on ``mail`` (empty if none).
        body_plain, body_html: bodies filled in by ``get_bodies``.
    """

    zope.interface.implements(IMessageParser)

    recipient_headers = ['From', 'To', 'Cc', 'Bcc']
    message_protocol = 'email'
    # Class-level defaults only; instances rebind these names.
    warnings = []
    body_html = ""
    body_plain = ""

    def __init__(self, raw_data):
        """Parse an RFC2822,5322 mail message.

        :param raw_data: raw mail, passed unchanged to ``Message``.
        :raises Exception: whatever ``Message`` raises is logged and
            re-raised unchanged.
        """
        self.raw = raw_data
        try:
            self.mail = Message(raw_data)
        except Exception as exc:
            log.error('Parse message failed %s' % exc)
            # Bare raise keeps the original traceback.
            raise
        self.warnings = []
        if self.mail.defects:
            # XXX what to do ?
            log.warn('Defects on parsed mail %r' % self.mail.defects)
            self.warnings = list(self.mail.defects)
            # Historical (misspelled) attribute, kept for existing readers.
            self.warning = self.mail.defects
        self.get_bodies()

    @staticmethod
    def _decode_text_part(part):
        """Return the decoded payload of ``part`` passed through to_utf8."""
        charset = part.get_param("charset")
        if isinstance(charset, tuple):
            # RFC 2231 encoded parameter: (charset, language, value).
            charset = unicode(charset[2], charset[0] or "us-ascii")
        return to_utf8(part.get_payload(decode=True), charset)

    def _store_body(self, part):
        """Store ``part`` as plain or html body; ignore other types."""
        content_type = part.get_content_type()
        if content_type == "text/plain":
            self.body_plain = self._decode_text_part(part)
        elif content_type == "text/html":
            self.body_html = self._decode_text_part(part)

    def get_bodies(self):
        """Extract body alternatives, if any.

        Fills ``self.body_plain`` and/or ``self.body_html``. For a
        ``multipart/encrypted`` mail with exactly two parts, the raw
        payload of the second part becomes the plain body. Otherwise only
        the top level parts are inspected, plus the direct children of the
        first nested multipart container (scanning stops after it).
        """
        if not self.mail.get("Content-Type", None):
            # No declared content type: keep the decoded payload untouched.
            self.body_plain = self.mail.get_payload(decode=True)
            return

        if not self.mail.is_multipart():
            # Single part: anything that is not HTML is treated as plain.
            text = self._decode_text_part(self.mail)
            if self.mail.get_content_type() == "text/html":
                self.body_html = text
            else:
                self.body_plain = text
            return

        if self.mail.get_content_subtype() == 'encrypted':
            parts = self.mail.get_payload()
            if len(parts) == 2:
                self.body_plain = parts[1].get_payload()
                return
            # Unexpected layout: fall through to the generic scan below.
            log.warn('Encrypted message with invalid parts count')

        for top_level_part in self.mail.get_payload():
            if top_level_part.get_content_maintype() == "multipart":
                for alternative in top_level_part.get_payload():
                    self._store_body(alternative)
                break
            self._store_body(top_level_part)

    @property
    def subject(self):
        """Mail subject.

        Returns an empty string when the mail has no Subject header.
        NOTE(review): only the first chunk returned by ``decode_header``
        is used; subjects made of several encoded words are truncated.
        """
        raw_subject = self.mail.get('Subject')
        if raw_subject is None:
            return ''
        value, charset = decode_header(raw_subject)[0]
        if charset is None:
            return value
        return value.decode(charset, "replace").encode("utf-8", "replace")

    @property
    def size(self):
        """Get mail size, computed as the length of ``mail.as_string()``."""
        return len(self.mail.as_string())

    @property
    def external_references(self):
        """Return mail references to be used as external references.

        Makes use of these RFC5322 headers:
            message-id
            in-reply-to
            references
        Header strings are pruned to extract the address-like part only.

        :return: dict with keys ``message_id``, ``parent_id`` (both may be
            None) and ``ancestors_ids`` (list, possibly empty).
        """
        ext_id = self.mail.get('Message-Id')
        parent_id = self.mail.get('In-Reply-To')
        ref = self.mail.get_all("References")
        ref_ids = [address[1] for address in getaddresses(ref)] if ref else []
        mid = clean_email_address(ext_id)[1] if ext_id else None
        pid = clean_email_address(parent_id)[1] if parent_id else None
        return {
            'message_id': mid,
            'parent_id': pid,
            'ancestors_ids': ref_ids}

    @property
    def date(self):
        """Get the date of a mail message.

        Falls back to the current UTC time when the Date header is missing
        or cannot be parsed.
        NOTE(review): the parsed value is a naive datetime built with
        ``fromtimestamp`` while the fallback is timezone-aware -- confirm
        what callers expect.
        """
        mail_date = self.mail.get('Date')
        if mail_date:
            try:
                tmp_date = parsedate_tz(mail_date)
                return datetime.datetime.fromtimestamp(mktime_tz(tmp_date))
            except (TypeError, ValueError, OverflowError):
                # parsedate_tz returned None or the date is out of range.
                log.error('Invalid date in mail {}'.format(mail_date))
        log.debug('No date on mail using now (UTC)')
        return datetime.datetime.now(tz=pytz.utc)

    @property
    def participants(self):
        """Mail participants.

        Builds one ``MailParticipant`` per address found in the headers
        listed in ``recipient_headers``. When a header holds a comma, it is
        split on commas and pieces without an ``@`` are dropped.
        """
        participants = []
        for header in self.recipient_headers:
            value = self.mail.get(header)
            if not value:
                continue
            if ',' in value:
                addrs = [x for x in value.split(',') if '@' in x]
            else:
                addrs = [value]
            participant_type = header.capitalize()
            for addr in addrs:
                participants.append(MailParticipant(participant_type, addr))
        return participants

    @property
    def attachments(self):
        """Extract parts which we consider as attachments."""
        if not self.mail.is_multipart():
            return []
        return [MailAttachment(p)
                for p in walk_with_boundary(self.mail, "")
                if not p.is_multipart() and MailAttachment.is_attachment(p)]

    @property
    def extra_parameters(self):
        """Mail message extra parameters.

        :return: dict with a ``lists`` key holding the identifiers found
            in the List-ID headers (empty list if none).
        """
        lists = self.mail.get_all("List-ID")
        lists_ids = [address[1] for address in getaddresses(lists)] \
            if lists else []
        return {'lists': lists_ids}

    # Others parameters specific for mail message

    @property
    def headers(self):
        """Extract all headers into a dict of lists.

        Duplicate headers may exist: they are grouped by (case-sensitive)
        name, each name mapping to the list of its values.
        """
        def keyfunc(item):
            return item[0]

        # groupby only merges adjacent items, hence the sort by name first.
        data = sorted(self.mail.items(), key=keyfunc)
        return {name: [item[1] for item in group]
                for name, group in groupby(data, key=keyfunc)}
Esempio n. 5
0
def msg_has_attachment(msg: Message) -> bool:
    """Tell whether ``msg`` is itself an attachment part.

    A part qualifies when it is not a multipart container and carries both
    a Content-Disposition header and a filename.
    """
    # get_content_type() returns "maintype/subtype" and so can never equal
    # "multipart"; the main type is what identifies a container.
    return bool(msg.get_content_maintype() != "multipart"
                and msg.get("Content-Disposition") and msg.get_filename())
Esempio n. 6
0
class MailMessage(object):
    """
    Mail message structure.

    Takes a mail in raw RFC 2822 format and parses it to resolve the
    recipient addresses, the parts and the grouped headers.
    """

    recipient_headers = ['From', 'To', 'Cc', 'Bcc']
    message_type = 'mail'

    def __init__(self, raw):
        """Initialize structure from a raw mail.

        :param raw: raw mail passed to ``Message``; ``len(raw)`` is stored
            as ``size``.
        :raises Exception: any error raised by ``Message`` is logged and
            re-raised unchanged.
        """
        try:
            self.mail = Message(raw)
        except Exception as exc:
            log.error('Parse message failed %s' % exc)
            raise
        if self.mail.defects:
            # XXX what to do ?
            # ``warning`` is used because ``warn`` is a deprecated alias.
            log.warning('Defects on parsed mail %r' % self.mail.defects)
        self.recipients = self._extract_recipients()
        self.parts = self._extract_parts()
        self.headers = self._extract_headers()
        self.subject = self.mail.get('Subject')
        # NOTE(review): there is no fallback here, so a mail without a
        # parsable Date header makes construction fail -- confirm intended.
        tmp_date = parsedate_tz(self.mail['Date'])
        self.date = datetime.fromtimestamp(mktime_tz(tmp_date))
        self.external_message_id = self.mail.get('Message-Id')
        self.external_parent_id = self.mail.get('In-Reply-To')
        self.size = len(raw)

    @property
    def text(self):
        """Return the data of all indexable parts, joined by newlines."""
        # XXX : more complexity ?
        return "\n".join(part.data for part in self.parts if part.can_index)

    def _extract_recipients(self):
        """Build a dict of cleaned addresses per recipient header.

        Keys are the lower-cased names from ``recipient_headers``; each
        value is the list of ``clean_email_address`` results for the
        comma-separated entries of that header (empty when it is absent).
        """
        recip = {}
        for header in self.recipient_headers:
            value = self.mail.get(header)
            # NOTE(review): a plain comma split also cuts inside a quoted
            # display name that contains a comma -- confirm acceptable.
            # split() on a value without comma yields a one-item list.
            raw_addrs = value.split(',') if value else []
            recip[header.lower()] = [
                clean_email_address(addr) for addr in raw_addrs]
        return recip

    def _extract_headers(self):
        """
        Extract all headers into a dict.

        Header names may be duplicated: values are grouped by name,
        each name mapping to the list of its values.
        """
        def keyfunc(item):
            return item[0]

        # groupby only merges adjacent items, so sort by header name first
        headers = {}
        data = sorted(self.mail.items(), key=keyfunc)
        for name, group in groupby(data, key=keyfunc):
            headers[name] = [value for _, value in group]
        return headers

    def _extract_parts(self):
        """Return the processed non-multipart parts of the message."""
        return [self._process_part(part)
                for part in self.mail.walk()
                if not part.is_multipart()]

    def _process_part(self, part):
        """Wrap a raw message part into a ``MailPart``."""
        return MailPart(part)

    @property
    def transport_privacy_index(self):
        """Evaluate transport privacy index (random placeholder, 0-50)."""
        # XXX : TODO
        return random.randint(0, 50)

    @property
    def content_privacy_index(self):
        """Evaluate content privacy index.

        Placeholder: a random value in 50-100 when a part has the content
        type ``'PGP'``, ``0.0`` otherwise.
        """
        # XXX: real evaluation needed ;)
        if any(part.content_type == 'PGP' for part in self.parts):
            return random.randint(50, 100)
        return 0.0

    @property
    def spam_level(self):
        """Report spam level as a value between 0.0 and 100.0.

        The first ``X-Spam-Score`` header value is read as a float; a
        missing or unreadable score counts as ``0.0``. Scores below 5 give
        ``0.0``, scores from 5 up to (not including) 15 give ten times the
        score capped at ``100.0``, anything else gives ``100.0``.
        """
        try:
            score = float(self.headers.get('X-Spam-Score')[0])
        except (TypeError, IndexError, ValueError):
            # Header absent (None), empty list, or not a number. Only these
            # are swallowed so that KeyboardInterrupt/SystemExit propagate.
            score = 0.0
        if score < 5.0:
            return 0.0
        if score < 15.0:
            return min(score * 10, 100.0)
        return 100.0

    @property
    def importance_level(self):
        """Return percent estimated importance level of this message.

        Placeholder: ``0`` when ``spam_level`` is non-zero, otherwise a
        random value in 50-100.
        """
        # XXX. real compute needed
        return 0 if self.spam_level else random.randint(50, 100)

    @property
    def lists(self):
        """Return the ``List-ID`` header values of the message."""
        return list(self.headers.get('List-ID', []))

    @property
    def from_(self):
        """Get from recipient.

        Returns item ``[1]`` of the first ``'from'`` recipient entry, or
        ``None`` when there is no such entry.
        """
        from_ = self.recipients.get('from')
        if from_:
            # XXX should do better
            # presumably clean_email_address returns a pair -- TODO confirm
            return from_[0][1]
        return None

    def lookup_sequence(self):
        """Build parameter sequence for lookups.

        :return: list of ``(kind, value)`` tuples, in lookup order: the
            parent message id, then each list, then the sender.
        """
        seq = []
        # first from parent
        if self.external_parent_id:
            seq.append(('parent', self.external_parent_id))
        # then list lookup
        seq.extend(('list', listname) for listname in self.lists)
        # last try to lookup from sender address
        sender = self.from_
        if sender:
            seq.append(('from', sender))
        return seq

    def to_parameter(self):
        """Transform mail to a NewMessage parameter.

        :return: a ``NewMessage`` filled from this mail, with one ``Part``
            per extracted part.
        """
        msg = NewMessage()
        msg.type = 'email'
        msg.subject = self.subject
        msg.from_ = self.from_
        # XXX need transform to part parameter
        for part in self.parts:
            param = Part()
            param.content_type = part.content_type
            param.data = part.data
            param.size = part.size
            param.filename = part.filename
            param.can_index = part.can_index
            msg.parts.append(param)
        msg.headers = self.headers
        msg.date = self.date
        msg.size = self.size
        msg.text = self.text
        msg.external_parent_id = self.external_parent_id
        msg.external_message_id = self.external_message_id
        # XXX well ....
        # privacy index is the mean of the transport and content indexes
        msg.privacy_index = (self.transport_privacy_index +
                             self.content_privacy_index) / 2
        msg.importance_level = self.importance_level
        return msg