def _lookup(self, address):
    """Search mail from ``address`` for attached PGP keys.

    The address is split on ``@`` and every piece becomes a ``from:``
    search term; ``has:pgpkey`` is added to restrict the search.  Every
    ``application/pgp-keys`` attachment found in the matching messages is
    passed to ``self._get_keydata()`` and merged into the result.

    Returns:
        A dict accumulated from the ``self._get_keydata()`` results.
    """
    terms = ["from:%s" % piece for piece in address.split('@')]
    terms.append("has:pgpkey")
    # Only the session (first element) of the search result is used.
    session, _unused_idx, _, _ = self._do_search(search=terms)

    found = {}
    for mid in session.results:
        tree = Email(self._idx(), mid).get_message_tree("attachments")
        for attachment in tree["attachments"]:
            if attachment["mimetype"] != "application/pgp-keys":
                continue
            raw_key = attachment["part"].get_payload(None, True)
            found.update(self._get_keydata(raw_key))
    return found
def _get_keys(self, messageid):
    """Return the PGP key data attached to a message, using the cache.

    On a cache miss the message's ``application/pgp-keys`` attachments
    are parsed with ``self._get_keydata()``.  Each key data entry is
    collected, and the raw key payload is additionally stored in
    ``self.key_cache`` under the entry's ``"fingerprint"``.  The resulting
    list is cached under ``messageid``.

    Args:
        messageid: Key used for the cache and passed to ``Email()``.

    Returns:
        The (possibly empty) list of key data entries for the message.
    """
    keys = self.key_cache.get(messageid)
    if keys is not None:
        # An empty list is a valid cached answer ("this message has no
        # keys"); testing truthiness here would re-parse such messages on
        # every single call.
        return keys

    keys = []
    email = Email(self._idx(), messageid)
    attachments = email.get_message_tree("attachments")["attachments"]
    for part in attachments:
        if part["mimetype"] != "application/pgp-keys":
            continue
        key = part["part"].get_payload(None, True)
        for keydata in self._get_keydata(key):
            keys.append(keydata)
            # Remember the raw key material under its fingerprint too.
            self.key_cache[keydata["fingerprint"]] = key
        if len(keys) > 5:  # Just to set some limit...
            break
    self.key_cache[messageid] = keys
    return keys
def _get_message_keys(self, messageid):
    """Return ``(keydata, raw_key)`` pairs for keys attached to a message.

    On a cache miss every attachment whose file name / MIME type passes
    ``_might_be_pgp_key()`` is parsed with ``_get_keydata()``.  The
    resulting list is stored in ``self.key_cache`` under ``messageid``.

    Args:
        messageid: Key used for the cache and passed to ``Email()``.

    Returns:
        The (possibly empty) list of ``(keydata, raw_key)`` tuples.
    """
    keys = self.key_cache.get(messageid)
    if keys is not None:
        # An empty list is a valid cached answer; checking truthiness
        # instead would re-parse key-less messages on every call.
        return keys

    keys = []
    email = Email(self._idx(), messageid)
    attachments = email.get_message_tree(want=["attachments"])["attachments"]
    for part in attachments:
        if not _might_be_pgp_key(part["filename"], part["mimetype"]):
            continue
        key = part["part"].get_payload(None, True)
        for keydata in _get_keydata(key):
            keys.append((keydata, key))
        if len(keys) > 5:  # Just to set some limit...
            break
    self.key_cache[messageid] = keys
    return keys
def _get_message_keys(self, messageid):
    """Return ``(keydata, raw_key)`` pairs for keys attached to a message.

    Results are memoised in ``self.key_cache`` under ``messageid``.  When
    nothing is cached, attachments accepted by ``_might_be_pgp_key()``
    (called with the attachment's file name and MIME type) are parsed
    with ``_get_keydata()``.

    Args:
        messageid: Key used for the cache and passed to ``Email()``.

    Returns:
        The (possibly empty) list of ``(keydata, raw_key)`` tuples.
    """
    cached = self.key_cache.get(messageid)
    if cached is not None:
        # A cached empty list means "already looked, found nothing" and
        # must count as a hit, otherwise the message is parsed again on
        # every lookup.
        return cached

    keys = []
    email = Email(self._idx(), messageid)
    attachments = email.get_message_tree(
        want=["attachments"])["attachments"]
    for part in attachments:
        if _might_be_pgp_key(part["filename"], part["mimetype"]):
            key = part["part"].get_payload(None, True)
            for keydata in _get_keydata(key):
                keys.append((keydata, key))
            if len(keys) > 5:  # Just to set some limit...
                break
    self.key_cache[messageid] = keys
    return keys
def read_message(self, session, msg_mid, msg_id, msg, msg_size, msg_ts,
                 mailbox=None):
    """Extract index keywords and a short text snippet from a message.

    Walks every MIME part of ``msg``, collecting words from text parts
    and attachment file names and running the registered plugin keyword
    extractors, then adds keywords derived from the message headers.

    Args:
        session: Provides ``session.ui.warning()`` and the
            ``session.config.prefs.index_encrypted`` preference.
        msg_mid: Identifier used in the bogus-HTML warning and passed to
            the meta keyword extractors.
        msg_id: Indexed as the ``<msg_id>:id`` keyword.
        msg: Parsed message; must provide ``walk()``, ``keys()`` and
            header lookup by name.  When a ``crypto:has`` keyword was
            produced, ``msg.signature_info`` and ``msg.encryption_info``
            are set as a side effect.
        msg_size: Passed unchanged to the meta keyword extractors.
        msg_ts: Passed unchanged to the meta keyword extractors.
        mailbox: Optional mailbox name, indexed lower-cased as
            ``<mailbox>:mailbox``.

    Returns:
        A ``(keywords, snippet)`` tuple: the set of keywords with the
        ``STOPLIST`` entries removed, and the stripped snippet text with
        newlines and tabs replaced by spaces and carriage returns dropped.
    """
    keywords = []
    snippet = ''
    # One-element list: a mutable cell shared with _loader() that caches
    # the decoded payload of the part currently being processed.
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        # get_content_charset() reads the charset parameter of the
        # Content-Type header.  get_charset() only reports a Charset
        # object explicitly attached with set_charset(), so it made every
        # parsed part fall back to iso-8859-1.
        charset = part.get_content_charset() or 'iso-8859-1'

        def _loader(p):
            # Decode the payload lazily and at most once per part.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == 'text/plain':
            textpart = _loader(part)
        elif ctype == 'text/html':
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    textpart = lxml.html.fromstring(
                        payload[0]).text_content()
                except Exception:
                    # Best effort: index the raw markup instead, but do
                    # not swallow KeyboardInterrupt / SystemExit.
                    session.ui.warning(_('=%s/%s has bogus HTML.'
                                         ) % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif 'pgp' in part.get_content_type():
            keywords.append('pgp:has')

        att = part.get_filename()
        if att:
            att = self.try_decode(att, charset)
            keywords.append('attachment:has')
            keywords.extend([t + ':att' for t
                             in re.findall(WORD_REGEXP, att.lower())])
            # The file name is indexed together with the part's text.
            textpart = (textpart or '') + ' ' + att

        if textpart:
            # FIXME: Does this lowercase non-ASCII characters correctly?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

            # NOTE: As a side effect here, the cryptostate plugin will
            #       add a 'crypto:has' keyword which we check for below
            #       before performing further processing.
            for kwe in plugins.get_text_kw_extractors():
                keywords.extend(kwe(self, msg, ctype, textpart))

            if len(snippet) < 1024:
                snippet += ' ' + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(extract(self, msg, ctype, att, part,
                                    lambda: _loader(part)))

    if 'crypto:has' in keywords:
        e = Email(self, -1)
        e.msg_parsed = msg
        e.msg_info = self.BOGUS_METADATA[:]
        tree = e.get_message_tree(want=(e.WANT_MSG_TREE_PGP +
                                        ('text_parts', )))

        # Look for inline PGP parts, update our status if found
        e.evaluate_pgp(tree, decrypt=session.config.prefs.index_encrypted)
        msg.signature_info = tree['crypto']['signature']
        msg.encryption_info = tree['crypto']['encryption']

        # Index the contents, if configured to do so
        if session.config.prefs.index_encrypted:
            for text in [t['data'] for t in tree['text_parts']]:
                keywords.extend(re.findall(WORD_REGEXP, text.lower()))
                for kwe in plugins.get_text_kw_extractors():
                    keywords.extend(kwe(self, msg, 'text/plain', text))

    keywords.append('%s:id' % msg_id)
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'subject').lower()))
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'from').lower()))
    if mailbox:
        keywords.append('%s:mailbox' % mailbox.lower())
    keywords.append('%s:hp' % HeaderPrint(msg))

    for key in msg.keys():
        key_lower = key.lower()
        if key_lower not in BORING_HEADERS:
            # Decode and lower-case the header once for both extractions.
            value = self.hdr(msg, key).lower()
            emails = ExtractEmails(value)
            words = set(re.findall(WORD_REGEXP, value))
            words -= STOPLIST
            keywords.extend(['%s:%s' % (t, key_lower) for t in words])
            keywords.extend(['%s:%s' % (addr, key_lower)
                             for addr in emails])
            keywords.extend(['%s:email' % addr for addr in emails])
            if 'list' in key_lower:
                keywords.extend(['%s:list' % t for t in words])
    for key in EXPECTED_HEADERS:
        if not msg[key]:
            keywords.append('missing:%s' % key)

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

    snippet = snippet.replace('\n', ' '
                              ).replace('\t', ' ').replace('\r', '')
    return (set(keywords) - STOPLIST), snippet.strip()
def read_message(self, session, msg_mid, msg_id, msg, msg_size, msg_ts,
                 mailbox=None):
    """Extract index keywords and a short text snippet from a message.

    Walks every MIME part of ``msg``, collecting words from text parts
    and attachment file names, tagging inline PGP content and running
    the registered plugin keyword extractors, then adds keywords derived
    from the message headers.

    Args:
        session: Provides ``session.ui.warning()`` and the
            ``session.config.prefs.index_encrypted`` preference.
        msg_mid: Identifier used in the bogus-HTML warning and passed to
            the meta keyword extractors.
        msg_id: Indexed as the ``<msg_id>:id`` keyword.
        msg: Parsed message; must provide ``walk()``, ``keys()`` and
            header lookup by name.
        msg_size: Passed unchanged to the meta keyword extractors.
        msg_ts: Passed unchanged to the meta keyword extractors.
        mailbox: Optional mailbox name, indexed lower-cased as
            ``<mailbox>:mailbox``.

    Returns:
        A ``(keywords, snippet)`` tuple: the set of keywords with the
        ``STOPLIST`` entries removed, and the stripped snippet text with
        newlines and tabs replaced by spaces and carriage returns dropped.
    """
    keywords = []
    snippet = ''
    # One-element list: a mutable cell shared with _loader() that caches
    # the decoded payload of the part currently being processed.
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        # get_content_charset() reads the charset parameter of the
        # Content-Type header.  get_charset() only reports a Charset
        # object explicitly attached with set_charset(), so it made every
        # parsed part fall back to iso-8859-1.
        charset = part.get_content_charset() or 'iso-8859-1'

        def _loader(p):
            # Decode the payload lazily and at most once per part.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == 'text/plain':
            textpart = _loader(part)
        elif ctype == 'text/html':
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    textpart = lxml.html.fromstring(
                        payload[0]).text_content()
                except Exception:
                    # Best effort: index the raw markup instead, but do
                    # not swallow KeyboardInterrupt / SystemExit.
                    session.ui.warning(_('=%s/%s has bogus HTML.'
                                         ) % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif 'pgp' in part.get_content_type():
            keywords.append('pgp:has')

        att = part.get_filename()
        if att:
            att = self.try_decode(att, charset)
            keywords.append('attachment:has')
            keywords.extend([t + ':att' for t
                             in re.findall(WORD_REGEXP, att.lower())])
            # The file name is indexed together with the part's text.
            textpart = (textpart or '') + ' ' + att

        if textpart:
            # FIXME: What about encrypted content?
            # FIXME: Do this better.
            if ('-----BEGIN PGP' in textpart and
                    '-----END PGP' in textpart):
                keywords.append('pgp:has')
                # OpenPGP armors encrypted data as "BEGIN PGP MESSAGE";
                # "BEGIN PGP ENCRYPTED" is not an armor header line and
                # is only still accepted for backwards compatibility.
                if ('-----BEGIN PGP MESSAGE' in textpart or
                        '-----BEGIN PGP ENCRYPTED' in textpart):
                    keywords.append('pgp-encrypted-text:has')
                else:
                    keywords.append('pgp-signed-text:has')

            # FIXME: Does this lowercase non-ASCII characters correctly?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))
            for extract in plugins.get_text_kw_extractors():
                keywords.extend(extract(self, msg, ctype,
                                        lambda: textpart))

            if len(snippet) < 1024:
                snippet += ' ' + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(extract(self, msg, ctype, att, part,
                                    lambda: _loader(part)))

    if (session.config.prefs.index_encrypted and
            'pgp-encrypted-text:has' in keywords):
        e = Email(None, -1)
        e.msg_parsed = msg
        # Placeholder metadata: one empty string per field.
        e.msg_info = [''] * self.MSG_FIELDS_V2
        tree = e.get_message_tree(want=['text_parts'])
        for text in [t['data'] for t in tree['text_parts']]:
            # (A leftover debugging print to stdout was removed here.)
            keywords.extend(re.findall(WORD_REGEXP, text.lower()))
            for extract in plugins.get_text_kw_extractors():
                keywords.extend(extract(self, msg, 'text/plain',
                                        lambda: text))

    keywords.append('%s:id' % msg_id)
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'subject').lower()))
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'from').lower()))
    if mailbox:
        keywords.append('%s:mailbox' % mailbox.lower())
    keywords.append('%s:hp' % HeaderPrint(msg))

    for key in msg.keys():
        key_lower = key.lower()
        if key_lower not in BORING_HEADERS:
            # Decode and lower-case the header once for both extractions.
            value = self.hdr(msg, key).lower()
            emails = ExtractEmails(value)
            words = set(re.findall(WORD_REGEXP, value))
            words -= STOPLIST
            keywords.extend(['%s:%s' % (t, key_lower) for t in words])
            keywords.extend(['%s:%s' % (addr, key_lower)
                             for addr in emails])
            keywords.extend(['%s:email' % addr for addr in emails])
            if 'list' in key_lower:
                keywords.extend(['%s:list' % t for t in words])
    for key in EXPECTED_HEADERS:
        if not msg[key]:
            keywords.append('missing:%s' % key)

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

    snippet = snippet.replace('\n', ' '
                              ).replace('\t', ' ').replace('\r', '')
    return (set(keywords) - STOPLIST), snippet.strip()
def read_message(self, session, msg_mid, msg_id, msg, msg_size, msg_ts,
                 mailbox=None):
    """Extract index keywords and a short text snippet from a message.

    Walks every MIME part of ``msg``, collecting words from text parts
    and attachment file names and running the registered plugin keyword
    extractors, then adds keywords derived from the message headers.

    Args:
        session: Provides ``session.ui.warning()`` and the
            ``session.config.prefs.index_encrypted`` preference.
        msg_mid: Identifier used in the bogus-HTML warning and passed to
            the meta keyword extractors.
        msg_id: Indexed as the ``<msg_id>:id`` keyword.
        msg: Parsed message; must provide ``walk()``, ``keys()`` and
            header lookup by name.  When a ``crypto:has`` keyword was
            produced, ``msg.signature_info`` and ``msg.encryption_info``
            are set as a side effect.
        msg_size: Passed unchanged to the meta keyword extractors.
        msg_ts: Passed unchanged to the meta keyword extractors.
        mailbox: Optional mailbox name, indexed lower-cased as
            ``<mailbox>:mailbox``.

    Returns:
        A ``(keywords, snippet)`` tuple: the set of keywords with the
        ``STOPLIST`` entries removed, and the stripped snippet text with
        newlines and tabs replaced by spaces and carriage returns dropped.
    """
    keywords = []
    snippet = ""
    # One-element list: a mutable cell shared with _loader() that caches
    # the decoded payload of the part currently being processed.
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        charset = part.get_content_charset() or "iso-8859-1"

        def _loader(p):
            # Decode the payload lazily and at most once per part.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == "text/plain":
            textpart = _loader(part)
        elif ctype == "text/html":
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    textpart = lxml.html.fromstring(
                        payload[0]).text_content()
                except Exception:
                    # Best effort: index the raw markup instead, but do
                    # not swallow KeyboardInterrupt / SystemExit.
                    session.ui.warning(_("=%s/%s has bogus HTML.")
                                       % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif "pgp" in part.get_content_type():
            keywords.append("pgp:has")

        att = part.get_filename()
        if att:
            att = self.try_decode(att, charset)
            keywords.append("attachment:has")
            keywords.extend([t + ":att" for t
                             in re.findall(WORD_REGEXP, att.lower())])
            # The file name is indexed together with the part's text.
            textpart = (textpart or "") + " " + att

        if textpart:
            # FIXME: Does this lowercase non-ASCII characters correctly?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

            # NOTE: As a side effect here, the cryptostate plugin will
            #       add a 'crypto:has' keyword which we check for below
            #       before performing further processing.
            for kwe in plugins.get_text_kw_extractors():
                keywords.extend(kwe(self, msg, ctype, textpart))

            if len(snippet) < 1024:
                snippet += " " + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(extract(self, msg, ctype, att, part,
                                    lambda: _loader(part)))

    if "crypto:has" in keywords:
        e = Email(self, -1)
        e.msg_parsed = msg
        e.msg_info = self.BOGUS_METADATA[:]
        tree = e.get_message_tree(want=(e.WANT_MSG_TREE_PGP +
                                        ("text_parts",)))

        # Look for inline PGP parts, update our status if found
        e.evaluate_pgp(tree, decrypt=session.config.prefs.index_encrypted)
        msg.signature_info = tree["crypto"]["signature"]
        msg.encryption_info = tree["crypto"]["encryption"]

        # Index the contents, if configured to do so
        if session.config.prefs.index_encrypted:
            for text in [t["data"] for t in tree["text_parts"]]:
                keywords.extend(re.findall(WORD_REGEXP, text.lower()))
                for kwe in plugins.get_text_kw_extractors():
                    keywords.extend(kwe(self, msg, "text/plain", text))

    keywords.append("%s:id" % msg_id)
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, "subject").lower()))
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, "from").lower()))
    if mailbox:
        keywords.append("%s:mailbox" % mailbox.lower())
    keywords.append("%s:hp" % HeaderPrint(msg))

    for key in msg.keys():
        key_lower = key.lower()
        if key_lower not in BORING_HEADERS:
            # Decode and lower-case the header once for both extractions.
            value = self.hdr(msg, key).lower()
            emails = ExtractEmails(value)
            words = set(re.findall(WORD_REGEXP, value))
            words -= STOPLIST
            keywords.extend(["%s:%s" % (t, key_lower) for t in words])
            # Loop variable deliberately not called "e": in Python 2 a
            # list comprehension variable leaks into the function scope.
            keywords.extend(["%s:%s" % (addr, key_lower)
                             for addr in emails])
            keywords.extend(["%s:email" % addr for addr in emails])
            if "list" in key_lower:
                keywords.extend(["%s:list" % t for t in words])
    for key in EXPECTED_HEADERS:
        if not msg[key]:
            keywords.append("%s:missing" % key)

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

    snippet = snippet.replace("\n", " ").replace("\t", " ").replace("\r", "")
    return (set(keywords) - STOPLIST), snippet.strip()
def read_message(self, session, msg_mid, msg_id, msg, msg_size, msg_ts,
                 mailbox=None):
    """Extract index keywords and a short text snippet from a message.

    Walks every MIME part of ``msg``, collecting words from text parts
    and attachment file names and running the registered plugin keyword
    extractors, then adds keywords derived from the message headers.

    Args:
        session: Provides ``session.ui.warning()`` and the
            ``session.config.prefs.index_encrypted`` preference.
        msg_mid: Identifier used in the bogus-HTML warning and passed to
            the meta keyword extractors.
        msg_id: Indexed as the ``<msg_id>:id`` keyword.
        msg: Parsed message; must provide ``walk()``, ``keys()`` and
            header lookup by name.  When a ``crypto:has`` keyword was
            produced, ``msg.signature_info`` and ``msg.encryption_info``
            are set as a side effect.
        msg_size: Passed unchanged to the meta keyword extractors.
        msg_ts: Passed unchanged to the meta keyword extractors.
        mailbox: Optional mailbox name, indexed lower-cased as
            ``<mailbox>:mailbox``.

    Returns:
        A ``(keywords, snippet)`` tuple: the set of keywords with the
        ``STOPLIST`` entries removed, and the stripped snippet text with
        newlines and tabs replaced by spaces and carriage returns dropped.
    """
    keywords = []
    snippet = ''
    # One-element list: a mutable cell shared with _loader() that caches
    # the decoded payload of the part currently being processed.
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        charset = part.get_content_charset() or 'iso-8859-1'

        def _loader(p):
            # Decode the payload lazily and at most once per part.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == 'text/plain':
            textpart = _loader(part)
        elif ctype == 'text/html':
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    textpart = lxml.html.fromstring(
                        payload[0]).text_content()
                except Exception:
                    # Best effort: index the raw markup instead, but do
                    # not swallow KeyboardInterrupt / SystemExit.
                    session.ui.warning(
                        _('=%s/%s has bogus HTML.') % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif 'pgp' in part.get_content_type():
            keywords.append('pgp:has')

        att = part.get_filename()
        if att:
            att = self.try_decode(att, charset)
            keywords.append('attachment:has')
            keywords.extend(
                [t + ':att' for t in re.findall(WORD_REGEXP, att.lower())])
            # The file name is indexed together with the part's text.
            textpart = (textpart or '') + ' ' + att

        if textpart:
            # FIXME: Does this lowercase non-ASCII characters correctly?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

            # NOTE: As a side effect here, the cryptostate plugin will
            #       add a 'crypto:has' keyword which we check for below
            #       before performing further processing.
            for kwe in plugins.get_text_kw_extractors():
                keywords.extend(kwe(self, msg, ctype, textpart))

            if len(snippet) < 1024:
                snippet += ' ' + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(
                extract(self, msg, ctype, att, part, lambda: _loader(part)))

    if 'crypto:has' in keywords:
        e = Email(self, -1)
        e.msg_parsed = msg
        e.msg_info = self.BOGUS_METADATA[:]
        tree = e.get_message_tree(want=(e.WANT_MSG_TREE_PGP +
                                        ('text_parts', )))

        # Look for inline PGP parts, update our status if found
        e.evaluate_pgp(tree, decrypt=session.config.prefs.index_encrypted)
        msg.signature_info = tree['crypto']['signature']
        msg.encryption_info = tree['crypto']['encryption']

        # Index the contents, if configured to do so
        if session.config.prefs.index_encrypted:
            for text in [t['data'] for t in tree['text_parts']]:
                keywords.extend(re.findall(WORD_REGEXP, text.lower()))
                for kwe in plugins.get_text_kw_extractors():
                    keywords.extend(kwe(self, msg, 'text/plain', text))

    keywords.append('%s:id' % msg_id)
    keywords.extend(
        re.findall(WORD_REGEXP, self.hdr(msg, 'subject').lower()))
    keywords.extend(re.findall(WORD_REGEXP, self.hdr(msg, 'from').lower()))
    if mailbox:
        keywords.append('%s:mailbox' % mailbox.lower())
    keywords.append('%s:hp' % HeaderPrint(msg))

    for key in msg.keys():
        key_lower = key.lower()
        if key_lower in BORING_HEADERS:
            continue
        # Decode and lower-case the header once for both extractions.
        value = self.hdr(msg, key).lower()
        emails = ExtractEmails(value)
        words = set(re.findall(WORD_REGEXP, value))
        words -= STOPLIST
        keywords.extend(['%s:%s' % (t, key_lower) for t in words])
        # Loop variable deliberately not called "e": in Python 2 a list
        # comprehension variable leaks into the function scope.
        keywords.extend(['%s:%s' % (addr, key_lower) for addr in emails])
        keywords.extend(['%s:email' % addr for addr in emails])
        if 'list' in key_lower:
            keywords.extend(['%s:list' % t for t in words])
    for key in EXPECTED_HEADERS:
        if not msg[key]:
            keywords.append('%s:missing' % key)

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

    snippet = snippet.replace('\n', ' ').replace('\t', ' ').replace('\r', '')
    return (set(keywords) - STOPLIST), snippet.strip()