def command(self, search=None):
    """Build a social graph (nodes + weighted links) from search results.

    Nodes are unique e-mail addresses seen in From/To/Cc/Bcc headers;
    links count messages from a sender to each recipient.

    NOTE(review): this snippet appears truncated — it ends inside the
    node-limit branch and never returns `res`.
    """
    session, idx, start = self._do_search(search=search)
    nodes = []
    links = []
    res = {}
    for messageid in session.results:
        message = Email(self._idx(), messageid)
        try:
            # Only the first address found in From: is used as the sender.
            msgfrom = ExtractEmails(message.get("from"))[0].lower()
        except IndexError, e:
            print "No e-mail address in '%s'" % message.get("from")
            continue

        msgto = [x.lower() for x in ExtractEmails(message.get("to"))]
        msgcc = [x.lower() for x in ExtractEmails(message.get("cc"))]
        msgbcc = [x.lower() for x in ExtractEmails(message.get("bcc"))]

        if msgfrom not in [m["email"] for m in nodes]:
            nodes.append({"email": msgfrom})

        for msgset in [msgto, msgcc, msgbcc]:
            # Register any previously unseen recipients as nodes.
            for address in msgset:
                if address not in [m["email"] for m in nodes]:
                    nodes.append({"email": address})

            curnodes = [x["email"] for x in nodes]
            fromid = curnodes.index(msgfrom)
            # Only consider links that originate from this sender.
            searchspace = [m for m in links if m["source"] == fromid]
            for recipient in msgset:
                index = curnodes.index(recipient)
                link = [m for m in searchspace if m["target"] == index]
                if len(link) == 0:
                    links.append({"source": fromid,
                                  "target": index,
                                  "value": 1})
                elif len(link) == 1:
                    # Existing edge: bump the message count.
                    link[0]["value"] += 1
                else:
                    raise ValueError("Too many links! - This should never happen.")

        if len(nodes) >= 200:
            # Let's put a hard upper limit on how many nodes we can have,
            # for performance reasons.
            # There might be a better way to do this though...
            res["limit_hit"] = True
            break
def command(self, emails=None):
    """Send the selected messages, optionally bouncing to explicit rcpts.

    Recipients may arrive as trailing command-line arguments containing
    '@', or as to/cc/bcc POST data.

    NOTE(review): this snippet is truncated — the final `except:`
    handler has no body here.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    args = list(self.args)

    # Trailing args that look like e-mail addresses are bounce targets.
    bounce_to = []
    while args and '@' in args[-1]:
        bounce_to.append(args.pop(-1))
    for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                 self.data.get('bcc', [])):
        bounce_to.extend(ExtractEmails(rcpt))

    if not emails:
        args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
        mids = self._choose_messages(args)
        emails = [Email(idx, i) for i in mids]

    # Process one at a time so we don't eat too much memory
    sent = []
    missing_keys = []
    for email in emails:
        try:
            msg_mid = email.get_msg_info(idx.MSG_MID)
            # FIXME: We are failing to capture error states with sufficient
            #        granularity, messages may be delivered to some
            #        recipients but not all...
            SendMail(session, [PrepareMessage(config,
                                              email.get_msg(pgpmime=False),
                                              rcpts=(bounce_to or None))])
            sent.append(email)
        except KeyLookupError, kle:
            session.ui.warning(_('Missing keys %s') % kle.missing)
            missing_keys.extend(kle.missing)
            self._ignore_exception()
        except:
def command(self, emails=None):
    """Send the selected messages, optionally bouncing to explicit rcpts.

    NOTE(review): this snippet is truncated — the final `except:`
    handler has no body here. Note also that `bounce_to` is collected by
    popping from `self.args` (mutating it) before `args` is copied.
    """
    session, config, idx = self.session, self.session.config, self._idx()

    # Trailing args that look like e-mail addresses are bounce targets.
    bounce_to = []
    while self.args and '@' in self.args[-1]:
        bounce_to.append(self.args.pop(-1))
    for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                 self.data.get('bcc', [])):
        bounce_to.extend(ExtractEmails(rcpt))

    args = self.args[:]
    if not emails:
        args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
        mids = self._choose_messages(args)
        emails = [Email(idx, i) for i in mids]

    # Process one at a time so we don't eat too much memory
    sent = []
    missing_keys = []
    for email in emails:
        try:
            msg_mid = email.get_msg_info(idx.MSG_MID)
            SendMail(session, [PrepareMessage(config,
                                              email.get_msg(pgpmime=False),
                                              rcpts=(bounce_to or None))])
            sent.append(email)
        except KeyLookupError, kle:
            # Missing PGP keys are fatal for this message; don't retry.
            missing_keys.extend(kle.missing)
            self._ignore_exception()
        except:
def TransformOutgoing(self, sender, rcpts, msg, **kwargs):
    """Annotate an outgoing message with OpenPGP metadata.

    Adds an OpenPGP: header advertising the sender's key (when the
    openpgp_header preference is set) and, if the message requests it
    via an attach-pgp-pubkey header, attaches the sender's public
    key(s) as application/pgp-keys parts.

    Returns (sender, rcpts, msg, matched, continue) per the transform
    hook convention; `matched` is always False here.
    """
    matched = False
    gnupg = None
    sender_keyid = None
    if self.config.prefs.openpgp_header:
        try:
            gnupg = gnupg or GnuPG(self.config)
            # Map each secret-key UID e-mail to its fingerprint, keeping
            # only keys that can both encrypt and sign.
            seckeys = dict([(uid["email"], fp) for fp, key
                            in gnupg.list_secret_keys().iteritems()
                            if key["capabilities_map"].get("encrypt")
                            and key["capabilities_map"].get("sign")
                            for uid in key["uids"]])
            sender_keyid = seckeys.get(sender)
        except (KeyError, TypeError, IndexError, ValueError):
            traceback.print_exc()

    if sender_keyid and self.config.prefs.openpgp_header:
        msg["OpenPGP"] = ("id=%s; preference=%s"
                          % (sender_keyid, self.config.prefs.openpgp_header))

    if ('attach-pgp-pubkey' in msg and
            msg['attach-pgp-pubkey'][:3].lower() in ('yes', 'tru')):
        # FIXME: Check attach_pgp_pubkey for instructions on which key(s)
        #        to attach. Attaching all of them may be a bit lame.
        gnupg = gnupg or GnuPG(self.config)
        keys = gnupg.address_to_keys(ExtractEmails(sender)[0])

        key_count = 0
        for fp, key in keys.iteritems():
            if not any(key["capabilities_map"].values()):
                continue
            # We should never really hit this more than once. But if we
            # do, should still be fine.
            keyid = key["keyid"]
            data = gnupg.get_pubkey(keyid)

            try:
                from_name = key["uids"][0]["name"]
                filename = _('Encryption key for %s.asc') % from_name
            # BUGFIX: narrowed from a bare `except:`, which also caught
            # KeyboardInterrupt/SystemExit. Only missing/odd UID data
            # should fall back to the generic filename.
            except (KeyError, IndexError, TypeError):
                filename = _('My encryption key.asc')
            att = MIMEBase('application', 'pgp-keys')
            att.set_payload(data)
            encoders.encode_base64(att)
            del att['MIME-Version']
            att.add_header('Content-Id', MakeContentID())
            att.add_header('Content-Disposition', 'attachment',
                           filename=filename)
            att.signature_info = SignatureInfo(parent=msg.signature_info)
            att.encryption_info = EncryptionInfo(parent=msg.encryption_info)
            msg.attach(att)
            key_count += 1

        if key_count > 0:
            msg['x-mp-internal-pubkeys-attached'] = "Yes"

    return sender, rcpts, msg, matched, True
def __init__(self, session, idx,
             results=None, start=0, end=None, num=None, expand=None):
    """Build a page of search-result summaries from index positions.

    results -- list of message index positions (defaults to the
               session's current results)
    start/end/num -- paging window; `end` overrides `start`
    expand -- Email objects whose full details should be inlined
    """
    dict.__init__(self)
    self.session = session
    self.expand = expand
    self.idx = idx

    results = results or session.results
    if not results:
        self._set_values([], 0, 0, 0)
        return

    terms = session.searched
    num = num or session.config.get('num_results', 20)
    # Clamp the paging window to the available results.
    if end:
        start = end - num
    if start > len(results):
        start = len(results)
    if start < 0:
        start = 0

    rv = []
    count = 0
    expand_ids = [e.msg_idx for e in (expand or [])]
    for mid in results[start:start + num]:
        count += 1
        msg_info = idx.get_msg_by_idx(mid)
        result = self._explain_msg_summary([
            msg_info[MailIndex.MSG_IDX],
            msg_info[MailIndex.MSG_ID],
            msg_info[MailIndex.MSG_FROM],
            msg_info[MailIndex.MSG_SUBJECT],
            msg_info[MailIndex.MSG_DATE],
            msg_info[MailIndex.MSG_TAGS].split(','),
            session.config.is_editable_message(msg_info[MailIndex.MSG_PTRS])
        ])
        # Tag names, except tags that were part of the search itself.
        result['tags'] = sorted([idx.config['tag'].get(t, t)
                                 for t in idx.get_tags(msg_info=msg_info)
                                 if 'tag:%s' % t not in terms])

        # When expanding, each message stands alone; otherwise group by
        # conversation.
        if not expand:
            conv = idx.get_conversation(msg_info)
        else:
            conv = [msg_info]
        conv_from = [c[MailIndex.MSG_FROM] for c in conv]

        result['short_from'] = self._compact(self._names(conv_from), 25)
        result['conv_count'] = len(conv)
        result['conv_idxs'] = [c[MailIndex.MSG_IDX] for c in conv]

        # FIXME: conv_people should look stuff in our contact list
        result['conv_people'] = people = [{
            'email': (ExtractEmails(p) or [''])[0],
            'name': self._name(p),
        } for p in list(set(conv_from))]
        people.sort(key=lambda i: i['name'] + i['email'])

        if expand and mid in expand_ids:
            exp_email = expand[expand_ids.index(mid)]
            result['message'] = self._message_details([exp_email])[0]
        rv.append(result)

    self._set_values(rv, start, count, len(results))
def index_email(self, session, email):
    """Re-index a message that is already present in the metadata index.

    Recomputes keywords and the snippet, refreshes the stored metadata
    row, strips internal (mp_*) tags and re-applies tags implied by the
    keyword filters.
    """
    msg = email.get_msg()
    msg_info = email.get_msg_info()
    mbox_idx = msg_info[self.MSG_PTRS].split(',')[0][:MBX_ID_LEN]

    msg_subj = self.hdr(msg, 'subject')
    msg_to = ExtractEmails(self.hdr(msg, 'to'))
    msg_cc = (ExtractEmails(self.hdr(msg, 'cc')) +
              ExtractEmails(self.hdr(msg, 'bcc')))

    kw, sn = self.index_message(session,
                                email.msg_mid(),
                                msg_info[self.MSG_ID],
                                msg,
                                email.get_msg_size(),
                                long(msg_info[self.MSG_DATE], 36),
                                mailbox=mbox_idx,
                                compact=False,
                                filter_hooks=[self.filter_keywords])

    tags = [k.split(':')[0] for k in kw
            if k.endswith(':in') or k.endswith(':tag')]

    snippet_max = session.config.sys.snippet_max
    self.edit_msg_info(msg_info,
                       msg_from=self.hdr(msg, 'from'),
                       msg_to=msg_to,
                       msg_cc=msg_cc,
                       msg_subject=msg_subj,
                       msg_body=sn[:max(0, snippet_max - len(msg_subj))])

    self.set_msg_at_idx_pos(email.msg_idx_pos, msg_info)

    # Reset the internal tags on this message
    for tag_id in [t for t in msg_info[self.MSG_TAGS].split(',') if t]:
        tag = session.config.get_tag(tag_id)
        if tag and tag.slug.startswith('mp_'):
            self.remove_tag(session, tag_id, msg_idxs=[email.msg_idx_pos])

    # Add normal tags implied by a rescan
    # BUGFIX: was a stray debug `print` to stdout; report progress via
    # the session UI like the rest of this class does.
    session.ui.mark('Applying %s' % tags)
    for tag_id in tags:
        self.add_tag(session, tag_id, msg_idxs=[email.msg_idx_pos])
def command(self, emails=None):
    """Send the selected messages, optionally bouncing to explicit rcpts.

    Bounce targets come from trailing '@'-containing arguments and the
    to/cc/bcc POST fields; when none are given, SendMail uses the
    messages' own headers. Successfully sent messages are re-tagged and
    re-indexed before the search results are returned.
    """
    session = self.session
    config = session.config
    idx = self._idx()

    # Collect explicit bounce recipients (note: pops from self.args).
    bounce_to = []
    while self.args and '@' in self.args[-1]:
        bounce_to.append(self.args.pop(-1))
    for field in ('to', 'cc', 'bcc'):
        for rcpt in self.data.get(field, []):
            bounce_to.extend(ExtractEmails(rcpt))

    args = self.args[:]
    if not emails:
        args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
        emails = [Email(idx, i) for i in self._choose_messages(args)]

    # Process one at a time so we don't eat too much memory
    sent = []
    for eml in emails:
        try:
            msg_mid = eml.get_msg_info(idx.MSG_MID)
            SendMail(session, [PrepareMail(eml, rcpts=(bounce_to or None))])
            sent.append(eml)
        except:
            session.ui.error('Failed to send %s' % eml)
            self._ignore_exception()

    if 'compose' in config.sys.debug:
        sys.stderr.write(('compose/Sendit: Send %s to %s (sent: %s)\n'
                          ) % (len(emails), bounce_to or '(header folks)',
                               sent))

    if not sent:
        return self._error('Nothing was sent')

    # Clear the draft/blank tags and mark everything as sent.
    if self.BLANK_TAG:
        self._untag_emails(sent, self.BLANK_TAG)
    if self.DRAFT_TAG:
        self._untag_emails(sent, self.DRAFT_TAG)
    if self.SENT_TAG:
        self._tag_emails(sent, self.SENT_TAG)

    for eml in sent:
        idx.index_email(self.session, eml)

    return self._return_search_results(sent, sent=sent)
def _explain_msg_summary(self, info):
    """Convert a raw metadata row into a JSON-friendly summary dict.

    `info` is a sequence of [mid, msg-id, from, to, subject, snippet,
    timestamp (base-36), tag-ids, editable-flag] — see the caller that
    assembles it from MailIndex columns.
    """
    msg_ts = long(info[6], 36)
    msg_date = datetime.datetime.fromtimestamp(msg_ts)
    urlmap = UrlMap(self.session)
    expl = {
        'mid': info[0],
        'id': info[1],
        'from': info[2],
        'from_email': ', '.join(ExtractEmails(info[2])),
        'to': info[3],
        'subject': info[4],
        'snippet': info[5],
        'timestamp': msg_ts,
        'shorttime': msg_date.strftime("%H:%M"),
        'date': msg_date.strftime("%Y-%m-%d"),
        'tag_ids': info[7],
        'url': urlmap.url_thread(info[0])
    }
    # BUGFIX(cleanup): removed the unused `days_ago` computation that
    # was dead code in the original.
    if info[8]:
        # Editable (draft) messages also get an editing URL.
        expl['editing_url'] = urlmap.url_edit(info[0])
    return expl
def command(self, emails=None):
    """Send the selected messages, tracking progress in the event log.

    NOTE(review): this snippet is truncated — the final `except:`
    handler has no body here.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    args = list(self.args)

    if self.session.config.sys.lockdown:
        return self._error(_('In lockdown, doing nothing.'))

    # Trailing args that look like e-mail addresses are bounce targets.
    bounce_to = []
    while args and '@' in args[-1]:
        bounce_to.append(args.pop(-1))
    for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                 self.data.get('bcc', [])):
        bounce_to.extend(ExtractEmails(rcpt))

    if not emails:
        args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
        mids = self._choose_messages(args)
        emails = [Email(idx, i) for i in mids]

    # Process one at a time so we don't eat too much memory
    sent = []
    missing_keys = []
    for email in emails:
        events = []
        try:
            msg_mid = email.get_msg_info(idx.MSG_MID)

            # This is a unique sending-ID. This goes in the public (meant
            # for debugging help) section of the event-log, so we take
            # care to not reveal details about the message or recipients.
            msg_sid = sha1b64(email.get_msg_info(idx.MSG_ID),
                              *sorted(bounce_to))[:8]

            # We load up any incomplete events for sending this message
            # to this set of recipients. If nothing is in flight, create
            # a new event for tracking this operation.
            events = list(config.event_log.incomplete(
                source=self.EVENT_SOURCE,
                data_mid=msg_mid,
                data_sid=msg_sid))
            if not events:
                events.append(config.event_log.log(
                    source=self.EVENT_SOURCE,
                    flags=Event.RUNNING,
                    message=_('Sending message'),
                    data={'mid': msg_mid, 'sid': msg_sid}))

            SendMail(session, msg_mid,
                     [PrepareMessage(config,
                                     email.get_msg(pgpmime=False),
                                     rcpts=(bounce_to or None),
                                     events=events)])
            for ev in events:
                ev.flags = Event.COMPLETE
                config.event_log.log_event(ev)
            sent.append(email)
        except KeyLookupError, kle:
            # This is fatal, we don't retry
            message = _('Missing keys %s') % kle.missing
            for ev in events:
                ev.flags = Event.COMPLETE
                ev.message = message
                config.event_log.log_event(ev)
            session.ui.warning(message)
            missing_keys.extend(kle.missing)
            self._ignore_exception()
        # FIXME: Also fatal, when the SMTP server REJECTS the mail
        except:
def TransformOutgoing(self, sender, rcpts, msg, **kwargs):
    """Annotate an outgoing message with OpenPGP metadata.

    Uses the sender's profile VCard for the key ID and crypto policy
    when available, falling back to scanning the GnuPG secret keyring.
    Adds an OpenPGP: header advertising the key and preference, and
    attaches the sender's public key(s) when the message carries an
    attach-pgp-pubkey: yes/true header.

    Returns (sender, rcpts, msg, matched, continue) per the transform
    hook convention; `matched` is always False here.
    """
    matched = False
    gnupg = None
    sender_keyid = None

    # Prefer to just get everything from the profile VCard, in the
    # common case...
    profile = self.config.vcards.get_vcard(sender)
    if profile:
        sender_keyid = profile.pgp_key
        crypto_format = profile.crypto_format or 'none'
    else:
        crypto_format = 'none'

    # Parse the openpgp_header data from the crypto_format
    openpgp_header = [p.split(':')[-1]
                      for p in crypto_format.split('+')
                      if p.startswith('openpgp_header:')]
    if not openpgp_header:
        # Fall back to the global preference, flagged as 'CFG'. This may
        # legitimately leave openpgp_header falsy when the preference is
        # unset.
        openpgp_header = self.config.prefs.openpgp_header and ['CFG']

    # BUGFIX: guard the [0] access — with no VCard policy and an unset
    # global preference, openpgp_header was ''/[] here and indexing it
    # raised instead of simply skipping the header logic.
    if openpgp_header and openpgp_header[0] != 'N' and not sender_keyid:
        # This is a fallback: this shouldn't happen much in normal use
        try:
            gnupg = gnupg or GnuPG(self.config, event=GetThreadEvent())
            # Map each secret-key UID e-mail to its fingerprint, keeping
            # only keys that can both encrypt and sign.
            seckeys = dict([(uid["email"], fp) for fp, key
                            in gnupg.list_secret_keys().iteritems()
                            if key["capabilities_map"].get("encrypt")
                            and key["capabilities_map"].get("sign")
                            for uid in key["uids"]])
            sender_keyid = seckeys.get(sender)
        except (KeyError, TypeError, IndexError, ValueError):
            traceback.print_exc()

    if sender_keyid and openpgp_header:
        preference = {
            'ES': 'signencrypt',
            'SE': 'signencrypt',
            'E': 'encrypt',
            'S': 'sign',
            'N': 'unprotected',
            'CFG': self.config.prefs.openpgp_header
        }[openpgp_header[0].upper()]
        msg["OpenPGP"] = ("id=%s; preference=%s"
                          % (sender_keyid, preference))

    if ('attach-pgp-pubkey' in msg and
            msg['attach-pgp-pubkey'][:3].lower() in ('yes', 'tru')):
        gnupg = gnupg or GnuPG(self.config, event=GetThreadEvent())
        if sender_keyid:
            keys = gnupg.list_keys(selectors=[sender_keyid])
        else:
            keys = gnupg.address_to_keys(ExtractEmails(sender)[0])

        key_count = 0
        for fp, key in keys.iteritems():
            if not any(key["capabilities_map"].values()):
                continue
            # We should never really hit this more than once. But if we
            # do, should still be fine.
            keyid = key["keyid"]
            data = gnupg.get_pubkey(keyid)

            try:
                from_name = key["uids"][0]["name"]
                filename = _('Encryption key for %s.asc') % from_name
            # BUGFIX: narrowed from a bare `except:`; only missing/odd
            # UID data should fall back to the generic filename.
            except (KeyError, IndexError, TypeError):
                filename = _('My encryption key.asc')
            att = MIMEBase('application', 'pgp-keys')
            att.set_payload(data)
            encoders.encode_base64(att)
            del att['MIME-Version']
            att.add_header('Content-Id', MakeContentID())
            att.add_header('Content-Disposition', 'attachment',
                           filename=filename)
            att.signature_info = SignatureInfo(parent=msg.signature_info)
            att.encryption_info = EncryptionInfo(
                parent=msg.encryption_info)
            msg.attach(att)
            key_count += 1

        if key_count > 0:
            msg['x-mp-internal-pubkeys-attached'] = "Yes"

    return sender, rcpts, msg, matched, True
def command(self, emails=None):
    """Send the selected messages, tracking progress in the event log.

    Supports bouncing to explicit recipients and overriding the sender.

    NOTE(review): this snippet is truncated — the final `except:`
    handler has no body here.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    args = list(self.args)

    # Trailing args that look like e-mail addresses are bounce targets.
    bounce_to = []
    while args and '@' in args[-1]:
        bounce_to.append(args.pop(-1))
    for rcpt in (self.data.get('to', []) + self.data.get('cc', []) +
                 self.data.get('bcc', [])):
        bounce_to.extend(ExtractEmails(rcpt))

    sender = self.data.get('from', [None])[0]
    if not sender and bounce_to:
        # Bounced mail defaults to the active profile's address.
        sender = idx.config.get_profile().get('email', None)

    if not emails:
        args.extend(['=%s' % mid for mid in self.data.get('mid', [])])
        emails = [self._actualize_ephemeral(i) for i in
                  self._choose_messages(args, allow_ephemeral=True)]

    # First make sure the draft tags are all gone, so other edits either
    # fail or complete while we wait for the lock.
    with GLOBAL_EDITING_LOCK:
        self._tag_drafts(emails, untag=True)
        self._tag_blank(emails, untag=True)

    # Process one at a time so we don't eat too much memory
    sent = []
    missing_keys = []
    locked_keys = []
    for email in emails:
        events = []
        try:
            msg_mid = email.get_msg_info(idx.MSG_MID)

            # This is a unique sending-ID. This goes in the public (meant
            # for debugging help) section of the event-log, so we take
            # care to not reveal details about the message or recipients.
            msg_sid = sha1b64(email.get_msg_info(idx.MSG_ID),
                              *sorted(bounce_to))[:8]

            # We load up any incomplete events for sending this message
            # to this set of recipients. If nothing is in flight, create
            # a new event for tracking this operation.
            events = list(config.event_log.incomplete(
                source=self.EVENT_SOURCE,
                data_mid=msg_mid,
                data_sid=msg_sid))
            if not events:
                events.append(config.event_log.log(
                    source=self.EVENT_SOURCE,
                    flags=Event.RUNNING,
                    message=_('Sending message'),
                    data={'mid': msg_mid, 'sid': msg_sid}))

            SendMail(session, msg_mid,
                     [PrepareMessage(config,
                                     email.get_msg(pgpmime=False),
                                     sender=sender,
                                     rcpts=(bounce_to or None),
                                     bounce=(True if bounce_to else False),
                                     events=events)])
            for ev in events:
                ev.flags = Event.COMPLETE
                config.event_log.log_event(ev)
            sent.append(email)

        # Encryption related failures are fatal, don't retry
        except (KeyLookupError, EncryptionFailureError,
                SignatureFailureError), exc:
            message = unicode(exc)
            session.ui.warning(message)
            # NOTE(review): the hasattr check is for 'missing_keys' but
            # the attribute read is 'missing' — confirm which attribute
            # the exception actually defines.
            if hasattr(exc, 'missing_keys'):
                missing_keys.extend(exc.missing)
            if hasattr(exc, 'from_key'):
                # FIXME: We assume signature failures happen because
                #        the key is locked. Are there any other reasons?
                locked_keys.append(exc.from_key)
            for ev in events:
                ev.flags = Event.COMPLETE
                ev.message = message
                config.event_log.log_event(ev)
            self._ignore_exception()
        # FIXME: Also fatal, when the SMTP server REJECTS the mail
        except:
class MailIndex:
    """This is a lazily parsing object representing a mailpile index."""

    # Column positions within a metadata index row (see l2m/m2l).
    MSG_MID = 0
    MSG_PTRS = 1
    MSG_ID = 2
    MSG_DATE = 3
    MSG_FROM = 4
    MSG_TO = 5
    MSG_SUBJECT = 6
    MSG_SNIPPET = 7
    MSG_TAGS = 8
    MSG_REPLIES = 9
    MSG_CONV_MID = 10

    def __init__(self, config):
        self.config = config
        self.STATS = {}
        self.INDEX = []          # raw index lines, position == message idx
        self.INDEX_SORT = {}
        self.INDEX_CONV = []
        self.PTRS = {}           # mailbox pointer -> index position
        self.MSGIDS = {}         # message-id -> index position
        self.EMAILS = []         # known e-mail addresses, position == id
        self.EMAIL_IDS = {}      # lowercased address -> position in EMAILS
        self.CACHE = {}
        self.MODIFIED = set()
        self.EMAILS_SAVED = 0

    def l2m(self, line):
        # Line (bytes) -> message metadata (list of unicode fields).
        return line.decode('utf-8').split(u'\t')

    # A translation table for message parts stored in the index, consists of
    # a mapping from unicode ordinals to either another unicode ordinal or
    # None, to remove a character. By default it removes the ASCII control
    # characters and replaces tabs and newlines with spaces.
    NORM_TABLE = dict([(i, None) for i in range(0, 0x20)],
                      **{ord(u'\t'): ord(u' '),
                         ord(u'\r'): ord(u' '),
                         ord(u'\n'): ord(u' '),
                         0x7F: None})

    def m2l(self, message):
        # Normalize the message before saving it so we can be sure that we
        # will be able to read it back later.
        parts = [unicode(p).translate(self.NORM_TABLE) for p in message]
        return (u'\t'.join(parts)).encode('utf-8')

    def load(self, session=None):
        """(Re)load the metadata index from disk, resetting all state."""
        self.INDEX = []
        self.CACHE = {}
        self.PTRS = {}
        self.MSGIDS = {}
        self.EMAILS = []
        self.EMAIL_IDS = {}
        CachedSearchResultSet.DropCaches()

        def process_line(line):
            # Parse one index line: comments (#), address records (@) or
            # message metadata rows.
            try:
                line = line.strip()
                if line.startswith('#'):
                    pass
                elif line.startswith('@'):
                    pos, email = line[1:].split('\t', 1)
                    pos = int(pos, 36)
                    while len(self.EMAILS) < pos + 1:
                        self.EMAILS.append('')
                    self.EMAILS[pos] = unquote(email)
                    self.EMAIL_IDS[unquote(email).lower()] = pos
                elif line:
                    words = line.split('\t')

                    # FIXME: Delete this old crap.
                    if len(words) == 10:
                        # This is an old index file, reorder it!
                        pos, p, unused, msgid, d, f, s, t, r, c = words
                        ptrs = ','.join(['0' + ptr
                                         for ptr in p.split(',')])
                        line = '\t'.join([pos, ptrs, msgid, d, f, '',
                                          s, '', t, r, c])
                    else:
                        pos, ptrs, msgid = words[:3]

                    pos = int(pos, 36)
                    while len(self.INDEX) < pos + 1:
                        self.INDEX.append('')

                    self.INDEX[pos] = line
                    self.MSGIDS[msgid] = pos
                    for msg_ptr in ptrs.split(','):
                        self.PTRS[msg_ptr] = pos
            except ValueError:
                pass

        if session:
            session.ui.mark('Loading metadata index...')
        try:
            fd = open(self.config.mailindex_file(), 'r')
            for line in fd:
                # Encrypted sections are decrypted inline.
                if line.startswith(GPG_BEGIN_MESSAGE):
                    for line in decrypt_gpg([line], fd):
                        process_line(line)
                else:
                    process_line(line)
            fd.close()
        except IOError:
            if session:
                session.ui.warning(('Metadata index not found: %s'
                                    ) % self.config.mailindex_file())
        self.cache_sort_orders(session)
        if session:
            session.ui.mark('Loaded metadata, %d messages' % len(self.INDEX))
        self.EMAILS_SAVED = len(self.EMAILS)

    def save_changes(self, session=None):
        """Append only the modified rows / new addresses to the index."""
        mods, self.MODIFIED = self.MODIFIED, set()
        if mods or len(self.EMAILS) > self.EMAILS_SAVED:
            if session:
                session.ui.mark("Saving metadata index changes...")
            fd = gpg_open(self.config.mailindex_file(),
                          self.config.prefs.gpg_recipient, 'a')
            for eid in range(self.EMAILS_SAVED, len(self.EMAILS)):
                fd.write('@%s\t%s\n' % (b36(eid), quote(self.EMAILS[eid])))
            for pos in mods:
                fd.write(self.INDEX[pos] + '\n')
            fd.close()
            flush_append_cache()
            if session:
                session.ui.mark("Saved metadata index changes")
            self.EMAILS_SAVED = len(self.EMAILS)

    def save(self, session=None):
        """Rewrite the entire metadata index from scratch."""
        self.MODIFIED = set()
        if session:
            session.ui.mark("Saving metadata index...")
        fd = gpg_open(self.config.mailindex_file(),
                      self.config.prefs.gpg_recipient, 'w')
        fd.write('# This is the mailpile.py index file.\n')
        fd.write('# We have %d messages!\n' % len(self.INDEX))
        for eid in range(0, len(self.EMAILS)):
            fd.write('@%s\t%s\n' % (b36(eid), quote(self.EMAILS[eid])))
        for item in self.INDEX:
            fd.write(item + '\n')
        fd.close()
        flush_append_cache()
        if session:
            session.ui.mark("Saved metadata index")

    def update_ptrs_and_msgids(self, session):
        """Rebuild the PTRS and MSGIDS lookup tables from INDEX."""
        session.ui.mark('Updating high level indexes')
        for offset in range(0, len(self.INDEX)):
            message = self.l2m(self.INDEX[offset])
            if len(message) > self.MSG_CONV_MID:
                self.MSGIDS[message[self.MSG_ID]] = offset
                for msg_ptr in message[self.MSG_PTRS].split(','):
                    self.PTRS[msg_ptr] = offset
            else:
                # NOTE(review): `line` is not defined in this scope —
                # this warning path would raise NameError; presumably it
                # should report `self.INDEX[offset]`.
                session.ui.warning('Bogus line: %s' % line)

    def try_decode(self, text, charset):
        # Try the declared charset first, then common fallbacks; as a
        # last resort strip everything non-ASCII.
        for cs in (charset, 'iso-8859-1', 'utf-8'):
            if cs:
                try:
                    return text.decode(cs)
                except (UnicodeEncodeError, UnicodeDecodeError, LookupError):
                    pass
        return "".join(i for i in text if ord(i) < 128)

    def hdr(self, msg, name, value=None):
        """Fetch and safely decode a message header as a single line."""
        try:
            if value is None and msg:
                # Security: RFC822 headers are not allowed to have
                # (unencoded) non-ascii characters in them, so we just
                # strip them all out before parsing.
                # FIXME: This is "safe", but can we be smarter/gentler?
                value = CleanText(msg[name], replace='_').clean
            # Note: decode_header does the wrong thing with "quoted" data.
            decoded = email.header.decode_header(
                (value or '').replace('"', ''))
            return (' '.join([self.try_decode(t[0], t[1]) for t in decoded])
                    ).replace('\r', ' ').replace('\t', ' ').replace('\n', ' ')
        except email.errors.HeaderParseError:
            return ''

    def update_location(self, session, msg_idx_pos, msg_ptr):
        """Record where (in which mailbox) a known message was seen."""
        msg_info = self.get_msg_at_idx_pos(msg_idx_pos)
        msg_ptrs = msg_info[self.MSG_PTRS].split(',')
        self.PTRS[msg_ptr] = msg_idx_pos

        # If message was seen in this mailbox before, update the location
        for i in range(0, len(msg_ptrs)):
            if (msg_ptrs[i][:MBX_ID_LEN] == msg_ptr[:MBX_ID_LEN]):
                msg_ptrs[i] = msg_ptr
                msg_ptr = None
                break
        # Otherwise, this is a new mailbox, record this sighting as well!
        if msg_ptr:
            msg_ptrs.append(msg_ptr)
        msg_info[self.MSG_PTRS] = ','.join(msg_ptrs)
        self.set_msg_at_idx_pos(msg_idx_pos, msg_info)

    def _parse_date(self, date_hdr):
        """Parse a Date: or Received: header into a unix timestamp."""
        try:
            if ';' in date_hdr:
                # Received: headers put the date after the last ';'.
                date_hdr = date_hdr.split(';')[-1].strip()
            msg_ts = long(rfc822.mktime_tz(rfc822.parsedate_tz(date_hdr)))
            # Reject timestamps more than a day in the future or absurd.
            if (msg_ts > (time.time() + 24 * 3600)) or (msg_ts < 1):
                return None
            else:
                return msg_ts
        except (ValueError, TypeError, OverflowError):
            return None

    def _extract_date_ts(self, session, msg_mid, msg_id, msg, last_date):
        """Extract a date, sanity checking against the Received: headers."""
        hdrs = [self.hdr(msg, 'date')] + (msg.get_all('received') or [])
        dates = [self._parse_date(date_hdr) for date_hdr in hdrs]
        msg_ts = dates[0]
        nz_dates = sorted([d for d in dates if d])

        if nz_dates:
            median = nz_dates[len(nz_dates) / 2]
            # Trust Date: only if it roughly agrees with the median of
            # the Received: timestamps (within a month).
            if msg_ts and abs(msg_ts - median) < 31 * 24 * 3600:
                return msg_ts
            else:
                session.ui.warning(('=%s/%s using Recieved: instead of Date:'
                                    ) % (msg_mid, msg_id))
                return median
        else:
            # If the above fails, we assume the messages in the mailbox
            # are in chronological order and just add 1 second to the date
            # of the last message if date parsing fails for some reason.
            session.ui.warning('=%s/%s has a bogus date' % (msg_mid, msg_id))
            return last_date + 1

    def scan_mailbox(self, session, mailbox_idx, mailbox_fn, mailbox_opener):
        """Scan one mailbox, indexing new/relocated messages.

        Returns the number of messages added or updated.
        """
        try:
            mbox = mailbox_opener(session, mailbox_idx)
            if mbox.editable:
                # Editable mailboxes (drafts etc.) are not scanned.
                session.ui.mark('%s: Skipped: %s' % (mailbox_idx, mailbox_fn))
                return 0
            else:
                session.ui.mark('%s: Checking: %s'
                                % (mailbox_idx, mailbox_fn))
        except (IOError, OSError, NoSuchMailboxError), e:
            session.ui.mark(('%s: Error opening: %s (%s)'
                             ) % (mailbox_idx, mailbox_fn, e))
            return 0

        unparsed = mbox.unparsed()
        if not unparsed:
            return 0

        if len(self.PTRS.keys()) == 0:
            self.update_ptrs_and_msgids(session)

        snippet_max = session.config.sys.snippet_max
        added = 0
        msg_ts = int(time.time())
        for ui in range(0, len(unparsed)):
            if mailpile.util.QUITTING:
                break

            i = unparsed[ui]
            parse_status = ('%s: Reading your mail: %d%% (%d/%d messages)'
                            ) % (mailbox_idx, 100 * ui / len(unparsed),
                                 ui, len(unparsed))

            msg_ptr = mbox.get_msg_ptr(mailbox_idx, i)
            if msg_ptr in self.PTRS:
                # Already indexed; only update the progress UI rarely.
                if (ui % 317) == 0:
                    session.ui.mark(parse_status)
                continue
            else:
                session.ui.mark(parse_status)

            # Message new or modified, let's parse it.
            msg = ParseMessage(mbox.get_file(i), pgpmime=False)
            msg_id = b64c(sha1b64((self.hdr(msg, 'message-id') or msg_ptr
                                   ).strip()))
            if msg_id in self.MSGIDS:
                # Known message seen in a new place: just relocate it.
                self.update_location(session, self.MSGIDS[msg_id], msg_ptr)
                added += 1
            else:
                # Add new message!
                msg_mid = b36(len(self.INDEX))
                msg_ts = self._extract_date_ts(session, msg_mid, msg_id,
                                               msg, msg_ts)
                keywords, snippet = self.index_message(
                    session, msg_mid, msg_id, msg, msg_ts,
                    mailbox=mailbox_idx,
                    compact=False,
                    filter_hooks=[self.filter_keywords])

                msg_subject = self.hdr(msg, 'subject')
                msg_snippet = snippet[:max(0,
                                           snippet_max - len(msg_subject))]
                tags = [k.split(':')[0] for k in keywords
                        if k.endswith(':tag')]
                msg_to = (ExtractEmails(self.hdr(msg, 'to')) +
                          ExtractEmails(self.hdr(msg, 'cc')) +
                          ExtractEmails(self.hdr(msg, 'bcc')))

                msg_idx_pos, msg_info = self.add_new_msg(
                    msg_ptr, msg_id, msg_ts, self.hdr(msg, 'from'),
                    msg_to, msg_subject, msg_snippet, tags)
                self.set_conversation_ids(msg_info[self.MSG_MID], msg)
                mbox.mark_parsed(i)
                added += 1
                if (added % 1000) == 0:
                    GlobalPostingList.Optimize(session, self, quick=True)

        if added:
            mbox.save(session)
        session.ui.mark('%s: Indexed mailbox: %s' % (mailbox_idx, mailbox_fn))
        return added
def read_message(self, session, msg_mid, msg_id, msg, msg_ts, mailbox=None):
    """Walk a message's MIME parts, extracting search keywords.

    Returns (keywords, snippet): a set of keyword strings for the
    posting lists (minus stopwords) and a short plain-text snippet.
    """
    keywords = []
    snippet = ''
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        # BUGFIX: get_charset() returns a Charset object (usually None
        # for parsed mail); get_content_charset() returns the charset
        # string from the Content-Type header, which is what
        # try_decode() expects. Matches the later revision of this
        # function elsewhere in this file.
        charset = part.get_content_charset() or 'iso-8859-1'

        def _loader(p):
            # Lazily decode this part's payload, caching the result.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == 'text/plain':
            textpart = _loader(part)
        elif ctype == 'text/html':
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    textpart = lxml.html.fromstring(
                        payload[0]).text_content()
                except:
                    session.ui.warning(('=%s/%s has bogus HTML.'
                                        ) % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif 'pgp' in part.get_content_type():
            keywords.append('pgp:has')

        att = part.get_filename()
        if att:
            att = self.try_decode(att, charset)
            keywords.append('attachment:has')
            keywords.extend([t + ':att' for t
                             in re.findall(WORD_REGEXP, att.lower())])
            textpart = (textpart or '') + ' ' + att

        if textpart:
            # FIXME: Does this lowercase non-ASCII characters correctly?
            # FIXME: What about encrypted content?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

            # FIXME: Do this better.
            if ('-----BEGIN PGP' in textpart and
                    '-----END PGP' in textpart):
                keywords.append('pgp:has')

            for extract in plugins.get_text_kw_extractors():
                keywords.extend(extract(self, msg, ctype,
                                        lambda: textpart))

            if len(snippet) < 1024:
                snippet += ' ' + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(extract(self, msg, ctype, att, part,
                                    lambda: _loader(part)))

    keywords.append('%s:id' % msg_id)
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'subject').lower()))
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'from').lower()))
    if mailbox:
        keywords.append('%s:mailbox' % mailbox.lower())
    keywords.append('%s:hprint' % HeaderPrint(msg))

    # Index interesting header contents as scoped keywords.
    for key in msg.keys():
        key_lower = key.lower()
        if key_lower not in BORING_HEADERS:
            emails = ExtractEmails(self.hdr(msg, key).lower())
            words = set(re.findall(WORD_REGEXP,
                                   self.hdr(msg, key).lower()))
            words -= STOPLIST
            keywords.extend(['%s:%s' % (t, key_lower) for t in words])
            keywords.extend(['%s:%s' % (e, key_lower) for e in emails])
            keywords.extend(['%s:email' % e for e in emails])
            if 'list' in key_lower:
                keywords.extend(['%s:list' % t for t in words])

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_ts))

    snippet = snippet.replace('\n', ' '
                              ).replace('\t', ' ').replace('\r', '')
    return (set(keywords) - STOPLIST), snippet.strip()
def _fparse(self, fromdata):
    """Split a From:-style string into (address, display name)."""
    addr = ExtractEmails(fromdata)[0]
    # Whatever remains after removing the address (and any empty angle
    # brackets) is the display name; fall back to the address itself.
    label = fromdata.replace(addr, '').replace('<>', '').strip()
    if not label:
        label = addr
    return addr, label
def __init__(self, session, idx,
             results=None, start=0, end=None, num=None, expand=None):
    """Build a page of search-result summaries from index positions.

    results -- list of message index positions (defaults to the
               session's current results)
    start/end/num -- paging window; `end` overrides `start`
    expand -- Email objects whose full details should be inlined
    """
    dict.__init__(self)
    self.session = session
    self.expand = expand
    self.idx = idx

    self['search_terms'] = terms = session.searched
    if 'tags' in idx.config:
        # Tags that were explicitly searched for (in:<slug> terms).
        self['search_tags'] = [idx.config.get_tag(t.split(':')[1], {})
                               for t in terms if t.startswith('in:')]

    results = results or session.results
    if not results:
        self._set_values([], 0, 0, 0)
        return

    num = num or session.config.prefs.num_results
    # Clamp the paging window to the available results.
    if end:
        start = end - num
    if start > len(results):
        start = len(results)
    if start < 0:
        start = 0

    rv = []
    count = 0
    expand_ids = [e.msg_idx_pos for e in (expand or [])]
    for idx_pos in results[start:start + num]:
        count += 1
        msg_info = idx.get_msg_at_idx_pos(idx_pos)
        result = self._explain_msg_summary([
            msg_info[MailIndex.MSG_MID],
            msg_info[MailIndex.MSG_ID],
            msg_info[MailIndex.MSG_FROM],
            idx.expand_to_list(msg_info),
            msg_info[MailIndex.MSG_SUBJECT],
            msg_info[MailIndex.MSG_SNIPPET],
            msg_info[MailIndex.MSG_DATE],
            msg_info[MailIndex.MSG_TAGS].split(','),
            session.config.is_editable_message(msg_info)
        ])
        # FIXME: This is nice, but doing it in _explain_msg_summary
        #        would be nicer.
        result['tags'] = []
        if 'tags' in idx.config:
            searched = [t.get('slug') for t in self['search_tags']]
            for t in sorted(idx.get_tags(msg_info=msg_info)):
                tag = idx.config.get_tag(t)
                if tag:
                    result['tags'].append(dict_merge(tag, {
                        'searched': (tag['slug'] in searched)
                    }))

        # When expanding (or in flat order), each message stands alone;
        # otherwise group by conversation.
        if not expand and 'flat' not in (session.order or ''):
            conv = idx.get_conversation(msg_info)
        else:
            conv = [msg_info]
        conv_from = [c[MailIndex.MSG_FROM] for c in conv]

        result['short_from'] = self._compact(self._names(conv_from), 25)
        result['conv_count'] = len(conv)
        result['conv_mids'] = [c[MailIndex.MSG_MID] for c in conv]

        # FIXME: conv_people should look stuff in our contact list
        result['conv_people'] = people = [{
            'email': (ExtractEmails(p) or [''])[0],
            'name': self._name(p, short=False),
        } for p in list(set(conv_from))]
        people.sort(key=lambda i: i['name'] + i['email'])

        if expand and idx_pos in expand_ids:
            exp_email = expand[expand_ids.index(idx_pos)]
            result['message'] = self._message_details([exp_email])[0]
        rv.append(result)

    self._set_values(rv, start, count, len(results))
def read_message(self, session, msg_mid, msg_id, msg, msg_size, msg_ts,
                 mailbox=None):
    """Extract search keywords and a text snippet from a parsed message.

    Walks every MIME part of `msg`, harvesting words from text parts,
    attachment filenames, headers and plugin keyword extractors, and
    accumulating up to ~1KB of snippet text along the way.

    Returns:
        A (keywords, snippet) pair where keywords is a set with STOPLIST
        words removed and snippet is a whitespace-normalized string.
    """
    keywords = []
    snippet = ''
    # One-element list so the _loader closure below can cache the decoded
    # payload of the current part across calls (py2 has no `nonlocal`).
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        charset = part.get_content_charset() or 'iso-8859-1'

        def _loader(p):
            # Lazily decode the part's payload at most once per part.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == 'text/plain':
            textpart = _loader(part)
        elif ctype == 'text/html':
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    # Strip markup so we only index the visible text.
                    textpart = lxml.html.fromstring(
                        payload[0]).text_content()
                except:
                    session.ui.warning(
                        _('=%s/%s has bogus HTML.') % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif 'pgp' in part.get_content_type():
            # PGP-related MIME part: just record its presence.
            keywords.append('pgp:has')

        att = part.get_filename()
        if att:
            # Index the attachment filename with ':att' suffixed keywords
            # and fold it into the searchable text as well.
            att = self.try_decode(att, charset)
            keywords.append('attachment:has')
            keywords.extend([t + ':att' for t
                             in re.findall(WORD_REGEXP, att.lower())])
            textpart = (textpart or '') + ' ' + att

        if textpart:
            # FIXME: Does this lowercase non-ASCII characters correctly?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

            # NOTE: As a side effect here, the cryptostate plugin will
            #       add a 'crypto:has' keyword which we check for below
            #       before performing further processing.
            for kwe in plugins.get_text_kw_extractors():
                keywords.extend(kwe(self, msg, ctype, textpart))

            if len(snippet) < 1024:
                snippet += ' ' + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(extract(self, msg, ctype, att, part,
                                    lambda: _loader(part)))

    if 'crypto:has' in keywords:
        # Build a throwaway Email wrapper (idx_pos -1, bogus metadata) so
        # we can reuse its message-tree/PGP evaluation machinery.
        e = Email(self, -1)
        e.msg_parsed = msg
        e.msg_info = self.BOGUS_METADATA[:]
        tree = e.get_message_tree(want=(e.WANT_MSG_TREE_PGP +
                                        ('text_parts', )))

        # Look for inline PGP parts, update our status if found
        e.evaluate_pgp(tree, decrypt=session.config.prefs.index_encrypted)
        msg.signature_info = tree['crypto']['signature']
        msg.encryption_info = tree['crypto']['encryption']

        # Index the contents, if configured to do so
        if session.config.prefs.index_encrypted:
            for text in [t['data'] for t in tree['text_parts']]:
                keywords.extend(re.findall(WORD_REGEXP, text.lower()))
                for kwe in plugins.get_text_kw_extractors():
                    keywords.extend(kwe(self, msg, 'text/plain', text))

    # Message-ID, subject and sender are always searchable.
    keywords.append('%s:id' % msg_id)
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'subject').lower()))
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'from').lower()))
    if mailbox:
        keywords.append('%s:mailbox' % mailbox.lower())
    keywords.append('%s:hp' % HeaderPrint(msg))

    for key in msg.keys():
        key_lower = key.lower()
        if key_lower not in BORING_HEADERS:
            # Index interesting headers both as '<word>:<header>' pairs
            # and extracted addresses as '<email>:email'.
            emails = ExtractEmails(self.hdr(msg, key).lower())
            words = set(re.findall(WORD_REGEXP,
                                   self.hdr(msg, key).lower()))
            words -= STOPLIST
            keywords.extend(['%s:%s' % (t, key_lower) for t in words])
            keywords.extend(['%s:%s' % (e, key_lower) for e in emails])
            keywords.extend(['%s:email' % e for e in emails])
            if 'list' in key_lower:
                # Mailing-list headers get an extra ':list' keyword set.
                keywords.extend(['%s:list' % t for t in words])
    for key in EXPECTED_HEADERS:
        # Make messages lacking standard headers findable as anomalies.
        if not msg[key]:
            keywords.append('%s:missing' % key)

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

    # Collapse all whitespace in the snippet to single spaces.
    snippet = snippet.replace('\n', ' ').replace('\t', ' ').replace('\r', '')
    return (set(keywords) - STOPLIST), snippet.strip()
class MailIndex: """This is a lazily parsing object representing a mailpile index.""" MSG_MID = 0 MSG_PTRS = 1 MSG_ID = 2 MSG_DATE = 3 MSG_FROM = 4 MSG_TO = 5 MSG_CC = 6 MSG_KB = 7 MSG_SUBJECT = 8 MSG_BODY = 9 MSG_TAGS = 10 MSG_REPLIES = 11 MSG_THREAD_MID = 12 MSG_FIELDS_V1 = 11 MSG_FIELDS_V2 = 13 BOGUS_METADATA = [ None, '', None, '0', '(no sender)', '', '', '0', '(not in index)', '', '', '', '-1' ] MAX_INCREMENTAL_SAVES = 25 def __init__(self, config): self.config = config self.INDEX = [] self.INDEX_SORT = {} self.INDEX_THR = [] self.PTRS = {} self.TAGS = {} self.MSGIDS = {} self.EMAILS = [] self.EMAIL_IDS = {} self.CACHE = {} self.MODIFIED = set() self.EMAILS_SAVED = 0 self._saved_changes = 0 self._lock = threading.Lock() def l2m(self, line): return line.decode('utf-8').split(u'\t') # A translation table for message parts stored in the index, consists of # a mapping from unicode ordinals to either another unicode ordinal or # None, to remove a character. By default it removes the ASCII control # characters and replaces tabs and newlines with spaces. NORM_TABLE = dict( [(i, None) for i in range(0, 0x20)], **{ ord(u'\t'): ord(u' '), ord(u'\r'): ord(u' '), ord(u'\n'): ord(u' '), 0x7F: None }) def m2l(self, message): # Normalize the message before saving it so we can be sure that we will # be able to read it back later. 
parts = [unicode(p).translate(self.NORM_TABLE) for p in message] return (u'\t'.join(parts)).encode('utf-8') def load(self, session=None): self.INDEX = [] self.CACHE = {} self.PTRS = {} self.MSGIDS = {} self.EMAILS = [] self.EMAIL_IDS = {} CachedSearchResultSet.DropCaches() def process_line(line): try: line = line.strip() if line.startswith('#'): pass elif line.startswith('@'): pos, email = line[1:].split('\t', 1) pos = int(pos, 36) while len(self.EMAILS) < pos + 1: self.EMAILS.append('') unquoted_email = unquote(email).decode('utf-8') self.EMAILS[pos] = unquoted_email self.EMAIL_IDS[unquoted_email.split()[0].lower()] = pos elif line: words = line.split('\t') # Migration: converting old metadata into new! if len(words) != self.MSG_FIELDS_V2: # V1 -> V2 adds MSG_CC and MSG_KB if len(words) == self.MSG_FIELDS_V1: words[self.MSG_CC:self.MSG_CC] = [''] words[self.MSG_KB:self.MSG_KB] = ['0'] # Add V2 -> V3 here, etc. etc. if len(words) == self.MSG_FIELDS_V2: line = '\t'.join(words) else: raise Exception( _('Your metadata index is either ' 'too old, too new or corrupt!')) pos = int(words[self.MSG_MID], 36) while len(self.INDEX) < pos + 1: self.INDEX.append('') self.INDEX[pos] = line self.MSGIDS[words[self.MSG_ID]] = pos self.update_msg_tags(pos, words) for msg_ptr in words[self.MSG_PTRS].split(','): self.PTRS[msg_ptr] = pos except ValueError: pass if session: session.ui.mark(_('Loading metadata index...')) try: self._lock.acquire() fd = open(self.config.mailindex_file(), 'r') for line in fd: if line.startswith(GPG_BEGIN_MESSAGE): for line in decrypt_gpg([line], fd): process_line(line) else: process_line(line) fd.close() except IOError: if session: session.ui.warning( _('Metadata index not found: %s') % self.config.mailindex_file()) finally: self._lock.release() self.cache_sort_orders(session) if session: session.ui.mark( _('Loaded metadata, %d messages') % len(self.INDEX)) self.EMAILS_SAVED = len(self.EMAILS) def update_msg_tags(self, msg_idx_pos, msg_info): tags = set([t 
for t in msg_info[self.MSG_TAGS].split(',') if t]) for tid in (set(self.TAGS.keys()) - tags): self.TAGS[tid] -= set([msg_idx_pos]) for tid in tags: if tid not in self.TAGS: self.TAGS[tid] = set() self.TAGS[tid].add(msg_idx_pos) def save_changes(self, session=None): mods, self.MODIFIED = self.MODIFIED, set() if mods or len(self.EMAILS) > self.EMAILS_SAVED: if self._saved_changes >= self.MAX_INCREMENTAL_SAVES: return self.save(session=session) try: self._lock.acquire() if session: session.ui.mark(_("Saving metadata index changes...")) fd = gpg_open(self.config.mailindex_file(), self.config.prefs.gpg_recipient, 'a') for eid in range(self.EMAILS_SAVED, len(self.EMAILS)): quoted_email = quote(self.EMAILS[eid].encode('utf-8')) fd.write('@%s\t%s\n' % (b36(eid), quoted_email)) for pos in mods: fd.write(self.INDEX[pos] + '\n') fd.close() flush_append_cache() if session: session.ui.mark(_("Saved metadata index changes")) self.EMAILS_SAVED = len(self.EMAILS) self._saved_changes += 1 finally: self._lock.release() def save(self, session=None): try: self._lock.acquire() self.MODIFIED = set() if session: session.ui.mark(_("Saving metadata index...")) idxfile = self.config.mailindex_file() newfile = '%s.new' % idxfile fd = gpg_open(newfile, self.config.prefs.gpg_recipient, 'w') fd.write('# This is the mailpile.py index file.\n') fd.write('# We have %d messages!\n' % len(self.INDEX)) for eid in range(0, len(self.EMAILS)): quoted_email = quote(self.EMAILS[eid].encode('utf-8')) fd.write('@%s\t%s\n' % (b36(eid), quoted_email)) for item in self.INDEX: fd.write(item + '\n') fd.close() # Keep the last 5 index files around... just in case. 
backup_file(idxfile, backups=5, min_age_delta=10) os.rename(newfile, idxfile) flush_append_cache() self._saved_changes = 0 if session: session.ui.mark(_("Saved metadata index")) finally: self._lock.release() def update_ptrs_and_msgids(self, session): session.ui.mark(_('Updating high level indexes')) for offset in range(0, len(self.INDEX)): message = self.l2m(self.INDEX[offset]) if len(message) == self.MSG_FIELDS_V2: self.MSGIDS[message[self.MSG_ID]] = offset for msg_ptr in message[self.MSG_PTRS].split(','): self.PTRS[msg_ptr] = offset else: session.ui.warning(_('Bogus line: %s') % line) def try_decode(self, text, charset): for cs in (charset, 'iso-8859-1', 'utf-8'): if cs: try: return text.decode(cs) except (UnicodeEncodeError, UnicodeDecodeError, LookupError): pass return "".join(i for i in text if ord(i) < 128) def hdr(self, msg, name, value=None): try: if value is None and msg: # Security: RFC822 headers are not allowed to have (unencoded) # non-ascii characters in them, so we just strip them all out # before parsing. # FIXME: This is "safe", but can we be smarter/gentler? value = CleanText(msg[name], replace='_').clean # Note: decode_header does the wrong thing with "quoted" data. decoded = email.header.decode_header((value or '').replace('"', '')) return (' '.join([self.try_decode(t[0], t[1]) for t in decoded ])).replace('\r', ' ').replace('\t', ' ').replace('\n', ' ') except email.errors.HeaderParseError: return '' def update_location(self, session, msg_idx_pos, msg_ptr): msg_info = self.get_msg_at_idx_pos(msg_idx_pos) msg_ptrs = msg_info[self.MSG_PTRS].split(',') self.PTRS[msg_ptr] = msg_idx_pos # If message was seen in this mailbox before, update the location for i in range(0, len(msg_ptrs)): if msg_ptrs[i][:MBX_ID_LEN] == msg_ptr[:MBX_ID_LEN]: msg_ptrs[i] = msg_ptr msg_ptr = None break # Otherwise, this is a new mailbox, record this sighting as well! 
if msg_ptr: msg_ptrs.append(msg_ptr) msg_info[self.MSG_PTRS] = ','.join(msg_ptrs) self.set_msg_at_idx_pos(msg_idx_pos, msg_info) def _parse_date(self, date_hdr): """Parse a Date: or Received: header into a unix timestamp.""" try: if ';' in date_hdr: date_hdr = date_hdr.split(';')[-1].strip() msg_ts = long(rfc822.mktime_tz(rfc822.parsedate_tz(date_hdr))) if (msg_ts > (time.time() + 24 * 3600)) or (msg_ts < 1): return None else: return msg_ts except (ValueError, TypeError, OverflowError): return None def _extract_date_ts(self, session, msg_mid, msg_id, msg, last_date): """Extract a date, sanity checking against the Received: headers.""" hdrs = [self.hdr(msg, 'date')] + (msg.get_all('received') or []) dates = [self._parse_date(date_hdr) for date_hdr in hdrs] msg_ts = dates[0] nz_dates = sorted([d for d in dates if d]) if nz_dates: median = nz_dates[len(nz_dates) / 2] if msg_ts and abs(msg_ts - median) < 31 * 24 * 3600: return msg_ts else: session.ui.warning( _('=%s/%s using Received: instead of Date:') % (msg_mid, msg_id)) return median else: # If the above fails, we assume the messages in the mailbox are in # chronological order and just add 1 second to the date of the last # message if date parsing fails for some reason. session.ui.warning( _('=%s/%s has a bogus date') % (msg_mid, msg_id)) return last_date + 1 def encode_msg_id(self, msg_id): return b64c(sha1b64(msg_id.strip())) def get_msg_id(self, msg, msg_ptr): raw_msg_id = self.hdr(msg, 'message-id') if not raw_msg_id: # Create a very long pseudo-msgid for messages without a # Message-ID. This was a very badly behaved mailer, so if # we create duplicates this way, we are probably only # losing spam. Even then the Received line should save us. raw_msg_id = ('\t'.join([ self.hdr(msg, 'date'), self.hdr(msg, 'subject'), self.hdr(msg, 'received'), self.hdr(msg, 'from'), self.hdr(msg, 'to') ])).strip() # Fall back to the msg_ptr if all else fails. 
if not raw_msg_id: print _('WARNING: No proper Message-ID for %s') % msg_ptr return self.encode_msg_id(raw_msg_id or msg_ptr) def scan_mailbox(self, session, mailbox_idx, mailbox_fn, mailbox_opener): try: mbox = mailbox_opener(session, mailbox_idx) if mbox.editable: session.ui.mark( _('%s: Skipped: %s') % (mailbox_idx, mailbox_fn)) return 0 else: session.ui.mark( _('%s: Checking: %s') % (mailbox_idx, mailbox_fn)) except (IOError, OSError, NoSuchMailboxError), e: session.ui.mark( _('%s: Error opening: %s (%s)') % (mailbox_idx, mailbox_fn, e)) return 0 unparsed = mbox.unparsed() if not unparsed: return 0 if len(self.PTRS.keys()) == 0: self.update_ptrs_and_msgids(session) snippet_max = session.config.sys.snippet_max added = 0 msg_ts = int(time.time()) for ui in range(0, len(unparsed)): if mailpile.util.QUITTING: break i = unparsed[ui] parse_status = _('%s: Reading your mail: %d%% (%d/%d messages)' ) % (mailbox_idx, 100 * ui / len(unparsed), ui, len(unparsed)) msg_ptr = mbox.get_msg_ptr(mailbox_idx, i) if msg_ptr in self.PTRS: if (ui % 317) == 0: session.ui.mark(parse_status) play_nice_with_threads() continue else: session.ui.mark(parse_status) play_nice_with_threads() # Message new or modified, let's parse it. if 'rescan' in session.config.sys.debug: session.ui.debug('Reading message %s/%s' % (mailbox_idx, i)) try: msg_fd = mbox.get_file(i) msg = ParseMessage( msg_fd, pgpmime=session.config.prefs.index_encrypted) except (IOError, OSError, ValueError, IndexError, KeyError): if session.config.sys.debug: traceback.print_exc() session.ui.warning(('Reading message %s/%s FAILED, skipping') % (mailbox_idx, i)) continue msg_size = msg_fd.tell() msg_id = self.get_msg_id(msg, msg_ptr) if msg_id in self.MSGIDS: self.update_location(session, self.MSGIDS[msg_id], msg_ptr) added += 1 else: # Add new message! 
msg_mid = b36(len(self.INDEX)) msg_ts = self._extract_date_ts(session, msg_mid, msg_id, msg, msg_ts) play_nice_with_threads() keywords, snippet = self.index_message( session, msg_mid, msg_id, msg, msg_size, msg_ts, mailbox=mailbox_idx, compact=False, filter_hooks=plugins.filter_hooks([self.filter_keywords])) msg_subject = self.hdr(msg, 'subject') msg_snippet = snippet[:max(0, snippet_max - len(msg_subject))] tags = [ k.split(':')[0] for k in keywords if k.endswith(':in') or k.endswith(':tag') ] msg_to = ExtractEmails(self.hdr(msg, 'to')) msg_cc = (ExtractEmails(self.hdr(msg, 'cc')) + ExtractEmails(self.hdr(msg, 'bcc'))) msg_idx_pos, msg_info = self.add_new_msg( msg_ptr, msg_id, msg_ts, self.hdr(msg, 'from'), msg_to, msg_cc, msg_size, msg_subject, msg_snippet, tags) self.set_conversation_ids(msg_info[self.MSG_MID], msg) mbox.mark_parsed(i) added += 1 GlobalPostingList.Optimize(session, self, lazy=True, quick=True) if added: mbox.save(session) session.ui.mark( _('%s: Indexed mailbox: %s') % (mailbox_idx, mailbox_fn)) return added