def command(self):
    """Import a PGP key found in an attachment of one selected message.

    If the last argument starts with "#" it is used as the attachment
    reference; otherwise the reference comes from the "att" data variable
    and defaults to 'application/pgp-keys'.  Message IDs passed in the
    "mid" data variable are added to the message selection.

    Returns:
        The result of self._success() carrying the GnuPG import result
        when key data was extracted, otherwise the result of
        self._error() (nothing selected, more than one message selected,
        or no key data found).
    """
    session, config, idx = self.session, self.session.config, self._idx()
    args = list(self.args)
    # Slice rather than index, so an empty final argument cannot raise.
    if args and args[-1][:1] == "#":
        attid = args.pop()
    else:
        attid = self.data.get("att", 'application/pgp-keys')
    args.extend(["=%s" % x for x in self.data.get("mid", [])])

    eids = self._choose_messages(args)
    # A length is never negative; the guard has to catch the empty
    # selection, otherwise list(eids)[0] below raises IndexError.
    if len(eids) < 1:
        return self._error("No messages selected", None)
    elif len(eids) > 1:
        return self._error("One message at a time, please", None)

    email = Email(idx, list(eids)[0])
    fn, attr = email.extract_attachment(session, attid, mode='inline')
    if attr and attr["data"]:
        res = self._gnupg().import_keys(attr["data"])
        # Previous crypto evaluations may now be out of date, so we
        # clear the cache so users can see results right away.
        ClearParseCache(pgpmime=True)
        return self._success("Imported key", res)

    return self._error("No results found", None)
def _get_message_keys(self, messageid): keys = self.key_cache.get(messageid, []) if not keys: email = Email(self._idx(), messageid) # First we check the Autocrypt headers msg = email.get_msg(pgpmime='all') for ach in ([extract_autocrypt_header(msg)] + extract_autocrypt_gossip_headers(msg)): if 'keydata' in ach: for keydata in get_keydata(ach['keydata'], autocrypt_header=ach, include_subkeys=False): keys.append((keydata, ach['keydata'])) # Then go looking at the attachments attachments = email.get_message_tree(want=["attachments"] )["attachments"] for part in attachments: if len(keys) > 100: # Just to set some limit... break if _might_be_pgp_key(part["filename"], part["mimetype"]): key = part["part"].get_payload(None, True) for keydata in get_keydata(key, include_subkeys=False): keys.append((keydata, key)) self.key_cache[messageid] = keys return keys
def _get_message_keys(self, messageid): keys = self.key_cache.get(messageid, []) if not keys: email = Email(self._idx(), messageid) attachments = email.get_message_tree(want=["attachments"] )["attachments"] for part in attachments: if _might_be_pgp_key(part["filename"], part["mimetype"]): key = part["part"].get_payload(None, True) for keydata in _get_keydata(key): keys.append((keydata, key)) if len(keys) > 5: # Just to set some limit... break self.key_cache[messageid] = keys return keys
def _get_message_keys(self, messageid): keys = self.key_cache.get(messageid, []) if not keys: email = Email(self._idx(), messageid) attachments = email.get_message_tree( want=["attachments"])["attachments"] for part in attachments: if _might_be_pgp_key(part["filename"], part["mimetype"]): key = part["part"].get_payload(None, True) for keydata in _get_keydata(key): keys.append((keydata, key)) if len(keys) > 5: # Just to set some limit... break self.key_cache[messageid] = keys return keys
def command(self):
    """Classify the selected messages and apply the resulting auto-tags.

    For every classified message and every auto-tag configuration whose
    match_tag resolves to a tag, the message is filed under match_tag
    when any classifier verdict is truthy, or under the configuration's
    unsure_tag (if set) when no verdict is truthy but at least one is
    None.

    Returns:
        The result of self._success() with a dict mapping tag IDs to
        the list of message IDs that were tagged.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    emails = [Email(idx, mid) for mid in self._choose_messages(self.args)]
    scores = self._classify(emails)

    tag = {}
    for mid in scores:
        for at_config in autotag_configs(config):
            at_tag = config.get_tag(at_config.match_tag)
            if not at_tag:
                continue

            wants = scores[mid].get(at_tag._key, [(False, )])
            # NOTE(review): assumes the first element of each entry is
            # True, False or None (None meaning "unsure") - confirm
            # against _classify().
            verdicts = [w[0] for w in wants]

            if any(verdicts):
                tag.setdefault(at_config.match_tag, []).append(mid)
            elif at_config.unsure_tag and any(v is None for v in verdicts):
                # The verdicts used to be collapsed into a bool before
                # being compared with None, which made this branch
                # unreachable and the unsure tag was never applied.
                tag.setdefault(at_config.unsure_tag, []).append(mid)

    for tid in tag:
        idx.add_tag(session, tid, msg_idxs=[int(i, 36) for i in tag[tid]])

    return self._success(_('Auto-tagged %d messages') % len(emails), tag)
def _get_canned(cls, idx, cid):
    """Return the 'body' editing string of the message with base-36 ID cid.

    Any ValueError, IndexError, TypeError, OSError or IOError raised
    while loading the message is printed as a traceback and an empty
    string is returned instead.
    """
    try:
        canned = Email(idx, int(cid, 36))
        strings = canned.get_editing_strings()
        return strings.get('body', '')
    except (ValueError, IndexError, TypeError, OSError, IOError):
        traceback.print_exc()  # FIXME, ugly
        return ''
def CreateForward(cls, idx, session, refs, msgid,
                  with_atts=False, cid=None, ephemeral=False):
    """Create a draft that forwards the messages in refs.

    Each referenced message is quoted under an "Original Message" banner
    with its Date, Subject, From and To headers followed by its text
    parts.  With with_atts, attachments whose MIME type is not listed in
    cls._ATT_MIMETYPES are carried over.  If cid is given, the canned
    text it refers to is placed before the quoted messages.

    Returns:
        A tuple (email, ephemeral); for ephemeral drafts the second item
        is a one-element list holding the ephemeral message ID.
    """
    trees = [ref.evaluate_pgp(ref.get_message_tree(), decrypt=True)
             for ref in refs]
    ref_subjs = [tree['headers_lc']['subject'] for tree in trees]

    msg_bodies = []
    msg_atts = []
    for tree in trees:
        # FIXME: Templates/settings for how we quote forwards?
        chunks = ['-------- Original Message --------\n']
        for name in ('Date', 'Subject', 'From', 'To'):
            value = tree['headers_lc'].get(name.lower(), None)
            if value:
                chunks.append('%s: %s\n' % (name, value))
        chunks.append('\n')
        chunks.extend([part['data'] for part in tree['text_parts']
                       if part['type'] in cls._TEXT_PARTTYPES])
        msg_bodies.append(''.join(chunks))

        if with_atts:
            msg_atts.extend([att['part'] for att in tree['attachments']
                             if att['mimetype'] not in cls._ATT_MIMETYPES])

    if ephemeral:
        local_id, lmbox = -1, None
        if msg_atts:
            fmt = 'forward-att-%s-%s'
        else:
            fmt = 'forward-%s-%s'
        ephemeral = [fmt % (msgid[1:-1].replace('@', '_'),
                            refs[0].msg_mid())]
    else:
        local_id, lmbox = session.config.open_local_mailbox(session)

    if cid:
        # FIXME: Instead, we should use placeholders in the template
        #        and insert the quoted bits in the right place (or
        #        nowhere if the template doesn't want them).
        msg_bodies.insert(0, cls._get_canned(idx, cid))

    subject = cls.prefix_subject(ref_subjs[-1], 'Fwd:', cls._FW_REGEXP)
    email = Email.Create(idx, local_id, lmbox,
                         msg_text='\n\n'.join(msg_bodies),
                         msg_subject=subject,
                         msg_id=msgid,
                         msg_atts=msg_atts,
                         save=(not ephemeral),
                         ephemeral_mid=ephemeral and ephemeral[0])
    return email, ephemeral
def _actualize_ephemeral(self, ephemeral_mid):
    """Turn an ephemeral message ID into a real Email in the index.

    An int is treated as an ordinary index position.  Otherwise the ID
    is split into a type ('forward', 'reply' or anything else for a new
    message), an optional target ('all' or 'att'), the Message-ID with
    '@' encoded as '_', and the base-36 ID of the referenced message.
    If the Message-ID is already in the index that message is returned;
    otherwise the draft is created, tagged as blank and returned.
    """
    idx = self._idx()
    if isinstance(ephemeral_mid, int):
        # Not actually ephemeral, just return a normal Email
        return Email(idx, ephemeral_mid)

    prefix, ref_mid = ephemeral_mid.rsplit('-', 1)
    etype, etarg, raw_id = prefix.split('-', 2)
    if etarg not in ('all', 'att'):
        # No target given, so that field was really part of the ID.
        raw_id = etarg + '-' + raw_id
    msgid = '<%s>' % raw_id.replace('_', '@')
    etype = etype.lower()

    known_idx = idx.MSGIDS.get(idx._encode_msg_id(msgid))
    if known_idx is not None:
        # Already actualized, just return a normal Email
        return Email(idx, known_idx)

    if etype == 'forward':
        refs = [Email(idx, int(ref_mid, 36))]
        created = Forward.CreateForward(idx, self.session, refs, msgid,
                                        with_atts=(etarg == 'att'))
        self._track_action('fwded', refs)
    elif etype == 'reply':
        refs = [Email(idx, int(ref_mid, 36))]
        created = Reply.CreateReply(idx, self.session, refs, msgid,
                                    reply_all=(etarg == 'all'))
        self._track_action('replied', refs)
    else:
        created = Compose.CreateMessage(idx, self.session, msgid)
    e = created[0]

    self._tag_blank([e])
    self.session.ui.debug('Actualized: %s' % e.msg_mid())

    return Email(idx, e.msg_idx_pos)
def CreateReply(cls, idx, session, refs, msgid,
                reply_all=False, cid=None, ephemeral=False):
    """Create a draft replying to the messages in refs.

    The From/To/Cc headers come from cls._create_from_to_cc(); Cc is
    dropped unless reply_all is set.  The text parts of each referenced
    message are quoted with "> " (re-flowed first when the configured
    line length is above 40).  If cid is given, the canned text it
    refers to is placed before the quotes.

    Returns:
        A tuple (email, ephemeral); for ephemeral drafts the second item
        is a one-element list holding the ephemeral message ID.
    """
    trees = [ref.evaluate_pgp(ref.get_message_tree(), decrypt=True)
             for ref in refs]

    headers = cls._create_from_to_cc(idx, session, trees)
    if 'cc' in headers and not reply_all:
        del headers['cc']

    ref_ids = [tree['headers_lc'].get('message-id') for tree in trees]
    ref_subjs = [tree['headers_lc'].get('subject') for tree in trees]

    msg_bodies = []
    for tree in trees:
        # FIXME: Templates/settings for how we quote replies?
        quoted = ''.join([part['data'] for part in tree['text_parts']
                          if part['type'] in cls._TEXT_PARTTYPES
                          and part['data']])
        if not quoted:
            continue
        target_width = session.config.prefs.line_length
        if target_width > 40:
            quoted = reflow_text(quoted, target_width=target_width-2)
        attribution = _('%s wrote:') % tree['headers_lc']['from']
        text = attribution + '\n' + quoted
        msg_bodies.append('\n\n' + text.replace('\n', '\n> '))

    if ephemeral:
        local_id, lmbox = -1, None
        if reply_all:
            fmt = 'reply-all-%s-%s'
        else:
            fmt = 'reply-%s-%s'
        ephemeral = [fmt % (msgid[1:-1].replace('@', '_'),
                            refs[0].msg_mid())]
    else:
        local_id, lmbox = session.config.open_local_mailbox(session)

    if 'cc' in headers:
        fmt = _('Composing a reply from %(from)s to %(to)s, cc %(cc)s')
    else:
        fmt = _('Composing a reply from %(from)s to %(to)s')
    session.ui.debug(fmt % headers)

    if cid:
        # FIXME: Instead, we should use placeholders in the template
        #        and insert the quoted bits in the right place (or
        #        nowhere if the template doesn't want them).
        msg_bodies.insert(0, cls._get_canned(idx, cid))

    email = Email.Create(
        idx, local_id, lmbox,
        msg_text='\n\n'.join(msg_bodies),
        msg_subject=cls.prefix_subject(ref_subjs[-1], 'Re:',
                                       cls._RE_REGEXP),
        msg_from=headers.get('from', None),
        msg_to=headers.get('to', []),
        msg_cc=headers.get('cc', []),
        msg_references=[ref_id for ref_id in ref_ids if ref_id],
        msg_id=msgid,
        save=(not ephemeral),
        ephemeral_mid=ephemeral and ephemeral[0])
    return (email, ephemeral)
def command(self):
    """Display the selected messages, raw or within their conversations.

    Messages are chosen from the arguments plus the "mid" data variable.
    A leading 'raw' argument requests the raw source of each message;
    otherwise each message is shown in its conversation, and messages
    belonging to a conversation that is already in the results are
    added to that existing result.

    Returns:
        Whatever self._side_effects() returns if that is not None;
        otherwise the result of self._success() with a single result
        object (exactly one result) or the list of results.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    results = []
    args = list(self.args)
    args.extend(['=%s' % mid.replace('=', '')
                 for mid in self.data.get('mid', [])])
    if args and args[0].lower() == 'raw':
        raw = args.pop(0)
    else:
        raw = False
    emails = [Email(idx, mid) for mid in self._choose_messages(args)]

    rv = self._side_effects(emails)
    if rv is not None:
        # This is here so derived classes can do funky things.
        return rv

    for email in emails:
        if raw:
            subject = email.get_msg_info(idx.MSG_SUBJECT)
            # Close the message file once read instead of leaking the
            # handle until garbage collection.
            fd = email.get_file()
            try:
                source = fd.read()
            finally:
                fd.close()
            results.append(self.RawResult({
                'summary': _('Raw message: %s') % subject,
                'source': source
            }))
        else:
            old_result = None
            for result in results:
                if email.msg_idx_pos in result.results:
                    old_result = result
            if old_result:
                old_result.add_email(email)
                continue

            # Get conversation
            conv = idx.get_conversation(msg_idx=email.msg_idx_pos)

            # Sort our results by date...
            def sort_conv_key(info):
                return -int(info[idx.MSG_DATE], 36)
            conv.sort(key=sort_conv_key)

            # Convert to index positions only
            conv = [int(info[idx.MSG_MID], 36) for info in conv]

            session.results = conv
            results.append(SearchResults(session, idx,
                                         emails=[email],
                                         num=len(conv)))

    if len(results) == 1:
        return self._success(_('Displayed a single message'),
                             result=results[0])
    else:
        session.results = []
        return self._success(_('Displayed %d messages') % len(results),
                             result=results)
def _get_message_keys(self, messageid,
                      autocrypt=True, autocrypt_gossip=True,
                      attachments=True):
    """Return the (keyinfo, raw key) pairs found in a message.

    Args:
        messageid: message to inspect; also the key into self.key_cache.
        autocrypt: include the key from the Autocrypt header.
        autocrypt_gossip: include keys from Autocrypt-Gossip headers.
        attachments: include keys from attachments accepted by
            _might_be_pgp_key().

    Returns:
        A list of (keyinfo, raw key data) tuples.  Key infos that came
        from a header have is_autocrypt set to True and is_gossip set
        according to the kind of header they came from.

    NOTE(review): the cache is keyed by messageid only, so a result
    gathered with some sources disabled is returned for later calls
    regardless of their flags - confirm callers are fine with that.
    """
    keys = self.key_cache.get(messageid, [])
    if not keys:
        email = Email(self._idx(), messageid)

        # First we check the Autocrypt headers.  Each header is paired
        # with a flag saying whether it is gossip; counting loop
        # iterations instead mislabels the first gossip header as
        # non-gossip whenever autocrypt=False.
        msg = email.get_msg(pgpmime='all')
        ac_headers = []
        if autocrypt:
            ac_headers.append((extract_autocrypt_header(msg), False))
        if autocrypt_gossip:
            ac_headers.extend(
                [(gossip, True)
                 for gossip in extract_autocrypt_gossip_headers(msg)])
        for ach, is_gossip in ac_headers:
            if 'keydata' in ach:
                for keyinfo in get_keyinfo(ach['keydata'],
                                           autocrypt_header=ach,
                                           key_info_class=MailpileKeyInfo):
                    keyinfo.is_autocrypt = True
                    keyinfo.is_gossip = is_gossip
                    keys.append((keyinfo, ach['keydata']))

        # Then go looking at the attachments
        atts = []
        if attachments:
            atts.extend(email.get_message_tree(
                want=["attachments"])["attachments"])
        for part in atts:
            if len(keys) > 100:  # Just to set some limit...
                break
            if _might_be_pgp_key(part["filename"], part["mimetype"]):
                key = part["part"].get_payload(None, True)
                for keyinfo in get_keyinfo(key,
                                           key_info_class=MailpileKeyInfo):
                    keys.append((keyinfo, key))

        self.key_cache[messageid] = keys
    return keys
def CreateMessage(cls, idx, session, msgid, cid=None, ephemeral=False):
    """Create a new blank draft, optionally pre-filled with canned text.

    Non-ephemeral drafts are created in the local mailbox; ephemeral
    ones get an ID of the form 'new-E-<msgid>-mail'.  The default From
    address is used only when exactly one profile vCard exists.

    Returns:
        A tuple (email, ephemeral); for ephemeral drafts the second item
        is a one-element list holding the ephemeral message ID.
    """
    if ephemeral:
        local_id, lmbox = -1, None
        ephemeral = ['new-E-%s-mail' % msgid[1:-1].replace('@', '_')]
    else:
        local_id, lmbox = session.config.open_local_mailbox(session)

    profiles = session.config.vcards.find_vcards([], kinds=['profile'])

    body = ''
    if cid:
        body = cls._get_canned(idx, cid) or ''

    email = Email.Create(idx, local_id, lmbox,
                         save=(not ephemeral),
                         msg_text=body,
                         msg_id=msgid,
                         ephemeral_mid=ephemeral and ephemeral[0],
                         use_default_from=(len(profiles) == 1))
    return (email, ephemeral)
def command(self):
    """Start composing a forward of the selected messages.

    With command-line arguments, leading 'att' and 'ephemeral' words
    act as flags; without arguments the "mid", "ephemeral" and "atts"
    data variables are used.  Any request whose "_method" is not POST
    is forced to be ephemeral.

    Raises:
        UsageError: if both ephemeral and attachments are requested.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    with_atts = False
    ephemeral = False

    args = list(self.args)
    if args:
        while args:
            word = args[0].lower()
            if word == 'att':
                with_atts = args.pop(0) or True
            elif word == 'ephemeral':
                ephemeral = args.pop(0) or True
            else:
                break
    else:
        args = ["=%s" % x for x in self.data.get('mid', [])]
        ephemeral = truthy((self.data.get('ephemeral') or [False])[0])
        with_atts = truthy((self.data.get('atts') or [False])[0])

    # Make sure GET does not change backend state
    if self.data.get('_method', 'POST') != 'POST':
        ephemeral = True

    if ephemeral and with_atts:
        raise UsageError(_('Sorry, ephemeral messages cannot have '
                           'attachments at this time.'))

    refs = [Email(idx, i) for i in self._choose_messages(args)]
    if not refs:
        return self._error(_('No message found'))

    cid = self.data.get('cid', [None])[0]
    email, ephemeral = self.CreateForward(idx, session, refs,
                                          self._new_msgid(),
                                          with_atts=with_atts,
                                          cid=cid,
                                          ephemeral=ephemeral)
    if not ephemeral:
        self._track_action('fwded', refs)
        self._tag_blank([email])

    return self._edit_messages([email], ephemeral=ephemeral)
def command(self):
    """Start composing a reply to the selected messages.

    With command-line arguments, leading 'all' and 'ephemeral' words
    act as flags; without arguments the "mid", "ephemeral" and
    "reply_all" data variables are used.  Any request whose "_method"
    is not POST is forced to be ephemeral.

    Returns:
        The result of self._edit_messages() for the new draft, or the
        result of self._error() when no message was found or no From
        address is configured.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    reply_all = False
    ephemeral = False

    args = list(self.args)
    if args:
        while args:
            word = args[0].lower()
            if word == 'all':
                reply_all = args.pop(0) or True
            elif word == 'ephemeral':
                ephemeral = args.pop(0) or True
            else:
                break
    else:
        args = ["=%s" % x for x in self.data.get('mid', [])]
        ephemeral = truthy((self.data.get('ephemeral') or [False])[0])
        reply_all = truthy((self.data.get('reply_all') or [False])[0])

    # Make sure GET does not change backend state, allow on CLI.
    if self.data.get('_method', 'POST') != 'POST':
        ephemeral = True

    refs = [Email(idx, i) for i in self._choose_messages(args)]
    if not refs:
        return self._error(_('No message found'))

    try:
        cid = self.data.get('cid', [None])[0]
        email, ephemeral = self.CreateReply(idx, session, refs,
                                            self._new_msgid(),
                                            reply_all=reply_all,
                                            cid=cid,
                                            ephemeral=ephemeral)
    except NoFromAddressError:
        return self._error(_('You must configure a '
                             'From address first.'))

    if not ephemeral:
        self._track_action('replied', refs)
        self._tag_blank([email])

    return self._edit_messages([email], ephemeral=ephemeral)
def command(self):
    """Feed the selected messages to the Autocrypt keyword extractor.

    autocrypt_meta_kwe() is called for each message with saving
    disabled; the updates it reports through the callback are
    collected, and the Autocrypt DB is saved once at the end if any
    update carried a record.

    Returns:
        The result of self._success() with a list of
        (record dict, sorted keys) tuples.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    updated = []
    args = list(self.args)

    def _record(u, k):
        updated.append((u, k))

    emails = [Email(idx, i) for i in self._choose_messages(args)]
    for e in emails:
        autocrypt_meta_kwe(
            idx,
            e.msg_mid(),
            e.get_msg(),
            None,
            int(e.get_msg_info(e.index.MSG_DATE), 36),
            update_cb=_record,
            save_DB=False)

    # Updates without a record are dropped from the report.
    updated = [(u[0].as_dict(), sorted(list(u[1])))
               for u in updated if u[0] is not None]

    if updated:
        save_Autocrypt_DB(config)

    return self._success("Updated %d records" % len(updated), updated)
def command(self):
    """Extract attachments from the selected messages.

    Arguments, in order: an optional mode ('inline', 'inline-preview',
    'preview', 'get' or 'download'; default 'download'), the attachment
    ID and the message selection, and optionally a final '>name-format'
    argument.  The attachment ID is the first remaining argument if it
    starts with '#', 'part-' or 'ext:', otherwise the last one.  An ID
    starting with '*' selects every attachment whose filename ends with
    the rest of the ID (case-insensitive).

    Returns:
        A list of info dicts as returned by extract_attachment(), each
        extended with 'idx' and, if a file was written, 'created_file'.
        The result of self._error() is returned if writing to the
        filesystem is forbidden or no attachment was specified.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    mode = 'download'
    name_fmt = None

    args = list(self.args)
    # Guard every positional lookup: indexing an empty argument list
    # used to raise IndexError.
    if args and args[0] in ('inline', 'inline-preview', 'preview',
                            'get', 'download'):
        mode = args.pop(0)

    if len(args) > 0 and args[-1].startswith('>'):
        forbid = security.forbid_command(self,
                                         security.CC_ACCESS_FILESYSTEM)
        if forbid:
            return self._error(forbid)
        name_fmt = args.pop(-1)[1:]

    if not args:
        return self._error('No attachment specified')

    if args[0].startswith(('#', 'part-', 'ext:')):
        cid = args.pop(0)
    else:
        cid = args.pop(-1)

    emails = [Email(idx, i) for i in self._choose_messages(args)]
    results = []
    for e in emails:
        # Slice rather than index so an empty ID cannot raise.
        if cid[:1] == '*':
            tree = e.get_message_tree(want=['attachments'])
            suffix = cid[1:].lower()
            cids = [('#%s' % a['count']) for a in tree['attachments']
                    if a['filename'].lower().endswith(suffix)]
        else:
            cids = [cid]

        for c in cids:
            fn, info = e.extract_attachment(session, c,
                                            name_fmt=name_fmt, mode=mode)
            if info:
                info['idx'] = e.msg_idx_pos
                if fn:
                    info['created_file'] = fn
                results.append(info)
    return results
def process_message(self, peer, mailfrom, rcpttos, data):
    """Spool an incoming message to the local mailbox and index it.

    A new Email is created in the local mailbox and tagged with the
    first 'blank' tag; the tag is removed again once the Email has been
    updated from the parsed message.

    Args:
        peer, mailfrom, rcpttos: not used by this method.
        data: the message source, parsed with email.parser.Parser.

    Returns:
        None on success, or the string '400 Oops wtf' if anything in
        the spooling steps raised an exception.
    """
    # We can assume that the mailfrom and rcpttos have checked out
    # and this message is indeed intended for us. Spool it to disk
    # and add to the index!
    session, config = self.session, self.session.config
    blank_tid = config.get_tags(type='blank')[0]._key
    idx = config.index
    play_nice_with_threads()
    try:
        message = email.parser.Parser().parsestr(data)
        lid, lmbox = config.open_local_mailbox(session)
        e = Email.Create(idx, lid, lmbox, ephemeral_mid=False)
        idx.add_tag(session, blank_tid, msg_idxs=[e.msg_idx_pos],
                    conversation=False)
        e.update_from_msg(session, message)
        idx.remove_tag(session, blank_tid, msg_idxs=[e.msg_idx_pos],
                       conversation=False)
        return None
    except Exception:
        # Catch Exception instead of using a bare except, so that
        # KeyboardInterrupt and SystemExit still propagate.
        traceback.print_exc()
        return '400 Oops wtf'
def command(self):
    """Render each selected message as HTML and write it to a file.

    Leading arguments starting with '-' are flags.  With '-sign' the
    HTML is clear-signed using the key in config.prefs.gpg_recipient
    before being written.  Files are named
    'YYYY-MM-DD.<cleaned subject>.<mid>.html'.

    Returns:
        The result of self._success() with a list of
        {'mid': ..., 'filename': ...} dicts, or the result of
        self._error() if nothing was selected or signing failed.
    """
    session, config, idx = self.session, self.session.config, self._idx()

    args = list(self.args)
    flags = []
    while args and args[0][:1] == '-':
        flags.append(args.pop(0))

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        return self._error('No messages selected')

    wrote = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)
        ts = long(e.get_msg_info(field=idx.MSG_DATE), 36)
        dt = datetime.datetime.fromtimestamp(ts)
        subject = e.get_msg_info(field=idx.MSG_SUBJECT)

        cleaned = CleanText(subject,
                            banned=CleanText.NONDNS,
                            replace='_').clean
        name = '%4.4d-%2.2d-%2.2d.%s.%s.html' % (
            dt.year, dt.month, dt.day,
            cleaned.replace('____', '_')[:50],
            e.msg_mid())
        fn = name.encode('ascii', 'ignore')

        session.ui.mark(_('Printing e-mail to %s') % fn)
        smv = SingleMessageView(session, arg=['=%s' % e.msg_mid()])
        html = smv.run().as_html()

        if '-sign' in flags:
            key = config.prefs.gpg_recipient
            # The comment markers keep the signature armor inside HTML
            # comments once the signed text is wrapped below.
            html = '<printed ts=%d -->\n%s\n<!-- \n' % (time.time(), html)
            rc, signed = self._gnupg().sign(html.encode('utf-8'),
                                            fromkey=key,
                                            clearsign=True)
            if rc != 0:
                return self._error('Failed to sign printout')
            html = '<!--\n%s\n-->\n' % signed.decode('utf-8')

        with open(fn, 'wb') as fd:
            fd.write(html.encode('utf-8'))
        wrote.append({'mid': e.msg_mid(), 'filename': fn})

    return self._success(_('Printed to %d files') % len(wrote), wrote)
def command(self):
    """Process the Autocrypt headers of the selected messages.

    Every selected message that has an 'autocrypt' header is passed to
    AutoCrypt_process_email().  Senders for which that call returned
    anything other than None are collected, and the AutoCrypt DB is
    saved if there are any.

    Returns:
        The result of self._success() with the list of updated senders.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    args = list(self.args)
    emails = [Email(idx, i) for i in self._choose_messages(args)]
    db = get_AutoCrypt_DB(config)['state']

    updated = []
    for e in emails:
        msg = e.get_msg()
        if 'autocrypt' not in msg:
            continue
        sender = e.get_sender()
        sent_at = int(e.get_msg_info(e.index.MSG_DATE), 36)
        update = AutoCrypt_process_email(
            config, e.get_msg(), e.msg_mid(), sent_at, sender)
        if update is not None:
            # Note: update==False means an entry was removed, which
            #       is an interesting event!
            updated.append(sender)

    if updated:
        save_AutoCrypt_DB(config)

    return self._success("Updated %d records" % len(updated), updated)
def command(self):
    """Write an HTML printout of every selected message to disk.

    Arguments starting with '-' that precede the message selection are
    collected as flags; '-sign' clear-signs the printout with the key
    from config.prefs.gpg_recipient.  Each file is named
    'YYYY-MM-DD.<cleaned subject>.<mid>.html'.

    Returns:
        The result of self._success() with one
        {'mid': ..., 'filename': ...} dict per file written, or the
        result of self._error() if no message was selected or signing
        failed.
    """
    session, config, idx = self.session, self.session.config, self._idx()

    args = list(self.args)
    flags = []
    while args and args[0][:1] == '-':
        flags.append(args.pop(0))

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        return self._error('No messages selected')

    wrote = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)
        stamp = long(e.get_msg_info(field=idx.MSG_DATE), 36)
        when = datetime.datetime.fromtimestamp(stamp)
        subject = e.get_msg_info(field=idx.MSG_SUBJECT)

        safe_subject = CleanText(subject,
                                 banned=CleanText.NONDNS,
                                 replace='_').clean.replace('____', '_')
        fn = ('%4.4d-%2.2d-%2.2d.%s.%s.html'
              % (when.year, when.month, when.day,
                 safe_subject[:50], e.msg_mid())
              ).encode('ascii', 'ignore')

        session.ui.mark(_('Printing e-mail to %s') % fn)
        view = SingleMessageView(session, arg=['=%s' % e.msg_mid()])
        html = view.run().as_html()

        if '-sign' in flags:
            key = config.prefs.gpg_recipient
            # Wrapped so that, after signing, the armor lines end up
            # inside HTML comments around the printed page.
            html = '<printed ts=%d -->\n%s\n<!-- \n' % (time.time(), html)
            rc, signed = self._gnupg().sign(html.encode('utf-8'),
                                            fromkey=key,
                                            clearsign=True)
            if rc != 0:
                return self._error('Failed to sign printout')
            html = '<!--\n%s\n-->\n' % signed.decode('utf-8')

        with open(fn, 'wb') as fd:
            fd.write(html.encode('utf-8'))
        wrote.append({'mid': e.msg_mid(), 'filename': fn})

    return self._success(_('Printed to %d files') % len(wrote), wrote)
def command(self, save=True):
    """Export the selected messages to a newly created mailbox.

    The last argument may be '<type>:<path>'; otherwise the type is
    config.prefs.export_format and the path comes from
    self.export_path().  Leading arguments starting with '-' are
    options: 'flat' exports only the selected messages instead of their
    whole conversations, 'notags' suppresses the X-Mailpile-Tags header.

    Args:
        save: not used by this method body.

    Returns:
        The result of self._success() with a dict holding 'exported'
        (count), 'created' (path) and, if any message was unreadable,
        'failed' (list of message IDs).  The result of self._error() is
        returned if the path already exists or the mailbox cannot be
        created, and False if no messages were selected.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    mbox_type = config.prefs.export_format

    args = list(self.args)
    if args and ':' in args[-1]:
        mbox_type, path = args.pop(-1).split(':', 1)
    else:
        path = self.export_path(mbox_type)

    flat = notags = False
    while args and args[0][:1] == '-':
        # All dashes are removed, so '-flat' and '--flat' are the same.
        option = args.pop(0).replace('-', '')
        if option == 'flat':
            flat = True
        elif option == 'notags':
            notags = True

    if os.path.exists(path):
        return self._error('Already exists: %s' % path)

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        session.ui.warning('No messages selected')
        return False

    # Exporting messages without their threads barely makes any
    # sense.
    if not flat:
        # Walk backwards so that splicing a conversation in at position
        # i does not shift the positions still to be visited.
        for i in reversed(range(0, len(msg_idxs))):
            mi = msg_idxs[i]
            msg_idxs[i:i+1] = [int(m[idx.MSG_MID], 36)
                               for m in idx.get_conversation(msg_idx=mi)]

    # Let's always export in the same order. Stability is nice.
    msg_idxs.sort()

    try:
        mbox = self.create_mailbox(mbox_type, path)
    except (IOError, OSError):
        mbox = None
    if mbox is None:
        if not os.path.exists(os.path.dirname(path)):
            reason = _('Parent directory does not exist.')
        else:
            reason = _('Is the disk full? Are permissions lacking?')
        return self._error(_('Failed to create mailbox: %s') % reason)

    # Used as a set of exported indexes, so that messages listed more
    # than once (e.g. via overlapping conversations) are written once.
    exported = {}
    failed = []
    while msg_idxs:
        msg_idx = msg_idxs.pop(0)
        if msg_idx not in exported:
            e = Email(idx, msg_idx)
            session.ui.mark(_('Exporting message =%s ...') % e.msg_mid())
            fd = e.get_file()
            if fd is None:
                failed.append(e.msg_mid())
                session.ui.warning(_('Message =%s is unreadable! Skipping.'
                                     ) % e.msg_mid())
                continue
            try:
                data = fd.read()
                if not notags:
                    # Tag IDs that do not resolve to a tag object are
                    # left as plain strings and then filtered out by
                    # the hasattr() check.
                    tags = [tag.slug for tag in
                            (self.session.config.get_tag(t) or t for t
                             in e.get_msg_info(idx.MSG_TAGS).split(',')
                             if t)
                            if hasattr(tag, 'slug')]
                    # Detect the line ending from the start of the
                    # message, then insert the tag header between the
                    # existing headers and the body.
                    lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                    header, body = data.split(lf+lf, 1)
                    data = str(lf.join([
                        header,
                        'X-Mailpile-Tags: ' + '; '.join(sorted(tags)
                                                        ).encode('utf-8'),
                        '',
                        body
                    ]))
                mbox.add(data.replace('\r\n', '\n'))
                exported[msg_idx] = 1
            finally:
                fd.close()

    mbox.flush()

    result = {
        'exported': len(exported),
        'created': path
    }
    if failed:
        result['failed'] = failed
    return self._success(
        _('Exported %d messages to %s') % (len(exported), path),
        result)
def _retrain(self, tags=None):
    """Retrain the auto-taggers from messages found in the index.

    Args:
        tags: tags whose auto-taggers should be retrained.  When empty
            or None, the match_tag of every configuration returned by
            autotag_configs(config) is used.

    Returns:
        The result of self._success() with a dict holding 'retrained'
        (tag names), 'unreadable' (indexes of messages that could not
        be processed) and 'read_messages' (number of messages read), or
        the result of self._error('Aborted') if mailpile.util.QUITTING
        becomes true while training.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    tags = tags or [asb.match_tag for asb in autotag_configs(config)]
    tids = [config.get_tag(t)._key for t in tags if t]

    session.ui.mark(_('Retraining SpamBayes autotaggers'))
    if not config.real_hasattr('autotag'):
        config.real_setattr('autotag', {})

    # Find all the interesting messages! We don't look in the trash,
    # but we do look at interesting spam.
    #
    # Note: By specifically stating that we DON'T want trash, we
    #       disable the search engine's default result suppression
    #       and guarantee these results don't corrupt the somewhat
    #       lame/broken result cache.
    #
    no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
    # Maps an interest type (or, further down, an exclude-tag key) to
    # the set of matching search results.
    interest = {}
    for ttype in ('replied', 'read', 'tagged'):
        interest[ttype] = set()
        for tag in config.get_tags(type=ttype):
            interest[ttype] |= idx.search(session,
                                          ['in:%s' % tag.slug] + no_trash
                                          ).as_set()
        session.ui.notify(_('Have %d interesting %s messages'
                            ) % (len(interest[ttype]), ttype))

    retrained, unreadable = [], []
    count_all = 0
    for at_config in autotag_configs(config):
        at_tag = config.get_tag(at_config.match_tag)
        if at_tag and at_tag._key in tids:
            session.ui.mark('Retraining: %s' % at_tag.name)

            # One entry per class: (training set, candidate messages,
            # search term, whether the messages carry the tag).
            yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                  (set(), set(), '-in:%s' % at_tag.slug, False)]

            # Get the current message sets: tagged and untagged messages
            # excluding trash.
            for tset, mset, srch, which in yn:
                mset |= idx.search(session, [srch] + no_trash).as_set()

            # If we have any exclude_tags, they are particularly
            # interesting, so we'll look at them first.
            interesting = []
            for etagid in at_config.exclude_tags:
                etag = config.get_tag(etagid)
                if etag._key not in interest:
                    srch = ['in:%s' % etag._key] + no_trash
                    interest[etag._key] = idx.search(session, srch
                                                     ).as_set()
                interesting.append(etag._key)
            # The trailing None stands for "any remaining message".
            interesting.extend(['replied', 'read', 'tagged', None])

            # Go through the interest types in order of preference and
            # while we still lack training data, add to the training set.
            for ttype in interesting:
                for tset, mset, srch, which in yn:
                    # False positives are really annoying, and generally
                    # speaking any autotagged subset should be a small
                    # part of the Universe. So we divide the corpus
                    # budget 33% True, 67% False.
                    full_size = int(at_config.corpus_size *
                                    (0.33 if which else 0.67))
                    want = min(full_size // len(interesting),
                               max(0, full_size - len(tset)))
                    # Make sure we always fully utilize our budget
                    if full_size > len(tset) and not ttype:
                        want = full_size - len(tset)

                    if want:
                        if ttype:
                            adding = sorted(list(mset & interest[ttype]))
                        else:
                            adding = sorted(list(mset))
                        # Take the `want` highest values; move them
                        # from the candidates to the training set.
                        adding = set(list(reversed(adding))[:want])
                        tset |= adding
                        mset -= adding

            # Load classifier, reset
            atagger = config.load_auto_tagger(at_config)
            atagger.reset(at_config)
            for tset, mset, srch, which in yn:
                count = 0
                # We go through the list of message in order, to avoid
                # thrashing caches too badly.
                for msg_idx in sorted(list(tset)):
                    try:
                        e = Email(idx, msg_idx)
                        count += 1
                        count_all += 1
                        session.ui.mark(
                            _('Reading %s (%d/%d, %s=%s)'
                              ) % (e.msg_mid(), count, len(tset),
                                   at_tag.name, which))
                        atagger.learn(at_config,
                                      e.get_msg(),
                                      self._get_keywords(e),
                                      which)
                        play_nice_with_threads()
                        if mailpile.util.QUITTING:
                            return self._error('Aborted')
                    except (IndexError, TypeError, ValueError,
                            OSError, IOError):
                        # A failing message is recorded and skipped;
                        # training carries on with the next one.
                        if 'autotag' in session.config.sys.debug:
                            import traceback
                            traceback.print_exc()
                        unreadable.append(msg_idx)
                        session.ui.warning(
                            _('Failed to process message at =%s'
                              ) % (b36(msg_idx)))

            # We got this far without crashing, so save the result.
            config.save_auto_tagger(at_config)
            retrained.append(at_tag.name)

    message = _('Retrained SpamBayes auto-tagging for %s'
                ) % ', '.join(retrained)
    session.ui.mark(message)
    return self._success(message, result={
        'retrained': retrained,
        'unreadable': unreadable,
        'read_messages': count_all
    })
def command(self):
    """Build a table of metadata cells for the selected messages.

    Command-line arguments before an optional '--' separator are either
    options ('--timeout=N', '--header', '--no-mid') or column specs;
    what follows the separator selects messages.  The "timeout",
    "header", "no-mid", "track-id", "term" and "mid" data variables
    supply the same settings for form input.

    Returns:
        The result of self._success() with a list of rows; the first
        row is a header if one was requested, and each message row
        starts with the message ID unless that was disabled.
    """
    session, config, idx = self.session, self.session.config, self._idx()

    # Command-line arguments...
    msgs = list(self.args)
    timeout = -1
    tracking_id = None
    with_header = False
    without_mid = False
    columns = []
    while msgs and msgs[0].lower() != '--':
        arg = msgs.pop(0)
        if arg.startswith('--timeout='):
            timeout = float(arg[10:])
        elif arg.startswith('--header'):
            with_header = True
        elif arg.startswith('--no-mid'):
            without_mid = True
        else:
            columns.append(arg)
    if msgs:
        # The loop only stops early on the '--' separator; drop it.
        msgs.pop(0)

    # Form arguments...
    data = self.data
    timeout = float(data.get('timeout', [timeout])[0])
    with_header |= truthy(data.get('header', [''])[0])
    without_mid |= truthy(data.get('no-mid', [''])[0])
    tracking_id = data.get('track-id', [tracking_id])[0]
    columns.extend(data.get('term', []))
    msgs.extend(['=%s' % mid.replace('=', '')
                 for mid in data.get('mid', [])])

    # Add a header to the CSV if requested
    results = []
    if with_header:
        header = [col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                  for col in columns]
        if not without_mid:
            header = ['MID'] + header
        results.append(header)

    if timeout > 0:
        deadline = time.time() + timeout
    else:
        deadline = None

    msg_idxs = self._choose_messages(msgs)
    progress = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)
        if self.event and tracking_id:
            progress.append(msg_idx)
            self.event.private_data = {"progress": len(progress),
                                       "track-id": tracking_id,
                                       "total": len(msg_idxs),
                                       "reading": e.msg_mid()}
            self.event.message = _('Digging into =%s') % e.msg_mid()
            self._update_event_state(self.event.RUNNING, log=True)
        else:
            session.ui.mark(_('Digging into =%s') % e.msg_mid())

        if without_mid:
            row = []
        else:
            row = ['%s' % e.msg_mid()]
        for cellspec in columns:
            row.extend(self._cell(idx, e, cellspec))
        results.append(row)

        if deadline and deadline < time.time():
            break

    return self._success(_('Found %d rows in %d messages'
                           ) % (len(results), len(msg_idxs)),
                         results)
def command(self):
    """Classify the selected messages and report the scores.

    Returns:
        The result of self._success() carrying whatever
        self._classify() returned for the selected messages.
    """
    session = self.session
    config = session.config
    idx = self._idx()

    emails = []
    for mid in self._choose_messages(self.args):
        emails.append(Email(idx, mid))

    message = _('Classified %d messages') % len(emails)
    return self._success(message, self._classify(emails))
def _retrain(self, tags=None):
    """Retrain the auto-taggers from messages found in the index.

    Args:
        tags: tags whose auto-taggers should be retrained.  When empty
            or None, the match_tag of every configuration returned by
            autotag_configs(config) is used.

    Returns:
        The result of self._success() with a dict holding 'retrained'
        (tag names), 'unreadable' (indexes of messages that could not
        be processed) and 'read_messages' (number of messages read), or
        the result of self._error('Aborted') if mailpile.util.QUITTING
        becomes true while training.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    tags = tags or [asb.match_tag for asb in autotag_configs(config)]
    tids = [config.get_tag(t)._key for t in tags if t]

    session.ui.mark(_('Retraining SpamBayes autotaggers'))
    if not config.real_hasattr('autotag'):
        config.real_setattr('autotag', {})

    # Find all the interesting messages! We don't look in the trash,
    # but we do look at interesting spam.
    #
    # Note: By specifically stating that we DON'T want trash, we
    #       disable the search engine's default result suppression
    #       and guarantee these results don't corrupt the somewhat
    #       lame/broken result cache.
    #
    no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
    # Maps an interest type (or, further down, an exclude-tag key) to
    # the set of matching search results.
    interest = {}
    for ttype in ('replied', 'read', 'tagged'):
        interest[ttype] = set()
        for tag in config.get_tags(type=ttype):
            interest[ttype] |= idx.search(session,
                                          ['in:%s' % tag.slug] +
                                          no_trash).as_set()
        session.ui.notify(
            _('Have %d interesting %s messages') %
            (len(interest[ttype]), ttype))

    retrained, unreadable = [], []
    count_all = 0
    for at_config in autotag_configs(config):
        at_tag = config.get_tag(at_config.match_tag)
        if at_tag and at_tag._key in tids:
            session.ui.mark('Retraining: %s' % at_tag.name)

            # One entry per class: (training set, candidate messages,
            # search term, whether the messages carry the tag).
            yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                  (set(), set(), '-in:%s' % at_tag.slug, False)]

            # Get the current message sets: tagged and untagged messages
            # excluding trash.
            for tset, mset, srch, which in yn:
                mset |= idx.search(session, [srch] + no_trash).as_set()

            # If we have any exclude_tags, they are particularly
            # interesting, so we'll look at them first.
            interesting = []
            for etagid in at_config.exclude_tags:
                etag = config.get_tag(etagid)
                if etag._key not in interest:
                    srch = ['in:%s' % etag._key] + no_trash
                    interest[etag._key] = idx.search(session,
                                                     srch).as_set()
                interesting.append(etag._key)
            # The trailing None stands for "any remaining message".
            interesting.extend(['replied', 'read', 'tagged', None])

            # Go through the interest types in order of preference and
            # while we still lack training data, add to the training set.
            for ttype in interesting:
                for tset, mset, srch, which in yn:
                    # False positives are really annoying, and generally
                    # speaking any autotagged subset should be a small
                    # part of the Universe. So we divide the corpus
                    # budget 33% True, 67% False.
                    full_size = int(at_config.corpus_size *
                                    (0.33 if which else 0.67))
                    want = min(full_size // len(interesting),
                               max(0, full_size - len(tset)))
                    # Make sure we always fully utilize our budget
                    if full_size > len(tset) and not ttype:
                        want = full_size - len(tset)

                    if want:
                        if ttype:
                            adding = sorted(list(mset & interest[ttype]))
                        else:
                            adding = sorted(list(mset))
                        # Take the `want` highest values; move them
                        # from the candidates to the training set.
                        adding = set(list(reversed(adding))[:want])
                        tset |= adding
                        mset -= adding

            # Load classifier, reset
            atagger = config.load_auto_tagger(at_config)
            atagger.reset(at_config)
            for tset, mset, srch, which in yn:
                count = 0
                # We go through the list of message in order, to avoid
                # thrashing caches too badly.
                for msg_idx in sorted(list(tset)):
                    try:
                        e = Email(idx, msg_idx)
                        count += 1
                        count_all += 1
                        session.ui.mark(
                            _('Reading %s (%d/%d, %s=%s)') %
                            (e.msg_mid(), count, len(tset),
                             at_tag.name, which))
                        atagger.learn(at_config, e.get_msg(),
                                      self._get_keywords(e), which)
                        play_nice_with_threads()
                        if mailpile.util.QUITTING:
                            return self._error('Aborted')
                    except (IndexError, TypeError, ValueError,
                            OSError, IOError):
                        # A failing message is recorded and skipped;
                        # training carries on with the next one.
                        if 'autotag' in session.config.sys.debug:
                            import traceback
                            traceback.print_exc()
                        unreadable.append(msg_idx)
                        session.ui.warning(
                            _('Failed to process message at =%s') %
                            (b36(msg_idx)))

            # We got this far without crashing, so save the result.
            config.save_auto_tagger(at_config)
            retrained.append(at_tag.name)

    message = _('Retrained SpamBayes auto-tagging for %s') % ', '.join(
        retrained)
    session.ui.mark(message)
    return self._success(message,
                         result={
                             'retrained': retrained,
                             'unreadable': unreadable,
                             'read_messages': count_all
                         })
def command(self):
    """Collect rows of metadata cells for the selected messages.

    Arguments preceding an optional '--' separator are options
    ('--timeout=N', '--header', '--no-mid') or column specs; arguments
    after it select messages.  Form input may set the same things
    through the "timeout", "header", "no-mid", "track-id", "term" and
    "mid" data variables.

    Returns:
        The result of self._success() with the list of rows, starting
        with a header row if requested.  Each message row begins with
        the message ID unless that was disabled.
    """
    session, config, idx = self.session, self.session.config, self._idx()

    # Command-line arguments...
    msgs = list(self.args)
    timeout = -1
    tracking_id = None
    with_header = False
    without_mid = False
    columns = []
    while msgs and msgs[0].lower() != '--':
        word = msgs.pop(0)
        if word.startswith('--timeout='):
            timeout = float(word[10:])
        elif word.startswith('--header'):
            with_header = True
        elif word.startswith('--no-mid'):
            without_mid = True
        else:
            columns.append(word)
    if msgs:
        # Whatever stopped the loop with arguments left is the '--'.
        msgs.pop(0)

    # Form arguments...
    timeout = float(self.data.get('timeout', [timeout])[0])
    with_header |= truthy(self.data.get('header', [''])[0])
    without_mid |= truthy(self.data.get('no-mid', [''])[0])
    tracking_id = self.data.get('track-id', [tracking_id])[0]
    columns.extend(self.data.get('term', []))
    for mid in self.data.get('mid', []):
        msgs.append('=%s' % mid.replace('=', ''))

    # Add a header to the CSV if requested
    if with_header:
        titles = [col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                  for col in columns]
        if not without_mid:
            titles = ['MID'] + titles
        results = [titles]
    else:
        results = []

    deadline = None
    if timeout > 0:
        deadline = time.time() + timeout

    msg_idxs = self._choose_messages(msgs)
    progress = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)
        if self.event and tracking_id:
            progress.append(msg_idx)
            self.event.private_data = {
                "progress": len(progress),
                "track-id": tracking_id,
                "total": len(msg_idxs),
                "reading": e.msg_mid()
            }
            self.event.message = _('Digging into =%s') % e.msg_mid()
            self._update_event_state(self.event.RUNNING, log=True)
        else:
            session.ui.mark(_('Digging into =%s') % e.msg_mid())

        row = []
        if not without_mid:
            row.append('%s' % e.msg_mid())
        for cellspec in columns:
            row.extend(self._cell(idx, e, cellspec))
        results.append(row)

        if deadline and deadline < time.time():
            break

    summary = (_('Found %d rows in %d messages')
               % (len(results), len(msg_idxs)))
    return self._success(summary, results)
def _do_search(self, search=None, process_args=False):
    """Run (or reuse) the session's search and resolve the view pairs.

    A fresh search is performed when there is no context, when explicit
    `search` terms are given, or when the session's last search differs
    from `self._search_args`; otherwise `session.results` is reused
    unchanged and the index comes from `self._idx()`.

    Args:
        search: Optional list of search terms.  It is copied, so the
            caller's list is never modified.
        process_args: When true, `self._search_args` are parsed and
            added to the terms even though `search` was supplied.

    Returns:
        A `(session, idx)` tuple.

    Side effects:
        Updates `session.searched` and `session.results`, rebuilds
        `self._emails`, may rewrite `self._email_view_pairs`, may move
        `self._start` so an expanded message is inside the visible
        window, and calls `self._email_view_side_effects()` when any
        e-mails were selected for viewing.
    """
    session = self.session

    # Snapshot of the unsorted search results, only needed when a view
    # pair asks us to auto-choose a message ('*').  It is bound up front
    # so the cached-search path below can never hit an unbound local.
    raw_results = None

    if (self.context is None
            or search
            or session.searched != self._search_args):
        # Copy, so that appending parsed arguments below does not mutate
        # the list object the caller handed us.
        session.searched = list(search) if search else []
        want_index = 'default'
        if search is None or process_args:
            prefix = ''
            for arg in self._search_args:
                if arg.endswith(':'):
                    # A bare "prefix:" applies to the terms that follow.
                    prefix = arg.lower()
                elif ':' in arg or (arg and arg[0] in ('-', '+')):
                    if arg.startswith('index:'):
                        # Not a search term: selects which index to use.
                        want_index = arg[6:]
                    else:
                        prefix = ''
                        session.searched.append(arg.lower())
                elif prefix and '@' in arg:
                    # Keep things containing "@" in one piece.
                    session.searched.append(prefix + arg.lower())
                else:
                    words = re.findall(WORD_REGEXP, arg.lower())
                    session.searched.extend(
                        [prefix + word for word in words])
        if not session.searched:
            session.searched = ['all:mail']

        idx = self.switch_indexes(want_index)
        context = session.results if self.context else None
        session.results = list(
            idx.search(session, session.searched,
                       context=context).as_set())
        if '*' in self._email_view_pairs.values():
            # If we are auto-choosing which message from a thread to
            # display, then we want the raw results so we can only
            # choose from messages that matched our search.  We have to
            # save this, since the sort below may collapse the results.
            # A set suffices: it is only used for membership tests.
            raw_results = set(session.results)

        for pmid, emid in list(self._email_view_pairs.iteritems()):
            # Make sure all our requested messages are amongst results
            pmid_idx = int(pmid, 36)
            if pmid_idx not in session.results:
                session.results.append(pmid_idx)
            # session.order may be unset, so guard the membership test.
            if ('flat' in (session.order or '')) and emid != '*':
                # Flat mode doesn't really use view pairs, so also make
                # sure our actual view target is among results.
                emid_idx = int(emid, 36)
                if emid_idx not in session.results:
                    session.results.append(emid_idx)

        if session.order:
            idx.sort_results(session, session.results, session.order)
    else:
        idx = self._idx()

    self._emails = []
    # len(results) doubles as the "no position found" sentinel below.
    pivot_pos = any_pos = len(session.results)
    if self._email_view_pairs:
        new_tids = set(
            [t._key for t in session.config.get_tags(type='unread')])
        for pmid, emid in list(self._email_view_pairs.iteritems()):
            try:
                if emid == '*':
                    if raw_results is None:
                        # The search was not re-run this time, so the
                        # best snapshot available is the current result
                        # list.
                        raw_results = set(session.results)
                    pmid_idx = int(pmid, 36)
                    conversation = idx.get_conversation(msg_idx=pmid_idx)
                    # Find oldest message in conversation that is unread
                    # and matches our search criteria...
                    matches = []
                    for info in conversation:
                        if new_tids & set(info[idx.MSG_TAGS].split(',')):
                            imid_idx = int(info[idx.MSG_MID], 36)
                            if imid_idx in raw_results:
                                matches.append(
                                    (int(info[idx.MSG_DATE], 36),
                                     imid_idx))
                    if matches:
                        emid_idx = min(matches)[1]
                        emid = b36(emid_idx)
                    else:
                        # Nothing suitable: show the requested message.
                        emid = pmid
                        emid_idx = pmid_idx
                    self._email_view_pairs[pmid] = emid
                else:
                    emid_idx = int(emid, 36)
                    conversation = idx.get_conversation(msg_idx=emid_idx)

                if 'flat' not in (session.order or ''):
                    # Point every message of the conversation at the
                    # message chosen for viewing.
                    for info in conversation:
                        cmid = info[idx.MSG_MID]
                        self._email_view_pairs[cmid] = emid

                # Calculate visibility...
                for cmid in self._email_view_pairs:
                    try:
                        cpos = session.results.index(int(cmid, 36))
                    except ValueError:
                        cpos = -1
                    if cpos >= 0:
                        any_pos = min(any_pos, cpos)
                    if (cpos > self._start and
                            cpos < self._start + self._num + 1):
                        pivot_pos = min(cpos, pivot_pos)
                self._emails.append(Email(idx, emid_idx))
            except ValueError:
                # A malformed ID invalidates the view pairs entirely.
                self._email_view_pairs = {}

    if 'flat' in (session.order or ''):
        # Above we have guaranteed that the target message is in the
        # result set; unset this dictionary to force a flat display
        # of the chosen message.
        self._email_view_pairs = {}

    # Adjust the visible window of results if we are expanding an
    # individual message, to guarantee visibility.
    if pivot_pos < len(session.results):
        self._start = max(0, pivot_pos - max(self._num // 5, 2))
    elif any_pos < len(session.results):
        self._start = max(0, any_pos - max(self._num // 5, 2))

    if self._emails:
        self._email_view_side_effects(self._emails)

    return session, idx