def command(self):
    session, config, idx = self.session, self.session.config, self._idx()
    args = list(self.args)

    # Separate leading -flags from the message-selection arguments
    flags = []
    while args and args[0][:1] == '-':
        flags.append(args.pop(0))

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        return self._error('No messages selected')

    wrote = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)

        # Build a filename of the form YYYY-MM-DD.<subject-slug>.<mid>.html
        # from the (base-36) message date, a sanitized subject and the MID.
        ts = long(e.get_msg_info(field=idx.MSG_DATE), 36)
        dt = datetime.datetime.fromtimestamp(ts)
        subject = e.get_msg_info(field=idx.MSG_SUBJECT)
        fn = ('%4.4d-%2.2d-%2.2d.%s.%s.html'
              % (dt.year, dt.month, dt.day,
                 CleanText(subject,
                           banned=CleanText.NONDNS,
                           replace='_').clean.replace('____', '_')[:50],
                 e.msg_mid())).encode('ascii', 'ignore')
        session.ui.mark(_('Printing e-mail to %s') % fn)

        # Render the message as stand-alone HTML
        smv = SingleMessageView(session, arg=['=%s' % e.msg_mid()])
        html = smv.run().as_html()

        if '-sign' in flags:
            # Clearsign the printout; the PGP armor ends up inside HTML
            # comments, so browsers still render the message itself.
            key = config.prefs.gpg_recipient
            html = '<printed ts=%d -->\n%s\n<!-- \n' % (time.time(), html)
            rc, signed = self._gnupg().sign(html.encode('utf-8'),
                                            fromkey=key, clearsign=True)
            if rc != 0:
                return self._error('Failed to sign printout')
            html = '<!--\n%s\n-->\n' % signed.decode('utf-8')

        with open(fn, 'wb') as fd:
            fd.write(html.encode('utf-8'))
        wrote.append({'mid': e.msg_mid(), 'filename': fn})

    return self._success(_('Printed to %d files') % len(wrote), wrote)
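# A minimal sketch of the filename scheme used above, in isolation. This is
# illustrative only: `_sketch_print_filename` is not part of Mailpile's API,
# and the regex slug is a simplified stand-in for CleanText(...,
# banned=CleanText.NONDNS); the base-36 decode mirrors the MSG_DATE handling.
import datetime
import re

def _sketch_print_filename(msg_date_b36, subject, mid):
    # MSG_DATE is stored base-36; decode it to a Unix timestamp.
    dt = datetime.datetime.fromtimestamp(int(msg_date_b36, 36))
    # Simplified subject slug: keep filename-safe characters, cap at 50 chars.
    slug = re.sub(r'[^A-Za-z0-9._-]+', '_', subject)[:50]
    return '%4.4d-%2.2d-%2.2d.%s.%s.html' % (dt.year, dt.month, dt.day,
                                             slug, mid)

# Example: _sketch_print_filename(msg_date_b36, 'Hello world!', '0A1B')
# yields something like '2014-01-31.Hello_world_.0A1B.html', with the date
# determined by the decoded timestamp.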
def command(self, save=True):
    session, config, idx = self.session, self.session.config, self._idx()
    mbox_type = config.prefs.export_format

    args = list(self.args)
    if args and ':' in args[-1]:
        mbox_type, path = args.pop(-1).split(':', 1)
    else:
        path = self.export_path(mbox_type)

    flat = notags = False
    while args and args[0][:1] == '-':
        option = args.pop(0).replace('-', '')
        if option == 'flat':
            flat = True
        elif option == 'notags':
            notags = True

    if os.path.exists(path):
        return self._error('Already exists: %s' % path)

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        session.ui.warning('No messages selected')
        return False

    # Exporting messages without their threads barely makes any sense.
    if not flat:
        for i in reversed(range(0, len(msg_idxs))):
            mi = msg_idxs[i]
            msg_idxs[i:i+1] = [int(m[idx.MSG_MID], 36)
                               for m in idx.get_conversation(msg_idx=mi)]

    # Let's always export in the same order. Stability is nice.
    msg_idxs.sort()

    try:
        mbox = self.create_mailbox(mbox_type, path)
    except (IOError, OSError):
        mbox = None
    if mbox is None:
        if not os.path.exists(os.path.dirname(path)):
            reason = _('Parent directory does not exist.')
        else:
            reason = _('Is the disk full? Are permissions lacking?')
        return self._error(_('Failed to create mailbox: %s') % reason)

    exported = {}
    failed = []
    while msg_idxs:
        msg_idx = msg_idxs.pop(0)
        if msg_idx not in exported:
            e = Email(idx, msg_idx)
            session.ui.mark(_('Exporting message =%s ...') % e.msg_mid())
            fd = e.get_file()
            if fd is None:
                failed.append(e.msg_mid())
                session.ui.warning(_('Message =%s is unreadable! Skipping.'
                                     ) % e.msg_mid())
                continue
            try:
                data = fd.read()
                if not notags:
                    tags = [tag.slug for tag in
                            (self.session.config.get_tag(t) or t for t
                             in e.get_msg_info(idx.MSG_TAGS).split(',')
                             if t)
                            if hasattr(tag, 'slug')]
                    lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                    header, body = data.split(lf + lf, 1)
                    data = str(lf.join([
                        header,
                        'X-Mailpile-Tags: ' + '; '.join(sorted(tags)
                                                        ).encode('utf-8'),
                        '',
                        body
                    ]))
                mbox.add(data.replace('\r\n', '\n'))
                exported[msg_idx] = 1
            finally:
                fd.close()

    mbox.flush()

    result = {'exported': len(exported), 'created': path}
    if failed:
        result['failed'] = failed
    return self._success(
        _('Exported %d messages to %s') % (len(exported), path),
        result)
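# A minimal sketch of the tag-injection step performed during export above:
# detect CRLF vs. LF line endings, split the header block from the body at
# the first blank line, and append an X-Mailpile-Tags header. The function
# name is illustrative and not part of Mailpile's API.
def _sketch_inject_tags(raw_message, tag_slugs):
    # Sniff the line-ending convention from the first 200 bytes.
    lf = '\r\n' if ('\r\n' in raw_message[:200]) else '\n'
    header, body = raw_message.split(lf + lf, 1)
    return lf.join([header,
                    'X-Mailpile-Tags: ' + '; '.join(sorted(tag_slugs)),
                    '',
                    body])

# _sketch_inject_tags('Subject: hi\r\n\r\nBody', ['work', 'inbox'])
# -> 'Subject: hi\r\nX-Mailpile-Tags: inbox; work\r\n\r\nBody'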
def _retrain(self, tags=None):
    "Retrain autotaggers"
    session, config, idx = self.session, self.session.config, self._idx()
    tags = tags or [asb.match_tag for asb in autotag_configs(config)]
    tids = [config.get_tag(t)._key for t in tags if t]

    session.ui.mark(_('Retraining SpamBayes autotaggers'))
    if not config.real_hasattr('autotag'):
        config.real_setattr('autotag', {})

    # Find all the interesting messages! We don't look in the trash,
    # but we do look at interesting spam.
    #
    # Note: By specifically stating that we DON'T want trash, we
    #       disable the search engine's default result suppression
    #       and guarantee these results don't corrupt the somewhat
    #       lame/broken result cache.
    #
    no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
    interest = {}
    for ttype in ('replied', 'read', 'tagged'):
        interest[ttype] = set()
        for tag in config.get_tags(type=ttype):
            interest[ttype] |= idx.search(session,
                                          ['in:%s' % tag.slug] + no_trash
                                          ).as_set()
        session.ui.notify(_('Have %d interesting %s messages'
                            ) % (len(interest[ttype]), ttype))

    retrained, unreadable = [], []
    count_all = 0
    for at_config in autotag_configs(config):
        at_tag = config.get_tag(at_config.match_tag)
        if at_tag and at_tag._key in tids:
            session.ui.mark('Retraining: %s' % at_tag.name)

            yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
                  (set(), set(), '-in:%s' % at_tag.slug, False)]

            # Get the current message sets: tagged and untagged messages
            # excluding trash.
            for tset, mset, srch, which in yn:
                mset |= idx.search(session, [srch] + no_trash).as_set()

            # If we have any exclude_tags, they are particularly
            # interesting, so we'll look at them first.
            interesting = []
            for etagid in at_config.exclude_tags:
                etag = config.get_tag(etagid)
                if etag._key not in interest:
                    srch = ['in:%s' % etag._key] + no_trash
                    interest[etag._key] = idx.search(session, srch).as_set()
                interesting.append(etag._key)
            interesting.extend(['replied', 'read', 'tagged', None])

            # Go through the interest types in order of preference and
            # while we still lack training data, add to the training set.
            for ttype in interesting:
                for tset, mset, srch, which in yn:
                    # False positives are really annoying, and generally
                    # speaking any autotagged subset should be a small
                    # part of the Universe. So we divide the corpus
                    # budget 33% True, 67% False.
                    full_size = int(at_config.corpus_size
                                    * (0.33 if which else 0.67))
                    want = min(full_size // len(interesting),
                               max(0, full_size - len(tset)))

                    # Make sure we always fully utilize our budget
                    if full_size > len(tset) and not ttype:
                        want = full_size - len(tset)

                    if want:
                        if ttype:
                            adding = sorted(list(mset & interest[ttype]))
                        else:
                            adding = sorted(list(mset))
                        adding = set(list(reversed(adding))[:want])
                        tset |= adding
                        mset -= adding

            # Load classifier, reset
            atagger = config.load_auto_tagger(at_config)
            atagger.reset(at_config)
            for tset, mset, srch, which in yn:
                count = 0
                # We go through the list of messages in order, to avoid
                # thrashing caches too badly.
                for msg_idx in sorted(list(tset)):
                    try:
                        e = Email(idx, msg_idx)
                        count += 1
                        count_all += 1
                        session.ui.mark(
                            _('Reading %s (%d/%d, %s=%s)'
                              ) % (e.msg_mid(), count, len(tset),
                                   at_tag.name, which))
                        atagger.learn(at_config,
                                      e.get_msg(),
                                      self._get_keywords(e),
                                      which)
                        play_nice_with_threads()
                        if mailpile.util.QUITTING:
                            return self._error('Aborted')
                    except (IndexError, TypeError, ValueError,
                            OSError, IOError):
                        if 'autotag' in session.config.sys.debug:
                            import traceback
                            traceback.print_exc()
                        unreadable.append(msg_idx)
                        session.ui.warning(
                            _('Failed to process message at =%s'
                              ) % (b36(msg_idx)))

            # We got this far without crashing, so save the result.
            config.save_auto_tagger(at_config)
            retrained.append(at_tag.name)

    message = _('Retrained SpamBayes auto-tagging for %s'
                ) % ', '.join(retrained)
    session.ui.mark(message)
    return self._success(message, result={
        'retrained': retrained,
        'unreadable': unreadable,
        'read_messages': count_all
    })
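# A small worked example of the corpus-budget arithmetic used above. The
# numbers are illustrative assumptions (corpus_size=1000, one exclude tag
# plus 'replied', 'read', 'tagged' and the final None pass, i.e. five
# interest buckets), not values from a real configuration.
corpus_size = 1000
interesting_buckets = 5
for which in (True, False):
    # 33% of the budget goes to positive (tagged) examples, 67% to negative.
    full_size = int(corpus_size * (0.33 if which else 0.67))  # 330 / 670
    per_bucket = full_size // interesting_buckets             # 66  / 134
    # Each interest bucket may contribute up to `per_bucket` messages,
    # capped by the remaining budget; the final ttype=None pass tops the
    # training set up to `full_size` if the earlier buckets were too small.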
def command(self):
    session, config, idx = self.session, self.session.config, self._idx()

    # Command-line arguments...
    msgs = list(self.args)
    timeout = -1
    tracking_id = None
    with_header = False
    without_mid = False
    columns = []
    while msgs and msgs[0].lower() != '--':
        arg = msgs.pop(0)
        if arg.startswith('--timeout='):
            timeout = float(arg[10:])
        elif arg.startswith('--header'):
            with_header = True
        elif arg.startswith('--no-mid'):
            without_mid = True
        else:
            columns.append(arg)
    if msgs and msgs[0].lower() == '--':
        msgs.pop(0)

    # Form arguments...
    timeout = float(self.data.get('timeout', [timeout])[0])
    with_header |= truthy(self.data.get('header', [''])[0])
    without_mid |= truthy(self.data.get('no-mid', [''])[0])
    tracking_id = self.data.get('track-id', [tracking_id])[0]
    columns.extend(self.data.get('term', []))
    msgs.extend(['=%s' % mid.replace('=', '')
                 for mid in self.data.get('mid', [])])

    # Add a header to the CSV if requested
    if with_header:
        results = [[col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                    for col in columns]]
        if not without_mid:
            results[0] = ['MID'] + results[0]
    else:
        results = []

    deadline = (time.time() + timeout) if (timeout > 0) else None
    msg_idxs = self._choose_messages(msgs)
    progress = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)
        if self.event and tracking_id:
            progress.append(msg_idx)
            self.event.private_data = {"progress": len(progress),
                                       "track-id": tracking_id,
                                       "total": len(msg_idxs),
                                       "reading": e.msg_mid()}
            self.event.message = _('Digging into =%s') % e.msg_mid()
            self._update_event_state(self.event.RUNNING, log=True)
        else:
            session.ui.mark(_('Digging into =%s') % e.msg_mid())
        row = [] if without_mid else ['%s' % e.msg_mid()]
        for cellspec in columns:
            row.extend(self._cell(idx, e, cellspec))
        results.append(row)
        if deadline and deadline < time.time():
            break

    return self._success(
        _('Found %d rows in %d messages') % (len(results), len(msg_idxs)),
        results)
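# A hedged sketch of how the CSV header row is derived from the column
# specifications above: everything after '||', ':' or '=' in a term is
# stripped, leaving only the leading name. The column specs here are made-up
# examples for illustration, not a documented list of supported terms.
columns = ['subject', 'from:email', 'date=fmt||(unknown)']
header = [col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
          for col in columns]
# header == ['subject', 'from', 'date']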