def command(self, emails=None): session, config, idx = self.session, self.session.config, self._idx() args = list(self.args) if self.session.config.sys.lockdown: return self._error(_('In lockdown, doing nothing.')) bounce_to = [] while args and '@' in args[-1]: bounce_to.append(args.pop(-1)) for rcpt in (self.data.get('to', []) + self.data.get('cc', []) + self.data.get('bcc', [])): bounce_to.extend(ExtractEmails(rcpt)) if not emails: args.extend(['=%s' % mid for mid in self.data.get('mid', [])]) mids = self._choose_messages(args) emails = [Email(idx, i) for i in mids] # First make sure the draft tags are all gone, so other edits either # fail or complete while we wait for the lock. with GLOBAL_EDITING_LOCK: self._tag_drafts(emails, untag=True) self._tag_blank(emails, untag=True) # Process one at a time so we don't eat too much memory sent = [] missing_keys = [] for email in emails: events = [] try: msg_mid = email.get_msg_info(idx.MSG_MID) # This is a unique sending-ID. This goes in the public (meant # for debugging help) section of the event-log, so we take # care to not reveal details about the message or recipients. msg_sid = sha1b64(email.get_msg_info(idx.MSG_ID), *sorted(bounce_to))[:8] # We load up any incomplete events for sending this message # to this set of recipients. If nothing is in flight, create # a new event for tracking this operation. 
events = list(config.event_log.incomplete( source=self.EVENT_SOURCE, data_mid=msg_mid, data_sid=msg_sid)) if not events: events.append(config.event_log.log( source=self.EVENT_SOURCE, flags=Event.RUNNING, message=_('Sending message'), data={'mid': msg_mid, 'sid': msg_sid})) SendMail(session, msg_mid, [PrepareMessage(config, email.get_msg(pgpmime=False), rcpts=(bounce_to or None), events=events)]) for ev in events: ev.flags = Event.COMPLETE config.event_log.log_event(ev) sent.append(email) except KeyLookupError, kle: # This is fatal, we don't retry message = _('Missing keys %s') % kle.missing for ev in events: ev.flags = Event.COMPLETE ev.message = message config.event_log.log_event(ev) session.ui.warning(message) missing_keys.extend(kle.missing) self._ignore_exception() # FIXME: Also fatal, when the SMTP server REJECTS the mail except:
def _add_from_messages(self):
    """Parse the From: header of each message selected by self.args.

    Returns a list of parsed results, one per selected message, as
    produced by self._fparse().
    """
    idx = self._idx()
    selected = [Email(idx, i) for i in self._choose_messages(self.args)]
    return [self._fparse(msg.get_msg_info(idx.MSG_FROM))
            for msg in selected]
def command(self):
    """Extract tabular data from messages, one row per message.

    Column specs come from CLI args (before an optional '--') or the
    'term' form field; messages come from remaining args / 'mid'.
    Supports --timeout=, --header (emit a header row) and --no-mid
    (omit the MID column).  Returns the rows via self._success().
    """
    session, config, idx = self.session, self.session.config, self._idx()

    # Command-line arguments...
    msgs = list(self.args)
    timeout = -1
    tracking_id = None
    with_header = False
    without_mid = False
    columns = []
    while msgs and msgs[0].lower() != '--':
        arg = msgs.pop(0)
        if arg.startswith('--timeout='):
            timeout = float(arg[10:])
        elif arg.startswith('--header'):
            with_header = True
        elif arg.startswith('--no-mid'):
            without_mid = True
        else:
            columns.append(arg)
    if msgs and msgs[0].lower() == '--':
        msgs.pop(0)

    # Form arguments... (these override/extend the CLI ones)
    timeout = float(self.data.get('timeout', [timeout])[0])
    with_header |= self._truthy(self.data.get('header', [''])[0])
    without_mid |= self._truthy(self.data.get('no-mid', [''])[0])
    tracking_id = self.data.get('track-id', [tracking_id])[0]
    columns.extend(self.data.get('term', []))
    msgs.extend(['=%s' % mid.replace('=', '')
                 for mid in self.data.get('mid', [])])

    # Add a header to the CSV if requested
    if with_header:
        results = [[col.split('||')[0].split(':', 1)[0].split('=', 1)[0]
                    for col in columns]]
        if not without_mid:
            results[0] = ['MID'] + results[0]
    else:
        results = []

    deadline = (time.time() + timeout) if (timeout > 0) else None
    msg_idxs = self._choose_messages(msgs)
    progress = []
    for msg_idx in msg_idxs:
        e = Email(idx, msg_idx)
        if self.event and tracking_id:
            # Report progress via the event log when we have a tracker
            progress.append(msg_idx)
            self.event.private_data = {"progress": len(progress),
                                       "track-id": tracking_id,
                                       "total": len(msg_idxs),
                                       "reading": e.msg_mid()}
            self.event.message = _('Digging into =%s') % e.msg_mid()
            self._update_event_state(self.event.RUNNING, log=True)
        else:
            session.ui.mark(_('Digging into =%s') % e.msg_mid())
        row = [] if without_mid else ['%s' % e.msg_mid()]
        for cellspec in columns:
            row.extend(self._cell(idx, e, cellspec))
        results.append(row)
        if deadline and deadline < time.time():
            # Out of time: return what we have so far
            break

    return self._success(_('Found %d rows in %d messages'
                           ) % (len(results), len(msg_idxs)), results)
def command(self, save=True):
    """Export selected messages (and, by default, their whole threads)
    to a new mailbox on disk.

    An explicit 'type:path' final argument overrides the configured
    export format; '-flat' skips thread expansion and '-notags'
    suppresses the X-Mailpile-Tags header.  Unreadable messages are
    skipped and reported in the result under 'failed'.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    mbox_type = config.prefs.export_format

    args = list(self.args)
    if args and ':' in args[-1]:
        mbox_type, path = args.pop(-1).split(':', 1)
    else:
        path = self.export_path(mbox_type)

    flat = notags = False
    while args and args[0][:1] == '-':
        option = args.pop(0).replace('-', '')
        if option == 'flat':
            flat = True
        elif option == 'notags':
            notags = True

    if os.path.exists(path):
        return self._error('Already exists: %s' % path)

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        session.ui.warning('No messages selected')
        return False

    # Exporting messages without their threads barely makes any
    # sense.
    if not flat:
        for i in reversed(range(0, len(msg_idxs))):
            mi = msg_idxs[i]
            msg_idxs[i:i+1] = [int(m[idx.MSG_MID], 36)
                               for m in idx.get_conversation(msg_idx=mi)]

    # Let's always export in the same order. Stability is nice.
    msg_idxs.sort()

    try:
        mbox = self.create_mailbox(mbox_type, path)
    except (IOError, OSError):
        mbox = None
    if mbox is None:
        # Give the user a hint about the most likely cause
        if not os.path.exists(os.path.dirname(path)):
            reason = _('Parent directory does not exist.')
        else:
            reason = _('Is the disk full? Are permissions lacking?')
        return self._error(_('Failed to create mailbox: %s') % reason)

    exported = {}
    failed = []
    while msg_idxs:
        msg_idx = msg_idxs.pop(0)
        if msg_idx in exported:
            continue
        e = Email(idx, msg_idx)
        session.ui.mark(_('Exporting message =%s ...') % e.msg_mid())
        fd = e.get_file()
        if fd is None:
            failed.append(e.msg_mid())
            session.ui.warning(_('Message =%s is unreadable! Skipping.')
                               % e.msg_mid())
            continue
        try:
            data = fd.read()
            if not notags:
                # Record the message's tag slugs in a header so they
                # survive the export.
                tags = [tag.slug for tag in
                        (self.session.config.get_tag(t) or t
                         for t in e.get_msg_info(idx.MSG_TAGS).split(',')
                         if t)
                        if hasattr(tag, 'slug')]
                lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                header, body = data.split(lf+lf, 1)
                data = str(lf.join([
                    header,
                    'X-Mailpile-Tags: ' + '; '.join(sorted(tags)
                                                    ).encode('utf-8'),
                    '',
                    body
                ]))
            mbox.add(data.replace('\r\n', '\n'))
            exported[msg_idx] = 1
        finally:
            fd.close()

    mbox.flush()

    result = {'exported': len(exported), 'created': path}
    if failed:
        result['failed'] = failed
    return self._success(
        _('Exported %d messages to %s') % (len(exported), path), result)
def _retrain(self, tags=None):
    """Retrain the SpamBayes autotaggers for the given match tags
    (defaults to every configured autotagger)."""
    session, config, idx = self.session, self.session.config, self._idx()
    tags = tags or [asb.match_tag for asb in config.prefs.autotag]
    tids = [config.get_tag(t)._key for t in tags if t]

    session.ui.mark(_('Retraining SpamBayes autotaggers'))
    if not hasattr(config, 'autotag'):
        config.autotag = {}

    # Find all the interesting messages! We don't look in the trash,
    # but we do look at interesting spam.
    #
    # Note: By specifically stating that we DON'T want trash, we
    #       disable the search engine's default result suppression
    #       and guarantee these results don't corrupt the somewhat
    #       lame/broken result cache.
    #
    no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
    interest = {}
    for ttype in ('replied', 'fwded', 'read', 'tagged'):
        interest[ttype] = set()
        for tag in config.get_tags(type=ttype):
            interest[ttype] |= idx.search(session,
                                          ['in:%s' % tag.slug] + no_trash
                                          ).as_set()
        session.ui.notify(_('Have %d interesting %s messages'
                            ) % (len(interest[ttype]), ttype))

    retrained, unreadable = [], []
    count_all = 0
    for at_config in config.prefs.autotag:
        at_tag = config.get_tag(at_config.match_tag)
        if not (at_tag and at_tag._key in tids):
            continue
        session.ui.mark('Retraining: %s' % at_tag.name)

        yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
              (set(), set(), '-in:%s' % at_tag.slug, False)]

        # Get the current message sets: tagged and untagged messages
        # excluding trash.
        for tset, mset, srch, which in yn:
            mset |= idx.search(session, [srch] + no_trash).as_set()

        # If we have any exclude_tags, they are particularly
        # interesting, so we'll look at them first.
        interesting = []
        for etagid in at_config.exclude_tags:
            etag = config.get_tag(etagid)
            if etag._key not in interest:
                srch = ['in:%s' % etag._key] + no_trash
                interest[etag._key] = idx.search(session, srch).as_set()
            interesting.append(etag._key)
        interesting.extend(['replied', 'fwded', 'read', 'tagged', None])

        # Go through the interest types in order of preference and
        # while we still lack training data, add to the training set.
        for ttype in interesting:
            for tset, mset, srch, which in yn:
                # FIXME: Is this a good idea? No single data source
                #        is allowed to be more than 50% of the corpus,
                #        to try and encourage diversity.
                want = min(at_config.corpus_size / 4,
                           max(0, at_config.corpus_size / 2 - len(tset)))
                if want:
                    if ttype:
                        adding = sorted(list(mset & interest[ttype]))
                    else:
                        adding = sorted(list(mset))
                    adding = set(list(reversed(adding))[:want])
                    tset |= adding
                    mset -= adding

        # Load classifier, reset
        atagger = config.load_auto_tagger(at_config)
        atagger.reset(at_config)
        for tset, mset, srch, which in yn:
            count = 0
            for msg_idx in tset:
                try:
                    e = Email(idx, msg_idx)
                    count += 1
                    count_all += 1
                    session.ui.mark(
                        _('Reading %s (%d/%d, %s=%s)'
                          ) % (e.msg_mid(), count, len(tset),
                               at_tag.name, which))
                    atagger.learn(at_config, e.get_msg(),
                                  self._get_keywords(e), which)
                except (IndexError, TypeError, ValueError,
                        OSError, IOError):
                    if session.config.sys.debug:
                        import traceback
                        traceback.print_exc()
                    unreadable.append(msg_idx)
                    session.ui.warning(
                        _('Failed to process message at =%s'
                          ) % (b36(msg_idx)))

        # We got this far without crashing, so save the result.
        config.save_auto_tagger(at_config)
        retrained.append(at_tag.name)

    message = _('Retrained SpamBayes auto-tagging for %s'
                ) % ', '.join(retrained)
    session.ui.mark(message)
    return self._success(message, result={
        'retrained': retrained,
        'unreadable': unreadable,
        'read_messages': count_all
    })
def command(self):
    """Classify the selected messages and report how many were done."""
    session, config, idx = self.session, self.session.config, self._idx()
    selected = self._choose_messages(self.args)
    emails = [Email(idx, mid) for mid in selected]
    return self._success(_('Classified %d messages') % len(emails),
                         self._classify(emails))
def _do_search(self, search=None, process_args=False):
    """Run (or re-run) the current search and prepare results.

    Populates session.searched/session.results, expands any
    message-view pairs into their conversations, and adjusts the
    visible result window so expanded messages stay on screen.
    Returns (session, idx).
    """
    session, idx = self.session, self._idx()

    if (self.context is None or search
            or session.searched != self._search_args):
        session.searched = search or []
        if search is None or process_args:
            prefix = ''
            for arg in self._search_args:
                if arg.endswith(':'):
                    # A bare "term:" argument sets a sticky prefix
                    prefix = arg
                elif ':' in arg or (arg and arg[0] in ('-', '+')):
                    if not arg.startswith('vfs:'):
                        arg = arg.lower()
                    prefix = ''
                    session.searched.append(arg)
                elif prefix and '@' in arg:
                    session.searched.append(prefix + arg.lower())
                else:
                    words = re.findall(WORD_REGEXP, arg.lower())
                    session.searched.extend([prefix + word
                                             for word in words])
        if not session.searched:
            session.searched = ['all:mail']

        context = session.results if self.context else None
        session.results = list(idx.search(session, session.searched,
                                          context=context).as_set())
        if session.order:
            idx.sort_results(session, session.results, session.order)

    self._emails = []
    pivot_pos = any_pos = len(session.results)
    for pmid, emid in list(self._email_view_pairs.iteritems()):
        try:
            emid_idx = int(emid, 36)
            for info in idx.get_conversation(msg_idx=emid_idx):
                cmid = info[idx.MSG_MID]
                self._email_view_pairs[cmid] = emid
                # Calculate visibility...
                try:
                    cpos = session.results.index(int(cmid, 36))
                except ValueError:
                    cpos = -1
                if cpos >= 0:
                    any_pos = min(any_pos, cpos)
                    if (cpos > self._start
                            and cpos < self._start + self._num + 1):
                        pivot_pos = min(cpos, pivot_pos)
            self._emails.append(Email(idx, emid_idx))
        except ValueError:
            self._email_view_pairs = {}

    # Adjust the visible window of results if we are expanding an
    # individual message, to guarantee visibility.
    if pivot_pos < len(session.results):
        self._start = max(0, pivot_pos - max(self._num // 5, 2))
    elif any_pos < len(session.results):
        self._start = max(0, any_pos - max(self._num // 5, 2))

    if self._emails:
        self._email_view_side_effects(self._emails)

    return session, idx
def command(self):
    """Create a draft that forwards the selected message(s).

    A leading 'att...' argument requests that attachments (other than
    PGP signatures) be carried over into the forward.
    """
    session, config, idx = self.session, self.session.config, self._idx()

    if self.args and self.args[0].lower().startswith('att'):
        with_atts = self.args.pop(0) or True
    else:
        with_atts = False

    refs = [Email(idx, i) for i in self._choose_messages(self.args)]
    if not refs:
        return self._error('No message found')

    trees = [m.evaluate_pgp(m.get_message_tree(), decrypt=True)
             for m in refs]
    ref_subjs = [t['headers_lc']['subject'] for t in trees]
    msg_bodies = []
    msg_atts = []
    for t in trees:
        # FIXME: Templates/settings for how we quote forwards?
        text = '-------- Original Message --------\n'
        for h in ('Date', 'Subject', 'From', 'To'):
            v = t['headers_lc'].get(h.lower(), None)
            if v:
                text += '%s: %s\n' % (h, v)
        text += '\n'
        text += ''.join([p['data'] for p in t['text_parts']
                         if p['type'] in ('text', 'quote',
                                          'pgpsignedtext',
                                          'pgpsecuretext',
                                          'pgpverifiedtext')])
        msg_bodies.append(text)
        if with_atts:
            for att in t['attachments']:
                if att['mimetype'] not in ('application/pgp-signature', ):
                    msg_atts.append(att['part'])

    local_id, lmbox = config.open_local_mailbox(session)
    email = Email.Create(idx, local_id, lmbox,
                         msg_text='\n\n'.join(msg_bodies),
                         msg_subject=('Fwd: %s' % ref_subjs[-1]))

    if msg_atts:
        msg = email.get_msg()
        for att in msg_atts:
            msg.attach(att)
        email.update_from_msg(msg)

    try:
        idx.add_tag(session,
                    session.config.get_tag_id('Drafts'),
                    msg_idxs=[int(email.get_msg_info(idx.MSG_IDX), 36)],
                    conversation=False)
    except (TypeError, ValueError, IndexError):
        self._ignore_exception()

    return self._edit_new_messages(session, idx, [email])
def _retrain(self, tags=None):
    """Retrain the SpamBayes autotaggers for the given match tags
    (defaults to every configured autotagger)."""
    session, config, idx = self.session, self.session.config, self._idx()
    tags = tags or [asb.match_tag for asb in autotag_configs(config)]
    tids = [config.get_tag(t)._key for t in tags if t]

    session.ui.mark(_('Retraining SpamBayes autotaggers'))
    if not config.real_hasattr('autotag'):
        config.real_setattr('autotag', {})

    # Find all the interesting messages! We don't look in the trash,
    # but we do look at interesting spam.
    #
    # Note: By specifically stating that we DON'T want trash, we
    #       disable the search engine's default result suppression
    #       and guarantee these results don't corrupt the somewhat
    #       lame/broken result cache.
    #
    no_trash = ['-in:%s' % t._key for t in config.get_tags(type='trash')]
    interest = {}
    for ttype in ('replied', 'read', 'tagged'):
        interest[ttype] = set()
        for tag in config.get_tags(type=ttype):
            interest[ttype] |= idx.search(session,
                                          ['in:%s' % tag.slug] + no_trash
                                          ).as_set()
        session.ui.notify(_('Have %d interesting %s messages'
                            ) % (len(interest[ttype]), ttype))

    retrained, unreadable = [], []
    count_all = 0
    for at_config in autotag_configs(config):
        at_tag = config.get_tag(at_config.match_tag)
        if not (at_tag and at_tag._key in tids):
            continue
        session.ui.mark('Retraining: %s' % at_tag.name)

        yn = [(set(), set(), 'in:%s' % at_tag.slug, True),
              (set(), set(), '-in:%s' % at_tag.slug, False)]

        # Get the current message sets: tagged and untagged messages
        # excluding trash.
        for tset, mset, srch, which in yn:
            mset |= idx.search(session, [srch] + no_trash).as_set()

        # If we have any exclude_tags, they are particularly
        # interesting, so we'll look at them first.
        interesting = []
        for etagid in at_config.exclude_tags:
            etag = config.get_tag(etagid)
            if etag._key not in interest:
                srch = ['in:%s' % etag._key] + no_trash
                interest[etag._key] = idx.search(session, srch).as_set()
            interesting.append(etag._key)
        interesting.extend(['replied', 'read', 'tagged', None])

        # Go through the interest types in order of preference and
        # while we still lack training data, add to the training set.
        for ttype in interesting:
            for tset, mset, srch, which in yn:
                # False positives are really annoying, and generally
                # speaking any autotagged subset should be a small
                # part of the Universe. So we divide the corpus
                # budget 33% True, 67% False.
                full_size = int(at_config.corpus_size *
                                (0.33 if which else 0.67))
                want = min(full_size // len(interesting),
                           max(0, full_size - len(tset)))
                # Make sure we always fully utilize our budget
                if full_size > len(tset) and not ttype:
                    want = full_size - len(tset)

                if want:
                    if ttype:
                        adding = sorted(list(mset & interest[ttype]))
                    else:
                        adding = sorted(list(mset))
                    adding = set(list(reversed(adding))[:want])
                    tset |= adding
                    mset -= adding

        # Load classifier, reset
        atagger = config.load_auto_tagger(at_config)
        atagger.reset(at_config)
        for tset, mset, srch, which in yn:
            count = 0
            # We go through the list of message in order, to avoid
            # thrashing caches too badly.
            for msg_idx in sorted(list(tset)):
                try:
                    e = Email(idx, msg_idx)
                    count += 1
                    count_all += 1
                    session.ui.mark(
                        _('Reading %s (%d/%d, %s=%s)'
                          ) % (e.msg_mid(), count, len(tset),
                               at_tag.name, which))
                    atagger.learn(at_config, e.get_msg(),
                                  self._get_keywords(e), which)
                    play_nice_with_threads()
                except (IndexError, TypeError, ValueError,
                        OSError, IOError):
                    if 'autotag' in session.config.sys.debug:
                        import traceback
                        traceback.print_exc()
                    unreadable.append(msg_idx)
                    session.ui.warning(
                        _('Failed to process message at =%s'
                          ) % (b36(msg_idx)))

        # We got this far without crashing, so save the result.
        config.save_auto_tagger(at_config)
        retrained.append(at_tag.name)

    message = _('Retrained SpamBayes auto-tagging for %s'
                ) % ', '.join(retrained)
    session.ui.mark(message)
    return self._success(message, result={
        'retrained': retrained,
        'unreadable': unreadable,
        'read_messages': count_all
    })
def command(self):
    """Classify the selected messages, returning the raw result."""
    session, config, idx = self.session, self.session.config, self._idx()
    selected = self._choose_messages(self.args)
    return self._classify([Email(idx, mid) for mid in selected])
def command(self, emails=None): session, config, idx = self.session, self.session.config, self._idx() args = list(self.args) bounce_to = [] while args and '@' in args[-1]: bounce_to.append(args.pop(-1)) for rcpt in (self.data.get('to', []) + self.data.get('cc', []) + self.data.get('bcc', [])): bounce_to.extend(ExtractEmails(rcpt)) if not emails: args.extend(['=%s' % mid for mid in self.data.get('mid', [])]) mids = self._choose_messages(args) emails = [Email(idx, i) for i in mids] # Process one at a time so we don't eat too much memory sent = [] missing_keys = [] for email in emails: events = [] try: msg_mid = email.get_msg_info(idx.MSG_MID) # This is a unique sending-ID. This goes in the public (meant # for debugging help) section of the event-log, so we take # care to not reveal details about the message or recipients. msg_sid = sha1b64(email.get_msg_info(idx.MSG_ID), *sorted(bounce_to))[:8] # We load up any incomplete events for sending this message # to this set of recipients. If nothing is in flight, create # a new event for tracking this operation. events = list( config.event_log.incomplete(source=self, data_mid=msg_mid, data_sid=msg_sid)) if not events: events.append( config.event_log.log(source=self, flags=Event.RUNNING, message=_('Sending message'), data={ 'mid': msg_mid, 'sid': msg_sid })) SendMail(session, [ PrepareMessage(config, email.get_msg(pgpmime=False), rcpts=(bounce_to or None), events=events) ]) for ev in events: ev.flags = Event.COMPLETE config.event_log.log_event(ev) sent.append(email) except KeyLookupError, kle: # This is fatal, we don't retry message = _('Missing keys %s') % kle.missing for ev in events: ev.flags = Event.COMPLETE ev.message = message config.event_log.log_event(ev) session.ui.warning(message) missing_keys.extend(kle.missing) self._ignore_exception() except:
def _do_search(self, search=None, process_args=False):
    """Run (or re-run) the current search and prepare results.

    Populates session.searched/session.results, resolves '*' view
    pairs to a concrete message (oldest unread match in the thread),
    expands conversations, and adjusts the visible result window.
    Returns (session, idx).
    """
    session = self.session

    if (self.context is None or search
            or session.searched != self._search_args):
        session.searched = search or []
        want_index = 'default'
        if search is None or process_args:
            prefix = ''
            for arg in self._search_args:
                if arg.endswith(':'):
                    # A bare "term:" argument sets a sticky prefix
                    prefix = arg.lower()
                elif ':' in arg or (arg and arg[0] in ('-', '+')):
                    if arg.startswith('index:'):
                        want_index = arg[6:]
                    else:
                        prefix = ''
                        session.searched.append(arg.lower())
                elif prefix and '@' in arg:
                    session.searched.append(prefix + arg.lower())
                else:
                    words = re.findall(WORD_REGEXP, arg.lower())
                    session.searched.extend([prefix + word
                                             for word in words])
        if not session.searched:
            session.searched = ['all:mail']

        idx = self.switch_indexes(want_index)
        context = session.results if self.context else None
        session.results = list(idx.search(session, session.searched,
                                          context=context).as_set())
        if '*' in self._email_view_pairs.values():
            # If we are auto-choosing which message from a thread to
            # display, then we want the raw results so we can only
            # choose from messages that matched our search. We have to
            # save this, since the sort below may collapse the results.
            raw_results = list(session.results)
        for pmid, emid in list(self._email_view_pairs.iteritems()):
            # Make sure all our requested messages are amongst results
            pmid_idx = int(pmid, 36)
            if pmid_idx not in session.results:
                session.results.append(pmid_idx)
            if ('flat' in session.order) and emid != '*':
                # Flat mode doesn't really use view pairs, so also make
                # sure our actual view target is among results.
                emid_idx = int(emid, 36)
                if emid_idx not in session.results:
                    session.results.append(emid_idx)
        if session.order:
            idx.sort_results(session, session.results, session.order)
    else:
        idx = self._idx()

    self._emails = []
    pivot_pos = any_pos = len(session.results)
    if self._email_view_pairs:
        new_tids = set([t._key
                        for t in session.config.get_tags(type='unread')])
    for pmid, emid in list(self._email_view_pairs.iteritems()):
        try:
            if emid == '*':
                pmid_idx = int(pmid, 36)
                conversation = idx.get_conversation(msg_idx=pmid_idx)
                # Find oldest message in conversation that is unread
                # and matches our search criteria...
                # NOTE(review): raw_results is only assigned in the
                # fresh-search branch above; if this '*' branch runs
                # without it we'd get a NameError — confirm the caller
                # always forces a fresh search when '*' pairs exist.
                matches = []
                for info in conversation:
                    if new_tids & set(info[idx.MSG_TAGS].split(',')):
                        imid_idx = int(info[idx.MSG_MID], 36)
                        if imid_idx in raw_results:
                            matches.append((int(info[idx.MSG_DATE], 36),
                                            imid_idx))
                if matches:
                    emid_idx = min(matches)[1]
                    emid = b36(emid_idx)
                else:
                    emid = pmid
                    emid_idx = pmid_idx
                self._email_view_pairs[pmid] = emid
            else:
                emid_idx = int(emid, 36)
                conversation = idx.get_conversation(msg_idx=emid_idx)

            if 'flat' not in session.order:
                for info in conversation:
                    cmid = info[idx.MSG_MID]
                    self._email_view_pairs[cmid] = emid

            # Calculate visibility...
            for cmid in self._email_view_pairs:
                try:
                    cpos = session.results.index(int(cmid, 36))
                except ValueError:
                    cpos = -1
                if cpos >= 0:
                    any_pos = min(any_pos, cpos)
                    if (cpos > self._start
                            and cpos < self._start + self._num + 1):
                        pivot_pos = min(cpos, pivot_pos)

            self._emails.append(Email(idx, emid_idx))
        except ValueError:
            self._email_view_pairs = {}

    if 'flat' in session.order:
        # Above we have guaranteed that the target message is in the
        # result set; unset this dictionary to force a flat display
        # of the chosen message.
        self._email_view_pairs = {}

    # Adjust the visible window of results if we are expanding an
    # individual message, to guarantee visibility.
    if pivot_pos < len(session.results):
        self._start = max(0, pivot_pos - max(self._num // 5, 2))
    elif any_pos < len(session.results):
        self._start = max(0, any_pos - max(self._num // 5, 2))

    if self._emails:
        self._email_view_side_effects(self._emails)

    return session, idx
def command(self, save=True):
    """Export selected messages (and, by default, their whole threads)
    to a new mailbox on disk.

    Refuses to run in lockdown mode or if the target path exists.
    An explicit 'type:path' final argument overrides the configured
    export format; '-flat' skips thread expansion and '-notags'
    suppresses the X-Mailpile-Tags header.
    """
    session, config, idx = self.session, self.session.config, self._idx()
    mbox_type = config.prefs.export_format

    if self.session.config.sys.lockdown:
        return self._error(_('In lockdown, doing nothing.'))

    args = list(self.args)
    if args and ':' in args[-1]:
        mbox_type, path = args.pop(-1).split(':', 1)
    else:
        path = self.export_path(mbox_type)

    flat = notags = False
    while args and args[0][:1] == '-':
        option = args.pop(0).replace('-', '')
        if option == 'flat':
            flat = True
        elif option == 'notags':
            notags = True

    if os.path.exists(path):
        return self._error('Already exists: %s' % path)

    msg_idxs = list(self._choose_messages(args))
    if not msg_idxs:
        session.ui.warning('No messages selected')
        return False

    # Exporting messages without their threads barely makes any
    # sense.
    if not flat:
        for i in reversed(range(0, len(msg_idxs))):
            mi = msg_idxs[i]
            msg_idxs[i:i + 1] = [
                int(m[idx.MSG_MID], 36)
                for m in idx.get_conversation(msg_idx=mi)]

    # Let's always export in the same order. Stability is nice.
    msg_idxs.sort()

    mbox = self.create_mailbox(mbox_type, path)
    exported = {}
    failed = []
    while msg_idxs:
        msg_idx = msg_idxs.pop(0)
        if msg_idx not in exported:
            e = Email(idx, msg_idx)
            session.ui.mark('Exporting =%s ...' % e.msg_mid())
            fd = e.get_file()
            if fd is None:
                # BUGFIX: get_file() can return None for unreadable
                # messages; previously this crashed with an
                # AttributeError on fd.read(). Skip and report instead.
                failed.append(e.msg_mid())
                session.ui.warning(_('Message =%s is unreadable! Skipping.')
                                   % e.msg_mid())
                continue
            try:
                data = fd.read()
                if not notags:
                    # Record the message's tag slugs in a header so
                    # they survive the export.
                    tags = [
                        tag.slug for tag in
                        (self.session.config.get_tag(t) or t
                         for t in e.get_msg_info(idx.MSG_TAGS).split(',')
                         if t)
                        if hasattr(tag, 'slug')]
                    lf = '\r\n' if ('\r\n' in data[:200]) else '\n'
                    header, body = data.split(lf + lf, 1)
                    data = str(
                        lf.join([
                            header,
                            'X-Mailpile-Tags: ' +
                            '; '.join(sorted(tags)).encode('utf-8'),
                            '',
                            body]))
                mbox.add(data)
                exported[msg_idx] = 1
            finally:
                fd.close()

    mbox.flush()

    result = {'exported': len(exported), 'created': path}
    if failed:
        result['failed'] = failed
    return self._success(
        _('Exported %d messages to %s') % (len(exported), path), result)
def read_message(self, session, msg_mid, msg_id, msg, msg_size, msg_ts,
                 mailbox=None):
    """Walk a parsed message and extract its search keywords.

    Returns (keywords, snippet): a set of index keywords (minus the
    stoplist) and a short plain-text snippet of the body.
    """
    keywords = []
    snippet = ''
    payload = [None]
    for part in msg.walk():
        textpart = payload[0] = None
        ctype = part.get_content_type()
        charset = part.get_content_charset() or 'iso-8859-1'

        def _loader(p):
            # Decode the part's payload lazily, at most once per part.
            if payload[0] is None:
                payload[0] = self.try_decode(p.get_payload(None, True),
                                             charset)
            return payload[0]

        if ctype == 'text/plain':
            textpart = _loader(part)
        elif ctype == 'text/html':
            _loader(part)
            if len(payload[0]) > 3:
                try:
                    textpart = lxml.html.fromstring(payload[0]
                                                    ).text_content()
                except:
                    session.ui.warning(_('=%s/%s has bogus HTML.'
                                         ) % (msg_mid, msg_id))
                    textpart = payload[0]
            else:
                textpart = payload[0]
        elif 'pgp' in part.get_content_type():
            keywords.append('pgp:has')

        att = part.get_filename()
        if att:
            att = self.try_decode(att, charset)
            keywords.append('attachment:has')
            keywords.extend([t + ':att' for t
                             in re.findall(WORD_REGEXP, att.lower())])
            textpart = (textpart or '') + ' ' + att

        if textpart:
            # FIXME: Does this lowercase non-ASCII characters correctly?
            keywords.extend(re.findall(WORD_REGEXP, textpart.lower()))

            # NOTE: As a side effect here, the cryptostate plugin will
            #       add a 'crypto:has' keyword which we check for below
            #       before performing further processing.
            for kwe in plugins.get_text_kw_extractors():
                keywords.extend(kwe(self, msg, ctype, textpart))

            if len(snippet) < 1024:
                snippet += ' ' + textpart

        for extract in plugins.get_data_kw_extractors():
            keywords.extend(extract(self, msg, ctype, att, part,
                                    lambda: _loader(part)))

    if 'crypto:has' in keywords:
        e = Email(self, -1)
        e.msg_parsed = msg
        e.msg_info = self.BOGUS_METADATA[:]
        tree = e.get_message_tree(want=(e.WANT_MSG_TREE_PGP
                                        + ('text_parts', )))

        # Look for inline PGP parts, update our status if found
        e.evaluate_pgp(tree,
                       decrypt=session.config.prefs.index_encrypted)
        msg.signature_info = tree['crypto']['signature']
        msg.encryption_info = tree['crypto']['encryption']

        # Index the contents, if configured to do so
        if session.config.prefs.index_encrypted:
            for text in [t['data'] for t in tree['text_parts']]:
                keywords.extend(re.findall(WORD_REGEXP, text.lower()))
                for kwe in plugins.get_text_kw_extractors():
                    keywords.extend(kwe(self, msg, 'text/plain', text))

    keywords.append('%s:id' % msg_id)
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'subject').lower()))
    keywords.extend(re.findall(WORD_REGEXP,
                               self.hdr(msg, 'from').lower()))
    if mailbox:
        keywords.append('%s:mailbox' % mailbox.lower())
    keywords.append('%s:hp' % HeaderPrint(msg))

    for key in msg.keys():
        key_lower = key.lower()
        if key_lower not in BORING_HEADERS:
            emails = ExtractEmails(self.hdr(msg, key).lower())
            words = set(re.findall(WORD_REGEXP,
                                   self.hdr(msg, key).lower()))
            words -= STOPLIST
            keywords.extend(['%s:%s' % (t, key_lower) for t in words])
            keywords.extend(['%s:%s' % (e, key_lower) for e in emails])
            keywords.extend(['%s:email' % e for e in emails])
            if 'list' in key_lower:
                keywords.extend(['%s:list' % t for t in words])

    for key in EXPECTED_HEADERS:
        if not msg[key]:
            keywords.append('%s:missing' % key)

    for extract in plugins.get_meta_kw_extractors():
        keywords.extend(extract(self, msg_mid, msg, msg_size, msg_ts))

    snippet = snippet.replace('\n', ' '
                              ).replace('\t', ' ').replace('\r', '')
    return (set(keywords) - STOPLIST), snippet.strip()
def _add_from_messages(self, args):
    """Extract (email, name) pairs from the From: headers of the
    messages selected by args."""
    idx = self._idx()
    selected = [Email(idx, i) for i in self._choose_messages(args)]
    return [ExtractEmailAndName(msg.get_msg_info(idx.MSG_FROM))
            for msg in selected]