def get_training_data(progress=False):
    """Collect one text document and one tag-label list per message in the
    notmuch database.

    :param progress: when True, draw a console progress bar while iterating
    :returns: tuple (training_data, training_labels); training_data is a list
        of strings (To/From/Subject headers and body part 1 joined by
        newlines), training_labels the corresponding lists of relevant tags
    """
    training_data = []
    training_labels = []
    db = Database()
    # query that returns all the messages
    q = Query(db, '')
    if progress:
        count = q.count_messages()
        n = 0
        pbar = ProgressBar(widgets=[Percentage(), Bar(), ETA()],
                           maxval=count).start()
    for m in q.search_messages():
        if progress:
            n += 1
            pbar.update(n)
        # build one document per message: headers first, then the body text
        data = [
            m.get_header('To'),
            m.get_header('From'),
            m.get_header('Subject'),
            m.get_part(1).decode("utf8", errors="ignore"),
        ]
        try:
            training_data.append('\n'.join(data))
        except UnicodeDecodeError:
            # debugging aid: show which element had an unexpected type.
            # A list comprehension (not map+lambda) so it also prints
            # eagerly under Python 3.
            print([type(x) for x in data])
            sys.exit(1)
        training_labels.append(erase_irrelevant_tags(list(m.get_tags())))
    if progress:
        pbar.finish()
    return training_data, training_labels
def part(): db = Database() query_string = '' part_num = 0 first_search_term = 0 for (num, arg) in enumerate(sys.argv[1:]): if arg.startswith('--part='): part_num_str = arg.split("=")[1] try: part_num = int(part_num_str) except ValueError: # just emulating behavior exit(1) elif not arg.startswith('--'): # save the position of the first sys.argv # that is a search term first_search_term = num + 1 if first_search_term: # mangle arguments wrapping terms with spaces in quotes querystr = quote_query_line(sys.argv[first_search_term:]) qry = Query(db, querystr) msgs = [msg for msg in qry.search_messages()] if not msgs: sys.exit(1) elif len(msgs) > 1: raise Exception("search term did not match precisely one message") else: msg = msgs[0] print msg.get_part(part_num)
def search(): db = Database() query_string = '' sort_order = "newest-first" first_search_term = 0 for (num, arg) in enumerate(sys.argv[1:]): if arg.startswith('--sort='): sort_order = arg.split("=")[1] if not sort_order in ("oldest-first", "newest-first"): raise Exception("unknown sort order") elif not arg.startswith('--'): # save the position of the first sys.argv that is a search term first_search_term = num + 1 if first_search_term: # mangle arguments wrapping terms with spaces in quotes querystr = quote_query_line(sys.argv[first_search_term:]) qry = Query(db, querystr) if sort_order == "oldest-first": qry.set_sort(Query.SORT.OLDEST_FIRST) else: qry.set_sort(Query.SORT.NEWEST_FIRST) threads = qry.search_threads() for thread in threads: print thread
def get_named_queries(self):
    """
    returns the named queries stored in the database.
    :rtype: dict (str -> str) mapping alias to full query string
    """
    db = Database(path=self.path)
    named = {}
    # stored config keys look like 'query.<alias>'; strip the
    # 6-character 'query.' prefix to recover the alias
    for key, value in db.get_configs('query.'):
        named[key[6:]] = value
    return named
def get_all_tags(self):
    """
    returns all tag strings used in the database
    :rtype: list of str
    """
    db = Database(path=self.path)
    # materialise the lazy tag iterator before the db handle goes away
    return list(db.get_all_tags())
def write_tags(ids, tags):
    """Apply predicted tags to messages.

    For each (message id, tag list) pair, drop the 'new' tag, add 'inbox',
    then add every predicted tag.
    """
    db = Database(mode=Database.MODE.READ_WRITE)
    for message_id, tag_list in zip(ids, tags):
        message = db.find_message(message_id)
        message.remove_tag("new")
        message.add_tag("inbox")
        for tag in tag_list:
            message.add_tag(tag)
def _get_notmuch_message(self, mid):
    """returns :class:`notmuch.database.Message` with given id

    :raises NonexistantObjectError: if the lookup fails
    """
    mode = Database.MODE.READ_ONLY
    db = Database(path=self.path, mode=mode)
    try:
        return db.find_message(mid)
    except Exception:
        # narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # are no longer swallowed and re-labelled as a missing message
        errmsg = 'no message with id %s exists!' % mid
        raise NonexistantObjectError(errmsg)
def notmuch_status(self):
    """Build the py3status payload with inbox and unread message counts,
    excluding anything tagged spam."""
    with Database() as db:
        inbox_query = Query(db, 'not tag:spam and tag:inbox')
        unread_query = Query(db, 'not tag:spam and tag:unread')
        # counts must be taken while the db handle is still open,
        # since search_messages() is lazy
        inbox_count = len(list(inbox_query.search_messages()))
        unread_count = len(list(unread_query.search_messages()))
        return {
            'full_text': 'M: {}({})'.format(inbox_count, unread_count),
            'cached_until': self.py3.time_in(5)
        }
def make_query(query):
    """Create a notmuch query with the configured exclude tags applied.

    Exclude tags are read from the 'exclude_tags' option (semicolon
    separated) in the [search] section of CONFIG_FILE; missing section or
    option means no exclusions.

    :param query: notmuch query string
    :returns: the configured query object
    """
    parser = RawConfigParser()
    parser.read(CONFIG_FILE)
    try:
        tags = parser.get("search", "exclude_tags").split(";")
    except (NoSectionError, NoOptionError):
        tags = []
    q = Database().create_query(query)
    # explicit loop instead of map(): under Python 3 map() is lazy, so
    # map(q.exclude_tag, tags) would silently never call exclude_tag
    for tag in tags:
        q.exclude_tag(tag)
    return q
def query(self, querystring):
    """
    creates :class:`notmuch.Query` objects on demand

    :param querystring: The query string to use for the lookup
    :type querystring: str.
    :returns: :class:`notmuch.Query` -- the query object.
    """
    # open a fresh read-only handle for every query
    mode = Database.MODE.READ_ONLY
    db = Database(path=self.path, mode=mode)
    return db.create_query(querystring)
def main():
    """Command line entry point: parse arguments, configure logging, and
    dispatch to the train/tag/validate sub-commands."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress", help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help=
        "show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    args = parser.parse_args()
    # the trained model is stored next to the notmuch database
    db = Database()
    path = db.get_path()
    db.close()
    filename = os.path.join(path, "blaecksprutte.db")
    warnings.simplefilter('ignore', UndefinedMetricWarning)
    # default to errors only; --verbose raises verbosity to INFO
    level = logging.ERROR
    if args.verbose:
        level = logging.INFO
    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)
    if args.command == 'train':
        # train and persist vectorizer/binarizer/classifier as one pickle
        v, b, c = train_from_bottom(log, args.progress)
        with open(filename, 'wb') as f:
            pickle.dump([v, b, c], f, pickle.HIGHEST_PROTOCOL)
    if args.command == 'tag':
        tag_new_mails(filename, log)
    if args.command == 'validate':
        validate(log, args.progress)
def query(self, querystring):
    """
    creates :class:`notmuch.Query` objects on demand

    :param querystring: The query string to use for the lookup
    :type querystring: str.
    :returns: :class:`notmuch.Query` -- the query object.
    """
    # open a fresh read-only handle for every query
    mode = Database.MODE.READ_ONLY
    db = Database(path=self.path, mode=mode)
    q = db.create_query(querystring)
    # add configured exclude tags
    for tag in settings.get('exclude_tags'):
        q.exclude_tag(tag)
    return q
def get_new_mails():
    """Fetch every message tagged 'new' from the notmuch database.

    :returns: tuple (data, ids); data is a list of strings (To/From/Subject
        headers and body part 1 joined by newlines), ids the matching
        message ids
    """
    db = Database()
    query = Query(db, 'tag:new')
    data = []
    ids = []
    for m in query.search_messages():
        # one document per message: headers first, then the body text
        m_data = [
            m.get_header('To'),
            m.get_header('From'),
            m.get_header('Subject'),
            m.get_part(1).decode("utf8", errors="ignore"),
        ]
        try:
            data.append('\n'.join(m_data))
            ids.append(m.get_message_id())
        except UnicodeDecodeError:
            # debugging aid: show which element had an unexpected type.
            # A list comprehension (not map+lambda) so it also prints
            # eagerly under Python 3.
            print([type(x) for x in m_data])
            sys.exit(1)
    return data, ids
def show():
    """Emulate 'notmuch show': print the threads matching the command-line
    search terms as text or json (--format=), optionally including every
    message of each thread (--entire-thread)."""
    entire_thread = False
    db = Database()
    out_format = "text"
    querystr = ''
    first_search_term = None
    # ugly homegrown option parsing
    # TODO: use OptionParser
    for (num, arg) in enumerate(sys.argv[1:]):
        if arg == '--entire-thread':
            entire_thread = True
        elif arg.startswith("--format="):
            out_format = arg.split("=")[1]
            if out_format == 'json':
                # for compatibility use --entire-thread for json
                entire_thread = True
            if not out_format in ("json", "text"):
                raise Exception("unknown format")
        elif not arg.startswith('--'):
            # save the position of the first sys.argv that is a search term
            first_search_term = num + 1
    if first_search_term:
        # mangle arguments wrapping terms with spaces in quotes
        querystr = quote_query_line(sys.argv[first_search_term:])
    threads = Query(db, querystr).search_threads()
    first_toplevel = True
    # json output is a single array, so emit brackets and commas manually
    if out_format == "json":
        sys.stdout.write("[")
    for thread in threads:
        msgs = thread.get_toplevel_messages()
        if not first_toplevel:
            if out_format == "json":
                sys.stdout.write(", ")
        first_toplevel = False
        msgs.print_messages(out_format, 0, entire_thread)
    if out_format == "json":
        sys.stdout.write("]")
    sys.stdout.write("\n")
def setUpClass(cls):
    """Create a throw-away notmuch config file and an empty notmuch
    database for the whole test class, registering cleanups for both."""
    # create temporary notmuch config
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        f.write(
            textwrap.dedent("""\
            [maildir]
            synchronize_flags = true
            """))
    cls.notmuch_config_path = f.name
    cls.addClassCleanup(os.unlink, f.name)
    # define an empty notmuch database in a temporary directory
    cls.dbpath = tempfile.mkdtemp()
    cls.db = Database(path=cls.dbpath, create=True)
    cls.db.close()
    cls.manager = DBManager(cls.dbpath)
    # clean up temporary database
    cls.addClassCleanup(shutil.rmtree, cls.dbpath)
    # let global settings manager read our temporary notmuch config
    settings.read_notmuch_config(cls.notmuch_config_path)
def db():
    """Open the notmuch database at DATABASE_PATH for reading and writing."""
    database = Database(DATABASE_PATH, create=False,
                        mode=Database.MODE.READ_WRITE)
    return database
from notmuch import Database, Query
from bottle import Bottle, request, response, abort, redirect, view, TEMPLATE_PATH

# make the local ./views directory available to bottle's template loader
TEMPLATE_PATH.append('views')

app = Bottle()
# single shared read handle for all request handlers below
db = Database()


@app.route('/<identifier>/')
def slash(identifier):
    # canonicalise trailing-slash URLs to their slash-less form
    redirect('/' + identifier)


@app.get('/searches/:querystr')
@view('flat')
def search(querystr):
    # render (link, subject) pairs for every thread matching querystr
    query = Query(db, querystr).search_threads()
    threads = [('/threads/' + t.get_thread_id(), t.get_subject()) \
        for t in query]
    return {'heading': 'Results for "%s"' % querystr, 'list': threads}


def _get_thread(thread_id):
    # look up the subject of the thread with the given id
    querystr = 'thread:' + thread_id
    thread = next(iter(Query(db, querystr).search_threads()))
    return thread.get_subject()


@app.get('/threads/:thread_id')
@view('flat')
def thread(thread_id):
def index():
    """Render index.html with at most the first 100 inbox messages."""
    from itertools import islice
    db = Database()
    msgs = Query(db, 'inbox').search_messages()
    # islice stops after 100 messages instead of materialising the whole
    # (potentially huge) result set just to slice it
    return render_template('index.html', msgs=list(islice(msgs, 100)))
def main():
    """Command line dispatcher emulating the 'notmuch' binary in python."""
    # Handle command line options
    #------------------------------------
    # No option given, print USAGE and exit
    if len(sys.argv) == 1:
        Notmuch().cmd_usage()
    #------------------------------------
    elif sys.argv[1] == 'setup':
        """Interactively setup notmuch for first use."""
        exit("Not implemented.")
    #-------------------------------------
    elif sys.argv[1] == 'new':
        """Check for new and removed messages."""
        Notmuch().cmd_new()
    #-------------------------------------
    elif sys.argv[1] == 'help':
        """Print the help text"""
        Notmuch().cmd_help(sys.argv[1:])
    #-------------------------------------
    elif sys.argv[1] == 'part':
        part()
    #-------------------------------------
    elif sys.argv[1] == 'search':
        search()
    #-------------------------------------
    elif sys.argv[1] == 'show':
        show()
    #-------------------------------------
    elif sys.argv[1] == 'reply':
        db = Database()
        if len(sys.argv) == 2:
            # no search term. abort
            exit("Error: notmuch reply requires at least one search term.")
        # mangle arguments wrapping terms with spaces in quotes
        querystr = quote_query_line(sys.argv[2:])
        msgs = Query(db, querystr).search_messages()
        print Notmuch().format_reply(msgs)
    #-------------------------------------
    elif sys.argv[1] == 'count':
        if len(sys.argv) == 2:
            # no further search term, count all
            querystr = ''
        else:
            # mangle arguments wrapping terms with spaces in quotes
            querystr = quote_query_line(sys.argv[2:])
        print Database().create_query(querystr).count_messages()
    #-------------------------------------
    elif sys.argv[1] == 'tag':
        # build lists of tags to be added and removed
        add = []
        remove = []
        # consume leading +tag/-tag arguments until '--' or a search term
        while not sys.argv[2] == '--' and \
                (sys.argv[2].startswith('+') or sys.argv[2].startswith('-')):
            if sys.argv[2].startswith('+'):
                # append to add list without initial +
                add.append(sys.argv.pop(2)[1:])
            else:
                # append to remove list without initial -
                remove.append(sys.argv.pop(2)[1:])
        # skip eventual '--'
        if sys.argv[2] == '--':
            sys.argv.pop(2)
        # the rest is search terms
        querystr = quote_query_line(sys.argv[2:])
        db = Database(mode=Database.MODE.READ_WRITE)
        msgs = Query(db, querystr).search_messages()
        for msg in msgs:
            # actually add and remove all tags
            map(msg.add_tag, add)
            map(msg.remove_tag, remove)
    #-------------------------------------
    elif sys.argv[1] == 'search-tags':
        if len(sys.argv) == 2:
            # no further search term
            print "\n".join(Database().get_all_tags())
        else:
            # mangle arguments wrapping terms with spaces in quotes
            querystr = quote_query_line(sys.argv[2:])
            db = Database()
            msgs = Query(db, querystr).search_messages()
            print "\n".join([t for t in msgs.collect_tags()])
    #-------------------------------------
    elif sys.argv[1] == 'dump':
        # dump '<message-id> (<tags>)' lines to stdout or the given file
        if len(sys.argv) == 2:
            f = sys.stdout
        else:
            f = open(sys.argv[2], "w")
        db = Database()
        query = Query(db, '')
        query.set_sort(Query.SORT.MESSAGE_ID)
        msgs = query.search_messages()
        for msg in msgs:
            f.write("%s (%s)\n" % (msg.get_message_id(), msg.get_tags()))
    #-------------------------------------
    elif sys.argv[1] == 'restore':
        if len(sys.argv) == 2:
            print("No filename given. Reading dump from stdin.")
            f = sys.stdin
        else:
            f = open(sys.argv[2], "r")
        # split the msg id and the tags
        MSGID_TAGS = re.compile("(\S+)\s\((.*)\)$")
        db = Database(mode=Database.MODE.READ_WRITE)
        #read each line of the dump file
        for line in f:
            msgs = MSGID_TAGS.match(line)
            if not msgs:
                sys.stderr.write("Warning: Ignoring invalid input line: %s"
                                 % line)
                continue
            # split line in components and fetch message
            msg_id = msgs.group(1)
            new_tags = set(msgs.group(2).split())
            msg = db.find_message(msg_id)
            if msg == None:
                sys.stderr.write(
                    "Warning: Cannot apply tags to missing message: %s\n" %
                    msg_id)
                continue
            # do nothing if the old set of tags is the same as the new one
            old_tags = set(msg.get_tags())
            if old_tags == new_tags:
                continue
            # set the new tags
            msg.freeze()
            # only remove tags if the new ones are not a superset anyway
            if not (new_tags > old_tags):
                msg.remove_all_tags()
            for tag in new_tags:
                msg.add_tag(tag)
            msg.thaw()
    #-------------------------------------
    else:
        # unknown command
        exit("Error: Unknown command '%s' (see \"notmuch help\")"
             % sys.argv[1])
def cmd_new(self): """Run 'notmuch new'""" #get the database directory db = Database(mode=Database.MODE.READ_WRITE) path = db.get_path() print self._add_new_files_recursively(path, db)
def main():
    """Parse command line arguments and synchronise the 'X-Keywords' header
    with notmuch tags for every message matching the given query.

    Sync direction (-m/-k/-t) is required and mutually exclusive; the tag
    operation mode (-a/-r) is optional and defaults to add-and-remove.
    """
    parser = argparse.ArgumentParser(
        description="Sync message 'X-Keywords' header with notmuch tags.")
    parser.add_argument("-V", "--version", action="version",
                        version="%(prog)s " + "v%s (%s)" % (__version__, __date__))
    parser.add_argument("-q", "--query", dest="query", required=True,
                        help="notmuch database query string")
    parser.add_argument("-p", "--db-path", dest="dbpath",
                        help="notmuch database path (default to try user configuration)")
    parser.add_argument("-n", "--dry-run", dest="dryrun",
                        action="store_true", help="dry run")
    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", help="show verbose information")
    # Exclusive argument group for sync mode
    exgroup1 = parser.add_mutually_exclusive_group(required=True)
    exgroup1.add_argument("-m", "--merge-keywords-tags",
                          dest="direction_merge", action="store_true",
                          help="merge 'X-Keywords' and tags and update both")
    exgroup1.add_argument("-k", "--keywords-to-tags",
                          dest="direction_keywords2tags", action="store_true",
                          help="sync 'X-Keywords' to notmuch tags")
    exgroup1.add_argument("-t", "--tags-to-keywords",
                          dest="direction_tags2keywords", action="store_true",
                          help="sync notmuch tags to 'X-Keywords'")
    # Exclusive argument group for tag operation mode
    exgroup2 = parser.add_mutually_exclusive_group(required=False)
    exgroup2.add_argument("-a", "--add-only", dest="mode_addonly",
                          action="store_true", help="only add notmuch tags")
    exgroup2.add_argument("-r", "--remove-only", dest="mode_removeonly",
                          action="store_true", help="only remove notmuch tags")
    # Parse
    args = parser.parse_args()
    # Sync direction
    if args.direction_merge:
        sync_direction = SyncDirection.MERGE_KEYWORDS_TAGS
    elif args.direction_keywords2tags:
        sync_direction = SyncDirection.KEYWORDS_TO_TAGS
    elif args.direction_tags2keywords:
        sync_direction = SyncDirection.TAGS_TO_KEYWORDS
    else:
        raise ValueError("Invalid synchronization direction")
    # Sync mode
    if args.mode_addonly:
        sync_mode = SyncMode.ADD_ONLY
    elif args.mode_removeonly:
        sync_mode = SyncMode.REMOVE_ONLY
    else:
        sync_mode = SyncMode.ADD_REMOVE
    #
    if args.dbpath:
        dbpath = os.path.abspath(os.path.expanduser(args.dbpath))
    else:
        dbpath = None
    #
    db = Database(path=dbpath, create=False, mode=Database.MODE.READ_WRITE)
    dbinfo = get_notmuch_revision(dbpath=dbpath)
    q = Query(db, args.query)
    total_msgs = q.count_messages()
    msgs = q.search_messages()
    #
    if args.verbose:
        print("# Notmuch database path: %s" % dbpath)
        print("# Database revision: %d (uuid: %s)" %
              (dbinfo['revision'], dbinfo['uuid']))
        print("# Query: %s" % args.query)
        print("# Sync direction: %s" % sync_direction.name)
        print("# Sync mode: %s" % sync_mode.name)
        print("# Total messages to check: %d" % total_msgs)
        # fixed user-facing typo: was "Dryn run"
        print("# Dry run: %s" % args.dryrun)
    #
    for msg in msgs:
        kwmsg = KwMessage(msg)
        kwmsg.sync(direction=sync_direction, mode=sync_mode,
                   dryrun=args.dryrun, verbose=args.verbose)
    #
    db.close()
def flush(self):
    """
    write out all queued write-commands in order, each one in a
    separate :meth:`atomic <notmuch.Database.begin_atomic>` transaction.

    If this fails the current action is rolled back, stays in the write queue
    and an exception is raised.
    You are responsible to retry flushing at a later time if you want to
    ensure that the cached changes are applied to the database.

    :exception: :exc:`~errors.DatabaseROError` if db is opened read-only
    :exception: :exc:`~errors.DatabaseLockedError` if db is locked
    """
    if self.ro:
        raise DatabaseROError()
    if self.writequeue:
        # read notmuch's config regarding imap flag synchronization
        sync = settings.get_notmuch_setting('maildir', 'synchronize_flags')
        # go through writequeue entries
        while self.writequeue:
            current_item = self.writequeue.popleft()
            logging.debug('write-out item: %s', str(current_item))
            # watch out for notmuch errors to re-insert current_item
            # to the queue on errors
            try:
                # the first two coordinants are cnmdname and post-callback
                cmd, afterwards = current_item[:2]
                logging.debug('cmd created')
                # acquire a writeable db handler
                try:
                    mode = Database.MODE.READ_WRITE
                    db = Database(path=self.path, mode=mode)
                except NotmuchError:
                    raise DatabaseLockedError()
                logging.debug('got write lock')
                # make this a transaction
                db.begin_atomic()
                logging.debug('got atomic')
                if cmd == 'add':
                    # queue item layout: ('add', callback, path, tags)
                    logging.debug('add')
                    path, tags = current_item[2:]
                    msg, _ = db.add_message(path, sync_maildir_flags=sync)
                    logging.debug('added msg')
                    msg.freeze()
                    logging.debug('freeze')
                    for tag in tags:
                        msg.add_tag(tag.encode(DB_ENC),
                                    sync_maildir_flags=sync)
                    logging.debug('added tags ')
                    msg.thaw()
                    logging.debug('thaw')
                elif cmd == 'remove':
                    # queue item layout: ('remove', callback, path)
                    path = current_item[2]
                    db.remove_message(path)
                else:
                    # tag/set/untag
                    # queue item layout: (cmd, callback, querystring, tags)
                    querystring, tags = current_item[2:]
                    query = db.create_query(querystring)
                    for msg in query.search_messages():
                        msg.freeze()
                        if cmd == 'tag':
                            for tag in tags:
                                msg.add_tag(tag.encode(DB_ENC),
                                            sync_maildir_flags=sync)
                        if cmd == 'set':
                            # 'set' replaces all tags with the given ones
                            msg.remove_all_tags()
                            for tag in tags:
                                msg.add_tag(tag.encode(DB_ENC),
                                            sync_maildir_flags=sync)
                        elif cmd == 'untag':
                            for tag in tags:
                                msg.remove_tag(tag.encode(DB_ENC),
                                               sync_maildir_flags=sync)
                        msg.thaw()
                logging.debug('ended atomic')
                # end transaction and reinsert queue item on error
                if db.end_atomic() != notmuch.STATUS.SUCCESS:
                    raise DatabaseError('end_atomic failed')
                logging.debug('ended atomic')
                # close db
                db.close()
                logging.debug('closed db')
                # call post-callback
                if callable(afterwards):
                    logging.debug(str(afterwards))
                    afterwards()
                    logging.debug('called callback')
            # re-insert item to the queue upon Xapian/NotmuchErrors
            except (XapianError, NotmuchError) as e:
                logging.exception(e)
                self.writequeue.appendleft(current_item)
                raise DatabaseError(unicode(e))
            except DatabaseLockedError as e:
                logging.debug('index temporarily locked')
                self.writequeue.appendleft(current_item)
                raise e
        logging.debug('flush finished')
def main():
    """Command line entry point: parse arguments, configure logging, and
    dispatch to the train/tag/validate/optimize sub-commands."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress", help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help=
        "show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    subparsers.add_parser(
        "optimize",
        help=
        "perform a grid search with 60 different possible hyperparameters to find the best ones"
    )
    args = parser.parse_args()
    # model and pipeline files are stored next to the notmuch database
    db = Database()
    path = db.get_path()
    db.close()
    model_filename = os.path.join(path, "blaecksprutte.db")
    pipeline_filename = os.path.join(path, "best_pipeline.db")
    warnings.simplefilter('ignore', UndefinedMetricWarning)
    warnings.simplefilter('ignore', FutureWarning)
    warnings.simplefilter('ignore', UserWarning)
    # default to errors only; --verbose raises verbosity to INFO
    level = logging.ERROR
    if args.verbose:
        level = logging.INFO
    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)
    if args.command == 'train':
        train(log, pipeline_filename, model_filename, args.progress)
    if args.command == 'tag':
        # tagging needs a trained model; train on demand if it is missing
        if not os.path.isfile(model_filename):
            log.warn(
                "no existing model file found: training model. This may take some time!"
            )
            train(log, pipeline_filename, model_filename, args.progress)
        tag_new_mails(model_filename, log)
    if args.command == 'validate':
        validate(log, pipeline_filename, args.progress)
    if args.command == 'optimize':
        optimize(log, pipeline_filename, args.progress)