Exemple #1
0
def main():
    """Command-line entry point of the tagger.

    Parses the global ``--verbose`` / ``--progress`` flags and one of the
    sub-commands ``train``, ``tag`` or ``validate``, sets up a logger that
    writes bare messages to stdout, and dispatches to the matching routine.

    The model file is ``blaecksprutte.db`` inside the path reported by the
    notmuch database. ``train`` pickles the three values returned by
    ``train_from_bottom`` into that file; ``tag`` hands the file name to
    ``tag_new_mails``. If no sub-command was selected nothing is run.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--verbose",
        action="store_true",
        help="print logging messages to stdout")
    arg_parser.add_argument(
        "--progress",
        action="store_true",
        help="print a progress bar")

    commands = arg_parser.add_subparsers(dest="command")
    commands.add_parser(
        "train", help="train the tagger from standard notmuch database")
    commands.add_parser("tag", help="tag the mails with a new-tag")
    commands.add_parser(
        "validate",
        help="show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4.")
    options = arg_parser.parse_args()

    # The database is opened only long enough to ask it for its path.
    notmuch_db = Database()
    db_root = notmuch_db.get_path()
    notmuch_db.close()
    model_path = os.path.join(db_root, "blaecksprutte.db")

    warnings.simplefilter('ignore', UndefinedMetricWarning)

    # Only errors are shown unless --verbose was given.
    verbosity = logging.INFO if options.verbose else logging.ERROR
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(logging.Formatter('%(message)s'))
    stdout_handler.setLevel(verbosity)
    log = logging.getLogger(__name__)
    log.addHandler(stdout_handler)
    log.setLevel(verbosity)

    command = options.command
    if command == 'train':
        # Unpack first so a malformed result fails before the file is opened.
        v, b, c = train_from_bottom(log, options.progress)
        with open(model_path, 'wb') as model_file:
            pickle.dump([v, b, c], model_file, pickle.HIGHEST_PROTOCOL)
    elif command == 'tag':
        tag_new_mails(model_path, log)
    elif command == 'validate':
        validate(log, options.progress)
Exemple #2
0
    def flush(self):
        """
        write out all queued write-commands in order, each one in a separate
        :meth:`atomic <notmuch.Database.begin_atomic>` transaction.

        Every queue item is a sequence whose first two entries are the
        command name and a post-callback (called after the database was
        closed again, if it is callable). The remaining entries depend on
        the command:

        * ``'add'``: path, tags
        * ``'remove'``: path
        * ``'tag'``, ``'set'``, ``'untag'``: querystring, tags

        If writing an item fails, that item is put back at the front of the
        write queue and an exception is raised.
        You are responsible to retry flushing at a later time if you want to
        ensure that the cached changes are applied to the database.

        :exception: :exc:`~errors.DatabaseROError` if db is opened read-only
        :exception: :exc:`~errors.DatabaseLockedError` if db is locked
        :exception: `DatabaseError` if a Xapian/notmuch error occurs or the
                    transaction could not be ended
        """
        if self.ro:
            raise DatabaseROError()
        if self.writequeue:
            # read notmuch's config regarding imap flag synchronization
            sync = settings.get_notmuch_setting('maildir', 'synchronize_flags')

            # go through writequeue entries
            while self.writequeue:
                current_item = self.writequeue.popleft()
                logging.debug('write-out item: %s', str(current_item))

                # watch out for errors to re-insert current_item
                # to the queue on errors
                # NOTE(review): the db handle is only closed on the success
                # path; on errors it is left to be cleaned up elsewhere.
                try:
                    # the first two coordinates are cmdname and post-callback
                    cmd, afterwards = current_item[:2]
                    logging.debug('cmd created')

                    # acquire a writeable db handler
                    try:
                        mode = Database.MODE.READ_WRITE
                        db = Database(path=self.path, mode=mode)
                    except NotmuchError:
                        raise DatabaseLockedError()
                    logging.debug('got write lock')

                    # make this a transaction
                    db.begin_atomic()
                    logging.debug('got atomic')

                    if cmd == 'add':
                        logging.debug('add')
                        path, tags = current_item[2:]
                        msg, _ = db.add_message(path, sync_maildir_flags=sync)
                        logging.debug('added msg')
                        msg.freeze()
                        logging.debug('freeze')
                        for tag in tags:
                            msg.add_tag(tag.encode(DB_ENC),
                                        sync_maildir_flags=sync)
                        logging.debug('added tags ')
                        msg.thaw()
                        logging.debug('thaw')

                    elif cmd == 'remove':
                        path = current_item[2]
                        db.remove_message(path)

                    else:  # tag/set/untag
                        querystring, tags = current_item[2:]
                        query = db.create_query(querystring)
                        for msg in query.search_messages():
                            msg.freeze()
                            if cmd == 'tag':
                                for tag in tags:
                                    msg.add_tag(tag.encode(DB_ENC),
                                                sync_maildir_flags=sync)
                            elif cmd == 'set':
                                # replace the tag set: drop all, then add
                                msg.remove_all_tags()
                                for tag in tags:
                                    msg.add_tag(tag.encode(DB_ENC),
                                                sync_maildir_flags=sync)
                            elif cmd == 'untag':
                                for tag in tags:
                                    msg.remove_tag(tag.encode(DB_ENC),
                                                   sync_maildir_flags=sync)
                            msg.thaw()

                    logging.debug('ending atomic')
                    # end transaction; a failure is re-queued by the
                    # DatabaseError handler below
                    if db.end_atomic() != notmuch.STATUS.SUCCESS:
                        raise DatabaseError('end_atomic failed')
                    logging.debug('ended atomic')

                    # close db
                    db.close()
                    logging.debug('closed db')

                    # call post-callback
                    if callable(afterwards):
                        logging.debug(str(afterwards))
                        afterwards()
                        logging.debug('called callback')

                # re-insert item to the queue upon Xapian/NotmuchErrors
                except (XapianError, NotmuchError) as e:
                    logging.exception(e)
                    self.writequeue.appendleft(current_item)
                    raise DatabaseError(unicode(e))
                except DatabaseLockedError:
                    logging.debug('index temporarily locked')
                    self.writequeue.appendleft(current_item)
                    # bare raise keeps the original traceback
                    raise
                except DatabaseError:
                    # e.g. 'end_atomic failed' raised above: keep the item
                    # queued so that a later flush can retry it
                    self.writequeue.appendleft(current_item)
                    raise
            # only reached once every queued item was written out
            logging.debug('flush finished')
Exemple #3
0
    def flush(self):
        """
        write out all queued write-commands in order, each one in a separate
        :meth:`atomic <notmuch.Database.begin_atomic>` transaction.

        Every queue item is a sequence whose first two entries are the
        command name and a post-callback (called after the database was
        closed again, if it is callable). The remaining entries depend on
        the command:

        * ``'add'``: path, tags
        * ``'remove'``: path
        * ``'tag'``, ``'set'``, ``'untag'``: querystring, tags

        If writing an item fails, that item is put back at the front of the
        write queue and an exception is raised.
        You are responsible to retry flushing at a later time if you want to
        ensure that the cached changes are applied to the database.

        :exception: :exc:`~errors.DatabaseROError` if db is opened read-only
        :exception: :exc:`~errors.DatabaseLockedError` if db is locked
        :exception: `DatabaseError` if a Xapian/notmuch error occurs or the
                    transaction could not be ended
        """
        if self.ro:
            raise DatabaseROError()
        if self.writequeue:
            # read notmuch's config regarding imap flag synchronization
            sync = settings.get_notmuch_setting('maildir', 'synchronize_flags')

            # go through writequeue entries
            while self.writequeue:
                current_item = self.writequeue.popleft()
                logging.debug('write-out item: %s', str(current_item))

                # watch out for errors to re-insert current_item
                # to the queue on errors
                # NOTE(review): the db handle is only closed on the success
                # path; on errors it is left to be cleaned up elsewhere.
                try:
                    # the first two coordinates are cmdname and post-callback
                    cmd, afterwards = current_item[:2]
                    logging.debug('cmd created')

                    # acquire a writeable db handler
                    try:
                        mode = Database.MODE.READ_WRITE
                        db = Database(path=self.path, mode=mode)
                    except NotmuchError:
                        raise DatabaseLockedError()
                    logging.debug('got write lock')

                    # make this a transaction
                    db.begin_atomic()
                    logging.debug('got atomic')

                    if cmd == 'add':
                        logging.debug('add')
                        path, tags = current_item[2:]
                        msg, _ = db.add_message(path, sync_maildir_flags=sync)
                        logging.debug('added msg')
                        msg.freeze()
                        logging.debug('freeze')
                        for tag in tags:
                            msg.add_tag(tag.encode(DB_ENC),
                                        sync_maildir_flags=sync)
                        logging.debug('added tags ')
                        msg.thaw()
                        logging.debug('thaw')

                    elif cmd == 'remove':
                        path = current_item[2]
                        db.remove_message(path)

                    else:  # tag/set/untag
                        querystring, tags = current_item[2:]
                        query = db.create_query(querystring)
                        for msg in query.search_messages():
                            msg.freeze()
                            if cmd == 'tag':
                                for tag in tags:
                                    msg.add_tag(tag.encode(DB_ENC),
                                                sync_maildir_flags=sync)
                            elif cmd == 'set':
                                # replace the tag set: drop all, then add
                                msg.remove_all_tags()
                                for tag in tags:
                                    msg.add_tag(tag.encode(DB_ENC),
                                                sync_maildir_flags=sync)
                            elif cmd == 'untag':
                                for tag in tags:
                                    msg.remove_tag(tag.encode(DB_ENC),
                                                   sync_maildir_flags=sync)
                            msg.thaw()

                    logging.debug('ending atomic')
                    # end transaction; a failure is re-queued by the
                    # DatabaseError handler below
                    if db.end_atomic() != notmuch.STATUS.SUCCESS:
                        raise DatabaseError('end_atomic failed')
                    logging.debug('ended atomic')

                    # close db
                    db.close()
                    logging.debug('closed db')

                    # call post-callback
                    if callable(afterwards):
                        logging.debug(str(afterwards))
                        afterwards()
                        logging.debug('called callback')

                # re-insert item to the queue upon Xapian/NotmuchErrors
                except (XapianError, NotmuchError) as e:
                    logging.exception(e)
                    self.writequeue.appendleft(current_item)
                    raise DatabaseError(unicode(e))
                except DatabaseLockedError:
                    logging.debug('index temporarily locked')
                    self.writequeue.appendleft(current_item)
                    # bare raise keeps the original traceback
                    raise
                except DatabaseError:
                    # e.g. 'end_atomic failed' raised above: keep the item
                    # queued so that a later flush can retry it
                    self.writequeue.appendleft(current_item)
                    raise
            # only reached once every queued item was written out
            logging.debug('flush finished')
Exemple #4
0
def main():
    """Command-line entry point of the tagger.

    Parses the global ``--verbose`` / ``--progress`` flags and one of the
    sub-commands ``train``, ``tag``, ``validate`` or ``optimize``, sets up a
    logger that writes bare messages to stdout, and dispatches to the
    matching routine.

    Two files inside the path reported by the notmuch database are used:
    ``blaecksprutte.db`` (the model) and ``best_pipeline.db`` (the
    pipeline). For ``tag``, a missing model file triggers a training run
    first. If no sub-command was selected nothing is run.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose",
                        help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress",
                        help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help=
        "show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    subparsers.add_parser(
        "optimize",
        help=
        "perform a grid search with 60 different possible hyperparameters to find the best ones"
    )
    args = parser.parse_args()

    # The database is opened only long enough to ask it for its path.
    db = Database()
    path = db.get_path()
    db.close()

    model_filename = os.path.join(path, "blaecksprutte.db")
    pipeline_filename = os.path.join(path, "best_pipeline.db")

    warnings.simplefilter('ignore', UndefinedMetricWarning)
    warnings.simplefilter('ignore', FutureWarning)
    warnings.simplefilter('ignore', UserWarning)

    # Only errors are shown unless --verbose was given.
    level = logging.INFO if args.verbose else logging.ERROR

    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)

    # The sub-commands are mutually exclusive, hence a single elif chain.
    if args.command == 'train':
        train(log, pipeline_filename, model_filename, args.progress)

    elif args.command == 'tag':
        if not os.path.isfile(model_filename):
            # Logger.warn is a deprecated alias; warning() is the
            # supported spelling.
            log.warning(
                "no existing model file found: training model. This may take some time!"
            )
            train(log, pipeline_filename, model_filename, args.progress)
        tag_new_mails(model_filename, log)

    elif args.command == 'validate':
        validate(log, pipeline_filename, args.progress)

    elif args.command == 'optimize':
        optimize(log, pipeline_filename, args.progress)
def main():
    """Command-line entry point: sync 'X-Keywords' headers with notmuch tags.

    Parses the command line, opens the notmuch database read-write, runs
    the given query and calls ``KwMessage.sync`` on every matching message
    with the selected direction and mode.

    Exactly one sync direction (``-m``, ``-k`` or ``-t``) is required;
    ``-a`` / ``-r`` optionally restrict the sync to adding or removing
    tags, otherwise both are done. The database is closed again even if
    querying or syncing raises.

    :raises ValueError: if none of the direction flags is set (argparse
        should already reject that case, since the group is required)
    """
    parser = argparse.ArgumentParser(
            description="Sync message 'X-Keywords' header with notmuch tags.")
    parser.add_argument("-V", "--version", action="version",
            version="%(prog)s " + "v%s (%s)" % (__version__, __date__))
    parser.add_argument("-q", "--query", dest="query", required=True,
            help="notmuch database query string")
    parser.add_argument("-p", "--db-path", dest="dbpath",
            help="notmuch database path (default to try user configuration)")
    parser.add_argument("-n", "--dry-run", dest="dryrun",
            action="store_true", help="dry run")
    parser.add_argument("-v", "--verbose", dest="verbose",
            action="store_true", help="show verbose information")
    # Exclusive argument group for sync mode
    exgroup1 = parser.add_mutually_exclusive_group(required=True)
    exgroup1.add_argument("-m", "--merge-keywords-tags",
            dest="direction_merge", action="store_true",
            help="merge 'X-Keywords' and tags and update both")
    exgroup1.add_argument("-k", "--keywords-to-tags",
            dest="direction_keywords2tags", action="store_true",
            help="sync 'X-Keywords' to notmuch tags")
    exgroup1.add_argument("-t", "--tags-to-keywords",
            dest="direction_tags2keywords", action="store_true",
            help="sync notmuch tags to 'X-Keywords'")
    # Exclusive argument group for tag operation mode
    exgroup2 = parser.add_mutually_exclusive_group(required=False)
    exgroup2.add_argument("-a", "--add-only", dest="mode_addonly",
            action="store_true", help="only add notmuch tags")
    exgroup2.add_argument("-r", "--remove-only", dest="mode_removeonly",
            action="store_true", help="only remove notmuch tags")
    # Parse
    args = parser.parse_args()
    # Sync direction
    if args.direction_merge:
        sync_direction = SyncDirection.MERGE_KEYWORDS_TAGS
    elif args.direction_keywords2tags:
        sync_direction = SyncDirection.KEYWORDS_TO_TAGS
    elif args.direction_tags2keywords:
        sync_direction = SyncDirection.TAGS_TO_KEYWORDS
    else:
        raise ValueError("Invalid synchronization direction")
    # Sync mode (defaults to both adding and removing)
    if args.mode_addonly:
        sync_mode = SyncMode.ADD_ONLY
    elif args.mode_removeonly:
        sync_mode = SyncMode.REMOVE_ONLY
    else:
        sync_mode = SyncMode.ADD_REMOVE
    # Normalize a user-supplied database path; None is passed through as-is
    if args.dbpath:
        dbpath = os.path.abspath(os.path.expanduser(args.dbpath))
    else:
        dbpath = None
    #
    db = Database(path=dbpath, create=False, mode=Database.MODE.READ_WRITE)
    # The handle is opened read-write, so make sure it is closed again even
    # when the query or a message sync fails.
    try:
        dbinfo = get_notmuch_revision(dbpath=dbpath)
        q = Query(db, args.query)
        total_msgs = q.count_messages()
        msgs = q.search_messages()
        #
        if args.verbose:
            print("# Notmuch database path: %s" % dbpath)
            print("# Database revision: %d (uuid: %s)" %
                    (dbinfo['revision'], dbinfo['uuid']))
            print("# Query: %s" % args.query)
            print("# Sync direction: %s" % sync_direction.name)
            print("# Sync mode: %s" % sync_mode.name)
            print("# Total messages to check: %d" % total_msgs)
            print("# Dry run: %s" % args.dryrun)
        #
        for msg in msgs:
            kwmsg = KwMessage(msg)
            kwmsg.sync(direction=sync_direction, mode=sync_mode,
                       dryrun=args.dryrun, verbose=args.verbose)
    finally:
        db.close()