Esempio n. 1
0
def main():
    """Command-line entry point for training, applying or validating the tagger.

    Accepts the global flags ``--verbose`` and ``--progress`` followed by
    exactly one sub-command:

    * ``train``    -- calls ``train_from_bottom`` and pickles the three
      returned objects, as a list, into ``blaecksprutte.db`` inside the
      directory reported by ``Database.get_path()``.
    * ``tag``      -- calls ``tag_new_mails`` with that same file name.
    * ``validate`` -- calls ``validate``.

    Log records go to stdout as bare messages; the threshold is ``ERROR``
    unless ``--verbose`` lowers it to ``INFO``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose",
                        help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress",
                        help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    # Python 3 treats sub-commands as optional by default, which made a bare
    # invocation open the database and then silently do nothing.  Marking the
    # sub-parser action as required yields a proper usage error instead.
    subparsers.required = True
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help=
        "show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    args = parser.parse_args()

    # The database is only needed to learn where the model file lives, so
    # release it right away -- even if looking up the path fails.
    db = Database()
    try:
        path = db.get_path()
    finally:
        db.close()

    filename = os.path.join(path, "blaecksprutte.db")

    warnings.simplefilter('ignore', UndefinedMetricWarning)

    level = logging.INFO if args.verbose else logging.ERROR

    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)

    # Exactly one sub-command is selected, so the branches are exclusive.
    if args.command == 'train':
        v, b, c = train_from_bottom(log, args.progress)
        with open(filename, 'wb') as f:
            pickle.dump([v, b, c], f, pickle.HIGHEST_PROTOCOL)
    elif args.command == 'tag':
        tag_new_mails(filename, log)
    elif args.command == 'validate':
        validate(log, args.progress)
Esempio n. 2
0
 def cmd_new(self):
     """Run 'notmuch new'.

     Opens the database in read-write mode, looks up its directory and
     prints whatever ``self._add_new_files_recursively(path, db)`` returns.
     """
     # Get the database directory.
     # NOTE(review): the read-write handle is never explicitly closed here;
     # confirm whether this Database version needs (or offers) a close().
     db = Database(mode=Database.MODE.READ_WRITE)
     path = db.get_path()
     # Parenthesised single-argument form: prints the same on Python 2 and
     # is valid syntax on Python 3, unlike the bare print statement.
     print(self._add_new_files_recursively(path, db))
Esempio n. 3
0
 def cmd_new(self):
     """Run 'notmuch new'.

     Opens the database in read-write mode, looks up its directory and
     prints whatever ``self._add_new_files_recursively(path, db)`` returns.
     """
     # Get the database directory.
     # NOTE(review): the read-write handle is never explicitly closed here;
     # confirm whether this Database version needs (or offers) a close().
     db = Database(mode=Database.MODE.READ_WRITE)
     path = db.get_path()
     # Parenthesised single-argument form: prints the same on Python 2 and
     # is valid syntax on Python 3, unlike the bare print statement.
     print(self._add_new_files_recursively(path, db))
Esempio n. 4
0
def main():
    """Command-line entry point for the tagger (train/tag/validate/optimize).

    Accepts the global flags ``--verbose`` and ``--progress`` followed by
    exactly one sub-command:

    * ``train``    -- calls ``train`` with the pipeline and model file names.
    * ``tag``      -- calls ``tag_new_mails``; when the model file does not
      exist yet, ``train`` is run first.
    * ``validate`` -- calls ``validate`` with the pipeline file name.
    * ``optimize`` -- calls ``optimize`` with the pipeline file name.

    Both files (``blaecksprutte.db`` and ``best_pipeline.db``) live in the
    directory reported by ``Database.get_path()``.  Log records go to stdout
    as bare messages; the threshold is ``ERROR`` unless ``--verbose`` lowers
    it to ``INFO``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose",
                        help="print logging messages to stdout",
                        action="store_true")
    parser.add_argument("--progress",
                        help="print a progress bar",
                        action="store_true")
    subparsers = parser.add_subparsers(dest="command")
    # Python 3 treats sub-commands as optional by default, which made a bare
    # invocation open the database and then silently do nothing.  Marking the
    # sub-parser action as required yields a proper usage error instead.
    subparsers.required = True
    subparsers.add_parser(
        "train", help="train the tagger from standard notmuch database")
    subparsers.add_parser("tag", help="tag the mails with a new-tag")
    subparsers.add_parser(
        "validate",
        help=
        "show a classification report on stdout when trained on 0.6 of the maildir and tested on the other 0.4."
    )
    subparsers.add_parser(
        "optimize",
        help=
        "perform a grid search with 60 different possible hyperparameters to find the best ones"
    )
    args = parser.parse_args()

    # The database is only needed to learn where the data files live, so
    # release it right away -- even if looking up the path fails.
    db = Database()
    try:
        path = db.get_path()
    finally:
        db.close()

    model_filename = os.path.join(path, "blaecksprutte.db")
    pipeline_filename = os.path.join(path, "best_pipeline.db")

    for category in (UndefinedMetricWarning, FutureWarning, UserWarning):
        warnings.simplefilter('ignore', category)

    level = logging.INFO if args.verbose else logging.ERROR

    log = logging.getLogger(__name__)
    out_hdlr = logging.StreamHandler(sys.stdout)
    out_hdlr.setFormatter(logging.Formatter('%(message)s'))
    out_hdlr.setLevel(level)
    log.addHandler(out_hdlr)
    log.setLevel(level)

    # Exactly one sub-command is selected, so the branches are exclusive.
    if args.command == 'train':
        train(log, pipeline_filename, model_filename, args.progress)
    elif args.command == 'tag':
        if not os.path.isfile(model_filename):
            # Logger.warn is a deprecated alias; warning() is the supported
            # spelling.  With the default ERROR threshold this message is
            # only emitted when --verbose is given.
            log.warning(
                "no existing model file found: training model. This may take some time!"
            )
            train(log, pipeline_filename, model_filename, args.progress)
        tag_new_mails(model_filename, log)
    elif args.command == 'validate':
        validate(log, pipeline_filename, args.progress)
    elif args.command == 'optimize':
        optimize(log, pipeline_filename, args.progress)