Ejemplo n.º 1
0
def main():
    """Command-line entry point built on argparse.

    Declares the global options (--corpus_dir, --samples_name, --result_dir)
    and one sub-parser per sub-command; every sub-parser stores its handler
    as ``func`` through ``set_defaults``.  After parsing, the arguments are
    merged into an ``AppArgs`` object via ``update_args``, the merged
    settings are written to ``./.diggerrc`` and the selected handler is
    called with the ``AppArgs`` instance.
    """
    parser = argparse.ArgumentParser(description="Positive and Unlabeled Extractor 1.0")
    parser.add_argument("--corpus_dir", type=str, help="Corpus root dir.")
    parser.add_argument("--samples_name", type=str, help="The samples's name in corpus.")
    parser.add_argument("--result_dir", type=str, help="Dir to save result.")

    subparsers = parser.add_subparsers(dest="subcommand", title="subcommands", help="sub-command help")

    # -------- import_samples --------
    parser_import_samples = subparsers.add_parser("import_samples", help="import samples help")
    parser_import_samples.add_argument("--xls_file", type=str, help="The Excel file name will be imported.")
    parser_import_samples.set_defaults(func=cmd_import_samples)

    # -------- rebuild --------
    parser_rebuild = subparsers.add_parser("rebuild", help="rebuild help")
    parser_rebuild.set_defaults(func=cmd_rebuild)

    # -------- show --------
    parser_show = subparsers.add_parser("show", help="show help")
    parser_show.set_defaults(func=cmd_show)

    # -------- query_sample --------
    parser_query_sample = subparsers.add_parser("query_sample", help="query sample help")
    parser_query_sample.add_argument("--sample_id", type=int, help="The sample id.")
    parser_query_sample.set_defaults(func=cmd_query_sample)

    # -------- test --------
    parser_test = subparsers.add_parser("test", help="test help")
    parser_test.set_defaults(func=cmd_test)

    # -------- export_samples --------
    parser_export_samples = subparsers.add_parser("export_samples", help="export samples help")
    # The help text used to say "imported" (copy/paste from import_samples).
    parser_export_samples.add_argument("--xls_file", type=str, help="The Excel file name will be exported.")
    parser_export_samples.set_defaults(func=cmd_export_samples)

    # -------- query_keywords --------
    parser_query_keywords = subparsers.add_parser("query_keywords", help="query keywords help")
    parser_query_keywords.set_defaults(func=cmd_query_keywords)

    # -------- iem / sem / pulearning --------
    # These three sub-commands take the same pair of options, so they are
    # registered in one loop (same order as before to keep --help unchanged).
    pu_commands = (
        ("iem", "IEM help", cmd_iem),
        ("sem", "SEM help", cmd_sem),
        ("pulearning", "PULearning help", cmd_pulearning),
    )
    for command_name, command_help, handler in pu_commands:
        pu_parser = subparsers.add_parser(command_name, help=command_help)
        pu_parser.add_argument("--positive_name", type=str, help="The positive samples's name in corpus.")
        pu_parser.add_argument("--unlabeled_name", type=str, help="The unlabeled samples's name in corpus.")
        pu_parser.set_defaults(func=handler)

    args = parser.parse_args()
    # Python 2's argparse rejects a missing sub-command by itself, but
    # Python 3's does not require one, leaving ``func`` unset.  Fail with a
    # usage error here instead of an AttributeError after the rc file has
    # already been rewritten.
    if getattr(args, "func", None) is None:
        parser.error("too few arguments")
    # Parenthesised form prints the same text on Python 2 and also compiles
    # on Python 3.
    print(args)

    aa = AppArgs(["/etc/diggerd/diggerrc", "~/.diggerrc", "./.diggerrc"])
    update_args(aa, args)

    # Persist the merged settings to the rc file in the current directory.
    aa.write_to_file("./.diggerrc")

    args.func(aa)
Ejemplo n.º 2
0
def main_1():
    """Command-line entry point built on docopt.

    Loads settings from the rc files into an ``AppArgs`` object, parses the
    command line with docopt, lets ``AppArgs.parse_from_args`` merge the
    parsed arguments, and then runs the ``do_*`` function of the sub-command
    that docopt flagged.  The elapsed wall-clock time is logged at the end.
    """
    s_time = datetime.utcnow()

    aa = AppArgs()
    # Same order as before: system-wide, per-user, then current directory.
    for rc_file in ('/etc/diggerd/diggerrc', '~/.diggerrc', './.diggerrc'):
        aa.parse_from_file(rc_file)

    # Inside a function the bare name ``__doc__`` resolves to the module
    # docstring, which holds the docopt usage text.
    args = docopt(__doc__, version="Positive and Unlabeled Extractor 1.0")

    result_dir = args['--result_dir']
    positive_name = args['--positive_name']
    unlabeled_name = args['--unlabeled_name']
    model_file = args['--model_file']
    svm_file = args['--svm_file']
    model_name = args['--model_name']
    xls_file = args['--xls_file']

    # docopt returns option values as strings (or None when absent).
    arg_sample_id = args['--sample_id']
    sample_id = int(arg_sample_id) if arg_sample_id is not None else None

    # ``positive_name_list`` used to be referenced without ever being
    # assigned (its assignment was commented out), which raised NameError in
    # the query_sample branch below.
    # NOTE(review): do_query_sample_by_pu presumably expects a list; a single
    # string is wrapped, a list from a repeatable option is passed through --
    # confirm against the helper.
    if positive_name is None or isinstance(positive_name, (list, tuple)):
        positive_name_list = positive_name
    else:
        positive_name_list = [positive_name]

    # corpus_dir and samples_name are taken from the merged AppArgs rather
    # than straight from the command line.
    aa.parse_from_args(args)
    corpus_dir = aa.corpus_dir
    samples_name = aa.samples_name

    aa.print_status()

    if args['test']:
        do_test(corpus_dir, positive_name, unlabeled_name, model_file, svm_file)
    elif args['import_samples']:
        do_import_samples(corpus_dir, samples_name, xls_file)
    elif args['export_samples']:
        do_export_samples(corpus_dir, samples_name, xls_file)
    elif args['export_urls']:
        do_export_urls(corpus_dir, samples_name, xls_file)
    elif args['rebuild']:
        do_rebuild(corpus_dir, samples_name)
    elif args['rebuild_categories']:
        do_rebuild_categories(corpus_dir, samples_name)
    elif args['query_sample']:
        # With a samples name the sample is looked up there; otherwise the
        # positive/unlabeled names are used.
        if samples_name is not None:
            do_query_sample(corpus_dir, samples_name, sample_id)
        else:
            do_query_sample_by_pu(corpus_dir, positive_name_list, unlabeled_name, sample_id)
    elif args['query_categories']:
        do_query_categories(corpus_dir, samples_name, xls_file)
    elif args['query_keywords']:
        do_query_keywords(corpus_dir, samples_name, result_dir)
    elif args['refresh']:
        do_refresh(corpus_dir, samples_name)
    elif args['show']:
        do_show(corpus_dir, samples_name)
    elif args['purge']:
        do_purge(corpus_dir, samples_name)
    elif args['sne']:
        do_sne(corpus_dir, samples_name, result_dir)
    elif args['train']:
        do_train(corpus_dir, samples_name, model_name, result_dir)
    elif args['predict']:
        do_predict(corpus_dir, samples_name, model_name, result_dir)
    elif args['iem']:
        do_iem(corpus_dir, positive_name, unlabeled_name, result_dir)
    elif args['sem']:
        do_sem(corpus_dir, positive_name, unlabeled_name, result_dir)

    e_time = datetime.utcnow()
    t_time = (e_time - s_time)
    logging.info(Logger.info("Done.(%s)" % (str(t_time))))