def _add_pu_name_arguments(subparser):
    """Register the ``--positive_name`` and ``--unlabeled_name`` options.

    The iem, sem and pulearning subcommands all declare the same pair of
    options, so the declaration lives in one place.
    """
    subparser.add_argument("--positive_name", type=str,
                           help="The positive samples's name in corpus.")
    subparser.add_argument("--unlabeled_name", type=str,
                           help="The unlabeled samples's name in corpus.")


def main():
    """Command-line entry point built on argparse.

    Declares the global options (``--corpus_dir``, ``--samples_name``,
    ``--result_dir``) and one sub-parser per subcommand, each bound to its
    ``cmd_*`` handler through ``set_defaults(func=...)``.  After parsing,
    an ``AppArgs`` object is created from the rc-file list, updated with the
    parsed arguments via ``update_args``, written to ``./.diggerrc`` and
    passed to the selected handler.
    """
    parser = argparse.ArgumentParser(
        description="Positive and Unlabeled Extractor 1.0")
    parser.add_argument("--corpus_dir", type=str, help="Corpus root dir.")
    parser.add_argument("--samples_name", type=str,
                        help="The samples's name in corpus.")
    parser.add_argument("--result_dir", type=str, help="Dir to save result.")

    subparsers = parser.add_subparsers(dest="subcommand",
                                       title="subcommands",
                                       help="sub-command help")

    # -------- import_samples --------
    parser_import_samples = subparsers.add_parser(
        "import_samples", help="import samples help")
    parser_import_samples.add_argument(
        "--xls_file", type=str,
        help="The Excel file name will be imported.")
    parser_import_samples.set_defaults(func=cmd_import_samples)

    # -------- rebuild --------
    parser_rebuild = subparsers.add_parser("rebuild", help="rebuild help")
    parser_rebuild.set_defaults(func=cmd_rebuild)

    # -------- show --------
    parser_show = subparsers.add_parser("show", help="show help")
    parser_show.set_defaults(func=cmd_show)

    # -------- query_sample --------
    parser_query_sample = subparsers.add_parser(
        "query_sample", help="query sample help")
    parser_query_sample.add_argument("--sample_id", type=int,
                                     help="The sample id.")
    parser_query_sample.set_defaults(func=cmd_query_sample)

    # -------- test --------
    parser_test = subparsers.add_parser("test", help="test help")
    parser_test.set_defaults(func=cmd_test)

    # -------- export_samples --------
    parser_export_samples = subparsers.add_parser(
        "export_samples", help="export samples help")
    # The help text previously said "imported" (copied from import_samples).
    parser_export_samples.add_argument(
        "--xls_file", type=str,
        help="The Excel file name will be exported.")
    parser_export_samples.set_defaults(func=cmd_export_samples)

    # -------- query_keywords --------
    parser_query_keywords = subparsers.add_parser(
        "query_keywords", help="query keywords help")
    parser_query_keywords.set_defaults(func=cmd_query_keywords)

    # -------- iem --------
    parser_iem = subparsers.add_parser("iem", help="IEM help")
    _add_pu_name_arguments(parser_iem)
    parser_iem.set_defaults(func=cmd_iem)

    # -------- sem --------
    parser_sem = subparsers.add_parser("sem", help="SEM help")
    _add_pu_name_arguments(parser_sem)
    parser_sem.set_defaults(func=cmd_sem)

    # -------- pulearning --------
    parser_pulearning = subparsers.add_parser("pulearning",
                                              help="PULearning help")
    _add_pu_name_arguments(parser_pulearning)
    parser_pulearning.set_defaults(func=cmd_pulearning)

    args = parser.parse_args()
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(args)

    # On Python 3 a missing subcommand is not rejected by parse_args(), which
    # would leave ``args.func`` undefined; report it as a usage error instead.
    handler = getattr(args, "func", None)
    if handler is None:
        parser.error("a subcommand is required")

    aa = AppArgs(["/etc/diggerd/diggerrc", "~/.diggerrc", "./.diggerrc"])
    update_args(aa, args)
    aa.write_to_file("./.diggerrc")

    handler(aa)
def main_1():
    """Command-line entry point built on docopt.

    Reads the three rc files into an ``AppArgs`` object, parses the command
    line with ``docopt`` against the module docstring, applies the parsed
    arguments to the ``AppArgs`` object, and then runs the ``do_*`` function
    of the first command flag that is set.  The elapsed wall-clock time is
    logged at the end.

    ``corpus_dir`` and ``samples_name`` are taken from the ``AppArgs`` object
    after ``parse_from_args``; every other option is read directly from the
    docopt result.
    """
    s_time = datetime.utcnow()

    aa = AppArgs()
    aa.parse_from_file('/etc/diggerd/diggerrc')
    aa.parse_from_file('~/.diggerrc')
    aa.parse_from_file('./.diggerrc')

    args = docopt(__doc__, version="Positive and Unlabeled Extractor 1.0")

    result_dir = args['--result_dir']
    positive_name = args['--positive_name']
    unlabeled_name = args['--unlabeled_name']
    model_file = args['--model_file']
    svm_file = args['--svm_file']
    model_name = args['--model_name']
    xls_file = args['--xls_file']

    arg_sample_id = args['--sample_id']
    if arg_sample_id is not None:
        sample_id = int(arg_sample_id)
    else:
        sample_id = None

    aa.parse_from_args(args)
    corpus_dir = aa.corpus_dir
    samples_name = aa.samples_name
    aa.print_status()

    if args['test']:
        do_test(corpus_dir, positive_name, unlabeled_name, model_file,
                svm_file)
    elif args['import_samples']:
        do_import_samples(corpus_dir, samples_name, xls_file)
    elif args['export_samples']:
        do_export_samples(corpus_dir, samples_name, xls_file)
    elif args['export_urls']:
        do_export_urls(corpus_dir, samples_name, xls_file)
    elif args['rebuild']:
        do_rebuild(corpus_dir, samples_name)
    elif args['rebuild_categories']:
        do_rebuild_categories(corpus_dir, samples_name)
    elif args['query_sample']:
        if samples_name is not None:
            do_query_sample(corpus_dir, samples_name, sample_id)
        else:
            # This call used to pass ``positive_name_list``, a name that is
            # never assigned in this function (NameError).  ``positive_name``
            # holds the same ``--positive_name`` option value.
            # NOTE(review): confirm do_query_sample_by_pu accepts this value.
            do_query_sample_by_pu(corpus_dir, positive_name, unlabeled_name,
                                  sample_id)
    elif args['query_categories']:
        do_query_categories(corpus_dir, samples_name, xls_file)
    elif args['query_keywords']:
        do_query_keywords(corpus_dir, samples_name, result_dir)
    elif args['refresh']:
        do_refresh(corpus_dir, samples_name)
    elif args['show']:
        do_show(corpus_dir, samples_name)
    elif args['purge']:
        do_purge(corpus_dir, samples_name)
    elif args['sne']:
        do_sne(corpus_dir, samples_name, result_dir)
    elif args['train']:
        do_train(corpus_dir, samples_name, model_name, result_dir)
    elif args['predict']:
        do_predict(corpus_dir, samples_name, model_name, result_dir)
    elif args['iem']:
        do_iem(corpus_dir, positive_name, unlabeled_name, result_dir)
    elif args['sem']:
        do_sem(corpus_dir, positive_name, unlabeled_name, result_dir)

    e_time = datetime.utcnow()
    t_time = (e_time - s_time)
    # NOTE(review): the return value of Logger.info is what gets passed to
    # logging.info here -- confirm that is intended.
    logging.info(Logger.info("Done.(%s)" % (str(t_time))))