def coverage(args):
    """Command-line entry point for the coverage analysis.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    Recognised options:

    * ``-o/--out_dir`` -- output dir (default ``""``)
    * ``-d/--debug``   -- debug mode flag
    * ``-v/--verbose`` -- verbose mode flag

    The output location is resolved with ``get_res_filepath``, a logger is
    built with ``init_log`` and ``Coverage(...).analyze()`` is invoked.
    Nothing is returned.
    """
    cli = argparse.ArgumentParser(description="args for coverage")
    cli.add_argument("-o", "--out_dir", type=str, default="", help="output dir")
    # The two boolean switches share the same shape, so register them together.
    for short_flag, long_flag, text in (
        ("-d", "--debug", "debug mode"),
        ("-v", "--verbose", "verbose mode"),
    ):
        cli.add_argument(
            short_flag, long_flag, action="store_true", default=False, help=text
        )
    opts = cli.parse_args(args)

    # The output path is resolved before the logger is created.
    target_dir = get_res_filepath(opts.out_dir)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    if opts.verbose:
        # Only verbose runs override the console verbosity.
        log_kwargs["console_verbosity"] = logging.INFO
    log = init_log(**log_kwargs)

    analysis = Coverage(out_dir=target_dir, logger=log)
    analysis.analyze()
def preprocess(args):
    """Command-line entry point that launches a preprocessing run.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    Two positionals are required, ``data_type`` and ``input``; the options
    are ``-o/--out_dir`` (default ``""``), ``-w/--workers`` (int, default
    20), ``-d/--debug`` and ``-v/--verbose``.

    The output location comes from ``get_res_filepath(folder=...)``, the
    logger from ``init_log`` and the worker object from ``get_processor``;
    its ``start()`` method is then called. Nothing is returned.
    """
    cli = argparse.ArgumentParser(description="args for preprocess")
    for positional in ("data_type", "input"):
        cli.add_argument(positional, type=str)
    cli.add_argument("-o", "--out_dir", type=str, default="", help="output dir")
    cli.add_argument(
        "-w", "--workers", type=int, default=20, help="number of workers"
    )
    cli.add_argument(
        "-d", "--debug", action="store_true", default=False, help="debug mode"
    )
    cli.add_argument(
        "-v", "--verbose", action="store_true", default=False, help="verbose mode"
    )
    opts = cli.parse_args(args)

    # The output path is resolved before the logger is created.
    target_dir = get_res_filepath(folder=opts.out_dir)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    if opts.verbose:
        log_kwargs["console_verbosity"] = logging.INFO
    log = init_log(**log_kwargs)

    get_processor(
        opts.data_type, opts.input, target_dir, log, opts.workers
    ).start()
def get_text_stats(args):
    """Command-line entry point for the text statistics analysis.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    The required positional ``data_type`` must be one of ``reddit``,
    ``hackforums``, ``darkode`` or ``nulled``. Options are ``-o/--out_dir``
    (default ``""``), ``-d/--debug`` and ``-v/--verbose``.

    The output location is resolved with ``get_res_filepath``, a logger is
    built with ``init_log`` and ``TextStats(...).analyze()`` is invoked.
    Nothing is returned.
    """
    # The description previously read "args for preprocess" (copied from a
    # sibling command), which made ``--help`` describe the wrong command.
    parser = argparse.ArgumentParser(description="args for get_text_stats")
    parser.add_argument(
        "data_type",
        choices=["reddit", "hackforums", "darkode", "nulled"],
        type=str,
    )
    parser.add_argument(
        "-o", "--out_dir", help="output dir", type=str, default=""
    )
    parser.add_argument(
        "-d", "--debug", help="debug mode", action="store_true", default=False
    )
    parser.add_argument(
        "-v", "--verbose", help="verbose mode", action="store_true",
        default=False,
    )
    options = parser.parse_args(args)

    log_config = dict(name=__file__, debug=options.debug)
    out_dir = get_res_filepath(options.out_dir)
    if options.verbose:
        # Only verbose runs override the console verbosity.
        log_config['console_verbosity'] = logging.INFO
    logger = init_log(**log_config)

    TextStats(
        data_type=options.data_type, out_dir=out_dir, logger=logger
    ).analyze()
def stats(args):
    """Command-line entry point that computes statistics over annotations.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    Options:

    * ``-a/--annotations`` -- annotations file name (default
      ``annotations.json``), resolved through ``get_res_filepath(fn=...)``
    * ``-m/--model``       -- model name (default ``forums.it100``)
    * ``-o/--out_file``    -- forwarded to ``stats_impl`` as ``out_file``
      (default ``stats``)
    * ``-s/--sentence``    -- forwarded to ``stats_impl`` as ``sen``
    * ``-d/--debug`` / ``-v/--verbose`` -- logging switches

    The annotations file is parsed as JSON and handed to ``stats_impl``.
    Nothing is returned.
    """
    # Rebinds the module-level ``logger``; presumably ``stats_impl`` reads it
    # from there, since it is not passed explicitly -- confirm against callee.
    global logger

    # The description previously read "args for parse_annotated" (copied from
    # a sibling command), which made ``--help`` describe the wrong command.
    parser = argparse.ArgumentParser(description="args for stats")
    parser.add_argument(
        "-a", "--annotations", help="annotations dir", type=str,
        default="annotations.json",
    )
    parser.add_argument(
        "-m", "--model", help="model name", type=str, default="forums.it100"
    )
    parser.add_argument(
        "-o", "--out_file", help="output dir", type=str, default="stats"
    )
    parser.add_argument(
        "-d", "--debug", help="debug mode", action="store_true", default=False
    )
    parser.add_argument(
        "-s", "--sentence", help="output sentence", action="store_true",
        default=False,
    )
    parser.add_argument(
        "-v", "--verbose", help="verbose mode", action="store_true",
        default=False,
    )
    options = parser.parse_args(args)

    log_config = dict(name=__file__, debug=options.debug)
    if options.verbose:
        log_config['console_verbosity'] = logging.INFO
    logger = init_log(**log_config)

    # Use a context manager so the file handle is closed deterministically,
    # including when the JSON is malformed and ``json.load`` raises.
    with open(get_res_filepath(fn=options.annotations)) as annotations_file:
        annotations = json.load(annotations_file)

    stats_impl(
        annotations=annotations,
        model=options.model,
        out_file=options.out_file,
        sen=options.sentence,
    )
def prepare(args):
    """Command-line entry point that prepares training data for forums.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    ``-f/--forums`` is required, takes one or more values and is restricted
    to the entries of ``allforums``. Further options: ``-i/--in_dir``
    (joined onto ``PREPROCESSED_DIR``), ``-o/--out_dir`` (placed under
    ``text2data`` and resolved with ``get_res_filepath``), ``-t/--workers``
    (int, default 10), ``-d/--debug`` and ``-v/--verbose``.

    Finally ``TrainingPrepare(...).go()`` is run. Nothing is returned.

    NOTE(review): another ``prepare`` appears further down in this source;
    if both live in the same module the later definition replaces this one.
    """
    cli = argparse.ArgumentParser(description="args for prepare")
    cli.add_argument("-i", "--in_dir", type=str, default="", help="input dir")
    cli.add_argument("-o", "--out_dir", type=str, default="", help="output dir")
    cli.add_argument(
        "-t", "--workers", type=int, default=10, help="number of workers"
    )
    cli.add_argument(
        "-f",
        '--forums',
        nargs='+',
        required=True,
        choices=allforums,
        help='specifies target forum(s)',
    )
    for short_flag, long_flag, text in (
        ("-d", "--debug", "debug mode"),
        ("-v", "--verbose", "verbose mode"),
    ):
        cli.add_argument(
            short_flag, long_flag, action="store_true", default=False, help=text
        )
    opts = cli.parse_args(args)

    # Keep only the requested forums that are known, in the order given.
    chosen = []
    for forum in opts.forums:
        if forum in allforums:
            chosen.append(forum)

    target_dir = get_res_filepath(folder=os.path.join('text2data', opts.out_dir))
    source_dir = os.path.join(PREPROCESSED_DIR, opts.in_dir)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    if opts.verbose:
        log_kwargs["console_verbosity"] = logging.INFO
    log = init_log(**log_kwargs)

    job = TrainingPrepare(
        in_dir=source_dir,
        out_dir=target_dir,
        logger=log,
        forums=chosen,
        workers=opts.workers,
    )
    job.go()
def parse_annotated(args):
    """Command-line entry point that parses annotated input.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    Options: ``-i/--in_dir`` (joined onto ``PREPROCESSED_DIR``),
    ``-o/--out_file`` (default ``annotations.json``, resolved through
    ``get_res_filepath(fn=...)``), ``-w/--workers`` (int, default 10; parsed
    but not used by this function), ``-d/--debug`` and ``-v/--verbose``.

    The module-level ``logger`` is rebound to the logger returned by
    ``init_log`` before ``parse_annotated_impl`` is called. Nothing is
    returned.
    """
    global logger

    cli = argparse.ArgumentParser(description="args for parse_annotated")
    cli.add_argument("-i", "--in_dir", type=str, default="", help="input dir")
    cli.add_argument(
        "-o", "--out_file", type=str, default="annotations.json",
        help="output dir",
    )
    cli.add_argument(
        "-w", "--workers", type=int, default=10, help="number of workers"
    )
    for short_flag, long_flag, text in (
        ("-d", "--debug", "debug mode"),
        ("-v", "--verbose", "verbose mode"),
    ):
        cli.add_argument(
            short_flag, long_flag, action="store_true", default=False, help=text
        )
    opts = cli.parse_args(args)

    # The output path is resolved before the logger is created.
    destination = get_res_filepath(fn=opts.out_file)
    source_dir = os.path.join(PREPROCESSED_DIR, opts.in_dir)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    if opts.verbose:
        log_kwargs["console_verbosity"] = logging.INFO
    logger = init_log(**log_kwargs)

    parse_annotated_impl(in_dir=source_dir, out_file=destination)
def prepare(args):
    """Command-line entry point that prepares training data by forum group.

    ``args`` is the list of command-line tokens handed to ``argparse``.
    ``-f/--forums`` is required, takes one or more values and is restricted
    to ``all_dark_forums + all_white_forums``. Each requested value is
    classified as follows: the literal ``"wiki"`` only switches the ``wiki``
    flag on; otherwise a value found in ``all_dark_forums`` goes to the dark
    list, and failing that a value found in ``all_white_forums`` goes to the
    white list.

    Further options: ``-i/--in_dir`` (joined onto ``PREPROCESSED_DIR``),
    ``-o/--out_dir`` (resolved with ``get_res_filepath``), ``-t/--workers``
    (int, default 10), ``-d/--debug`` and ``-v/--verbose``.

    Finally ``TrainingPrepare(...).go()`` is run. Nothing is returned.
    """
    cli = argparse.ArgumentParser(description="args for prepare")
    cli.add_argument("-i", "--in_dir", type=str, default="", help="input dir")
    cli.add_argument("-o", "--out_dir", type=str, default="", help="output dir")
    cli.add_argument(
        "-t", "--workers", type=int, default=10, help="number of workers"
    )
    cli.add_argument(
        "-f",
        '--forums',
        nargs='+',
        required=True,
        choices=all_dark_forums + all_white_forums,
        help='specifies target forum(s)',
    )
    cli.add_argument(
        "-d", "--debug", action="store_true", default=False, help="debug mode"
    )
    cli.add_argument(
        "-v", "--verbose", action="store_true", default=False, help="verbose mode"
    )
    opts = cli.parse_args(args)

    # "wiki" is a switch rather than a forum, so it never lands in a list.
    use_wiki = "wiki" in opts.forums
    forums = [name for name in opts.forums if name != "wiki"]
    # Dark membership takes precedence over white membership.
    dark = [name for name in forums if name in all_dark_forums]
    white = [
        name
        for name in forums
        if name not in all_dark_forums and name in all_white_forums
    ]

    target_dir = get_res_filepath(folder=opts.out_dir)
    source_dir = os.path.join(PREPROCESSED_DIR, opts.in_dir)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    if opts.verbose:
        log_kwargs["console_verbosity"] = logging.INFO
    log = init_log(**log_kwargs)

    job = TrainingPrepare(
        in_dir=source_dir,
        out_dir=target_dir,
        logger=log,
        dark=dark,
        white=white,
        wiki=use_wiki,
        workers=opts.workers,
    )
    job.go()