Esempio n. 1
0
def coverage(args):
    """Parse the coverage command line and run the coverage analysis.

    Builds a logger via ``init_log``, resolves the output directory through
    ``get_res_filepath`` and calls ``Coverage(...).analyze()``.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``.
    """
    cli = argparse.ArgumentParser(description="args for coverage")

    # Declared as data so every option is registered the same way.
    option_specs = (
        (("-o", "--out_dir"),
         {"help": "output dir", "type": str, "default": ""}),
        (("-d", "--debug"),
         {"help": "debug mode", "action": "store_true", "default": False}),
        (("-v", "--verbose"),
         {"help": "verbose mode", "action": "store_true", "default": False}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args(args)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    target_dir = get_res_filepath(opts.out_dir)
    # Console verbosity is only overridden when --verbose was given.
    if opts.verbose:
        log_kwargs["console_verbosity"] = logging.INFO
    run_logger = init_log(**log_kwargs)

    analysis = Coverage(out_dir=target_dir, logger=run_logger)
    analysis.analyze()
Esempio n. 2
0
def preprocess(args):
    """Parse the preprocess command line and start the selected processor.

    The processor is obtained from ``get_processor`` using the positional
    ``data_type`` and ``input`` values, the resolved output directory, the
    logger and the worker count, and is then started.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``.
    """
    cli = argparse.ArgumentParser(description="args for preprocess")

    # Positional arguments.
    cli.add_argument("data_type", type=str)
    cli.add_argument("input", type=str)

    # Optional arguments.
    cli.add_argument("-o", "--out_dir", type=str, default="",
                     help="output dir")
    cli.add_argument("-w", "--workers", type=int, default=20,
                     help="number of workers")
    cli.add_argument("-d", "--debug", action="store_true", default=False,
                     help="debug mode")
    cli.add_argument("-v", "--verbose", action="store_true", default=False,
                     help="verbose mode")

    opts = cli.parse_args(args)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    target_dir = get_res_filepath(folder=opts.out_dir)
    # Console verbosity is only overridden when --verbose was given.
    if opts.verbose:
        log_kwargs["console_verbosity"] = logging.INFO
    run_logger = init_log(**log_kwargs)

    get_processor(
        opts.data_type,
        opts.input,
        target_dir,
        run_logger,
        opts.workers,
    ).start()
Esempio n. 3
0
def get_text_stats(args):
    """Parse the text-stats command line and run the ``TextStats`` analysis.

    Builds a logger via ``init_log``, resolves the output directory through
    ``get_res_filepath`` and calls ``TextStats(...).analyze()``.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``. The
            positional ``data_type`` must be one of ``reddit``,
            ``hackforums``, ``darkode`` or ``nulled``.
    """
    # The description previously read "args for preprocess" (copy-paste from
    # another command), which made --help name the wrong command.
    parser = argparse.ArgumentParser(description="args for get_text_stats")
    parser.add_argument("data_type",
                        choices=["reddit", "hackforums", "darkode", "nulled"],
                        type=str)
    parser.add_argument("-o",
                        "--out_dir",
                        help="output dir",
                        type=str,
                        default="")
    parser.add_argument("-d",
                        "--debug",
                        help="debug mode",
                        action="store_true",
                        default=False)
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose mode",
                        action="store_true",
                        default=False)

    options = parser.parse_args(args)
    log_config = dict(name=__file__, debug=options.debug)
    out_dir = get_res_filepath(options.out_dir)
    # Console verbosity is only overridden when --verbose was given.
    if options.verbose:
        log_config['console_verbosity'] = logging.INFO
    logger = init_log(**log_config)

    TextStats(data_type=options.data_type, out_dir=out_dir,
              logger=logger).analyze()
Esempio n. 4
0
def stats(args):
    """Parse the stats command line and run ``stats_impl`` on the annotations.

    The annotations file name is resolved through ``get_res_filepath``, the
    file is parsed as JSON, and the result is passed to ``stats_impl`` together
    with the model name, the output file name and the sentence flag.

    Side effect: rebinds the module-level ``logger`` to the logger returned by
    ``init_log``.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``.
    """
    global logger
    # The description previously read "args for parse_annotated" (copy-paste
    # from another command), which made --help name the wrong command.
    parser = argparse.ArgumentParser(description="args for stats")
    parser.add_argument("-a",
                        "--annotations",
                        help="annotations dir",
                        type=str,
                        default="annotations.json")
    parser.add_argument("-m",
                        "--model",
                        help="model name",
                        type=str,
                        default="forums.it100")
    parser.add_argument("-o",
                        "--out_file",
                        help="output dir",
                        type=str,
                        default="stats")
    parser.add_argument("-d",
                        "--debug",
                        help="debug mode",
                        action="store_true",
                        default=False)
    parser.add_argument("-s",
                        "--sentence",
                        help="output sentence",
                        action="store_true",
                        default=False)
    parser.add_argument("-v",
                        "--verbose",
                        help="verbose mode",
                        action="store_true",
                        default=False)

    options = parser.parse_args(args)

    log_config = dict(name=__file__, debug=options.debug)

    # Console verbosity is only overridden when --verbose was given.
    if options.verbose:
        log_config['console_verbosity'] = logging.INFO
    logger = init_log(**log_config)

    annotations_path = get_res_filepath(fn=options.annotations)
    # Use a context manager so the file handle is closed even when JSON
    # parsing raises; the bare open() used before leaked the handle.
    with open(annotations_path) as annotations_file:
        annotations = json.load(annotations_file)

    stats_impl(annotations=annotations,
               model=options.model,
               out_file=options.out_file,
               sen=options.sentence)
Esempio n. 5
0
def prepare(args):
    """Parse the prepare command line and run ``TrainingPrepare`` on forums.

    The input directory is taken relative to ``PREPROCESSED_DIR`` and the
    output directory is resolved through ``get_res_filepath`` under the
    ``text2data`` folder.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``.
            ``-f/--forums`` is required and limited to ``allforums``.
    """
    cli = argparse.ArgumentParser(description="args for prepare")

    # Declared as data so every option is registered the same way.
    option_specs = (
        (("-i", "--in_dir"),
         {"help": "input dir", "type": str, "default": ""}),
        (("-o", "--out_dir"),
         {"help": "output dir", "type": str, "default": ""}),
        (("-t", "--workers"),
         {"help": "number of workers", "type": int, "default": 10}),
        (("-f", "--forums"),
         {"nargs": "+", "required": True, "choices": allforums,
          "help": "specifies target forum(s)"}),
        (("-d", "--debug"),
         {"help": "debug mode", "action": "store_true", "default": False}),
        (("-v", "--verbose"),
         {"help": "verbose mode", "action": "store_true", "default": False}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args(args)

    # Keep only the requested forums that appear in allforums.
    chosen_forums = [name for name in opts.forums if name in allforums]

    log_kwargs = {"name": __file__, "debug": opts.debug}
    target_dir = get_res_filepath(
        folder=os.path.join('text2data', opts.out_dir))
    source_dir = os.path.join(PREPROCESSED_DIR, opts.in_dir)
    # Console verbosity is only overridden when --verbose was given.
    if opts.verbose:
        log_kwargs['console_verbosity'] = logging.INFO
    run_logger = init_log(**log_kwargs)

    job = TrainingPrepare(in_dir=source_dir,
                          out_dir=target_dir,
                          logger=run_logger,
                          forums=chosen_forums,
                          workers=opts.workers)
    job.go()
Esempio n. 6
0
def parse_annotated(args):
    """Parse the parse_annotated command line and run ``parse_annotated_impl``.

    The input directory is taken relative to ``PREPROCESSED_DIR`` and the
    output file name is resolved through ``get_res_filepath``.

    Side effect: rebinds the module-level ``logger`` to the logger returned by
    ``init_log``.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``.
    """
    global logger

    cli = argparse.ArgumentParser(description="args for parse_annotated")
    cli.add_argument("-i", "--in_dir", type=str, default="",
                     help="input dir")
    cli.add_argument("-o", "--out_file", type=str,
                     default="annotations.json", help="output dir")
    cli.add_argument("-w", "--workers", type=int, default=10,
                     help="number of workers")
    cli.add_argument("-d", "--debug", action="store_true", default=False,
                     help="debug mode")
    cli.add_argument("-v", "--verbose", action="store_true", default=False,
                     help="verbose mode")

    opts = cli.parse_args(args)

    log_kwargs = {"name": __file__, "debug": opts.debug}
    result_path = get_res_filepath(fn=opts.out_file)
    source_dir = os.path.join(PREPROCESSED_DIR, opts.in_dir)
    # Console verbosity is only overridden when --verbose was given.
    if opts.verbose:
        log_kwargs['console_verbosity'] = logging.INFO
    logger = init_log(**log_kwargs)

    parse_annotated_impl(in_dir=source_dir, out_file=result_path)
Esempio n. 7
0
def prepare(args):
    """Parse the prepare command line and run ``TrainingPrepare``.

    The requested forums are split into three groups: the literal ``"wiki"``
    choice (a boolean flag), names found in ``all_dark_forums`` and names
    found in ``all_white_forums``. A name is classified by the first group it
    matches, in that order.

    Args:
        args: Argument strings handed to ``ArgumentParser.parse_args``.
            ``-f/--forums`` is required and limited to
            ``all_dark_forums + all_white_forums``.
    """
    cli = argparse.ArgumentParser(description="args for prepare")
    cli.add_argument("-i", "--in_dir", type=str, default="",
                     help="input dir")
    cli.add_argument("-o", "--out_dir", type=str, default="",
                     help="output dir")
    cli.add_argument("-t", "--workers", type=int, default=10,
                     help="number of workers")
    cli.add_argument("-f", '--forums', nargs='+', required=True,
                     choices=all_dark_forums + all_white_forums,
                     help='specifies target forum(s)')
    cli.add_argument("-d", "--debug", action="store_true", default=False,
                     help="debug mode")
    cli.add_argument("-v", "--verbose", action="store_true", default=False,
                     help="verbose mode")

    opts = cli.parse_args(args)

    # "wiki" takes precedence over both forum lists, then dark over white.
    use_wiki = "wiki" in opts.forums
    non_wiki = [name for name in opts.forums if name != "wiki"]
    dark_chosen = [name for name in non_wiki if name in all_dark_forums]
    white_chosen = [
        name for name in non_wiki
        if name not in all_dark_forums and name in all_white_forums
    ]

    log_kwargs = {"name": __file__, "debug": opts.debug}
    target_dir = get_res_filepath(folder=opts.out_dir)
    source_dir = os.path.join(PREPROCESSED_DIR, opts.in_dir)
    # Console verbosity is only overridden when --verbose was given.
    if opts.verbose:
        log_kwargs['console_verbosity'] = logging.INFO
    run_logger = init_log(**log_kwargs)

    job = TrainingPrepare(in_dir=source_dir,
                          out_dir=target_dir,
                          logger=run_logger,
                          dark=dark_chosen,
                          white=white_chosen,
                          wiki=use_wiki,
                          workers=opts.workers)
    job.go()