Exemple #1
0
def run(args: Namespace):
    """
    Validate tab-separated (raw, left, right) lines read from stdin.

    Empty lines and lines starting with '#' are skipped.  A line is echoed
    to stdout only when all of the following hold:
      * it has exactly three tab-separated columns,
      * align_patch() returns a truthy alignment for both left and right,
      * both alignments have the same length,
      * summing the counts returned by _cnt_pos_neg() over the sentences
        from _sent_iter(args) gives pos_cnt > 0 and neg_cnt == 0.
    Every rejected line is reported through logging.info together with its
    1-based line number.

    The process exits with status 1 when restore.dic yields nothing.

    Args:
        args:  program arguments (rsc_src, lib_path and rsc_dir are read here)
    """
    aligner = Aligner(args.rsc_src)
    restore_dic = load_restore_dic('{}/restore.dic'.format(args.rsc_src))
    if not restore_dic:
        sys.exit(1)
    vocab_out = load_vocab_out(args.rsc_src)
    # the same resource triple is handed to every helper; build it once
    rsc = (aligner, restore_dic, vocab_out)

    khaiii_api = KhaiiiApi(args.lib_path, args.rsc_dir, '{"errpatch": false}')

    for line_num, line in enumerate(sys.stdin, start=1):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        cols = line.split('\t')
        if len(cols) != 3:
            # reject a malformed line like any other invalid line instead of
            # letting the tuple unpacking abort the run with a ValueError
            logging.info('invalid %d-th line: column count: %s', line_num,
                         line)
            continue
        raw, left, right = cols
        left_align = align_patch(rsc, raw, left)
        if not left_align:
            logging.info('invalid %d-th line: left align: %s', line_num, line)
            continue
        right_align = align_patch(rsc, raw, right)
        if not right_align:
            logging.info('invalid %d-th line: right align: %s', line_num, line)
            continue
        if len(left_align) != len(right_align):
            logging.info('invalid %d-th line: left/right diff: %s', line_num,
                         line)
            continue
        pos_cnt = 0
        neg_cnt = 0
        for sent in _sent_iter(args):
            pos_cnt_sent, neg_cnt_sent = _cnt_pos_neg(
                khaiii_api, raw, (left_align, right_align), rsc, sent)
            pos_cnt += pos_cnt_sent
            neg_cnt += neg_cnt_sent
            if neg_cnt > 0:
                # one negative count already rejects the line; stop scanning
                break
        if neg_cnt > 0 or pos_cnt == 0:
            logging.info('invalid %d-th line: +%d, -%d: %s', line_num, pos_cnt,
                         neg_cnt, line)
            continue
        print('{}\t{}\t{}'.format(raw, left, right))
Exemple #2
0
def run(args: Namespace):
    """
    Load restore.dic and the output vocabulary, run them through
    _make_bin() and save the results.

    Exits with status 1 when restore.dic yields nothing and with status 2
    when the output vocabulary yields nothing.

    Args:
        args:  program arguments (rsc_src and rsc_dir are read here)
    """
    restore_path = '{}/restore.dic'.format(args.rsc_src)
    restore_entries = load_restore_dic(restore_path)
    if not restore_entries:
        sys.exit(1)

    out_vocab = load_vocab_out(args.rsc_src)
    if not out_vocab:
        sys.exit(2)

    # starts empty; handed to _make_bin() and afterwards to the save steps
    new_vocab = {}
    binary_dic = _make_bin(restore_entries, out_vocab, new_vocab)

    _save_restore_dic(args.rsc_dir, binary_dic)
    _save_restore_one(args.rsc_dir, out_vocab, new_vocab)
    append_new_entries(args.rsc_src, None, new_vocab)
Exemple #3
0
def run(args: Namespace):
    """
    Load the entries, check them for duplicates, then pass the entries that
    are not commented out to _set_align() and _save_trie().

    Exits with status 1 when restore.dic yields nothing and with status 2
    (after logging an error) when no entries were loaded.

    Args:
        args:  program arguments (rsc_src and rsc_dir are read here)
    """
    aligner = Aligner(args.rsc_src)
    restore_path = '{}/restore.dic'.format(args.rsc_src)
    restore_dic = load_restore_dic(restore_path)
    if not restore_dic:
        sys.exit(1)
    resources = (aligner, restore_dic, load_vocab_out(args.rsc_src))

    loaded = _load_entries(args)
    if not loaded:
        logging.error('no entry to compile')
        sys.exit(2)
    _check_dup(loaded)

    # exclude entries that were commented out
    active = [entry for entry in loaded if not entry.is_sharp]
    _set_align(resources, active)
    _save_trie(args.rsc_dir, active)
Exemple #4
0
def run(args: Namespace):
    """
    Load the entries, check them for duplicates, then pass the entries that
    are not commented out to _set_align() and _set_tag_out(), append the
    new restore/vocabulary items and save the entries via _save_trie().

    Exits with status 1 when restore.dic yields nothing.

    Args:
        args:  program arguments (rsc_src and rsc_dir are read here)
    """
    aligner = Aligner(args.rsc_src)
    restore_path = '{}/restore.dic'.format(args.rsc_src)
    restore_dic = parse_restore_dic(restore_path)
    if not restore_dic:
        sys.exit(1)

    # both containers start empty; they are passed to _set_tag_out() and
    # then handed to append_new_entries()
    new_restore = defaultdict(dict)
    out_vocab = load_vocab_out(args.rsc_src)
    new_vocab = {}

    loaded = _load_entries(args)
    _check_dup(loaded)
    # exclude entries that were commented out
    active = [entry for entry in loaded if not entry.is_sharp]
    _set_align(aligner, sejong_corpus.Word, active)
    _set_tag_out(restore_dic, new_restore, out_vocab, new_vocab, active)

    append_new_entries(args.rsc_src, new_restore, new_vocab)
    _save_trie(args.rsc_dir, active)