def run(args: Namespace):
    """
    Filter error patch lines read from standard input, printing only those that pass.

    Each non-empty line that does not start with '#' must hold exactly three
    tab-separated fields: raw, left and right. A line is printed back to
    standard output (as raw, left, right joined by tabs) only when:
      * both left and right can be aligned against raw by align_patch(),
      * the two alignments have the same length, and
      * accumulating the counts returned by _cnt_pos_neg() over the sentences
        yielded by _sent_iter(args) gives no negative and at least one positive.
    Every rejected line is reported through logging.info() together with its
    1-based line number.

    Exits with status 1 when the restore dictionary could not be loaded.

    Args:
        args:  program arguments (rsc_src, rsc_dir and lib_path are read here;
               the whole namespace is also handed to _sent_iter())
    """
    aligner = Aligner(args.rsc_src)
    restore_dic = load_restore_dic('{}/restore.dic'.format(args.rsc_src))
    if not restore_dic:
        sys.exit(1)
    vocab_out = load_vocab_out(args.rsc_src)
    # the same resource triple is passed to every alignment/count call below
    rsc = (aligner, restore_dic, vocab_out)

    khaiii_api = KhaiiiApi(args.lib_path, args.rsc_dir, '{"errpatch": false}')

    for line_num, line in enumerate(sys.stdin, start=1):
        line = line.rstrip('\r\n')
        if not line or line[0] == '#':
            continue

        # A wrong field count used to raise ValueError on unpacking and abort
        # the whole run; treat it like any other invalid line instead.
        fields = line.split('\t')
        if len(fields) != 3:
            logging.info('invalid %d-th line: field count: %s', line_num, line)
            continue
        raw, left, right = fields

        left_align = align_patch(rsc, raw, left)
        if not left_align:
            logging.info('invalid %d-th line: left align: %s', line_num, line)
            continue
        right_align = align_patch(rsc, raw, right)
        if not right_align:
            logging.info('invalid %d-th line: right align: %s', line_num, line)
            continue
        if len(left_align) != len(right_align):
            logging.info('invalid %d-th line: left/right diff: %s', line_num, line)
            continue

        pos_cnt = 0
        neg_cnt = 0
        for sent in _sent_iter(args):
            pos_cnt_sent, neg_cnt_sent = _cnt_pos_neg(khaiii_api, raw, (left_align, right_align),
                                                      rsc, sent)
            pos_cnt += pos_cnt_sent
            neg_cnt += neg_cnt_sent
            if neg_cnt > 0:
                # one negative is enough to reject the line; stop scanning
                break
        if neg_cnt > 0 or pos_cnt == 0:
            logging.info('invalid %d-th line: +%d, -%d: %s', line_num, pos_cnt, neg_cnt, line)
            continue
        print('{}\t{}\t{}'.format(raw, left, right))
def run(args: Namespace):
    """
    Program entry point.

    Loads the restore dictionary and the output vocabulary from args.rsc_src,
    converts them with _make_bin(), writes the results under args.rsc_dir and
    finally hands any newly collected vocabulary to append_new_entries().

    Exits with status 1 when the restore dictionary is empty and with status 2
    when the output vocabulary is empty.

    Args:
        args:  program arguments (rsc_src and rsc_dir are read)
    """
    src_dir = args.rsc_src
    out_dir = args.rsc_dir

    restore_map = load_restore_dic('{}/restore.dic'.format(src_dir))
    if not restore_map:
        sys.exit(1)

    out_vocab = load_vocab_out(src_dir)
    if not out_vocab:
        sys.exit(2)

    # filled in by _make_bin() and consumed by the two calls that follow it
    new_vocab = {}
    packed = _make_bin(restore_map, out_vocab, new_vocab)

    _save_restore_dic(out_dir, packed)
    _save_restore_one(out_dir, out_vocab, new_vocab)
    append_new_entries(src_dir, None, new_vocab)
def run(args: Namespace):
    """
    Program entry point.

    Loads the aligner, restore dictionary and output vocabulary from
    args.rsc_src, loads the entries via _load_entries(), checks them for
    duplicates, drops the entries flagged is_sharp, aligns the remaining ones
    and saves them with _save_trie() under args.rsc_dir.

    Exits with status 1 when the restore dictionary is empty and with status 2
    (after logging an error) when there are no entries.

    Args:
        args:  program arguments (rsc_src and rsc_dir are read; the whole
               namespace is also handed to _load_entries())
    """
    src_dir = args.rsc_src
    aligner = Aligner(src_dir)

    restore_map = load_restore_dic('{}/restore.dic'.format(src_dir))
    if not restore_map:
        sys.exit(1)
    out_vocab = load_vocab_out(src_dir)

    loaded = _load_entries(args)
    if not loaded:
        logging.error('no entry to compile')
        sys.exit(2)

    # the duplicate check runs on the full list, before filtering
    _check_dup(loaded)

    # exclude entries that were commented out
    active = []
    for entry in loaded:
        if entry.is_sharp:
            continue
        active.append(entry)

    resources = (aligner, restore_map, out_vocab)
    _set_align(resources, active)
    _save_trie(args.rsc_dir, active)
def run(args: Namespace):
    """
    Program entry point.

    Loads the aligner, restore dictionary and output vocabulary from
    args.rsc_src, loads the entries via _load_entries(), checks them for
    duplicates, drops the entries flagged is_sharp, then aligns the remaining
    ones and sets their output tags. Newly collected restore/vocabulary items
    are passed to append_new_entries() and the entries are saved with
    _save_trie() under args.rsc_dir.

    Exits with status 1 when the restore dictionary is empty.

    Args:
        args:  program arguments (rsc_src and rsc_dir are read; the whole
               namespace is also handed to _load_entries())
    """
    src_dir = args.rsc_src
    aligner = Aligner(src_dir)

    restore_map = parse_restore_dic('{}/restore.dic'.format(src_dir))
    if not restore_map:
        sys.exit(1)

    # containers handed to _set_tag_out() and then to append_new_entries()
    new_restore = defaultdict(dict)
    out_vocab = load_vocab_out(src_dir)
    new_vocab = {}

    loaded = _load_entries(args)
    # the duplicate check runs on the full list, before filtering
    _check_dup(loaded)

    # exclude entries that were commented out
    active = []
    for entry in loaded:
        if entry.is_sharp:
            continue
        active.append(entry)

    _set_align(aligner, sejong_corpus.Word, active)
    _set_tag_out(restore_map, new_restore, out_vocab, new_vocab, active)

    append_new_entries(src_dir, new_restore, new_vocab)
    _save_trie(args.rsc_dir, active)