Beispiel #1
0
def _set_align(rsc_src: Tuple[Aligner, dict, Dict[str, int]],
               entries: List[Entry]):
    """
    Align the raw text of each entry with its left and right morpheme analyses.

    Entries whose ``is_sharp`` flag is set are skipped. For every other entry
    ``left_align`` is computed first and ``right_align`` only if the left
    alignment succeeded. An entry whose alignment comes back empty gets an
    ``err_msg`` and is handed to ``print_errors`` after the loop.

    Args:
        rsc_src:  (Aligner, restore dic, vocab out) resource triple
        entries:  list of entries; updated in place
    """
    failed = []
    for item in entries:
        if item.is_sharp:
            continue

        # Collect the failure reason (if any) in one place, then act on it.
        problem = None
        item.left_align = align_patch(rsc_src, item.raw,
                                      Morph.to_str(item.left))
        if not item.left_align:
            problem = 'fail to align left'
        else:
            # The right side is only attempted once the left side aligned.
            item.right_align = align_patch(rsc_src, item.raw,
                                           Morph.to_str(item.right))
            if not item.right_align:
                problem = 'fail to align right'

        if problem is not None:
            item.err_msg = problem
            failed.append(item)
        else:
            # Both alignments must have the same length.
            assert len(item.left_align) == len(item.right_align)
    print_errors(failed)
Beispiel #2
0
def _load_entries(args: Namespace) -> List[Entry]:
    """
    Load patch entries from the error patch files.

    Every file matching ``<args.rsc_src>/<args.model_size>.errpatch.*`` is
    read line by line. Each non-empty line becomes one ``Entry``. Entries
    that carry an ``err_msg`` after construction are reported through
    ``print_errors`` and left out of the result.

    Args:
        args:  program arguments (``rsc_src`` and ``model_size`` are read)
    Returns:
        list of entries that were built without an error message
    """
    good_entries = []
    bad_entries = []
    pattern = '{}/{}.errpatch.*'.format(args.rsc_src, args.model_size)
    for file_path in glob.glob(pattern):
        file_name = os.path.basename(file_path)
        logging.info(file_name)
        # Open through a context manager so the file handle is closed as soon
        # as the file has been read, even if building an Entry raises.
        with open(file_path, 'r', encoding='UTF-8') as fin:
            for line_num, line in enumerate(fin, start=1):
                line = line.rstrip('\r\n')
                if not line:
                    continue
                entry = Entry(file_path, line_num, line)
                if entry.err_msg:
                    bad_entries.append(entry)
                else:
                    good_entries.append(entry)
    print_errors(bad_entries)
    return good_entries
Beispiel #3
0
def _check_dup(entries: List[Entry]):
    """
    Report entries whose key already appeared earlier in the list.

    The first entry seen for a given ``key_str()`` is kept as the reference.
    Every later entry with the same key gets a ``[DUPLICATED]`` error message
    naming that reference, and all such entries are passed to
    ``print_errors``.

    Args:
        entries:  list of entries
    """
    first_seen = {}
    duplicates = []
    for item in entries:
        key = item.key_str()
        if key not in first_seen:
            # First occurrence: remember it as the reference for this key.
            first_seen[key] = item
            continue
        item.err_msg = '[DUPLICATED] with "{}"'.format(first_seen[key])
        duplicates.append(item)
    print_errors(duplicates)