Beispiel #1
0
def run(args: Namespace):
    """
    Entry point of the program: align every word of every input sentence.

    Sentences come from ``sent_iter(args)``. Each word is passed to
    ``Aligner.align``; the per-word results are collected and handed to
    ``_print_sent`` together with the sentence. After all sentences are
    processed, ``_print_restore_dic(args)`` and
    ``aligner.print_middle_cnt()`` are called.

    Args:
        args:  program arguments. The attributes read here are:
            rsc_src:  passed to the ``Aligner`` constructor
            unmapped:  optional path; when truthy, alignment errors are
                written to this file (UTF-8), one per failed word
            sample:  when strictly between 0.0 and 1.0, a sentence is
                skipped whenever ``random.random() >= sample``
    """
    aligner = Aligner(args.rsc_src)
    funmap = open(args.unmapped, 'w',
                  encoding='UTF-8') if args.unmapped else None

    # try/finally guarantees the error file is flushed and closed even when
    # sentence processing raises; previously the handle was never closed.
    try:
        for sent in sent_iter(args):
            # random down-sampling is active only for 0.0 < sample < 1.0
            if 0.0 < args.sample < 1.0 and random.random() >= args.sample:
                continue
            word_per_maps = []
            for word in sent.words:
                try:
                    maps = aligner.align(word)
                except AlignError as algn_err:
                    if funmap is not None:
                        algn_err.add_msg(str(word))
                        print(algn_err, file=funmap)
                    # a word that fails to align still gets an (empty) entry
                    # so word_per_maps stays parallel to sent.words
                    maps = []
                word_per_maps.append(maps)
            _print_sent(sent, word_per_maps)
    finally:
        if funmap is not None:
            funmap.close()

    _print_restore_dic(args)
    aligner.print_middle_cnt()
Beispiel #2
0
def _set_align(aligner: Aligner, Word: type, entries: List[Entry]):  # pylint: disable=invalid-name
    """
    Align the syllables of each entry with its morphological analysis result.

    Entries whose ``is_sharp`` flag is set are skipped. For every other entry
    a ``Word`` is parsed from a tab-separated line built from the entry's
    word and morphemes, and the alignment is stored in ``entry.align``.
    Entries that raise ``AlignError`` get ``err_msg`` set, are logged, and
    are finally reported through ``print_errors``.

    Args:
        aligner:  Aligner object
        Word:  Word type
        entries:  list of entries
    """
    failed = []
    for item in entries:
        if not item.is_sharp:
            # leading empty column, then the word, then its morphemes
            columns = ['', item.word, Morph.to_str(item.morphs)]
            parsed = Word.parse('\t'.join(columns), '', 0)
            try:
                item.align = aligner.align(parsed)
            except AlignError as exc:
                item.err_msg = 'fail to align'
                logging.error(exc)
                failed.append(item)
    print_errors(failed)