Example #1
0
def run():
    """
    Program entry point: copy stdin to stdout, repairing word lines that fail to parse.

    Lines that do not match WORD_ID_PTN are echoed unchanged. Matching lines are
    validated with Word.parse; when parsing fails with a 'raw-morph mismatch' error
    the line is replaced by the result of _recover before being printed.
    Raises:
        ParseError:  any parse failure other than a raw-morph mismatch
    """
    src_name = os.path.basename(sys.stdin.name)
    for idx, raw_line in enumerate(sys.stdin, start=1):
        text = raw_line.rstrip('\r\n')
        if WORD_ID_PTN.match(text):
            try:
                # parse only to validate; the parsed object itself is not used
                Word.parse(text, src_name, idx)
            except ParseError as par_err:
                if 'raw-morph mismatch' not in str(par_err):
                    raise par_err
                text = _recover(text)
        print(text)
def run():
    """
    Program entry point: copy stdin to stdout, attaching missing symbols to word lines.

    Lines that do not match WORD_ID_PTN are echoed unchanged. Matching lines are
    parsed into a Word, passed through _attach_missing_symbol, and the word is printed.
    """
    src_name = os.path.basename(sys.stdin.name)
    for idx, raw_line in enumerate(sys.stdin, start=1):
        text = raw_line.rstrip('\r\n')
        if WORD_ID_PTN.match(text):
            parsed = Word.parse(text, src_name, idx)
            _attach_missing_symbol(parsed)
            print(parsed)
        else:
            print(text)
Example #3
0
def align_patch(rsc_src: Tuple[Aligner, Dict, Dict[str, int]], raw: str, morph_str: str) \
        -> List[int]:
    """
    Perform syllable-level mapping (alignment) between the raw text of a patch and
    its analysis result.
    Args:
        rsc_src:  (Aligner, restore dic, vocab out) resource triple
        raw:  raw text
        morph_str:  morphological analysis result (in patch notation)
    Returns:
        output tag numbers based on the alignment. An empty list is returned when a
        word cannot be aligned or when aligning it would require new dictionary entries
    """
    def _delim_num(token):
        # tag number for a boundary delimiter token
        return WORD_DELIM_NUM if token == WORD_DELIM_STR else SENT_DELIM_NUM

    aligner, restore_dic, vocab_out = rsc_src
    raw_words = raw.strip().split()
    tokens = morph_str.split(' + ')

    # a delimiter at either end is set aside here and re-attached as a number at the end
    boundary_delims = (WORD_DELIM_STR, SENT_DELIM_STR)
    first_tok, last_tok = tokens[0], tokens[-1]
    has_lead = first_tok in boundary_delims
    has_trail = last_tok in boundary_delims
    inner = tokens[1:] if has_lead else tokens
    if has_trail:
        inner = inner[:-1]
    morph_words = _split_list(inner, WORD_DELIM_STR)

    # anything align_to_tag adds to these means the existing dictionaries are insufficient
    restore_new = defaultdict(dict)
    vocab_new = defaultdict(list)
    tag_nums = []
    for surface, word_morphs in zip(raw_words, morph_words):
        word_line = '\t' + surface + '\t' + ' + '.join(word_morphs)
        word = Word.parse(word_line, '', 0)
        try:
            word_align = aligner.align(word)
            _, word_tags = align_to_tag(surface, word_align,
                                        (restore_dic, restore_new),
                                        (vocab_out, vocab_new))
        except AlignError as algn_err:
            logging.debug('alignment error: %s', word)
            logging.debug(str(algn_err))
            return []
        else:
            if restore_new or vocab_new:
                logging.debug('needs dic update: %s', word)
                return []
        # separate words with a delimiter once something has already been emitted
        if tag_nums:
            tag_nums.append(WORD_DELIM_NUM)
        tag_nums.extend(word_tags)

    prefix = [_delim_num(first_tok)] if has_lead else []
    suffix = [_delim_num(last_tok)] if has_trail else []
    return prefix + tag_nums + suffix
Example #4
0
def run():
    """
    Program entry point: copy stdin to stdout, running recovery on word lines.

    Lines that do not match WORD_ID_PTN are echoed unchanged. Matching lines are
    parsed into a Word and passed to _recover; an IndexError raised by the recovery
    is logged with the file name and line number, and the word is printed either way.
    """
    src_name = os.path.basename(sys.stdin.name)
    for idx, raw_line in enumerate(sys.stdin, start=1):
        text = raw_line.rstrip('\r\n')
        if WORD_ID_PTN.match(text):
            output = Word.parse(text, src_name, idx)
            try:
                _recover(output)
            except IndexError as idx_err:
                # best effort: report the failure and still emit the word
                logging.error('%s(%d): %s: %s', src_name, idx, idx_err, output)
        else:
            output = text
        print(output)