Python preprocess_sentence Examples

Programming Language: Python

Namespace/Package Name: knp.knpinfo

Method/Function: preprocess_sentence

Examples at hotexamples.com: 3

Python preprocess_sentence - 3 examples found. These are the top-rated real-world Python examples of knp.knpinfo.preprocess_sentence, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: print_pairs.py Project: somay/build-compression-corpus

def grammarize_headline(headline, sent):
    """Try to compress ``sent`` so that it matches (a prefix of) ``headline``.

    The sentence is sent to the JUMAN process once. The headline is split
    into segments on full-width spaces and on 'ーー'; the segments are
    re-joined and analysed, and the last segment is dropped after each
    failed attempt until a candidate is accepted or none are left.

    Returns the ``(compressed, alignment)`` pair unpacked from
    ``compress_sentence``. Returns ``None`` when a candidate yields six or
    fewer morphemes, when ``compress_sentence`` raises ``BadPairException``,
    or when every candidate has been rejected.
    """
    # Analyse the sentence; the raw JUMAN output is kept because it is
    # forwarded to the KNP process later.
    juman_prc.stdin.write(preprocess_sentence(sent) + '\n')
    sentence_juman = read_until_EOS(juman_prc.stdout)
    sentence_vocab = extract_open_classes(decode_juman_info(sentence_juman))

    # Split first on full-width spaces, then each piece on 'ーー'.
    titles = []
    for chunk in preprocess_sentence(headline).split('　'):
        titles.extend(chunk.split('ーー'))

    while titles:
        candidate = '　'.join(titles) + '\n'
        juman_prc.stdin.write(preprocess_sentence(candidate))
        title_morphemes = decode_juman_info(read_until_EOS(juman_prc.stdout))

        # Shorter candidates can only have fewer morphemes, so stop here.
        # NOTE(review): rationale inferred from the loop shape - confirm.
        if len(title_morphemes) <= 6:
            return None

        open_classes = extract_open_classes(title_morphemes)
        # TODO: also take word order into account
        if len(open_classes) < 4 or not set(open_classes).issubset(set(sentence_vocab)):
            # Rejected: retry without the last headline segment.
            titles.pop()
            continue

        # Accepted candidate: parse the sentence with KNP and align it.
        knp_prc.stdin.write(sentence_juman)
        knp_info = analyze_knp(read_until_EOS(knp_prc.stdout))
        oc_pairs = mark_words_in_sent(knp_info['morphemes'], title_morphemes, open_classes)
        try:
            compressed, alignment = compress_sentence(knp_info, title_morphemes, oc_pairs)
        except BadPairException:
            return None
        else:
            return compressed, alignment

Example #2

Show file

File: preprocess.py Project: somay/build-compression-corpus

#!/usr/bin/python3

from knp.knpinfo import preprocess_sentence
import sys
if __name__ == '__main__':
    # Filter mode: read lines from stdin and echo each one after passing it
    # through preprocess_sentence.
    for line in sys.stdin:
        # Drop exactly one leading ASCII space, if present.
        text = line[1:] if line.startswith(' ') else line
        # No newline is appended; flush after every line so a consumer
        # reading from a pipe sees the output immediately.
        print(preprocess_sentence(text), end='', flush=True)

Example #3

Show file

File: print_pairs.py Project: somay/build-compression-corpus

            knp_info = analyze_knp(sent_knp_output)
            oc_pairs = mark_words_in_sent(knp_info['morphemes'], title_morphemes, open_classes)
            try:
                compressed, alignment = compress_sentence(knp_info, title_morphemes, oc_pairs)
            except BadPairException:
                return
            return compressed, alignment
        else:
            titles = titles[:-1]


if __name__ == '__main__':
    # Command-line entry point: for every (headline, first sentence) pair
    # read from the file named by argv[1], print the headline, the
    # preprocessed sentence, its compressed form and the alignment pairs.
    if len(sys.argv) < 2:
        print('usage: ./print_pairs.py xml-file-like-毎日新聞コーパス > file-to-store-pairs.txt', file=sys.stderr)
        sys.exit(1)
    try:
        for hline, sent in yield_headline_and_1st_sent(sys.argv[1]):
            sent = sent.strip()
            compressed_alignment = grammarize_headline(hline, sent)
            if not compressed_alignment:
                # Falsy result: nothing to print for this pair.
                continue
            compressed, alignment = compressed_alignment
            print(hline)
            print(preprocess_sentence(sent))
            print(compressed)
            # Alignment is printed as space-separated "i-j" pairs on one line.
            for i, j in alignment:
                print(str(i) + '-' + str(j), end=' ')
            # Ends the alignment line and leaves one blank line between records.
            print('\n')
    finally:
        # Always stop the helper processes, including when the loop is
        # aborted by an exception or Ctrl-C.
        knp_prc.terminate()
        juman_prc.terminate()
    sys.exit(0)