Code Example #1
File: ltp.py  Project: stephenLee/summarize36ke
import requests  # ServiceURL, LTPOption and LTML are defined elsewhere in ltp.py

def tokenizer(input):
    # Ask the LTP web service for word segmentation (WS) of a UTF-8 input string.
    payload = {
        's': input,
        'x': 'n',
        'c': 'utf-8',
        't': LTPOption.WS
    }
    try:
        r = requests.post(ServiceURL, data=payload)
    except Exception as e:
        # Without a response there is nothing to parse, so re-raise after logging.
        print e
        raise

    result = LTML(r.content)
    tokenizer_sentences = []   # one list of words per sentence
    original_sentences = []    # the untokenized sentence strings
    ws_result = []             # all words flattened into a single list
    for pid in xrange(result.count_paragraph()):
        for sid in xrange(result.count_sentence(pid)):
            words = [word.encode('utf8') for word in result.get_words(pid, sid)]
            tokenizer_sentences.append(words)
            original_sentences.append(result.get_sentences(pid, sid))
            ws_result += words
    return (original_sentences, tokenizer_sentences, ws_result)
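
A minimal usage sketch, not part of the original project: it assumes the LTP web service behind ServiceURL is reachable and that the caller passes a UTF-8 encoded Chinese string (the module targets Python 2).

# Hypothetical call; the sample text and variable names are illustrative only.
text = u"我爱自然语言处理。".encode('utf-8')
originals, sentences, words = tokenizer(text)
print "|".join(words)  # segmented words, one token per field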
Code Example #2
File: ltp.py  Project: stephenLee/summarize36ke
def pos(input):
    # Same request as tokenizer(), but asking the LTP web service for
    # part-of-speech tagging (POS) instead of word segmentation.
    payload = {
        's': input,
        'x': 'n',
        'c': 'utf-8',
        't': LTPOption.POS
    }
    try:
        r = requests.post(ServiceURL, data=payload)
    except Exception as e:
        # Without a response there is nothing to parse, so re-raise after logging.
        print e
        raise

    result = LTML(r.content)
    pos_result = []  # all tagged words flattened into a single list
    for pid in xrange(result.count_paragraph()):
        for sid in xrange(result.count_sentence(pid)):
            words = [word.encode('utf8') for word in result.get_words_by_pos(pid, sid)]
            print "|".join(words)
            pos_result += words
    return pos_result
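
A matching sketch for pos(), under the same assumptions about ServiceURL and the Python 2 environment; note that pos() also prints each sentence's tagged words joined by "|" as a side effect.

# Hypothetical call; the sample text is illustrative only.
text = u"我爱自然语言处理。".encode('utf-8')
tags = pos(text)
print len(tags)  # number of tagged tokens across all sentences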