from k54 import load_xml

# Build sentence_list so that sentence_list[sentence_index][token_index]
# yields any word in the document (both indices are 1-based, matching
# the CoreNLP XML numbering).
doc = load_xml()['root']['document']  # parse the XML once, not once per loop

sentence_list = [[]]  # dummy element so sentence indices start at 1
for sentence in doc['sentences']['sentence']:
    token_list = ['']  # dummy element so token indices start at 1
    tokens = sentence['tokens']['token']
    # xmltodict-style parsing returns a bare dict (not a list) when a
    # sentence contains exactly one token (e.g. sentence 13).
    if isinstance(tokens, dict):
        token_list.append(tokens['word'])
    else:
        for token in tokens:
            token_list.append(token['word'])
    sentence_list.append(token_list)

# Walk the coreference chains and decorate each referring expression with
# its representative mention.
for core in doc['coreference']['coreference']:
    rep = ''
    for mention in core['mention']:
        if '@representative' in mention:
            # Representative mention: remember its surface text.
            rep = mention['text']
        else:
            sent = int(mention['sentence'])
            start = int(mention['start'])
            # NOTE(review): `end` is read but never used below — presumably a
            # closing marker (e.g. ' ) 」') was meant to be appended at
            # sentence_list[sent][end - 1]; confirm against the exercise spec.
            end = int(mention['end'])
            # Prepend '「 <representative> ( ' to the referring expression.
            sentence_list[sent][start] = '「 ' + rep + ' ( ' + sentence_list[sent][start]
from k54 import load_xml


def print_person_name(word='', pos='', ner=''):
    """Print *word* when it is tagged as a proper noun naming a person."""
    if pos == 'NNP' and ner == 'PERSON':
        print(word)


for sentence in load_xml()['root']['document']['sentences']['sentence']:
    tokens = sentence['tokens']['token']
    # A sentence with exactly one token comes back as a bare dict rather
    # than a list; wrap it so a single loop handles both shapes.
    if isinstance(tokens, dict):
        tokens = [tokens]
    for tok in tokens:
        print_person_name(tok['word'], tok['POS'], tok['NER'])
from k54 import load_xml
import re

# Compiled once: removes the space before sentence-internal punctuation.
_PUNCT_SPACE = re.compile(r' ([,.;:?!])')

parse_strings = [s['parse']
                 for s in load_xml()['root']['document']['sentences']['sentence']]

# For every S-expression parse string, print the words under each (NP ...)
# subtree (nested NPs included, since the search resumes at start + 1).
for ps in parse_strings:
    start = ps.find('(NP')
    # BUG FIX: the original advanced `start` before testing, so an '(NP'
    # at index 0 could never match; str.find checks every position.
    while start != -1:
        # Scan forward to the matching close paren; `depth` counts the
        # currently-open parens (the '(NP' itself counts as one).
        end, depth = start + 1, 1
        while depth:  # until the opening paren is balanced
            if ps[end] == '(':
                depth += 1
            elif ps[end] == ')':
                depth -= 1
            end += 1
        # `end` now points one past the matching ')', so ps[start:end] is
        # exactly the NP subtree (the original's end + 1 grabbed one extra
        # character).  Leaf words are the tokens ending in ')'.
        words = [w.replace(')', '')
                 for w in ps[start:end].split(' ')
                 if w.endswith(')')]
        out = _PUNCT_SPACE.sub(r'\1', ' '.join(words))
        out = out.replace('-LRB- ', '(').replace(' -RRB-', ')')  # restore ()
        print(out)
        start = ps.find('(NP', start + 1)