def compare_morph(g, a):
    """Score how well the morphological features of ``a`` match those of ``g``.

    Both arguments must expose a ``feats`` attribute that ``Morph`` can parse
    via ``from_string``. ``g`` is treated as the gold reference; ``a`` is
    presumably the system/predicted annotation (confirm against callers).

    For every feature name in ``MORPH_FEATS``:
      * 1 if the feature is present in ``g`` and ``a`` has the same value,
      * 0 if the feature is present in ``g`` but ``a`` differs or lacks it,
      * NaN if the feature is absent from ``g`` (it is not evaluated).

    Returns:
        A tuple ``(mean_score, per_feature)`` where ``per_feature`` maps each
        feature name to its 1/0/NaN score and ``mean_score`` is the pandas
        mean of those scores.
    """
    gold = Morph(from_string=g.feats).feats
    candidate = Morph(from_string=a.feats).feats

    per_feature = {}
    scores = []
    for name in MORPH_FEATS:
        if name not in gold:
            # Feature is not annotated in the gold data, so we ignore it.
            outcome = np.nan
        else:
            # Correct only when the feature exists and its value matches.
            outcome = 1 if gold.get(name) == candidate.get(name) else 0
        per_feature[name] = outcome
        scores.append(outcome)

    mean_score = pd.Series(scores).mean()
    return mean_score, per_feature
Exemple #2
0
            chunks.append(chunk)
        else:
            chunks = []

        chunk = Chunk(idx, dst)

    elif line == "EOS":
        chunks.append(chunk)
        for k, v in srcs_dict.items():
            chunks[k].update_srsc(v)

        sents.append(chunks)
        srcs_dict.clear()

    else:
        morph = Morph(line)
        chunk.update_morph(morph)

# Walk every element of every parsed sentence and look for the ones that
# contain a verb.
for sent in sents:
    for m in sent:
        dst = m.dst
        srcs = m.srcs

        # Base form of the first morph in this element whose part of speech is
        # "動詞" (verb); stays None when there is no such morph.
        verb = None
        for morph in m.morphs:
            if morph.pos == "動詞":
                verb = morph.base
                break
        if verb:
            # NOTE(review): dst == -1 presumably marks an element with no
            # dependency target (the root) -- confirm against the Chunk class.
            if dst != -1:
                # NOTE(review): the excerpt ends here; `subs` and `srcs` are
                # not used by any code visible in this snippet.
                subs = []
Exemple #3
0
                sentence_list_temp = []
                temp = []
            temp1 = line[:-1].split(" ")
            num_list.append(temp1[1])
            dst_list.append(temp1[2][:-1])

        elif "\t" in line:
            item = line.strip().split("\t")
            try:
                surf = item[0]
                items = item[1].split(",")
            except IndexError:
                next
            if item == ['記号,空白,*,*,*,*,\u3000,\u3000,']:
                surf = "\u3000"
            one_morph.append(Morph(surf, items[6], items[0], items[1]))
            sentence_list_temp.append(surf)

        elif "EOS" in line:
            temp = []
            if len(sentence_list_temp) > 0:
                for item in sentence_list_temp:
                    temp.append(item)
                sentence_list.append("".join(temp))
                morph_list.append(one_morph)
                one_morph = []
                sentence_list_temp = []
                temp = []
            if len(morph_list) == 0:
                one_sent = []
                dst_list = []
Exemple #4
0
import CaboCha
from common import Morph

# Read the CaboCha-formatted parse and collect one list of Morph objects per
# sentence. Sentences are delimited by "EOS" lines; empty sentences are dropped.
all_sent = []
sent = []
with open("./data/neko.txt.cabocha") as f:
    for line in f:
        if "\t" in line:
            # Morpheme line: "<surface>\t<comma-separated features>".
            item = line.strip().split("\t")
            if len(item) < 2:
                # strip() removed the surface together with the tab. This is
                # what happens to the full-width-space token, which collapses
                # to ['記号,空白,*,*,*,*,\u3000,\u3000,']. Skip such lines.
                # The original wrote `next` here, which is a no-op reference
                # to the builtin rather than `continue`, so a malformed line
                # could fall through and reuse stale `items`.
                continue
            surf = item[0]
            items = item[1].split(",")
            # Morph(surface, base, pos, pos1)
            sent.append(Morph(surf, items[6], items[0], items[1]))
        elif "EOS" in line:
            if sent:
                all_sent.append(sent)
                sent = []
        # Every other line (e.g. the "*"-prefixed lines, which carry no tab)
        # holds no morpheme and is ignored. No explicit "*" check is made, so
        # that a token whose surface is "*" is still parsed as a morpheme.

# Print the morphemes of the second collected sentence.
for item in all_sent[1]:
    print('surface=%s\tbase=%s\tpos=%s\tpos1=%s' %
          (item.surface, item.base, item.pos, item.pos1))