def main():
    """Emit the extracted path for every noun-bearing element of a parsed file.

    Reads all lines of the file named by ``sys.argv[1]``, hands them to
    ``nlp040.parse_text`` and walks each resulting sentence.  For every
    element whose ``contains('名詞')`` is true, ``extract(sentence, index)``
    is called; a non-empty result is joined with ``' -> '`` and passed to
    ``p`` followed by a newline.
    """
    # Use a context manager so the input handle is closed deterministically
    # rather than left for the garbage collector.
    with open(sys.argv[1]) as src:
        lines = src.readlines()

    text = nlp040.parse_text(lines)
    for sentence in text:
        for i, chunk in enumerate(sentence):
            if not chunk.contains('名詞'):
                continue
            out = extract(sentence, i)
            if out:
                # NOTE(review): `p` is defined outside this function;
                # presumably it writes to stdout -- confirm.
                p(' -> '.join(out) + '\n')
def main():
    """Print source/destination text pairs for verb chunks fed by a noun chunk.

    Reads all lines of the file named by ``sys.argv[1]`` and parses them with
    ``nlp040.parse_text``.  For every chunk that has at least one entry in
    ``srcs``, satisfies ``contains('動詞')`` and has at least one source chunk
    satisfying ``contains('名詞')``, one line is printed: the concatenated
    ``text()`` of all its source chunks, a tab, then the chunk's own
    ``text()``.
    """
    # Use a context manager so the input handle is closed deterministically.
    with open(sys.argv[1]) as src_file:
        lines = src_file.readlines()

    text = nlp040.parse_text(lines)
    for sentence in text:
        for chunk in sentence:
            if not chunk.srcs or not chunk.contains('動詞'):
                continue
            # Generator form lets any() stop at the first noun-bearing source.
            if not any(sentence[idx].contains('名詞') for idx in chunk.srcs):
                continue
            # NOTE(review): one output line per chunk (all sources joined
            # first) is assumed -- confirm against the intended format.
            sources = ''.join(sentence[src].text() for src in chunk.srcs)
            print(sources + '\t' + chunk.text())
def main():
    """Print source/destination text pairs for every chunk that has sources.

    Reads all lines of the file named by ``sys.argv[1]`` and parses them with
    ``nlp040.parse_text``.  For each chunk whose ``srcs`` is non-empty, one
    line is printed: the concatenated ``text()`` of all its source chunks,
    a tab, then the chunk's own ``text()``.
    """
    # Use a context manager so the input handle is closed deterministically.
    with open(sys.argv[1]) as src_file:
        lines = src_file.readlines()

    text = nlp040.parse_text(lines)
    for sentence in text:
        for chunk in sentence:
            if not chunk.srcs:
                continue
            # NOTE(review): one output line per chunk (all sources joined
            # first) is assumed -- confirm against the intended format.
            sources = ''.join(sentence[src].text() for src in chunk.srcs)
            print(sources + '\t' + chunk.text())
def main():
    """Write the dependency edges of the first ten sentences as a DOT digraph.

    Reads all lines of the file named by ``sys.argv[1]``, parses them with
    ``nlp040.parse_text`` and writes a ``digraph`` to the file named by
    ``sys.argv[2]``.  Each of the first ten sentences becomes a
    ``subgraph sentence<N>`` block (N counted from 0) containing one
    ``"<chunk text>" -> "<destination text>";`` edge for every chunk whose
    ``dst`` is greater than 0.
    """
    # Use a context manager so the input handle is closed deterministically.
    with open(sys.argv[1]) as src_file:
        lines = src_file.readlines()

    text = nlp040.parse_text(lines)
    with open(sys.argv[2], 'w') as o:
        o.write('digraph 吾輩は猫である {\n')
        for i, sentence in enumerate(text[0:10]):
            o.write('subgraph sentence{0} {{\n'.format(i))
            for chunk in sentence:
                # Chunks with dst <= 0 produce no edge.
                # NOTE(review): presumably a non-positive dst means "no
                # destination" -- confirm against nlp040.
                if chunk.dst > 0:
                    o.write('"' + chunk.text() + '" -> "'
                            + sentence[chunk.dst].text() + '";\n')
            o.write('}\n')
        o.write('}\n')
def main():
    """Write each verb with the particles and texts of its source chunks.

    Reads all lines of the file named by ``sys.argv[1]``, parses them with
    ``nlp040.parse_text`` and writes to the file named by ``sys.argv[2]``.
    For every item returned by ``chunk.get('動詞')``, the items returned by
    ``get('助詞')`` on each source chunk are collected together with that
    source chunk's ``text()``.  When at least one was found, one line is
    written: the verb's ``base``, a tab, the particle ``base`` values sorted
    and space-joined, a tab, and the matching source texts in the same
    order, space-joined.
    """
    # Use a context manager so the input handle is closed deterministically.
    with open(sys.argv[1]) as src_file:
        lines = src_file.readlines()

    text = nlp040.parse_text(lines)
    with open(sys.argv[2], 'w') as o:
        for sentence in text:
            for chunk in sentence:
                for verb in chunk.get('動詞'):
                    # Keep (particle base, source text) together as pairs so
                    # they stay aligned through the sort.
                    pairs = []
                    for src in chunk.srcs:
                        src_chunk = sentence[src]
                        for particle in src_chunk.get('助詞'):
                            pairs.append((particle.base, src_chunk.text()))
                    if not pairs:
                        continue
                    # Sort on the particle only; the stable sort keeps the
                    # original relative order of equal particles.
                    pairs.sort(key=lambda pair: pair[0])
                    particles = [pair[0] for pair in pairs]
                    phrases = [pair[1] for pair in pairs]
                    o.write(verb.base + '\t' + ' '.join(particles)
                            + '\t' + ' '.join(phrases) + '\n')