def main(in_fname):
    """Print every chunk next to the chunk it points to.

    The document is built with ``marujirou.create_doc(in_fname)``.  For each
    chunk whose ``dst`` is not -1, one line is written to stdout:
    ``<chunk text>\\t<destination chunk text>``.  Both texts come from
    ``get_raw()`` with a filter that rejects morphemes whose ``pos`` equals
    the tag literal below (presumably the punctuation/symbol category,
    judging by the original filter name -- confirm against the tagger).

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    def keep_morph(m):
        # Predicate passed to get_raw(); False for the excluded POS tag.
        return m.pos != '補助記号'

    for sentence in marujirou.create_doc(in_fname):
        for chunk in sentence:
            if chunk.dst == -1:
                # Nothing to pair this chunk with.
                continue
            src_text = chunk.get_raw(keep_morph)
            dst_text = sentence[chunk.dst].get_raw(keep_morph)
            # Single parenthesised expression: same output under the
            # Python 2 print statement.
            print('%s\t%s' % (src_text, dst_text))
def main(in_fname):
    """Print noun-bearing chunks together with the verb-bearing chunk they point to.

    A pair is printed only when all three conditions hold, checked in this
    order: the chunk's ``dst`` is not -1, ``chunk.hasNoun()`` is true, and
    the destination chunk's ``hasVerb()`` is true.  Output format per line is
    ``<chunk text>\\t<destination chunk text>``; both texts are produced by
    ``get_raw()`` with a filter rejecting morphemes whose ``pos`` equals the
    tag literal below (presumably the punctuation/symbol category -- confirm).

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    def keep_morph(m):
        # Predicate passed to get_raw(); False for the excluded POS tag.
        return m.pos != "補助記号"

    for sentence in marujirou.create_doc(in_fname):
        for chunk in sentence:
            if chunk.dst == -1:
                continue
            if not chunk.hasNoun():
                continue
            target = sentence[chunk.dst]
            if not target.hasVerb():
                continue
            src_text = chunk.get_raw(keep_morph)
            dst_text = target.get_raw(keep_morph)
            print("%s\t%s" % (src_text, dst_text))
def main(in_fname):
    """Print every path that ``get_all_path`` yields for each sentence.

    For each sentence of the document, the chunks for which ``hasNoun()`` is
    true are collected (in sentence order) and passed, together with the
    sentence, to ``get_all_path``.  Each item it yields is printed on its
    own line.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    for sentence in marujirou.create_doc(in_fname):
        noun_chunks = [c for c in sentence if c.hasNoun()]
        for route in get_all_path(sentence, noun_chunks):
            print(route)
def main(in_fname):
    """Print, for each noun-bearing chunk, the chain of chunks it leads to.

    Starting from every chunk that has a destination (``dst != -1``) and for
    which ``hasNoun()`` is true, the ``dst`` links are followed until a chunk
    with ``dst == -1`` is reached.  The raw text of every visited chunk is
    joined with `` -> `` and the resulting line is printed.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    for sentence in marujirou.create_doc(in_fname):
        for start in sentence:
            if start.dst == -1 or not start.hasNoun():
                continue
            line = start.get_raw()
            # Walk the dst links with a separate cursor so the loop
            # variable itself is left alone.
            node = start
            while node.dst != -1:
                node = sentence[node.dst]
                line += ' -> %s' % node.get_raw()
            print('%s' % line)
def main(in_fname):
    """Print each verb-bearing chunk's first verb with the cases attached to it.

    For every chunk that has source chunks (``srcs`` is non-empty) and for
    which ``hasVerb()`` is true, the ``base`` of ``get_first_verb()`` is taken
    as the predicate.  The ``base`` of ``get_case()`` is gathered from each
    source chunk whose ``hasCase()`` is true; the gathered values are sorted.
    Chunks with no such values are skipped.  Output per line:
    ``<predicate>\\t<cases separated by single spaces>``.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    for sentence in marujirou.create_doc(in_fname):
        for chunk in sentence:
            if not (chunk.srcs and chunk.hasVerb()):
                continue
            predicate = chunk.get_first_verb().base
            case_bases = []
            for src in chunk.srcs:
                dependent = sentence[src]
                if dependent.hasCase():
                    case_bases.append(dependent.get_case().base)
            if not case_bases:
                continue
            case_bases.sort()
            print('%s\t%s' % (predicate, ' '.join(case_bases)))
def main(in_fname):
    """Print the chunk dependencies of a document as a Graphviz ``digraph``.

    Every chunk that either points to another chunk (``dst != -1``) or is
    pointed to (``srcs`` is non-empty) becomes a node named
    ``<sentence index>.<chunk index>`` whose label is the chunk's raw text.
    Chunks with a destination additionally get an edge to that destination
    within the same sentence.  Chunks with neither are omitted.

    The label is always emitted as a double-quoted DOT string with embedded
    double quotes escaped.  Previously the label of a chunk without a
    destination was written unquoted, which is not valid DOT as soon as the
    text contains spaces, ASCII punctuation or starts with a digit.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    doc = marujirou.create_doc(in_fname)
    print("digraph dependency {")
    for sent_id, sent in enumerate(doc):
        for chunk_id, chunk in enumerate(sent):
            has_head = chunk.dst != -1
            if not has_head and not chunk.srcs:
                # Isolated chunk: contributes neither a node nor an edge.
                continue
            # Inside a quoted DOT string only the double quote needs escaping.
            label = chunk.get_raw().replace('"', '\\"')
            print('%d.%d [label="%s"]' % (sent_id, chunk_id, label))
            if has_head:
                print("%d.%d -> %d.%d;" % (sent_id, chunk_id, sent_id, chunk.dst))
    print("}")
def main(in_fname):
    """Print each verb-bearing chunk's first verb with its cases and their chunks.

    For every chunk that has source chunks (``srcs`` is non-empty) and for
    which ``hasVerb()`` is true, the ``base`` of ``get_first_verb()`` is the
    predicate.  From each source chunk whose ``hasCase()`` is true a pair
    ``(get_case().base, get_raw())`` is collected; the pairs are sorted
    (tuple ordering: by case, then by text).  Chunks without any pair are
    skipped.  Output per line:
    ``<predicate>\\t<cases, space-separated>\\t<chunk texts, space-separated>``.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    for sentence in marujirou.create_doc(in_fname):
        for chunk in sentence:
            if not (chunk.srcs and chunk.hasVerb()):
                continue
            predicate = chunk.get_first_verb().base
            pairs = []
            for src in chunk.srcs:
                dependent = sentence[src]
                if dependent.hasCase():
                    pairs.append((dependent.get_case().base, dependent.get_raw()))
            if not pairs:
                continue
            pairs.sort()
            # Split the sorted pairs back into their two columns.
            case_column = ' '.join([case for case, _ in pairs])
            arg_column = ' '.join([text for _, text in pairs])
            print('%s\t%s\t%s' % (predicate, case_column, arg_column))
def main(in_fname):
    """Print case frames for chunks recognised by ``isSahenWoVerb()``.

    For every chunk that has source chunks (``srcs`` is non-empty) and for
    which ``isSahenWoVerb()`` is true, the value of ``get_sahen_wo_verb()``
    is used as the predicate.  From each source chunk whose ``hasCase()`` is
    true a pair ``(get_case().base, get_raw())`` is collected and the pairs
    are sorted.  Chunks without any pair are skipped.  Output per line:
    ``<predicate>\\t<cases, space-separated>\\t<chunk texts, space-separated>``.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    for sentence in marujirou.create_doc(in_fname):
        for chunk in sentence:
            if not chunk.srcs:
                continue
            if not chunk.isSahenWoVerb():
                continue
            predicate = chunk.get_sahen_wo_verb()
            frame = []
            for src in chunk.srcs:
                dependent = sentence[src]
                if not dependent.hasCase():
                    continue
                frame.append((dependent.get_case().base, dependent.get_raw()))
            if not frame:
                continue
            frame.sort()
            case_column = ' '.join([entry[0] for entry in frame])
            arg_column = ' '.join([entry[1] for entry in frame])
            print('%s\t%s\t%s' % (predicate, case_column, arg_column))
def main(in_fname):
    """Write the document's chunk dependency graph to stdout in DOT format.

    Output is wrapped in ``digraph dependency { ... }``.  A chunk is emitted
    as node ``<sentence index>.<chunk index>`` (label: its raw text) when it
    has a destination (``dst != -1``) or has source chunks (``srcs`` is
    non-empty).  A chunk with a destination is followed by an edge line to
    that destination in the same sentence.

    Labels are written as double-quoted DOT strings for every node, with
    embedded double quotes escaped.  The original left the label unquoted
    for chunks without a destination, which yields invalid DOT whenever the
    text contains spaces, ASCII punctuation or begins with a digit.

    Args:
        in_fname: File name handed unchanged to ``marujirou.create_doc``.
    """
    doc = marujirou.create_doc(in_fname)
    print('digraph dependency {')
    for sent_id, sent in enumerate(doc):
        for chunk_id, chunk in enumerate(sent):
            points_somewhere = chunk.dst != -1
            if not points_somewhere and not chunk.srcs:
                # Unconnected chunk: nothing to draw.
                continue
            # Only the double quote needs escaping inside a quoted DOT string.
            label = chunk.get_raw().replace('"', '\\"')
            print('%d.%d [label="%s"]' % (sent_id, chunk_id, label))
            if points_somewhere:
                print('%d.%d -> %d.%d;' % (sent_id, chunk_id, sent_id, chunk.dst))
    print('}')