def read_conllx_triples(corpus_file):
    """Collect the dependency triples of every sentence in *corpus_file*.

    A ``DependencyCorpusView`` is opened over the file (untagged, grouped
    by sentence, with dependencies), passed through ``concat``, and each
    resulting sentence item is turned into a ``DependencyGraph`` whose top
    relation label is ``'root'``.

    Args:
        corpus_file: Corpus location handed straight to
            ``DependencyCorpusView``.  The original author's example path
            ended in ``.conllx`` -- presumably CoNLL-X formatted data;
            confirm against the caller.

    Returns:
        A single flat list holding the triples of all sentences, in
        corpus order.
    """
    # Example path left (commented out) by the original author:
    # '/tmp/autocorpus3/autocorpus3/static/Chinese_train_pos.xml.utf8.Chinese_medicine.segmented.conllx'
    corpus_view = DependencyCorpusView(
        corpus_file,
        tagged=False,
        group_by_sent=True,
        dependencies=True,
    )

    collected = []
    for sentence in concat([corpus_view]):
        graph = DependencyGraph(sentence, top_relation_label='root')
        # triples() is consumed fully here, so the result is one flat list
        # rather than a list of per-sentence iterables.
        collected.extend(graph.triples())
    return collected
def parse(sent):
    """Parse *sent* and print its constituency and dependency analyses.

    Relies on the module-level ``con_parser`` and ``dep_parser`` objects
    (defined outside this function).  Each is asked for ``raw_parse(sent)``
    and must yield exactly one parse.

    Three sections are written to standard output, each preceded by a blank
    line: the constituency tree, the dependency tree, and one line per
    dependency triple.

    Args:
        sent: The raw sentence passed unchanged to both parsers.

    Raises:
        ValueError: If either parser yields zero or more than one parse
            (the single-element unpacking fails).
    """
    # Run both parsers before printing anything, so a parser failure
    # produces no partial output.
    (constituency_tree,) = con_parser.raw_parse(sent)
    (dependency_parse,) = dep_parser.raw_parse(sent)

    print()
    print("Constituency Tree:")
    constituency_tree.pretty_print()

    # Rebuild a DependencyGraph from the parse's CoNLL export.
    # NOTE(review): the argument 4 presumably selects the 4-column CoNLL
    # style -- confirm against the parser library's documentation.
    graph = DependencyGraph(dependency_parse.to_conll(4))

    print()
    print("Dependency Tree:")
    graph.tree().pprint()

    print()
    print("Dependencies:")
    for head, relation, child in graph.triples():
        print(head, relation, child)