def parse_and_store(finput, foutput, model=rhead):
    """Apply *model* to every item of the corpus *finput* and write to *foutput*.

    The output file is opened for writing (and therefore truncated) before
    the corpus is read.

    * When *model* is ``rhead`` or ``lhead``, items come from
      ``idgcorpus(finput)``; each is written as ``str(model(item))`` and
      consecutive entries are separated by one blank line.
    * For any other *model*, items come from ``itreecorpus(finput)``; each
      produces one line ``<leaf count>:<toString(model(item, _root=True,
      _unary=False))>`` and lines are separated by a single newline.

    No trailing newline is written in either case.
    """
    with open(foutput, "w") as fp:
        if model not in (rhead, lhead):
            # Tree corpus: prefix every converted tree with its leaf count.
            tree_lines = (
                "%d:%s" % (
                    len(tree.leaves()),
                    toString(model(tree, _root=True, _unary=False)),
                )
                for tree in itreecorpus(finput)
            )
            fp.write("\n".join(tree_lines))
        else:
            # Dependency-graph corpus: entries separated by an empty line.
            graph_blocks = (
                str(model(graph)) for graph in idgcorpus(finput)
            )
            fp.write("\n\n".join(graph_blocks))
def cloneAsDGn(iwildcard, target, n=None):
    """Pass the corpus matched by *iwildcard* to ``clone`` with *target*.

    ``clone`` is always given ``cmap=DependencyGraph.filteredcopy``.  When
    *n* is truthy, a ``cfilter`` predicate is supplied as well; it accepts
    a graph only if ``1 <= dg.length() <= n``.  A falsy *n* (``None`` or
    ``0``) passes no filter at all.

    NOTE(review): what ``clone`` does with *target*, ``cmap`` and
    ``cfilter`` is defined elsewhere -- confirm against its definition.
    """
    corpus = idgcorpus(iwildcard)
    clone_options = {"cmap": DependencyGraph.filteredcopy}
    if n:
        # Only bound the graph length when a limit was actually requested.
        clone_options["cfilter"] = lambda dg: 1 <= dg.length() <= n
    clone(corpus, target, **clone_options)
def test_edge_count_in_coNLL10(self):
    # For every graph read from 'coNLL10.dp', the value reported by
    # length() must equal the number of entries in edgeset(False).
    for graph in idgcorpus('coNLL10.dp'):
        reported_length = graph.length()
        edge_total = len(graph.edgeset(False))
        self.assertEqual(reported_length, edge_total)
def test_edge_count_in_dependency_graph(self):
    # Same check as the single-file variant, run over every graph read
    # from the files matching 'conll/*/*.dp': length() must equal the
    # number of entries in edgeset(False).
    for graph in idgcorpus('conll/*/*.dp'):
        reported_length = graph.length()
        edge_total = len(graph.edgeset(False))
        self.assertEqual(reported_length, edge_total)
def test_corpus_size(self):
    # Count every graph yielded for 'conll/*/*.dp' and compare the total
    # with the expected corpus size of 49208.
    ncorpus = sum(1 for _ in idgcorpus('conll/*/*.dp'))
    self.assertEqual(49208, ncorpus)