def test_parse_file(self): parser = AlpinoParser() id2graph = {} parser.parse_file("data/gb-sample.xml", id2graph) for id, graph in id2graph.items(): print id print graph.get_graph_token_string().encode("utf-8") print print graph.graph_to_string().encode("utf-8") print 78 * "="
def test_parse_string(self): """ test incremental parsing """ parser = AlpinoParser() parser.parse_string("<treebank>") # extract xml for dependency structures from graphbank file xml = open("data/gb-sample.xml").read() xml = xml.split("<treebank>", 1)[1] while not xml.startswith("</treebank>"): ds, xml = xml.split("</alpino_ds>", 1) ds += "</alpino_ds>" # remove id ds = "<alpino_ds>" + ds.split(">", 1)[1] id2graph = parser.parse_string(ds) id, graph = id2graph.items()[0] print id2graph print id print graph.get_graph_token_string().encode("utf-8") print print graph.graph_to_string().encode("utf-8") print 78 * "="
def init_graph_xml_parser(self): self._alpino_xml_parser = AlpinoParser() self._alpino_parser_reused = 0 # feed fake root node to the xml parser self._alpino_xml_parser.parse_string( '<?xml version="1.0" encoding="utf-8"?>\n<treebank>')