def loads_xml(cls, bytestring, encoding=None, **kwargs):
    """
    Build an instance from the XML serialisation of a single DMRS.

    Handles one "<dmrs>...</dmrs>" element at the moment; support for
    "<dmrslist>...</dmrslist>" is still to be added.

    :param bytestring: the XML to parse, as bytes (or as a string, provided
        that encoding is given)
    :param encoding: only needed when loading from a string rather than bytes
    :param kwargs: passed through unchanged to pydmrs.serial.loads_xml
    :return: whatever pydmrs.serial.loads_xml returns when given cls=cls
    """
    # Imported inside the function and aliased so that the helper does not
    # shadow this method's own name.
    # NOTE(review): the local import is presumably there to avoid a circular
    # import between this module and pydmrs.serial - confirm.
    from pydmrs.serial import loads_xml as _parse_xml

    return _parse_xml(bytestring, encoding=encoding, cls=cls, **kwargs)
# Tail of a node-filtering routine (its header lies above this view), followed
# by the command-line entry point.
#
# Filtering tail:
#   * test_connectedness is true only when disconnected results are not allowed
#     AND the DMRS is connected while the filterable nodes are ignored.
#   * When it is true, filterable nodes are accepted one at a time: a node is
#     added to filtered_nodeids only if the DMRS is still connected with that
#     node and all previously accepted nodes removed.
#   * The else branch selects every filterable node (presumably it pairs with
#     the test_connectedness check - indentation is not visible here, confirm).
#   * Each selected node is passed to dmrs.remove_node and the same dmrs object
#     is returned.
#
# Entry point: reads args.input_dmrs as UTF-8, splits the stripped content with
# split_dmrs_string, and for each piece runs loads_xml -> gpred_filtering ->
# dumps_xml, writing the UTF-8-decoded result plus a blank line to
# args.output_dmrs.
#
# NOTE(review): the output file is opened without an explicit encoding while
# the input is read as UTF-8; consider encoding="utf-8" on the output too so
# the write does not depend on the platform default.
# NOTE(review): `args` is not defined anywhere in the visible text - confirm it
# is set up (e.g. by an argument parser) before this block runs.
# NOTE(review): the statements below are collapsed onto one physical line, so
# everything after the first `#` is lexically a comment - restore the original
# line breaks and indentation.
test_connectedness = not allow_disconnected_dmrs and dmrs.is_connected(ignored_nodeids=filterable_nodeids) # If DMRS should remain connected, check that removing filterable nodes will not result in a disconnected DMRS if test_connectedness: filtered_nodeids = set() for nodeid in filterable_nodeids: if dmrs.is_connected(removed_nodeids=filtered_nodeids|{nodeid}, ignored_nodeids=filterable_nodeids): filtered_nodeids.add(nodeid) else: filtered_nodeids = filterable_nodeids # Remove filtered nodes and their links from the DMRS for nodeid in filtered_nodeids: dmrs.remove_node(nodeid) return dmrs # If run from the command line, process the given file if __name__ == '__main__': with open(args.input_dmrs, 'r', encoding="utf-8") as fin, open(args.output_dmrs, 'w') as fout: content = fin.read().strip() for dmrs_string in split_dmrs_string(content): dmrs = loads_xml(dmrs_string) simplified_dmrs = gpred_filtering(dmrs) simplified_dmrs_string = dumps_xml(simplified_dmrs) fout.write('{}\n\n'.format(simplified_dmrs_string.decode('utf-8')))
# Tail of a node-filtering routine (its header and the definition of
# test_connectedness lie above this view), followed by the command-line entry
# point.
#
# Filtering tail:
#   * When test_connectedness is true, filterable nodes are accepted one at a
#     time: a node is added to filtered_nodeids only if the DMRS is still
#     connected with that node and all previously accepted nodes removed.
#   * The else branch selects every filterable node (presumably it pairs with
#     the test_connectedness check - indentation is not visible here, confirm).
#   * Each selected node is passed to dmrs.remove_node and the same dmrs object
#     is returned.
#
# Entry point: reads args.input_dmrs as UTF-8, splits the stripped content with
# split_dmrs_string, and for each piece runs loads_xml -> gpred_filtering ->
# dumps_xml, writing the UTF-8-decoded result plus a blank line to
# args.output_dmrs.
#
# NOTE(review): the output file is opened without an explicit encoding while
# the input is read as UTF-8; consider encoding="utf-8" on the output too so
# the write does not depend on the platform default.
# NOTE(review): `args` is not defined anywhere in the visible text - confirm it
# is set up (e.g. by an argument parser) before this block runs.
# NOTE(review): the statements below are collapsed onto one physical line that
# starts with `#`, so the whole line is lexically a comment - restore the
# original line breaks and indentation.
# If DMRS should remain connected, check that removing filterable nodes will not result in a disconnected DMRS if test_connectedness: filtered_nodeids = set() for nodeid in filterable_nodeids: if dmrs.is_connected(removed_nodeids=filtered_nodeids | {nodeid}, ignored_nodeids=filterable_nodeids): filtered_nodeids.add(nodeid) else: filtered_nodeids = filterable_nodeids # Remove filtered nodes and their links from the DMRS for nodeid in filtered_nodeids: dmrs.remove_node(nodeid) return dmrs # If run from the command line, process the given file if __name__ == '__main__': with open(args.input_dmrs, 'r', encoding="utf-8") as fin, open(args.output_dmrs, 'w') as fout: content = fin.read().strip() for dmrs_string in split_dmrs_string(content): dmrs = loads_xml(dmrs_string) simplified_dmrs = gpred_filtering(dmrs) simplified_dmrs_string = dumps_xml(simplified_dmrs) fout.write('{}\n\n'.format(simplified_dmrs_string.decode('utf-8')))