Beispiel #1
0
 def dumps_xml(self, encoding=None):
     """
     Serialise this object to DMRS XML.

     The output is currently a single "<dmrs>...</dmrs>" element; support
     for "<dmrslist>...</dmrslist>" is still to be added.

     encoding: passed through unchanged to pydmrs.serial.dumps_xml. With
         the default (None) a bytestring is returned; specify an encoding
         to get a string instead.
     """
     # Imported at call time rather than at module level
     # (presumably to avoid a circular import with pydmrs.serial -- confirm).
     from pydmrs.serial import dumps_xml as serialise_xml
     xml = serialise_xml(self, encoding=encoding)
     return xml
Beispiel #2
0
 def dumps_xml(self, encoding=None):
     """
     Produce the XML serialisation of this object.

     At present this yields one "<dmrs>...</dmrs>" element; the
     "<dmrslist>...</dmrslist>" form is not supported yet.

     encoding: forwarded as-is to pydmrs.serial.dumps_xml. Left as None,
         the result is a bytestring; give an encoding to receive a string
         instead.
     """
     # The serialiser is imported inside the method, not at module level
     # (presumably to sidestep a circular import -- confirm).
     from pydmrs.serial import dumps_xml as to_xml
     serialised = to_xml(self, encoding=encoding)
     return serialised
Beispiel #3
0
    test_connectedness = not allow_disconnected_dmrs and dmrs.is_connected(ignored_nodeids=filterable_nodeids)

    # If DMRS should remain connected, check that removing filterable nodes will not result in a disconnected DMRS
    if test_connectedness:
        filtered_nodeids = set()
        for nodeid in filterable_nodeids:
            if dmrs.is_connected(removed_nodeids=filtered_nodeids|{nodeid}, ignored_nodeids=filterable_nodeids):
                filtered_nodeids.add(nodeid)

    else:
        filtered_nodeids = filterable_nodeids

    # Remove filtered nodes and their links from the DMRS
    for nodeid in filtered_nodeids:
        dmrs.remove_node(nodeid)

    return dmrs


# If run from the command line, process the given file
if __name__ == '__main__':
    # NOTE(review): `args` is not defined in the visible code; presumably it
    # is produced by command-line argument parsing elsewhere -- confirm.

    # Open both files with the same explicit encoding. Without it the output
    # file would use the platform's locale encoding, so writing the decoded
    # UTF-8 text could fail or produce a file that cannot be read back the
    # same way the input is read.
    with open(args.input_dmrs, 'r', encoding="utf-8") as fin, \
            open(args.output_dmrs, 'w', encoding="utf-8") as fout:
        content = fin.read().strip()

        # Filter every DMRS found in the input and write each result
        # followed by a blank line.
        for dmrs_string in split_dmrs_string(content):
            dmrs = loads_xml(dmrs_string)
            simplified_dmrs = gpred_filtering(dmrs)
            # dumps_xml is called without an encoding, so its result is
            # treated as a bytestring and decoded before writing.
            simplified_dmrs_string = dumps_xml(simplified_dmrs)
            fout.write('{}\n\n'.format(simplified_dmrs_string.decode('utf-8')))
Beispiel #4
0
    # If DMRS should remain connected, check that removing filterable nodes will not result in a disconnected DMRS
    if test_connectedness:
        filtered_nodeids = set()
        for nodeid in filterable_nodeids:
            if dmrs.is_connected(removed_nodeids=filtered_nodeids | {nodeid},
                                 ignored_nodeids=filterable_nodeids):
                filtered_nodeids.add(nodeid)

    else:
        filtered_nodeids = filterable_nodeids

    # Remove filtered nodes and their links from the DMRS
    for nodeid in filtered_nodeids:
        dmrs.remove_node(nodeid)

    return dmrs


# If run from the command line, process the given file
if __name__ == '__main__':
    # NOTE(review): `args` is not defined in the visible code; presumably it
    # is produced by command-line argument parsing elsewhere -- confirm.

    # The output file is opened with the same explicit encoding as the input,
    # so the result does not depend on the platform's locale encoding.
    with open(args.input_dmrs, 'r',
              encoding="utf-8") as fin, open(args.output_dmrs, 'w',
                                             encoding="utf-8") as fout:
        content = fin.read().strip()

        # Process each DMRS in the input independently; every simplified
        # DMRS is written out followed by a blank line.
        for dmrs_string in split_dmrs_string(content):
            dmrs = loads_xml(dmrs_string)
            simplified_dmrs = gpred_filtering(dmrs)
            # dumps_xml is called without an encoding, so its result is
            # treated as a bytestring and decoded before writing.
            simplified_dmrs_string = dumps_xml(simplified_dmrs)
            fout.write('{}\n\n'.format(simplified_dmrs_string.decode('utf-8')))