def test_GPred_from_string(self):
    """
    GPred.from_string should build GPred instances
    from strings that have no leading underscore
    """
    # 'pron' has no intermediate underscore, 'udef_q' has one;
    # each is parsed both with and without the '_rel' suffix
    for name in ('pron', 'udef_q'):
        with_suffix = GPred.from_string(name + '_rel')
        without_suffix = GPred.from_string(name)
        self.assertEqual(GPred(name), with_suffix)
        self.assertEqual(GPred(name), without_suffix)
        self.assertIsInstance(with_suffix, GPred)
        self.assertIsInstance(without_suffix, GPred)

    # A leading underscore is rejected, and so is a non-string argument
    self.assertRaises(ValueError, GPred.from_string, "_the_q_rel")
    self.assertRaises(TypeError, GPred.from_string, 1)
if __name__ == '__main__':
    # Run as a script: read the command line, which may name a configuration file
    parser = argparse.ArgumentParser(description='DMRS simplification tool')
    parser.add_argument('-c', '--config', default=None,
                        help='Path to simplifaction configuration file. By default, configuration in __config__/default_simplification.conf is used.')
    parser.add_argument('input_dmrs', help='Specify input DMRS file')
    parser.add_argument('output_dmrs', help='Specify output dmrs file.')
    args = parser.parse_args()
    if args.config is not None:
        # Load the given file
        config = load_config(args.config, default=False)
    else:
        # No configuration file was named on the command line
        config = load_config(DEFAULT_CONFIG_FILE)
else:
    # Imported as a module: there is no command line, so always use the default file
    config = load_config(DEFAULT_CONFIG_FILE)

# Module-level defaults, read once from the configuration chosen above.
# They are bound as default argument values of gpred_filtering when it is defined.
DEFAULT_FILTER = frozenset(GPred.from_string(x) for x in get_config_option(config, 'General Predicate Filtering', 'filter', opt_type=list))
DEFAULT_ALLOW_DISC = get_config_option(config, 'General Predicate Filtering', 'allow_disconnected_dmrs')


def gpred_filtering(dmrs, gpred_filter=DEFAULT_FILTER, allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes on the filter list from the DMRS.
    :param dmrs: Input DMRS object
    :param gpred_filter: General predicates to filter; defaults to DEFAULT_FILTER,
        a frozenset of GPred objects built from the configuration
    :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS.
        If DMRS was already disconnected, gpred nodes are removed regardless.
        Defaults to DEFAULT_ALLOW_DISC, read from the configuration.
    :return: Output DMRS object
    """
    filterable_nodeids = set()
    # Find general predicate nodes to filter
def test_GPred_str(self):
    """
    The 'informal' string representation of a GPred:
    str() of the GPred parsed from 'pron' should be 'pron'
    """
    expected = 'pron'
    gpred = GPred.from_string(expected)
    self.assertEqual(str(gpred), expected)
def loads_xml(bytestring, encoding=None, cls=ListDmrs, **kwargs):
    """
    Build a DMRS graph from a single "<dmrs>...</dmrs>" XML element
    ("<dmrslist>...</dmrslist>" is not handled yet)
    :param bytestring: The XML as a bytestring; a string may be given instead if encoding is specified
    :param encoding: If given, bytestring is encoded with it before being parsed
    :param cls: The DMRS class to instantiate (ListDmrs by default)
    :param kwargs: Passed on to the constructor of cls
    :return: The populated instance of cls
    :raises PydmrsValueError: if an unexpected tag is encountered
    """
    def optional_int(element, attribute):
        # An absent attribute is mapped to None instead of being converted
        if attribute in element.attrib:
            return int(element.get(attribute))
        return None

    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    # Graph-level attributes
    dmrs = cls(**kwargs)
    dmrs.cfrom = optional_int(root, 'cfrom')
    dmrs.cto = optional_int(root, 'cto')
    dmrs.surface = root.get('surface')
    dmrs.ident = optional_int(root, 'ident')
    index_id = optional_int(root, 'index')
    top_id = None

    for element in root:
        if element.tag == 'node':
            nodeid = optional_int(element, 'nodeid')
            cfrom = optional_int(element, 'cfrom')
            cto = optional_int(element, 'cto')
            surface = element.get('surface')
            base = element.get('base')
            carg = element.get('carg')
            pred = None
            sortinfo = None
            for child in element:
                tag = child.tag
                if tag == 'realpred':
                    try:
                        pred = RealPred(child.get('lemma'), child.get('pos'), child.get('sense'))
                    except PydmrsValueError:
                        # The whole pred name was put under 'lemma',
                        # instead of being split into 'lemma', 'pos', 'sense'
                        pred = RealPred.from_string(child.get('lemma'))
                        warn("RealPred given as string rather than lemma, pos, sense", PydmrsWarning)
                elif tag == 'gpred':
                    try:
                        pred = GPred.from_string(child.text)
                    except PydmrsValueError:
                        # The string is really a RealPred, not a GPred
                        pred = RealPred.from_string(child.text)
                        warn("RealPred string found in a <gpred> tag", PydmrsWarning)
                elif tag == 'sortinfo':
                    sortinfo = child.attrib
                else:
                    raise PydmrsValueError(tag)
            node = cls.Node(nodeid=nodeid, pred=pred, carg=carg, sortinfo=sortinfo,
                            cfrom=cfrom, cto=cto, surface=surface, base=base)
            dmrs.add_node(node)
            continue

        if element.tag != 'link':
            raise PydmrsValueError(element.tag)

        start = int(element.get('from'))
        end = int(element.get('to'))
        if start == 0:
            # A link from nodeid 0 marks the top node; it is not stored as a link
            top_id = end
            continue

        rargname = None
        post = None
        for child in element:
            if child.tag == 'rargname':
                rargname = child.text
            elif child.tag == 'post':
                post = child.text
            else:
                raise PydmrsValueError(child.tag)
        dmrs.add_link(Link(start, end, rargname, post))

    # Top and index are resolved only once all nodes have been added
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]
    return dmrs
default=None, help= 'Path to simplifaction configuration file. By default, configuration in __config__/default_simplification.conf is used.' ) parser.add_argument('input_dmrs', help='Specify input DMRS file') parser.add_argument('output_dmrs', help='Specify output dmrs file.') args = parser.parse_args() if args.config is not None: # Load the given file config = load_config(args.config, default=False) else: config = load_config(DEFAULT_CONFIG_FILE) else: config = load_config(DEFAULT_CONFIG_FILE) DEFAULT_FILTER = frozenset( GPred.from_string(x) for x in get_config_option( config, 'General Predicate Filtering', 'filter', opt_type=list)) DEFAULT_ALLOW_DISC = get_config_option(config, 'General Predicate Filtering', 'allow_disconnected_dmrs') def gpred_filtering(dmrs, gpred_filter=DEFAULT_FILTER, allow_disconnected_dmrs=DEFAULT_ALLOW_DISC): """ Remove general predicate nodes on the filter list from the DMRS. :param dmrs_xml: Input DMRS object :param gpred_filter: A list of general predicates to filter (as strings) :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS. If DMRS was already disconnected, gpred nodes are removed regardless. :return: Output DMRS object