def test_get_matching_nodeids(self):
    # Exercises aligned_matching.get_matching_nodeids on the fixture graphs:
    # a unique exact fit, the all_surface variants, an ambiguous inexact fit,
    # a query without any fit, and a query whose nodes were added out of order.
    find_pairs = aligned_matching.get_matching_nodeids
    target = self.the_dog_chases_the_cat

    # "the cat" fits "the dog chases the cat" exactly, and only once.
    cat_hits = find_pairs(self.the_cat, target)
    self.assertEqual(len(cat_hits), 1)
    self.assertCountEqual(cat_hits[0], [(2, 5), (1, 4)])

    # all_surface=True on the same query: the first match is unchanged.
    cat_hits_surface = find_pairs(self.the_cat, target, all_surface=True)
    self.assertListEqual(cat_hits[0], cat_hits_surface[0])

    # "dog cat" with all_surface=True: the surface nodes between dog and cat
    # show up paired with None.
    gap_hits = find_pairs(self.dog_cat, target, all_surface=True)
    self.assertCountEqual(gap_hits[0],
                          [(2, 5), (1, 2), (None, 3), (None, 4)])

    # "the dog chases the cat" onto "the cat chases the dog" (inexact fit).
    # Two options: "the dog" matches or "the cat" matches; 'chases' is not
    # part of the longest match, so it appears in neither.
    swapped_hits = find_pairs(target, self.the_cat_chases_the_dog)
    expected_swapped = [[(5, 2), (4, 1)], [(2, 5), (1, 4)]]
    self.assertEqual(len(swapped_hits), 2)
    self.assertCountEqual(swapped_hits, expected_swapped)

    # A query that fits nowhere gives an empty list.
    self.assertListEqual(find_pairs(self.the_mouse, self.dog_cat), [])

    # 'the cat' rebuilt with its nodes added in reverse nodeid order should
    # give the same result as the fixture version.
    reordered_cat = ListDmrs(surface='the cat')
    cat_node = Node(nodeid=2,
                    pred=RealPred('cat', 'n', '1'),
                    cfrom=4,
                    cto=7,
                    sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    the_node = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    for node in (cat_node, the_node):
        reordered_cat.add_node(node)
    reordered_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
    self.assertListEqual(find_pairs(reordered_cat, target), cat_hits)
def test_get_matching_nodeids(self):
    # Exercises aligned_matching.get_matching_nodeids: an exact query with two
    # candidate matches, the all_surface variants, an inexact fit, a query
    # without any fit, and a query whose nodes were added out of order.
    find_pairs = aligned_matching.get_matching_nodeids
    target = self.the_dog_chases_the_cat

    def as_span_sorted(graph):
        # Convert an example graph to a sorted-dict DMRS keyed by span_pred_key.
        return graph.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    # "the cat" onto "the dog chases the cat" (exact fit): two matches.
    cat_hits = find_pairs(self.the_cat, target)
    self.assertEqual(len(cat_hits), 2)
    self.assertCountEqual(cat_hits[0], [(2, 5), (1, 1)])
    self.assertCountEqual(cat_hits[1], [(2, 5), (1, 4)])

    # all_surface=True: the second match is unchanged, the first one gains
    # the extra surface nodes paired with None.
    cat_hits_surface = find_pairs(self.the_cat, target, all_surface=True)
    self.assertListEqual(cat_hits[1], cat_hits_surface[1])
    self.assertCountEqual(
        cat_hits_surface[0],
        [(2, 5), (1, 1), (None, 2), (None, 3), (None, 4)])

    # "the dog chases the cat" onto "the cat chases the dog" (inexact fit).
    swapped_hits = find_pairs(target, self.the_cat_chases_the_dog)
    self.assertEqual(len(swapped_hits), 1)
    self.assertCountEqual(swapped_hits[0], [(4, 4), (3, 3), (1, 1)])

    swapped_hits_surface = find_pairs(
        target, self.the_cat_chases_the_dog, all_surface=True)
    self.assertEqual(len(swapped_hits_surface), 1)
    self.assertCountEqual(swapped_hits_surface[0],
                          [(4, 4), (3, 3), (1, 1), (None, 2)])

    # A query that fits nowhere gives an empty list.
    mouse_graph = as_span_sorted(examples_dmrs.the_mouse())
    dog_cat_graph = as_span_sorted(examples_dmrs.dog_cat())
    self.assertListEqual(find_pairs(mouse_graph, dog_cat_graph), [])

    # 'the cat' rebuilt with its nodes added in reverse nodeid order should
    # give the same result as the fixture version.
    reordered_cat = ListDmrs(surface='the cat')
    cat_node = Node(nodeid=2,
                    pred=RealPred('cat', 'n', '1'),
                    cfrom=4,
                    cto=7,
                    sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    the_node = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    for node in (cat_node, the_node):
        reordered_cat.add_node(node)
    reordered_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
    self.assertListEqual(find_pairs(reordered_cat, target), cat_hits)
assert False, 'Invalid paraphrases file format.' try: assert not next(lines) except StopIteration: break return paraphrases def paraphrase(dmrs, paraphrases, hierarchy=None): """ """ assert isinstance(dmrs, Dmrs), 'Object in dmrs_iter is not a Dmrs.' for (search_dmrs, replace_dmrs) in paraphrases: paraphrased_dmrs = dmrs_mapping(dmrs, search_dmrs, replace_dmrs, hierarchy=hierarchy) if paraphrased_dmrs is None: break else: dmrs = paraphrased_dmrs return dmrs if __name__ == '__main__': assert len(sys.argv) == 2 and not sys.stdin.isatty(), 'Invalid arguments' paraphrases = read_paraphrases_file(sys.argv[1]) for line in sys.stdin: dmrs = ListDmrs.loads_xml(line[:-1]) sys.stdout.write(str(paraphrase(dmrs, paraphrases)) + '\n')
:return Iterator of dicts containing the matching node ids. """ queries = {} search_dmrs = parse_graphlang(search_dmrs_graphlang, queries=queries) queries = [(key, queries[key]) for key in sorted(queries)] for dmrs in dmrs_iter: assert isinstance(dmrs, Dmrs), 'Object in dmrs_iter is not a Dmrs.' # perform an exact matching of search_dmrs against dmrs matchings = dmrs_exact_matching(search_dmrs, dmrs) if results_per_dmrs: results = [] for matching in matchings: # extract matched values if results_as_dict: result = {key: query(matching, dmrs) for key, query in queries} else: result = tuple(query(matching, dmrs) for _, query in queries) if results_per_dmrs: results.append(result) else: yield result if results_per_dmrs: yield results if __name__ == '__main__': assert len(sys.argv) == 2 and not sys.stdin.isatty(), 'Invalid arguments' search_dmrs = sys.argv[1] dmrs_iter = (ListDmrs.loads_xml(line[:-1]) for line in sys.stdin) sys.stdout.write(str(next(dmrs_query(dmrs_iter, search_dmrs, results_as_dict=True))) + '\n')
results = [] for matching in matchings: # extract matched values if results_as_dict: result = {key: query(matching, dmrs) for key, query in queries} else: result = tuple(query(matching, dmrs) for _, query in queries) if results_per_dmrs: results.append(result) else: yield result if results_per_dmrs: yield results if __name__ == '__main__': from pydmrs.core import ListDmrs # Example sentences: # A mouse ate the whole cheese. # Lions eat around 15 zebras per year. # Their children eat so many sweets. # Potatoes are mostly eaten by humans. dmrs_iter = [ListDmrs.loads_xml('<dmrs cfrom="-1" cto="-1"><node cfrom="0" cto="1" nodeid="10000"><realpred lemma="a" pos="q" /><sortinfo /></node><node cfrom="2" cto="7" nodeid="10001"><realpred lemma="mouse" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="sg" pers="3" /></node><node cfrom="8" cto="11" nodeid="10002"><realpred lemma="eat" pos="v" sense="1" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="past" /></node><node cfrom="12" cto="15" nodeid="10003"><realpred lemma="the" pos="q" /><sortinfo /></node><node cfrom="16" cto="21" nodeid="10004"><realpred lemma="whole" pos="a" sense="1" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="bool" sf="prop" tense="untensed" /></node><node cfrom="22" cto="29" nodeid="10005"><realpred lemma="cheese" pos="n" sense="1" /><sortinfo cvarsort="x" num="sg" pers="3" /></node><link from="0" to="10002"><rargname /><post>H</post></link><link from="10000" to="10001"><rargname>RSTR</rargname><post>H</post></link><link from="10002" to="10001"><rargname>ARG1</rargname><post>NEQ</post></link><link from="10002" to="10005"><rargname>ARG2</rargname><post>NEQ</post></link><link from="10003" to="10005"><rargname>RSTR</rargname><post>H</post></link><link from="10004" to="10005"><rargname>ARG1</rargname><post>EQ</post></link></dmrs>'), ListDmrs.loads_xml('<dmrs cfrom="-1" cto="-1"><node cfrom="0" cto="5" nodeid="10000"><gpred>udef_q_rel</gpred><sortinfo 
/></node><node cfrom="0" cto="5" nodeid="10001"><realpred lemma="lion" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="pl" pers="3" /></node><node cfrom="6" cto="9" nodeid="10002"><realpred lemma="eat" pos="v" sense="1" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="pres" /></node><node cfrom="10" cto="16" nodeid="10003"><realpred lemma="around" pos="x" sense="deg" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="17" cto="19" nodeid="10004"><gpred>udef_q_rel</gpred><sortinfo /></node><node carg=""15"" cfrom="17" cto="19" nodeid="10005"><gpred>card_rel</gpred><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="20" cto="26" nodeid="10006"><realpred lemma="zebra" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="pl" pers="3" /></node><node cfrom="27" cto="30" nodeid="10007"><realpred lemma="per" pos="p" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="27" cto="30" nodeid="10008"><gpred>udef_q_rel</gpred><sortinfo /></node><node cfrom="31" cto="36" nodeid="10009"><realpred lemma="year" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="sg" pers="3" /></node><link from="0" to="10002"><rargname /><post>H</post></link><link from="10000" to="10001"><rargname>RSTR</rargname><post>H</post></link><link from="10002" to="10001"><rargname>ARG1</rargname><post>NEQ</post></link><link from="10002" to="10006"><rargname>ARG2</rargname><post>NEQ</post></link><link from="10003" to="10005"><rargname>ARG1</rargname><post>EQ</post></link><link from="10004" to="10006"><rargname>RSTR</rargname><post>H</post></link><link from="10005" to="10006"><rargname>ARG1</rargname><post>EQ</post></link><link from="10007" to="10006"><rargname>ARG1</rargname><post>EQ</post></link><link from="10007" to="10009"><rargname>ARG2</rargname><post>NEQ</post></link><link from="10008" 
to="10009"><rargname>RSTR</rargname><post>H</post></link></dmrs>'), ListDmrs.loads_xml('<dmrs cfrom="-1" cto="-1"><node cfrom="0" cto="5" nodeid="10000"><gpred>def_explicit_q_rel</gpred><sortinfo /></node><node cfrom="0" cto="5" nodeid="10001"><gpred>poss_rel</gpred><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="0" cto="5" nodeid="10002"><gpred>pronoun_q_rel</gpred><sortinfo /></node><node cfrom="0" cto="5" nodeid="10003"><gpred>pron_rel</gpred><sortinfo cvarsort="x" num="pl" pers="3" pt="std" /></node><node cfrom="6" cto="14" nodeid="10004"><realpred lemma="child" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="pl" pers="3" /></node><node cfrom="15" cto="18" nodeid="10005"><realpred lemma="eat" pos="v" sense="1" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="pres" /></node><node cfrom="19" cto="34" nodeid="10006"><gpred>udef_q_rel</gpred><sortinfo /></node><node cfrom="19" cto="21" nodeid="10007"><gpred>comp_so_rel</gpred><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="22" cto="26" nodeid="10008"><gpred>much-many_a_rel</gpred><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="27" cto="34" nodeid="10009"><realpred lemma="sweet" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="pl" pers="3" /></node><link from="0" to="10005"><rargname /><post>H</post></link><link from="10000" to="10004"><rargname>RSTR</rargname><post>H</post></link><link from="10001" to="10003"><rargname>ARG2</rargname><post>NEQ</post></link><link from="10001" to="10004"><rargname>ARG1</rargname><post>EQ</post></link><link from="10002" to="10003"><rargname>RSTR</rargname><post>H</post></link><link from="10005" to="10004"><rargname>ARG1</rargname><post>NEQ</post></link><link from="10005" to="10009"><rargname>ARG2</rargname><post>NEQ</post></link><link from="10006" 
to="10009"><rargname>RSTR</rargname><post>H</post></link><link from="10007" to="10008"><rargname>ARG1</rargname><post>EQ</post></link><link from="10008" to="10009"><rargname>ARG1</rargname><post>EQ</post></link></dmrs>'), ListDmrs.loads_xml('<dmrs cfrom="-1" cto="-1"><node cfrom="0" cto="8" nodeid="10000"><gpred>udef_q_rel</gpred><sortinfo /></node><node cfrom="0" cto="8" nodeid="10001"><realpred lemma="potato" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="pl" pers="3" /></node><node cfrom="13" cto="19" nodeid="10002"><realpred lemma="mostly" pos="a" sense="1" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="untensed" /></node><node cfrom="20" cto="25" nodeid="10003"><realpred lemma="eat" pos="v" sense="1" /><sortinfo cvarsort="e" mood="indicative" perf="-" prog="-" sf="prop" tense="pres" /></node><node cfrom="29" cto="36" nodeid="10004"><gpred>udef_q_rel</gpred><sortinfo /></node><node cfrom="29" cto="36" nodeid="10005"><realpred lemma="human" pos="n" sense="1" /><sortinfo cvarsort="x" ind="+" num="pl" pers="3" /></node><link from="0" to="10003"><rargname /><post>H</post></link><link from="10000" to="10001"><rargname>RSTR</rargname><post>H</post></link><link from="10002" to="10003"><rargname>ARG1</rargname><post>EQ</post></link><link from="10003" to="10001"><rargname>ARG2</rargname><post>NEQ</post></link><link from="10003" to="10005"><rargname>ARG1</rargname><post>NEQ</post></link><link from="10004" to="10005"><rargname>RSTR</rargname><post>H</post></link></dmrs>')] search_dmrs_str = '_?1_?_?_rel i <-1- _eat_v_1_rel e? 
-2-> _?2_?_?_rel i' print('- not dict, not per dmrs:', list(dmrs_query(dmrs_iter, search_dmrs_str, results_as_dict=False, results_per_dmrs=False))) print('- dict, not per dmrs:', list(dmrs_query(dmrs_iter, search_dmrs_str, results_as_dict=True, results_per_dmrs=False))) print('- not dict, per dmrs:', list(dmrs_query(dmrs_iter, search_dmrs_str, results_as_dict=False, results_per_dmrs=True))) print('- dict, per dmrs:', list(dmrs_query(dmrs_iter, search_dmrs_str, results_as_dict=True, results_per_dmrs=True)))