def setUp(self):
    """Build the DMRS fixtures shared by the tests.

    Each example graph from ``examples_dmrs`` is converted with
    ``abstractSortDictDmrs(node_key=span_pred_key)`` before being stored
    on the test instance.
    """

    def to_span_sorted(dmrs):
        # A fresh target class is requested for every fixture, exactly as
        # when each conversion is written out in full.
        return dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    self.the_cat = to_span_sorted(examples_dmrs.the_cat())
    # Checks if the matching code converts to SortDictDmrs with span_pred_key
    self.the_cat_chases_the_dog = to_span_sorted(
        examples_dmrs.the_cat_chases_the_dog())
    self.the_dog_chases_the_cat = to_span_sorted(
        examples_dmrs.the_dog_chases_the_cat())
def setUp(self):
    """Build the DMRS fixtures shared by the tests.

    Each example graph from ``examples_dmrs`` is converted with
    ``abstractSortDictDmrs(node_key=span_pred_key)`` before being stored
    on the test instance.
    """

    def to_span_sorted(dmrs):
        # A fresh target class is requested for every fixture, exactly as
        # when each conversion is written out in full.
        return dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    self.the_cat = to_span_sorted(examples_dmrs.the_cat())
    # Checks if the matching code converts to SortDictDmrs with span_pred_key
    self.the_cat_chases_the_dog = to_span_sorted(
        examples_dmrs.the_cat_chases_the_dog())
    self.the_dog_chases_the_cat = to_span_sorted(
        examples_dmrs.the_dog_chases_the_cat())
    self.the_mouse = to_span_sorted(examples_dmrs.the_mouse())
    self.dog_cat = to_span_sorted(examples_dmrs.dog_cat())
def get_matching_nodeids(small_dmrs, large_dmrs, all_surface=False, large_excluded=None):
    """
    Finds matching pairs of nodeids between small_dmrs and large_dmrs.

    Starts by matching all nodes but quantifiers and compound predicates
    ('compound', 'compound_name'), then matches quantifiers for nouns with
    matches and finally adds the compound matches.

    :param small_dmrs: A DMRS object used as a match query.
    :param large_dmrs: A DMRS object to be searched for a match.
    :param all_surface: If true, include all nodes from the aligned surface
                        region. If false, find only the nodes with
                        equivalents in small_dmrs.
    :param large_excluded: The nodeids from the large DMRS to be ignored
                           during matching.
    :return: A list of lists of matched nodeid pairs
             (small_dmrs nodeid, large_dmrs nodeid). A list of lists, in case
             more than one best match found. With all_surface, the extra
             surface nodes are appended as (None, large_dmrs nodeid) pairs.
    """
    # Convert DMRSs to SortDictDmrs with span_pred_key node if needed.
    if not isinstance(small_dmrs, SortDictDmrs) or small_dmrs.node_key != span_pred_key:
        small_dmrs = small_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))
    if not isinstance(large_dmrs, SortDictDmrs) or large_dmrs.node_key != span_pred_key:
        large_dmrs = large_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    # Filter quantifiers.
    small_no_qs = [n for n in small_dmrs.nodes if not small_dmrs.is_quantifier(n.nodeid)]
    large_no_qs = [n for n in large_dmrs.nodes if not large_dmrs.is_quantifier(n.nodeid)]

    # Filter compound_name and compound predicates.
    filtered_pred = ['compound', 'compound_name']
    filtered_small = [n for n in small_no_qs if str(n.pred) not in filtered_pred]
    filtered_large = [n for n in large_no_qs if str(n.pred) not in filtered_pred]

    # List of lists of nodeid pairs; the add_* helpers update it in place.
    longest_matches = match_nodes(filtered_small, filtered_large, excluded=large_excluded)
    add_quantifier_matches(small_dmrs, large_dmrs, longest_matches)
    add_compound_matches(small_dmrs, large_dmrs, longest_matches, filtered_pred)

    # Keep only the matches of maximal length.
    max_len = max((len(m) for m in longest_matches), default=0)
    longest_matches = [m for m in longest_matches if len(m) == max_len]

    if all_surface:
        for match in longest_matches:
            if not match:
                # An empty match has no aligned surface region to extend.
                continue
            # Matches are returned in reverse span_pred_key order, so reverse
            # them to hand the large nodeids over in span_pred_key order.
            matched_large_nodeids = [large_id for _, large_id in reversed(match)]
            extra_overlap_nodeids = find_extra_surface_nodeids(matched_large_nodeids, large_dmrs)
            match.extend((None, nodeid) for nodeid in extra_overlap_nodeids)

    return longest_matches
def test_get_matching_nodeids(self):
    """Check get_matching_nodeids on exact, inexact, failing and unconverted queries."""
    find_matches = aligned_matching.get_matching_nodeids
    cat = self.the_cat
    dog_chases_cat = self.the_dog_chases_the_cat
    cat_chases_dog = self.the_cat_chases_the_dog

    # Match "the cat" onto "the dog chases the cat" (exact fit)
    exact = find_matches(cat, dog_chases_cat)
    self.assertEqual(len(exact), 2)
    self.assertCountEqual(exact[0], [(2, 5), (1, 1)])
    self.assertCountEqual(exact[1], [(2, 5), (1, 4)])

    # all_surface = True
    exact_surface = find_matches(cat, dog_chases_cat, all_surface=True)
    self.assertListEqual(exact[1], exact_surface[1])
    # Extra surface nodes
    self.assertCountEqual(
        exact_surface[0],
        [(2, 5), (1, 1), (None, 2), (None, 3), (None, 4)],
    )

    # Match "the dog chases the cat" onto "the cat chases the dog" (inexact fit)
    inexact = find_matches(dog_chases_cat, cat_chases_dog)
    self.assertEqual(len(inexact), 1)
    self.assertCountEqual(inexact[0], [(4, 4), (3, 3), (1, 1)])

    inexact_surface = find_matches(dog_chases_cat, cat_chases_dog, all_surface=True)
    self.assertEqual(len(inexact_surface), 1)
    self.assertCountEqual(inexact_surface[0], [(4, 4), (3, 3), (1, 1), (None, 2)])

    # No match found
    the_mouse = examples_dmrs.the_mouse().convert_to(
        abstractSortDictDmrs(node_key=span_pred_key))
    dog_cat = examples_dmrs.dog_cat().convert_to(
        abstractSortDictDmrs(node_key=span_pred_key))
    no_matches = find_matches(the_mouse, dog_cat)
    self.assertListEqual(no_matches, [])

    # Should be the same as 'the cat'. Built as a ListDmrs with the nodes
    # added noun first, then quantifier.
    mixed_cat = ListDmrs(surface='the cat')
    cat_node = Node(
        nodeid=2,
        pred=RealPred('cat', 'n', '1'),
        cfrom=4,
        cto=7,
        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'),
    )
    mixed_cat.add_node(cat_node)
    the_node = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    mixed_cat.add_node(the_node)
    mixed_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))

    mixed = find_matches(mixed_cat, dog_chases_cat)
    self.assertListEqual(mixed, exact)
def get_matching_nodeids(small_dmrs, large_dmrs, all_surface=False, large_excluded=None):
    """
    Finds matching pairs of nodeids between small_dmrs and large_dmrs.

    Starts by matching all nodes but quantifiers, then matches quantifiers
    for nouns with matches.

    :param small_dmrs: A DMRS object used as a match query.
    :param large_dmrs: A DMRS object to be searched for a match.
    :param all_surface: If true, include all nodes from the aligned surface
                        region. If false, find only the nodes with
                        equivalents in small_dmrs.
    :param large_excluded: The nodeids from the large DMRS to be ignored
                           during matching.
    :return: A list of lists of matched nodeid pairs
             (small_dmrs nodeid, large_dmrs nodeid). A list of lists, in case
             more than one best match found. With all_surface, the extra
             surface nodes are appended as (None, large_dmrs nodeid) pairs.
    """
    # Convert DMRSs to SortDictDmrs with span_pred_key node if needed.
    if not isinstance(small_dmrs, SortDictDmrs) or small_dmrs.node_key != span_pred_key:
        small_dmrs = small_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))
    if not isinstance(large_dmrs, SortDictDmrs) or large_dmrs.node_key != span_pred_key:
        large_dmrs = large_dmrs.convert_to(abstractSortDictDmrs(node_key=span_pred_key))

    # Filter quantifiers.
    small_no_qs = [n for n in small_dmrs.nodes if not small_dmrs.is_quantifier(n.nodeid)]
    large_no_qs = [n for n in large_dmrs.nodes if not large_dmrs.is_quantifier(n.nodeid)]

    # List of lists of nodeid pairs; add_quantifier_matches updates it in place.
    longest_matches = match_nodes(small_no_qs, large_no_qs, excluded=large_excluded)
    add_quantifier_matches(small_dmrs, large_dmrs, longest_matches)

    # Keep only the matches of maximal length.
    max_len = max((len(m) for m in longest_matches), default=0)
    longest_matches = [m for m in longest_matches if len(m) == max_len]

    if all_surface:
        for match in longest_matches:
            if not match:
                # An empty match has no aligned surface region to extend.
                continue
            # Matches are returned in reverse span_pred_key order, so reverse
            # them to hand the large nodeids over in span_pred_key order.
            matched_large_nodeids = [large_id for _, large_id in reversed(match)]
            extra_overlap_nodeids = find_extra_surface_nodeids(matched_large_nodeids, large_dmrs)
            match.extend((None, nodeid) for nodeid in extra_overlap_nodeids)

    return longest_matches