def compose(self, other, fusion=None, other_head=False, hierarchy=None):
    """
    Merge the graph `other` into this graph, in place.

    Anchor nodes are unified pairwise first; every remaining node and link of
    `other` is then added to this graph.

    :param other Dmrs graph to merge in. The nodeids of its nodes are overwritten.
    :param fusion Optional mapping from anchor names of this graph to anchor names
        of `other`. If None, anchors with equal names are paired.
    :param hierarchy Passed through to the unification of anchor nodes.
    :param other_head If True, links of `other` replace existing links between the
        same pair of nodes, and index, top and anchors are taken from `other`.
    """
    assert isinstance(other, Dmrs)

    # Lazily enumerate the (own anchor name, other anchor name) pairs to unify
    if fusion is None:
        anchor_pairs = (
            (own_name, other_name)
            for own_name in self.anchors
            for other_name in other.anchors
            if own_name == other_name
        )
    else:
        anchor_pairs = fusion.items()

    # unify anchors, remembering which nodeid of other ends up as which nodeid of self
    merged_ids = {}
    for own_name, other_name in anchor_pairs:
        own_node = self.anchors[own_name]
        other_node = other.anchors[other_name]
        own_node.unify(other_node, hierarchy=hierarchy)
        merged_ids[other_node.nodeid] = own_node.nodeid

    # add missing nodes, update node ids
    for other_node in other.iter_nodes():
        old_id = other_node.nodeid
        if old_id not in merged_ids:
            other_node.nodeid = None
            merged_ids[old_id] = self.add_node(other_node)
        else:
            other_node.nodeid = merged_ids[old_id]

    # add missing links, update existing links
    # (the snapshot of existing endpoints is deliberately not updated while adding)
    existing = {(link.start, link.end) for link in self.iter_links()}
    for other_link in other.iter_links():
        endpoints = (merged_ids[other_link.start], merged_ids[other_link.end])
        already_linked = endpoints in existing
        if already_linked and not other_head:
            continue
        if already_linked:
            self.remove_link(endpoints)
        self.add_link(
            Link(endpoints[0], endpoints[1], other_link.rargname, other_link.post))

    # update index, top and anchors
    if other_head:
        self.index = None if other.index is None else self[other.index.nodeid]
        self.top = None if other.top is None else self[other.top.nodeid]
        self.anchors = {
            name: self[node.nodeid] for name, node in other.anchors.items()
        }
def the_dog_chases_the_cat_and_the_mouse():
    """
    Build the example DMRS graph for "the dog chases the cat and the mouse".
    """
    def singular_noun():
        # A fresh sortinfo per node, so that no two nodes share one object
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q')),
        Node(nodeid=2, pred=RealPred('dog', 'n', '1'), sortinfo=singular_noun()),
        Node(nodeid=3, pred=RealPred('chase', 'v', '1'),
             sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5, pred=RealPred('cat', 'n', '1'), sortinfo=singular_noun()),
        Node(nodeid=6, pred=GPred('udef_q')),
        Node(nodeid=7, pred=RealPred('and', 'c'),
             sortinfo=InstanceSortinfo(pers='3', num='pl')),
        Node(nodeid=8, pred=RealPred('the', 'q')),
        Node(nodeid=9, pred=RealPred('mouse', 'n', '1'), sortinfo=singular_noun()),
    ]
    links = [
        Link(start=1, end=2, rargname='RSTR', post='H'),
        Link(start=3, end=2, rargname='ARG1', post='NEQ'),
        Link(start=3, end=7, rargname='ARG2', post='NEQ'),
        Link(start=4, end=5, rargname='RSTR', post='H'),
        Link(start=6, end=7, rargname='RSTR', post='H'),
        Link(start=7, end=5, rargname='L-INDEX', post='NEQ'),
        Link(start=7, end=9, rargname='R-INDEX', post='NEQ'),
        Link(start=8, end=9, rargname='RSTR', post='H'),
    ]
    return DictDmrs(nodes=nodes, links=links, index=3, top=3)
def test_Link_str(self):
    """
    str() of a Link should render a labelled arrow
    running from the start nodeid to the end nodeid
    """
    rendered = str(Link(0, 1, 'RSTR', 'H'))
    self.assertEqual(rendered, "(0 - RSTR/H -> 1)")
def test_Link_repr(self):
    """
    Evaluating repr() of a Link should give back an equal Link
    """
    original = Link(0, 1, 'RSTR', 'H')
    # eval is applied only to the repr of the Link constructed just above
    rebuilt = eval(repr(original))
    self.assertEqual(original, rebuilt)
def test_Link_labelstring(self):
    """
    Link.labelstring should be the string form of the link's label
    """
    expected = 'RSTR/H'
    actual = Link(0, 1, 'RSTR', 'H').labelstring
    self.assertEqual(actual, expected)
def test_Link_label(self):
    """
    Link.label should be a LinkLabel built from the link's rargname and post
    """
    rargname, post = 'RSTR', 'H'
    link = Link(0, 1, rargname, post)
    expected = LinkLabel(rargname, post)
    self.assertIsInstance(link.label, LinkLabel)
    self.assertEqual(link.label, expected)
def simplify(dmrs):
    """
    Reduce a DMRS, in place, to a form that can be converted to robot commands.

    Steps, in order: filter GPreds, remove quantifier nodes, apply the
    `rename` rules, apply the `shrink` rules. The modified graph is returned.
    """
    def absorb(keeper_id, link, merged_pred):
        # Fold the node at the end of `link` into the node `keeper_id`
        absorbed_id = link.end
        dmrs.remove_link(link)
        # Re-attach the absorbed node's outgoing, then incoming, links to the keeper
        for outgoing in dmrs.get_out(absorbed_id):
            dmrs.add_link(
                Link(keeper_id, outgoing.end, outgoing.rargname, outgoing.post))
        for incoming in dmrs.get_in(absorbed_id):
            dmrs.add_link(
                Link(incoming.start, keeper_id, incoming.rargname, incoming.post))
        # Drop the absorbed node and give the keeper its new predicate
        dmrs.remove_node(link.end)
        dmrs[keeper_id].pred = merged_pred

    # Remove unnecessary GPreds (defaults, plus pronouns)
    gpred_filtering(dmrs, extended_filter)

    # Remove quantifiers (iterating over a copy, because nodes are removed)
    for candidate in copy(dmrs.nodes):
        if dmrs.is_quantifier(candidate.nodeid):
            dmrs.remove_node(candidate.nodeid)

    # Mapping rules, part 1: rename single predicates
    for old_pred, new_pred in rename:
        for node in dmrs.iter_nodes():
            if node.pred == old_pred:
                node.pred = new_pred

    # Mapping rules, part 2: collapse a linked pair of nodes into one
    for head_pred, label, tail_pred, merged_pred in shrink:
        for head in copy(dmrs.nodes):
            if head.pred == head_pred:
                head_id = head.nodeid
                candidates = dmrs.get_out(head_id,
                                          rargname=label.rargname,
                                          post=label.post)
                for link in candidates:
                    if dmrs[link.end].pred == tail_pred:
                        # We've found a match
                        absorb(head_id, link, merged_pred)

    return dmrs
def the_mouse():
    """
    Build the example DMRS graph for "the mouse", with character spans.
    """
    graph = DictDmrs(surface='the mouse')
    determiner = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    noun = Node(nodeid=2,
                pred=RealPred('mouse', 'n', '1'),
                cfrom=4,
                cto=9,
                sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    for node in (determiner, noun):
        graph.add_node(node)
    graph.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
    return graph
def the_cat_chases_the_dog():
    """
    Build the example DMRS graph for "the cat chases the dog", with character spans.
    """
    def singular_noun():
        # A fresh sortinfo per node, so that no two nodes share one object
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
        Node(nodeid=2, pred=RealPred('cat', 'n', '1'), cfrom=4, cto=7,
             sortinfo=singular_noun()),
        Node(nodeid=3, pred=RealPred('chase', 'v', '1'), cfrom=8, cto=14,
             sortinfo=EventSortinfo(sf='prop', tense='pres', mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
        Node(nodeid=5, pred=RealPred('dog', 'n', '1'), cfrom=19, cto=22,
             sortinfo=singular_noun()),
    ]
    links = [
        Link(start=1, end=2, rargname='RSTR', post='H'),
        Link(start=3, end=2, rargname='ARG1', post='NEQ'),
        Link(start=3, end=5, rargname='ARG2', post='NEQ'),
        Link(start=4, end=5, rargname='RSTR', post='H'),
    ]
    return DictDmrs(surface='the cat chases the dog',
                    nodes=nodes,
                    links=links,
                    index=3,
                    top=3)
def test_find_best_matches(self):
    """
    find_best_matches is checked on an exact fit, an inexact fit,
    a pair of graphs without any match, and a pair with two matches
    """
    find = general_matching.find_best_matches

    def rstr(start, end):
        # Shorthand for the RSTR/H links compared throughout this test
        return Link(start, end, 'RSTR', 'H')

    # Exact fit: "the cat" onto "the dog chases the cat"
    matches = find(self.cat_dmrs, self.small_dmrs)
    self.assertEqual(len(matches), 1)
    best = matches[0]
    self.assertCountEqual(best.nodeid_pairs, [(2, 5), (1, 4)])
    self.assertCountEqual(best.link_pairs, [(rstr(1, 2), rstr(4, 5))])

    # Inexact fit: "the dog chases the cat" onto "the cat chases the dog"
    matches = find(self.small_dmrs, self.reverse_dmrs)
    self.assertEqual(len(matches), 1)
    best = matches[0]
    self.assertCountEqual(best.nodeid_pairs,
                          [(5, 2), (4, 1), (3, 3), (2, 5), (1, 4)])
    self.assertCountEqual(best.link_pairs,
                          [(rstr(4, 5), rstr(1, 2)),
                           (rstr(1, 2), rstr(4, 5))])

    # Nothing matches: the result is expected to be None
    matches = find(examples_dmrs.the_mouse(), self.reverse_dmrs)
    self.assertIsNone(matches)

    # Two equally good matches
    matches = find(self.cat_dmrs, self.large_dmrs)
    self.assertEqual(len(matches), 2)
    first, second = matches[0], matches[1]
    self.assertCountEqual(first.nodeid_pairs, [(2, 5), (1, 4)])
    self.assertCountEqual(first.link_pairs, [(rstr(1, 2), rstr(4, 5))])
    self.assertCountEqual(second.nodeid_pairs, [(2, 8), (1, 7)])
    self.assertCountEqual(second.link_pairs, [(rstr(1, 2), rstr(7, 8))])
def test_Link_copy(self):
    """
    Both copy.copy and copy.deepcopy of a Link should give
    a Link that is equal to the original but is a distinct object
    """
    from copy import copy, deepcopy

    original = Link(0, 1, 'RSTR', 'H')
    duplicates = (copy(original), deepcopy(original))
    for duplicate in duplicates:
        self.assertEqual(original, duplicate)
    for duplicate in duplicates:
        self.assertIsNot(original, duplicate)
def reverse_link(dmrs, link):
    """
    Replace a Link in a Dmrs graph by its reversal, and return the new Link.

    The reversed Link swaps the start and end nodeids and keeps the post.
    Its rargname gets the suffix "_REV", or loses it if it was already there.
    """
    old_name = link.rargname
    flipped_name = old_name[:-4] if old_name[-4:] == "_REV" else old_name + "_REV"
    reversed_link = Link(link.end, link.start, flipped_name, link.post)
    # The old link is removed before the reversed one is added
    dmrs.remove_link(link)
    dmrs.add_link(reversed_link)
    return reversed_link
def test_iter_outgoing(self):
    """
    iter_outgoing should return an iterator over the links leaving a node
    """
    graph = self.test_dmrs

    # Asking for nodeid 15 is expected to raise
    with self.assertRaises(PydmrsValueError):
        graph.iter_outgoing(15)

    graph.add_link(Link(3, 4, 'None', 'EQ'))
    outgoing = graph.iter_outgoing(3)
    # Check that an iterator returned
    self.assertTrue(hasattr(outgoing, '__next__'))
    # EQ link counted as outgoing
    found = list(outgoing)
    expected = [
        Link(3, 5, 'ARG2', 'NEQ'),
        Link(3, 2, 'ARG1', 'NEQ'),
        Link(3, 4, None, 'EQ'),
    ]
    self.assertCountEqual(found, expected)

    # TODO: Treat EQ links symmetrically or not at all, as long as it's consistent.
    # Test e.g.
    # self.test_dmrs.add_link(Link(4, 3, 'None', 'EQ'))
    # out_it = self.test_dmrs.iter_outgoing(3)
    # self.assertIn(Link(4, 3, 'None', 'EQ'), list(out_it))

    # No outgoing links: the iterator is exhausted straight away
    exhausted = graph.iter_outgoing(2)
    with self.assertRaises(StopIteration):
        next(exhausted)
def test_get_matched_subgraph(self):
    """
    The subgraph extracted for the best match of cat_dmrs in small_dmrs
    should consist of nodes 4 and 5 and the RSTR/H link between them
    """
    best = general_matching.find_best_matches(self.cat_dmrs, self.small_dmrs)[0]
    subgraph = general_matching.get_matched_subgraph(self.small_dmrs, best)

    expected_nodes = [
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5, pred=RealPred('cat', 'n', '1'),
             sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
    ]
    expected_links = [Link(start=4, end=5, rargname='RSTR', post='H')]
    expected = DictDmrs(nodes=expected_nodes, links=expected_links)

    self.assertListEqual(subgraph.nodes, expected.nodes)
    self.assertListEqual(subgraph.links, expected.links)
def test_get_matching_nodeids(self):
    """
    get_matching_nodeids is checked on an exact fit (with and without
    all_surface), an inexact fit, a pair without any match, and a graph
    whose nodes were added out of order
    """
    match_ids = aligned_matching.get_matching_nodeids
    sentence = self.the_dog_chases_the_cat

    # Exact fit, only one match: "the cat" onto "the dog chases the cat"
    exact = match_ids(self.the_cat, sentence)
    self.assertEqual(len(exact), 1)
    self.assertCountEqual(exact[0], [(2, 5), (1, 4)])

    # all_surface=True gives the same first match here
    with_surface = match_ids(self.the_cat, sentence, all_surface=True)
    self.assertListEqual(exact[0], with_surface[0])

    # Extra surface nodes: between dog and cat
    with_surface = match_ids(self.dog_cat, sentence, all_surface=True)
    self.assertCountEqual(with_surface[0],
                          [(2, 5), (1, 2), (None, 3), (None, 4)])

    # Inexact fit: "the dog chases the cat" onto "the cat chases the dog"
    inexact = match_ids(sentence, self.the_cat_chases_the_dog)
    # Two options: "the dog" matches or "the cat" matches,
    # 'chases' doesn't because it's not part of the longest match
    self.assertEqual(len(inexact), 2)
    self.assertCountEqual(inexact, [[(5, 2), (4, 1)], [(2, 5), (1, 4)]])

    # No match found
    nothing = match_ids(self.the_mouse, self.dog_cat)
    self.assertListEqual(nothing, [])

    # Nodes added in reverse nodeid order: should be the same as 'the cat'
    shuffled_cat = ListDmrs(surface='the cat')
    cat_node = Node(nodeid=2,
                    pred=RealPred('cat', 'n', '1'),
                    cfrom=4,
                    cto=7,
                    sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    shuffled_cat.add_node(cat_node)
    the_node = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    shuffled_cat.add_node(the_node)
    shuffled_cat.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
    shuffled = match_ids(shuffled_cat, sentence)
    self.assertListEqual(shuffled, exact)
def compose(self, other, fusion):
    """
    Return a new graph that combines a deep copy of this graph with `other`.

    :param other ComposableDmrs graph to merge in.
    :param fusion Mapping from anchor names of this graph to anchor names of
        `other`; the paired anchor nodes are unified.
    :return The composed graph. Its index and top are taken over from `other`
        only where this graph has none.
    """
    assert isinstance(other, ComposableDmrs)
    result = deepcopy(self)

    # Unify the fused anchors; id_map sends nodeids of other to nodeids of result
    id_map = {}
    for own_anchor, other_anchor in fusion.items():
        target = result.anchors[own_anchor]
        source = other.anchors[other_anchor]
        target.unify(source)
        id_map[source.nodeid] = target.nodeid

    # Every node of other that was not fused is copied in under the nodeid
    # that add_node returns
    for other_id in other:
        if other_id not in id_map:
            clone = deepcopy(other[other_id])
            clone.nodeid = None
            id_map[other_id] = result.add_node(clone)

    # Links of other are re-created between the mapped nodeids
    for link in other.iter_links():
        result.add_link(
            Link(id_map[link.start], id_map[link.end], link.rargname, link.post))

    # Index and top of this graph win; other's are only used to fill a gap
    if result.index is None and other.index is not None:
        result.index = result[id_map[other.index.nodeid]]
    if result.top is None and other.top is not None:
        result.top = result[id_map[other.top.nodeid]]
    return result
def dmrs_mapping(dmrs, search_dmrs, replace_dmrs, equalities=(), hierarchy=None, copy_dmrs=True, iterative=True, all_matches=True, require_connected=True, max_matches=100):
    """
    Replace matches of search_dmrs within dmrs by replace_dmrs.

    Matches are found by an exact (sub)graph matching of search_dmrs against dmrs.
    :param dmrs DMRS graph to map.
    :param search_dmrs DMRS subgraph to replace.
    :param replace_dmrs DMRS subgraph to replace with.
    :param equalities Passed through to dmrs_exact_matching.
    :param hierarchy An optional predicate hierarchy.
    :param copy_dmrs True if DMRS graph argument should be copied before being mapped.
    :param iterative True if all possible mappings should be performed iteratively to the same DMRS graph, instead of a separate copy per mapping (iterative=False requires copy_dmrs=True).
    :param all_matches True if all possible matches should be returned, instead of only the first (or None).
    :param require_connected True if mappings resulting in a disconnected DMRS graph should be ignored.
    :param max_matches: Maximum number of matches.
    :return With copy_dmrs=True, the mapped DMRS graph or None (resp. a list of graphs in case of iterative=False and all_matches=True). With copy_dmrs=False, a boolean telling whether a mapping was performed.
    :raise Exception if max_matches matches were processed without returning.
    """
    assert copy_dmrs or iterative, 'Invalid argument combination.'

    # extract anchor node mapping between search_dmrs and replace_dmrs
    sub_mapping = {}
    optional_nodeids = []
    for search_node in search_dmrs.iter_nodes():
        if not isinstance(search_node, AnchorNode):
            continue
        search_id = search_node.nodeid
        if not search_node.required:
            optional_nodeids.append(search_id)
        for replace_node in replace_dmrs.iter_nodes():
            # a replace node corresponds if it is an anchor node sharing at least one anchor
            shares_anchor = isinstance(replace_node, AnchorNode) and any(
                anchor in replace_node.anchors for anchor in search_node.anchors)
            if shares_anchor:
                assert search_id not in sub_mapping, 'Node matches multiple nodes.' + str(search_node)
                sub_mapping[search_id] = replace_node.nodeid
        if search_id not in sub_mapping:
            assert not search_node.requires_target, 'Un-matched anchor node.'

    # set up variables according to settings
    if iterative:
        result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs
    # matches are always searched in the input graph, also in iterative mode
    matchings = dmrs_exact_matching(search_dmrs, dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True)
    if not iterative and all_matches:
        result = []

    # value signalling "nothing mapped", depending on the return convention
    unmapped = None if copy_dmrs else False

    # continue while there is a match for search_dmrs
    count = 0
    for _ in range(max_matches):
        if not iterative:
            # every match is applied to its own graph
            result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs

        # return mapping(s) if there are no more matches left
        try:
            search_matching = next(matchings)
        except StopIteration:
            if not all_matches:
                return unmapped
            if not iterative:
                return result
            if require_connected and not result_dmrs.is_connected():
                return unmapped
            return result_dmrs if copy_dmrs else count > 0
        count += 1

        # give the anchor nodes of search_dmrs the chance to prepare the mapping
        for nodeid in search_dmrs:
            search_node = search_dmrs[nodeid]
            if isinstance(search_node, AnchorNode):
                search_node.before_map(result_dmrs, search_matching[nodeid])

        # remove nodes in the matched search_dmrs if they are no anchor nodes, otherwise perform map()
        # map() performs the mapping process (with whatever it involves) specific to this node type (e.g. fill underspecified values)
        replace_matching = {}
        for nodeid in search_matching:
            matched_id = search_matching[nodeid]
            if nodeid in sub_mapping:
                replace_id = sub_mapping[nodeid]
                replace_dmrs[replace_id].map(result_dmrs, matched_id, hierarchy=hierarchy)
                replace_dmrs[replace_id].after_map(result_dmrs, matched_id)
                replace_matching[replace_id] = matched_id
            elif matched_id is not None:
                result_dmrs.remove_node(matched_id)

        # add copies of the non-anchor nodes for the matched replace_dmrs
        for nodeid in replace_dmrs:
            if nodeid not in replace_matching:
                new_node = copy.deepcopy(replace_dmrs[nodeid])
                new_node.nodeid = result_dmrs.free_nodeid()
                result_dmrs.add_node(new_node)
                replace_matching[nodeid] = new_node.nodeid

        # set top/index if specified in replace_dmrs
        if replace_dmrs.top is not None:
            result_dmrs.top = result_dmrs[replace_matching[replace_dmrs.top.nodeid]]
        if replace_dmrs.index is not None:
            result_dmrs.index = result_dmrs[replace_matching[replace_dmrs.index.nodeid]]

        # remove all links in the matched search_dmrs
        matched_ids = set(search_matching.values())
        stale_links = [
            link for link in result_dmrs.iter_links()
            if link.start in matched_ids and link.end in matched_ids
        ]
        result_dmrs.remove_links(stale_links)

        # add all links for the matched replace_dmrs
        for link in replace_dmrs.iter_links():
            result_dmrs.add_link(
                Link(replace_matching[link.start], replace_matching[link.end], link.rargname, link.post))

        # add/return result
        if not require_connected or result_dmrs.is_connected():
            if all_matches and not iterative:
                result.append(result_dmrs)
            elif not all_matches:
                return result_dmrs if copy_dmrs else True

    raise Exception('More than {} matches!'.format(max_matches))
def setUp(self):
    """
    Create the Match shared by the tests: two nodeid pairs and one link pair
    """
    nodeid_pairs = [(2, 3), (4, 2)]
    link_pairs = [(Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H'))]
    self.match = general_matching.Match(nodeid_pairs, link_pairs)
def test_Match_add(self):
    """
    Match.add is checked with an empty match, a match named
    incompatible and a match named compatible, added in that order
    """
    match = self.match
    initial_link_pair = (Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H'))

    # Adding an empty match returns None and leaves the match as it was
    self.assertIsNone(match.add(general_matching.Match()))
    self.assertCountEqual(match.nodeid_pairs, [(2, 3), (4, 2)])
    self.assertCountEqual(match.link_pairs, [initial_link_pair])

    # From the incompatible match only the pair (8, 1) is expected to be taken over
    incompatible = general_matching.Match(
        [(1, 2), (8, 1)],
        [(Link(1, 8, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H'))])
    match.add(incompatible)
    self.assertCountEqual(match.nodeid_pairs, [(2, 3), (4, 2), (8, 1)])
    self.assertCountEqual(match.link_pairs, [initial_link_pair])

    # The compatible match is expected to be taken over completely
    added_link_pair = (Link(1, 3, 'ARG1', 'NEQ'), Link(1, 5, 'ARG2', 'NEQ'))
    compatible = general_matching.Match([(1, 5), (3, 4)], [added_link_pair])
    match.add(compatible)
    self.assertCountEqual(match.nodeid_pairs,
                          [(2, 3), (4, 2), (1, 5), (8, 1), (3, 4)])
    self.assertCountEqual(match.link_pairs,
                          [initial_link_pair, added_link_pair])
def _parse_link(string, left_nodeid, right_nodeid, queries, equalities):
    """
    Parse the textual form of a link between two nodes into a Link.

    A link is a line of "-" or "=" characters with an arrow head, "<" at
    the start or ">" at the end, and an optional specification in between.
    :param string Link string, without spaces.
    :param left_nodeid Nodeid of the node written to the left of the link.
    :param right_nodeid Nodeid of the node written to the right of the link.
    :param queries Passed through to _parse_value.
    :param equalities Passed through to _parse_value.
    :return Link running from the node at the tail to the node at the arrow head.
    """
    assert ' ' not in string, 'Links must not contain spaces.'
    lo = 0
    hi = len(string) - 1

    # the arrow head determines the direction
    if string[lo] == '<':  # pointing left
        start, end = right_nodeid, left_nodeid
        lo += 1
    elif string[hi] == '>':  # pointing right
        start, end = left_nodeid, right_nodeid
        hi -= 1
    else:  # invalid link
        assert False, 'Link must have a direction.'

    assert string[lo] in '-=' and string[hi] in '-=', 'Link line must consist of either "-" or "=".'
    link_char = string[lo]

    # skip the line characters on both sides (arbitrary length)
    length = len(string)
    while lo < length and string[lo] == link_char:
        lo += 1
    while hi >= 0 and string[hi] == link_char:
        hi -= 1

    def joined(attribute):
        # query function: comma-joined `attribute` of the links from the matched start to the matched end
        def query(matching, dmrs):
            return ','.join(
                getattr(link, attribute)
                for link in dmrs.get_out(matching[start], itr=True)
                if link.end == matching[end])
        return query

    if lo + 1 < hi:  # explicit specification
        stop = hi + 1
        spec = string[lo:stop]
        if spec == 'rstr':  # rargname RSTR uniquely determines post H
            rargname, post = 'rstr', 'h'
        elif spec == 'eq':  # post EQ uniquely determines rargname None
            rargname, post = None, 'eq'
        else:
            slash = string.find('/', lo, stop)
            if slash < 0:
                # no separator: the whole specification is parsed as one value
                rargname = _parse_value(spec, '?', queries, equalities, joined('labelstring'))
                post = None
            else:
                # an empty side of the separator leaves that part unspecified
                # problem: doesn't combine rargname and post
                rargname = None
                post = None
                if slash != lo:
                    rargname = _parse_value(string[lo:slash], '?', queries, equalities, joined('rargname'))
                if slash + 1 != stop:
                    post = _parse_value(string[slash + 1:stop], '?', queries, equalities, joined('post'))
        return Link(start, end, rargname, post)

    if lo > hi:  # no specification symbol
        if link_char == '=':
            rargname, post = None, 'eq'
        else:
            rargname, post = 'rstr', 'h'
        return Link(start, end, rargname, post)

    symbol = string[lo]

    # post: determined by the number of specification symbols and the line character
    if symbol == '?':  # no equal constraint
        rargname = '?'
        post = '?'
        value = _parse_value(string[lo:hi + 1], None, queries, equalities, joined('labelstring'))
        assert not value
    elif lo == hi:  # one specification symbol, i.e. variable link
        post = 'eq' if link_char == '=' else 'neq'
    elif lo + 1 == hi:  # two specification symbol, i.e. handle link
        assert string[hi] == 'h', 'Second link specification symbol must be "h".'
        post = 'heq' if link_char == '=' else 'h'
    else:
        assert False  # never reached

    # rargname: determined by the first specification symbol
    if symbol == 'n':  # ARG/ARGN (underspecified ARG)
        rargname = 'arg'
    elif symbol in '1234':  # ARG{1,2,3,4}
        rargname = 'arg' + symbol
    elif symbol in 'lr':  # {L,R}-{INDEX,HNDL}
        rargname = symbol.upper() + ('-index' if lo == hi else '-hndl')
    elif symbol != '?':
        assert False, 'Invalid link specification symbol.'
    return Link(start, end, rargname, post)