def find_match(start_id1, start_id2, dmrs1, dmrs2, matched_nodes, matched_links):
    """
    Grow a match between dmrs1 and dmrs2 outwards from a pair of start nodes.

    :param start_id1: A nodeid of a node from dmrs1 from which the graph
                      traversal should be started.
    :param start_id2: A nodeid of a node from dmrs2 from which the graph
                      traversal should be started.
    :param dmrs1: A DMRS object. For matching, the small dmrs.
    :param dmrs2: A DMRS object. For matching, the large dmrs.
    :param matched_nodes: Node pairs matched so far during the traversal.
                          Updated in place during recursion. Use an empty
                          list for the top call.
    :param matched_links: Link pairs matched so far during the traversal.
                          Updated in place during recursion. Use an empty
                          list for the top call.

    The two start nodes must be equivalent by the are_equal_nodes criterion
    (this is asserted). Outgoing links of the two nodes are paired up first
    (equivalence decided by are_equal_links), then incoming links. For every
    paired link the nodes at the far end are queued, and the function then
    recurses on each queued pair that has not been matched yet.

    :return: A Match built from the updated matched_nodes and matched_links.
    """
    assert are_equal_nodes(dmrs1[start_id1], dmrs2[start_id2])
    matched_nodes.append((start_id1, start_id2))
    pending = []

    def pair_up(small_links, large_links, far_side):
        # far_side names the link attribute ("end" or "start") that holds
        # the node on the other side of the link from the start node.
        for small_link in small_links:
            if small_link in [done[0] for done in matched_links]:
                continue
            for large_link in large_links:
                if large_link in [done[1] for done in matched_links]:
                    continue
                if are_equal_links(small_link, large_link, dmrs1, dmrs2):
                    matched_links.append((small_link, large_link))
                    pending.append((getattr(small_link, far_side),
                                    getattr(large_link, far_side)))
                    # Each small link is paired with at most one large link.
                    break

    # Outgoing links lead to the node at their end ...
    pair_up(dmrs1.get_out(start_id1), dmrs2.get_out(start_id2), "end")
    # ... incoming links lead back to the node at their start.
    pair_up(dmrs1.get_in(start_id1), dmrs2.get_in(start_id2), "start")

    for next_pair in pending:
        if next_pair in matched_nodes:
            continue
        small_id, large_id = next_pair
        find_match(small_id, large_id, dmrs1, dmrs2,
                   matched_nodes, matched_links)

    return Match(matched_nodes, matched_links)
def get_link_diff(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Compare the outgoing links of a small dmrs with those of a matched
    subgraph.

    :param small_dmrs: The graph being matched. Must be iterable over its
                       nodeids and provide get_out(nodeid).
    :param matched_subgraph: The subgraph returned as a match. Must provide
                             get_out(nodeid).
    :param matching_nodeids: A list of (small_nodeid, subgraph_nodeid)
                             pairs. A falsy small_nodeid marks a subgraph
                             node with no counterpart in small_dmrs.
    :return: Three lists of links:
             1) links present only in the small dmrs,
             2) links present only in the matched subgraph,
             3) common links (taken from the small dmrs).
    """
    both = []
    small_only = []
    subgraph_only = []

    for small_nodeid, subgraph_nodeid in matching_nodeids:
        if not small_nodeid:
            # Subgraph node without a partner: all of its links are extra.
            subgraph_only.extend(matched_subgraph.get_out(subgraph_nodeid))
            continue

        small_links = small_dmrs.get_out(small_nodeid)
        subgraph_links = list(matched_subgraph.get_out(subgraph_nodeid))
        # links_flag[i] becomes True once subgraph_links[i] has a partner.
        links_flag = [False] * len(subgraph_links)

        for link1 in small_links:
            for i, link2 in enumerate(subgraph_links):
                if are_equal_links(link1, link2, small_dmrs, matched_subgraph):
                    both.append(link1)
                    links_flag[i] = True
                    break
            else:
                # No subgraph link was equivalent to link1.
                small_only.append(link1)

        # Previously this read from an undefined name `links`.
        subgraph_only.extend(
            link for link, flagged in zip(subgraph_links, links_flag)
            if not flagged)

    # Built once; also copes with an empty matching_nodeids, for which
    # list(zip(*matching_nodeids))[0] used to raise IndexError.
    matched_small_ids = {pair[0] for pair in matching_nodeids}
    for nodeid in small_dmrs:
        if nodeid not in matched_small_ids:
            small_only.extend(small_dmrs.get_out(nodeid))

    return small_only, subgraph_only, both
def extend_match(match, start_nodeids, dmrs1, dmrs2, underspecified=True):
    """
    Extend an existing match between dmrs1 and dmrs2 in place.

    :param match: A Match object to be extended. Its nodeid_pairs and
                  link_pairs lists are appended to.
    :param start_nodeids: A tuple of matching nodeids (one from dmrs1, one
                          from dmrs2) from which to start the extension.
    :param dmrs1: A DMRS object. For matching, the small dmrs.
    :param dmrs2: A DMRS object. For matching, the large dmrs.
    :param underspecified: Passed on to are_equal_nodes. If True (default),
                           treat underspecified nodes as equal.

    The start pair is recorded first. Then every not-yet-matched link
    touching the first start node (outgoing, incoming or EQ) is compared with
    every not-yet-matched link touching the second start node, using
    are_equal_links. A candidate pairing is rejected when an endpoint of the
    dmrs1 link is already matched to a different dmrs2 node than the
    corresponding endpoint of the dmrs2 link. For each accepted pairing the
    nodes at the far end of the links are queued, and the function recurses
    on every queued pair that is new and passes are_equal_nodes.

    Nothing is returned; the result is the mutated ``match``.
    """
    match.nodeid_pairs.append(start_nodeids)
    seen_small_ids = {pair[0] for pair in match.nodeid_pairs}

    used_links1, used_links2 = set(), set()
    if match.link_pairs:
        used_links1, used_links2 = (
            set(side) for side in zip(*match.link_pairs))

    pending = []
    start_id1, start_id2 = start_nodeids

    # All links touching each start node, regardless of direction.
    links1 = dmrs1.get_out(start_id1)
    links1.update(dmrs1.get_in(start_id1))
    links1.update(dmrs1.get_eq(start_id1))
    links2 = dmrs2.get_out(start_id2)
    links2.update(dmrs2.get_in(start_id2))
    links2.update(dmrs2.get_eq(start_id2))

    for link1 in links1:
        if link1 in used_links1:
            continue
        for link2 in links2:
            if link2 in used_links2:
                continue
            if not are_equal_links(link1, link2, dmrs1, dmrs2):
                continue
            # Reject pairings that contradict an existing node match.
            if (link1.start in seen_small_ids
                    and match.get_second(link1.start) != link2.start):
                continue
            if (link1.end in seen_small_ids
                    and match.get_second(link1.end) != link2.end):
                continue

            match.link_pairs.append((link1, link2))
            used_links1.add(link1)
            used_links2.add(link2)

            # Pick whichever endpoint is not the start node itself.
            if link1.end == start_id1:
                far1 = link1.start
            else:
                far1 = link1.end
            if link2.end == start_id2:
                far2 = link2.start
            else:
                far2 = link2.end
            pending.append((far1, far2))
            break

    for candidate in pending:
        if candidate in match.nodeid_pairs:
            continue
        far1, far2 = candidate
        if are_equal_nodes(dmrs1[far1], dmrs2[far2], underspecified):
            extend_match(match, candidate, dmrs1, dmrs2, underspecified)
def get_link_diff(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Compare the links of a small dmrs with those of a matched subgraph.

    :param small_dmrs: A Dmrs which we're matching.
    :param matched_subgraph: A Dmrs. A subgraph of the larger DMRS returned
                             as a match for small_dmrs.
    :param matching_nodeids: A list of pairs of nodeids. The first nodeid in
                             each pair comes from small_dmrs, the second
                             comes from the large dmrs. A falsy first nodeid
                             marks a subgraph node with no counterpart.
    :return: Three lists of links:
             1) links present only in the small dmrs,
             2) links present only in the matched subgraph,
             3) common links (taken from the small dmrs).
    """
    both = []
    small_only = []
    subgraph_only = []
    checked_eq_links = set()

    for small_nodeid, subgraph_nodeid in matching_nodeids:
        if not small_nodeid:
            # Subgraph node without a partner: all of its links are extra.
            subgraph_only.extend(matched_subgraph.get_out(subgraph_nodeid))
            continue

        small_links = (small_dmrs.get_out(small_nodeid)
                       | small_dmrs.get_eq(small_nodeid))
        subgraph_links = list(matched_subgraph.get_out(subgraph_nodeid))
        # links_flag[i] becomes True once subgraph_links[i] has a partner.
        links_flag = [False] * len(subgraph_links)

        for link1 in small_links:
            # Links without a rargname are treated as EQ links here; each
            # one is counted only the first time it is seen.
            if not link1.rargname:
                if link1 in checked_eq_links:
                    continue
                checked_eq_links.add(link1)

            for i, link2 in enumerate(subgraph_links):
                if are_equal_links(link1, link2, small_dmrs, matched_subgraph):
                    both.append(link1)
                    links_flag[i] = True
                    break
            else:
                # No subgraph link was equivalent to link1.
                small_only.append(link1)

        subgraph_only.extend(
            link for link, flagged in zip(subgraph_links, links_flag)
            if not flagged)

    # NOTE(review): the seen-set is reset here, so an EQ link joining a
    # matched node and an unmatched node is visited again below - confirm
    # that reporting it a second time is intended.
    checked_eq_links = set()

    # Built once; also copes with an empty matching_nodeids, for which
    # list(zip(*matching_nodeids))[0] used to raise IndexError.
    matched_small_ids = {pair[0] for pair in matching_nodeids}
    for nodeid in small_dmrs:
        if nodeid not in matched_small_ids:
            small_only.extend(small_dmrs.get_out(nodeid))
            eq_links = small_dmrs.get_eq(nodeid)
            small_only.extend(
                {link for link in eq_links if link not in checked_eq_links})
            checked_eq_links.update(eq_links)

    return small_only, subgraph_only, both
def find_match(start_id1, start_id2, dmrs1, dmrs2, matched_nodes, matched_links):
    """
    Finds a match between dmrs1 and dmrs2.

    :param start_id1: A nodeid of a node from dmrs1 from which the graph
                      traversal should be started.
    :param start_id2: A nodeid of a node from dmrs2 from which the graph
                      traversal should be started.
    :param dmrs1: A DMRS object. For matching, the small dmrs.
    :param dmrs2: A DMRS object. For matching, the large dmrs.
    :param matched_nodes: Nodes matched so far during the graph traversal.
                          Gets updated during recursion. Use an empty list
                          for the top call.
    :param matched_links: Links matched so far during the graph traversal.
                          Gets updated during recursion. Use an empty list
                          for the top call.

    The two start nodes should be equivalent by are_equal_nodes criterion
    (this is asserted). The function finds any links shared by the two start
    nodes (equivalent according to are_equal_links), in either direction, and
    follows them. The pairs of nodes at the other end of the links are added
    to a queue. Then the function calls itself recursively with the queued
    pairs of nodes as the start nodes. The recursion stops when no shared
    links are found and the queue is empty.

    :return: A Match composed of updated matched_nodes, matched_links.
    """
    assert are_equal_nodes(dmrs1[start_id1], dmrs2[start_id2])
    matched_nodes.append((start_id1, start_id2))

    # All links touching each start node, outgoing and incoming.
    links1 = dmrs1.get_out(start_id1)
    links1.update(dmrs1.get_in(start_id1))
    links2 = dmrs2.get_out(start_id2)
    links2.update(dmrs2.get_in(start_id2))

    # Build the already-matched lookups once and keep them in sync below,
    # instead of rebuilding two lists inside the nested loops. Recursion only
    # happens after the pairing loop, so nothing else appends to
    # matched_links while these sets are in use.
    used_links1 = {pair[0] for pair in matched_links}
    used_links2 = {pair[1] for pair in matched_links}

    node_queue = []
    for link1 in links1:
        if link1 in used_links1:
            continue
        for link2 in links2:
            if link2 in used_links2:
                continue
            if are_equal_links(link1, link2, dmrs1, dmrs2):
                matched_links.append((link1, link2))
                used_links1.add(link1)
                used_links2.add(link2)
                # Follow the link to whichever endpoint is not the start node.
                paired1 = link1.start if link1.end == start_id1 else link1.end
                paired2 = link2.start if link2.end == start_id2 else link2.end
                node_queue.append((paired1, paired2))
                # Each dmrs1 link is paired with at most one dmrs2 link.
                break

    for nodeid1, nodeid2 in node_queue:
        if (nodeid1, nodeid2) not in matched_nodes:
            find_match(nodeid1, nodeid2, dmrs1, dmrs2,
                       matched_nodes, matched_links)

    return Match(matched_nodes, matched_links)
def get_link_diff(small_dmrs, matched_subgraph, matching_nodeids):
    """
    Compare the outgoing links of a small dmrs with those of a matched
    subgraph.

    :param small_dmrs: A Dmrs which we're matching.
    :param matched_subgraph: A Dmrs. A subgraph of the larger DMRS returned
                             as a match for small_dmrs.
    :param matching_nodeids: A list of pairs of nodeids. The first nodeid in
                             each pair comes from small_dmrs, the second
                             comes from the large dmrs. A falsy first nodeid
                             marks a subgraph node with no counterpart.
    :return: Three lists of links:
             1) links present only in the small dmrs,
             2) links present only in the matched subgraph,
             3) common links (taken from the small dmrs).
    """
    both = []
    small_only = []
    subgraph_only = []

    for small_nodeid, subgraph_nodeid in matching_nodeids:
        if not small_nodeid:
            # Subgraph node without a partner: all of its links are extra.
            subgraph_only.extend(matched_subgraph.get_out(subgraph_nodeid))
            continue

        small_links = small_dmrs.get_out(small_nodeid)
        subgraph_links = list(matched_subgraph.get_out(subgraph_nodeid))
        # links_flag[i] becomes True once subgraph_links[i] has a partner.
        links_flag = [False] * len(subgraph_links)

        for link1 in small_links:
            for i, link2 in enumerate(subgraph_links):
                if are_equal_links(link1, link2, small_dmrs, matched_subgraph):
                    both.append(link1)
                    links_flag[i] = True
                    break
            else:
                # No subgraph link was equivalent to link1.
                small_only.append(link1)

        subgraph_only.extend(
            link for link, flagged in zip(subgraph_links, links_flag)
            if not flagged)

    # Built once; also copes with an empty matching_nodeids, for which
    # list(zip(*matching_nodeids))[0] used to raise IndexError.
    matched_small_ids = {pair[0] for pair in matching_nodeids}
    for nodeid in small_dmrs:
        if nodeid not in matched_small_ids:
            small_only.extend(small_dmrs.get_out(nodeid))

    return small_only, subgraph_only, both