예제 #1
0
def pair_same_node_groups(dmrs1, dmrs2):
    """ Finds which nodes in dmrs1 are equivalent to which nodes in dmrs2.

        Both DMRSs are partitioned with group_same_nodes and the two resulting
        group lists are walked in parallel (merge style). Two groups are
        paired when their predicates are equal and their first nodes satisfy
        are_equal_nodes.

        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :return A list of tuples (pred, nodeids from dmrs1, nodeids from dmrs2).
                All nodes listed in one tuple are equivalent and pred is their
                common predicate. Tuples appear in the order the groups were
                produced by group_same_nodes, i.e. sorted by str(pred).
    """
    grouped_nodes1 = group_same_nodes(dmrs1.nodes)
    grouped_nodes2 = group_same_nodes(dmrs2.nodes)
    grouped_nodes = []
    i = 0
    j = 0
    while i < len(grouped_nodes1) and j < len(grouped_nodes2):
        pred1, group1 = grouped_nodes1[i]
        pred2, group2 = grouped_nodes2[j]
        if pred1 == pred2 and are_equal_nodes(dmrs1[group1[0]],
                                              dmrs2[group2[0]]):
            grouped_nodes.append((pred1, group1, group2))
            i += 1
            j += 1
        elif str(pred1) > str(pred2):
            # The groups were sorted by str(pred), so the cursors must be
            # advanced using that same key; comparing the pred objects
            # directly can disagree with the sort order.
            j += 1
        else:
            # NOTE(review): this branch is also taken when the preds are equal
            # but the nodes are not, in which case only the dmrs1 cursor moves.
            i += 1
    return grouped_nodes
예제 #2
0
def group_same_nodes(nodes):
    """ Groups nodeids of equivalent nodes into sublists, using are_equal_nodes
        as the equivalency criterion.

        The nodes are sorted by str(node.pred) and scanned once; every node is
        compared against the first node of the group currently being built and
        either joins that group or starts a new one.

        :param nodes An iterable of nodes, each exposing .pred and .nodeid.
        :return A list of tuples (pred, id_list) ordered by str(pred). The pred
                is taken from the first node of the group; the id_list is a
                list of nodeids of equivalent nodes. An empty input yields an
                empty list.
    """
    grouped_nodes = []
    representative = None
    current_group = []
    for node in sorted(nodes, key=lambda n: str(n.pred)):
        # Compare against None explicitly so that a node object that happens
        # to be falsy is still treated as a valid group representative.
        if representative is not None and are_equal_nodes(node,
                                                          representative):
            current_group.append(node.nodeid)
            continue
        if representative is not None:
            # The previous group is complete; store it before starting anew.
            grouped_nodes.append((representative.pred, current_group))
        representative = node
        current_group = [node.nodeid]
    # Flush the last group; with no nodes at all there is nothing to flush.
    if representative is not None:
        grouped_nodes.append((representative.pred, current_group))
    return grouped_nodes
예제 #3
0
def pair_same_node_groups(dmrs1, dmrs2):
    """ Pairs up groups of equivalent nodes from two DMRS graphs.

        Each graph is partitioned with group_same_nodes, then both group lists
        are traversed in parallel. Two groups are paired when their predicates
        are equal and their first nodes satisfy are_equal_nodes; otherwise the
        cursor of one list is advanced, chosen by comparing str(pred).

        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :return A list of tuples (pred, nodeids from dmrs1, nodeids from dmrs2).
                The nodes named in one tuple are equivalent and pred is their
                common predicate. Tuples follow the order of the groups
                returned by group_same_nodes.
    """
    groups_small = group_same_nodes(dmrs1.nodes)
    groups_large = group_same_nodes(dmrs2.nodes)
    total_small = len(groups_small)
    total_large = len(groups_large)

    paired = []
    pos_small, pos_large = 0, 0
    while pos_small < total_small and pos_large < total_large:
        pred_small, ids_small = groups_small[pos_small]
        pred_large, ids_large = groups_large[pos_large]

        same_group = pred_small == pred_large and are_equal_nodes(
            dmrs1[ids_small[0]], dmrs2[ids_large[0]])
        if same_group:
            paired.append((pred_small, ids_small, ids_large))
            pos_small += 1
            pos_large += 1
            continue

        # No pairing here: move past whichever group sorts first by str(pred).
        if str(pred_small) > str(pred_large):
            pos_large += 1
        else:
            pos_small += 1
    return paired
예제 #4
0
def group_same_nodes(nodes):
    """ Groups nodeids of equivalent nodes into sublists, using are_equal_nodes
        as the equivalency criterion.

        The nodes are sorted by str(node.pred) and scanned once; every node is
        compared against the first node of the group currently being built and
        either joins that group or starts a new one.

        :param nodes An iterable of nodes, each exposing .pred and .nodeid.
        :return A list of tuples (pred, id_list) ordered by str(pred). The pred
                is taken from the first node of the group; the id_list is a
                list of nodeids of equivalent nodes. An empty input yields an
                empty list.
    """
    grouped_nodes = []
    group_node_type = None
    current_group = []
    sorted_nodes = sorted(nodes, key=lambda n: str(n.pred))
    for node in sorted_nodes:
        if group_node_type is None:
            # First node overall: it opens the first group. The explicit None
            # test keeps a falsy node object from being mistaken for "unset".
            group_node_type = node
            current_group.append(node.nodeid)
        elif are_equal_nodes(node, group_node_type):
            current_group.append(node.nodeid)
        else:
            # The node differs from the group's first node: close the group
            # and start a new one with this node as its reference.
            grouped_nodes.append((group_node_type.pred, current_group))
            current_group = [node.nodeid]
            group_node_type = node
    # Flush the last open group; skipped when there were no nodes at all.
    if group_node_type is not None:
        grouped_nodes.append((group_node_type.pred, current_group))
    return grouped_nodes
예제 #5
0
def extend_match(match, start_nodeids, dmrs1, dmrs2, underspecified=True):
    """ Grows an existing match outwards from a pair of equivalent nodes.

        :param match: A Match object to be extended; it is updated in place.
        :param start_nodeids: A tuple (nodeid in dmrs1, nodeid in dmrs2) of
                              matching nodes from which to extend the match.
        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param underspecified: Passed on to are_equal_nodes when the nodes at
                               the far end of a matched link are compared.

        The start pair is recorded in match.nodeid_pairs. Then the outgoing,
        incoming and EQ links of both start nodes are collected and each not
        yet matched link of dmrs1 is paired with the first not yet matched
        link of dmrs2 that satisfies are_equal_links and does not contradict
        the node pairs already recorded. The node pairs at the other end of
        the newly paired links are queued, and the function calls itself for
        every queued pair that is not recorded yet and satisfies
        are_equal_nodes.

        Nothing is returned; the result is the updated match.nodeid_pairs and
        match.link_pairs.
    """
    def incident_links(dmrs, nodeid):
        # Outgoing, incoming and EQ links of one node in a single collection.
        found = dmrs.get_out(nodeid)
        found.update(dmrs.get_in(nodeid))
        found.update(dmrs.get_eq(nodeid))
        return found

    match.nodeid_pairs.append(start_nodeids)
    # Snapshot of the dmrs1 nodes matched so far (including the start node).
    matched_first = {pair[0] for pair in match.nodeid_pairs}
    seen_links1 = {pair[0] for pair in match.link_pairs}
    seen_links2 = {pair[1] for pair in match.link_pairs}

    pending = []
    start_id1, start_id2 = start_nodeids
    links1 = incident_links(dmrs1, start_id1)
    links2 = incident_links(dmrs2, start_id2)

    for link1 in links1:
        if link1 in seen_links1:
            continue
        for link2 in links2:
            if link2 in seen_links2:
                continue
            if not are_equal_links(link1, link2, dmrs1, dmrs2):
                continue
            # An endpoint of link1 that is already matched must map onto the
            # corresponding endpoint of link2; otherwise try the next link2.
            if (link1.start in matched_first
                    and match.get_second(link1.start) != link2.start):
                continue
            if (link1.end in matched_first
                    and match.get_second(link1.end) != link2.end):
                continue

            match.link_pairs.append((link1, link2))
            seen_links1.add(link1)
            seen_links2.add(link2)

            # Pick the endpoint of each link that is not the start node.
            if link1.end == start_id1:
                far1 = link1.start
            else:
                far1 = link1.end
            if link2.end == start_id2:
                far2 = link2.start
            else:
                far2 = link2.end
            pending.append((far1, far2))
            break

    for candidate in pending:
        if candidate in match.nodeid_pairs:
            continue
        nodeid1, nodeid2 = candidate
        if are_equal_nodes(dmrs1[nodeid1], dmrs2[nodeid2], underspecified):
            extend_match(match, candidate, dmrs1, dmrs2, underspecified)
예제 #6
0
def find_match(start_id1, start_id2, dmrs1, dmrs2, matched_nodes, matched_links):
    """ Finds a match between dmrs1 and dmrs2 by traversing both graphs in
        parallel from a pair of equivalent start nodes.

        :param start_id1 A nodeid of a node from dmrs1 from which the graph traversal should be started.
        :param start_id2 A nodeid of a node from dmrs2 from which the graph traversal should be started.
        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param matched_nodes Node pairs matched so far during the graph traversal.
                             Gets updated during recursion. Use an empty list for the top call.
        :param matched_links Link pairs matched so far during the graph traversal.
                             Gets updated during recursion. Use an empty list for the top call.

        The two start nodes must be equivalent by the are_equal_nodes
        criterion (this is asserted).

        Outgoing links of the two start nodes are paired first, then incoming
        links: each unmatched link of dmrs1 is paired with the first unmatched
        link of dmrs2 that satisfies are_equal_links. The node pairs at the
        other end of the paired links are queued, and the function calls
        itself for every queued pair not yet in matched_nodes.

        :return A Match built from the updated matched_nodes and matched_links.
    """
    assert(are_equal_nodes(dmrs1[start_id1], dmrs2[start_id2]))
    matched_nodes.append((start_id1, start_id2))

    pending = []

    def pair_up(candidates1, candidates2, far_end):
        # Pair each unmatched link of dmrs1 with the first equal, unmatched
        # link of dmrs2 and queue the nodes found at their far ends.
        for link1 in candidates1:
            if link1 in [done[0] for done in matched_links]:
                continue
            for link2 in candidates2:
                if link2 in [done[1] for done in matched_links]:
                    continue
                if are_equal_links(link1, link2, dmrs1, dmrs2):
                    matched_links.append((link1, link2))
                    pending.append((far_end(link1), far_end(link2)))
                    break

    # Outgoing links lead to their end node, incoming links to their start.
    pair_up(dmrs1.get_out(start_id1), dmrs2.get_out(start_id2),
            lambda link: link.end)
    pair_up(dmrs1.get_in(start_id1), dmrs2.get_in(start_id2),
            lambda link: link.start)

    for next_pair in pending:
        if next_pair in matched_nodes:
            continue
        next_id1, next_id2 = next_pair
        find_match(next_id1, next_id2, dmrs1, dmrs2, matched_nodes, matched_links)
    return Match(matched_nodes, matched_links)
예제 #7
0
def fill_tree(root, sorted_nodes1, sorted_nodes2):
    """ Recursively attaches to root a tree of order-preserving matching
        subsequences between sorted_nodes1 and sorted_nodes2.

        Every pair of positions lying after root.id1 and root.id2 whose nodes
        satisfy are_equal_nodes becomes a child TreeNode of root, and the same
        procedure is applied to that child before it is attached.

        :param root A TreeNode root of the constructed tree; its id1 and id2
                    mark the positions after which the search starts.
        :param sorted_nodes1 A list of nodes (from the smaller DMRS) sorted by sort_nodes.
        :param sorted_nodes2 A list of nodes (from the larger DMRS) sorted by sort_nodes.
    """
    for pos_small in range(root.id1 + 1, len(sorted_nodes1)):
        candidate = sorted_nodes1[pos_small]
        for pos_large in range(root.id2 + 1, len(sorted_nodes2)):
            if not are_equal_nodes(candidate, sorted_nodes2[pos_large]):
                continue
            subtree = TreeNode(pos_small, pos_large)
            # Build the child's own subtree before hanging it under root.
            fill_tree(subtree, sorted_nodes1, sorted_nodes2)
            root.add_child(subtree)
예제 #8
0
def find_match(start_id1, start_id2, dmrs1, dmrs2, matched_nodes,
               matched_links):
    """ Finds a match between dmrs1 and dmrs2 by traversing both graphs in
        parallel from a pair of equivalent start nodes.

        :param start_id1 A nodeid of a node from dmrs1 from which the graph traversal should be started.
        :param start_id2 A nodeid of a node from dmrs2 from which the graph traversal should be started.
        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param matched_nodes Node pairs matched so far during the graph traversal.
                             Gets updated during recursion. Use an empty list for the top call.
        :param matched_links Link pairs matched so far during the graph traversal.
                             Gets updated during recursion. Use an empty list for the top call.

        The two start nodes must be equivalent by the are_equal_nodes
        criterion (this is asserted).

        The outgoing and incoming links of each start node are pooled. Each
        unmatched link of dmrs1 is paired with the first unmatched link of
        dmrs2 that satisfies are_equal_links. The node pairs at the other end
        of the paired links are queued, and the function calls itself for
        every queued pair not yet in matched_nodes.

        :return A Match built from the updated matched_nodes and matched_links.
    """
    assert (are_equal_nodes(dmrs1[start_id1], dmrs2[start_id2]))
    matched_nodes.append((start_id1, start_id2))

    pending = []

    # Pool the outgoing and incoming links of each start node.
    links1 = dmrs1.get_out(start_id1)
    links1.update(dmrs1.get_in(start_id1))
    links2 = dmrs2.get_out(start_id2)
    links2.update(dmrs2.get_in(start_id2))

    for link1 in links1:
        if link1 in [done[0] for done in matched_links]:
            continue
        for link2 in links2:
            if link2 in [done[1] for done in matched_links]:
                continue
            if not are_equal_links(link1, link2, dmrs1, dmrs2):
                continue
            matched_links.append((link1, link2))
            # Pick the endpoint of each link that is not the start node.
            if link1.end == start_id1:
                far1 = link1.start
            else:
                far1 = link1.end
            if link2.end == start_id2:
                far2 = link2.start
            else:
                far2 = link2.end
            pending.append((far1, far2))
            break

    for next_pair in pending:
        if next_pair in matched_nodes:
            continue
        next_id1, next_id2 = next_pair
        find_match(next_id1, next_id2, dmrs1, dmrs2, matched_nodes,
                   matched_links)
    return Match(matched_nodes, matched_links)
예제 #9
0
def find_all_matches(dmrs1, dmrs2, underspecified=False):
    """ Finds all regions with potential matches between two DMRS graphs.

        :param dmrs1 A DMRS object. For matching, the small dmrs.
        :param dmrs2 A DMRS object. For matching, the large dmrs.
        :param underspecified: Forwarded to pair_same_node_groups,
                               are_equal_nodes and extend_match.
                               NOTE(review): presumably, when True,
                               underspecified nodes in dmrs1 may be matched to
                               more specific ones in dmrs2 - confirm against
                               are_equal_nodes.

        The function starts an extend_match call from every pair of equivalent
        nodes that has not already been covered by an earlier match. To narrow
        down the search space, only RealPred nodes whose lemma is not 'a' or
        'the' are used as start nodes; if that leaves no start nodes at all,
        every node pairing is used instead.

        :return A list of Match objects where pairs come from (dmrs1, dmrs2).
    """
    def is_start_candidate(pairing):
        # Keep only RealPred groups, excluding the quantifiers 'a' and 'the'.
        pred = pairing[0]
        return isinstance(pred, RealPred) and pred.lemma not in ['a', 'the']

    node_pairings = pair_same_node_groups(dmrs1, dmrs2, underspecified)
    matches = []
    # A set gives constant-time membership tests in the inner loop. Node id
    # pairs are tuples of hashable ids (extend_match itself stores the ids in
    # a set).
    checked_node_pairs = set()

    # Sort pairings so that the ones with fewer matching combinations are
    # considered first.
    sorted_pairings = sorted(
        filter(is_start_candidate, node_pairings),
        key=lambda pairing: len(pairing[1]) * len(pairing[2]))
    if not sorted_pairings:
        sorted_pairings = node_pairings

    for _pred, group1, group2 in sorted_pairings:
        for pair in product(group1, group2):
            if pair in checked_node_pairs:
                continue
            if not are_equal_nodes(dmrs1[pair[0]], dmrs2[pair[1]],
                                   underspecified=underspecified):
                continue
            match = Match([], [])
            extend_match(match, (pair[0], pair[1]), dmrs1, dmrs2,
                         underspecified)
            # Every pair covered by this match is skipped as a future start.
            checked_node_pairs.update(match.nodeid_pairs)
            matches.append(match)
    return matches