Beispiel #1
0
    def test_iter_incoming(self):
        """
        iter_incoming should return an iterator over the links ending at a
        node, and should raise PydmrsValueError for nodeid 15.
        """
        graph = self.test_dmrs

        with self.assertRaises(PydmrsValueError):
            graph.iter_incoming(15)

        graph.add_link(Link(4, 2, 'None', 'EQ'))
        incoming = graph.iter_incoming(2)
        # The return value has to be an iterator, not a container
        self.assertTrue(hasattr(incoming, '__next__'))
        # The freshly added EQ link is expected among the incoming links
        expected_links = [
            Link(1, 2, 'RSTR', 'H'),
            Link(3, 2, 'ARG1', 'NEQ'),
            Link(4, 2, None, 'EQ'),
        ]
        self.assertCountEqual(list(incoming), expected_links)

        # TODO: Treat EQ links somehow.
        # Test e.g.
        # self.test_dmrs.add_link(Link(2, 4, 'None', 'EQ'))
        # in_it = self.test_dmrs.iter_incoming(2)
        # self.assertIn(Link(2, 4, 'None', 'EQ'), list(in_it))

        # Node 3 is expected to have no incoming links: the iterator is
        # exhausted straight away
        exhausted = graph.iter_incoming(3)
        with self.assertRaises(StopIteration):
            next(exhausted)
Beispiel #2
0
 def test_Match_init(self):
     """
     A Match built without arguments should have empty pair lists;
     the fixture Match should expose the pairs it was built from.
     """
     # A fresh Match is created for each attribute that is checked
     for attribute in ('nodeid_pairs', 'link_pairs'):
         self.assertEqual(getattr(general_matching.Match(), attribute), [])

     expected_nodeid_pairs = [(2, 3), (4, 2)]
     expected_link_pairs = [
         (Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H')),
     ]
     self.assertCountEqual(self.match.nodeid_pairs, expected_nodeid_pairs)
     self.assertCountEqual(self.match.link_pairs, expected_link_pairs)
Beispiel #3
0
 def compose(self, other, fusion=None, other_head=False, hierarchy=None):
     """
     Merge the graph `other` into this graph, in place.

     Anchor nodes of the two graphs are unified pairwise, the remaining
     nodes of `other` are added to this graph, and the links of `other`
     are re-created between the corresponding nodes of this graph.
     Nothing is returned.

     Note that `other` is modified as well: the nodeid of each of its
     nodes is overwritten while the nodes are transferred.

     :param other: the Dmrs to merge in (asserted to be a Dmrs)
     :param fusion: optional mapping from anchor names of this graph to
         anchor names of `other`. If None, anchors carrying the same name
         in both graphs are unified.
     :param other_head: if True, links of `other` replace existing links
         between the same pair of nodes, and index, top and anchors are
         taken over from `other`.
     :param hierarchy: passed on to Node.unify
     """
     assert isinstance(other, Dmrs)
     # maps nodeids of `other` to the nodeids of the matching nodes in self
     nodeid_mapping = dict()
     # unify anchors
     if fusion is None:
         # no explicit fusion: pair up the anchors whose names are identical
         for anchor1 in self.anchors:
             for anchor2 in other.anchors:
                 if anchor1 != anchor2:
                     continue
                 node1 = self.anchors[anchor1]
                 node2 = other.anchors[anchor2]
                 node1.unify(node2, hierarchy=hierarchy)
                 nodeid_mapping[node2.nodeid] = node1.nodeid
     else:
         # explicit fusion: a missing anchor name raises a KeyError here
         for anchor1, anchor2 in fusion.items():
             # if anchor1 not in self.anchors or anchor2 not in other.anchors:
             #     continue
             node1 = self.anchors[anchor1]
             node2 = other.anchors[anchor2]
             node1.unify(node2, hierarchy=hierarchy)
             nodeid_mapping[node2.nodeid] = node1.nodeid
     # add missing nodes, update node ids
     for node2 in other.iter_nodes():
         nodeid2 = node2.nodeid
         if nodeid2 in nodeid_mapping:
             # already unified with a node of self: adopt that node's id
             node2.nodeid = nodeid_mapping[nodeid2]
         else:
             # not unified: the node object itself is added to self;
             # add_node's return value is recorded as its new id
             node2.nodeid = None
             nodeid_mapping[nodeid2] = self.add_node(node2)
     # add missing links, update existing links
     # (start, end) pairs already linked in self before the merge; this
     # set is not updated while links are added below
     links1 = set((link1.start, link1.end) for link1 in self.iter_links())
     for link2 in other.iter_links():
         start = nodeid_mapping[link2.start]
         end = nodeid_mapping[link2.end]
         if (start, end) not in links1:
             link1 = Link(start, end, link2.rargname, link2.post)
             self.add_link(link1)
         if other_head and (start, end) in links1:
             # `other` takes precedence: replace the existing link label
             self.remove_link((start, end))
             link1 = Link(start, end, link2.rargname, link2.post)
             self.add_link(link1)
     # update index and top
     # NOTE(review): the lookups below use the nodeids of `other`'s nodes
     # after the rewrite above; for newly added nodes this presumably
     # relies on add_node storing the new id on the node - confirm.
     if other_head:
         if other.index is None:
             self.index = None
         else:
             self.index = self[other.index.nodeid]
         if other.top is None:
             self.top = None
         else:
             self.top = self[other.top.nodeid]
     # set anchors
     if other_head:
         self.anchors = {
             anchor: self[node2.nodeid]
             for anchor, node2 in other.anchors.items()
         }
Beispiel #4
0
 def test_get_missing_elements(self):
     """
     get_missing_elements should report the nodeids and links of
     "the dog chases the mouse" that the best match does not cover.
     """
     best_match = general_matching.find_best_matches(
         examples_dmrs.the_dog_chases_the_mouse(), self.small_dmrs)[0]
     # A fresh copy of the example graph is built for the evaluation call
     missing_elements = pydmrs.matching.match_evaluation.get_missing_elements(
         best_match, examples_dmrs.the_dog_chases_the_mouse())

     expected_missing = [
         4,
         5,
         Link(3, 5, 'ARG2', 'NEQ'),
         Link(4, 5, 'RSTR', 'H'),
     ]
     self.assertCountEqual(missing_elements, expected_missing)
Beispiel #5
0
    def test_Link_new(self):
        """
        Links should have exactly four slots (start, end, rargname, post).
        The constructor should take either positional or keyword arguments.
        The slots should be accessible by attribute names.
        Empty or null-like rargname/post values should be stored as None,
        and a link from a node to itself should emit a warning.
        """
        # Check four arguments
        self.assert_ex_link(Link(0, 1, 'RSTR', 'H'))
        self.assert_ex_link(Link(start=0, end=1, rargname='RSTR', post='H'))

        # Check None values
        self.assertIsNone(Link(0, 1, '', 'H').rargname)
        self.assertIsNone(Link(0, 1, 'RSTR', 'NONE').post)
        self.assertIsNone(Link(0, 1, 'NULL', 'H').rargname)
        self.assertIsNone(Link(0, 1, 'RSTR', 'NIL').post)

        # Check wrong numbers of arguments
        with self.assertRaises(TypeError):
            Link(0, 1, 2)
        with self.assertRaises(TypeError):
            Link(0, 1, 2, 3, 4)

        # Check equal start and end.
        # Warnings are escalated to exceptions only inside catch_warnings(),
        # which restores the previous filter state on exit - also when the
        # assertion fails. Calling resetwarnings() afterwards would instead
        # wipe every filter configured by the test runner.
        with warnings.catch_warnings():
            warnings.simplefilter('error')
            with self.assertRaises(Warning):
                Link(0, 0, 1, 2)
Beispiel #6
0
def the_dog_chases_the_cat_and_the_mouse():
    """
    Build the example DictDmrs with nine nodes and eight links: three
    'the'-quantified nouns (dog, cat, mouse), the verb 'chase', and an
    'and' node (with its own udef_q quantifier) joining cat and mouse.
    Node 3 ('chase') is both index and top.
    """
    def third_person_singular():
        # A separate sortinfo object is created for every noun
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q')),
        Node(nodeid=2,
             pred=RealPred('dog', 'n', '1'),
             sortinfo=third_person_singular()),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             sortinfo=EventSortinfo(sf='prop', tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5,
             pred=RealPred('cat', 'n', '1'),
             sortinfo=third_person_singular()),
        Node(nodeid=6, pred=GPred('udef_q')),
        Node(nodeid=7,
             pred=RealPred('and', 'c'),
             sortinfo=InstanceSortinfo(pers='3', num='pl')),
        Node(nodeid=8, pred=RealPred('the', 'q')),
        Node(nodeid=9,
             pred=RealPred('mouse', 'n', '1'),
             sortinfo=third_person_singular()),
    ]
    # Links are given as (start, end, rargname, post)
    links = [
        Link(1, 2, 'RSTR', 'H'),
        Link(3, 2, 'ARG1', 'NEQ'),
        Link(3, 7, 'ARG2', 'NEQ'),
        Link(4, 5, 'RSTR', 'H'),
        Link(6, 7, 'RSTR', 'H'),
        Link(7, 5, 'L-INDEX', 'NEQ'),
        Link(7, 9, 'R-INDEX', 'NEQ'),
        Link(8, 9, 'RSTR', 'H'),
    ]
    return DictDmrs(nodes=nodes, links=links, index=3, top=3)
Beispiel #7
0
 def test_Link_repr(self):
     """
     Evaluating the repr of a Link should give back a Link
     that compares equal to the original
     """
     original = Link(0, 1, 'RSTR', 'H')
     # eval is only applied to the repr of an object created right here
     rebuilt = eval(repr(original))
     self.assertEqual(original, rebuilt)
Beispiel #8
0
 def test_Link_labelstring(self):
     """
     The labelstring of a link should be rargname and post joined by '/'
     """
     expected = 'RSTR/H'
     self.assertEqual(Link(0, 1, 'RSTR', 'H').labelstring, expected)
Beispiel #9
0
 def test_Link_str(self):
     """
     str() of a Link should render a labelled arrow
     from the start nodeid to the end nodeid
     """
     rendered = str(Link(0, 1, 'RSTR', 'H'))
     self.assertEqual(rendered, "(0 - RSTR/H -> 1)")
Beispiel #10
0
 def test_Link_label(self):
     """
     The label attribute of a Link should be a LinkLabel
     equal to one built from the same rargname and post
     """
     expected_label = LinkLabel('RSTR', 'H')
     actual_label = Link(0, 1, 'RSTR', 'H').label
     self.assertIsInstance(actual_label, LinkLabel)
     self.assertEqual(actual_label, expected_label)
Beispiel #11
0
def simplify(dmrs):
    """
    Reduce a DMRS to the simpler form that is converted to robot commands.

    The graph is modified in place and also returned. The steps are:
    GPred filtering, removal of quantifier nodes, predicate renaming
    according to `rename`, and merging of linked node pairs according
    to `shrink`.
    """
    # Filter out the GPreds that are not needed (defaults, plus pronouns)
    gpred_filtering(dmrs, extended_filter)

    # Drop all quantifiers; a copy of the node collection is iterated
    # because nodes are removed along the way
    for candidate in copy(dmrs.nodes):
        if dmrs.is_quantifier(candidate.nodeid):
            dmrs.remove_node(candidate.nodeid)

    # Renaming rules: swap one predicate for another
    for old_pred, new_pred in rename:
        for node in dmrs.iter_nodes():
            if node.pred == old_pred:
                node.pred = new_pred

    # Shrinking rules: a pair of nodes joined by a given link label is
    # collapsed into the first node, which gets a new predicate
    for head_pred, label, tail_pred, merged_pred in shrink:
        for node in copy(dmrs.nodes):
            if not node.pred == head_pred:
                continue
            head_id = node.nodeid
            candidate_links = dmrs.get_out(head_id,
                                           rargname=label.rargname,
                                           post=label.post)
            for link in candidate_links:
                if not dmrs[link.end].pred == tail_pred:
                    continue
                # The rule applies to this pair of nodes
                tail_id = link.end
                dmrs.remove_link(link)
                # Re-attach the tail node's links to the head node
                for moved in dmrs.get_out(tail_id):
                    dmrs.add_link(
                        Link(head_id, moved.end, moved.rargname, moved.post))
                for moved in dmrs.get_in(tail_id):
                    dmrs.add_link(
                        Link(moved.start, head_id, moved.rargname,
                             moved.post))
                # The tail node disappears, the head node is relabelled
                dmrs.remove_node(link.end)
                dmrs[head_id].pred = merged_pred

    return dmrs
Beispiel #12
0
def the_mouse():
    """
    Build the example DictDmrs for the surface string 'the mouse':
    a quantifier node and a noun node joined by an RSTR/H link.
    """
    graph = DictDmrs(surface='the mouse')

    determiner = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    graph.add_node(determiner)

    noun = Node(nodeid=2,
                pred=RealPred('mouse', 'n', '1'),
                cfrom=4,
                cto=9,
                sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
    graph.add_node(noun)

    # Link arguments are (start, end, rargname, post)
    graph.add_link(Link(1, 2, 'RSTR', 'H'))
    return graph
Beispiel #13
0
def the_cat_chases_the_dog():
    """
    Build the example DictDmrs for the surface string
    'the cat chases the dog', with character spans on every node.
    Node 3 ('chase') is both index and top.
    """
    def third_person_singular():
        # A separate sortinfo object is created for every noun
        return InstanceSortinfo(pers='3', num='sg', ind='+')

    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
        Node(nodeid=2,
             pred=RealPred('cat', 'n', '1'),
             cfrom=4,
             cto=7,
             sortinfo=third_person_singular()),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             cfrom=8,
             cto=14,
             sortinfo=EventSortinfo(sf='prop',
                                    tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
        Node(nodeid=5,
             pred=RealPred('dog', 'n', '1'),
             cfrom=19,
             cto=22,
             sortinfo=third_person_singular()),
    ]
    # Links are given as (start, end, rargname, post)
    links = [
        Link(1, 2, 'RSTR', 'H'),
        Link(3, 2, 'ARG1', 'NEQ'),
        Link(3, 5, 'ARG2', 'NEQ'),
        Link(4, 5, 'RSTR', 'H'),
    ]
    return DictDmrs(surface='the cat chases the dog',
                    nodes=nodes,
                    links=links,
                    index=3,
                    top=3)
Beispiel #14
0
    def test_find_best_matches(self):
        """
        find_best_matches should cope with an exact fit, an inexact fit,
        no fit at all (None is expected) and several equally good fits.
        """
        find = general_matching.find_best_matches

        # "the cat" onto "the dog chases the cat": a single, exact match
        exact = find(self.cat_dmrs, self.small_dmrs)
        self.assertEqual(len(exact), 1)
        self.assertCountEqual(exact[0].nodeid_pairs, [(2, 5), (1, 4)])
        self.assertCountEqual(
            exact[0].link_pairs,
            [(Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))])

        # "the dog chases the cat" onto "the cat chases the dog":
        # a single, inexact match
        inexact = find(self.small_dmrs, self.reverse_dmrs)
        self.assertEqual(len(inexact), 1)
        expected_nodeid_pairs = [(5, 2), (4, 1), (3, 3), (2, 5), (1, 4)]
        self.assertCountEqual(inexact[0].nodeid_pairs, expected_nodeid_pairs)
        expected_link_pairs = [
            (Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H')),
            (Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H')),
        ]
        self.assertCountEqual(inexact[0].link_pairs, expected_link_pairs)

        # "the mouse" cannot be matched at all
        nothing = find(examples_dmrs.the_mouse(), self.reverse_dmrs)
        self.assertIsNone(nothing)

        # "the cat" onto the large graph: two matches are expected
        several = find(self.cat_dmrs, self.large_dmrs)
        self.assertEqual(len(several), 2)
        first, second = several[0], several[1]
        self.assertCountEqual(first.nodeid_pairs, [(2, 5), (1, 4)])
        self.assertCountEqual(
            first.link_pairs,
            [(Link(1, 2, 'RSTR', 'H'), Link(4, 5, 'RSTR', 'H'))])
        self.assertCountEqual(second.nodeid_pairs, [(2, 8), (1, 7)])
        self.assertCountEqual(
            second.link_pairs,
            [(Link(1, 2, 'RSTR', 'H'), Link(7, 8, 'RSTR', 'H'))])
Beispiel #15
0
 def test_Link_copy(self):
     """
     Both copy.copy and copy.deepcopy should produce a Link that is
     equal to, but not the same object as, the original
     """
     from copy import copy, deepcopy
     original = Link(0, 1, 'RSTR', 'H')
     duplicates = (copy(original), deepcopy(original))
     # Equality first, identity second, as two separate passes
     for duplicate in duplicates:
         self.assertEqual(original, duplicate)
     for duplicate in duplicates:
         self.assertIsNot(original, duplicate)
Beispiel #16
0
def reverse_link(dmrs, link):
    """
    Reverse a Link in a Dmrs graph.

    The start and end nodeids are switched, and "_REV" is appended to the
    rargname (or removed if already present). A link without a rargname
    (rargname is None, e.g. a bare EQ link) is treated as having an empty
    rargname, so it becomes "_REV" instead of raising a TypeError.

    :param dmrs: the graph containing the link; the old link is removed
        from it and the reversed link is added to it
    :param link: the Link to reverse
    :return: the new, reversed Link
    """
    suffix = "_REV"
    # rargname may be None, which can neither be sliced nor concatenated
    rargname = link.rargname or ""
    if rargname.endswith(suffix):
        new_rargname = rargname[:-len(suffix)]
    else:
        new_rargname = rargname + suffix
    new_link = Link(link.end, link.start, new_rargname, link.post)
    dmrs.remove_link(link)
    dmrs.add_link(new_link)
    return new_link
Beispiel #17
0
    def test_iter_outgoing(self):
        """
        iter_outgoing should return an iterator over the links starting at
        a node, and should raise PydmrsValueError for nodeid 15.
        """
        graph = self.test_dmrs

        with self.assertRaises(PydmrsValueError):
            graph.iter_outgoing(15)

        graph.add_link(Link(3, 4, 'None', 'EQ'))
        outgoing = graph.iter_outgoing(3)
        # The return value has to be an iterator, not a container
        self.assertTrue(hasattr(outgoing, '__next__'))
        # The freshly added EQ link is expected among the outgoing links
        expected_links = [
            Link(3, 5, 'ARG2', 'NEQ'),
            Link(3, 2, 'ARG1', 'NEQ'),
            Link(3, 4, None, 'EQ'),
        ]
        self.assertCountEqual(list(outgoing), expected_links)
        # TODO: Treat EQ links symmetrically or not at all, as long as it's consistent.
        # Test e.g.
        # self.test_dmrs.add_link(Link(4, 3, 'None', 'EQ'))
        # out_it = self.test_dmrs.iter_outgoing(3)
        # self.assertIn(Link(4, 3, 'None', 'EQ'), list(out_it))

        # Node 2 is expected to have no outgoing links: the iterator is
        # exhausted straight away
        exhausted = graph.iter_outgoing(2)
        with self.assertRaises(StopIteration):
            next(exhausted)
Beispiel #18
0
 def test_get_matched_subgraph(self):
     """
     get_matched_subgraph should return the part of the small graph
     covered by the best match of the cat graph (nodes 4 and 5 and the
     link between them).
     """
     best_match = general_matching.find_best_matches(
         self.cat_dmrs, self.small_dmrs)[0]
     subgraph = general_matching.get_matched_subgraph(
         self.small_dmrs, best_match)

     quantifier = Node(nodeid=4, pred=RealPred('the', 'q'))
     noun = Node(nodeid=5,
                 pred=RealPred('cat', 'n', '1'),
                 sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
     expected = DictDmrs(nodes=[quantifier, noun],
                         links=[Link(4, 5, 'RSTR', 'H')])

     self.assertListEqual(subgraph.nodes, expected.nodes)
     self.assertListEqual(subgraph.links, expected.links)
    def test_get_matching_nodeids(self):
        """
        get_matching_nodeids should return one list of nodeid pairs per
        match: exact fit, all_surface mode, inexact fit, no fit, and a
        graph whose nodes were added in a different order.
        """
        match_ids = aligned_matching.get_matching_nodeids
        target = self.the_dog_chases_the_cat

        # "the cat" onto "the dog chases the cat": exact fit, one match only
        cat_matches = match_ids(self.the_cat, target)
        self.assertEqual(len(cat_matches), 1)
        self.assertCountEqual(cat_matches[0], [(2, 5), (1, 4)])

        # With all_surface=True the result is expected to stay the same
        surface_matches = match_ids(self.the_cat, target, all_surface=True)
        self.assertListEqual(cat_matches[0], surface_matches[0])

        # Surface nodes lying between dog and cat are reported with None
        surface_matches = match_ids(self.dog_cat, target, all_surface=True)
        self.assertCountEqual(surface_matches[0],
                              [(2, 5), (1, 2), (None, 3), (None, 4)])

        # "the dog chases the cat" onto "the cat chases the dog": inexact.
        # Either "the dog" or "the cat" matches; 'chases' is not part of
        # the longest match, so two alternatives are expected
        inexact_matches = match_ids(target, self.the_cat_chases_the_dog)
        self.assertEqual(len(inexact_matches), 2)
        self.assertCountEqual(inexact_matches,
                              [[(5, 2), (4, 1)], [(2, 5), (1, 4)]])

        # Nothing matches: an empty list is expected
        no_matches = match_ids(self.the_mouse, self.dog_cat)
        self.assertListEqual(no_matches, [])

        # "the cat" again, but with the noun added before the quantifier;
        # the result should equal that of the first query
        shuffled_cat = ListDmrs(surface='the cat')
        noun = Node(nodeid=2,
                    pred=RealPred('cat', 'n', '1'),
                    cfrom=4,
                    cto=7,
                    sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'))
        shuffled_cat.add_node(noun)
        quantifier = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
        shuffled_cat.add_node(quantifier)
        shuffled_cat.add_link(Link(1, 2, 'RSTR', 'H'))
        shuffled_matches = match_ids(shuffled_cat, target)
        self.assertListEqual(shuffled_matches, cat_matches)
Beispiel #20
0
def loads_xml(bytestring,
              encoding=None,
              cls=ListDmrs,
              convert_legacy_prontype=True,
              **kwargs):
    """
    Build a DMRS graph from the XML of a single "<dmrs>...</dmrs>" element.
    ("<dmrslist>...</dmrslist>" is not handled yet.)

    The input is expected to be a bytestring; pass `encoding` to load
    from a str instead. The graph is an instance of `cls` (ListDmrs by
    default), constructed with the remaining keyword arguments.
    A link whose start is 0 is not added as a link; its end is taken as
    the top node. Any child element other than 'node' or 'link' raises
    a PydmrsValueError.
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    def int_attribute(name):
        # Attributes that are absent yield None instead of an error
        return int(root.get(name)) if name in root.attrib else None

    dmrs = cls(**kwargs)

    dmrs.cfrom = int_attribute('cfrom')
    dmrs.cto = int_attribute('cto')
    dmrs.surface = root.get('surface')
    dmrs.ident = int_attribute('ident')
    index_id = int_attribute('index')
    top_id = None

    for child in root:
        tag = child.tag
        if tag == 'node':
            dmrs.add_node(Node.from_xml(child, convert_legacy_prontype))
        elif tag == 'link':
            link = Link.from_xml(child)
            if link.start == 0:
                # Pseudo-link from nodeid 0: it only marks the top node
                top_id = link.end
            else:
                dmrs.add_link(link)
        else:
            raise PydmrsValueError(tag)

    # Top and index are resolved only after all nodes have been added
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs
Beispiel #21
0
def loads_xml(bytestring, encoding=None, cls=ListDmrs, convert_legacy_prontype=True, **kwargs):
    """
    Build a DMRS graph from the XML of a single "<dmrs>...</dmrs>" element.
    ("<dmrslist>...</dmrslist>" is not handled yet.)

    The input is expected to be a bytestring; pass `encoding` to load
    from a str instead. The graph is an instance of `cls` (ListDmrs by
    default), constructed with the remaining keyword arguments.
    The top node may be given as a 'top' attribute of the graph element
    or as a link whose start is 0; if both occur, the link wins.
    Any child element other than 'node' or 'link' raises a
    PydmrsValueError.
    """
    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    def optional_int(name):
        # Attributes that are absent yield None instead of an error
        if name in root.attrib:
            return int(root.get(name))
        return None

    dmrs = cls(**kwargs)

    dmrs.cfrom = optional_int('cfrom')
    dmrs.cto = optional_int('cto')
    dmrs.surface = root.get('surface')
    dmrs.ident = optional_int('ident')
    # The top can come from the graph attribute read here, or from a
    # link processed in the loop below
    top_id = optional_int('top')
    index_id = optional_int('index')

    for child in root:
        tag = child.tag
        if tag == 'node':
            dmrs.add_node(Node.from_xml(child, convert_legacy_prontype))
        elif tag == 'link':
            link = Link.from_xml(child)
            if link.start != 0:
                dmrs.add_link(link)
            else:
                # A link from nodeid 0 marks the top node. This replaces
                # a graph-level top attribute; both are assumed not to
                # appear in the same graph
                top_id = link.end
        else:
            raise PydmrsValueError(tag)

    # Top and index are resolved only after all nodes have been added
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs
Beispiel #22
0
 def compose(self, other, fusion):
     """
     Return a new graph combining a deep copy of this graph with `other`.

     `fusion` maps anchor names of this graph to anchor names of `other`;
     each such pair of anchor nodes is unified. All other nodes of
     `other` are deep-copied into the result, and all links of `other`
     are re-created between the corresponding result nodes. Index and
     top are taken from `other` only where this graph has none.
     """
     assert isinstance(other, ComposableDmrs)
     result = deepcopy(self)
     # nodeid in `other` -> nodeid of the corresponding node in `result`
     to_result_id = {}

     # Unify the fused anchor nodes
     for own_anchor, other_anchor in fusion.items():
         own_node = result.anchors[own_anchor]
         other_node = other.anchors[other_anchor]
         own_node.unify(other_node)
         to_result_id[other_node.nodeid] = own_node.nodeid

     # Copy over every node that was not unified; its nodeid is cleared
     # first and the value returned by add_node is recorded as the new id
     for other_id in other:
         if other_id not in to_result_id:
             clone = deepcopy(other[other_id])
             clone.nodeid = None
             to_result_id[other_id] = result.add_node(clone)

     # Re-create the links of `other` between the result nodes
     for other_link in other.iter_links():
         result.add_link(
             Link(to_result_id[other_link.start],
                  to_result_id[other_link.end],
                  other_link.rargname,
                  other_link.post))

     # Fill in index and top from `other` only if they are missing
     if result.index is None and other.index is not None:
         result.index = result[to_result_id[other.index.nodeid]]
     if result.top is None and other.top is not None:
         result.top = result[to_result_id[other.top.nodeid]]
     return result
Beispiel #23
0
def dmrs_mapping(dmrs, search_dmrs, replace_dmrs, equalities=(), hierarchy=None, copy_dmrs=True, iterative=True, all_matches=True, require_connected=True, max_matches=100):
    """
    Replace occurrences of search_dmrs within dmrs by replace_dmrs.

    Occurrences are found via exact DMRS (sub)graph matching of search_dmrs
    against dmrs. Anchor nodes link the search and replace graphs: a search
    anchor node and a replace anchor node sharing an anchor are mapped onto
    the same node of the result.

    :param dmrs: DMRS graph to map.
    :param search_dmrs: DMRS subgraph to replace.
    :param replace_dmrs: DMRS subgraph to replace with.
    :param equalities: Passed on to dmrs_exact_matching.
    :param hierarchy: An optional predicate hierarchy.
    :param copy_dmrs: True if DMRS graph argument should be copied before being mapped.
    :param iterative: True if all possible mappings should be performed iteratively to the same DMRS graph, instead of a separate copy per mapping (iterative=False requires copy_dmrs=True).
    :param all_matches: True if all possible matches should be returned, instead of only the first (or None).
    :param require_connected: True if mappings resulting in a disconnected DMRS graph should be ignored.
    :param max_matches: Maximum number of matches.
    :return: With copy_dmrs=True: the mapped DMRS graph, or None if there is
        no (connected) result; with iterative=False and all_matches=True a
        list of mapped graphs. With copy_dmrs=False: True/False, indicating
        whether a mapping was applied to dmrs.
    :raises Exception: if the loop runs through max_matches matches without
        returning.
    """
    assert copy_dmrs or iterative, 'Invalid argument combination.'

    # extract anchor node mapping between search_dmrs and replace_dmrs
    sub_mapping = {}
    optional_nodeids = []
    for search_node in search_dmrs.iter_nodes():
        if not isinstance(search_node, AnchorNode):
            continue
        if not search_node.required:
            optional_nodeids.append(search_node.nodeid)
        for replace_node in replace_dmrs.iter_nodes():
            # skip replace nodes that share no anchor with this search node
            if not isinstance(replace_node, AnchorNode) or all(anchor not in replace_node.anchors for anchor in search_node.anchors):
                continue
            assert search_node.nodeid not in sub_mapping, 'Node matches multiple nodes.' + str(search_node)
            sub_mapping[search_node.nodeid] = replace_node.nodeid
        if search_node.nodeid not in sub_mapping:
            assert not search_node.requires_target, 'Un-matched anchor node.'

    # set up variables according to settings
    # (in both cases the matchings are computed once, against the original dmrs)
    if iterative:
        result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs
        matchings = dmrs_exact_matching(search_dmrs, dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True)
    else:
        matchings = dmrs_exact_matching(search_dmrs, dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True)
    if not iterative and all_matches:
        result = []

    # continue while there is a match for search_dmrs
    # count is the number of matchings consumed so far
    count = 0
    for _ in range(max_matches):
        if iterative:
            # all mappings are applied to the same result_dmrs; re-matching
            # against the partially mapped graph is disabled (see below)
            pass
            # matchings = dmrs_exact_matching(search_dmrs, result_dmrs, optional_nodeids=optional_nodeids, equalities=equalities, hierarchy=hierarchy, match_top_index=True)
        else:
            # every mapping is applied to a fresh graph
            result_dmrs = copy.deepcopy(dmrs) if copy_dmrs else dmrs

        # return mapping(s) if there are no more matches left
        try:
            search_matching = next(matchings)
            count += 1
        except StopIteration:
            if not all_matches:
                # only the first match was wanted, and none was usable
                if copy_dmrs:
                    return None
                else:
                    return False
            elif iterative:
                if not require_connected or result_dmrs.is_connected():
                    if copy_dmrs:
                        return result_dmrs
                    else:
                        return count > 0
                else:
                    if copy_dmrs:
                        return None
                    else:
                        return False
            else:
                return result

        # remove nodes in the matched search_dmrs if they are no anchor nodes, otherwise perform mapping()
        # mapping() performs the mapping process (with whatever it involves) specific to this node type (e.g. fill underspecified values)
        for nodeid in search_dmrs:
            search_node = search_dmrs[nodeid]
            if isinstance(search_node, AnchorNode):
                search_node.before_map(result_dmrs, search_matching[nodeid])
        # maps nodeids of replace_dmrs to nodeids of result_dmrs
        replace_matching = {}
        for nodeid in search_matching:
            if nodeid in sub_mapping:
                replace_dmrs[sub_mapping[nodeid]].map(result_dmrs, search_matching[nodeid], hierarchy=hierarchy)
                replace_dmrs[sub_mapping[nodeid]].after_map(result_dmrs, search_matching[nodeid])
                replace_matching[sub_mapping[nodeid]] = search_matching[nodeid]
            elif search_matching[nodeid] is not None:
                result_dmrs.remove_node(search_matching[nodeid])

        # add copies of the non-anchor nodes for the matched replace_dmrs
        for nodeid in replace_dmrs:
            if nodeid in replace_matching:
                continue
            node = copy.deepcopy(replace_dmrs[nodeid])
            node.nodeid = result_dmrs.free_nodeid()
            result_dmrs.add_node(node)
            replace_matching[nodeid] = node.nodeid

        # set top/index if specified in replace_dmrs
        if replace_dmrs.top is not None:
            result_dmrs.top = result_dmrs[replace_matching[replace_dmrs.top.nodeid]]
        if replace_dmrs.index is not None:
            result_dmrs.index = result_dmrs[replace_matching[replace_dmrs.index.nodeid]]

        # remove all links in the matched search_dmrs
        # (i.e. every link whose start and end are both matched nodes)
        links = []
        matching_values = set(search_matching.values())
        for link in result_dmrs.iter_links():
            if link.start in matching_values and link.end in matching_values:
                links.append(link)
        result_dmrs.remove_links(links)

        # add all links for the matched replace_dmrs
        for link in replace_dmrs.iter_links():
            link = Link(replace_matching[link.start], replace_matching[link.end], link.rargname, link.post)
            result_dmrs.add_link(link)

        # add/return result
        # (a disconnected result is skipped when require_connected is set)
        if not require_connected or result_dmrs.is_connected():
            if all_matches and not iterative:
                result.append(result_dmrs)
            elif not all_matches:
                if copy_dmrs:
                    return result_dmrs
                else:
                    return True

    # NOTE(review): exhaustion of the matchings is only detected by the
    # next() call at the start of an iteration, so this is also reached when
    # there are exactly max_matches matches, not only when there are more.
    raise Exception('More than {} matches!'.format(max_matches))
Beispiel #24
0
 def setUp(self):
     """Create the Match fixture (two nodeid pairs, one link pair)."""
     nodeid_pairs = [(2, 3), (4, 2)]
     link_pairs = [(Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H'))]
     self.match = general_matching.Match(nodeid_pairs, link_pairs)
Beispiel #25
0
    def test_Match_add(self):
        """
        Match.add should return None and merge another Match into the
        fixture: an empty Match changes nothing, a clashing Match only
        contributes its non-clashing nodeid pair, and a compatible Match
        contributes all of its nodeid pairs and link pairs.
        """
        fixture_nodeid_pairs = [(2, 3), (4, 2)]
        fixture_link_pair = (Link(4, 5, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H'))

        # Adding an empty Match
        self.assertIsNone(self.match.add(general_matching.Match()))
        self.assertCountEqual(self.match.nodeid_pairs, fixture_nodeid_pairs)
        self.assertCountEqual(self.match.link_pairs, [fixture_link_pair])

        # Adding a Match that clashes with the fixture: only (8, 1) is
        # expected to be taken over, and no link pair
        clashing = general_matching.Match(
            [(1, 2), (8, 1)],
            [(Link(1, 8, 'RSTR', 'H'), Link(1, 2, 'RSTR', 'H'))])
        self.match.add(clashing)
        self.assertCountEqual(self.match.nodeid_pairs,
                              fixture_nodeid_pairs + [(8, 1)])
        self.assertCountEqual(self.match.link_pairs, [fixture_link_pair])

        # Adding a compatible Match: everything is expected to be taken over
        added_link_pair = (Link(1, 3, 'ARG1', 'NEQ'),
                           Link(1, 5, 'ARG2', 'NEQ'))
        compatible = general_matching.Match([(1, 5), (3, 4)],
                                            [added_link_pair])
        self.match.add(compatible)
        self.assertCountEqual(
            self.match.nodeid_pairs,
            fixture_nodeid_pairs + [(1, 5), (8, 1), (3, 4)])
        self.assertCountEqual(self.match.link_pairs,
                              [fixture_link_pair, added_link_pair])
Beispiel #26
0
def _parse_link(string, left_nodeid, right_nodeid, queries, equalities):
    """
    Parse the textual form of a link between two nodes into a Link.

    The string must not contain spaces. It consists of an arrow head
    ('<' as first character for a link pointing left, otherwise '>' as
    last character for a link pointing right), a line of '-' or '='
    characters at both ends, and optional specification symbols between
    the line characters. The number of symbols decides the interpretation:

    - none: a '=' line gives rargname None / post 'eq', a '-' line gives
      rargname 'rstr' / post 'h'.
    - one: the symbol selects the rargname ('n' -> 'arg', '1'..'4' ->
      'arg1'..'arg4', 'l'/'r' -> 'L-index'/'R-index'); post is 'eq' for a
      '=' line and 'neq' otherwise.
    - two: the second symbol must be 'h'; post is 'heq' for a '=' line and
      'h' otherwise; 'l'/'r' select 'L-hndl'/'R-hndl'.
    - one or two, starting with '?': rargname and post are both '?', and
      the symbols are handed to _parse_value.
    - three or more: an explicit specification, either 'rstr' or
      'rargname/post' where either side of the '/' may be empty (None);
      without a '/', the text is parsed as the rargname and post is None.

    :param string: the link text
    :param left_nodeid: nodeid of the node to the left of the link text
    :param right_nodeid: nodeid of the node to the right of the link text
    :param queries: passed through to _parse_value
    :param equalities: passed through to _parse_value
    :return: Link(start, end, rargname, post)
    :raises AssertionError: if the string is malformed. (Validation relies
        on assert statements, so it is skipped when Python runs with -O.)
    """
    assert ' ' not in string, 'Links must not contain spaces.'
    # l and r are indices moving inwards from both ends of the string
    l = 0
    r = len(string) - 1
    if string[l] == '<':  # pointing left
        start = right_nodeid
        end = left_nodeid
        l += 1
    elif string[r] == '>':  # pointing right
        start = left_nodeid
        end = right_nodeid
        r -= 1
    else:  # invalid link
        assert False, 'Link must have a direction.'
    assert string[l] in '-=' and string[
        r] in '-=', 'Link line must consist of either "-" or "=".'
    # the line character at the left end is used for skipping on both sides
    link_char = string[l]
    while l < len(string) and string[l] == link_char:  # arbitrary left length
        l += 1
    while r >= 0 and string[r] == link_char:  # arbitrary right length
        r -= 1
    # now string[l:r + 1] holds the specification symbols (empty if l > r)
    if l + 1 < r:  # explicit specification
        r += 1  # from here on r is an exclusive end index
        if string[l:r] == 'rstr':  # rargname RSTR uniquely determines post H
            rargname = 'rstr'
            post = 'h'
        # NOTE(review): this branch appears unreachable - string[l:r] has at
        # least three characters here, so it can never equal 'eq'.
        elif string[l:r] == 'eq':  # post EQ uniquely determines rargname None
            rargname = None
            post = 'eq'
        else:
            # m is the position of the rargname/post separator, if any
            m = string.find('/', l, r)
            if m >= 0:
                if l == m and m + 1 == r:
                    # both sides of the '/' are empty
                    rargname = None
                    post = None
                elif l == m:
                    # only a post is given
                    rargname = None
                    post = _parse_value(
                        string[m + 1:r], '?', queries, equalities,
                        (lambda matching, dmrs: ','.join(
                            link.post
                            for link in dmrs.get_out(matching[start], itr=True)
                            if link.end == matching[end])))
                elif m + 1 == r:
                    # only a rargname is given
                    rargname = _parse_value(
                        string[l:m], '?', queries, equalities,
                        (lambda matching, dmrs: ','.join(
                            link.rargname
                            for link in dmrs.get_out(matching[start], itr=True)
                            if link.end == matching[end])))
                    post = None
                else:
                    # problem: doesn't combine rargname and post
                    rargname = _parse_value(
                        string[l:m], '?', queries, equalities,
                        (lambda matching, dmrs: ','.join(
                            link.rargname
                            for link in dmrs.get_out(matching[start], itr=True)
                            if link.end == matching[end])))
                    post = _parse_value(
                        string[m + 1:r], '?', queries, equalities,
                        (lambda matching, dmrs: ','.join(
                            link.post
                            for link in dmrs.get_out(matching[start], itr=True)
                            if link.end == matching[end])))
            else:
                # no '/': the whole specification is parsed as the rargname
                rargname = _parse_value(
                    string[l:r], '?', queries, equalities,
                    (lambda matching, dmrs: ','.join(
                        link.labelstring
                        for link in dmrs.get_out(matching[start], itr=True)
                        if link.end == matching[end])))
                post = None
        return Link(start, end, rargname, post)
    if l > r:  # no specification symbol
        if link_char == '=':
            rargname = None
            post = 'eq'
        else:
            rargname = 'rstr'
            post = 'h'
    else:
        # one or two specification symbols: first determine post ...
        if string[l] == '?':  # no equal constraint
            rargname = '?'
            post = '?'
            value = _parse_value(
                string[l:r + 1], None, queries, equalities,
                (lambda matching, dmrs: ','.join(
                    link.labelstring
                    for link in dmrs.get_out(matching[start], itr=True)
                    if link.end == matching[end])))
            assert not value
        elif l == r:  # one specification symbol, i.e. variable link
            if link_char == '=':
                post = 'eq'
            else:
                post = 'neq'
        elif l + 1 == r:  # two specification symbol, i.e. handle link
            assert string[
                r] == 'h', 'Second link specification symbol must be "h".'
            if link_char == '=':
                post = 'heq'
            else:
                post = 'h'
        else:
            assert False  # never reached
        # ... then determine rargname from the first specification symbol
        if string[l] == 'n':  # ARG/ARGN (underspecified ARG)
            rargname = 'arg'
        elif string[l] in '1234':  # ARG{1,2,3,4}
            rargname = 'arg' + str(string[l])
        elif string[l] in 'lr':  # {L,R}-{INDEX,HNDL}
            if l == r:
                rargname = str(string[l]).upper() + '-index'
            else:
                rargname = str(string[l]).upper() + '-hndl'
        elif string[l] != '?':
            assert False, 'Invalid link specification symbol.'
    return Link(start, end, rargname, post)