Esempio n. 1
0
 def test_GPred_immutable(self):
     """
     Assigning to an attribute of a GPred must raise AttributeError.
     """
     gpred = GPred('pron')
     with self.assertRaises(AttributeError):
         gpred.name = 1
Esempio n. 2
0
 def test_GPred_immutable(self):
     """
     Assigning to an attribute of a GPred must raise AttributeError.
     """
     gpred = GPred('pron')
     with self.assertRaises(AttributeError):
         gpred.name = 1
Esempio n. 3
0
    def test_Node_init(self):
        """
        Check that the Node constructor stores its arguments, rejects the
        malformed span and carg used below, and accepts pred and sortinfo
        arguments given in several forms.
        """
        node = Node(nodeid=13, pred='the_q', surface='cat', base='x',
                    cfrom=23, cto=27, carg='Kim')
        self.assertEqual(node.nodeid, 13)
        self.assertEqual(node.surface, 'cat')
        self.assertEqual(node.base, 'x')

        # Character span: stored as given; a span ending before it starts
        # is rejected.
        self.assertEqual(node.cfrom, 23)
        self.assertEqual(node.cto, 27)
        with self.assertRaises(PydmrsValueError):
            Node(cfrom=22, cto=7)

        # carg: stored as given, surrounding double quotes are stripped,
        # and a lone opening quote is rejected.
        self.assertEqual(node.carg, 'Kim')
        self.assertEqual(Node(carg='"Kim"').carg, 'Kim')
        with self.assertRaises(PydmrsValueError):
            Node(carg='"Kim')

        # pred: a string and a ready-made GPred give an equal predicate.
        the_q = GPred('the_q')
        self.assertEqual(node.pred, the_q)
        self.assertEqual(Node(pred=GPred('the_q')).pred, the_q)

        # sortinfo: None by default; a dict, a Sortinfo instance and a
        # list of pairs are all accepted and give an equal result.
        self.assertEqual(Node().sortinfo, None)
        third_person = InstanceSortinfo(pers='3')
        accepted_forms = (
            {'cvarsort': 'i', 'pers': '3'},
            InstanceSortinfo(pers='3'),
            [('cvarsort', 'i'), ('pers', '3')],
        )
        for given in accepted_forms:
            self.assertEqual(Node(sortinfo=given).sortinfo, third_person)
        # A plain string is not an accepted sortinfo form.
        with self.assertRaises(PydmrsTypeError):
            Node(sortinfo="x[pers=3, num=sg, ind=+]")
Esempio n. 4
0
 def test_GPred_eq(self):
     """
     GPreds with the same name should compare equal, GPreds with
     different names should not, and equal GPreds should be
     interchangeable as dictionary keys.
     """
     first = GPred('pron')
     second = GPred('pron')
     different = GPred('udef_q')
     # Equality follows the name
     self.assertEqual(first, second)
     self.assertNotEqual(first, different)
     # An equal instance must find the entry stored under the other one
     lookup = {first: 1}
     self.assertEqual(lookup[second], 1)
Esempio n. 5
0
 def test_GPred_cmp(self):
     """
     Ordering comparisons between GPreds should follow their names.
     """
     low = GPred('pron')
     low_twin = GPred('pron')
     high = GPred('the_q')
     # Same name: equal, and therefore both <= and >=
     self.assertEqual(low, low_twin)
     self.assertLessEqual(low, low_twin)
     self.assertGreaterEqual(low, low_twin)
     # 'pron' versus 'the_q': strictly ordered in both directions
     self.assertNotEqual(low, high)
     self.assertLess(low, high)
     self.assertGreater(high, low)
     self.assertLessEqual(low, high)
     self.assertGreaterEqual(high, low)
Esempio n. 6
0
 def test_GPred_new(self):
     """
     A GPred takes exactly one value (name), given either positionally
     or by keyword, and exposes it as the ``name`` attribute.
     """
     # One argument, in either calling style
     for gpred in (GPred('pron'), GPred(name='pron')):
         self.assertEqual(gpred.name, 'pron')

     # Too few or too many arguments
     for wrong_args in ((), ('udef', 'q')):
         with self.assertRaises(TypeError):
             GPred(*wrong_args)
Esempio n. 7
0
 def test_GPred_repr(self):
     """
     Evaluating repr() of a GPred should give back an equal GPred.
     """
     original = GPred('pron')
     rebuilt = eval(repr(original))
     self.assertEqual(original, rebuilt)
Esempio n. 8
0
 def test_Pred_cmp_subclasses(self):
     """
     A bare Pred should sort below GPred and RealPred instances,
     and the GPred used here should sort below the RealPred.
     """
     base = Pred()
     cat = RealPred('cat', 'n', '1')
     pron = GPred('pron')
     # Each pair is listed as (smaller, larger)
     ordered_pairs = [(base, pron), (base, cat), (pron, cat)]

     for pred in (base, pron, cat):
         self.assertEqual(pred, pred)
     for smaller, larger in ordered_pairs:
         self.assertNotEqual(smaller, larger)
     for smaller, larger in ordered_pairs:
         self.assertLess(smaller, larger)
     for smaller, larger in ordered_pairs:
         self.assertLessEqual(smaller, larger)
     for smaller, larger in ordered_pairs:
         self.assertGreater(larger, smaller)
     for smaller, larger in ordered_pairs:
         self.assertGreaterEqual(larger, smaller)
Esempio n. 9
0
def the_dog_chases_the_cat_and_the_mouse():
    """
    Build a fixed nine-node example graph as a DictDmrs.

    Node 3 (the 'chase' verb) is both the index and the top of the graph.
    """
    nodes = [
        Node(nodeid=1, pred=RealPred('the', 'q')),
        Node(nodeid=2,
             pred=RealPred('dog', 'n', '1'),
             sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             sortinfo=EventSortinfo(sf='prop', tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5,
             pred=RealPred('cat', 'n', '1'),
             sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
        Node(nodeid=6, pred=GPred('udef_q')),
        Node(nodeid=7,
             pred=RealPred('and', 'c'),
             sortinfo=InstanceSortinfo(pers='3', num='pl')),
        Node(nodeid=8, pred=RealPred('the', 'q')),
        Node(nodeid=9,
             pred=RealPred('mouse', 'n', '1'),
             sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
    ]
    # (start, end, rargname, post) for every link, in insertion order
    link_specs = [
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 7, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
        (6, 7, 'RSTR', 'H'),
        (7, 5, 'L-INDEX', 'NEQ'),
        (7, 9, 'R-INDEX', 'NEQ'),
        (8, 9, 'RSTR', 'H'),
    ]
    links = [
        Link(start=start, end=end, rargname=rargname, post=post)
        for start, end, rargname, post in link_specs
    ]
    return DictDmrs(nodes=nodes, links=links, index=3, top=3)
Esempio n. 10
0
    def unify(self, other, hierarchy=None):
        """
        Unify this node with another node, updating this node in place.

        The predicate, sortinfo and carg are unified in that order. Each
        step either keeps this node's value, replaces it with the more
        specific value, or raises. Because the steps run in sequence, a
        failure in a later step leaves the earlier attributes already
        updated.

        :param other: The node to unify with.
        :param hierarchy: An optional predicate hierarchy. It is queried as
            ``hierarchy.get(str(pred), ())`` and the result is tested for
            membership of the other predicate's string.
        :raises PydmrsError: If the predicates, sortinfos or cargs cannot
            be unified.
        """
        hierarchy = hierarchy or dict()
        if (
            type(self.pred) is RealPred and
            type(other.pred) is RealPred and
            (self.pred.lemma == other.pred.lemma or self.pred.lemma == '?' or other.pred.lemma == '?') and
            (self.pred.pos == other.pred.pos or self.pred.pos in ('u', '?') or other.pred.pos in ('u', '?')) and
            (self.pred.sense == other.pred.sense or self.pred.sense in ('unknown', '?') or other.pred.sense in ('unknown', '?'))
        ):
            # RealPred and predicate values are either equal or underspecified
            lemma = other.pred.lemma if self.pred.lemma == '?' else self.pred.lemma
            pos = other.pred.pos if self.pred.pos in ('u', '?') else self.pred.pos
            sense = other.pred.sense if self.pred.sense in ('unknown', '?') else self.pred.sense
            self.pred = RealPred(lemma, pos, sense)
        elif (
            type(self.pred) is GPred and
            type(other.pred) is GPred and
            (self.pred.name == other.pred.name or self.pred.name == '?' or other.pred.name == '?')
        ):
            # GPred and predicate values are either equal or underspecified
            name = other.pred.name if self.pred.name == '?' else self.pred.name
            self.pred = GPred(name)
        elif type(self.pred) is Pred or str(other.pred) in hierarchy.get(str(self.pred), ()):
            # predicate is underspecified, or predicate is more general according to the hierarchy
            self.pred = other.pred
        elif type(other.pred) is Pred or str(self.pred) in hierarchy.get(str(other.pred), ()):
            # other is underspecified, or predicate is more specific according to the hierarchy
            pass
        else:
            raise PydmrsError("Node predicates cannot be unified: {}, {}".format(self.pred, other.pred))

        if self.sortinfo is None and other.sortinfo is None:
            # Neither node carries sortinfo, so there is nothing to unify.
            # This must be tested first: the generic branch below would
            # otherwise match (None is an instance of type(None)) and then
            # fail on self.sortinfo.features.
            pass
        elif (
            type(self.sortinfo) is not Sortinfo and
            isinstance(other.sortinfo, type(self.sortinfo)) and
            all(
                (self.sortinfo[key] == other.sortinfo[key]) or
                (self.sortinfo[key] in ('u', '?')) or
                (other.sortinfo[key] in ('u', '?'))
                for key in self.sortinfo.features
            )
        ):
            # same sortinfo type and values are either equal or underspecified
            self.sortinfo = type(self.sortinfo)(*(
                other.sortinfo[key] if self.sortinfo[key] in ('u', '?') else self.sortinfo[key]
                for key in self.sortinfo.features
            ))
        elif type(self.sortinfo) is Sortinfo and isinstance(other.sortinfo, Sortinfo):
            # sortinfo is underspecified
            self.sortinfo = other.sortinfo
        elif type(other.sortinfo) is Sortinfo and isinstance(self.sortinfo, Sortinfo):
            # other is underspecified
            pass
        else:
            raise PydmrsError("Node sortinfos cannot be unified: {}, {}".format(self.sortinfo, other.sortinfo))

        if self.carg == other.carg or other.carg == '?':
            # same carg, or other is underspecified
            pass
        elif self.carg == '?':
            # carg is underspecified
            self.carg = other.carg
        else:
            raise PydmrsError("Node cargs cannot be unified: {}, {}".format(self.carg, other.carg))
0
 def iter_eps(self, icon_mode=False):
     """
     Return an iterator over elementary predications.

     By default this walks every entry of the nested ``self.eps``
     mapping. When ``icon_mode`` is set and ``self.icon_hcons`` is
     non-empty, it instead yields the items of ``self.iter_nodes()``
     followed by one extra ElemPred per entry of ``self.icons``. Each
     extra ElemPred takes its label from the matching ``self.icon_hcons``
     entry and a fresh event index starting at ``self.free_index()``.
     """
     if not (icon_mode and self.icon_hcons):
         return (ep for group in self.eps.values() for ep in group.values())

     labels = iter(self.icon_hcons)
     collected = list(self.iter_nodes())
     next_index = self.free_index()
     for ref1, icon, ref2 in self.icons:
         # Only the second item of each icon_hcons pair (the label) is used
         _, label = next(labels)
         collected.append(
             ElemPred(label=label,
                      pred=GPred(icon + '_d'),
                      intrinsic=Reference('e', next_index),
                      args=[('arg1', ref1), ('arg2', ref2)]))
         next_index += 1
     return iter(collected)
Esempio n. 12
0
def _parse_pred(string, nodeid, queries, equalities):
    assert string.islower(), 'Predicates must be lower-case.'
    assert ' ' not in string, 'Predicates must not contain spaces.'
    if string[0] == '"' and string[-1] == '"':
        string = string[1:-1]
    assert '"' not in string, 'Predicates must not contain quotes.'
    assert string[
        0] != '\'', 'Predicates with opening single-quote have been deprecated.'
    if (string[:4] == 'pred' and
        (len(string) == 4 or string[4] in special_values)) or (
            string[:8] == 'predsort' and
            (len(string) == 8 or string[8] in special_values)):
        i = 8 if string[:8] == 'predsort' else 4
        value = _parse_value(
            string[i:], None, queries, equalities,
            (lambda matching, dmrs: dmrs[matching[nodeid]].pred))
        assert not value
        return Pred(), string[:i]
    rel_suffix = ''
    if string[-4:] == '_rel':
        string = string[:-4]
        rel_suffix = '_rel'
    if string[0] != '_':
        name = _parse_value(
            string, '?', queries, equalities,
            (lambda matching, dmrs: dmrs[matching[nodeid]].pred.name))
        return GPred(name), name + rel_suffix
    values = string[1:].rsplit('_', 2)
    count = len(values)
    assert count > 0, 'Invalid number of arguments for RealPred.'
    if count == 1:
        values.insert(0, '?')
        values.append('unknown')
    elif count == 2:
        values.append(None)
    lemma = _parse_value(
        values[0], '?', queries, equalities,
        (lambda matching, dmrs: dmrs[matching[nodeid]].pred.lemma))
    pos = _parse_value(
        values[1], 'u', queries, equalities,
        (lambda matching, dmrs: dmrs[matching[nodeid]].pred.pos))  # u ???
    sense = _parse_value(
        values[2], 'unknown', queries, equalities,
        (lambda matching, dmrs: dmrs[matching[nodeid]].pred.sense
         ))  # unknown ???
    if count == 1:
        ref_name = '_{}{}'.format(pos, rel_suffix)
    elif count == 2:
        ref_name = '_{}_{}{}'.format(lemma, pos, rel_suffix)
    else:
        ref_name = '_{}_{}_{}{}'.format(lemma, pos, sense, rel_suffix)
    return RealPred(lemma, pos, sense), ref_name
Esempio n. 13
0
 def test_GPred_copy(self):
     """
     Both copy.copy and copy.deepcopy of a GPred should give a new
     object that compares equal to the original.
     """
     from copy import copy, deepcopy
     original = GPred('pron')
     shallow = copy(original)
     deep = deepcopy(original)
     # Equal in value ...
     self.assertEqual(original, shallow)
     self.assertEqual(original, deep)
     # ... but not the very same object
     self.assertIsNot(original, shallow)
     self.assertIsNot(original, deep)
Esempio n. 14
0
 def test_Pred_from_string(self):
     """
     Pred.from_string should accept a '_rel' suffix, enclosing double
     quotes and upper-case input, and give the same predicate as the
     normalised string.
     """
     cat_pred = RealPred.from_normalised_string('_cat_n_1')
     for text in ('_cat_n_1_rel', '"_cat_n_1_rel"', '_CAT_N_1_REL'):
         self.assertEqual(Pred.from_string(text), cat_pred)

     the_pred = GPred.from_normalised_string('the')
     for text in ('the_rel', '"the_rel"', 'THE_REL'):
         self.assertEqual(Pred.from_string(text), the_pred)
Esempio n. 15
0
    def test_Pred_from_normalised_string(self):
        """
        Pred.from_normalised_string should build a RealPred for a string
        with a leading underscore and a GPred for one without, matching
        what the subclass constructors build from the same string.
        """
        real_text = '_cat_n_1'
        general_text = 'the_q'

        # Dispatch to the right subclass
        from_base_real = Pred.from_normalised_string(real_text)
        from_base_general = Pred.from_normalised_string(general_text)
        self.assertIsInstance(from_base_real, RealPred)
        self.assertIsInstance(from_base_general, GPred)

        # Same result as calling the subclass directly
        self.assertEqual(from_base_real,
                         RealPred.from_normalised_string(real_text))
        self.assertEqual(from_base_general,
                         GPred.from_normalised_string(general_text))
Esempio n. 16
0
    def test_Pred_from_normalised_string(self):
        """
        Pred.from_normalised_string should build a RealPred for a string
        with a leading underscore and a GPred for one without, matching
        what the subclass constructors build from the same string.
        """
        real_text = '_cat_n_1'
        general_text = 'the_q'

        # Dispatch to the right subclass
        from_base_real = Pred.from_normalised_string(real_text)
        from_base_general = Pred.from_normalised_string(general_text)
        self.assertIsInstance(from_base_real, RealPred)
        self.assertIsInstance(from_base_general, GPred)

        # Same result as calling the subclass directly
        self.assertEqual(from_base_real,
                         RealPred.from_normalised_string(real_text))
        self.assertEqual(from_base_general,
                         GPred.from_normalised_string(general_text))
Esempio n. 17
0
 def test_Pred_from_string(self):
     """
     Pred.from_string should normalise the string as necessary.

     A '_rel' suffix and enclosing double quotes are accepted silently.
     Upper-case input is expected to trigger a warning.
     """
     cat_pred = RealPred.from_normalised_string('_cat_n_1')
     self.assertEqual(Pred.from_string('_cat_n_1_rel'), cat_pred)
     self.assertEqual(Pred.from_string('"_cat_n_1_rel"'), cat_pred)
     # Turn warnings into exceptions only inside catch_warnings, so the
     # previous filters are restored even if the assertion fails and no
     # filter installed by the test runner is discarded.
     with warnings.catch_warnings():
         warnings.simplefilter('error')
         with self.assertRaises(Warning):
             Pred.from_string('_CAT_N_1_REL')

     the_pred = GPred.from_normalised_string('the')
     self.assertEqual(Pred.from_string('the_rel'), the_pred)
     self.assertEqual(Pred.from_string('"the_rel"'), the_pred)
     with warnings.catch_warnings():
         warnings.simplefilter('error')
         with self.assertRaises(Warning):
             Pred.from_string('THE_REL')
Esempio n. 18
0
 def test_GPred_from_string(self):
     """
     GPred.from_string should build a GPred from a string with or
     without a '_rel' suffix, and reject a leading underscore or a
     non-string argument.
     """
     # (expected name, accepted spellings): first a name without
     # intermediate underscores, then one with.
     accepted = (
         ('pron', ('pron_rel', 'pron')),
         ('udef_q', ('udef_q_rel', 'udef_q')),
     )
     for name, spellings in accepted:
         parsed = [GPred.from_string(text) for text in spellings]
         for gpred in parsed:
             self.assertEqual(GPred(name), gpred)
         for gpred in parsed:
             self.assertIsInstance(gpred, GPred)
     # Leading underscore or not a string
     with self.assertRaises(ValueError):
         GPred.from_string("_the_q_rel")
     with self.assertRaises(TypeError):
         GPred.from_string(1)
Esempio n. 19
0
 def map(self, dmrs, nodeid, hierarchy=None):
     """
     Write this anchor node's specified values onto a target node.

     Nothing happens when the target equals this node or when this node
     is less specific than the target. Otherwise each value of this node
     that is not an underspecification marker replaces the target's
     value, and the target keeps its own value where this node is
     underspecified.
     :param dmrs Target DMRS graph.
     :param nodeid Target node id.
     :param hierarchy: An optional predicate hierarchy.
     """
     node = dmrs[nodeid]
     if self == node or self.is_less_specific(node, hierarchy=hierarchy):
         return

     # --- predicate ---
     if isinstance(self.pred, RealPred):
         if isinstance(node.pred, RealPred):
             lemma = node.pred.lemma if self.pred.lemma == '?' else self.pred.lemma
             pos = node.pred.pos if self.pred.pos in ('u', '?') else self.pred.pos
             sense = node.pred.sense if self.pred.sense in ('unknown', '?') else self.pred.sense
             node.pred = RealPred(lemma, pos, sense)
         else:
             node.pred = copy.deepcopy(self.pred)
     elif isinstance(self.pred, GPred):
         if isinstance(node.pred, GPred):
             name = node.pred.name if self.pred.name == '?' else self.pred.name
             node.pred = GPred(name)
         else:
             node.pred = copy.deepcopy(self.pred)
     elif not isinstance(self.pred, Pred):
         node.pred = None

     # --- sortinfo ---
     if isinstance(self.sortinfo, EventSortinfo):
         if isinstance(node.sortinfo, EventSortinfo):
             sf = node.sortinfo.sf if self.sortinfo.sf in ('u', '?') else self.sortinfo.sf
             tense = node.sortinfo.tense if self.sortinfo.tense in ('u', '?') else self.sortinfo.tense
             mood = node.sortinfo.mood if self.sortinfo.mood in ('u', '?') else self.sortinfo.mood
             perf = node.sortinfo.perf if self.sortinfo.perf in ('u', '?') else self.sortinfo.perf
             prog = node.sortinfo.prog if self.sortinfo.prog in ('u', '?') else self.sortinfo.prog
             node.sortinfo = EventSortinfo(sf, tense, mood, perf, prog)
         else:
             node.sortinfo = copy.deepcopy(self.sortinfo)
     elif isinstance(self.sortinfo, InstanceSortinfo):
         if isinstance(node.sortinfo, InstanceSortinfo):
             pers = node.sortinfo.pers if self.sortinfo.pers in ('u', '?') else self.sortinfo.pers
             num = node.sortinfo.num if self.sortinfo.num in ('u', '?') else self.sortinfo.num
             gend = node.sortinfo.gend if self.sortinfo.gend in ('u', '?') else self.sortinfo.gend
             ind = node.sortinfo.ind if self.sortinfo.ind in ('u', '?') else self.sortinfo.ind
             pt = node.sortinfo.pt if self.sortinfo.pt in ('u', '?') else self.sortinfo.pt
             node.sortinfo = InstanceSortinfo(pers, num, gend, ind, pt)
         else:
             node.sortinfo = copy.deepcopy(self.sortinfo)
     elif not isinstance(self.sortinfo, Sortinfo):
         node.sortinfo = None

     # --- carg ---
     if self.carg != '?':
         node.carg = self.carg
0
 def test_Node_underspecification(self):
     """
     Check Node.is_more_specific and Node.is_less_specific.

     Every pair of nodes below is tested in both directions and with
     both methods. A node with Pred() as pred, '?' as carg or Sortinfo()
     as sortinfo is expected to be less specific than a node built
     without that argument or with a concrete value. Two nodes with the
     same level of specification, or nodes underspecified in different
     attributes, are expected to be neither more nor less specific.
     """
     # Comparing against something that is not a node (the integer 4)
     with self.assertRaises(TypeError):
         Node(pred='_the_q').is_more_specific(4)
     # complete underspecification
     self.assertFalse(Node().is_more_specific(Node()))
     self.assertFalse(Node().is_less_specific(Node()))
     # pred underspecification
     self.assertFalse(Node(pred=Pred()).is_more_specific(Node()))
     self.assertTrue(Node(pred=Pred()).is_less_specific(Node()))
     self.assertTrue(Node().is_more_specific(Node(pred=Pred())))
     self.assertFalse(Node().is_less_specific(Node(pred=Pred())))
     self.assertFalse(Node(pred=Pred()).is_more_specific(Node(pred=Pred())))
     self.assertFalse(Node(pred=Pred()).is_less_specific(Node(pred=Pred())))
     self.assertFalse(
         Node(pred=Pred()).is_more_specific(Node(pred=GPred(name='abc'))))
     self.assertTrue(
         Node(pred=Pred()).is_less_specific(Node(pred=GPred(name='abc'))))
     self.assertTrue(
         Node(pred=GPred(name='abc')).is_more_specific(Node(pred=Pred())))
     self.assertFalse(
         Node(pred=GPred(name='abc')).is_less_specific(Node(pred=Pred())))
     # carg underspecification
     self.assertFalse(Node(carg='?').is_more_specific(Node()))
     self.assertTrue(Node(carg='?').is_less_specific(Node()))
     self.assertTrue(Node().is_more_specific(Node(carg='?')))
     self.assertFalse(Node().is_less_specific(Node(carg='?')))
     self.assertFalse(Node(carg='?').is_more_specific(Node(carg='?')))
     self.assertFalse(Node(carg='?').is_less_specific(Node(carg='?')))
     self.assertFalse(Node(carg='?').is_more_specific(Node(carg='abc')))
     self.assertTrue(Node(carg='?').is_less_specific(Node(carg='abc')))
     self.assertTrue(Node(carg='abc').is_more_specific(Node(carg='?')))
     self.assertFalse(Node(carg='abc').is_less_specific(Node(carg='?')))
     # sortinfo underspecification
     self.assertFalse(Node(sortinfo=Sortinfo()).is_more_specific(Node()))
     self.assertTrue(Node(sortinfo=Sortinfo()).is_less_specific(Node()))
     self.assertTrue(Node().is_more_specific(Node(sortinfo=Sortinfo())))
     self.assertFalse(Node().is_less_specific(Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(sortinfo=Sortinfo()).is_more_specific(
             Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(sortinfo=Sortinfo()).is_less_specific(
             Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(sortinfo=Sortinfo()).is_more_specific(
             Node(sortinfo=EventSortinfo(sf='abc'))))
     self.assertTrue(
         Node(sortinfo=Sortinfo()).is_less_specific(
             Node(sortinfo=EventSortinfo(sf='abc'))))
     self.assertTrue(
         Node(sortinfo=EventSortinfo(sf='abc')).is_more_specific(
             Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(sortinfo=EventSortinfo(sf='abc')).is_less_specific(
             Node(sortinfo=Sortinfo())))
     # mixed specification: each node is underspecified in a different
     # attribute, so neither direction is expected to hold
     self.assertFalse(Node(pred=Pred()).is_more_specific(Node(carg='?')))
     self.assertFalse(Node(pred=Pred()).is_less_specific(Node(carg='?')))
     self.assertFalse(
         Node(pred=Pred()).is_more_specific(Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(pred=Pred()).is_less_specific(Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(carg='?').is_more_specific(Node(sortinfo=Sortinfo())))
     self.assertFalse(
         Node(carg='?').is_less_specific(Node(sortinfo=Sortinfo())))
Esempio n. 21
0
def loads_xml(bytestring, encoding=None, cls=ListDmrs, **kwargs):
    """
    Build a DMRS graph from the XML of a single "<dmrs>...</dmrs>" element.

    "<dmrslist>...</dmrslist>" is not handled yet.

    :param bytestring: The XML as a bytestring. To load from a string
        instead, give ``encoding`` and the string is encoded first.
    :param encoding: Encoding used to turn a string argument into bytes.
    :param cls: Class of the graph to build (ListDmrs by default). It is
        called as ``cls(**kwargs)`` and its ``Node`` attribute is used to
        build the nodes.
    :return: The populated graph.
    :raises PydmrsValueError: If an unexpected tag is found.
    """
    def optional_int(element, attribute):
        # Integer value of an XML attribute, or None when it is absent.
        return int(element.get(attribute)) if attribute in element.attrib else None

    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    dmrs = cls(**kwargs)

    # Graph-level attributes
    dmrs.cfrom = optional_int(root, 'cfrom')
    dmrs.cto = optional_int(root, 'cto')
    dmrs.surface = root.get('surface')
    dmrs.ident = optional_int(root, 'ident')
    index_id = optional_int(root, 'index')
    top_id = None

    for elem in root:
        if elem.tag == 'node':
            nodeid = optional_int(elem, 'nodeid')
            cfrom = optional_int(elem, 'cfrom')
            cto = optional_int(elem, 'cto')
            surface = elem.get('surface')
            base = elem.get('base')
            carg = elem.get('carg')

            pred = None
            sortinfo = None
            for sub in elem:
                tag = sub.tag
                if tag == 'realpred':
                    try:
                        pred = RealPred(sub.get('lemma'), sub.get('pos'), sub.get('sense'))
                    except PydmrsValueError:
                        # The whole pred name may be stored under 'lemma'
                        # instead of being split into lemma, pos and sense.
                        pred = RealPred.from_string(sub.get('lemma'))
                        warn("RealPred given as string rather than lemma, pos, sense", PydmrsWarning)
                elif tag == 'gpred':
                    try:
                        pred = GPred.from_string(sub.text)
                    except PydmrsValueError:
                        # The text may actually describe a RealPred.
                        pred = RealPred.from_string(sub.text)
                        warn("RealPred string found in a <gpred> tag", PydmrsWarning)
                elif tag == 'sortinfo':
                    sortinfo = sub.attrib
                else:
                    raise PydmrsValueError(tag)

            new_node = cls.Node(nodeid=nodeid, pred=pred, carg=carg,
                                sortinfo=sortinfo, cfrom=cfrom, cto=cto,
                                surface=surface, base=base)
            dmrs.add_node(new_node)

        elif elem.tag == 'link':
            start = int(elem.get('from'))
            end = int(elem.get('to'))

            # A link starting at 0 only marks the top node; it is not
            # added to the graph as a link.
            if start == 0:
                top_id = end
                continue

            rargname = None
            post = None
            for sub in elem:
                if sub.tag == 'rargname':
                    rargname = sub.text
                elif sub.tag == 'post':
                    post = sub.text
                else:
                    raise PydmrsValueError(sub.tag)
            dmrs.add_link(Link(start, end, rargname, post))

        else:
            raise PydmrsValueError(elem.tag)

    # Resolve top and index only after all nodes have been added
    if top_id:
        dmrs.top = dmrs[top_id]
    if index_id:
        dmrs.index = dmrs[index_id]

    return dmrs
Esempio n. 22
0
        default=None,
        help=
        'Path to simplifaction configuration file. By default, configuration in __config__/default_simplification.conf is used.'
    )
    parser.add_argument('input_dmrs', help='Specify input DMRS file')
    parser.add_argument('output_dmrs', help='Specify output dmrs file.')
    args = parser.parse_args()
    if args.config is not None:  # Load the given file
        config = load_config(args.config, default=False)
    else:
        config = load_config(DEFAULT_CONFIG_FILE)
else:
    config = load_config(DEFAULT_CONFIG_FILE)

# Default set of general predicates to filter out: every entry of the
# 'filter' option in the 'General Predicate Filtering' config section,
# parsed into a GPred.
DEFAULT_FILTER = frozenset(
    GPred.from_string(x) for x in get_config_option(
        config, 'General Predicate Filtering', 'filter', opt_type=list))
# Default for the allow_disconnected_dmrs argument of gpred_filtering,
# read from the same config section.
DEFAULT_ALLOW_DISC = get_config_option(config, 'General Predicate Filtering',
                                       'allow_disconnected_dmrs')


def gpred_filtering(dmrs,
                    gpred_filter=DEFAULT_FILTER,
                    allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes on the filter list from the DMRS.
    :param dmrs_xml: Input DMRS object
    :param gpred_filter: A list of general predicates to filter (as strings)
    :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS.
     If DMRS was already disconnected, gpred nodes are removed regardless.
    :return: Output DMRS object
Esempio n. 23
0
# Choose the configuration at import time. When run as a script, the
# command line is parsed and an explicit --config file takes precedence;
# in every other case the default configuration file is loaded.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='DMRS simplification tool')
    parser.add_argument('-c', '--config', default=None,
                        help='Path to simplifaction configuration file. By default, configuration in __config__/default_simplification.conf is used.')
    parser.add_argument('input_dmrs', help='Specify input DMRS file')
    parser.add_argument('output_dmrs', help='Specify output dmrs file.')
    args = parser.parse_args()
    if args.config is not None:  # Load the given file
        config = load_config(args.config, default=False)
    else:
        config = load_config(DEFAULT_CONFIG_FILE)
else:
    # Imported as a module: there is no command line to consult
    config = load_config(DEFAULT_CONFIG_FILE)

# Default set of general predicates to filter out: every entry of the
# 'filter' option in the 'General Predicate Filtering' config section,
# parsed into a GPred.
DEFAULT_FILTER = frozenset(GPred.from_string(x) for x in get_config_option(config, 'General Predicate Filtering', 'filter', opt_type=list))
# Default for the allow_disconnected_dmrs argument of gpred_filtering,
# read from the same config section.
DEFAULT_ALLOW_DISC = get_config_option(config, 'General Predicate Filtering', 'allow_disconnected_dmrs') 

def gpred_filtering(dmrs, gpred_filter=DEFAULT_FILTER, allow_disconnected_dmrs=DEFAULT_ALLOW_DISC):
    """
    Remove general predicate nodes on the filter list from the DMRS.
    :param dmrs_xml: Input DMRS object
    :param gpred_filter: A list of general predicates to filter (as strings)
    :param allow_disconnected_dmrs: Remove gpred nodes even if their removal would result in a disconnected DMRS.
     If DMRS was already disconnected, gpred nodes are removed regardless.
    :return: Output DMRS object
    """

    filterable_nodeids = set()

    # Find general predicate nodes to filter
Esempio n. 24
0
from copy import copy

from pydmrs.core import Link, LinkLabel
from pydmrs.components import Pred, RealPred, GPred
from pydmrs.simplification.gpred_filtering import gpred_filtering, DEFAULT_FILTER
#from pydmrs.mapping.mapping import dmrs_mapping
from pydmrs.graphlang.graphlang import parse_graphlang

# Filter set for this module: the default general-predicate filter
# plus the 'pron' predicate, so that pronouns are removed as well.
extended_filter = DEFAULT_FILTER | {GPred('pron')}

# Renaming rules as (old pred, new pred) pairs:
# the first pred is replaced with the second.
rename = [(RealPred('forwards', 'p'), RealPred('forward', 'p', 'dir'))]

# Rules for replacing a pair of linked nodes with a single node.
# Each entry is (first pred, link label, second pred, replacement pred),
# written as strings here and parsed just below.
# NOTE(review): the rules are presumably applied in list order, since
# later entries match the '_left_n_1' / '_right_n_1' preds produced by
# the first two; confirm against simplify().
shrink = [('_left_a_1', 'ARG1/EQ', 'place_n', '_left_n_1'),
          ('_right_a_1', 'ARG1/EQ', 'place_n', '_right_n_1'),
          ('loc_nonsp', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
          ('loc_nonsp', 'ARG2/NEQ', '_right_n_1', '_right_p_dir'),
          ('_to_p', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
          ('_to_p', 'ARG2/NEQ', '_right_n_1', '_right_p_dir')]

# Rebind `shrink` to the parsed form: the strings become Pred objects
# and the link-label string becomes a LinkLabel.
shrink = [(Pred.from_string(a), LinkLabel.from_string(b), Pred.from_string(c),
           Pred.from_string(d)) for a, b, c, d in shrink]


def simplify(dmrs):
    """
    Simplify an input DMRS to a form that can be converted to robot commands
    """
Esempio n. 25
0
 def test_GPred_from_string(self):
     """
     GPred.from_string should build a GPred from a string with or
     without a '_rel' suffix, and reject a leading underscore or a
     non-string argument.
     """
     # (expected name, accepted spellings): first a name without
     # intermediate underscores, then one with.
     accepted = (
         ('pron', ('pron_rel', 'pron')),
         ('udef_q', ('udef_q_rel', 'udef_q')),
     )
     for name, spellings in accepted:
         parsed = [GPred.from_string(text) for text in spellings]
         for gpred in parsed:
             self.assertEqual(GPred(name), gpred)
         for gpred in parsed:
             self.assertIsInstance(gpred, GPred)
     # Leading underscore or not a string
     with self.assertRaises(ValueError):
         GPred.from_string("_the_q_rel")
     with self.assertRaises(TypeError):
         GPred.from_string(1)
Esempio n. 26
0
 def test_GPred_str(self):
     """
     str() of a GPred parsed from 'pron' should give back 'pron'.
     """
     text = 'pron'
     parsed = GPred.from_string(text)
     self.assertEqual(str(parsed), text)
Esempio n. 27
0
 def test_GPred_str(self):
     """
     str() of a GPred parsed from 'pron' should give back 'pron'.
     """
     text = 'pron'
     parsed = GPred.from_string(text)
     self.assertEqual(str(parsed), text)