예제 #1
0
 def test_RealPred_repr(self):
     """
     Round-trip check: evaluating ``repr`` of a RealPred must give
     back an equal RealPred, for a two-slot and a three-slot predicate.
     """
     samples = (RealPred('the', 'q'), RealPred('cat', 'n', '1'))
     for original in samples:
         self.assertEqual(original, eval(repr(original)))
예제 #2
0
 def test_RealPred_str(self):
     """
     ``str`` of a RealPred parsed from a string should reproduce that
     string, leading underscore included.
     """
     for text in ('_the_q', '_cat_n_1'):
         parsed = RealPred.from_string(text)
         self.assertEqual(str(parsed), text)
예제 #3
0
 def test_RealPred_str(self):
     """
     Parsing an underscore-prefixed predicate string and converting
     the result back with ``str`` should return the same string.
     """
     expected_strings = ['_the_q', '_cat_n_1']
     for expected in expected_strings:
         rendered = str(RealPred.from_string(expected))
         self.assertEqual(rendered, expected)
예제 #4
0
 def test_RealPred_immutable(self):
     """
     Assigning to ``lemma`` on a RealPred must raise AttributeError,
     i.e. RealPreds are immutable.
     """
     preds = [RealPred('the', 'q'), RealPred('cat', 'n', '1')]
     for pred in preds:
         with self.assertRaises(AttributeError):
             pred.lemma = 1
예제 #5
0
def the_mouse():
    """
    Build a DictDmrs for the surface string 'the mouse'.

    The graph has a quantifier node (id 1) and a noun node (id 2)
    joined by an RSTR/H link from 1 to 2.
    """
    graph = DictDmrs(surface='the mouse')

    determiner = Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3)
    graph.add_node(determiner)

    mouse = Node(
        nodeid=2,
        pred=RealPred('mouse', 'n', '1'),
        cfrom=4,
        cto=9,
        sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+'),
    )
    graph.add_node(mouse)

    graph.add_link(Link(start=1, end=2, rargname='RSTR', post='H'))
    return graph
예제 #6
0
def dog_cat():
    """
    Build a DictDmrs for the surface string 'dog cat'.

    Two unlinked noun nodes are added without explicit node ids.
    """
    graph = DictDmrs(surface='dog cat')
    # (lemma, cfrom, cto) for each noun, in insertion order
    for lemma, begin, end in (('dog', 0, 3), ('cat', 4, 7)):
        graph.add_node(
            Node(pred=RealPred(lemma, 'n', '1'),
                 cfrom=begin,
                 cto=end,
                 sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')))
    return graph
예제 #7
0
def the_dog_chases_the_cat_and_the_mouse():
    """
    Build the example DictDmrs named after the sentence
    'the dog chases the cat and the mouse'.

    Nodes 1-9 and eight links are hard-coded; node id 3 (the 'chase'
    node) is passed as both index and top.
    """
    third_singular = dict(pers='3', num='sg', ind='+')
    node_list = [
        Node(nodeid=1, pred=RealPred('the', 'q')),
        Node(nodeid=2,
             pred=RealPred('dog', 'n', '1'),
             sortinfo=InstanceSortinfo(**third_singular)),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             sortinfo=EventSortinfo(sf='prop', tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q')),
        Node(nodeid=5,
             pred=RealPred('cat', 'n', '1'),
             sortinfo=InstanceSortinfo(**third_singular)),
        Node(nodeid=6, pred=GPred('udef_q')),
        Node(nodeid=7,
             pred=RealPred('and', 'c'),
             sortinfo=InstanceSortinfo(pers='3', num='pl')),
        Node(nodeid=8, pred=RealPred('the', 'q')),
        Node(nodeid=9,
             pred=RealPred('mouse', 'n', '1'),
             sortinfo=InstanceSortinfo(**third_singular)),
    ]
    # (start, end, rargname, post) for every link
    link_specs = (
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 7, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
        (6, 7, 'RSTR', 'H'),
        (7, 5, 'L-INDEX', 'NEQ'),
        (7, 9, 'R-INDEX', 'NEQ'),
        (8, 9, 'RSTR', 'H'),
    )
    link_list = [
        Link(start=source, end=target, rargname=label, post=post)
        for source, target, label, post in link_specs
    ]
    return DictDmrs(nodes=node_list, links=link_list, index=3, top=3)
예제 #8
0
 def test_get_matched_subgraph(self):
     """
     The subgraph of ``small_dmrs`` selected by the first result of
     ``find_best_matches`` for ``cat_dmrs`` should contain the 'the'
     node (id 4) and the 'cat' node (id 5) joined by an RSTR/H link.
     """
     first_match = general_matching.find_best_matches(
         self.cat_dmrs, self.small_dmrs)[0]
     actual = general_matching.get_matched_subgraph(
         self.small_dmrs, first_match)

     wanted_nodes = [
         Node(nodeid=4, pred=RealPred('the', 'q')),
         Node(nodeid=5,
              pred=RealPred('cat', 'n', '1'),
              sortinfo=InstanceSortinfo(pers='3', num='sg', ind='+')),
     ]
     wanted_links = [Link(start=4, end=5, rargname='RSTR', post='H')]
     wanted = DictDmrs(nodes=wanted_nodes, links=wanted_links)

     self.assertListEqual(actual.nodes, wanted.nodes)
     self.assertListEqual(actual.links, wanted.links)
예제 #9
0
 def test_Pred_cmp_subclasses(self):
     """
     A plain Pred should compare less than a GPred, and a GPred less
     than a RealPred; each instance should equal itself.
     """
     base = Pred()
     real = RealPred('cat', 'n', '1')
     grammar = GPred('pron')

     for item in (base, grammar, real):
         self.assertEqual(item, item)

     # (smaller, larger) pairs in the expected ordering
     ordered_pairs = ((base, grammar), (base, real), (grammar, real))
     for smaller, larger in ordered_pairs:
         self.assertNotEqual(smaller, larger)
     for smaller, larger in ordered_pairs:
         self.assertLess(smaller, larger)
     for smaller, larger in ordered_pairs:
         self.assertLessEqual(smaller, larger)
     for smaller, larger in ordered_pairs:
         self.assertGreater(larger, smaller)
     for smaller, larger in ordered_pairs:
         self.assertGreaterEqual(larger, smaller)
예제 #10
0
파일: mapping.py 프로젝트: goodmami/pydmrs
    def unify(self, other, hierarchy=None):
        """
        Unify this node with another node, updating this node in place.

        Predicate, sortinfo and carg are unified in that order. A value
        counts as underspecified when it is '?' (also 'u' for a RealPred
        pos or a sortinfo feature, and 'unknown' for a RealPred sense);
        an underspecified value on this node is replaced by the other
        node's value.

        :param other: The node to unify with.
        :param hierarchy: An optional predicate hierarchy: a mapping that
            is looked up with a predicate string and yields a collection
            of predicate strings.
        :raises PydmrsError: If the predicates, sortinfos or cargs of the
            two nodes cannot be unified.
        """
        hierarchy = hierarchy or dict()
        if (
            type(self.pred) is RealPred and
            type(other.pred) is RealPred and
            (self.pred.lemma == other.pred.lemma or
             self.pred.lemma == '?' or
             other.pred.lemma == '?') and
            (self.pred.pos == other.pred.pos or
             self.pred.pos in ('u', '?') or
             other.pred.pos in ('u', '?')) and
            (self.pred.sense == other.pred.sense or
             self.pred.sense in ('unknown', '?') or
             other.pred.sense in ('unknown', '?'))
        ):
            # RealPred and predicate values are either equal or underspecified
            lemma = other.pred.lemma if self.pred.lemma == '?' else self.pred.lemma
            pos = other.pred.pos if self.pred.pos in ('u', '?') else self.pred.pos
            sense = other.pred.sense if self.pred.sense in ('unknown', '?') else self.pred.sense
            self.pred = RealPred(lemma, pos, sense)
        elif (
            type(self.pred) is GPred and
            type(other.pred) is GPred and
            (self.pred.name == other.pred.name or
             self.pred.name == '?' or
             other.pred.name == '?')
        ):
            # GPred and predicate values are either equal or underspecified
            name = other.pred.name if self.pred.name == '?' else self.pred.name
            self.pred = GPred(name)
        elif type(self.pred) is Pred or str(other.pred) in hierarchy.get(str(self.pred), ()):
            # predicate is underspecified, or predicate is more general according to the hierarchy
            self.pred = other.pred
        elif type(other.pred) is Pred or str(self.pred) in hierarchy.get(str(other.pred), ()):
            # other is underspecified, or predicate is more specific according to the hierarchy
            pass
        else:
            raise PydmrsError("Node predicates cannot be unified: {}, {}".format(self.pred, other.pred))

        if self.sortinfo is None and other.sortinfo is None:
            # Neither node has sortinfo. This must be tested first: None
            # passes the isinstance check below (isinstance(None, type(None)))
            # and would then fail on the access to `.features`.
            pass
        elif (
            type(self.sortinfo) is not Sortinfo and
            isinstance(other.sortinfo, type(self.sortinfo)) and
            all((self.sortinfo[key] == other.sortinfo[key]) or
                (self.sortinfo[key] in ('u', '?')) or
                (other.sortinfo[key] in ('u', '?'))
                for key in self.sortinfo.features)
        ):
            # same sortinfo type and values are either equal or underspecified
            self.sortinfo = type(self.sortinfo)(
                *(other.sortinfo[key] if self.sortinfo[key] in ('u', '?') else self.sortinfo[key]
                  for key in self.sortinfo.features))
        elif type(self.sortinfo) is Sortinfo and isinstance(other.sortinfo, Sortinfo):
            # sortinfo is underspecified
            self.sortinfo = other.sortinfo
        elif type(other.sortinfo) is Sortinfo and isinstance(self.sortinfo, Sortinfo):
            # other is underspecified
            pass
        else:
            raise PydmrsError("Node sortinfos cannot be unified: {}, {}".format(self.sortinfo, other.sortinfo))

        if self.carg == other.carg or other.carg == '?':
            # same carg, or other is underspecified
            pass
        elif self.carg == '?':
            # carg is underspecified
            self.carg = other.carg
        else:
            raise PydmrsError("Node cargs cannot be unified: {}, {}".format(self.carg, other.carg))
예제 #11
0
 def test_RealPred_eq(self):
     """
     RealPreds built from the same lemma, pos and sense should be
     equal, differing ones unequal, and equal RealPreds should be
     interchangeable as dictionary keys.
     """
     the_a = RealPred('the', 'q')
     the_b = RealPred('the', 'q')
     cat_a = RealPred('cat', 'n', '1')
     cat_b = RealPred('cat', 'n', '1')
     cat_no_sense = RealPred('cat', 'n')

     # Equality
     for left, right in ((the_a, the_b), (cat_a, cat_b)):
         self.assertEqual(left, right)
     for left, right in ((cat_a, the_a),
                         (cat_a, cat_no_sense),
                         (the_a, cat_no_sense)):
         self.assertNotEqual(left, right)

     # Hashability: an equal key must find the stored value
     lookup = {the_a: 1}
     self.assertEqual(lookup[the_b], 1)
예제 #12
0
 def test_RealPred_copy(self):
     """
     Both copy.copy and copy.deepcopy of a RealPred should return an
     object that is equal to, but not the same object as, the original.
     """
     from copy import copy, deepcopy
     the = RealPred('the', 'q')
     cat = RealPred('cat', 'n', '1')
     # (original, duplicate) pairs: the/copy, the/deepcopy, cat/copy, cat/deepcopy
     pairs = [(source, duplicate(source))
              for source in (the, cat)
              for duplicate in (copy, deepcopy)]
     for source, clone in pairs:
         self.assertEqual(source, clone)
     for source, clone in pairs:
         self.assertIsNot(source, clone)
예제 #13
0
def the_cat_chases_the_dog():
    """
    Build a DictDmrs for the surface string 'the cat chases the dog'.

    Five nodes with character spans and four links are hard-coded;
    node id 3 (the 'chase' node) is passed as both index and top.
    """
    third_singular = dict(pers='3', num='sg', ind='+')
    node_list = [
        Node(nodeid=1, pred=RealPred('the', 'q'), cfrom=0, cto=3),
        Node(nodeid=2,
             pred=RealPred('cat', 'n', '1'),
             cfrom=4,
             cto=7,
             sortinfo=InstanceSortinfo(**third_singular)),
        Node(nodeid=3,
             pred=RealPred('chase', 'v', '1'),
             cfrom=8,
             cto=14,
             sortinfo=EventSortinfo(sf='prop', tense='pres',
                                    mood='indicative')),
        Node(nodeid=4, pred=RealPred('the', 'q'), cfrom=15, cto=18),
        Node(nodeid=5,
             pred=RealPred('dog', 'n', '1'),
             cfrom=19,
             cto=22,
             sortinfo=InstanceSortinfo(**third_singular)),
    ]
    # (start, end, rargname, post) for every link
    link_specs = (
        (1, 2, 'RSTR', 'H'),
        (3, 2, 'ARG1', 'NEQ'),
        (3, 5, 'ARG2', 'NEQ'),
        (4, 5, 'RSTR', 'H'),
    )
    link_list = [
        Link(start=source, end=target, rargname=label, post=post)
        for source, target, label, post in link_specs
    ]
    return DictDmrs(surface='the cat chases the dog',
                    nodes=node_list,
                    links=link_list,
                    index=3,
                    top=3)
예제 #14
0
 def test_RealPred_cmp(self):
     """
     RealPreds should be ordered by their slots: a different lemma,
     pos or sense (including a missing sense) makes them unequal and
     strictly ordered.
     """
     take = RealPred('take', 'v', '1')
     take_again = RealPred('take', 'v', '1')
     jump = RealPred('jump', 'v', '1')
     take_n = RealPred('take', 'n', '1')
     take_off = RealPred('take', 'v', 'off')
     take_none = RealPred('take', 'v')

     # Equal predicates satisfy ==, <= and >=
     self.assertEqual(take, take_again)
     self.assertLessEqual(take, take_again)
     self.assertGreaterEqual(take, take_again)

     for different in (jump, take_n, take_off, take_none):
         self.assertNotEqual(take, different)

     # (smaller, larger) pairs; each is checked with all four operators
     ordered_pairs = (
         (jump, take),
         (take_n, take),
         (take, take_off),
         (take_none, take),
     )
     for smaller, larger in ordered_pairs:
         self.assertLess(smaller, larger)
         self.assertGreater(larger, smaller)
         self.assertLessEqual(smaller, larger)
         self.assertGreaterEqual(larger, smaller)
예제 #15
0
def _parse_pred(string, nodeid, queries, equalities):
    """
    Parse a predicate string into a predicate object and a reference name.

    Three forms are handled: the keywords 'pred' / 'predsort' (optionally
    followed by a special value), strings without a leading underscore
    (GPred), and strings with a leading underscore (RealPred). A
    surrounding pair of double quotes and a trailing '_rel' are accepted.
    Values are parsed through _parse_value, which receives ``queries``
    and ``equalities`` plus an accessor for the matched node.

    Returns a tuple (predicate, reference name string).
    Raises AssertionError for strings that are not lower-case, contain
    spaces or inner double quotes, or start with a single quote.
    """
    assert string.islower(), 'Predicates must be lower-case.'
    assert ' ' not in string, 'Predicates must not contain spaces.'
    if string[0] == '"' and string[-1] == '"':
        string = string[1:-1]
    assert '"' not in string, 'Predicates must not contain quotes.'
    assert string[0] != '\'', \
        'Predicates with opening single-quote have been deprecated.'

    def matched_pred(matching, dmrs):
        # predicate of the node that `nodeid` is matched to
        return dmrs[matching[nodeid]].pred

    def is_keyword(keyword):
        # keyword alone, or keyword directly followed by a special value
        size = len(keyword)
        return string[:size] == keyword and (
            len(string) == size or string[size] in special_values)

    if is_keyword('pred') or is_keyword('predsort'):
        cut = 8 if string[:8] == 'predsort' else 4
        value = _parse_value(string[cut:], None, queries, equalities,
                             matched_pred)
        assert not value
        return Pred(), string[:cut]

    rel_suffix = ''
    if string[-4:] == '_rel':
        rel_suffix = '_rel'
        string = string[:-4]

    if string[0] != '_':
        name = _parse_value(
            string, '?', queries, equalities,
            (lambda matching, dmrs: matched_pred(matching, dmrs).name))
        return GPred(name), name + rel_suffix

    values = string[1:].rsplit('_', 2)
    count = len(values)
    assert count > 0, 'Invalid number of arguments for RealPred.'
    if count == 1:
        # only a pos was given: lemma and sense stay underspecified
        values = ['?', values[0], 'unknown']
    elif count == 2:
        values = values + [None]

    lemma = _parse_value(
        values[0], '?', queries, equalities,
        (lambda matching, dmrs: matched_pred(matching, dmrs).lemma))
    pos = _parse_value(
        values[1], 'u', queries, equalities,
        (lambda matching, dmrs: matched_pred(matching, dmrs).pos))  # u ???
    sense = _parse_value(
        values[2], 'unknown', queries, equalities,
        (lambda matching, dmrs: matched_pred(matching, dmrs).sense))  # unknown ???

    # the reference name only mentions the slots that were actually given
    templates = {1: '_{1}{3}', 2: '_{0}_{1}{3}'}
    template = templates.get(count, '_{0}_{1}_{2}{3}')
    ref_name = template.format(lemma, pos, sense, rel_suffix)
    return RealPred(lemma, pos, sense), ref_name
예제 #16
0
 def test_Pred_from_string(self):
     """
     Pred.from_string should accept a trailing '_rel', surrounding
     double quotes and upper-case input, and return the same predicate
     as the normalised string.
     """
     cat_pred = RealPred.from_normalised_string('_cat_n_1')
     for text in ('_cat_n_1_rel', '"_cat_n_1_rel"', '_CAT_N_1_REL'):
         self.assertEqual(Pred.from_string(text), cat_pred)

     the_pred = GPred.from_normalised_string('the')
     for text in ('the_rel', '"the_rel"', 'THE_REL'):
         self.assertEqual(Pred.from_string(text), the_pred)
예제 #17
0
 def test_RealPred_immutable(self):
     """
     Setting the ``lemma`` attribute on an existing RealPred should
     be rejected with AttributeError.
     """
     two_slot = RealPred('the', 'q')
     three_slot = RealPred('cat', 'n', '1')
     for frozen in (two_slot, three_slot):
         with self.assertRaises(AttributeError):
             frozen.lemma = 1
예제 #18
0
    def test_Pred_from_normalised_string(self):
        """
        Pred.from_normalised_string should return a RealPred for a
        string with a leading underscore and a GPred otherwise, equal
        to what the subclass constructor itself produces.
        """
        # (normalised string, expected class)
        samples = (('_cat_n_1', RealPred), ('the_q', GPred))

        # Check the preds are of the right type
        generic = [Pred.from_normalised_string(text) for text, _ in samples]
        for pred, (_, expected_class) in zip(generic, samples):
            self.assertIsInstance(pred, expected_class)

        # Check the preds are equivalent to initialising directly
        direct = [expected_class.from_normalised_string(text)
                  for text, expected_class in samples]
        for pred, reference in zip(generic, direct):
            self.assertEqual(pred, reference)
예제 #19
0
    def test_Pred_from_normalised_string(self):
        """
        The generic Pred.from_normalised_string should dispatch on the
        leading underscore: RealPred with it, GPred without it.
        """
        cat_text, the_text = '_cat_n_1', 'the_q'

        # Type of the result
        from_pred = {
            cat_text: Pred.from_normalised_string(cat_text),
            the_text: Pred.from_normalised_string(the_text),
        }
        self.assertIsInstance(from_pred[cat_text], RealPred)
        self.assertIsInstance(from_pred[the_text], GPred)

        # Same value as the subclass constructors
        from_subclass = {
            cat_text: RealPred.from_normalised_string(cat_text),
            the_text: GPred.from_normalised_string(the_text),
        }
        self.assertEqual(from_pred[cat_text], from_subclass[cat_text])
        self.assertEqual(from_pred[the_text], from_subclass[the_text])
예제 #20
0
 def test_Pred_from_string(self):
     """
     Pred.from_string should normalise the string as necessary,
     and should issue a warning for upper-case input.
     """
     cat_pred = RealPred.from_normalised_string('_cat_n_1')
     self.assertEqual(Pred.from_string('_cat_n_1_rel'), cat_pred)
     self.assertEqual(Pred.from_string('"_cat_n_1_rel"'), cat_pred)
     # catch_warnings restores the global warning filters on exit, even
     # when the assertion fails, so the 'error' filter cannot leak into
     # other tests and pre-existing filters are not wiped.
     with warnings.catch_warnings():
         warnings.simplefilter('error')
         # The warning is raised as an exception, so there is no return
         # value to compare here.
         with self.assertRaises(Warning):
             Pred.from_string('_CAT_N_1_REL')

     the_pred = GPred.from_normalised_string('the')
     self.assertEqual(Pred.from_string('the_rel'), the_pred)
     self.assertEqual(Pred.from_string('"the_rel"'), the_pred)
     with warnings.catch_warnings():
         warnings.simplefilter('error')
         with self.assertRaises(Warning):
             Pred.from_string('THE_REL')
예제 #21
0
파일: mapping.py 프로젝트: goodmami/pydmrs
 def map(self, dmrs, nodeid, hierarchy=None):
     """
     Overrides the values of the target node if they are not underspecified in this anchor node.

     Nothing is changed when this node equals the target node or is
     less specific than it.

     :param dmrs: Target DMRS graph.
     :param nodeid: Target node id.
     :param hierarchy: An optional predicate hierarchy.
     """
     target = dmrs[nodeid]
     if self == target or self.is_less_specific(target, hierarchy=hierarchy):
         return

     # --- predicate ---
     if isinstance(self.pred, RealPred):
         if isinstance(target.pred, RealPred):
             # keep the target's slot wherever this node leaves it open
             lemma = target.pred.lemma if self.pred.lemma == '?' else self.pred.lemma
             pos = target.pred.pos if self.pred.pos in ('u', '?') else self.pred.pos
             sense = target.pred.sense if self.pred.sense in ('unknown', '?') else self.pred.sense
             target.pred = RealPred(lemma, pos, sense)
         else:
             target.pred = copy.deepcopy(self.pred)
     elif isinstance(self.pred, GPred):
         if isinstance(target.pred, GPred):
             name = target.pred.name if self.pred.name == '?' else self.pred.name
             target.pred = GPred(name)
         else:
             target.pred = copy.deepcopy(self.pred)
     elif not isinstance(self.pred, Pred):
         target.pred = None

     # --- sortinfo ---
     open_values = ('u', '?')
     if isinstance(self.sortinfo, EventSortinfo):
         if isinstance(target.sortinfo, EventSortinfo):
             merged = [
                 getattr(target.sortinfo, feature)
                 if getattr(self.sortinfo, feature) in open_values
                 else getattr(self.sortinfo, feature)
                 for feature in ('sf', 'tense', 'mood', 'perf', 'prog')
             ]
             target.sortinfo = EventSortinfo(*merged)
         else:
             target.sortinfo = copy.deepcopy(self.sortinfo)
     elif isinstance(self.sortinfo, InstanceSortinfo):
         if isinstance(target.sortinfo, InstanceSortinfo):
             merged = [
                 getattr(target.sortinfo, feature)
                 if getattr(self.sortinfo, feature) in open_values
                 else getattr(self.sortinfo, feature)
                 for feature in ('pers', 'num', 'gend', 'ind', 'pt')
             ]
             target.sortinfo = InstanceSortinfo(*merged)
         else:
             target.sortinfo = copy.deepcopy(self.sortinfo)
     elif not isinstance(self.sortinfo, Sortinfo):
         target.sortinfo = None

     # --- carg ---
     if self.carg != '?':
         target.carg = self.carg
예제 #22
0
    def test_RealPred_new(self):
        """
        The RealPred constructor should accept two slots (lemma, pos)
        or three slots (lemma, pos, sense), positionally or by keyword,
        and reject any other number of arguments with TypeError.
        Attribute access is checked by the assert_the / assert_cat helpers.
        """
        # Two slots: positional, then keyword
        for args, kwargs in ((('the', 'q'), {}),
                             ((), {'lemma': 'the', 'pos': 'q'})):
            self.assert_the(RealPred(*args, **kwargs))

        # Three slots: positional, then keyword
        for args, kwargs in ((('cat', 'n', '1'), {}),
                             ((), {'lemma': 'cat', 'pos': 'n', 'sense': '1'})):
            self.assert_cat(RealPred(*args, **kwargs))

        # Too few and too many arguments
        for bad_args in (('cat',), ('cat', 'n', '1', '2')):
            with self.assertRaises(TypeError):
                RealPred(*bad_args)
예제 #23
0
def lemmatize_pred(pred, pos):
    """
    Return a RealPred whose lemma is the lemmatized form of ``pred``'s lemma.

    Anything after the last '/' in the lemma is dropped before calling
    the lemmatizer; the result gets the given ``pos`` and the sense
    'unknown'.
    """
    pieces = pred.lemma.rsplit('/', 1)
    stem = pieces[0]
    lemmatized = lemmatizer.lemmatize(stem, pos)
    return RealPred(lemmatized, pos, 'unknown')
예제 #24
0
from scipy.special import expit
from numpy import outer, zeros_like, zeros, array
from math import log

from pydmrs.components import RealPred
from utils import make_shared, is_verb

# Width of each row of pred_wei (see the zeros((len(preds), D)) call below).
D = 800
# NOTE(review): C is not used in the visible part of this file - confirm its meaning.
C = 40

# Each of the two input matrices fills one half of a row.
half = int(D/2)

# Load the pickled vocabulary.
# NOTE(review): pickle.load executes arbitrary code from the file; only use trusted data.
with open('/anfs/bigdisc/gete2/wikiwoods/core-5-vocab.pkl', 'rb') as f:
    preds = pickle.load(f)
# Predicate (as stored in the vocabulary) -> row index.
ind = {p:i for i,p in enumerate(preds)}
# Same indices, keyed by the parsed RealPred instead of the raw vocabulary entry.
pred_index = {RealPred.from_string(p):i for p,i in ind.items()}

# One row per predicate; the noun matrix is written at column offset 0,
# the verb matrix at column offset `half`.
pred_wei = make_shared(zeros((len(preds), D)))
for filename, offset in [('/anfs/bigdisc/gete2/wikiwoods/word2vec/matrix_nouns400', 0),
                         ('/anfs/bigdisc/gete2/wikiwoods/word2vec/matrix_verbs400', half)]:
    with open(filename, 'r') as f:
        for line in f:
            # Each line: predicate, whitespace, then whitespace-separated vector entries.
            pred, vecstr = line.strip().split(maxsplit=1)
            # NOTE(review): vec holds strings here; this relies on the assignment
            # below converting them to numbers - confirm.
            vec = array(vecstr.split())
            pred_wei[ind[pred], offset:offset+half] = vec
# Make vectors longer (av. sum 1.138 over av. 44.9 nonzero entries)
# An average entry is then 0.2, so a predicate is expit(0.2*30 - 3) = 0.95 true
# NOTE(review): the comment above mentions a factor of 30, the code scales by 8 - confirm which is intended.
pred_wei *= 8

# Path prefix of the data set (usage not visible in this part of the file).
DATA = '/anfs/bigdisc/gete2/wikiwoods/core-5'
예제 #25
0
파일: serial.py 프로젝트: delph-in/pydmrs
def loads_xml(bytestring, encoding=None, cls=ListDmrs, **kwargs):
    """
    Build a DMRS graph from the XML of a single "<dmrs>...</dmrs>" element.
    ("<dmrslist>...</dmrslist>" is not handled yet.)

    :param bytestring: The XML as bytes; a str is accepted when
        ``encoding`` is given, and is encoded with it first.
    :param encoding: Encoding used to turn a str input into bytes.
    :param cls: Graph class to instantiate (ListDmrs by default).
    :param kwargs: Passed on to the ``cls`` constructor.
    :return: The populated ``cls`` instance.
    :raises PydmrsValueError: On an unexpected tag at any level.
    """
    def optional_int(element, attribute):
        # int value of the attribute when present, otherwise None
        if attribute in element.attrib:
            return int(element.get(attribute))
        return None

    if encoding:
        bytestring = bytestring.encode(encoding)
    root = ET.XML(bytestring)

    graph = cls(**kwargs)

    graph.cfrom = optional_int(root, 'cfrom')
    graph.cto = optional_int(root, 'cto')
    graph.surface = root.get('surface')
    graph.ident = optional_int(root, 'ident')
    index_id = optional_int(root, 'index')
    top_id = None

    for child in root:
        if child.tag == 'node':
            nodeid = optional_int(child, 'nodeid')
            cfrom = optional_int(child, 'cfrom')
            cto = optional_int(child, 'cto')
            surface = child.get('surface')
            base = child.get('base')
            carg = child.get('carg')

            pred = None
            sortinfo = None
            for part in child:
                if part.tag == 'realpred':
                    try:
                        pred = RealPred(part.get('lemma'), part.get('pos'), part.get('sense'))
                    except PydmrsValueError:
                        # The whole pred name is under 'lemma' instead of
                        # being split between 'lemma', 'pos' and 'sense'
                        pred = RealPred.from_string(part.get('lemma'))
                        warn("RealPred given as string rather than lemma, pos, sense", PydmrsWarning)
                elif part.tag == 'gpred':
                    try:
                        pred = GPred.from_string(part.text)
                    except PydmrsValueError:
                        # The string is actually for a RealPred, not a GPred
                        pred = RealPred.from_string(part.text)
                        warn("RealPred string found in a <gpred> tag", PydmrsWarning)
                elif part.tag == 'sortinfo':
                    sortinfo = part.attrib
                else:
                    raise PydmrsValueError(part.tag)

            graph.add_node(cls.Node(nodeid=nodeid,
                                    pred=pred,
                                    carg=carg,
                                    sortinfo=sortinfo,
                                    cfrom=cfrom,
                                    cto=cto,
                                    surface=surface,
                                    base=base))
            continue

        if child.tag != 'link':
            raise PydmrsValueError(child.tag)

        start = int(child.get('from'))
        end = int(child.get('to'))

        if start == 0:
            # a link from node 0 marks the top node rather than a real link
            top_id = end
            continue

        rargname = None
        post = None
        for part in child:
            if part.tag == 'rargname':
                rargname = part.text
            elif part.tag == 'post':
                post = part.text
            else:
                raise PydmrsValueError(part.tag)
        graph.add_link(Link(start, end, rargname, post))

    # top and index are resolved last, once all nodes have been added
    if top_id:
        graph.top = graph[top_id]
    if index_id:
        graph.index = graph[index_id]

    return graph
예제 #26
0
from copy import copy

from pydmrs.core import Link, LinkLabel
from pydmrs.components import Pred, RealPred, GPred
from pydmrs.simplification.gpred_filtering import gpred_filtering, DEFAULT_FILTER
#from pydmrs.mapping.mapping import dmrs_mapping
from pydmrs.graphlang.graphlang import parse_graphlang

# Also remove pronouns
# (the default filter set extended with the 'pron' GPred)
extended_filter = DEFAULT_FILTER | {GPred('pron')}

# Replace the first pred with the second:
# each entry is a pair (pred to replace, replacement pred)
rename = [(RealPred('forwards', 'p'), RealPred('forward', 'p', 'dir'))]

# Replace a pair of nodes with a single node
# (the first pred linked to the second pred, is replaced by the third pred)
# Each entry is (first pred, link label, second pred, replacement pred), as strings.
shrink = [('_left_a_1', 'ARG1/EQ', 'place_n', '_left_n_1'),
          ('_right_a_1', 'ARG1/EQ', 'place_n', '_right_n_1'),
          ('loc_nonsp', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
          ('loc_nonsp', 'ARG2/NEQ', '_right_n_1', '_right_p_dir'),
          ('_to_p', 'ARG2/NEQ', '_left_n_1', '_left_p_dir'),
          ('_to_p', 'ARG2/NEQ', '_right_n_1', '_right_p_dir')]

# Parse the strings above into Pred and LinkLabel objects, rebinding `shrink`.
shrink = [(Pred.from_string(a), LinkLabel.from_string(b), Pred.from_string(c),
           Pred.from_string(d)) for a, b, c, d in shrink]


def simplify(dmrs):
    """
    Simplify an input DMRS to a form that can be converted to robot commands
    """
예제 #27
0
def noun():
    """
    Build a DictDmrs holding a single underspecified noun node.

    The node's predicate has lemma '?', pos 'n' and sense 'unknown',
    and its sortinfo is a plain Sortinfo.
    """
    graph = DictDmrs()
    # underspecified noun and sortinfo
    placeholder = Node(pred=RealPred('?', 'n', 'unknown'), sortinfo=Sortinfo())
    graph.add_node(placeholder)
    return graph
예제 #28
0
def the():
    """Build a DictDmrs holding a single 'the' quantifier node."""
    graph = DictDmrs()
    # no nodeid is passed: the node id is set automatically
    quantifier = Node(pred=RealPred('the', 'q'))
    graph.add_node(quantifier)
    return graph
예제 #29
0
 def test_RealPred_from_string(self):
     """
     RealPred.from_string should build RealPreds from strings with or
     without a trailing '_rel', and reject malformed input.
     """
     # (constructor slots, string without '_rel'):
     # two slots, three slots, and a lemma with an inner underscore
     cases = (
         (('the', 'q'), '_the_q'),
         (('cat', 'n', '1'), '_cat_n_1'),
         (('nowhere_near', 'x', 'deg'), '_nowhere_near_x_deg'),
     )
     for slots, text in cases:
         with_rel = RealPred.from_string(text + '_rel')
         without_rel = RealPred.from_string(text)
         for parsed in (with_rel, without_rel):
             self.assertEqual(RealPred(*slots), parsed)
         for parsed in (with_rel, without_rel):
             self.assertIsInstance(parsed, RealPred)

     # Too few slots or no leading underscore
     for malformed in ("_the_rel", "_the", "udef_q_rel"):
         with self.assertRaises(ValueError):
             RealPred.from_string(malformed)
     # Not a string
     with self.assertRaises(TypeError):
         RealPred.from_string(1)
예제 #30
0
 def test_RealPred_from_string(self):
     """
     Parsing a predicate string with RealPred.from_string should give
     the same RealPred as the constructor, whether or not the string
     ends in '_rel'; invalid input should raise.
     """
     def check(expected, base_text):
         # parse both spellings first, then compare, then type-check
         rel_form = RealPred.from_string(base_text + '_rel')
         plain_form = RealPred.from_string(base_text)
         self.assertEqual(expected(), rel_form)
         self.assertEqual(expected(), plain_form)
         self.assertIsInstance(rel_form, RealPred)
         self.assertIsInstance(plain_form, RealPred)

     # Two slots
     check(lambda: RealPred('the', 'q'), '_the_q')
     # Three slots
     check(lambda: RealPred('cat', 'n', '1'), '_cat_n_1')
     # Intermediate underscores in lemma
     check(lambda: RealPred('nowhere_near', 'x', 'deg'), '_nowhere_near_x_deg')

     # Too few slots, no leading underscore, or not a string
     rejected = (
         ("_the_rel", ValueError),
         ("_the", ValueError),
         ("udef_q_rel", ValueError),
         (1, TypeError),
     )
     for bad_input, error_type in rejected:
         with self.assertRaises(error_type):
             RealPred.from_string(bad_input)