Example #1
def _read_lnk(tokens):
    """Read and return a tuple of the pred's lnk type and lnk value,
       if a pred lnk is specified."""
    # < FROM : TO > or < FROM # TO > or < TOK... > or < @ EDGE >
    lnk = None
    if tokens[0] == '<':
        tokens.popleft()  # we just checked this is a left angle
        if tokens[0] == '>':
            pass  # empty <> brackets the same as no lnk specified
        # edge lnk: ['@', EDGE, ...]
        elif tokens[0] == '@':
            tokens.popleft()  # remove the @
            lnk = Lnk.edge(tokens.popleft())  # edge lnks only have one number
        # character span lnk: [FROM, ':', TO, ...]
        elif tokens[1] == ':':
            lnk = Lnk.charspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the colon
            tokens.popleft()  # and this is the cto
        # chart vertex range lnk: [FROM, '#', TO, ...]
        elif tokens[1] == '#':
            lnk = Lnk.chartspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the hash
            tokens.popleft()  # and this is the to vertex
        # tokens lnk: [(TOK,)+ ...]
        else:
            lnkdata = []
            while tokens[0] != '>':
                lnkdata.append(int(tokens.popleft()))
            lnk = Lnk.tokens(lnkdata)
        _read_literals(tokens, '>')
    return lnk
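
The _read_literals helper that consumes the closing '>' is not shown here; Example #2 below performs the same check with validate_token(tokens.popleft(), _right_angle). A minimal sketch of what it presumably does (a reconstruction, not the library's actual code): pop each expected literal off the deque and fail loudly on a mismatch.

def _read_literals(tokens, *literals):
    # Hypothetical reconstruction of the unshown helper: consume each
    # expected literal from the front of the token deque, mirroring
    # validate_token in Example #2.
    for expected in literals:
        token = tokens.popleft()
        if token != expected:
            raise ValueError('expected {!r}, got {!r}'.format(expected, token))
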
Example #2
def read_lnk(tokens):
    """Read and return a tuple of the pred's lnk type and lnk value,
       if a pred lnk is specified."""
    # < FROM : TO > or < FROM # TO > or < TOK... > or < @ EDGE >
    lnk = None
    if tokens[0] == _left_angle:
        tokens.popleft()  # we just checked this is a left angle
        if tokens[0] == _right_angle:
            pass  # empty <> brackets the same as no lnk specified
        # edge lnk: ['@', EDGE, ...]
        elif tokens[0] == _at:
            tokens.popleft()  # remove the @
            lnk = Lnk.edge(tokens.popleft())  # edge lnks only have one number
        # character span lnk: [FROM, ':', TO, ...]
        elif tokens[1] == _colon:
            lnk = Lnk.charspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the colon
            tokens.popleft()  # and this is the cto
        # chart vertex range lnk: [FROM, '#', TO, ...]
        elif tokens[1] == _hash:
            lnk = Lnk.chartspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the hash
            tokens.popleft()  # and this is the to vertex
        # tokens lnk: [(TOK,)+ ...]
        else:
            lnkdata = []
            while tokens[0] != _right_angle:
                lnkdata.append(int(tokens.popleft()))
            lnk = Lnk.tokens(lnkdata)
        validate_token(tokens.popleft(), _right_angle)
    return lnk
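
Example #2 is the same reader written against named one-character constants and a validate_token check. Neither is shown in the snippet; under the obvious assumption, they look like this (a sketch, not the module's actual definitions):

# Hypothetical module-level names implied by Example #2:
_left_angle, _right_angle, _at, _colon, _hash = '<', '>', '@', ':', '#'

def validate_token(token, expected):
    # raise if the consumed token is not the expected literal
    if token != expected:
        raise ValueError('expected {!r}, got {!r}'.format(expected, token))
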
Example #3
    def test_from_list(self):
        tl = YY.from_list(
            [{'id':1, 'start': 0, 'end': 1, 'form': "dog"}]
        )
        assert tl.tokens == [YyToken(1, 0, 1, form="dog")]

        tl = YY.from_list(
            [
                {'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 4,
                 'paths': [1], 'form': "dogs", 'surface': "Dogs",
                 #'ipos': 0, 'lrules': ["null"],
                 'tags': ["NN"], 'probabilities': [1.0]
                },
                {'id': 1, 'start': 0, 'end': 1, 'from': 5, 'to': 9,
                 'paths': [1], 'form': "bark",
                 #'ipos': 0, 'lrules': ["null"],
                 'tags': ["VBZ"], 'probabilities': [1.0]
                }
            ]
        )
        assert tl.tokens == [
            YyToken(1, 0, 1, Lnk.charspan(0,4), [1], "dogs", "Dogs",
                    ipos=0, lrules=["null"], pos=[("NN", 1.0)]),
            YyToken(1, 0, 1, Lnk.charspan(5,9), [1], "bark",
                    ipos=0, lrules=["null"], pos=[("VBZ", 1.0)])
        ]
Example #4
 def test_lnk(self):
     n = Node(10000, spred('_dog_n_rel'))
     assert n.lnk == None
     assert n.cfrom == -1
     assert n.cto == -1
     n = Node(10000, spred('_dog_n_rel'), lnk=Lnk.charspan(0, 1))
     assert n.lnk == Lnk.charspan(0, 1)
     assert n.cfrom == 0
     assert n.cto == 1
Example #5
 def test_lnk(self):
     n = Node(10000, spred('_dog_n_rel'))
     assert n.lnk == None
     assert n.cfrom == -1
     assert n.cto == -1
     n = Node(10000, spred('_dog_n_rel'),
              lnk=Lnk.charspan(0,1))
     assert n.lnk == Lnk.charspan(0,1)
     assert n.cfrom == 0
     assert n.cto == 1
Example #6
 def testChartSpanLnk(self):
     lnk = Lnk.chartspan(0, 1)
     assert lnk.type == Lnk.CHARTSPAN
     assert lnk.data == (0, 1)
     assert str(lnk) == '<0#1>'
     repr(lnk)  # no error
     lnk = Lnk.chartspan('0', '1')
     assert lnk.data == (0, 1)
     with pytest.raises(TypeError): Lnk.chartspan(1)
     with pytest.raises(TypeError): Lnk.chartspan([1, 2])
     with pytest.raises(TypeError): Lnk.chartspan(1, 2, 3)
     with pytest.raises(ValueError): Lnk.chartspan('a', 'b')
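
Together with Examples #13 and #18 below, these tests pin down all four Lnk constructors. A compact usage sketch; the '<0:1>' rendering for charspan is inferred from the < FROM : TO > comment in Example #1, and the import path varies by pydelphin version:

from delphin.mrs.components import Lnk  # path assumed; newer releases use delphin.lnk

Lnk.charspan(0, 1)     # character span, rendered like '<0:1>'
Lnk.chartspan(0, 1)    # chart vertex span, str() == '<0#1>' (this example)
Lnk.tokens([1, 2, 3])  # token ids, str() == '<1 2 3>' (Example #18)
Lnk.edge(1)            # edge id, str() == '<@1>' (Example #13)
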
Example #7
    def test_from_list(self):
        tl = YY.from_list([{'id': 1, 'start': 0, 'end': 1, 'form': "dog"}])
        assert tl.tokens == [YyToken(1, 0, 1, form="dog")]

        tl = YY.from_list([
            {
                'id': 1,
                'start': 0,
                'end': 1,
                'from': 0,
                'to': 4,
                'paths': [1],
                'form': "dogs",
                'surface': "Dogs",
                #'ipos': 0, 'lrules': ["null"],
                'tags': ["NN"],
                'probabilities': [1.0]
            },
            {
                'id': 1,
                'start': 0,
                'end': 1,
                'from': 5,
                'to': 9,
                'paths': [1],
                'form': "bark",
                #'ipos': 0, 'lrules': ["null"],
                'tags': ["VBZ"],
                'probabilities': [1.0]
            }
        ])
        assert tl.tokens == [
            YyToken(1,
                    0,
                    1,
                    Lnk.charspan(0, 4), [1],
                    "dogs",
                    "Dogs",
                    ipos=0,
                    lrules=["null"],
                    pos=[("NN", 1.0)]),
            YyToken(1,
                    0,
                    1,
                    Lnk.charspan(5, 9), [1],
                    "bark",
                    ipos=0,
                    lrules=["null"],
                    pos=[("VBZ", 1.0)])
        ]
Example #8
def decode_lnk(cfrom, cto):
    if cfrom is cto is None:
        return None
    elif None in (cfrom, cto):
        raise ValueError('Both cfrom and cto, or neither, must be specified.')
    else:
        return Lnk.charspan(cfrom, cto)
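
A quick usage sketch covering decode_lnk's three branches, following directly from the code above:

assert decode_lnk(None, None) is None           # no span at all
assert decode_lnk(0, 4) == Lnk.charspan(0, 4)   # both ends given
try:
    decode_lnk(0, None)                         # half-specified span
except ValueError:
    pass                                        # expected
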
Example #9
 def from_string(cls, s):
     """
     Decode from the YY token lattice format.
     """
     def _qstrip(s):
         return s[1:-1]  # remove assumed quote characters
     tokens = []
     for match in _yy_re.finditer(s):
         d = match.groupdict()
         lnk, pos = None, []
         if d['lnkfrom'] is not None:
             lnk = Lnk.charspan(d['lnkfrom'], d['lnkto'])
         if d['pos'] is not None:
             ps = d['pos'].strip().split()
             pos = list(zip(map(_qstrip, ps[::2]), map(float, ps[1::2])))
         tokens.append(
             YyToken(
                 int(d['id']),
                 int(d['start']),
                 int(d['end']),
                 lnk,
                 list(map(int, d['paths'].strip().split())),
                 _qstrip(d['form']),
                 None if d['surface'] is None else _qstrip(d['surface']),
                 int(d['ipos']),
                 list(map(_qstrip, d['lrules'].strip().split())),
                 pos
             )
         )
     return cls(tokens)
Example #10
    def from_string(cls, s):
        """
        Decode from the YY token lattice format.
        """
        def _qstrip(s):
            return s[1:-1]  # remove assumed quote characters

        tokens = []
        for match in _yy_re.finditer(s):
            d = match.groupdict()
            lnk, pos = None, []
            if d['lnkfrom'] is not None:
                lnk = Lnk.charspan(d['lnkfrom'], d['lnkto'])
            if d['pos'] is not None:
                ps = d['pos'].strip().split()
                pos = list(zip(map(_qstrip, ps[::2]), map(float, ps[1::2])))
            tokens.append(
                YyToken(
                    int(d['id']), int(d['start']), int(d['end']), lnk,
                    list(map(int, d['paths'].strip().split())),
                    _qstrip(d['form']),
                    None if d['surface'] is None else _qstrip(d['surface']),
                    int(d['ipos']),
                    list(map(_qstrip, d['lrules'].strip().split())), pos))
        return cls(tokens)
Example #11
 def test_to_dict(self):
     t = YyToken(1, 0, 1, form="dog")
     assert t.to_dict() == {'id': 1, 'start': 0, 'end': 1, 'form': "dog"}
     t = YyToken(1,
                 0,
                 1,
                 Lnk.charspan(0, 1), [1],
                 "dog",
                 "Dog",
                 ipos=0,
                 lrules=["null"],
                 pos=[("NN", 1.0)])
     assert t.to_dict() == {
         'id': 1,
         'start': 0,
         'end': 1,
         'from': 0,
         'to': 1,
         #'paths': [1],
         'form': "dog",
         'surface': "Dog",
         #'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"],
         'probabilities': [1.0]
     }
Example #12
 def from_triples(cls, triples):
     lnk, surface, identifier = None, None, None
     nids, nd, edges = [], {}, []
     for src, rel, tgt in triples:
         if src not in nd:
             nids.append(src)
             nd[src] = {'pred': None, 'lnk': None, 'carg': None, 'si': []}
         if rel == 'predicate':
             nd[src]['pred'] = Pred.string_or_grammar_pred(tgt)
         elif rel == 'lnk':
             cfrom, cto = tgt.strip('"<>').split(':')
             nd[src]['lnk'] = Lnk.charspan(int(cfrom), int(cto))
         elif rel == 'carg':
             if (tgt[0], tgt[-1]) == ('"', '"'):
                 tgt = tgt[1:-1]
             nd[src]['carg'] = tgt
         elif rel == 'type':
             nd[src]['si'].append((CVARSORT, tgt))
         elif rel.islower():
             nd[src]['si'].append((rel, tgt))
         else:
             edges.append((src, rel, tgt))
     nodes = [
         Node(nodeid=nid,
              pred=nd[nid]['pred'],
              sortinfo=nd[nid]['si'],
              lnk=nd[nid]['lnk'],
              carg=nd[nid]['carg']) for nid in nids
     ]
     top = nids[0] if nids else None
     return cls(top=top, nodes=nodes, edges=edges)
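
The lnk branch strips quotes and angle brackets from the target before splitting on the colon, so a serialized span decodes back to the same charspan that Examples #19 and #21 construct directly:

tgt = '"<3:9>"'                         # hypothetical 'lnk' triple target
cfrom, cto = tgt.strip('"<>').split(':')
assert Lnk.charspan(int(cfrom), int(cto)) == Lnk.charspan(3, 9)
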
Example #13
 def testEdgeLnk(self):
     lnk = Lnk.edge(1)
     assert lnk.type == Lnk.EDGE
     assert lnk.data == 1
     assert str(lnk) == '<@1>'
     repr(lnk)  # no error
     lnk = Lnk.edge('1')
     assert lnk.data == 1
     with pytest.raises(TypeError): Lnk.edge(None)
     with pytest.raises(TypeError): Lnk.edge((1,))
     with pytest.raises(ValueError): Lnk.edge('a')
Example #14
 def tokenize(self, s, pattern=r'[ \t]+', active=None):
     res = self.apply(s, active=active)
     tokens = [
         YyToken(id=i,
                 start=i,
                 end=i + 1,
                 lnk=Lnk.charspan(tok[0], tok[1]),
                 form=tok[2])
         for i, tok in enumerate(_tokenize(res, pattern))
     ]
     return YyTokenLattice(tokens)
Example #15
 def test_fromstring(self):
     assert len(YY.from_string(token_v1_basic).tokens) == 1
     t = YY.from_string(token_v1_basic).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YY.from_string(token_v1_surface).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"], [])
     t = YY.from_string(token_v1_pos).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"],
                 [("NN", 0.8), ("VV", 0.2)])
     t = YY.from_string(token_v1_surface_pos).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"],
                 [("NN", 1.0)])
     t = YY.from_string(token_v1_lrules).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0,
                 ["lrule1", "lrule2"], [])
     t = YY.from_string(token_v2).tokens[0]
     check_token(t, 1, 0, 1, Lnk.charspan(1, 3), [1], "dog", "Dog", 0,
                 ["null"], [("NN", 1.0)])
     tl = YY.from_string(tokenstring)
     assert len(tl.tokens) == 9
     check_token(tl.tokens[0], 42, 0, 1, Lnk.charspan(0, 12), [1],
                 "Tokenization", None, 0, ["null"], [("NNP", 0.7677),
                                                     ("NN", 0.2323)])
     check_token(tl.tokens[1], 43, 1, 2, Lnk.charspan(12, 13), [1], ",",
                 None, 0, ["null"], [(",", 1.0000)])
     check_token(tl.tokens[2], 44, 2, 3, Lnk.charspan(14, 15), [1], "a",
                 None, 0, ["null"], [("DT", 1.0000)])
     check_token(tl.tokens[3], 45, 3, 4, Lnk.charspan(16, 27), [1],
                 "non-trivial", None, 0, ["null"], [("JJ", 1.0000)])
     check_token(tl.tokens[4], 46, 4, 5, Lnk.charspan(28, 36), [1],
                 "exercise", None, 0, ["null"], [("NN", 0.9887),
                                                 ("VB", 0.0113)])
     check_token(tl.tokens[5], 47, 5, 6, Lnk.charspan(36, 37), [1], ",",
                 None, 0, ["null"], [(",", 1.0000)])
     check_token(tl.tokens[6], 48, 6, 7, Lnk.charspan(38, 44), [1], "bazed",
                 None, 0, ["null"], [("VBD", 0.5975), ("VBN", 0.4025)])
     check_token(tl.tokens[7], 49, 7, 8, Lnk.charspan(45, 58), [1],
                 "*****@*****.**", None, 0, ["null"], [("NN", 0.7342),
                                                      ("JJ", 0.2096)])
     check_token(tl.tokens[8], 50, 8, 9, Lnk.charspan(58, 59), [1], ".",
                 None, 0, ["null"], [(".", 1.0000)])
Example #16
 def test_to_dict(self):
     t = YyToken(1, 0, 1, form="dog")
     assert t.to_dict() == {'id':1, 'start': 0, 'end': 1, 'form': "dog"}
     t = YyToken(1, 0, 1, Lnk.charspan(0,1), [1], "dog", "Dog",
                 ipos=0, lrules=["null"], pos=[("NN", 1.0)])
     assert t.to_dict() == {
         'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
         #'paths': [1],
         'form': "dog", 'surface': "Dog",
         #'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]
     }
Example #17
 def test_from_dict(self):
     t = YyToken.from_dict({'id':1, 'start': 0, 'end': 1, 'form': "dog"})
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YyToken.from_dict({
         'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
         #'paths': [1],
         'form': "dog", 'surface': "Dog",
         #'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"], 'probabilities': [1.0]
     })
     check_token(t, 1, 0, 1, Lnk.charspan(0,1), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
Example #18
 def testTokensLnk(self):
     lnk = Lnk.tokens([1, 2, 3])
     assert lnk.type == Lnk.TOKENS
     assert lnk.data == (1, 2, 3)
     assert str(lnk) == '<1 2 3>'
     repr(lnk)  # no error
     lnk = Lnk.tokens(['1'])
     assert lnk.data == (1,)
     # empty tokens list might be invalid, but accept for now
     lnk = Lnk.tokens([])
     assert lnk.data == tuple()
     with pytest.raises(TypeError): Lnk.tokens(1)
     with pytest.raises(ValueError): Lnk.tokens(['a','b'])
Example #19
def eds_it_rains():
    return eds.Eds(
        top='e2',
        nodes=[
            Node(
                'e2',
                Pred.surface('"_rain_v_1_rel"'),
                sortinfo={
                    'SF': 'prop', 'TENSE': 'pres', 'MOOD': 'indicative',
                    'PROG': '-', 'PERF': '-', CVARSORT: 'e'},
                lnk=Lnk.charspan(3, 9)
            )
        ],
        edges=[]
    )
Example #20
 def from_dict(cls, d):
     """
     Decode from a dictionary as from YyToken.to_dict().
     """
     return cls(
         d['id'],
         d['start'],
         d['end'],
         Lnk.charspan(d['from'], d['to']) if 'from' in d else None,
         # d.get('paths', [1]),
         form=d['form'],
         surface=d.get('surface'),
         # ipos=
         # lrules=
         pos=zip(d.get('tags', []), d.get('probabilities', [])))
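
Note that only the 'from' key is tested before building the charspan, and pos is built with zip (a lazy iterator on Python 3). A round-trip sketch, grounded in the expectations of Examples #17 and #23:

d = {'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
     'form': "dog", 'tags': ["NN"], 'probabilities': [1.0]}
t = YyToken.from_dict(d)
assert t.lnk == Lnk.charspan(0, 1)  # 'from'/'to' became a charspan
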
Example #21
def eds_it_rains():
    return eds.Eds(
        top='e2',
        nodes=[
            Node(
                'e2',
                Pred.stringpred('"_rain_v_1_rel"'),
                sortinfo={
                    'SF': 'prop', 'TENSE': 'pres', 'MOOD': 'indicative',
                    'PROG': '-', 'PERF': '-', CVARSORT: 'e'},
                lnk=Lnk.charspan(3, 9)
            )
        ],
        edges=[]
    )
Example #22
 def from_dict(cls, d):
     """
     Decode from a dictionary as from YyToken.to_dict().
     """
     return cls(
         d['id'],
         d['start'],
         d['end'],
         Lnk.charspan(d['from'], d['to']) if 'from' in d else None,
         # d.get('paths', [1]),
         form=d['form'],
         surface=d.get('surface'),
         # ipos=
         # lrules=
         pos=zip(d.get('tags', []), d.get('probabilities', []))
     )
Example #23
 def test_from_dict(self):
     t = YyToken.from_dict({'id': 1, 'start': 0, 'end': 1, 'form': "dog"})
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YyToken.from_dict({
         'id': 1,
         'start': 0,
         'end': 1,
         'from': 0,
         'to': 1,
         #'paths': [1],
         'form': "dog",
         'surface': "Dog",
         #'ipos': 0, 'lrules': ["null"],
         'tags': ["NN"],
         'probabilities': [1.0]
     })
     check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog", 0,
                 ["null"], [("NN", 1.0)])
Example #24
 def test_init(self):
     # pytest.raises only exercises the first statement in its block, so
     # each under-specified constructor call needs its own context manager
     with pytest.raises(TypeError): YyToken()
     with pytest.raises(TypeError): YyToken(1)
     with pytest.raises(TypeError): YyToken(1, 0)
     with pytest.raises(TypeError): YyToken(1, 0, 1)
     with pytest.raises(TypeError): YyToken(1, 0, 1, Lnk.charspan(0,1))
     with pytest.raises(TypeError): YyToken(1, 0, 1, Lnk.charspan(0,1), [1])
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0,1), [1], surface=".")
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0,1), [1], surface=".", ipos=0)
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0,1), [1], surface=".",
                 ipos=0, lrules=["null"])
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0,1), [1], surface=".",
                 ipos=0, lrules=["null"], pos=[(".", 1.0)])
     t = YyToken(1, 0, 1, form="dog")
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YyToken(1, 0, 1, Lnk.charspan(0,1), [1], "dog", "Dog",
                 ipos=0, lrules=["null"], pos=[("NN", 1.0)])
     check_token(t, 1, 0, 1, Lnk.charspan(0,1), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
Example #25
 def test_init(self):
     # pytest.raises only exercises the first statement in its block, so
     # each under-specified constructor call is wrapped separately
     with pytest.raises(TypeError):
         YyToken()
     with pytest.raises(TypeError):
         YyToken(1)
     with pytest.raises(TypeError):
         YyToken(1, 0)
     with pytest.raises(TypeError):
         YyToken(1, 0, 1)
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0, 1))
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0, 1), [1])
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".")
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".", ipos=0)
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0, 1), [1],
                 surface=".", ipos=0, lrules=["null"])
     with pytest.raises(TypeError):
         YyToken(1, 0, 1, Lnk.charspan(0, 1), [1],
                 surface=".", ipos=0, lrules=["null"], pos=[(".", 1.0)])
     t = YyToken(1, 0, 1, form="dog")
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YyToken(1,
                 0,
                 1,
                 Lnk.charspan(0, 1), [1],
                 "dog",
                 "Dog",
                 ipos=0,
                 lrules=["null"],
                 pos=[("NN", 1.0)])
     check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog", 0,
                 ["null"], [("NN", 1.0)])
Example #26
    edges = [(nid, rarg, tgt) for rarg, tgt in d[5]]
    return (node, edges)

_COLON   = regex(r'\s*:\s*', value=Ignore)
_COMMA   = regex(r',\s*')
_SPACES  = regex(r'\s+', value=Ignore)
_SYMBOL  = regex(r'[-+\w]+')
_PRED    = regex(r'((?!<-?\d|\("|\{|\[)\w)+',
                 value=Pred.string_or_grammar_pred)
_EDS     = nt('EDS', value=_make_eds)
_TOP     = opt(nt('TOP'), default=None)
_TOPID   = opt(_SYMBOL, default=None)
_FLAG    = opt(regex(r'\s*\(fragmented\)', value=Ignore))
_NODE    = nt('NODE', value=_make_nodedata)
_DSCN    = opt(lit('|', value=Ignore))
_LNK     = opt(nt('LNK', value=lambda d: Lnk.charspan(*d)), default=None)
_CARG    = opt(nt('CARG'), default=None)
_PROPS   = opt(nt('PROPS', value=lambda d: d[0] + d[1]), default=None)
_EDGES   = nt('EDGES')
_TYPE    = opt(_SYMBOL, value=lambda i: [(CVARSORT, i)], default=[])
_AVLIST  = nt('AVLIST')
_ATTRVAL = nt('ATTRVAL')

_eds_parser = Peg(
    grammar=dict(
        start=delimited(_EDS, Spacing),
        EDS=bounded(regex(r'\{\s*'), seq(_TOP, nt('NODES')), regex(r'\s*\}')),
        TOP=seq(_TOPID, _COLON, _FLAG, Spacing, value=lambda d: d[0]),
        NODES=delimited(_NODE, Spacing),
        NODE=seq(_DSCN, _SYMBOL, _COLON, _PRED, _LNK, _CARG, _PROPS, _EDGES),
        LNK=bounded(lit('<'), seq(Integer, _COLON, Integer), lit('>')),
Example #27
 def test_raw_init(self):
     # don't allow just any Lnk type
     with pytest.raises(XmrsError): Lnk('lnktype', (0, 1))
Example #28
def test_REPP():
    r = repp.REPP

    # single-module REPP

    x = r().apply('abc')
    assert x.string == 'abc'
    assert x.startmap.tolist() == [1,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,-1]

    x = r.from_string('').apply('abc')
    assert x.string == 'abc'
    assert x.startmap.tolist() == [1,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,-1]

    x = r.from_string(r'!a	b').apply('ccc')  # no match
    assert x.string == 'ccc'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, -1]

    x = r.from_string(r'!a	b').apply('baba')
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1,0,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,0,-1]

    x = r.from_string(r'!a	aa').apply('baba')
    assert x.string == 'baabaa'
    assert x.startmap.tolist() == [1,0,0,-1,-1,-1,-2,-2]
    assert x.endmap.tolist()  == [0,0,0,-1,-1,-1,-2,-3]

    x = r.from_string(r'!(\w+)	[\1]').apply('abc def')
    assert x.string == '[abc] [def]'
    assert x.startmap.tolist() == [1, 0,-1,-1,-1,-1,-2,-2,-3,-3,-3,-3,-4]
    assert x.endmap.tolist()   == [0,-1,-1,-1,-1,-2,-2,-3,-3,-3,-3,-4,-5]

    x = r.from_string(r"!wo(n't)	will \1").apply("I won't go")
    assert x.string == "I will n't go"
    assert x.startmap.tolist() == [1,0,0,0,-1,-2,-3,-4,-3,-3,-3,-3,-3,-3,-3]
    assert x.endmap.tolist()   == [0,0,0,1, 0,-1,-2,-3,-3,-3,-3,-3,-3,-3,-4]

    x = r.from_string(r"!wo(n't)	will \1").tokenize("I won't go")
    assert len(x.tokens) == 4
    assert x.tokens[0].form == 'I'
    assert x.tokens[0].lnk == Lnk.charspan(0,1)
    assert x.tokens[1].form == 'will'
    assert x.tokens[1].lnk == Lnk.charspan(2,4)
    assert x.tokens[2].form == "n't"
    assert x.tokens[2].lnk == Lnk.charspan(4,7)
    assert x.tokens[3].form == 'go'
    assert x.tokens[3].lnk == Lnk.charspan(8,10)

    # additional modules/groups

    x = r.from_string('>a', modules={'a': r.from_string(r'!a	b')}, active=['a']).apply('baba')
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1,0,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,0,-1]

    x = r.from_string('>a', modules={'a': r.from_string(r'!a	b')}).apply('baba', active=['a'])
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1,0,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,0,-1]

    x = r.from_string('>a', modules={'a': r.from_string(r'!a	b')}).apply('baba')
    assert x.string == 'baba'
    assert x.startmap.tolist() == [1,0,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,0,-1]

    x = r.from_string('>a\n>b\n>c', modules={'a': r.from_string('!a	b'), 'b': r.from_string('!b	c'), 'c': r.from_string('!c	d')}, active=['a','b','c']).apply('baba')
    assert x.string == 'dddd'
    assert x.startmap.tolist() == [1,0,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,0,-1]

    x = r.from_string('>a\n>b\n>c', modules={'a': r.from_string('!a	b'), 'b': r.from_string('!b	c'), 'c': r.from_string('!c	d')}, active=['a']).apply('baba')
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1,0,0,0,0,0]
    assert x.endmap.tolist() == [0,0,0,0,0,-1]

    # iterative groups

    x = r.from_string(
        r'!(^| )([()%,])([^ ])	\1\2 \3' '\n'
        r'!([^ ])([()%,])( |$)	\1 \2\3'
    ).apply('(42%),')
    assert x.string == '( 42%) ,'
    assert x.startmap.tolist() == [1,0, 0,-1,-1,-1,-1,-1,-2,-2]
    assert x.endmap.tolist()   == [0,0,-1,-1,-1,-1,-1,-2,-2,-3]

    x = r.from_string(
        '#1\n'
        r'!(^| )([()%,])([^ ])	\1\2 \3' '\n'
        r'!([^ ])([()%,])( |$)	\1 \2\3' '\n'
        '#\n'
        '>1'
    ).apply('(42%),')
    assert x.string == '( 42 % ) ,'
    assert x.startmap.tolist() == [1,0, 0,-1,-1,-1,-2,-2,-3,-3,-4,-4]
    assert x.endmap.tolist()   == [0,0,-1,-1,-1,-2,-2,-3,-3,-4,-4,-5]

    # tokenization

    x = r.from_string(
        '#1\n'
        r'!(^| )([()%,])([^ ])	\1\2 \3' '\n'
        r'!([^ ])([()%,])( |$)	\1 \2\3' '\n'
        '#\n'
        '>1'
    ).tokenize('(42%),')
    assert len(x.tokens) == 5
    assert x.tokens[0].form == '('
    assert x.tokens[0].lnk == Lnk.charspan(0,1)
    assert x.tokens[1].form == '42'
    assert x.tokens[1].lnk == Lnk.charspan(1,3)
    assert x.tokens[2].form == '%'
    assert x.tokens[2].lnk == Lnk.charspan(3,4)
    assert x.tokens[3].form == ')'
    assert x.tokens[3].lnk == Lnk.charspan(4,5)
    assert x.tokens[4].form == ','
    assert x.tokens[4].lnk == Lnk.charspan(5,6)
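
One detail worth calling out: the spans produced by tokenize refer to the original input, not the rewritten string. In the "I won't go" case earlier in this test, "will" exists only in the rewritten output yet carries Lnk.charspan(2, 4) (the "wo" of the input), while "n't" keeps its original characters. A check using only spans asserted above:

s = "I won't go"
assert s[2:4] == 'wo'    # 'will' carries Lnk.charspan(2, 4) above
assert s[4:7] == "n't"   # 'n't' carries Lnk.charspan(4, 7) above
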
Example #29
    edges = [(nid, rarg, tgt) for rarg, tgt in d[5]]
    return (node, edges)


_COLON = regex(r'\s*:\s*', value=Ignore)
_COMMA = regex(r',\s*')
_SPACES = regex(r'\s+', value=Ignore)
_SYMBOL = regex(r'[-+\w]+')
_PRED = regex(r'((?!<-?\d|\("|\{|\[)\w)+', value=Pred.string_or_grammar_pred)
_EDS = nt('EDS', value=_make_eds)
_TOP = opt(nt('TOP'), default=None)
_TOPID = opt(_SYMBOL, default=None)
_FLAG = opt(regex(r'\s*\(fragmented\)', value=Ignore))
_NODE = nt('NODE', value=_make_nodedata)
_DSCN = opt(lit('|', value=Ignore))
_LNK = opt(nt('LNK', value=lambda d: Lnk.charspan(*d)), default=None)
_CARG = opt(nt('CARG'), default=None)
_PROPS = opt(nt('PROPS', value=lambda d: d[0] + d[1]), default=None)
_EDGES = nt('EDGES')
_TYPE = opt(_SYMBOL, value=lambda i: [(CVARSORT, i)], default=[])
_AVLIST = nt('AVLIST')
_ATTRVAL = nt('ATTRVAL')

_eds_parser = Peg(grammar=dict(
    start=delimited(_EDS, Spacing),
    EDS=bounded(regex(r'\{\s*'), seq(_TOP, nt('NODES')), regex(r'\s*\}')),
    TOP=seq(_TOPID, _COLON, _FLAG, Spacing, value=lambda d: d[0]),
    NODES=delimited(_NODE, Spacing),
    NODE=seq(_DSCN, _SYMBOL, _COLON, _PRED, _LNK, _CARG, _PROPS, _EDGES),
    LNK=bounded(lit('<'), seq(Integer, _COLON, Integer), lit('>')),
    CARG=bounded(lit('('), DQString, lit(')')),
Example #30
 def test_fromstring(self):
     assert len(YY.from_string(token_v1_basic).tokens) == 1
     t = YY.from_string(token_v1_basic).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
     t = YY.from_string(token_v1_surface).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"], [])
     t = YY.from_string(token_v1_pos).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"],
                 [("NN", 0.8), ("VV", 0.2)])
     t = YY.from_string(token_v1_surface_pos).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"],
                 [("NN", 1.0)])
     t = YY.from_string(token_v1_lrules).tokens[0]
     check_token(t, 1, 0, 1, None, [1], "dog", None, 0,
                 ["lrule1", "lrule2"], [])
     t = YY.from_string(token_v2).tokens[0]
     check_token(t, 1, 0, 1, Lnk.charspan(1,3), [1], "dog", "Dog",
                 0, ["null"], [("NN", 1.0)])
     tl = YY.from_string(tokenstring)
     assert len(tl.tokens) == 9
     check_token(
         tl.tokens[0],
         42, 0, 1, Lnk.charspan(0,12), [1], "Tokenization", None,
         0, ["null"], [("NNP", 0.7677), ("NN", 0.2323)]
     )
     check_token(
         tl.tokens[1],
         43, 1, 2, Lnk.charspan(12,13), [1], ",", None,
         0, ["null"], [(",", 1.0000)]
     )
     check_token(
         tl.tokens[2],
         44, 2, 3, Lnk.charspan(14,15), [1], "a", None,
         0, ["null"], [("DT", 1.0000)]
     )
     check_token(
         tl.tokens[3],
         45, 3, 4, Lnk.charspan(16,27), [1], "non-trivial", None,
         0, ["null"], [("JJ", 1.0000)]
     )
     check_token(
         tl.tokens[4],
         46, 4, 5, Lnk.charspan(28,36), [1], "exercise", None,
         0, ["null"], [("NN", 0.9887), ("VB", 0.0113)]
     )
     check_token(
         tl.tokens[5],
         47, 5, 6, Lnk.charspan(36,37), [1], ",", None,
         0, ["null"], [(",", 1.0000)]
     )
     check_token(
         tl.tokens[6],
         48, 6, 7, Lnk.charspan(38,44), [1], "bazed", None,
         0, ["null"], [("VBD", 0.5975), ("VBN", 0.4025)]
     )
     check_token(
         tl.tokens[7],
         49, 7, 8, Lnk.charspan(45,58), [1], "*****@*****.**", None,
         0, ["null"], [("NN", 0.7342), ("JJ", 0.2096)]
     )
     check_token(
         tl.tokens[8],
         50, 8, 9, Lnk.charspan(58,59), [1], ".", None,
         0, ["null"], [(".", 1.0000)]
     )
Example #31
 def __init__(self):
     self.lnk = Lnk.charspan(0,1)