def test_lnk(self):
    n = Node(10000, spred('_dog_n_rel'))
    assert n.lnk is None
    assert n.cfrom == -1
    assert n.cto == -1
    n = Node(10000, spred('_dog_n_rel'), lnk=Lnk.charspan(0, 1))
    assert n.lnk == Lnk.charspan(0, 1)
    assert n.cfrom == 0
    assert n.cto == 1
def testCharSpanLnk(self):
    lnk = Lnk.charspan(0, 1)
    assert lnk.type == Lnk.CHARSPAN
    assert lnk.data == (0, 1)
    assert str(lnk) == '<0:1>'
    repr(lnk)  # no error
    lnk = Lnk.charspan('0', '1')
    assert lnk.data == (0, 1)
    with pytest.raises(TypeError):
        Lnk.charspan(1)
    with pytest.raises(TypeError):
        Lnk.charspan([1, 2])
    with pytest.raises(TypeError):
        Lnk.charspan(1, 2, 3)
    with pytest.raises(ValueError):
        Lnk.charspan('a', 'b')
def test_from_list(self):
    tl = YY.from_list([{'id': 1, 'start': 0, 'end': 1, 'form': "dog"}])
    assert tl.tokens == [YyToken(1, 0, 1, form="dog")]
    tl = YY.from_list([
        {
            'id': 1, 'start': 0, 'end': 1,
            'from': 0, 'to': 4,
            'paths': [1],
            'form': "dogs",
            'surface': "Dogs",
            # 'ipos': 0,
            'lrules': ["null"],
            'tags': ["NN"],
            'probabilities': [1.0]
        },
        {
            'id': 1, 'start': 0, 'end': 1,
            'from': 5, 'to': 9,
            'paths': [1],
            'form': "bark",
            # 'ipos': 0,
            'lrules': ["null"],
            'tags': ["VBZ"],
            'probabilities': [1.0]
        }
    ])
    assert tl.tokens == [
        YyToken(1, 0, 1, Lnk.charspan(0, 4), [1], "dogs", "Dogs",
                ipos=0, lrules=["null"], pos=[("NN", 1.0)]),
        YyToken(1, 0, 1, Lnk.charspan(5, 9), [1], "bark",
                ipos=0, lrules=["null"], pos=[("VBZ", 1.0)])
    ]
def decode_lnk(cfrom, cto):
    if cfrom is cto is None:
        return None
    elif None in (cfrom, cto):
        raise ValueError(
            'Both cfrom and cto, or neither, must be specified.')
    else:
        return Lnk.charspan(cfrom, cto)
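A minimal usage sketch for decode_lnk(); the delphin.lnk import path below is an assumption (it matches newer PyDelphin releases, not anything in these snippets):

# Sketch only; the import path for Lnk is assumed.
from delphin.lnk import Lnk

assert decode_lnk(None, None) is None            # no span at all
assert decode_lnk(0, 5) == Lnk.charspan(0, 5)    # both endpoints given
try:
    decode_lnk(0, None)                          # half-specified span
except ValueError as err:
    print(err)  # Both cfrom and cto, or neither, must be specified.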
def _read_lnk(tokens):
    """Read and return the lnk of a predication, if one is
    specified; otherwise return None."""
    # < FROM : TO > or < FROM # TO > or < TOK... > or < @ EDGE >
    lnk = None
    if tokens[0] == '<':
        tokens.popleft()  # we just checked this is a left angle
        if tokens[0] == '>':
            pass  # empty <> brackets the same as no lnk specified
        # edge lnk: ['@', EDGE, ...]
        elif tokens[0] == '@':
            tokens.popleft()  # remove the @
            lnk = Lnk.edge(tokens.popleft())  # edge lnks only have one number
        # character span lnk: [FROM, ':', TO, ...]
        elif tokens[1] == ':':
            lnk = Lnk.charspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the colon
            tokens.popleft()  # and this is the cto
        # chart vertex range lnk: [FROM, '#', TO, ...]
        elif tokens[1] == '#':
            lnk = Lnk.chartspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the hash
            tokens.popleft()  # and this is the to vertex
        # tokens lnk: [(TOK,)+ ...]
        else:
            lnkdata = []
            while tokens[0] != '>':
                lnkdata.append(int(tokens.popleft()))
            lnk = Lnk.tokens(lnkdata)
        _read_literals(tokens, '>')
    return lnk
def test_to_dict(self):
    t = YyToken(1, 0, 1, form="dog")
    assert t.to_dict() == {'id': 1, 'start': 0, 'end': 1, 'form': "dog"}
    t = YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                ipos=0, lrules=["null"], pos=[("NN", 1.0)])
    assert t.to_dict() == {
        'id': 1, 'start': 0, 'end': 1,
        'from': 0, 'to': 1,
        # 'paths': [1],
        'form': "dog",
        'surface': "Dog",
        # 'ipos': 0,
        'lrules': ["null"],
        'tags': ["NN"],
        'probabilities': [1.0]
    }
@classmethod
def from_string(cls, s):
    """Decode from the YY token lattice format."""
    def _qstrip(s):
        return s[1:-1]  # remove assumed quote characters
    tokens = []
    for match in _yy_re.finditer(s):
        d = match.groupdict()
        lnk, pos = None, []
        if d['lnkfrom'] is not None:
            lnk = Lnk.charspan(d['lnkfrom'], d['lnkto'])
        if d['pos'] is not None:
            ps = d['pos'].strip().split()
            pos = list(zip(map(_qstrip, ps[::2]), map(float, ps[1::2])))
        tokens.append(
            YyToken(
                int(d['id']),
                int(d['start']),
                int(d['end']),
                lnk,
                list(map(int, d['paths'].strip().split())),
                _qstrip(d['form']),
                None if d['surface'] is None else _qstrip(d['surface']),
                int(d['ipos']),
                list(map(_qstrip, d['lrules'].strip().split())),
                pos
            )
        )
    return cls(tokens)
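A hedged decoding sketch; the token string is assumed to match the token_v2 fixture exercised by test_fromstring below, and YY is the lattice class as in the tests:

# Assumed v2 YY token string layout: (id, start, end, <from:to>,
# paths, "form" "surface", ipos, "lrules", tag prob ...)
s = '(1, 0, 1, <1:3>, 1, "dog" "Dog", 0, "null", "NN" 1.0)'
tok = YY.from_string(s).tokens[0]
assert tok.form == 'dog' and tok.surface == 'Dog'
assert tok.lnk == Lnk.charspan(1, 3)
assert tok.pos == [('NN', 1.0)]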
def read_lnk(tokens):
    """Read and return the lnk of a predication, if one is
    specified; otherwise return None."""
    # < FROM : TO > or < FROM # TO > or < TOK... > or < @ EDGE >
    lnk = None
    if tokens[0] == _left_angle:
        tokens.popleft()  # we just checked this is a left angle
        if tokens[0] == _right_angle:
            pass  # empty <> brackets the same as no lnk specified
        # edge lnk: ['@', EDGE, ...]
        elif tokens[0] == _at:
            tokens.popleft()  # remove the @
            lnk = Lnk.edge(tokens.popleft())  # edge lnks only have one number
        # character span lnk: [FROM, ':', TO, ...]
        elif tokens[1] == _colon:
            lnk = Lnk.charspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the colon
            tokens.popleft()  # and this is the cto
        # chart vertex range lnk: [FROM, '#', TO, ...]
        elif tokens[1] == _hash:
            lnk = Lnk.chartspan(tokens.popleft(), tokens[1])
            tokens.popleft()  # this should be the hash
            tokens.popleft()  # and this is the to vertex
        # tokens lnk: [(TOK,)+ ...]
        else:
            lnkdata = []
            while tokens[0] != _right_angle:
                lnkdata.append(int(tokens.popleft()))
            lnk = Lnk.tokens(lnkdata)
        validate_token(tokens.popleft(), _right_angle)
    return lnk
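Either lnk reader above can be driven by hand; a sketch assuming _left_angle, _colon, and _right_angle are the literal strings '<', ':', and '>' (as in the _read_lnk variant) and that callers pass a deque of pre-split tokens:

from collections import deque

toks = deque(['<', '0', ':', '5', '>'])  # pre-lexed lnk '<0:5>'
lnk = read_lnk(toks)
assert lnk == Lnk.charspan(0, 5)  # charspan coerces the string offsets
assert not toks  # the reader consumes through the closing '>'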
@classmethod
def from_triples(cls, triples):
    lnk, surface, identifier = None, None, None
    nids, nd, edges = [], {}, []
    for src, rel, tgt in triples:
        if src not in nd:
            nids.append(src)
            nd[src] = {'pred': None, 'lnk': None, 'carg': None, 'si': []}
        if rel == 'predicate':
            nd[src]['pred'] = Pred.string_or_grammar_pred(tgt)
        elif rel == 'lnk':
            cfrom, cto = tgt.strip('"<>').split(':')
            nd[src]['lnk'] = Lnk.charspan(int(cfrom), int(cto))
        elif rel == 'carg':
            if (tgt[0], tgt[-1]) == ('"', '"'):
                tgt = tgt[1:-1]
            nd[src]['carg'] = tgt
        elif rel == 'type':
            nd[src]['si'].append((CVARSORT, tgt))
        elif rel.islower():
            nd[src]['si'].append((rel, tgt))
        else:
            edges.append((src, rel, tgt))
    nodes = [
        Node(nodeid=nid,
             pred=nd[nid]['pred'],
             sortinfo=nd[nid]['si'],
             lnk=nd[nid]['lnk'],
             carg=nd[nid]['carg'])
        for nid in nids
    ]
    top = nids[0] if nids else None
    return cls(top=top, nodes=nodes, edges=edges)
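A sketch of the triple format this decoder appears to accept, mirroring the eds_it_rains fixtures further down; treating the class as Eds is an assumption drawn from those fixtures:

# Lowercase relations become node properties; uppercase relations
# (e.g. ARG1) would become edges. Names here mirror the fixtures.
triples = [
    ('e2', 'predicate', '_rain_v_1'),
    ('e2', 'lnk', '"<3:9>"'),   # stripped and split into charspan(3, 9)
    ('e2', 'type', 'e'),        # stored under CVARSORT
    ('e2', 'tense', 'pres'),
]
g = Eds.from_triples(triples)   # 'Eds' assumed; see eds_it_rains below
assert g.top == 'e2'            # the first node encountered becomes top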
def tokenize(self, s, pattern=r'[ \t]+', active=None):
    res = self.apply(s, active=active)
    tokens = [
        YyToken(id=i, start=i, end=i + 1,
                lnk=Lnk.charspan(tok[0], tok[1]),
                form=tok[2])
        for i, tok in enumerate(_tokenize(res, pattern))
    ]
    return YyTokenLattice(tokens)
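For orientation, a usage sketch that reuses a rule and the assertions from test_REPP further down, so only the repp.REPP entry point is taken on trust here:

# The lattice's charspans index into the original string, not the
# rewritten one: "n't" maps back into "won't".
x = repp.REPP.from_string(r"!wo(n't) will \1").tokenize("I won't go")
assert [t.form for t in x.tokens] == ['I', 'will', "n't", 'go']
assert x.tokens[2].lnk == Lnk.charspan(4, 7)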
def test_fromstring(self):
    assert len(YY.from_string(token_v1_basic).tokens) == 1
    t = YY.from_string(token_v1_basic).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
    t = YY.from_string(token_v1_surface).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"], [])
    t = YY.from_string(token_v1_pos).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"],
                [("NN", 0.8), ("VV", 0.2)])
    t = YY.from_string(token_v1_surface_pos).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", "Dog", 0, ["null"],
                [("NN", 1.0)])
    t = YY.from_string(token_v1_lrules).tokens[0]
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0,
                ["lrule1", "lrule2"], [])
    t = YY.from_string(token_v2).tokens[0]
    check_token(t, 1, 0, 1, Lnk.charspan(1, 3), [1], "dog", "Dog", 0,
                ["null"], [("NN", 1.0)])
    tl = YY.from_string(tokenstring)
    assert len(tl.tokens) == 9
    check_token(tl.tokens[0], 42, 0, 1, Lnk.charspan(0, 12), [1],
                "Tokenization", None, 0, ["null"],
                [("NNP", 0.7677), ("NN", 0.2323)])
    check_token(tl.tokens[1], 43, 1, 2, Lnk.charspan(12, 13), [1],
                ",", None, 0, ["null"], [(",", 1.0000)])
    check_token(tl.tokens[2], 44, 2, 3, Lnk.charspan(14, 15), [1],
                "a", None, 0, ["null"], [("DT", 1.0000)])
    check_token(tl.tokens[3], 45, 3, 4, Lnk.charspan(16, 27), [1],
                "non-trivial", None, 0, ["null"], [("JJ", 1.0000)])
    check_token(tl.tokens[4], 46, 4, 5, Lnk.charspan(28, 36), [1],
                "exercise", None, 0, ["null"],
                [("NN", 0.9887), ("VB", 0.0113)])
    check_token(tl.tokens[5], 47, 5, 6, Lnk.charspan(36, 37), [1],
                ",", None, 0, ["null"], [(",", 1.0000)])
    check_token(tl.tokens[6], 48, 6, 7, Lnk.charspan(38, 44), [1],
                "bazed", None, 0, ["null"],
                [("VBD", 0.5975), ("VBN", 0.4025)])
    check_token(tl.tokens[7], 49, 7, 8, Lnk.charspan(45, 58), [1],
                "*****@*****.**", None, 0, ["null"],
                [("NN", 0.7342), ("JJ", 0.2096)])
    check_token(tl.tokens[8], 50, 8, 9, Lnk.charspan(58, 59), [1],
                ".", None, 0, ["null"], [(".", 1.0000)])
def eds_it_rains():
    return eds.Eds(
        top='e2',
        nodes=[
            Node(
                'e2',
                Pred.surface('"_rain_v_1_rel"'),
                sortinfo={
                    'SF': 'prop',
                    'TENSE': 'pres',
                    'MOOD': 'indicative',
                    'PROG': '-',
                    'PERF': '-',
                    CVARSORT: 'e'},
                lnk=Lnk.charspan(3, 9)
            )
        ],
        edges=[]
    )
def eds_it_rains():
    return eds.Eds(
        top='e2',
        nodes=[
            Node(
                'e2',
                Pred.stringpred('"_rain_v_1_rel"'),
                sortinfo={
                    'SF': 'prop',
                    'TENSE': 'pres',
                    'MOOD': 'indicative',
                    'PROG': '-',
                    'PERF': '-',
                    CVARSORT: 'e'},
                lnk=Lnk.charspan(3, 9)
            )
        ],
        edges=[]
    )
@classmethod
def from_dict(cls, d):
    """Decode from a dictionary as from YyToken.to_dict()."""
    return cls(
        d['id'],
        d['start'],
        d['end'],
        Lnk.charspan(d['from'], d['to']) if 'from' in d else None,
        # d.get('paths', [1]),
        form=d['form'],
        surface=d.get('surface'),
        # ipos=
        # lrules=
        pos=list(zip(d.get('tags', []), d.get('probabilities', [])))
    )
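A round-trip sketch tying from_dict() to the to_dict() behaviour tested above; judging from test_to_dict, defaulted paths and ipos are omitted on re-encoding while lrules is serialized, so the assumption here is that the re-encoded dict gains an explicit 'lrules' entry:

d = {'id': 1, 'start': 0, 'end': 1, 'from': 0, 'to': 1,
     'form': "dog", 'surface': "Dog",
     'tags': ["NN"], 'probabilities': [1.0]}
t = YyToken.from_dict(d)
assert t.lnk == Lnk.charspan(0, 1)
assert t.pos == [("NN", 1.0)]
assert t.to_dict() == {**d, 'lrules': ["null"]}  # default lrules re-encoded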
def test_from_dict(self):
    t = YyToken.from_dict({'id': 1, 'start': 0, 'end': 1, 'form': "dog"})
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
    t = YyToken.from_dict({
        'id': 1, 'start': 0, 'end': 1,
        'from': 0, 'to': 1,
        # 'paths': [1],
        'form': "dog",
        'surface': "Dog",
        # 'ipos': 0,
        'lrules': ["null"],
        'tags': ["NN"],
        'probabilities': [1.0]
    })
    check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog", 0,
                ["null"], [("NN", 1.0)])
def test_init(self):
    # a YyToken without a form cannot be constructed
    with pytest.raises(TypeError):
        YyToken()
    with pytest.raises(TypeError):
        YyToken(1)
    with pytest.raises(TypeError):
        YyToken(1, 0)
    with pytest.raises(TypeError):
        YyToken(1, 0, 1)
    with pytest.raises(TypeError):
        YyToken(1, 0, 1, Lnk.charspan(0, 1))
    with pytest.raises(TypeError):
        YyToken(1, 0, 1, Lnk.charspan(0, 1), [1])
    with pytest.raises(TypeError):
        YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".")
    with pytest.raises(TypeError):
        YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".", ipos=0)
    with pytest.raises(TypeError):
        YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".", ipos=0,
                lrules=["null"])
    with pytest.raises(TypeError):
        YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], surface=".", ipos=0,
                lrules=["null"], pos=[(".", 1.0)])
    t = YyToken(1, 0, 1, form="dog")
    check_token(t, 1, 0, 1, None, [1], "dog", None, 0, ["null"], [])
    t = YyToken(1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog",
                ipos=0, lrules=["null"], pos=[("NN", 1.0)])
    check_token(t, 1, 0, 1, Lnk.charspan(0, 1), [1], "dog", "Dog", 0,
                ["null"], [("NN", 1.0)])
def test_REPP():
    r = repp.REPP
    # single-module REPP
    x = r().apply('abc')
    assert x.string == 'abc'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, -1]

    x = r.from_string('').apply('abc')
    assert x.string == 'abc'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, -1]

    x = r.from_string(r'!a b').apply('ccc')  # no match
    assert x.string == 'ccc'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, -1]

    x = r.from_string(r'!a b').apply('baba')
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, 0, -1]

    x = r.from_string(r'!a aa').apply('baba')
    assert x.string == 'baabaa'
    assert x.startmap.tolist() == [1, 0, 0, -1, -1, -1, -2, -2]
    assert x.endmap.tolist() == [0, 0, 0, -1, -1, -1, -2, -3]

    x = r.from_string(r'!(\w+) [\1]').apply('abc def')
    assert x.string == '[abc] [def]'
    assert x.startmap.tolist() == [1, 0, -1, -1, -1, -1, -2, -2, -3, -3, -3, -3, -4]
    assert x.endmap.tolist() == [0, -1, -1, -1, -1, -2, -2, -3, -3, -3, -3, -4, -5]

    x = r.from_string(r"!wo(n't) will \1").apply("I won't go")
    assert x.string == "I will n't go"
    assert x.startmap.tolist() == [1, 0, 0, 0, -1, -2, -3, -4, -3, -3, -3, -3, -3, -3, -3]
    assert x.endmap.tolist() == [0, 0, 0, 1, 0, -1, -2, -3, -3, -3, -3, -3, -3, -3, -4]

    x = r.from_string(r"!wo(n't) will \1").tokenize("I won't go")
    assert len(x.tokens) == 4
    assert x.tokens[0].form == 'I'
    assert x.tokens[0].lnk == Lnk.charspan(0, 1)
    assert x.tokens[1].form == 'will'
    assert x.tokens[1].lnk == Lnk.charspan(2, 4)
    assert x.tokens[2].form == "n't"
    assert x.tokens[2].lnk == Lnk.charspan(4, 7)
    assert x.tokens[3].form == 'go'
    assert x.tokens[3].lnk == Lnk.charspan(8, 10)

    # additional modules/groups
    x = r.from_string('>a',
                      modules={'a': r.from_string(r'!a b')},
                      active=['a']).apply('baba')
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, 0, -1]

    x = r.from_string('>a',
                      modules={'a': r.from_string(r'!a b')}).apply('baba', active=['a'])
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, 0, -1]

    x = r.from_string('>a',
                      modules={'a': r.from_string(r'!a b')}).apply('baba')
    assert x.string == 'baba'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, 0, -1]

    x = r.from_string('>a\n>b\n>c',
                      modules={'a': r.from_string('!a b'),
                               'b': r.from_string('!b c'),
                               'c': r.from_string('!c d')},
                      active=['a', 'b', 'c']).apply('baba')
    assert x.string == 'dddd'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, 0, -1]

    x = r.from_string('>a\n>b\n>c',
                      modules={'a': r.from_string('!a b'),
                               'b': r.from_string('!b c'),
                               'c': r.from_string('!c d')},
                      active=['a']).apply('baba')
    assert x.string == 'bbbb'
    assert x.startmap.tolist() == [1, 0, 0, 0, 0, 0]
    assert x.endmap.tolist() == [0, 0, 0, 0, 0, -1]

    # iterative groups
    x = r.from_string(
        r'!(^| )([()%,])([^ ]) \1\2 \3' '\n'
        r'!([^ ])([()%,])( |$) \1 \2\3'
    ).apply('(42%),')
    assert x.string == '( 42%) ,'
    assert x.startmap.tolist() == [1, 0, 0, -1, -1, -1, -1, -1, -2, -2]
    assert x.endmap.tolist() == [0, 0, -1, -1, -1, -1, -1, -2, -2, -3]

    x = r.from_string(
        '#1\n'
        r'!(^| )([()%,])([^ ]) \1\2 \3' '\n'
        r'!([^ ])([()%,])( |$) \1 \2\3' '\n'
        '#\n'
        '>1'
    ).apply('(42%),')
    assert x.string == '( 42 % ) ,'
    assert x.startmap.tolist() == [1, 0, 0, -1, -1, -1, -2, -2, -3, -3, -4, -4]
    assert x.endmap.tolist() == [0, 0, -1, -1, -1, -2, -2, -3, -3, -4, -4, -5]

    # tokenization
    x = r.from_string(
        '#1\n'
        r'!(^| )([()%,])([^ ]) \1\2 \3' '\n'
        r'!([^ ])([()%,])( |$) \1 \2\3' '\n'
        '#\n'
        '>1'
    ).tokenize('(42%),')
    assert len(x.tokens) == 5
    assert x.tokens[0].form == '('
    assert x.tokens[0].lnk == Lnk.charspan(0, 1)
    assert x.tokens[1].form == '42'
    assert x.tokens[1].lnk == Lnk.charspan(1, 3)
    assert x.tokens[2].form == '%'
    assert x.tokens[2].lnk == Lnk.charspan(3, 4)
    assert x.tokens[3].form == ')'
    assert x.tokens[3].lnk == Lnk.charspan(4, 5)
    assert x.tokens[4].form == ','
    assert x.tokens[4].lnk == Lnk.charspan(5, 6)
def __init__(self):
    self.lnk = Lnk.charspan(0, 1)
    edges = [(nid, rarg, tgt) for rarg, tgt in d[5]]
    return (node, edges)


_COLON = regex(r'\s*:\s*', value=Ignore)
_COMMA = regex(r',\s*')
_SPACES = regex(r'\s+', value=Ignore)
_SYMBOL = regex(r'[-+\w]+')
_PRED = regex(r'((?!<-?\d|\("|\{|\[)\w)+',
              value=Pred.string_or_grammar_pred)
_EDS = nt('EDS', value=_make_eds)
_TOP = opt(nt('TOP'), default=None)
_TOPID = opt(_SYMBOL, default=None)
_FLAG = opt(regex(r'\s*\(fragmented\)', value=Ignore))
_NODE = nt('NODE', value=_make_nodedata)
_DSCN = opt(lit('|', value=Ignore))
_LNK = opt(nt('LNK', value=lambda d: Lnk.charspan(*d)), default=None)
_CARG = opt(nt('CARG'), default=None)
_PROPS = opt(nt('PROPS', value=lambda d: d[0] + d[1]), default=None)
_EDGES = nt('EDGES')
_TYPE = opt(_SYMBOL, value=lambda i: [(CVARSORT, i)], default=[])
_AVLIST = nt('AVLIST')
_ATTRVAL = nt('ATTRVAL')

_eds_parser = Peg(grammar=dict(
    start=delimited(_EDS, Spacing),
    EDS=bounded(regex(r'\{\s*'), seq(_TOP, nt('NODES')), regex(r'\s*\}')),
    TOP=seq(_TOPID, _COLON, _FLAG, Spacing, value=lambda d: d[0]),
    NODES=delimited(_NODE, Spacing),
    NODE=seq(_DSCN, _SYMBOL, _COLON, _PRED, _LNK, _CARG, _PROPS, _EDGES),
    LNK=bounded(lit('<'), seq(Integer, _COLON, Integer), lit('>')),
    CARG=bounded(lit('('), DQString, lit(')')),
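For orientation, a serialization sketch of what the grammar above appears to accept: a braced graph with a top id, then per node a pred, a <cfrom:cto> lnk, an optional ("carg"), {type PROP val, ...} properties, and an edge list. The exact string is an assumption reconstructed from the rules shown, mirroring the eds_it_rains fixture rather than anything in the test suite:

# Hypothetical input for _eds_parser, not taken from the tests:
sample = '''
{e2:
 e2:_rain_v_1<3:9>{e SF prop, TENSE pres, MOOD indicative, PROG -, PERF -}[]
}
'''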