def test_single(self):
    '''
    Exhaust the matches of a nested '*' repetition under the low-memory
    configuration; the test passes if no error is raised.
    '''
    #basicConfig(level=DEBUG)
    stars = Literal('*')[:,...][3]
    stars.config.clear().low_memory(5)
    matches = stars.get_match_string()('*' * 4)
    # there is nothing to assert - we only need the generator to run dry
    list(matches)
def assert_range(self, n_match, direcn, results, multiplier):
    '''
    Check a series of low-memory queue lengths against expected values.

    For each position in `results` the queue length is that position
    multiplied by `multiplier`.  A fresh matcher is configured with that
    length and handed to `assert_count` together with the length, the
    position and the expected value taken from `results`.
    '''
    for position, expected in enumerate(results):
        limit = position * multiplier
        stars = Literal('*')[::direcn,...][n_match] & Eos()
        stars.config.clear().low_memory(limit)
        self.assert_count(stars.get_match_string(), limit, position,
                          expected)
def test_workaround(self):
    '''
    Recursive grammar (loop = "a" loop | "b") where the alternation is
    combined with `list` via `>=` before being bound to the delayed
    matcher; checked against the input 'ab'.
    '''
    loop = Delayed()
    head = Literal("a") + loop
    leaf = Literal("b")
    loop += (head | leaf) >= list
    self.assert_literal('ab', loop)
def test_as_given(self):
    '''
    Recursive grammar (loop = "a" loop | "b") bound directly to the
    delayed matcher; checked against the input 'ab'.
    '''
    loop = Delayed()
    head = Literal("a") + loop
    leaf = Literal("b")
    loop += head | leaf
    self.assert_literal('ab', loop)
def test_offset(self):
    '''
    A token specialised with "drop 'aa', then match the rest" is parsed
    inside a `BLine`, first with trailing text and then with nothing
    after the dropped prefix.
    '''
    #basicConfig(level=DEBUG)
    token = Token('[^\n\r]+')
    block_line = BLine(token(~Literal('aa') & Regexp('.*')))
    block_line.config.default_line_aware(block_start=0)
    parse_line = block_line.get_parse_string()
    assert parse_line('aabc') == ['bc']

    # what happens with an empty match?
    bare = ~Literal('aa') & Regexp('.*')
    bare.config.no_full_first_match()
    assert bare.parse('aa') == ['']
    assert parse_line('aa') == ['']
def build(self): ''' Construct the parser. ''' # Avoid dependency loops from lepl.matchers.derived import Drop, Eos, AnyBut, Upper from lepl.matchers.core import Any, Lookahead, Literal, Delayed from lepl.matchers.error import make_error from lepl.matchers.variables import TraceVariables from lepl.support.node import node_throw with TraceVariables(False): # these two definitions enforce the conditions above, providing only # special characters appear as literals in the grammar escaped = Drop(self.alphabet.escape) & self.alphabet.escaped raw = ~Lookahead(self.alphabet.escape) & \ AnyBut(self.alphabet.illegal) close = Drop(')') extend = (Drop('(*') & Upper()[1:, ...] & close) >> self.extend single = escaped | raw | extend any_ = Literal('.') >> self.dot letter = single >> self.dup pair = single & Drop('-') & single > self.tup interval = pair | letter brackets = Drop('[') & interval[1:] & Drop(']') inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert char = inverted | brackets | letter | any_ | extend > self.char item = Delayed() open = Drop('(?:') range = Drop(self.alphabet.escape) & self.alphabet.range seq = (char | item | range)[0:] > self.sequence group = open & seq & close alts = open & seq[2:, Drop('|')] & close > self.choice star = (alts | group | char) & Drop('*') > self.star plus = (alts | group | char) & Drop('+') > self.plus opt = (alts | group | char) & Drop('?') > self.option bad_grp = (Drop('(') & ~Lookahead('?:') & seq & close) \ ** make_error( "Lepl's own regular expressions do not currently " "support matched groups.\n" "Use '(?:...)' to group expressions without " "matching.") item += alts | group | star | plus | opt | bad_grp expr = ((char | item)[:] & Drop(Eos())) >> node_throw # Empty config here avoids loops if the default config includes # references to alphabets expr.config.clear() return expr.parse_string
def SingleLineString(quote='"', escape='\\', exclude='\n'):
    '''
    Like `String`, but will not match across multiple lines.

    `quote` is the delimiter that opens and closes the string.  `escape`
    is the prefix that allows `quote` to appear inside the string; a false
    value disables escaping.  `exclude` gives the characters that may not
    appear inside the string (newline by default).

    The returned matcher wraps both delimiters and the escape prefix in
    `Drop`.
    '''
    q = Literal(quote)
    content = AnyBut(Or(q, Any(exclude)))
    if escape:
        # The escaped quote is tried first, as in `String`.  Unless it is
        # excluded, the escape character is acceptable to `content`, so the
        # opposite order consumes it as plain text and the first match ends
        # at the escaped quote.
        content = Or(And(Drop(escape), q), content)
    content = Repeat(content, add_=True)
    return And(Drop(q), content, Drop(q))
def String(quote='"', escape='\\'):
    '''
    Match a quoted string whose delimiter can be escaped.

    The match may extend across newlines (`SingleLineString` is the
    alternative that does not).  A false `escape` disables escaping.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(delimiter)
    if escape:
        # escaped delimiter is tried before an ordinary character
        char = Or(And(Drop(escape), delimiter), plain)
    else:
        char = plain
    body = Repeat(char, add_=True)
    return And(Drop(delimiter), body, Drop(delimiter))
def mkchr(char, range, invert=False):
    '''
    Build a matcher for the literal `char` whose result is mapped to a
    `Character` made from the single-point intervals of `range` (or from
    `self.invert(...)` of those intervals when `invert` is true).

    `self` and `lmap` come from the enclosing scope.
    '''
    from lepl.matchers.core import Literal
    from lepl.matchers.derived import Map
    from lepl.regexp.core import Character
    singletons = lmap(lambda point: (point, point), range)
    if invert:
        # this delays call to invert until after creation of self
        to_character = lambda _: Character(self.invert(singletons), self)
    else:
        to_character = lambda _: Character(singletons, self)
    return Map(Literal(char), to_character)
def SkipString(quote='"', escape='\\', ignore='\n', empty='', join=__add__):
    '''
    Like `String`, matching across multiple lines, but will silently
    drop newlines.

    `quote` is the delimiter that opens and closes the string.  `escape`
    is the prefix that allows `quote` to appear inside the string; a false
    value disables escaping.  Characters in `ignore` are accepted inside
    the string but wrapped in `Drop`.  `empty` and `join` are passed to
    `Repeat` as its `reduce` pair.
    '''
    q = Literal(quote)
    content = AnyBut(Or(q, Any(ignore)))
    if escape:
        # The escaped quote is tried first, as in `String`.  Unless it is
        # ignored, the escape character is acceptable to `content`, so the
        # opposite order consumes it as plain text and the first match ends
        # at the escaped quote.
        content = Or(And(Drop(escape), q), content)
    content = Or(content, Drop(Any(ignore)))
    content = Repeat(content, reduce=(empty, join))
    return And(Drop(q), content, Drop(q))
def String(quote='"', escape='\\', empty='', join=__add__):
    '''
    Match a quoted string whose delimiter can be escaped.

    The match may extend across newlines (`SingleLineString` is the
    alternative that does not).  A false `escape` disables escaping.

    More generally, a string is a grouping of results: `empty` and `join`
    are passed to `Repeat` as its `reduce` pair, so choosing them
    appropriately lets this matcher work with a variety of types.
    '''
    delimiter = Literal(quote)
    plain = AnyBut(delimiter)
    if escape:
        # escaped delimiter is tried before an ordinary character
        char = Or(And(Drop(escape), delimiter), plain)
    else:
        char = plain
    body = Repeat(char, reduce=(empty, join))
    return And(Drop(delimiter), body, Drop(delimiter))
def SingleLineString(quote='"', escape='\\', exclude='\n', empty='', join=__add__):
    '''
    Like `String`, but will not match across multiple lines.

    `quote` is the delimiter that opens and closes the string.  `escape`
    is the prefix that allows `quote` to appear inside the string; a false
    value disables escaping.  `exclude` gives the characters that may not
    appear inside the string (newline by default).  `empty` and `join` are
    passed to `Repeat` as its `reduce` pair.

    The returned matcher wraps both delimiters and the escape prefix in
    `Drop`.
    '''
    q = Literal(quote)
    content = AnyBut(Or(q, Any(exclude)))
    if escape:
        # The escaped quote is tried first, as in `String`.  Unless it is
        # excluded, the escape character is acceptable to `content`, so the
        # opposite order consumes it as plain text and the first match ends
        # at the escaped quote.
        content = Or(And(Drop(escape), q), content)
    content = Repeat(content, reduce=(empty, join))
    return And(Drop(q), content, Drop(q))
def build(self): ''' Construct the parser. ''' # Avoid dependency loops from lepl.matchers.derived import Drop, Eos, AnyBut, Upper from lepl.matchers.core import Any, Lookahead, Literal, Delayed # these two definitions enforce the conditions above, providing only # special characters appear as literals in the grammar escaped = Drop(self.alphabet.escape) + Any(self.alphabet.escaped) raw = ~Lookahead(self.alphabet.escape) + \ AnyBut(self.alphabet.escaped) single = escaped | raw any_ = Literal('.') >> self.dot letter = single >> self.dup pair = single & Drop('-') & single > self.tup extend = (Drop('(*') & Upper()[1:, ...] & Drop(')')) >> self.extend interval = pair | letter | extend brackets = Drop('[') & interval[1:] & Drop(']') inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert char = inverted | brackets | letter | any_ | extend > self.char item = Delayed() seq = (char | item)[0:] > self.sequence group = Drop('(') & seq & Drop(')') alts = Drop('(') & seq[2:, Drop('|')] & Drop(')') > self.choice star = (alts | group | char) & Drop('*') > self.star plus = (alts | group | char) & Drop('+') > self.plus opt = (alts | group | char) & Drop('?') > self.option item += alts | group | star | plus | opt expr = (char | item)[:] & Drop(Eos()) # Empty config here avoids loops if the default config includes # references to alphabets expr.config.clear() return expr.parse_string
def test_nested(self):
    '''
    `>>` with one appender followed by `>` with another on the same
    literal.
    '''
    inner = Literal('foo') >> self.mkappend('b')
    outer = inner > self.mkappend(['c'])
    # note extra list
    self.assert_direct('foo ', outer, [[['foob', 'c']]])
def test_apply(self):
    '''
    `>` with a list appender on a literal.
    '''
    matcher = Literal('foo') > self.mkappend(['b'])
    # note extra list
    self.assert_direct('foo ', matcher, [[['foo', 'b']]])
def test_kapply(self):
    '''
    `>>` with a string appender on a literal.
    '''
    matcher = Literal('foo') >> self.mkappend('b')
    # note extra list
    self.assert_direct('foo ', matcher, [['foob']])
def test_literal(self):
    '''
    A bare literal matched against text with trailing content.
    '''
    matcher = Literal('foo')
    expected = [['foo']]
    self.assert_direct('foo ', matcher, expected)
def test_commit(self):
    '''
    Alternatives for the tail of 'abcd', one of which is followed by
    `Commit()`; two identical results are expected.
    '''
    prefix = Any()[0::'b']
    # `+` binds more tightly than `|`, so Commit() follows only 'cd'
    tail = Literal('d') | Literal('cd') + Commit() | Literal('bcd')
    matcher = prefix + tail + Eof()
    self.assert_direct('abcd', matcher, [['abcd'], ['abcd']])
def test_example(self):
    '''
    Wrap the repeated 'b' matcher with an `Exclude` built from an
    "equals 'a'" predicate and parse a string of interleaved 'a' and 'b'
    followed by 'c's.
    '''
    skip_a = Exclude(lambda x: x == 'a')
    bs = skip_a(Literal('b')[:, ...])
    cs = Literal('c')[:, ...]
    parsed = (bs + cs).parse_string('abababccc')
    assert parsed == ['bbbccc'], parsed
def test_flatten(self):
    '''
    Three literals joined with `&` print as a nested And before the
    `flatten` configuration is applied and as a single And afterwards.
    '''
    chain = Literal('a') & Literal('b') & Literal('c')
    before = str(chain)
    assert before == "And(And, Literal)", before
    chain.config.clear().flatten()
    compiled = chain.get_parse_string()
    after = str(compiled.matcher)
    assert after == "And(Literal, Literal, Literal)", after
def Newline():
    '''
    Match a line ending: newline (Unix) or carriage return followed by
    newline (Windows).
    '''
    unix = Literal('\n')
    windows = Literal('\r\n')
    return Or(unix, windows)