def build(self): ''' Construct the parser. ''' # Avoid dependency loops from lepl.matchers.derived import Drop, Eos, AnyBut, Upper from lepl.matchers.core import Any, Lookahead, Literal, Delayed from lepl.matchers.error import make_error from lepl.matchers.variables import TraceVariables from lepl.support.node import node_throw with TraceVariables(False): # these two definitions enforce the conditions above, providing only # special characters appear as literals in the grammar escaped = Drop(self.alphabet.escape) & self.alphabet.escaped raw = ~Lookahead(self.alphabet.escape) & \ AnyBut(self.alphabet.illegal) close = Drop(')') extend = (Drop('(*') & Upper()[1:, ...] & close) >> self.extend single = escaped | raw | extend any_ = Literal('.') >> self.dot letter = single >> self.dup pair = single & Drop('-') & single > self.tup interval = pair | letter brackets = Drop('[') & interval[1:] & Drop(']') inverted = Drop('[^') & interval[1:] & Drop(']') >= self.invert char = inverted | brackets | letter | any_ | extend > self.char item = Delayed() open = Drop('(?:') range = Drop(self.alphabet.escape) & self.alphabet.range seq = (char | item | range)[0:] > self.sequence group = open & seq & close alts = open & seq[2:, Drop('|')] & close > self.choice star = (alts | group | char) & Drop('*') > self.star plus = (alts | group | char) & Drop('+') > self.plus opt = (alts | group | char) & Drop('?') > self.option bad_grp = (Drop('(') & ~Lookahead('?:') & seq & close) \ ** make_error( "Lepl's own regular expressions do not currently " "support matched groups.\n" "Use '(?:...)' to group expressions without " "matching.") item += alts | group | star | plus | opt | bad_grp expr = ((char | item)[:] & Drop(Eos())) >> node_throw # Empty config here avoids loops if the default config includes # references to alphabets expr.config.clear() return expr.parse_string
def test_trace(self):
    '''
    Build a small query grammar inside `TraceVariables(out=buffer)`,
    parse a sample query, and compare everything written to `buffer`
    with the expected trace text.  The parse is then repeated and the
    same comparison made again.
    '''
    buffer = StringIO()
    # The expected trace below refers to these matchers by the local
    # names they are bound to here (word, phrase, text, query), so the
    # names are part of what is being tested.
    with TraceVariables(out=buffer):
        # A word, provided the input at that point is not 'OR'.
        word = ~Lookahead('OR') & Word()
        phrase = String()
        with DroppedSpace():
            # `list` is the builtin, used as the callback for `>`.
            text = (phrase | word)[1:] > list
            query = text[:, Drop('OR')]
    # NOTE(review): this literal is reproduced exactly as it appears in
    # this view, where it sits on a single line with single spaces.  If
    # the real trace output is multi-line / column-aligned, confirm the
    # literal's whitespace against the original file.
    expected = ''' phrase failed stream = 'spicy meatballs OR... word = ['spicy'] stream = ' meatballs OR "el ... phrase failed stream = 'meatballs OR "el b... word = ['meatballs'] stream = ' OR "el bulli rest... phrase failed stream = 'OR "el bulli resta... word failed stream = 'OR "el bulli resta... phrase failed stream = ' OR "el bulli rest... word failed stream = ' OR "el bulli rest... text = [['spicy', 'meatballs']] stream = ' OR "el bulli rest... phrase = ['el bulli restaurant'] stream = '' phrase failed stream = '' word failed stream = '' text = [['el bulli restaurant']] stream = '' query = [['spicy', 'meatballs'], ['el... stream = '' '''
    query.config.auto_memoize(full=True)
    query.parse('spicy meatballs OR "el bulli restaurant"')
    trace = buffer.getvalue()
    # On failure the actual trace is shown wrapped in triple quotes.
    assert trace == expected, '"""' + trace + '"""'
    # check caching works
    # The whole buffer is compared with the same `expected` text again,
    # i.e. the second parse is required to add nothing to the trace.
    query.parse('spicy meatballs OR "el bulli restaurant"')
    trace = buffer.getvalue()
    assert trace == expected, '"""' + trace + '"""'
def SkipTo(matcher, include=True):
    '''
    Skip forward through the input as far as `matcher`.

    `matcher` is the matcher that ends the skipped run.  `include`
    (default True) decides whether `matcher` itself is consumed as part
    of the result; if false, it is only checked with `Lookahead`.

    Returns all the skipped data, joined.
    '''
    # The run being skipped: repeated input that is rejected by `matcher`.
    skipped = Star(AnyBut(matcher))
    if not include:
        # Join only the skipped run, then require `matcher` via Lookahead.
        return And(Add(skipped), Lookahead(matcher))
    # Join the skipped run together with the result of `matcher`.
    return Add(And(skipped, matcher))
def AnyBut(exclude=None):
    '''
    Match any character except those specified (or, if a matcher is used as
    the exclude, if the matcher fails).

    The argument should be a list of tokens (or a string of suitable
    characters) to exclude, or a matcher.  If omitted all tokens are accepted.

    Returns `Any()` when `exclude` is omitted; otherwise an `And` of a
    negated `Lookahead` on the exclusion and `Any()`.
    '''
    if exclude is None:
        # Nothing to exclude, so no lookahead guard is needed.  Previously
        # the None default was passed on into the negative lookahead rather
        # than being treated as "accept everything", which is what the
        # documented contract above requires.
        return Any()
    # Reject the position if the exclusion matches there, then take one
    # token with Any().
    return And(~Lookahead(coerce_(exclude, Any)), Any())
def build(self):
    '''
    Construct the parser.

    Assembles the grammar for regular-expression text from lepl matchers
    and returns the `parse_string` method of the top-level matcher.  The
    escape character and escaped set come from `self.alphabet`; the
    callbacks (`dot`, `dup`, `tup`, `extend`, `invert`, `char`,
    `sequence`, `choice`, `star`, `plus`, `option`) are attributes of
    this object.

    Parentheses are written out wherever a callback is attached with
    `>` or `>=`, to show that it applies to the whole expression.
    '''
    # Avoid dependency loops
    from lepl.matchers.derived import Drop, Eos, AnyBut, Upper
    from lepl.matchers.core import Any, Lookahead, Literal, Delayed

    # these two definitions enforce the conditions above, providing only
    # special characters appear as literals in the grammar
    after_escape = Drop(self.alphabet.escape) + Any(self.alphabet.escaped)
    unescaped = (~Lookahead(self.alphabet.escape)
                 + AnyBut(self.alphabet.escaped))
    one_char = after_escape | unescaped

    wildcard = Literal('.') >> self.dot
    lone = one_char >> self.dup
    span = (one_char & Drop('-') & one_char) > self.tup
    extension = (Drop('(*') & Upper()[1:, ...] & Drop(')')) >> self.extend

    # Bracketed sets, plain and '^'-prefixed.
    member = span | lone | extension
    bracketed = Drop('[') & member[1:] & Drop(']')
    negated = (Drop('[^') & member[1:] & Drop(']')) >= self.invert
    atom = (
        negated | bracketed | lone | wildcard | extension
    ) > self.char

    # Forward reference; filled in by `nested +=` below.
    nested = Delayed()

    run = (atom | nested)[0:] > self.sequence
    grouped = Drop('(') & run & Drop(')')
    choice = (Drop('(') & run[2:, Drop('|')] & Drop(')')) > self.choice

    def operand():
        # Builds a fresh matcher on every call, so each postfix form
        # below gets its own operand.
        return choice | grouped | atom

    starred = (operand() & Drop('*')) > self.star
    plussed = (operand() & Drop('+')) > self.plus
    maybe = (operand() & Drop('?')) > self.option

    nested += (choice | grouped | starred | plussed | maybe)

    grammar = (atom | nested)[:] & Drop(Eos())

    # Empty config here avoids loops if the default config includes
    # references to alphabets
    grammar.config.clear()
    return grammar.parse_string
def test_bang(self):
    '''
    Negated lookahead (`~Lookahead`) placed between two `Any()` matchers,
    run against the input 'ab': negating 'c' is expected to give
    [['ab']], negating 'b' is expected to give no results.
    '''
    cases = (
        ('c', [['ab']]),
        ('b', []),
    )
    for blocked, expected in cases:
        grammar = Any() + ~Lookahead(blocked) + Any()
        self.assert_direct('ab', grammar, expected)
def test_simple(self):
    '''
    Plain `Lookahead` placed between two `Any()` matchers, run against
    the input 'ab': looking ahead for 'c' is expected to give no
    results, looking ahead for 'b' is expected to give [['ab']].
    '''
    cases = (
        ('c', []),
        ('b', [['ab']]),
    )
    for wanted, expected in cases:
        grammar = Any() + Lookahead(wanted) + Any()
        self.assert_direct('ab', grammar, expected)