def test_complete(self):
    '''
    The complete flag indicates whether the entire token must be consumed.
    '''
    #basicConfig(level=DEBUG)
    abc = Token('abc')
    incomplete = abc(Literal('ab'))
    incomplete.config.no_full_first_match()
    self.examples([(lambda: incomplete.parse('abc'), "None")])
    abc = Token('abc')
    incomplete = abc(Literal('ab'), complete=False)
    incomplete.config.no_full_first_match()
    self.examples([(lambda: incomplete.parse('abc'), "['ab']")])
def test_complex(self):
    #basicConfig(level=DEBUG)
    rx = Literal('foo') | (Literal('ba') + Any('a')[1:, ...])
    rx.config.compile_to_nfa().no_full_first_match()
    matcher = rx.get_match_null()
    results = list(matcher('foo'))
    assert results == [(['foo'], '')], results
    results = list(matcher('baaaaax'))
    assert results == [(['baaaaa'], 'x'), (['baaaa'], 'ax'),
                       (['baaa'], 'aax'), (['baa'], 'aaax')], results
    results = list(matcher('ba'))
    assert results == [], results
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher
class ExtensionParser(object):
    """
    A class that parses extensions.
    """

    class ExtensionCall(Node):
        """
        An extension call.
        """
        _name = None
        _args = None
        _kwargs = None

        @property
        def name(self):
            return self._name[0] if self._name else None

        @property
        def args(self):
            return tuple(self._args) if self._args else tuple()

        @property
        def kwargs(self):
            return dict(self._kwargs) if self._kwargs else {}

    COMMA = Drop(',')
    NONE = Literal('None') >> (lambda x: None)
    BOOL = (Literal('True') | Literal('False')) >> (lambda x: x == 'True')
    IDENTIFIER = Word(Letter() | '_', Letter() | '_' | Digit())
    FLOAT = Real() >> float
    INTEGER = Integer() >> int
    STRING = String() | String("'")
    ITEM = STRING | INTEGER | FLOAT | NONE | BOOL | IDENTIFIER

    with Separator(~Regexp(r'\s*')):
        VALUE = Delayed()
        LIST = Drop('[') & VALUE[:, COMMA] & Drop(']') > list
        TUPLE = Drop('(') & VALUE[:, COMMA] & Drop(')') > tuple
        VALUE += LIST | TUPLE | ITEM
        ARGUMENT = VALUE >> '_args'
        KWARGUMENT = (IDENTIFIER & Drop('=') & VALUE > tuple) >> '_kwargs'
        ARGUMENTS = (KWARGUMENT | ARGUMENT)[:, COMMA]
        NAME = IDENTIFIER > '_name'
        EXTENSION = ((NAME & Drop('(') & ARGUMENTS & Drop(')')) | NAME) & Eos() > ExtensionCall

    @property
    def parser(self):
        return self.EXTENSION.get_parse_string()
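# A minimal usage sketch of the grammar above. It assumes (as is usual for
# LEPL's get_parse_string) that the returned parse function yields a list
# whose first element is the ExtensionCall node; the input string and the
# expected attribute values are illustrative only.
def _example_extension_parse():
    parse = ExtensionParser().parser
    call = parse('foo(1, bar=True)')[0]
    # Assumed results for this input:
    #   call.name   -> 'foo'
    #   call.args   -> (1,)
    #   call.kwargs -> {'bar': True}
    return call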
def make_polygon_grammar():
    sep = ~(Space() | Literal(','))[:]
    with Separator(sep):
        num = Real() >> float
        grammar = sep & num[2][:] & sep
    grammar.config.no_compile_to_regexp()
    return grammar
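# Usage sketch, assuming the standard LEPL parse() entry point on a
# configured matcher; the SVG-style points string is illustrative only.
def _example_polygon_parse():
    grammar = make_polygon_grammar()
    # Each coordinate pair should be matched by num[2]; separators may be
    # spaces or commas.
    return grammar.parse('0,0 10,0 10,10 0,10')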
def test_literal(self):
    '''
    Simple literal should compile directly.
    '''
    token = Token(Literal('abc'))
    token.compile()
    assert token.regexp == 'abc', repr(token.regexp)
def test_ambiguity(self):
    '''
    A (signed) integer will consume a - sign.
    '''
    tokens = (Token(Integer()) | Token(r'\-'))[:] & Eos()
    self.examples([(lambda: list(tokens.parse_all('1-2')),
                    "[['1', '-2']]")])
    matchers = (Integer() | Literal('-'))[:] & Eos()
    self.examples([(lambda: list(matchers.parse_all('1-2')),
                    "[['1', '-2'], ['1', '-', '2']]")])
def create_parser(delimiter):
    space = Space()
    comma = Drop(',') | Drop(',') + space
    if delimiter == ',':
        # values separated by commas
        separator = Separator(~Regexp(r'\s*'))
        delimiter = comma
    else:
        assert delimiter == ' ', 'delimiter "%s" not supported' % delimiter
        separator = DroppedSpace()
        delimiter = space
    none = Literal('None') >> (lambda x: None)
    bool_ = (Literal('True') | Literal('False')) >> (lambda x: x == 'True')
    ident = Word(Letter() | '_', Letter() | '_' | Digit())
    float_ = Float() >> float
    int_ = Integer() >> int
    str_ = String() | String("'")
    dict_key = str_ | int_ | float_ | Word()
    dict_spaces = ~Whitespace()[:]
    dict_value = dict_key
    item = str_ | int_ | float_ | none | bool_ | ident | Word()
    with separator:
        value = Delayed()
        list_ = Drop('[') & value[:, comma] & Drop(']') > list
        tuple_ = Drop('(') & value[:, comma] & Drop(')') > tuple
        dict_el = dict_key & Drop(':') & value > tuple
        dict_ = Drop('{') & dict_el[1:, Drop(',')] & Drop('}') > dict
        value += list_ | tuple_ | dict_ | item | space
        arg = value >> 'arg'
        karg = (ident & Drop('=') & value > tuple) >> 'karg'
        expr = (karg | arg)[:, delimiter] & Drop(Eos()) > Node
    return expr.get_parse()
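# Usage sketch, assuming create_parser returns LEPL's usual parse function
# and that the resulting Node exposes its 'arg' and 'karg' children as
# attributes; the argument string is illustrative only.
def _example_create_parser():
    parse = create_parser(',')
    node = parse("1, 'two', three=True")[0]
    # Assumed: node.arg holds the positional values and node.karg holds
    # (name, value) tuples for the keyword arguments.
    return node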
def test_fmtted(self):
    '''
    Test a message with formatting.
    '''
    parser = (Literal('abc') > 'name') ** make_error('msg {in_rest}')
    parser.config.no_full_first_match()
    node = parser.parse('abc')[0]
    assert isinstance(node, Error)
    assert node[0] == "msg 'abc'", node[0]
    assert str(node).startswith("msg 'abc' ("), str(node)
    assert isinstance(node, Exception), type(node)
def test_formatted(self):
    '''
    Test a message with formatting.
    '''
    parser = (Literal('abc') > 'name') ** make_error('msg {stream_in}')
    parser.config.no_full_first_match()
    node = parser.parse('abc')[0]
    assert isinstance(node, Error)
    assert node[0] == 'msg abc', node[0]
    assert str(node).startswith('msg abc ('), str(node)
    assert isinstance(node, Exception), type(node)
def test_bad_format(self):
    '''
    Test a message with bad formatting.
    '''
    try:
        parser = (Literal('abc') > 'name') ** make_error('msg {0}')
        parser.config.no_full_first_match()
        list(parser.match('abc'))
        assert False, 'expected error'
    except IndexError:
        pass
def test_list(self):
    '''
    The code has a special case for handling lists.
    '''
    #basicConfig(level=DEBUG)
    with TraceVariables():
        parser = (Literal([1, 2, 3]) > 'name') ** make_error('msg {in_str}')
        parser.config.no_full_first_match()
        node = parser.parse([1, 2, 3])[0]
    assert isinstance(node, Error)
    assert node[0] == 'msg 1', node[0]
    assert str(node).startswith('msg 1 ('), str(node)
    assert isinstance(node, Exception), type(node)
def test_literal(self):
    rx = Literal('abc')
    rx.config.clear().compile_to_nfa(force=True)
    matcher = rx.get_match_null()
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher
    results = list(matcher('abcd'))
    assert results == [(['abc'], 'd')], results
    rx = Literal('abc') >> (lambda x: x + 'e')
    rx.config.clear().compose_transforms().compile_to_nfa(force=True)
    matcher = rx.get_match_null()
    results = list(matcher('abcd'))
    assert results == [(['abce'], 'd')], results
    #print(repr(matcher.matcher))
    assert isinstance(matcher.matcher, NfaRegexp), matcher.matcher
def word(self):
    return Literal("a") & Literal("bc")[1:, ...]
def test_literal(self):
    self.assert_regexp(Literal('foo'), 'foo')
def test_repeat(self):
    self.assert_regexp(Any()[1:, ...], '.(.)*')  # ugly, but correct
    self.assert_regexp(Any()[:, ...], '(.(.)*|)')
    self.assert_regexp(Literal('foo')[:, ...], '(foo(foo)*|)')
def test_complex(self):
    self.assert_regexp((Any('ab') + Literal('q')) | Literal('z'), '([a-b]q|z)')
    self.assert_regexp((Any('ab') + 'q') | 'z', '([a-b]q|z)')
def make_path_grammar():
    sep = ~(Space() | Literal(','))[:]
    with Separator(sep):
        num = Real() >> float
        # Moveto
        M = (Literal('M') | Literal('m')) & num[2][:]
        # Horizontal straight lines
        H = (Literal('H') | Literal('h')) & num[:]
        # Vertical straight lines
        V = (Literal('V') | Literal('v')) & num[:]
        # General straight lines
        L = (Literal('L') | Literal('l')) & num[2][:]
        # Cubic bezier curves (curveto)
        C = (Literal('C') | Literal('c')) & num[6][:]
        # Cubic bezier curves (smooth curveto)
        S = (Literal('S') | Literal('s')) & num[4][:]
        # Close the path
        z = Literal('z') | Literal('Z')
        grammar = sep & ((M | H | V | L | C | S | z) > List)[:] & sep
    grammar.config.no_compile_to_regexp()
    return grammar
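# Usage sketch, assuming the standard LEPL parse() entry point; each path
# command should come back as a List node. The path data is illustrative
# only.
def _example_path_parse():
    grammar = make_path_grammar()
    return grammar.parse('M 0,0 L 10,10 z')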
def test_flatten(self):
    matcher = Literal('a') & Literal('b') & Literal('c')
    assert str(matcher) == "And(And, Literal)", str(matcher)
    matcher.config.clear().flatten()
    parser = matcher.get_parse_string()
    assert str(parser.matcher) == "And(Literal, Literal, Literal)", str(parser.matcher)