def compile(source: Union[str, Grammar],
            actions: Dict[str, Callable] = None,
            parser: str = 'packrat',
            flags: Flag = Flag.OPTIMIZE) -> Parser:
    """Build a parser for the expression or grammar given as *source*.

    Args:
        source: a grammar definition string, which is read with
            ``loads()``, or an already constructed ``Grammar``.
        actions: mapping handed to the new ``Grammar`` as its actions;
            only usable when *source* is a string.
        parser: name of the parser implementation, compared
            case-insensitively: ``'packrat'``, ``'machine'``, or
            ``'machine-python'``.
        flags: passed on to the parser class; also selects the debug
            printing described below.

    Returns:
        An instance of the selected parser class.

    Raises:
        Error: if *parser* is not a supported name, or if a non-empty
            *actions* is given together with a ``Grammar`` *source*.
        AssertionError: if *source* is neither a ``Grammar`` nor a
            ``str`` (only while assertions are enabled).

    When ``Flag.DEBUG`` is set the grammar is printed before the parser
    is constructed; when ``Flag.OPTIMIZE`` is set as well, the parser's
    ``modified_grammar`` is printed afterwards.
    """
    # The parser modules are imported inside the branches, so only the
    # selected implementation is imported.
    backend = parser.lower()
    if backend == 'packrat':
        from pe.packrat import PackratParser as parser_cls
    elif backend == 'machine':
        from pe.machine import MachineParser as parser_cls  # type: ignore
    elif backend == 'machine-python':
        from pe._py_machine import MachineParser as parser_cls  # type: ignore
    else:
        raise Error(f'unsupported parser: {parser}')

    if not isinstance(source, Grammar):
        assert isinstance(source, str)
        start, definitions = loads(source)
        grammar = Grammar(definitions, actions=actions, start=start)
    elif actions:
        # A prepared grammar is used as-is; actions cannot be added here.
        raise Error('cannot assign actions to prepared grammar')
    else:
        grammar = source

    debugging = flags & Flag.DEBUG
    if debugging:
        print('## Grammar ##')
        print(grammar)

    instance = parser_cls(grammar, flags=flags)

    if debugging and (flags & Flag.OPTIMIZE):
        print('## Modified Grammar ##')
        print(instance.modified_grammar)

    return instance
def __init__(self,
             grammar: Union[Grammar, Definition],
             flags: Flag = Flag.NONE):
    """Store *grammar* and *flags* on the new instance.

    Args:
        grammar: a ``Grammar``, or a single ``Definition`` which is
            placed in a new ``Grammar`` under the name ``'Start'``.
        flags: stored unchanged as ``self.flags``.
    """
    resolved = (Grammar({'Start': grammar})
                if isinstance(grammar, Definition)
                else grammar)
    self.grammar = resolved
    # Starts out as the very same grammar; may be reassigned later.
    self.modified_grammar = resolved
    self.flags = flags
def test_exprs(parser, dfn, input, pos, end, match):
    """Match *dfn* against *input* starting at *pos* and check the result.

    *match* is either ``None`` (no match expected) or a triple of the
    expected ``(groups, groupdict, value)``; *end* is the expected end
    position of a successful match.  The test is skipped when *parser*
    is ``None``.
    """
    if parser is None:
        pytest.skip('extension module is not available')

    grammar = Grammar({'Start': dfn, 'abc': abc, 'abcs': Str(abc)})
    result = parser(grammar).match(input, pos=pos, flags=pe.NONE)

    if match is None:
        assert result is None
        return

    expected_groups, expected_groupdict, expected_value = match
    assert result.end() == end
    assert result.groups() == expected_groups
    assert result.groupdict() == expected_groupdict
    assert result.value() == expected_value
def optimize(g: Grammar, inline=True, common=True, regex=True):
    """Return a new grammar with the selected rewriting passes applied.

    The passes run in a fixed order and each one works on the
    definitions produced by the previous one:

    * *inline*: every definition is rewritten by ``_inline``.
    * *common*: every definition is rewritten by ``_common``.
    * *regex*: every definition is rewritten by ``_regex``, with a single
      counter starting at 1 shared across all definitions.

    The result keeps the actions and start symbol of *g*; when all three
    switches are false its definitions are those of *g* unchanged.
    """
    defs = g.definitions

    if inline:
        # Each call receives the definitions as they were before this
        # pass, plus a set holding the definition's own name.
        defs = {name: _inline(defs, defn, {name})
                for name, defn in defs.items()}

    if common:
        defs = {name: _common(defn) for name, defn in defs.items()}

    if regex:
        group_ids = count(start=1)
        # Each call receives the definitions as they were before this pass.
        defs = {name: _regex(defn, defs, group_ids)
                for name, defn in defs.items()}

    return Grammar(definitions=defs, actions=g.actions, start=g.start)
def grm(d):
    """Build a ``Grammar`` from *d*, starting at the first key of *d*."""
    first_name = next(iter(d))
    return Grammar(definitions=d, start=first_name)
def gload(s, inline=False, common=False, regex=False):
    """Load the grammar text *s* and run ``optimize()`` on it.

    The three switches are forwarded to ``optimize()`` and all default
    to off here.
    """
    start, definitions = loads(s)
    grammar = Grammar(definitions, start=start)
    return optimize(grammar, inline=inline, common=common, regex=regex)
def debug(g: Grammar):
    """Build a grammar that reports debugging information while parsing.

    Every definition of *g* is passed through ``_debug`` together with
    the full set of original definitions; the results are collected in
    a new ``Grammar`` with the same start symbol.
    """
    originals = g.definitions
    instrumented = {}
    for name, defn in originals.items():
        instrumented[name] = _debug(defn, originals)
    # NOTE(review): the new grammar is constructed without ``actions``;
    # confirm that not carrying over ``g.actions`` is intended.
    return Grammar(definitions=instrumented, start=g.start)
# Grammar assembled from the definitions in V and the actions listed below.
# NOTE(review): the action keys presumably name definitions in V -- confirm.
PEG = Grammar(
    definitions=V,
    actions={
        # Actions built with Pack(...).
        'Grammar': Pack(tuple),
        'Definition': Pack(tuple),
        'Expression': Pack(_make_prioritized),
        'Sequence': Pack(_make_sequential),
        # Plain callables and Constant(...) actions.
        'Valued': _make_valued,
        'AND': Constant(And),
        'NOT': Constant(Not),
        'TILDE': Constant(Capture),
        'Binding': _make_binder,
        'Quantified': _make_quantified,
        'QUESTION': Constant(Optional),
        'STAR': Constant(Star),
        'PLUS': Constant(Plus),
        'Name': Nonterminal,
        'Literal': _make_literal,
        'Class': _make_class,
        # Note: Dot is instantiated here, unlike the other Constant(...)
        # arguments, which are passed uncalled.
        'DOT': Constant(Dot()),
        # Warn(...) action; its message is about an unescaped "]" used as
        # the second character of a range.
        'RangeEndWarn': Warn('The second character in a range may be an unescaped "]", '
                             'but this is often a mistake. Silence this warning by '
                             'escaping the hyphen (\\-) or the right bracket (\\]), '
                             'depending on what was intended.')
    })