import io
import tokenize
import typing as t

from rbnf.easy import Language, build_language

# to_rbnf_token, not_to_ignore, extended_ast, helper, ast, and RBNF are
# project-level definitions imported elsewhere in this module.

def lex(text: t.Union[str, bytes]):
    if isinstance(text, str):
        text = text.encode()
    stream = io.BytesIO(text)
    # tokenize.tokenize expects a readline callable; BytesIO.__next__ serves
    # as one, yielding a bytes line per call.
    return map(to_rbnf_token,
               filter(not_to_ignore, tokenize.tokenize(stream.__next__)))

python = Language('python')
python.namespace.update({
    **extended_ast.__dict__,
    **helper.__dict__,
    **ast.__dict__
})
build_language(RBNF, python, '<grammar>')
python_parser = python.named_parsers['file_input']

def _find_error(source_code, tokens, state):
    def _find_nth(string: str, element, nth: int = 0):
        _pos: int = string.find(element)
        if _pos == -1:  # was `is -1`: identity comparison on ints is a bug
            return 0
        while nth:
            _pos = string.index(element, _pos) + 1
            nth -= 1
        return _pos

    if not tokens:
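# A minimal, self-contained sketch of the tokenization pattern used by `lex`
# above, assuming only the standard library. Since to_rbnf_token and
# not_to_ignore are project helpers, this sketch prints the raw stdlib
# tokens instead of converting them to RBNF tokens.
import io
import tokenize

def raw_tokens(source: str):
    stream = io.BytesIO(source.encode())
    return list(tokenize.tokenize(stream.__next__))

for tok in raw_tokens("x = 1\n"):
    print(tokenize.tok_name[tok.type], repr(tok.string))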
engine ::= 'engine' '{' kvs=(kv (',' kv)*) '}'
           -> Engine(dict(kvs))

id ::= name=Name -> name.value

python ::= mark='python' codes=(_{is_indented})+
           -> Python(recover_codes([mark, *codes])[len('python'):])

field ::= id=id ':' type=expr ops=option* ['=' value=expr]
          -> Field(id, type, ops, value)

fields ::= fields<<field (',' fields<<field)* [',']
           -> fields

repr ::= '{' names=(id (',' id)*) '}'
         -> names[::2]

table ::= id=id '{' [fields=fields] ['repr' repr=repr] '}'
          -> Table(id, fields or [], repr)

option ::= ch=('~' | '!' | '?') -> ch.value

relation ::= left=id w1=['^'] 'with' w2=['^'] right=id
             '{' field_lst=(field (',' field)*) '}'
             -> Relation(left, right, (bool(w1), bool(w2)), field_lst[::2])

expr ::=
    | [is_enum='enum'] tks=(~('=' | '{' | '}' | '!' | '?' | '~' | ',' | 'repr' | 'with'))+
      -> (EnumValue if is_enum else Value)(recover_codes(tks))

lexer_helper := R'.'

stmts ::= stmts=(engine | relation | table | python)+ -> list(stmts)
"""

dbg = Language('dbg')
dbg.namespace.update(dbg_ast.__dict__)
build_language(source_code, dbg, 'dbg-lang.rbnf')
parse = build_parser(dbg)
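# Hedged illustration (not from the original sources): a hypothetical
# dbg-lang input the grammar above should accept. The `kv` rule and the AST
# constructors (Engine, Table, Relation, ...) are defined elsewhere, so both
# the sample text and the shape of the parse result are assumptions.
sample_dbg = """
User {
    id : int !,
    name : str = 'anonymous',
    repr { id, name }
}

User ^ with Post {
    tag : str
}
"""
result = parse(sample_dbg)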
keyword cast := 'commit' 'Author:'
sig   := R'[0-9a-f]+'
space := R'\n|\s+'

head ::= 'commit' as mark space sig as sig (~'\n')* '\n'
         (~'Author:')* 'Author:' (~'<')+ as author '<' (~'>')+ as email '>'
         with mark.colno <= 1
         rewrite sig.value, ''.join(e.value for e in author).strip(), ''.join(e.value for e in email).strip()

section ::= head as sig (~head)+
            rewrite sig

lexer_helper := R'.'

partial_text ::= (~section)* section as it
                 rewrite it

text ::= (section to [it] | ~section)+
         rewrite it
"""

lang = Language("git-log")
build_language(grammar, lang, '<python-internal>')
parse = build_parser(lang, use_parser='text')
partial_parse = build_parser(lang, use_parser='partial_text')
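# Hedged usage sketch: feeding real `git log` output to the two parsers
# built above. The subprocess call and result names are illustrative only;
# `parse` is meant to consume a whole log, while `partial_parse` succeeds as
# soon as the first complete commit section has been read.
import subprocess

log_text = subprocess.run(['git', 'log'],
                          capture_output=True, text=True).stdout
whole_log = parse(log_text)             # all rewritten (sha, author, email) results
first_section = partial_parse(log_text)  # just the leading commit section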
import re
from rbnf.easy import Language, build_language, build_parser
from ast import fix_missing_locations
from astpretty import pprint
from rbnfrbnf import constructs

with open('rbnf-bootstrap.rbnf') as f:
    code = f.read()

rbnf2 = Language('rbnf')
rbnf2.namespace.update(constructs.__dict__)
build_language(code, rbnf2, "rbnf-bootstrap.rbnf")

test_line_start = re.compile(r'\S')  # raw string avoids an invalid-escape warning
parse = build_parser(rbnf2)

def add_semi_comma(text: str):
    # Prefix each non-indented line with a ';' statement separator.
    def _add_semi_comma(text_formal: str):
        for each in text_formal.split('\n'):
            if test_line_start.match(each):
                yield ';'
            yield each

    return '\n'.join(_add_semi_comma(text))

result = parse(
    add_semi_comma("""
X := 'a'
A ::= ('b' | 'c' ('c' | 'a'))
Z ::= 'b'
recur F ::=
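# Illustration of what add_semi_comma produces for a two-rule grammar: every
# line that starts at column zero is preceded by a ';' line, giving the
# bootstrapped parser an explicit statement separator.
print(add_semi_comma("X := 'a'\nA ::= 'b'"))
# ;
# X := 'a'
# ;
# A ::= 'b'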
import re
import os

from rbnf.easy import Language, build_language, build_parser
from rbnfrbnf.core import constructs

cfg = Language('cfg-rbnf')
test_line_start = re.compile(r'\S')

def add_semi_comma(text: str):
    # Same separator trick as above, plus a trailing ';' to terminate the
    # final statement.
    def _add_semi_comma(text_formal: str):
        for each in text_formal.split('\n'):
            if test_line_start.match(each):
                yield ';'
            yield each
        yield ';'

    return '\n'.join(_add_semi_comma(text))

directory = os.path.split(__file__)[0]
with open(f'{directory}/context_free.rbnf') as f:
    source = f.read()

cfg.namespace = {**constructs.__dict__}
build_language(source, cfg, 'context_free.rbnf')
_parse = build_parser(cfg)

def parse(grammar):
    return _parse(add_semi_comma(grammar))
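# Hedged usage sketch: context_free.rbnf is not shown in this section, so
# the rules below are a hypothetical, bootstrap-style grammar. The point is
# that callers pass plain rule text and the wrapper injects the ';'
# separators before delegating to _parse.
result = parse("""
S ::= A 'b'
A ::= 'a'
""")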