# Grammar for a small definition language (`name = expr ;`).  Each semantic
# action returns a string, so parsing renders the input as text.  The mk_*
# actions are supplied below; `parse_camel` and `wrap` come from elsewhere
# in this module.
g = Grammar(r"""
grammar  = _ defn*.
defn     : var '='_ exp ';'_   :hug.
exp      : 'Choice'   args     :mk_choice
         | 'Fixed'    args     :mk_fixed
         | 'Sequence' args     :mk_sequence
         | 'Shuffle'  args     :mk_shuffle
         | 'Weighted' args     :mk_weighted
         | /Period\b/_         :'.'
         | /Comma\b/_          :','
         | /Semicolon\b/_      :';'
         | /Dash\b/_           :'--'
         | /AAn\b/_            :'-a-an-'
         | /Concat\b/_         :'-adjoining-'
         | /null\b/_           :'()'
         | var
         | string
         | int.
args     : '('_ exps? ')'_.
exps     : exp (','_ exps)*.
var      : /([A-Za-z_]\w*)/_   :mk_var.
int      : /(\d+)/             :int.
string   : '"' qchar* '"'_     :join.
qchar    : !/["\\]/ /(.)/.
_        : (space | comment)*.
space    : /\s+/.
comment  : '/*' (!'*/' anyone)* '*/'.
anyone   : /./ | /\n/.   # Ugh.
""")(**{
    # Identifier -> '-' + dash-joined parse_camel() pieces + '-'.
    'mk_var': lambda ident: '-' + '-'.join(parse_camel(ident)) + '-',
    # Alternatives are separated by ' / '.
    'mk_choice': lambda *alts: ' / '.join(alts),
    'mk_fixed': lambda tag, choice: '%s{ %s }' % (tag, choice),
    # Sequence members are each passed through wrap() and space-joined.
    'mk_sequence': lambda *items: ' '.join(map(wrap, items)),
    'mk_shuffle': lambda *alts: '{ %s }' % (' / '.join(alts)),
    # Arguments arrive flattened as weight, item, weight, item, ...
    'mk_weighted': lambda *flat: ' / '.join(
        '[%s] %s' % (weight, wrap(item))
        for weight, item in zip(flat[::2], flat[1::2])),
})
STRING ~: /"([^"]*)"/ FNORD # XXX | /'([^']*)'/ FNORD. IDENT ~: !RESERVED_WORD {IdentifierName} FNORD. # XXX incomplete IdentifierName ~= IdentifierStart IdentifierPart*. IdentifierStart ~= UnicodeLetter | '$' | '_'. IdentifierPart ~= IdentifierStart. UnicodeLetter ~= /[A-Za-z]/. """ #import sys; sys.setrecursionlimit(5000) gr = Grammar(g) import microses grr = gr.bind(microses).expecting_one_result() def test(filename): from pprint import pprint print 'testing', filename with open(filename) as f: text = f.read() result = grr(text) for form in result: pprint(form.as_sexpr()) ## grr('a.b =c;') #. (Assign(Get(Variable('a'), 'b'), '=', Variable('c')),)
# Following http://www.json.org/
#
# JSON parser: `json_parse(text)` runs the `start` rule of the grammar below.
# Semantic actions that are not supplied explicitly (mk_object, mk_literal,
# escape, mk_number, ...) are taken from this module's globals.
# (The comment line above must stay on its own line: fused with the
# assignment it would comment the whole definition out.)
json_parse = Grammar(r"""
start    : _ value.

object   : '{'_ pairs? '}'_          :mk_object.
pairs    : pair (','_ pair)*.
pair     : string ':'_ value         :hug.

array    : '['_ elements? ']'_       :hug.
elements : value (','_ value)*.

value    : string
         | number
         | object
         | array
         | /(true|false|null)\b/_    :mk_literal.

string   : '"' char* '"'_            :join.
char     : /([^\x00-\x1f"\\])/
         | /\\(["\/\\])/
         | /(\\[bfnrt])/             :escape
         | /(\\u)/ xd xd xd xd       :join :escape.
xd       : /([0-9a-fA-F])/.

number   : int (frac exp? | exp)? _  :join :mk_number.
int      : /(-?0)/ !/\d/
         | /(-?[1-9]\d*)/.
frac     : /([.]\d+)/.
exp      : /([eE][+-]?\d+)/.

_        : /\s*/.
""")(**globals()).start
# Grammar for the toy functional language.  Semantic actions (fold_apps,
# make_let, make_case, ...) are looked up in this module's globals.
#
# NOTE: the Operator character class used to be written `[!+-.]`.  Inside a
# class `+-.` is a *range* (`+`, `,`, `-`, `.`), so `,` was accepted as an
# operator name and `V` could swallow the comma that EList and ParamList
# rely on as a separator.  The dash is now last so it is taken literally.
toy_grammar = Grammar(r"""
main       : _ E !/./.

E          : Fp '`'_ V '`'_ E          :fold_infix_app
           | Fp                        :fold_apps
           | '&'_ Vp '=>'_ E           :fold_lam
           | /let\b/_ Decls E          :make_let
           | /case\b/_ E Cases         :make_case.

Cases      : Case+                     :hug.
Case       : '|'_ Param '=>'_ E        :hug.

Param      : Const
           | V
           | '('_ Param ')'_
           | '['_ ParamList ']'_.
ParamList  : Param ','_ Param          :make_list_pattern.

Decls      : Decl+                     :hug.
Decl       : /defer\b/_ V ';'_         :make_defer
           | /bind\b/_ V '='_ E ';'_   :make_bind
           | Vp '='_ E ';'_            :make_eqn.

Fp         : F+                        :hug.
F          : Const                     :make_const
           | V                         :make_var
           | '('_ E ')'_
           | '{'_ F Fp '}'_            :fold_send
           | '['_ EList ']'_           :hug :make_list_expr.
EList      : (E (','_ EList)?)?.

Vp         : V+                        :hug.
V          : Identifier
           | Operator.

Identifier : /(?!let\b|case\b|defer\b|bind\b)([A-Za-z_]\w*)\b\s*/.
Operator   : /(<=|:=|[!+.-])\s*/.

Const      : '.'_ V                    :make_lit_sym
           | /"([^"]*)"/_              :repr
           | /(-?\d+)/_
           | '('_ ')'_                 :'()'
           | '['_ ']'_                 :'[]'.

_          : /\s*/.
""")(**globals())
"""
The customary calculator example.
"""

import operator
from parson import Grammar

# Arithmetic over non-negative integer literals.  The functions of the
# `operator` module are passed in as the semantic actions, so an action such
# as `:add` resolves to operator.add.
#
# `//` is bound to `floordiv` rather than `div`: operator.div is classic
# division on Python 2 (it does not floor float operands such as the result
# of `2^-1`) and does not exist on Python 3.
g = Grammar(r"""
top  : _ exp0 !/./.

exp0 : exp1 ( '+'_  exp1 :add
            | '-'_  exp1 :sub)*.

exp1 : exp2 ( '*'_  exp2 :mul
            | '//'_ exp2 :floordiv
            | '/'_  exp2 :truediv
            | '%'_  exp2 :mod)*.

exp2 : exp3 ( '^'_  exp2 :pow)?.

exp3 : '('_ exp0 ')'_
     | '-'_ exp1 :neg
     | /(\d+)/_  :int.

_    = /\s*/.
""")(**operator.__dict__)

## g.top('42 * (5-3) + -2^2')
#. (80,)
## g.top('2^3^2')
#. (512,)
## g.top('5-3-1')
#. (1,)
| 'b' A | . A : 'a' S | 'b' A A. B : 'b' S | 'a' B B. """)() ## as_and_bs.allS("abaabbbbaa") #. () nums = Grammar(r""" allnums : nums? !/./. nums : num (',' num)*. num : /(\d+)/ :int. """)() sum_nums = lambda s: sum(nums.allnums(s)) ## sum_nums('10,30,43') #. 83 one_word = Grammar(r"word : /\w+/ :position.")() ## one_word.word('hello') #. (5,) ## one_word.word('hello there') #. (5,) ## one_word.word.attempt(' ')
# Grammar for the box/constraint drawing language: a program is a sequence
# of `name { stmt* }` boxes.  Rules declared with `~:` handle their own
# trailing whitespace/comments through FNORD.  Only the grammar is built
# here; it is not applied to a set of semantic actions in this statement.
grammar = Grammar(r"""
         box* :end.

box:     name '{' [stmt* :hug] '}'                              :Box.

stmt:    "var" name ++ ',' ';'                                  :hug :Decl
       | "put" [name ':' | :None] box ';'?                      :Put
       | "conn" expr ("to" expr)+                               :hug
             ( "using" expr box '<' expr ',' expr '>' ';'       :Pen
             | ';'                                              :Conn)
       | "spline" expr ("to" expr)+ ';'                         :hug :Spline
       | "compass" expr "to" expr ';'                           :Compass
       | justify string "at" expr ';'                           :Text
       | expr ('=' expr)+ ';'                                   :hug :Equate
       | expr ('~' expr)+ ';'                                   :hug :Default.

justify ~: {"left"|"right"|"center"} FNORD
         | :'center'.

expr:    term   ( '+' term   :Add
                | '-' term   :Sub )*.
term:    factor ( '*' factor :Mul
                | '/' factor :Div )*.
factor:  atom ('[' expr ',' expr ']' :Relatively)*.
atom:    '(' number ',' number ')'   :complex :Literal
       | number                      :complex :Literal
       | '-' atom                    :Negate
       | '(' expr ')'
       | unaryfn '(' expr ')'        :CallPrim
       | name :Ref ('.' name :Of)*.

unaryfn: "abs"  :Abs
       | "cis"  :Cis
       | "unit" :Unit.

name:    /([A-Za-z_][A-Za-z_0-9]*)/.

number  ~: { '-'? (/\d*/ '.' /\d+/ | /\d+/) } FNORD :float.
string  ~: '"' {/[^\\"]*/} '"' FNORD.

FNORD   ~: (/\s+/ | comment)*.
comment ~: '/*' (!'*/' :anyone)* '*/'.
""")
# Grammar for one line of input to the BASIC interpreter: either an
# immediate command (a numbered line to store, run/new/load/save), a
# statement, or an empty line.  Only the grammar is built here; it is not
# applied to a set of semantic actions in this statement.
#
# The grammar must stay one rule per line: a `#` comment inside it runs to
# the end of the line, so the TODO after `id` would otherwise hide the `_`
# (whitespace) rule that every other rule depends on.
grammar = Grammar(r"""
command  : /(\d+)/_ :int /(.*)/ /$/              :set_line
         | 'run'  /\b/_ /$/                      :run
         | 'new'  /\b/_ /$/                      :new
         | 'load' /\b/_ /(\S+)/_ /$/             :load
         | 'save' /\b/_ /(\S+)/_ /$/             :save
         | stmt
         | /$/.

stmt     : 'print'  /\b/_ printing /$/           :next
         | '?' _ printing /$/                    :next
         | 'input'  /\b/_ id /$/                 :input :next
         | 'goto'   /\b/_ exp0 /$/               :goto
         | 'if'     /\b/_ relexp 'then' /\b/_ exp0 /$/  :if_goto
         | 'gosub'  /\b/_ exp0 /$/               :gosub
         | 'return' /\b/_ /$/                    :return_
         | 'end'    /\b/_ /$/                    :end
         | 'list'   /\b/_ /$/                    :list :next
         | 'rem'    /\b/ /.*/ /$/                :next
         | ('let' /\b/_)? id '='_ exp0 /$/       :store :next.

printing : (display writes)?.
writes   : ';'_ printing
         | ','_ :space printing
         | :newline.

display  : exp0 :write
         | '"' [qchar :write]* '"'_.
qchar    : /"(")/
         | /([^"])/.

relexp   : exp0 ( '<>'_ exp0 :ne
                | '<='_ exp0 :le
                | '<' _ exp0 :lt
                | '=' _ exp0 :eq
                | '>='_ exp0 :ge
                | '>' _ exp0 :gt
                )?.
exp0     : exp1 ( '+'_ exp1 :add
                | '-'_ exp1 :sub
                )*.
exp1     : exp2 ( '*'_ exp2 :mul
                | '/'_ exp2 :idiv
                )*.
exp2     : primary ('^'_ exp2 :pow)?.
primary  : '-'_ exp1 :neg
         | /(\d+)/_  :int
         | id        :fetch
         | '('_ exp0 ')'_.

id       : /([a-z])/_.   # TODO: longer names, screening out reserved words

_        : /\s*/.
""")
# Calculator built from precedence parsers instead of hand-written
# expN grammar rules.

# `g` is only defined further down, so the reference to g.exp3 is wrapped
# in a lambda and resolved lazily through delay().
exp3 = delay(lambda: g.exp3)

# Multiplicative operators, then '^' as a right-associative level.
exp1 = PrececedenceParser(exp3, [
    LeftAssoc(('*'+_, mul), ('//'+_, div), ('/'+_, truediv), ('%'+_, mod)),
    RightAssoc(('^'+_, pow)),
])

# Additive operators over exp1.
exps = PrececedenceParser(exp1, [
    LeftAssoc(('+'+_, add), ('-'+_, sub)),
])

# `:exps` and `:exp1` inside the grammar refer to the Python objects above,
# which reach the grammar through **globals().
g = Grammar(r"""
top  = _ :exps !/./.

exp3 : '('_ :exps ')'_
     | '-'_ :exp1 :neg
     | /(\d+)/_   :int.

_    = /\s*/.
""")(**globals())

## g.top('42 *(5-3) + -2^2')
#. (80,)
## g.top('2^3^2')
#. (512,)
## g.top('5-3-1')
#. (1,)
## g.top('3//2')
#. (1,)
## g.top('3/2')
#. (1.5,)
def p(grammar, rule, text):
    """Parse `text` with rule `rule` of the grammar source `grammar`.

    The grammar is compiled with this module's globals as its semantic
    actions.  Returns the parse result on success; if the parse raises
    Unparsable, the exception object is returned instead of propagating.
    Errors raised while building the grammar or looking up the rule are
    not caught.
    """
    parse = getattr(Grammar(grammar)(**globals()), rule)
    try:
        return parse(text)
    # `except X as e` works on Python 2.6+ and 3; the old `except X, e`
    # spelling is a syntax error on Python 3.
    except Unparsable as e:
        return e
Ns_x = set(len(m) for m in xmatches) Ns_y = set(n - m for n in Ns for m in Ns_x if n - m >= 0) ymatches = y(Ns_y) return set(m1 + m2 for m1 in xmatches for m2 in ymatches if len(m1 + m2) in Ns) grammar = Grammar(r""" regex : exp !/./. exp : term ('|' exp :either)* | :empty. term : factor (term :chain)*. factor : primary ( '*' :star | '+' :plus | '?' :optional )?. primary : '(' exp ')' | '[' char* ']' :join :oneof | '.' :dot | /\\(.)/ :literal | /([^.()*+?|[\]])/ :literal. char : /\\(.)/ | /([^\]])/. """) parser = grammar(**globals()) ## generate('.+', range(5)) #. ['?', '??', '???', '????'] ## generate('a[xy]+z()*|c.hi', range(5)) #. ['axz', 'ayz', 'axxz', 'axyz', 'ayxz', 'ayyz', 'c?hi'] ## generate('(Chloe|Yvette), a( precocious)? (toddler|writer)', range(28))
"""
A bunch of small examples, some of them from the LPEG documentation.
Crudely converted from Peglet. TODO: make them nicer.
"""

from parson import Grammar, Unparsable, exceptionally

# Collect every run of word characters, skipping the non-word characters
# in front of each one.
parse_words = Grammar(r'words : /\W*(\w+)/ words | .')()

# The equivalent re.split() would return extra '' results first and last:
## parse_words.words('"Hi, there", he said.')
#. ('Hi', 'there', 'he', 'said')

def Tag(label):
    """Return a function that packs its arguments into a tuple headed by `label`."""
    def tagged(*parts):
        return (label,) + parts
    return tagged

# Split a personal name into tagged parts; the title and middle parts are
# optional (each of those rules has an empty alternative).
name = Grammar(r"""
name    : title first middle last.
title   : (/(Dr|Mr|Ms|Mrs|St)[.]?/ | /(Pope(?:ss)?)/) _ :Title |.
first   : /([A-Za-z]+)/ _ :First.
middle  : (/([A-Z])[.]/ | /([A-Za-z]+)/) _ :Middle |.
last    : /([A-Za-z]+)/ :Last.
_       : /\s+/.
""")(Title=Tag('title'),
     First=Tag('first'),
     Middle=Tag('middle'),
     Last=Tag('last'))

## name.name('Popess Darius Q. Bacon')
#. (('title', 'Popess'), ('first', 'Darius'), ('middle', 'Q'), ('last', 'Bacon'))
""" Convert a Peglet grammar to a Parson one. """ import re from parson import Grammar, alter name = r'[A-Za-z_]\w*' grammar = Grammar(r""" grammar : _? rule* !/./. rule : name _ '= ' :equ token* :'.' _?. token : '|' :'|' | /(\/\w*\/\s)/ | name !(_ '= ') | '!' :'!' | _ !(name _ '= ' | !/./) | !('= '|name) /(\S+)/ :mk_regex. name : /(""" + name + """)/ !!(/\s/ | !/./). _ : /(\s+)/. """) def mk_regex(s): return '/' + s.replace('/', '\\/') + '/' def peglet_to_parson(text): nonterminals = set() def equ(name, space):
ProcedureDeclaration: ProcedureHeading ';'_ ProcedureBody ident. declarations: ('CONST'_ (ident '='_ expression ';'_)*)? ('TYPE'_ (ident '='_ type ';'_)*)? ('VAR'_ (IdentList ':'_ type ';'_)*)? (ProcedureDeclaration ';'_)*. module: 'MODULE'_ ident ';'_ declarations ('BEGIN'_ StatementSequence)? 'END'_ ident '.'_. _: whitespace*. whitespace: /\s+/ | comment. comment: '(*' commentchunk* '*)'. commentchunk: comment | !'*)' /.|\n/. # XXX are comments nested in Oberon-0? keyword: /BEGIN|END|MODULE|VAR|TYPE|CONST|PROCEDURE|RECORD|ARRAY|OF|WHILE|DO|IF|ELSIF|THEN|ELSE|OR|DIV|MOD/ /\b/. top: _ module !/./. """ grammar = Grammar(grammar_source)() # TODO test for expected parse failures ## from parson import exceptionally ## import glob ## for filename in sorted(glob.glob('ob-bad/*.ob')): print exceptionally(lambda: test(filename)) #. testing ob-bad/badassign.ob #. (top, 'MODULE badassign;\n\nBEGIN\n ', '1 := 2\nEND badassign.\n') #. testing ob-bad/commentnoend.ob #. (top, "MODULE commentnoend;\n (* started off well,\n but didn't finish\nEND commentnoend.\n", '') #. testing ob-bad/keywordasname.ob #. (top, 'MODULE ', 'END;\nEND END.\n') #. testing ob-bad/repeatsection.ob #. (top, 'MODULE repeatsection;\n\nCONST\n aconst = 10;\n\n', 'CONST\n aconst = 20;\n\nEND repeatsection.\n')
qstring: /'([^']*)'/_. tokenid: /([A-Z_]+)/_. id: /([a-z_]+)/_. _ = /\s*/. """ def foldr1(f, xs): return xs[0] if len(xs) == 1 else f(xs[0], foldr1(f, xs[1:])) def Chain(*pes): return empty if not pes else foldr1(chain, pes) def Optional(pe): return pe.maybe() def Literal(s): return label(one_that(lambda t: t[1] == s), repr(s)) # XXX def Token(name): return label(one_that(lambda t: t[1] == name), name) # XXX def RuleRef(name): return delay((lambda: rules[name]), name) grammar = Grammar(g)(**globals()) subset = open('subset').read() metagrammar = grammar.grammar(subset) rules = dict(metagrammar) pygrammar = rules['file_input'] ## pygrammar([('', 'ENDMARKER',)]) #. () ## grammar.grammar("dotted_name: NAME ('.' NAME)*") #. (('dotted_name', (NAME (('.' NAME))*)),) ## for pair in grammar.grammar('yo: hey boo: yah'): print pair #. ('yo', hey) #. ('boo', yah)
# Grammar for the constraint-drawing language: a program is a sequence of
# `define NAME [extends NAME] { ... }` definitions and top-level
# declarations, ended by end of input or a literal `__END__`.  Only the
# grammar is built here; it is not applied to a set of semantic actions in
# this statement.
#
# The grammar must stay one rule per line: `#` comments inside it run to
# the end of the line, and the lexical rules (NUMBER, mantissa, ID, `__`,
# `_`) sit right after such comments.
grammar = Grammar(r"""
program:            _ (definition | declaration)* ('__END__' | :end).

definition:         defheader '{'_ [declaration* :hug] '}'_     :Definition.
defheader:          'define'__ ID ['extends'__ ID | :None].

declaration:        ID declarators ';'_                         :VarDecl
                  | constraint_section
                  | draw_section.

declarators:        declarator (','_ declarator)*               :hug.
declarator:         ID [('('_ params ')'_)? :hug]               :Declarator.
params            = param_spec (','_ param_spec)*.
param_spec:         ID '='_ expression                          :hug.

constraint_section: 'constraints'__ '{'_ constraint* '}'_       :hug :Constraints.
constraint:         expression '='_ expression ';'_             :hug.

draw_section:       'draw'__ '{'_ drawable* '}'_                :hug :Draw.
drawable:           name ';'_                                   :DrawName
                  | '&'_ ID ';'_                                :DrawFunction.

expression:         term ('+'_ term :Add
                         |'-'_ term :Sub)*.
term:               atom ('*'_ atom :Mul
                         |'/'_ atom :Div)*.
atom:               name
                  | tuple
                  | NUMBER                                      :Number
                  | '-'_ expression                             :Negate
                  | '('_ expression ')'_.
name:               ID :Name ('.'_ ID :Dot)*.
tuple:              '('_ expression (','_ expression)+ ')'_     :hug :Tuple.


# Lexical grammar

NUMBER:             { mantissa /[eE]\d+/? } _                   :float.
mantissa          = /\d+/ ('.' /\d*/)?
                  | '.' /\d+/.

ID                = /([a-zA-Z_]\w*)/ _.   # XXX need to rule out keywords?

__                = /\b/_.   # (i.e. a keyword must match up to a word boundary)
_                 = /\s*/.
""")