def test_inner_rule_succeeding(self):
    """Make sure ``parse()`` fails and blames the
    rightward-progressing-most named Expression when an Expression isn't
    satisfied.

    Make sure ParseErrors have nice Unicode representations.

    """
    grammar = Grammar("""
        bold_text = open_parens text close_parens
        open_parens = "(("
        text = ~"[a-zA-Z]+"
        close_parens = "))"
        """)
    text = '((fred!!'
    try:
        grammar.parse(text)
    except ParseError as error:
        eq_(error.pos, 6)
        eq_(error.expr, grammar['close_parens'])
        eq_(error.text, text)
        eq_(
            unicode(error),
            u"Rule 'close_parens' didn't match at '!!' (line 1, column 7)."
        )
    else:
        # Without this branch the test would pass silently if parse() ever
        # stopped raising for this input.
        raise AssertionError('ParseError was not raised.')
def test_rewinding(self):
    """Make sure rewinding the stack and trying an alternative (which
    progresses farther) from a higher-level rule can blame an expression
    within the alternative on failure.

    There's no particular reason I suspect this wouldn't work, but it's a
    more real-world example than the no-alternative cases already tested.

    """
    grammar = Grammar("""
        formatted_text = bold_text / weird_text
        bold_text = open_parens text close_parens
        weird_text = open_parens text "!!" bork
        bork = "bork"
        open_parens = "(("
        text = ~"[a-zA-Z]+"
        close_parens = "))"
        """)
    text = '((fred!!'
    try:
        grammar.parse(text)
    except ParseError as error:
        eq_(error.pos, 8)
        eq_(error.expr, grammar['bork'])
        eq_(error.text, text)
    else:
        # Guard against a vacuous pass when no error is raised at all.
        raise AssertionError('ParseError was not raised.')
def test_lookahead(self):
    """Exercise the ``&`` lookahead predicate.

    Text that does not start with "a" yields ``None``; text that does yields
    a two-child tree whose first child spans no characters.

    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    eq_(grammar.parse('burp'), None)

    s = 'arp'
    tree = grammar.parse('arp')
    lookahead_node = Node('', s, 0, 0)
    letters_node = Node('', s, 0, 3)
    expected = Node('starts_with_a', s, 0, 3,
                    children=[lookahead_node, letters_node])
    eq_(tree, expected)
def test_parse_with_leftovers(self):
    """Make sure ``parse()`` reports where we started failing to match,
    even if a partial match was successful."""
    grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
    try:
        grammar.parse('chitty bangbang')
    except IncompleteParseError as error:
        eq_(str(error),
            "Rule 'sequence' matched in its entirety, but it didn't consume "
            "all the text. The non-matching portion of the text begins with "
            "'bang' (line 1, column 12).")
    else:
        # Guard against a vacuous pass when no error is raised at all.
        raise AssertionError('IncompleteParseError was not raised.')
def test_favoring_named_rules(self):
    """Named rules should be used in error messages in favor of anonymous
    ones, even if those are rightward-progressing-more, and even if the
    failure starts at position 0."""
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    try:
        grammar.parse('burp')
    except ParseError as error:
        eq_(str(error),
            "Rule 'starts_with_a' trying to match (&(\"a\") ~\"[a-z]+\"u) didn't match at 'burp' (line 1, column 1).")
    else:
        # Guard against a vacuous pass when no error is raised at all.
        raise AssertionError('ParseError was not raised.')
def test_resolve_refs_order(self):
    """Smoke-test a circumstance where lazy references don't get resolved.

    The test only requires that building the grammar and parsing ``'(34)'``
    completes without raising.

    """
    rules = """
        expression = "(" terms ")"
        terms = term+
        term = number
        number = ~r"[0-9]+"
        """
    Grammar(rules).parse('(34)')
def test_favoring_named_rules(self):
    """Named rules should be used in error messages in favor of anonymous
    ones, even if those are rightward-progressing-more, and even if the
    failure starts at position 0."""
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    try:
        grammar.parse('burp')
    except ParseError as error:
        self.assertEqual(
            str(error),
            u"Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1).")
    else:
        # Guard against a vacuous pass when no error is raised at all.
        self.fail('ParseError was not raised.')
def test_no_named_rule_succeeding(self):
    """Make sure ParseErrors have sane printable representations even if we
    never succeeded in matching any named expressions."""
    grammar = Grammar('''bork = "bork"''')
    try:
        grammar.parse('snork')
    except ParseError as error:
        eq_(error.pos, 0)
        eq_(error.expr, grammar['bork'])
        eq_(error.text, 'snork')
    else:
        # Guard against a vacuous pass when no error is raised at all.
        raise AssertionError('ParseError was not raised.')
def test_line_and_column(self):
    """Make sure we got the line and column computation right."""
    grammar = Grammar(r"""
        whee_lah = whee "\n" lah "\n"
        whee = "whee"
        lah = "lah"
        """)
    try:
        grammar.parse('whee\nlahGOO')
    except ParseError as error:
        # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
        # didn't match". That's not the greatest. Fix that, then fix this.
        self.assertTrue(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
    else:
        # Guard against a vacuous pass when no error is raised at all.
        self.fail('ParseError was not raised.')
def test_line_and_column(self):
    """Make sure we got the line and column computation right."""
    grammar = Grammar(r"""
        whee_lah = whee "\n" lah "\n"
        whee = "whee"
        lah = "lah"
        """)
    try:
        grammar.parse('whee\nlahGOO')
    except ParseError as error:
        # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
        # didn't match". That's not the greatest. Fix that, then fix this.
        ok_(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
    else:
        # Guard against a vacuous pass when no error is raised at all.
        raise AssertionError('ParseError was not raised.')
def test_lazy_custom_rules(self):
    """Make sure LazyReferences manually shoved into custom rules are
    resolved.

    Incidentally test passing full-on Expressions as custom rules and
    having a custom rule as the default one.

    """
    forty_five_rule = Sequence(
        LazyReference('four'),
        LazyReference('five'),
        name='forty_five')
    grammar = Grammar("""
        four = '4'
        five = '5'""",
        forty_five=forty_five_rule).default('forty_five')

    s = '45'
    tree = grammar.parse(s)
    expected = Node(grammar['forty_five'], s, 0, 2, children=[
        Node(grammar['four'], s, 0, 1),
        Node(grammar['five'], s, 1, 2),
    ])
    eq_(tree, expected)
def convert(wikitext):
    """Parse the wiki input with the ``wiki1.peg`` grammar and format it.

    Loads the PEG text from ``wiki1.peg``, builds a ``Grammar``, parses the
    UTF-8-decoded input, runs ``HtmlFormatter`` over the tree and prints the
    grammar, the input, the parse tree and the three timings along the way.

    Returns the formatter result encoded as UTF-8.

    NOTE(review): ``wikitext`` is never read; the input is taken from the
    free variable ``atext``. That is kept as-is because callers may rely on
    it - confirm whether the parameter was meant to be used instead.

    """
    # Read the grammar PEG file; the context manager closes it even when
    # read() raises.
    with open("wiki1.peg") as f:
        peg = f.read()

    start = time.clock()
    grammar = Grammar(peg)
    elapsed1 = time.clock() - start

    # Decode once and reuse for both the echo and the parse.
    decoded = atext.decode('utf8')

    print("+++++++++grammar+++++++++")
    print(peg)
    print("+++++++++input+++++++++")
    print(decoded)
    print('*********************')

    start = time.clock()
    parsed = grammar.parse(decoded)
    elapsed2 = time.clock() - start
    print(dir(parsed))
    print(parsed)

    start = time.clock()
    result = HtmlFormatter().visit(parsed)
    elapsed3 = time.clock() - start

    print("timeto create grammar:", elapsed1)
    print("timeto parse text:", elapsed2)
    print("timeto convert to xml:", elapsed3)
    return result.encode('utf8')
def test_parens(self):
    """Check that parentheses group a sub-sequence for the ``+`` quantifier."""
    grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
    # Make sure it's not as if the parens aren't there:
    eq_(grammar.parse('chitty bangbang'), None)

    s = 'chitty bang bang'
    rendered = str(grammar.parse(s))
    # NOTE(review): confirm the indentation of the expected text against
    # what str(Node) actually produces.
    eq_(rendered, """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">""")
def lex(text):
    """Parse ``text`` (after ``add_indents``) with the statement grammar.

    Returns the parse tree. A parsimonious ``ParseError`` is re-raised as
    this module's own ``ParseError`` wrapping the original.

    """
    rules = """\
    entry = _ (statement _)* _
    statement = multiline / single
    multiline = atom _ args _ ":" _ "_INDENT_" _ (statement _)+ "_DEDENT_"
    single = atom _ args
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*" (~"#[^\\r\\n]*\s*")*
    args = ( _ map ) / ( _ "(" _ term (_ "," _ term)* _ ")" ) / (_ "(" _ ")")
    map = "(" _ kv (_ "," _ kv)* _ ")"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    kv = term _ "=" _ term _
    term = unumber / logic_exp / single / list / string / atom / number
    logic_exp = logic_priority / logic_unary / logic_plain
    logic_priority = "(" _ logic_exp _ ")" _ (logic_binary _ logic_exp _)*
    logic_unary = "not" _ logic_exp _
    logic_binary = "and" / "or"
    logic_plain = logic_op _ (logic_binary _ logic_exp _)*
    logic_op = (string / number) _ ("<=" / ">=" / "<" / ">" / "==" / "!=" / "<>" / "/=") _ (string / number)
    string = '"' ~r'(\\\\.|[^\\\\"])*' '"'
    number = ~"[0-9]+(\.[0-9]+)?(e\-?[0-9]+)?[GKM]?"
    unumber = (number / single) _ atom
    """
    grammar = Grammar(rules)
    try:
        tree = grammar.parse(add_indents(text))
    except parsimonious.exceptions.ParseError as err:
        raise ParseError(err)
    return tree
def test_right_recursive(self):
    """Right-recursive refs should resolve."""
    rules = """
        digits = digit digits?
        digit = ~r"[0-9]"
        """
    tree = Grammar(rules).parse('12')
    ok_(tree is not None)
# load_props(file_name): parse the file at ``file_name`` with a Grammar built
# from the module-level ``grammar`` text, walk the tree with ``Visitor`` and
# return ``{'props': retd, 'cbs': cbs}``.
#
# For visited entries named "cdfData", the entry's 'parameters' are iterated:
# each parameter's 'defValue' is stored in ``retd`` as ``{'value': v}`` under
# the parameter name (values starting with "iPar" are replaced by "1", and
# "boolean" parameters become True for 't' and None otherwise); a 'callback'
# key, when present, is recorded in ``cbs``. Other entries store their 'value'
# under their 'name', converted with float() when their "valueType" is "float".
#
# NOTE(review): the statement nesting (what ``continue`` and the trailing
# ``print`` statements belong to) cannot be read off this one-line form;
# confirm against the upstream layout before reformatting. The file opened
# via open(file_name) is never explicitly closed.
def load_props(file_name): g = Grammar(grammar) retd = {} cbs = {} g = g.parse(open(file_name).read()) iv = Visitor() l = iv.visit(g) for e in l: o = toDict(e, 1) if o['name'] == "cdfData": o = toDict(o['value'], 0)['parameters'] for p in o: d = toDict(p, 0) v = d['defValue'] if v.startswith("iPar"): v = "1" #TODO: hack if d['type'] == "boolean": v = True if v == 't' else None retd[d['name']] = {'value': v} if 'callback' in d: cbs[d['name']] = d['callback'] continue t = "" if "valueType" in o: t = o["valueType"] if t == "float": o['value'] = float(o['value']) retd[o['name']] = o['value'] if o['value'] == "valueType": print o print e return {'props': retd, 'cbs': cbs}
def parse_config_lines(lines: List[str], name: Optional[str] = None) -> tuple:
    """Parse settings-file lines and return ``(filename, params)``.

    The lines are joined with newlines, parsed with the grammar below and
    handed to ``tree_to_filename_and_dict``. The temporary state entries
    ``'_current_module'`` and ``'_filename'`` are removed from ``params``
    before returning.

    Args:
        lines: The lines of the settings file, without trailing newlines.
        name: Accepted for API compatibility; not read by this function.

    Returns:
        The ``(filename, params)`` pair produced by
        ``tree_to_filename_and_dict``, minus the temporary entries.

    """
    grammar = Grammar(r"""
        section = opt_header? (module / definition / comment / endl)*
        opt_header = '#SETTINGS_FILE ' filename
        filename = ~r"[^\n]+"
        module = '% ' module_name
        module_name = ~r"[a-zA-Z_]+"
        definition = sp? name sp? '=' sp? value? comment? endl?
        comment = sp? '#' vartype? anychar*
        vartype = '(' ('bool' / 'string' / 'double' / 'int') ')'
        name = ~r"[a-zA-Z_]+"
        value = ~r"[^#\n]+"
        sp = ' '*
        endl = sp? '\n'
        anychar = ~r"[^\n]"
        """)
    output = grammar.parse('\n'.join(lines))
    filename, params = tree_to_filename_and_dict(output)

    # remove temporary state entries
    for temp_key in ('_current_module', '_filename'):
        if temp_key in params:
            del params[temp_key]

    return filename, params
def test_complex_custom_rules(self):
    """Run 5-arg custom rules through their paces.

    Incidentally tests returning an actual Node from the custom rule.

    """
    rules = """
        bracketed_digit = start digit end
        start = '['
        end = ']'
        real_digit = '6'"""
    grammar = Grammar(
        rules,
        # In this particular implementation of the digit rule, no node is
        # generated for `digit`; it falls right through to `real_digit`.
        # I'm not sure if this could lead to problems; I can't think of
        # any, but it's probably not a great idea.
        digit=lambda text, pos, cache, error, grammar: grammar[
            'real_digit'].match_core(text, pos, cache, error))

    s = '[6]'
    tree = grammar.parse(s)
    expected = Node(grammar['bracketed_digit'], s, 0, 3, children=[
        Node(grammar['start'], s, 0, 1),
        Node(grammar['real_digit'], s, 1, 2),
        Node(grammar['end'], s, 2, 3),
    ])
    eq_(tree, expected)
def parse_jil(input_file):
    """Parse Jil file and return a python dictionary of the parsed data"""
    jil_rules = r"""
        expr = (entry / emptyline)*
        entry = job pair*
        job = jobstart colon jobname ws
        pair = key colon value ws?
        key = !jobstart word+
        value = (word / quoted)+
        word = ~r"[- ,\w\(\)\@\.\/\$\*\'\&\<\>]+"
        wordwild = ~r"(.*)"
        quoted = ~'"+[^\"]+"+'
        colon = ws? ":" ws?
        jobname = ~r"[\w]+"
        jobstart = "insert_job"
        ws = ~"\s*"
        emptyline = ws+
        """
    grammar = Grammar(jil_rules)

    with open(input_file, 'r') as rfh:
        jil_data = rfh.read()

    # Parse the whole file, then let JilVisitor build the result.
    tree = grammar.parse(jil_data)
    return JilVisitor().visit(tree)
def test_lookahead(self):
    """Exercise the ``&`` lookahead predicate.

    Text that does not start with "a" raises ``ParseError``; text that does
    yields a two-child tree whose first child spans no characters.

    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    assert_raises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    tree = grammar.parse('arp')
    children = [Node('', s, 0, 0), Node('', s, 0, 3)]
    eq_(tree, Node('starts_with_a', s, 0, 3, children=children))
def test_lookahead(self):
    """Exercise the ``&`` lookahead predicate.

    Text that does not start with "a" raises ``ParseError``; text that does
    yields a tree with a zero-width Lookahead node followed by a Regex node.

    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    assert_raises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    tree = grammar.parse('arp')
    expected = Node(grammar['starts_with_a'], s, 0, 3, children=[
        Node(Lookahead(Literal('a')), s, 0, 0),
        Node(Regex(r'[a-z]+'), s, 0, 3),
    ])
    eq_(tree, expected)
def __init__(self, code):
    """Parse ``code`` with the ``QUERY_PEG`` grammar and translate it.

    Initialises ``object_query`` and ``steps`` to empty containers, stores
    the parse tree privately, then calls ``self._translate()``.

    """
    self.object_query = {}
    self.steps = []

    # parsing:
    self.__nodes = Grammar(QUERY_PEG).parse(code)
    self._translate()
def test_lookahead(self):
    """Exercise the ``&`` lookahead predicate.

    Text that does not start with "a" raises ``ParseError``; text that does
    yields a tree with a zero-width Lookahead node followed by a Regex node.

    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    self.assertRaises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    tree = grammar.parse('arp')
    expected = Node(grammar['starts_with_a'], s, 0, 3, children=[
        Node(Lookahead(Literal('a')), s, 0, 0),
        Node(Regex(r'[a-z]+'), s, 0, 3),
    ])
    self.assertEqual(tree, expected)
def result(self):
    """ The 'result' property

    Parses ``self._condition`` with the condition grammar, visits the tree
    with a ``ConditionVisitor`` built from ``self.data`` and returns the
    first element of what the visitor produces.
    """
    condition_rules = """
        condition = always / never / comparison
        ws = ~"\s*"
        never = ~"never"i
        always = ~"always"i
        value = numeric / varname
        numeric = ~"[+-]?\d+(\.\d+)?"
        varname = ~"[a-z_][a-z0-9_]*"i
        range = percentage / numeric
        percentage = numeric percent_sign
        percent_sign = "%"
        comparison = range_eq_comparison / range_leftrocket_comparison / range_rightrocket_comparison / range_muchlessthan_comparison / range_muchgreaterthan_comparison / simple_comparison
        simple_comparison = value ws simple_comparator ws value
        simple_comparator = cmp_eq / cmp_neq / cmp_gte / cmp_gt / cmp_lte / cmp_lt
        cmp_eq = "=="
        cmp_neq = "!="
        cmp_gte = ">="
        cmp_gt = ">"
        cmp_lte = "<="
        cmp_lt = "<"
        range_muchlessthan_comparison = value ws range_lt_prev range range_lt_post ws value
        range_lt_prev = "<"
        range_lt_post = "<"
        range_leftrocket_comparison = value ws range_lr_prev range range_lr_post ws value
        range_lr_prev = "<"
        range_lr_post = "="
        range_eq_comparison = value ws range_eq_prev range range_eq_post ws value
        range_eq_prev = "="
        range_eq_post = "="
        range_rightrocket_comparison = value ws range_rr_prev range range_rr_post ws value
        range_rr_prev = "="
        range_rr_post = ">"
        range_muchgreaterthan_comparison = value ws range_gt_prev range range_gt_post ws value
        range_gt_prev = ">"
        range_gt_post = ">"
        """
    tree = Grammar(condition_rules).parse(self._condition)
    return ConditionVisitor(self.data).visit(tree)[0]
def parse(source):
    """Parse tbon Source

    Builds the tbon grammar and returns the tree produced by parsing
    ``source`` with it.
    """
    tbon_rules = """
        score = wsc* music*
        music = (partswitch* bar+)+ wsc*
        partswitch = "P=" partnum
        wsc = comment / ws+
        comment = ws* ~r"/\*.*?\*/"s ws*
        bar = (wsc* (meta / beat) wsc+)+ barline
        meta = beatspec / key / tempo / relativetempo / velocity / de_emphasis / channel / instrument
        beatspec = "B=" ("2." / "2" / "4." / "4" / "8." / "8")
        key = "K=" keyname
        keyname = ~r"[a-gA-G](@|#)?"
        tempo = "T=" floatnum
        relativetempo = "t=" floatnum
        velocity = "V=" floatnum
        de_emphasis = "D=" floatnum
        channel = "C=" chnum
        partnum = ~r"[1-9][0-9]*"i
        instrument = "I=" inum
        inum = ~r"[1-9][0-9]*"i
        floatnum = ~r"\d*\.?\d+"i
        chnum = ~r"\d*\.?\d+"i
        beat = subbeat+
        barline = "|" / ":"
        extendable = chord / roll / ornament / pitch / rest
        pitch = octave* alteration? pitchname
        chord = chordstart chorditem chorditem* rparen
        chordstart = "("
        chorditem = chordpitch / chordhold / chordrest
        chordpitch = octave* alteration? pitchname
        chordhold = '-'
        chordrest = "_" / "z"
        rparen = ")"
        roll = rollstart pitch pitch+ rparen
        rollstart = "(:"
        ornament = ornamentstart pitch pitch+ rparen
        ornamentstart = "(~"
        subbeat = extendable / hold
        rest = "_" / "z"
        hold = "-"
        octave = octave_up / octave_down
        alteration = doublesharp / sharp / doubleflat / flat / natural
        doublesharp = "𝄪" / "##"
        sharp = "♯" / "#"
        doubleflat = "𝄫" / "@@"
        flat = "♭" / "@"
        natural = "♮" / "%"
        octave_up = "^"
        octave_down = "/"
        pitchname = ~"[a-g1-7]"i
        ws = ~r"\s*"i
        """
    return Grammar(tbon_rules).parse(source)
def test_lazy_default_rule(self):
    """Make sure we get an actual rule set as our default rule, even when
    the first rule has forward references and is thus a LazyReference at
    some point during grammar compilation.

    """
    rules = r"""
        styled_text = text
        text = "hi"
        """
    tree = Grammar(rules).parse('hi')
    eq_(tree, Node('text', 'hi', 0, 2))
def test_multi_line(self):
    """Make sure we tolerate all sorts of crazy line breaks and comments in
    the middle of rules."""
    rules = """
        bold_text = bold_open  # commenty comment

                    text  # more comment

                    bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "((" bold_close = "))"
        """
    tree = Grammar(rules).parse('((booyah))')
    ok_(tree is not None)
def test_lazy_default_rule(self):
    """Make sure we get an actual rule set as our default rule, even when
    the first rule has forward references and is thus a LazyReference at
    some point during grammar compilation.

    """
    grammar = Grammar(r"""
        styled_text = text
        text = "hi"
        """)
    tree = grammar.parse('hi')
    expected = Node(grammar['text'], 'hi', 0, 2)
    self.assertEqual(tree, expected)
def test_unconnected_custom_rules(self):
    """Make sure custom rules that aren't hooked to any other rules still
    get included in the grammar and that lone ones get set as the default.

    Incidentally test Grammar's `rules` default arg.

    """
    custom_only = Grammar(one_char=lambda text, pos: pos + 1)
    grammar = custom_only.default('one_char')

    s = '4'
    tree = grammar.parse(s)
    eq_(tree, Node('one_char', s, 0, 1))
class TexnoMagicLanguage:
    """Parser front end built on the ``TEXNO_MAGIC_GRAMMAR`` grammar text."""

    def __init__(self):
        # Compile the grammar once per instance; parse() reuses it.
        self.grammar = Grammar(TEXNO_MAGIC_GRAMMAR)

    def parse(self, text):
        """
        use this to parse TexnoMagic spell into its data representation
        """
        parsed = self.grammar.parse(text)
        return TexnoVisitor().visit(parsed)
def Assume(*args):
    """Parse an assumption expression and feed it to an AssumptionVisitor.

    ``args[0]`` is the expression text. With two arguments, ``args[1]`` is
    passed to ``storeInd``; with three, ``args[2]`` is also passed to
    ``storeArr``. The tree is visited only when one to three arguments are
    given. Returns ``None``.

    """
    assume_rules = r"""
        expr = expr1 / expr2 / expr3 /expr4 /expr5 / expr6 /expr7
        expr1 = expr_dist1 logic_op num_log
        expr2 = expr_dist2 logic_op num_log
        expr3 = classVar ws logic_op ws value
        expr4 = classVarArr ws logic_op ws value
        expr5 = classVar ws logic_op ws classVar
        expr6 = classVarArr ws logic_op ws classVarArr
        expr7 = "True"
        expr_dist1 = op_beg?abs?para_open classVar ws arith_op ws classVar para_close op_end?
        expr_dist2 = op_beg?abs?para_open classVarArr ws arith_op ws classVarArr para_close op_end?
        classVar = variable brack_open number brack_close
        classVarArr = variable brack_open variable brack_close
        para_open = "("
        para_close = ")"
        brack_open = "["
        brack_close = "]"
        variable = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        logic_op = ws (geq / leq / eq / neq / and / lt / gt) ws
        op_beg = number arith_op
        op_end = arith_op number
        arith_op = (add/sub/div/mul)
        abs = "abs"
        add = "+"
        sub = "-"
        div = "/"
        mul = "*"
        lt = "<"
        gt = ">"
        geq = ">="
        leq = "<="
        eq = "="
        neq = "!="
        and = "&"
        ws = ~"\s*"
        value = ~"\d+"
        num_log = ~"[+-]?([0-9]*[.])?[0-9]+"
        number = ~"[+-]?([0-9]*[.])?[0-9]+"
        """
    tree = Grammar(assume_rules).parse(args[0])
    visitor = assume2logic.AssumptionVisitor()

    arg_count = len(args)
    if arg_count not in (1, 2, 3):
        # More than three arguments: nothing is stored or visited.
        return
    if arg_count >= 2:
        visitor.storeInd(args[1])
    if arg_count == 3:
        visitor.storeArr(args[2])
    visitor.visit(tree)
def test_unconnected_custom_rules(self):
    """Make sure custom rules that aren't hooked to any other rules still
    get included in the grammar and that lone ones get set as the default.

    Incidentally test Grammar's `rules` default arg.

    """
    custom_only = Grammar(one_char=lambda text, pos: pos + 1)
    grammar = custom_only.default('one_char')

    s = '4'
    tree = grammar.parse(s)
    expected = Node(grammar['one_char'], s, 0, 1)
    self.assertEqual(tree, expected)
def test_expressions_from_rules(self):
    """Test the ``Grammar`` base class's ability to compile an expression
    tree from rules.

    That the correct ``Expression`` tree is built is already tested in
    ``RuleGrammarTests``. This tests only that the ``Grammar`` base class's
    ``_expressions_from_rules`` works.

    """
    greeting_grammar = Grammar('greeting = "hi" / "howdy"')
    tree = greeting_grammar.parse("hi")
    matched_alternative = Node("", "hi", 0, 2)
    eq_(tree, Node("greeting", "hi", 0, 2, children=[matched_alternative]))
def funcReadXml(self):
    """Read ``self.fileName`` line by line and generate test data from it.

    Each stripped line is parsed with the grammar below and visited by a
    fresh ``dataFrameCreate``; the feature names, types, minimum and maximum
    values it reports are collected and handed to ``generateData``, whose
    ``funcGenerateTestData`` is then called.

    """
    xml_rules = r"""
        expr = name / type / minimum / maximum / xmlStartDoc / xmlStartInps / xmlEndInps / xmlStartInp / xmlEndInp / xmlStartValTag /xmlEndValTag
        name = xmlStartNameTag feName xmlEndNameTag
        type = xmlStartTypeTag feType xmlEndTypeTag
        minimum = xmlStartMinTag number xmlEndMinTag
        maximum = xmlStartMaxTag number xmlEndMaxTag
        xmlStartDoc = '<?xml version="1.0" encoding="UTF-8"?>'
        xmlStartInps = "<Inputs>"
        xmlEndInps = "<\Inputs>"
        xmlStartInp = "<Input>"
        xmlEndInp = "<\Input>"
        xmlStartNameTag = "<Feature-name>"
        xmlEndNameTag = "<\Feature-name>"
        xmlStartTypeTag = "<Feature-type>"
        xmlEndTypeTag = "<\Feature-type>"
        xmlStartValTag = "<Value>"
        xmlEndValTag = "<\Value>"
        xmlStartMinTag = "<minVal>"
        xmlEndMinTag = "<\minVal>"
        xmlStartMaxTag = "<maxVal>"
        xmlEndMaxTag = "<\maxVal>"
        feName = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        feType = ~"[A-Z 0-9]*"i
        number = ~"[+-]?([0-9]*[.])?[0-9]+"
        """
    grammar = Grammar(xml_rules)

    with open(self.fileName) as f1:
        raw_lines = f1.readlines()
    stripped_lines = [raw.strip() for raw in raw_lines]

    feNameArr, feTypeArr, minValArr, maxValArr = [], [], [], []
    for line in stripped_lines:
        tree = grammar.parse(line)
        dfObj = dataFrameCreate()
        dfObj.visit(tree)
        if dfObj.feName is not None:
            feNameArr.append(dfObj.feName)
        if dfObj.feType is not None:
            feTypeArr.append(dfObj.feType)
        # -99999 and 0 are compared against as "no value" markers here.
        if dfObj.feMinVal != -99999:
            minValArr.append(dfObj.feMinVal)
        if dfObj.feMaxVal != 0:
            maxValArr.append(dfObj.feMaxVal)

    genDataObj = generateData(feNameArr, feTypeArr, minValArr, maxValArr)
    genDataObj.funcGenerateTestData()
def test_inner_rule_succeeding(self):
    """Make sure ``parse()`` fails and blames the
    rightward-progressing-most named Expression when an Expression isn't
    satisfied.

    Make sure ParseErrors have nice Unicode representations.

    """
    grammar = Grammar("""
        bold_text = open_parens text close_parens
        open_parens = "(("
        text = ~"[a-zA-Z]+"
        close_parens = "))"
        """)
    text = '((fred!!'
    try:
        grammar.parse(text)
    except ParseError as error:
        eq_(error.pos, 6)
        eq_(error.expr, grammar['close_parens'])
        eq_(error.text, text)
        eq_(str(error),
            "Rule 'close_parens' trying to match \"))\" didn't match at '!!' (line 1, column 7).")
    else:
        # Without this branch the test would pass silently if parse() ever
        # stopped raising for this input.
        raise AssertionError('ParseError was not raised.')
def test_expressions_from_rules(self):
    """Test the ``Grammar`` base class's ability to compile an expression
    tree from rules.

    That the correct ``Expression`` tree is built is already tested in
    ``RuleGrammarTests``. This tests only that the ``Grammar`` base class's
    ``_expressions_from_rules`` works.

    """
    greeting_grammar = Grammar('greeting = "hi" / "howdy"')
    tree = greeting_grammar.parse('hi')
    expected = Node(greeting_grammar['greeting'], 'hi', 0, 2, children=[
        Node(Literal('hi'), 'hi', 0, 2),
    ])
    self.assertEqual(tree, expected)
def result(self):
    """ The 'result' property

    Parses ``self._replacement`` with the replacement grammar and returns
    what a ``ReplacementVisitor`` built from ``self._data`` produces for the
    tree.
    """
    replacement_rules = """
        replacement = ws replacevalue transformationlist ws
        replacevalue = expression / varname / literal
        transformationlist = transformation*
        transformation = ws comma ws transname transarglist
        transarglist = transarg*
        transarg = singlequotedstr / doublequotedstr / unquotedarg
        expression = term rws operator rws term
        term = numberliteral / varname
        varname = ~"[a-z_][a-z0-9_]*"i
        transname = ~"[a-z_][a-z0-9_]*"i
        literal = numberliteral / stringliteral
        numberliteral = ~"(\+|-)?\d+([.]\d+)?"
        stringliteral = singlequotedstr / doublequotedstr
        doublequotedstr = ws dblq notdblq dblq
        singlequotedstr = ws sngq notsngq sngq
        unquotedarg = ws notwsorcomma
        operator = plus / minus / times / divide
        plus = "+"
        minus = "-"
        times = "*"
        divide = "/"
        rws = ~"\s+"
        ws = ~"\s*"
        comma = ","
        notwsorcomma = ~"[^\s,]+"
        dblq = "\\""
        notdblq = ~"[^\\"]*"
        sngq = "'"
        notsngq = ~"[^']*"
        """
    tree = Grammar(replacement_rules).parse(self._replacement)
    visitor = ReplacementVisitor(self._data)
    return visitor.visit(tree)
def test_simple_custom_rules(self):
    """Run 2-arg custom-coded rules through their paces."""
    rules = """
        bracketed_digit = start digit end
        start = '['
        end = ']'"""
    grammar = Grammar(
        rules,
        digit=lambda text, pos: (pos + 1) if text[pos].isdigit() else None)

    s = '[6]'
    tree = grammar.parse(s)
    expected = Node(grammar['bracketed_digit'], s, 0, 3, children=[
        Node(grammar['start'], s, 0, 1),
        Node(grammar['digit'], s, 1, 2),
        Node(grammar['end'], s, 2, 3),
    ])
    self.assertEqual(tree, expected)
def test_simple_custom_rules(self):
    """Run 2-arg custom-coded rules through their paces."""
    rules = """
        bracketed_digit = start digit end
        start = '['
        end = ']'"""
    grammar = Grammar(
        rules,
        digit=lambda text, pos: (pos + 1) if text[pos].isdigit() else None)

    s = '[6]'
    tree = grammar.parse(s)
    expected = Node('bracketed_digit', s, 0, 3, children=[
        Node('start', s, 0, 1),
        Node('digit', s, 1, 2),
        Node('end', s, 2, 3),
    ])
    eq_(tree, expected)
def test_callability_custom_rules(self):
    """Confirms that functions, methods and method descriptors can all be
    used to supply custom grammar rules.

    """
    rules = """
        default = function method descriptor
        """
    grammar = Grammar(
        rules,
        function=function_rule,
        method=self.method_rule,
        descriptor=self.rules['descriptor_rule'],
    )
    result = grammar.parse('functionmethoddescriptor')

    # Each child should carry the name of the custom rule that matched it.
    rule_names = []
    for node in result.children:
        rule_names.append(node.expr.name)
    self.assertEqual(rule_names, ['function', 'method', 'descriptor'])
class PEGSyntaxRule(BaseSyntaxRule):
    """Syntax rule backed by a PEG grammar taken from ``params['peg']``."""

    def __init__(self, params: dict):
        """Compile the grammar from ``params['peg']``.

        Raises:
            InvalidSchemaError: If building the grammar fails for any
                reason; the original exception is attached as the cause.
        """
        BaseSyntaxRule.__init__(self, params)
        try:
            self._grammar = Grammar(params['peg'])
        except Exception as exc:
            # Chain explicitly so the underlying grammar error stays visible.
            raise InvalidSchemaError(
                f'Failed to parse PEG grammar for {params["name"]}') from exc

    def parse(self, value):
        """Parse ``value`` with the compiled grammar and return the tree.

        Raises:
            SyntaxParseError: If the grammar raises ``ParseError``; the
                original exception is attached as the cause.
        """
        try:
            return self._grammar.parse(value)
        except ParseError as exc:
            raise SyntaxParseError() from exc
def test_parens(self):
    """Check that parentheses group a sub-sequence for the ``+`` quantifier."""
    grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
    # Make sure it's not as if the parens aren't there:
    assert_raises(ParseError, grammar.parse, 'chitty bangbang')

    s = 'chitty bang bang'
    rendered = str(grammar.parse(s))
    # NOTE(review): confirm the indentation of the expected text against
    # what str(Node) actually produces.
    eq_(rendered, """<Node called "sequence" matching "chitty bang bang">
    <Node called "__Literal__" matching "chitty">
    <Node called "__OneOrMore__" matching " bang bang">
        <Node called "__Sequence__" matching " bang">
            <Node called "__Literal__" matching " ">
            <Node called "__Literal__" matching "bang">
        <Node called "__Sequence__" matching " bang">
            <Node called "__Literal__" matching " ">
            <Node called "__Literal__" matching "bang">""")
# test(inFP): build a Grammar from clean(<contents of the file at inFP>) and
# run it against two fixed lists of inputs.
#
# - Every entry of ``bad_inputs`` is passed to parse(x, grammar) and is
#   expected to raise GFLError (the error is printed); reaching the
#   ``assert False`` means the input was wrongly accepted.
# - Every entry of ``good_inputs`` is passed to grammar.parse(x) and must
#   yield a non-None result, which is then printed via
#   pprint(analyze(walk(p))).
#
# NOTE(review): the triple-quoted inputs look like multi-line annotations
# whose line breaks are not visible in this one-line form; confirm the
# literal contents against upstream before reformatting this function.
def test(inFP): with open(inFP) as inF: grammar = Grammar(clean(inF.read())) good_inputs = ['{the quick brown} > fox > jumps < over < ({the lazy} > dog)', 'They > conspired < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)', 'a (** b c) d**', 'a (** b c**)', '::~1 :-)~1 ~(-: (0_0) ~(0_0)~2 *_*~3 )~1 ~( <*_*>', ''' Found** < (the scarriest mystery door*) Found < in < (my > school) I’M** < (SO > CURIOUS) D:** my = I’M''', ''' thers** < still thers < ((1 1/2) > hours < till < (Biebs > bday)) (thers like 1 1/2 hours) thers < here (:P)**''', ''' If < (it~1 > 's < restin') I > 'll < [wake up] < it~2 If > 'll** it~1 = it~2''', ''' {Our three} > weapons > are < $a $a :: {fear surprise efficiency} :: {and~1 and~2} ruthless > efficiency''', ''' We > are < knights < the knights < (who > say < Ni) who = knights'''] bad_inputs = ['{the quick brown} > fox > jumps < over < {the lazy} > dog', 'the > {lazy dog}', 'the < lazy > dog', 'They > conspired* < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)', 'big > **', '{** happy} > days', '(my big** fat Greek wedding*)', 'big** > day', 'hi :: there', ':-)', '(-:', '(0_0)~1', '*_*', ') ('] for x in bad_inputs: try: parse(x, grammar) assert False except GFLError as ex: print(ex) for x in good_inputs: p = grammar.parse(x) assert p is not None print(x) pprint(analyze(walk(p)))
def test_lazy_custom_rules(self):
    """Make sure LazyReferences manually shoved into custom rules are
    resolved.

    Incidentally test passing full-on Expressions as custom rules and
    having a custom rule as the default one.

    """
    forty_five_rule = Sequence(LazyReference('four'),
                               LazyReference('five'),
                               name='forty_five')
    grammar = Grammar("""
        four = '4'
        five = '5'""",
        forty_five=forty_five_rule).default('forty_five')

    s = '45'
    tree = grammar.parse(s)
    expected = Node('forty_five', s, 0, 2, children=[
        Node('four', s, 0, 1),
        Node('five', s, 1, 2),
    ])
    eq_(tree, expected)
def lex(text):
    """Strip ``%`` comments from ``text`` and parse it with the term grammar.

    Everything from a ``%`` to the end of its line is removed before
    parsing. A parsimonious ``ParseError`` is re-raised as this module's own
    ``ParseError`` wrapping the original.

    """
    rules = """\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\"|[^"])*' '"'
    binary = "<<" string ">>"
    boolean = "true" / "false"
    number = ~"[0-9]+\#[0-9a-zA-Z]+" / ~"[0-9]+(\.[0-9]+)?(e\-?[0-9]+)?"
    """
    grammar = Grammar(rules)
    nocomments = re.sub("(?m)%.*?$", "", text)
    try:
        tree = grammar.parse(nocomments)
    except parsimonious.exceptions.ParseError as err:
        raise ParseError(err)
    return tree
def lex(text):
    """Parse ``text`` with the term grammar and return the tree.

    ``%`` comments are handled by the grammar's ``_`` rule rather than being
    stripped beforehand. A parsimonious ``ParseError`` is re-raised as this
    module's own ``ParseError`` wrapping the original.

    """
    rules = """\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*" (~"%[^\\r\\n]*\s*")*
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\.|[^"])*' '"'
    binary = ( "<<" _ binary_part ( _ "," _ binary_part)* _ ">>") / ("<<" _ ">>")
    binary_part = string / char_number
    char_number = ~"[0-9]+"
    boolean = "true" / "false"
    number = ~"\-?[0-9]+\#[0-9a-zA-Z]+" / ~"\-?[0-9]+(\.[0-9]+)?((e|E)(\-|\+)?[0-9]+)?"
    """
    grammar = Grammar(rules)
    try:
        tree = grammar.parse(text)
    except parsimonious.exceptions.ParseError as err:
        raise ParseError(err)
    return tree
def lex(text):
    """Parse ``text`` (after ``add_indents``) with the statement grammar.

    Returns the parse tree. A parsimonious ``ParseError`` is re-raised as
    this module's own ``ParseError`` wrapping the original.

    """
    rules = """\
    entry = _ (statement _)* _
    statement = multiline / single
    multiline = atom _ args _ ":" _ "_INDENT_" _ (statement _)+ "_DEDENT_"
    single = atom _ args
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*" (~"#[^\\r\\n]*\s*")*
    args = ( _ map ) / ( _ "(" _ term (_ "," _ term)* _ ")" ) / (_ "(" _ ")")
    map = "(" _ kv (_ "," _ kv)* _ ")"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    kv = term _ "=" _ term _
    term = unumber / logic_op / single / list / string / atom / number
    logic_op = (string / number) _ ("<=" / ">=" / "<" / ">" / "==") _ (string / number)
    string = '"' ~'[^"]*' '"'
    number = ~"[0-9]+(\.[0-9]+)?(e\-?[0-9]+)?[GKM]?"
    unumber = (number / single) _ atom
    """
    grammar = Grammar(rules)
    try:
        tree = grammar.parse(add_indents(text))
    except parsimonious.exceptions.ParseError as err:
        raise ParseError(err)
    return tree
def test_complex_custom_rules(self):
    """Run 5-arg custom rules through their paces.

    Incidentally tests returning an actual Node from the custom rule.

    """
    rules = """
        bracketed_digit = start digit end
        start = '['
        end = ']'
        real_digit = '6'"""
    grammar = Grammar(
        rules,
        # In this particular implementation of the digit rule, no node is
        # generated for `digit`; it falls right through to `real_digit`.
        # I'm not sure if this could lead to problems; I can't think of
        # any, but it's probably not a great idea.
        digit=lambda text, pos, cache, error, grammar:
            grammar['real_digit']._match(text, pos, cache, error))

    s = '[6]'
    tree = grammar.parse(s)
    expected = Node('bracketed_digit', s, 0, 3, children=[
        Node('start', s, 0, 1),
        Node('real_digit', s, 1, 2),
        Node('end', s, 2, 3),
    ])
    eq_(tree, expected)
out('}}\n\n') except GenerationImpossible: print get_go_type(type) result = result[:old_len] return ''.join(result) input_file = sys.argv[1] package_name = sys.argv[2] output_file = sys.argv[3] text = open(input_file).read().decode('utf-8') text = re.sub(r'//.*($|[\r\n])', '\\1', text) nodes = grammar.parse(text + '\n') if nodes is not None: types = Compiler().visit(nodes) main_type, types = resolve_types(types) type_names = {} for name, type in types.iteritems(): resolve_attributes(type) type_names[type] = name out = convert_to_go(types, package_name) with open(output_file, 'w') as of: of.write(out) # template = open('test.template.cpp').read() # template = template.replace('{STRUCTS}', structs)
def test_not_really_json_parsing():
    """As a baseline for speed, parse some JSON.

    I have no reason to believe that JSON is a particularly representative or
    revealing grammar to test with. Also, this is a naive, unoptimized,
    incorrect grammar, so don't use it as a basis for comparison with other
    parsers. It's just meant to compare across versions of Parsimonious.

    """
    father = """{
        "id" : 1,
        "married" : true,
        "name" : "Larry Lopez",
        "sons" : null,
        "daughters" : [
            {
                "age" : 26,
                "name" : "Sandra"
            },
            {
                "age" : 25,
                "name" : "Margaret"
            },
            {
                "age" : 6,
                "name" : "Mary"
            }
        ]
    }"""
    more_fathers = ','.join([father] * 60)
    json = '{"fathers" : [' + more_fathers + ']}'
    json_rules = r"""
        value = space (string / number / object / array / true_false_null)
                space

        object = "{" members "}"
        members = (pair ("," pair)*)?
        pair = string ":" value
        array = "[" elements "]"
        elements = (value ("," value)*)?
        true_false_null = "true" / "false" / "null"

        string = space "\"" chars "\"" space
        chars = ~"[^\"]*"  # TODO implement the real thing
        number = (int frac exp) / (int exp) / (int frac) / int
        int = "-"? ((digit1to9 digits) / digit)
        frac = "." digits
        exp = e digits
        digits = digit+
        e = "e+" / "e-" / "e" / "E+" / "E-" / "E"

        digit1to9 = ~"[1-9]"
        digit = ~"[0-9]"
        space = ~"\s*"
        """
    grammar = Grammar(json_rules)

    # These number and repetition values seem to keep results within 5% of the
    # difference between min and max. We get more consistent results running a
    # bunch of single-parse tests and taking the min rather than upping the
    # NUMBER and trying to stomp out the outliers with averaging.
    NUMBER = 1
    REPEAT = 5
    timings = repeat(lambda: grammar.parse(json),
                     lambda: gc.enable(),  # so we take into account how we treat the GC
                     repeat=REPEAT,
                     number=NUMBER)
    total_seconds = min(timings)
    seconds_each = total_seconds / NUMBER

    kb = len(json) / 1024.0
    print('Took %.3fs to parse %.1fKB: %.0fKB/s.' % (seconds_each,
                                                     kb,
                                                     kb / seconds_each))
def test_not(self):
    """Make sure "not" predicates get parsed and work properly."""
    grammar = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')
    # Text starting with the negated literal must be rejected ...
    assert_raises(ParseError, grammar.parse, 'arp')
    # ... while other lowercase text parses to a tree.
    tree = grammar.parse('argle')
    ok_(tree is not None)