def test_rewinding(self):
    """Check error blame after rewinding to a farther-progressing alternative.

    ``formatted_text`` tries ``bold_text`` first and then ``weird_text``. The
    second alternative progresses farther through the input, so the
    resulting ``ParseError`` is expected to blame ``bork`` (an expression
    inside that alternative) at position 8.

    There's no particular reason to suspect this wouldn't work, but it is a
    more real-world example than the no-alternative cases tested elsewhere.
    """
    grammar = Grammar("""
        formatted_text = bold_text / weird_text
        bold_text = open_parens text close_parens
        weird_text = open_parens text "!!" bork
        bork = "bork"
        open_parens = "(("
        text = ~"[a-zA-Z]+"
        close_parens = "))"
        """)
    text = '((fred!!'
    try:
        grammar.parse(text)
    except ParseError as error:
        eq_(error.pos, 8)
        eq_(error.expr, grammar['bork'])
        eq_(error.text, text)
    else:
        # Without this branch the test would pass silently if parse()
        # stopped raising, and none of the assertions above would run.
        raise AssertionError('ParseError was not raised for %r' % text)
def test_right_recursive(self):
    """A rule that refers to itself on its right-hand side should resolve."""
    source = """
        digits = digit digits?
        digit = ~r"[0-9]"
        """
    tree = Grammar(source).parse('12')
    ok_(tree is not None)
def test_lookahead(self):
    """A ``&`` lookahead gates the match and yields a zero-width node."""
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')

    # 'burp' does not start with "a", so parsing is expected to raise.
    assert_raises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    lookahead_node = Node('', s, 0, 0)
    regex_node = Node('', s, 0, 3)
    expected = Node('starts_with_a', s, 0, 3,
                    children=[lookahead_node, regex_node])
    eq_(grammar.parse('arp'), expected)
def __init__(self, code):
    """Parse ``code`` with the ``QUERY_PEG`` grammar, then translate it.

    ``object_query`` and ``steps`` start out empty; the parse tree is kept
    in the private ``__nodes`` attribute before ``_translate()`` is called.
    """
    self.object_query = {}
    self.steps = []

    # parsing:
    self.__nodes = Grammar(QUERY_PEG).parse(code)
    self._translate()
def test_parse_with_leftovers(self):
    """Make sure ``parse()`` reports where we started failing to match,
    even if a partial match was successful."""
    grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
    try:
        grammar.parse('chitty bangbang')
    except IncompleteParseError as error:
        eq_(str(error),
            "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12).")
    else:
        # Without this branch the message check above would be skipped
        # silently whenever parse() failed to raise.
        raise AssertionError('IncompleteParseError was not raised')
def test_lookahead(self):
    """A ``&`` lookahead gates the match and yields a zero-width node.

    This variant expects ``parse()`` to return ``None`` on a failed match.
    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    eq_(grammar.parse('burp'), None)

    s = 'arp'
    expected_children = [
        Node('', s, 0, 0),  # the lookahead consumes nothing
        Node('', s, 0, 3),  # the regex consumes the whole word
    ]
    eq_(grammar.parse('arp'),
        Node('starts_with_a', s, 0, 3, children=expected_children))
def test_lookahead(self):
    """A ``&`` lookahead gates the match and yields a zero-width node.

    In this variant ``Node`` takes the matching expression object as its
    first argument instead of a rule name.
    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    assert_raises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    lookahead_node = Node(Lookahead(Literal('a')), s, 0, 0)
    regex_node = Node(Regex(r'[a-z]+'), s, 0, 3)
    expected = Node(grammar['starts_with_a'], s, 0, 3,
                    children=[lookahead_node, regex_node])
    eq_(grammar.parse('arp'), expected)
def result(self):
    """
    The 'result' property

    Parses ``self._condition`` with the condition grammar below, walks the
    tree with ``ConditionVisitor(self.data)`` and returns the first element
    of what the visitor produces.
    """
    # Raw string: the regex escapes (\s, \d, \.) are not valid Python escape
    # sequences, so a non-raw literal triggers invalid-escape warnings. The
    # resulting text is unchanged.
    g = Grammar(r"""
        condition = always / never / comparison
        ws = ~"\s*"
        never = ~"never"i
        always = ~"always"i
        value = numeric / varname
        numeric = ~"[+-]?\d+(\.\d+)?"
        varname = ~"[a-z_][a-z0-9_]*"i
        range = percentage / numeric
        percentage = numeric percent_sign
        percent_sign = "%"
        comparison = range_eq_comparison / range_leftrocket_comparison / range_rightrocket_comparison / range_muchlessthan_comparison / range_muchgreaterthan_comparison / simple_comparison
        simple_comparison = value ws simple_comparator ws value
        simple_comparator = cmp_eq / cmp_neq / cmp_gte / cmp_gt / cmp_lte / cmp_lt
        cmp_eq = "=="
        cmp_neq = "!="
        cmp_gte = ">="
        cmp_gt = ">"
        cmp_lte = "<="
        cmp_lt = "<"
        range_muchlessthan_comparison = value ws range_lt_prev range range_lt_post ws value
        range_lt_prev = "<"
        range_lt_post = "<"
        range_leftrocket_comparison = value ws range_lr_prev range range_lr_post ws value
        range_lr_prev = "<"
        range_lr_post = "="
        range_eq_comparison = value ws range_eq_prev range range_eq_post ws value
        range_eq_prev = "="
        range_eq_post = "="
        range_rightrocket_comparison = value ws range_rr_prev range range_rr_post ws value
        range_rr_prev = "="
        range_rr_post = ">"
        range_muchgreaterthan_comparison = value ws range_gt_prev range range_gt_post ws value
        range_gt_prev = ">"
        range_gt_post = ">"
        """)
    tree = g.parse(self._condition)
    v = ConditionVisitor(self.data)
    return v.visit(tree)[0]
def test_favoring_named_rules(self):
    """Named rules should be used in error messages in favor of anonymous
    ones, even if those are rightward-progressing-more, and even if the
    failure starts at position 0."""
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    try:
        grammar.parse('burp')
    except ParseError as error:
        eq_(str(error),
            "Rule 'starts_with_a' trying to match (&(\"a\") ~\"[a-z]+\"u) didn't match at 'burp' (line 1, column 1).")
    else:
        # Without this branch the message check would never run if parse()
        # unexpectedly succeeded.
        raise AssertionError('ParseError was not raised')
def test_resolve_refs_order(self):
    """Smoke-test a circumstance where lazy references don't get resolved.

    The test only requires that parsing completes without raising.
    """
    source = """
        expression = "(" terms ")"
        terms = term+
        term = number
        number = ~r"[0-9]+"
        """
    Grammar(source).parse('(34)')
def test_no_named_rule_succeeding(self):
    """Make sure ParseErrors have sane printable representations even if we
    never succeeded in matching any named expressions."""
    grammar = Grammar('''bork = "bork"''')
    try:
        grammar.parse('snork')
    except ParseError as error:
        eq_(error.pos, 0)
        eq_(error.expr, grammar['bork'])
        eq_(error.text, 'snork')
    else:
        # Fail loudly instead of skipping every assertion when nothing is
        # raised.
        raise AssertionError('ParseError was not raised')
def test_lazy_default_rule(self):
    """Make sure we get an actual rule set as our default rule, even when
    the first rule has forward references and is thus a LazyReference at
    some point during grammar compilation.
    """
    source = r"""
        styled_text = text
        text = "hi"
        """
    expected = Node('text', 'hi', 0, 2)
    eq_(Grammar(source).parse('hi'), expected)
def test_multi_line(self):
    """Make sure we tolerate all sorts of crazy line breaks and comments in
    the middle of rules."""
    # Each grammar comment must end at a line break; collapsed onto one
    # line, the first '#' would swallow every rule that follows it.
    grammar = Grammar("""
        bold_text = bold_open  # commenty comment
                    text  # more comment
                    bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "((" bold_close = "))"
        """)
    ok_(grammar.parse('((booyah))') is not None)
def test_expressions_from_rules(self):
    """Test the ``Grammar`` base class's ability to compile an expression
    tree from rules.

    That the correct ``Expression`` tree is built is already tested in
    ``RuleGrammarTests``. This tests only that the ``Grammar`` base class's
    ``_expressions_from_rules`` works.
    """
    greeting_grammar = Grammar('greeting = "hi" / "howdy"')
    expected = Node("greeting", "hi", 0, 2, children=[Node("", "hi", 0, 2)])
    eq_(greeting_grammar.parse("hi"), expected)
def test_unconnected_custom_rules(self):
    """Make sure custom rules that aren't hooked to any other rules still
    get included in the grammar and that lone ones get set as the default.

    Incidentally test Grammar's `rules` default arg.
    """
    # No rule text is passed; the only rule is the custom callable.
    base = Grammar(one_char=lambda text, pos: pos + 1)
    grammar = base.default('one_char')
    s = '4'
    expected = Node('one_char', s, 0, 1)
    eq_(grammar.parse(s), expected)
def test_match(self):
    """Make sure partial-matching (with pos) works."""
    grammar = Grammar(r"""
        bold_text = bold_open text bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "(("
        bold_close = "))"
        """)
    s = ' ((boo))yah'
    # Matching starts at index 1 and may stop before the end of the text.
    expected_children = [
        Node('bold_open', s, 1, 3),
        Node('text', s, 3, 6),
        Node('bold_close', s, 6, 8),
    ]
    expected = Node('bold_text', s, 1, 8, children=expected_children)
    eq_(grammar.match(s, pos=1), expected)
def test_line_and_column(self):
    """Make sure we got the line and column computation right."""
    grammar = Grammar(r"""
        whee_lah = whee "\n" lah "\n"
        whee = "whee"
        lah = "lah"
        """)
    try:
        grammar.parse('whee\nlahGOO')
    except ParseError as error:
        # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
        # didn't match". That's not the greatest. Fix that, then fix this.
        ok_(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
    else:
        # Without this branch the test would pass without checking anything
        # when no error is raised.
        raise AssertionError('ParseError was not raised')
def result(self):
    """
    The 'result' property

    Parses ``self._replacement`` with the replacement grammar below and
    returns whatever ``ReplacementVisitor(self._data)`` produces for the
    resulting tree.
    """
    # Regex escapes are written with doubled backslashes because sequences
    # such as \s, \d and \+ are not valid Python string escapes. The grammar
    # text that reaches Grammar() is unchanged.
    g = Grammar("""
        replacement = ws replacevalue transformationlist ws
        replacevalue = expression / varname / literal
        transformationlist = transformation*
        transformation = ws comma ws transname transarglist
        transarglist = transarg*
        transarg = singlequotedstr / doublequotedstr / unquotedarg
        expression = term rws operator rws term
        term = numberliteral / varname
        varname = ~"[a-z_][a-z0-9_]*"i
        transname = ~"[a-z_][a-z0-9_]*"i
        literal = numberliteral / stringliteral
        numberliteral = ~"(\\+|-)?\\d+([.]\\d+)?"
        stringliteral = singlequotedstr / doublequotedstr
        doublequotedstr = ws dblq notdblq dblq
        singlequotedstr = ws sngq notsngq sngq
        unquotedarg = ws notwsorcomma
        operator = plus / minus / times / divide
        plus = "+"
        minus = "-"
        times = "*"
        divide = "/"
        rws = ~"\\s+"
        ws = ~"\\s*"
        comma = ","
        notwsorcomma = ~"[^\\s,]+"
        dblq = "\\""
        notdblq = ~"[^\\"]*"
        sngq = "'"
        notsngq = ~"[^']*"
        """)
    tree = g.parse(self._replacement)
    return ReplacementVisitor(self._data).visit(tree)
def test_simple_custom_rules(self):
    """Run 2-arg custom-coded rules through their paces."""

    def digit(text, pos):
        # Custom rule: return the new position on a match, None otherwise.
        if text[pos].isdigit():
            return pos + 1
        return None

    grammar = Grammar("""
        bracketed_digit = start digit end
        start = '['
        end = ']'""",
        digit=digit)
    s = '[6]'
    expected_children = [
        Node('start', s, 0, 1),
        Node('digit', s, 1, 2),
        Node('end', s, 2, 3),
    ]
    eq_(grammar.parse(s),
        Node('bracketed_digit', s, 0, 3, children=expected_children))
def test_parens(self):
    """Parentheses must group a sub-sequence, and the tree must show it."""
    grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
    # Make sure it's not as if the parens aren't there:
    assert_raises(ParseError, grammar.parse, 'chitty bangbang')

    s = 'chitty bang bang'
    # The expected rendering puts one node per line, with nested nodes
    # indented four spaces per level, so the line structure is restored.
    eq_(str(grammar.parse(s)),
        """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">""")
def test(inFP):
    """Exercise the grammar read from ``inFP`` with bad and good inputs.

    Each bad input must make ``parse(x, grammar)`` raise ``GFLError`` (the
    error is printed). Each good input must parse to a non-``None`` tree,
    after which the input and ``analyze(walk(tree))`` are printed.
    """
    with open(inFP) as inF:
        grammar = Grammar(clean(inF.read()))

    # NOTE(review): the triple-quoted fixtures below sit on a single line
    # here; confirm whether they originally contained line breaks.
    good_inputs = [
        '{the quick brown} > fox > jumps < over < ({the lazy} > dog)',
        'They > conspired < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)',
        'a (** b c) d**',
        'a (** b c**)',
        '::~1 :-)~1 ~(-: (0_0) ~(0_0)~2 *_*~3 )~1 ~( <*_*>',
        ''' Found** < (the scarriest mystery door*) Found < in < (my > school) I’M** < (SO > CURIOUS) D:** my = I’M''',
        ''' thers** < still thers < ((1 1/2) > hours < till < (Biebs > bday)) (thers like 1 1/2 hours) thers < here (:P)**''',
        ''' If < (it~1 > 's < restin') I > 'll < [wake up] < it~2 If > 'll** it~1 = it~2''',
        ''' {Our three} > weapons > are < $a $a :: {fear surprise efficiency} :: {and~1 and~2} ruthless > efficiency''',
        ''' We > are < knights < the knights < (who > say < Ni) who = knights''',
    ]
    bad_inputs = [
        '{the quick brown} > fox > jumps < over < {the lazy} > dog',
        'the > {lazy dog}',
        'the < lazy > dog',
        'They > conspired* < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)',
        'big > **',
        '{** happy} > days',
        '(my big** fat Greek wedding*)',
        'big** > day',
        'hi :: there',
        ':-)',
        '(-:',
        '(0_0)~1',
        '*_*',
        ') (',
    ]

    for sample in bad_inputs:
        try:
            parse(sample, grammar)
            # Reaching this line means the bad input was wrongly accepted.
            assert False
        except GFLError as ex:
            print(ex)

    for sample in good_inputs:
        tree = grammar.parse(sample)
        assert tree is not None
        print(sample)
        pprint(analyze(walk(tree)))
def test_parens(self):
    """Parentheses must group a sub-sequence, and the tree must show it.

    This variant expects ``parse()`` to return ``None`` on a failed match.
    """
    grammar = Grammar(r"""sequence = "chitty" (" " "bang")+""")
    # Make sure it's not as if the parens aren't there:
    eq_(grammar.parse("chitty bangbang"), None)

    s = "chitty bang bang"
    # The expected rendering puts one node per line, with nested nodes
    # indented four spaces per level, so the line structure is restored.
    eq_(
        str(grammar.parse(s)),
        """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">""",
    )
def test_lazy_custom_rules(self):
    """Make sure LazyReferences manually shoved into custom rules are
    resolved.

    Incidentally test passing full-on Expressions as custom rules and
    having a custom rule as the default one.
    """
    forty_five = Sequence(LazyReference('four'),
                          LazyReference('five'),
                          name='forty_five')
    grammar = Grammar("""
        four = '4'
        five = '5'""",
        forty_five=forty_five).default('forty_five')
    s = '45'
    expected = Node('forty_five', s, 0, 2, children=[
        Node('four', s, 0, 1),
        Node('five', s, 1, 2),
    ])
    eq_(grammar.parse(s), expected)
def lex(text):
    """Parse ``text`` with the term grammar below and return the parse tree.

    ``%`` comments are stripped (from each ``%`` to the end of its line)
    before parsing. A ``parsimonious.exceptions.ParseError`` is re-raised
    as this module's ``ParseError`` wrapping the original exception.
    """
    # Regex escapes use doubled backslashes: \s, \#, \. and \- are not valid
    # Python string escapes. The grammar text passed on is unchanged.
    grammar = Grammar("""\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\\s*"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\"|[^"])*' '"'
    binary = "<<" string ">>"
    boolean = "true" / "false"
    number = ~"[0-9]+\\#[0-9a-zA-Z]+" / ~"[0-9]+(\\.[0-9]+)?(e\\-?[0-9]+)?"
    """)
    nocomments = re.sub("(?m)%.*?$", "", text)
    try:
        return grammar.parse(nocomments)
    except parsimonious.exceptions.ParseError as e:
        raise ParseError(e)
def lex(text):
    """Parse ``text`` with the term grammar below and return the parse tree.

    Comments are handled inside the grammar: the ``_`` rule also consumes
    ``%``-to-end-of-line runs. A ``parsimonious.exceptions.ParseError`` is
    re-raised as this module's ``ParseError`` wrapping the original.
    """
    # Regex escapes use doubled backslashes: \s, \-, \#, \. and \+ are not
    # valid Python string escapes. The grammar text passed on is unchanged.
    grammar = Grammar("""\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\\s*" (~"%[^\\r\\n]*\\s*")*
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\.|[^"])*' '"'
    binary = ( "<<" _ binary_part ( _ "," _ binary_part)* _ ">>") / ("<<" _ ">>")
    binary_part = string / char_number
    char_number = ~"[0-9]+"
    boolean = "true" / "false"
    number = ~"\\-?[0-9]+\\#[0-9a-zA-Z]+" / ~"\\-?[0-9]+(\\.[0-9]+)?((e|E)(\\-|\\+)?[0-9]+)?"
    """)
    try:
        return grammar.parse(text)
    except parsimonious.exceptions.ParseError as e:
        raise ParseError(e)
def test_inner_rule_succeeding(self):
    """Make sure ``parse()`` fails and blames the
    rightward-progressing-most named Expression when an Expression isn't
    satisfied.

    Make sure ParseErrors have nice Unicode representations.
    """
    grammar = Grammar("""
        bold_text = open_parens text close_parens
        open_parens = "(("
        text = ~"[a-zA-Z]+"
        close_parens = "))"
        """)
    text = '((fred!!'
    try:
        grammar.parse(text)
    except ParseError as error:
        eq_(error.pos, 6)
        eq_(error.expr, grammar['close_parens'])
        eq_(error.text, text)
        eq_(str(error),
            "Rule 'close_parens' trying to match \"))\" didn't match at '!!' (line 1, column 7).")
    else:
        # Without this branch the test would pass silently if parse()
        # stopped raising.
        raise AssertionError('ParseError was not raised for %r' % text)
def lex(text):
    """Parse ``text`` with the statement grammar below; return the tree.

    The text is first passed through ``add_indents``; the grammar refers to
    literal ``_INDENT_`` / ``_DEDENT_`` markers. A
    ``parsimonious.exceptions.ParseError`` is re-raised as this module's
    ``ParseError`` wrapping the original exception.
    """
    # Regex escapes use doubled backslashes: \s, \. and \- are not valid
    # Python string escapes. The grammar text passed on is unchanged.
    grammar = Grammar("""\
    entry = _ (statement _)* _
    statement = multiline / single
    multiline = atom _ args _ ":" _ "_INDENT_" _ (statement _)+ "_DEDENT_"
    single = atom _ args
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\\s*" (~"#[^\\r\\n]*\\s*")*
    args = ( _ map ) / ( _ "(" _ term (_ "," _ term)* _ ")" ) / (_ "(" _ ")")
    map = "(" _ kv (_ "," _ kv)* _ ")"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    kv = term _ "=" _ term _
    term = unumber / logic_op / single / list / string / atom / number
    logic_op = (string / number) _ ("<=" / ">=" / "<" / ">" / "==") _ (string / number)
    string = '"' ~'[^"]*' '"'
    number = ~"[0-9]+(\\.[0-9]+)?(e\\-?[0-9]+)?[GKM]?"
    unumber = (number / single) _ atom
    """)
    try:
        return grammar.parse(add_indents(text))
    except parsimonious.exceptions.ParseError as e:
        raise ParseError(e)
def test_complex_custom_rules(self):
    """Run 5-arg custom rules through their paces.

    Incidentally tests returning an actual Node from the custom rule.
    """
    grammar = Grammar("""
        bracketed_digit = start digit end
        start = '['
        end = ']'
        real_digit = '6'""",
        # In this particular implementation of the digit rule, no node is
        # generated for `digit`; it falls right through to `real_digit`.
        # I'm not sure if this could lead to problems; I can't think of
        # any, but it's probably not a great idea.
        digit=lambda text, pos, cache, error, grammar:
            grammar['real_digit']._match(text, pos, cache, error))
    s = '[6]'
    expected_children = [
        Node('start', s, 0, 1),
        Node('real_digit', s, 1, 2),
        Node('end', s, 2, 3),
    ]
    eq_(grammar.parse(s),
        Node('bracketed_digit', s, 0, 3, children=expected_children))
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor
from parsimonious.exceptions import VisitationError
from .encode import *

# Line-oriented grammar: each line is a definition (a call-like form with
# optional nested arguments), an assignment of a definition to an id, or a
# "%board:" directive.
grammar = Grammar(r"""
    start = line*
    line = ws? (definition / assignment / board) ws?
    id = ~"[a-z_][a-z0-9_]*"i
    number = ~"-?[0-9]+"
    hex = ~"0x[0-9a-f]+"i
    ws = ~"\s*"
    lpar = "("
    rpar = ")"
    equal = ws? "=" ws?
    str = ~'"[^\"]+"'
    arg = ws? ( definition / id / str / hex/ number) ws?
    args = (arg "," args) / arg
    definition = id "(" args? ")"
    board = "%board:" ws? id
    assignment = id equal definition
""")


class Assignment:
    # NOTE(review): `call` is accepted but not stored in the lines visible
    # here — confirm whether more of __init__ follows.
    def __init__(self, source, pos, id, call):
        self.source = source
        self.pos = pos
        self.id = id
# Expression grammar: values, unary/binary expressions, function calls and
# column references.
#
# `binaryop` is an ordered choice. It previously listed ">" twice and had no
# ">=", so ">=" could never be parsed; the first ">" is now ">=" (tried
# before the bare ">").
grammar = Grammar(r"""
    exprstmt = ws expr ws
    expr = biexpr / unexpr / value
    biexpr = value ws binaryop ws expr
    unexpr = unaryop expr
    value = parenval / number / boolean / function / col_ref / string / attr
    parenval = "(" ws expr ws ")"
    function = fname "(" ws arg_list? ws ")"
    arg_list = expr (ws "," ws expr)*
    number = ~"\d*\.?\d+"i
    string = ~"\'\w*\'"i
    col_ref = (name ".")? name
    attr = ~"\w[\w\d]*"i
    name = ~"[a-zA-Z]\w*"i
    fname = ~"\w[\w\d]*"i
    boolean = "true" / "false"
    compound_op = "UNION" / "union"
    binaryop = "+" / "-" / "*" / "/" / "=" / "<>" / "<=" / ">=" / "<" / ">" / "and" / "or"
    unaryop = "+" / "-" / "not"
    ws = ~"\s*"i
    wsp = ~"\s+"i
    """)
compat_support_grammar_source = kumascript_grammar_source + ( r""" # # Add compat support strings to text_token # text_token = kumascript / cell_version / footnote_id / bracket_text / cell_removed / cell_noprefix / cell_partial / text_item cell_version = _ ~r"(?P<version>\d+(\.\d+)*)""" r"""(\s+\((?P<eng_version>\d+(\.\d+)*)\))?\s*"s _ cell_removed = _ ~r"[Rr]emoved\s+[Ii]n\s*"s _ cell_noprefix = _ ("(unprefixed)" / "(no prefix)" / "without prefix" / "(without prefix)") _ cell_partial = _ (", partial" / "(partial)") _ """) + compat_shared_grammar_source compat_feature_grammar = Grammar(compat_feature_grammar_source) compat_support_grammar = Grammar(compat_support_grammar_source) compat_footnote_grammar = compat_feature_grammar class CompatSectionExtractor(Extractor): """Extracts data from elements parsed from a Browser Compatibility section. A Browser Compatibility section looks like this: <h2 id="Browser_compatibility">Browser compatibility</h2> <div>{{CompatibilityTable}}</div> <div id="compat-desktop"> <table class="compat-table"> <tbody> <tr><th>Feature</th><th>Chrome</th></tr>
from parsimonious.grammar import Grammar

# Grammar for a sequence of expressions (function calls or integers),
# optionally separated by newlines. A function call is a label followed by
# positional literals and then `@name` / `@name=literal` keyword arguments.
#
# Fixes relative to the previous text:
#   * `bool` matched the literal 'fromalse'; it now matches 'False'.
#   * Regex escapes (\/, \., \~, \-, \_) are written with doubled
#     backslashes because they are not valid Python string escapes; the
#     grammar text itself is unchanged by this.
WooGrammar = Grammar('''
    a = (expr nl?)*
    expr = fun_call / int
    fun_call = fun_label ws? (literal ws?)* (kwarg ws?)*
    literal = int / bool / path / string
    int = ~"[0-9]+"i
    bool = 'True' / 'False'
    path = ~"[\\/\\.\\~\\-a-zA-Z0-9]+"i
    string = "\\"" t "\\""
    t = ~"[^\\"]*"i
    kwarg = kwarg_colon / tf
    kwarg_colon = tf '=' literal
    tf = '@' ~"[a-z]" ~"[a-zA-Z0-9\\_\\-]*"
    fun_label = ~"[a-z]" ~"[a-zA-Z0-9\\_\\-\\~]*"
    ws = ~"[ \\t]+"i
    nl = ~"\\n+"i
''')
def test_not(self):
    """Make sure "not" predicates get parsed and work properly."""
    grammar = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')

    # Text starting with "arp" is rejected by the negative lookahead...
    assert_raises(ParseError, grammar.parse, 'arp')

    # ...while any other lowercase word parses.
    tree = grammar.parse('argle')
    ok_(tree is not None)
def test_not(self):
    """Make sure "not" predicates get parsed and work properly."""
    grammar = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')

    # Text starting with "arp" is rejected by the negative lookahead...
    self.assertRaises(ParseError, grammar.parse, 'arp')

    # ...while any other lowercase word parses.
    tree = grammar.parse('argle')
    self.assertTrue(tree is not None)
# Query grammar: MATCH (...) followed by optional WHERE / COLLECT / BY /
# HAVING / ORDER BY clauses, with arithmetic and boolean sub-expressions.
#
# `condition_op` is an ordered choice, so the two-character operators must
# be tried before their one-character prefixes. With ">" listed before ">="
# (and "<" before "<="), the longer operators could never match.
snql_grammar = Grammar(r"""
    query_exp = match_clause where_clause? collect_clause? group_by_clause? having_clause? order_by_clause?
    match_clause = space* "MATCH" space* open_paren clause close_paren space*
    where_clause = space* "WHERE" or_expression space*
    collect_clause = space* "COLLECT" collect_list space*
    group_by_clause = space* "BY" group_list space*
    having_clause = space* "HAVING" or_expression space*
    order_by_clause = space* "ORDER BY" order_list space*

    main_condition = low_pri_arithmetic condition_op (function_call / column_name / quoted_literal / numeric_literal) space*
    condition = main_condition / parenthesized_cdn
    condition_op = "=" / "!=" / ">=" / ">" / "<=" / "<"
    parenthesized_cdn = space* open_paren or_expression close_paren space*
    and_expression = space* condition space* (and_tuple)*
    or_expression = space* and_expression space* (or_tuple)*
    and_tuple = "AND" condition
    or_tuple = "OR" and_expression

    collect_list = collect_columns* (selected_expression)
    collect_columns = selected_expression space* comma space*
    selected_expression = low_pri_arithmetic space*

    group_list = group_columns* (low_pri_arithmetic)
    group_columns = low_pri_arithmetic space* comma space*
    order_list = order_columns* low_pri_arithmetic ("ASC"/"DESC")
    order_columns = low_pri_arithmetic ("ASC"/"DESC") space* comma space*

    clause = space* ~r"[-=><\w]+" space*

    low_pri_arithmetic = space* high_pri_arithmetic space* (low_pri_tuple)*
    high_pri_arithmetic = space* arithmetic_term space* (high_pri_tuple)*
    low_pri_tuple = low_pri_op high_pri_arithmetic
    high_pri_tuple = high_pri_op arithmetic_term

    arithmetic_term = space* (function_call / numeric_literal / column_name / parenthesized_arithm) space*
    parenthesized_arithm = open_paren low_pri_arithmetic close_paren

    low_pri_op = "+" / "-"
    high_pri_op = "/" / "*"
    param_expression = low_pri_arithmetic / quoted_literal
    parameters_list = parameter* (param_expression)
    parameter = param_expression space* comma space*
    function_call = function_name open_paren parameters_list? close_paren (open_paren parameters_list? close_paren)?
    simple_term = quoted_literal / numeric_literal / column_name
    literal = ~r"[a-zA-Z0-9_\.:-]+"
    quoted_literal = "'" string_literal "'"
    string_literal = ~r"[a-zA-Z0-9_\.\+\*\/:-]*"
    numeric_literal = ~r"-?[0-9]+(\.[0-9]+)?(e[\+\-][0-9]+)?"
    column_name = ~r"[a-zA-Z_][a-zA-Z0-9_\.]*"
    function_name = ~r"[a-zA-Z_][a-zA-Z0-9_]*"
    open_paren = "("
    close_paren = ")"
    space = " "
    comma = ","
""")
# have to avoid using backslashes to escape chars here. grammar += r""" filepath = quoted_filepath / unquoted_filepath quoted_filepath = ('"' dquoted_filepath_char+ '"') / ("'" squoted_filepath_char+ "'") dquoted_filepath_char = ~r'[^\r\n"]' squoted_filepath_char = ~r"[^\r\n']" unquoted_filepath = unquoted_filepath_char+ unquoted_filepath_char = ~r"[^\s\"]" """ else: grammar += r""" filepath = string """ grammar = Grammar(grammar) def urljoin2(base, path, **kwargs): if not base.endswith('/'): base += '/' url = urljoin(base, path, **kwargs) if url.endswith('/') and not path.endswith('/'): url = url[:-1] return url def generate_help_text(): """Return a formatted string listing commands, HTTPie options, and HTTP actions. """
# PEG grammar (parsimonious syntax) compiled into `modelica_parser`; judging by the rule and keyword names it describes Modelica source — confirm.
# NOTE(review): `lbracket` / `rbracket` use the same '{' / '}' literals as `lbrace` / `rbrace` — confirm whether '[' / ']' was intended.
# NOTE(review): `annotation` and `initial` are each defined more than once below — confirm which definition is meant to apply.
# NOTE(review): the `q_char` regex literal has no closing quote before the ")" — confirm the intended character class.
modelica_parser = Grammar(r""" #=============================================================== # STORED DEFINITION #=============================================================== stored_definition = _ (within name? semicolon)? (_ final? class_definition semicolon) #=============================================================== # CLASS DEFINITION #=============================================================== class_definition = encapsulated? class_prefixes class_specifier class_prefixes = partial? (class/model/(operator? record)/block/ (expandable? connector)/type/package/ ((pure/impure)? operator? function)/ operator) class_specifier = long_class_specifier/ short_class_specifier/ der_class_specifier/ extends_class_specifier long_class_specifier = ident string_comment composition end ident short_class_specifier = (ident equals base_prefix name array_subscripts? class_modification? comment) / (ident equals enumeration lparen (enum_list/colon) rparen comment) der_class_specifier = ident equals der lparen name (comma ident)+ rparen comment extends_class_specifier = extends ident class_modification? string_comment composition end ident base_prefix = type_prefix enum_list = enumeration_literal (comma enumeration_literal)* enumeration_literal = ident comment composition = element_list (((public/protected) element_list)/ equation_section/ algorithm_section)* (external language_specification? external_function_call? annotation? semicolon)? (annotation semicolon)? language_specification = string external_function_call = (component_reference equals)? ident lparen expression_list? rparen # notice we do a lookahead assertion here that is PEG but not EBNF # to ensure that end is not consumed as an ident element_list = (!(end/equation/algorithm) ((element semicolon)/(annotation semicolon)))* element = import_clause / extends_clause / (redeclare? final? inner? outer? 
( (class_definition / component_clause) /(replaceable (class_definition / component_clause) constraining_clause comment))) import_clause = import ( (ident equals name) / (name (period (times / (lbrace import_list rbrace)) )?)) comment import_list = ident (comma import_list)? #=============================================================== # EXTENDS #=============================================================== extends_clause = extends name class_modification? annotation? constraining_clause = constrainedby name class_modification? #=============================================================== # COMPONENT CLAUSE #=============================================================== component_clause = type_prefix type_specifier array_subscripts? component_list type_prefix = (flow/ stream)? (discrete/parameter/constant)? (input/output)? type_specifier = name component_list = component_declaration (comma component_declaration)* component_declaration = declaration condition_attribute? comment? condition_attribute = if expression declaration = ident array_subscripts? modification? #=============================================================== # MODIFICATION #=============================================================== modification = (class modification ( equals expression)?) / ( equals expression) / (assign expression) class_modification = lparen argument_list? rparen argument_list = argument (comma argument)* argument = element_modification_or_replaceable / element_redeclaration element_modification_or_replaceable = each? final? (element_modification / element_replaceable) element_modification = name modification? string_comment element_redeclaration = redeclare each? final? element_replaceable = replaceable (short_class_definition / component_clause1) constraining_clause? 
component_clause1 = type_prefix type_specifier component_declaration1 component_declaration1 = declaration comment short_class_definition = class_prefixes ident equals (( base_prefix name array_subscripts? class_modification? comment) / (enumeration lparen ( enum_list? / colon ) rparen comment )) #=============================================================== # EQUATION #=============================================================== equation_section = initial? equation (equation_expr semicolon)* algorithm_section = initial? algorithm (statement semicolon)* # note there is also an equation keywords so we call the # expression equation_expr equation_expr = ((simple_expression equals expression) / if_equation / for_equation / connect_clause / when_equation / (name function_call_args)) comment statement = ((component_reference ( (assign expression) / function_call_args )) / ( lparen output_expression_list rparen assign component_reference function_call_args) / break / return / if_statement / for_statement / while_statement / when_statement ) if_equation = if expression then (equation_expr semicolon)* (elseif expression then (equation_expr semicolon)* )* (else (equation_expr semicolon)* )? end if if_statement = if expression then (statement semicolon)* (elseif expression then (statement semicolon)* )* (else (statement semicolon)* )? end if for_equation = for for_indices loop (equation_expr semicolon)* end for for_statement = for for_indices loop (statement semicolon)* end for for_indices = for_index (comma for_index)* for_index = ident (in expression)? 
while_statement = while expression loop (statement semicolon)* end while when_equation = when expression then (equation semicolon)* (elsewhen expression then (equation semicolon)* )* end when when_statement = when expression then (statement semicolon)* (elsewhen expression then (statement semicolon)* )* end when connect_clause = connect lparen component_reference comma component_reference rparen #=============================================================== # EXPRESSION #=============================================================== expression = simple_expression / (if expression then expression (elseif expression then expression)* else expression) simple_expression = logical_expression (semicolon logical_expression (semicolon logical_expression)?)? logical_expression = logical_term (or logical_term)* logical_term = logical_factor (and logical_factor)* logical_factor = not? relation relation = arithmetic_expression (rel_op arithmetic_expression)? rel_op = less_than / less_than_or_equal / greater_than / greater_than_or_equal / equality / inequality arithmetic_expression = add_op? term (add_op term)* add_op = plus / minus / dot_plus / dot_minus term = factor (mul_op factor)* mul_op = times / divide / dot_times / dot_divide factor = primary ( (exp / dot_exp) primary)? primary = unsigned_number / string / false / true / ((name / der / initial) function_call_args) / component_reference / (lparen output_expression_list rparen) / (lbracket expression_list ( semicolon expression_list )* rbracket) / (lbrace function_arguments rbrace) / end name = period ? ident (period ident)* component_reference = (period ident array_subscripts?)+ function_call_args = lparen function_arguments? rparen function_arguments = function argument ((comma function_arguments) / (for for_indices) / named_arguments) named_arguments = named_argument (comma named_arguments)? named_argument = ident equals function_argument function_argument = function name ((lparen named_arguments? 
rparen) / expression) output_expression_list = expression? (comma expression?)* expression_list = expression (comma expression)* array_subscripts = lbracket subscript (comma subscript)* rbracket subscript = colon / expression comment = string_comment annotation? string_comment = string ( plus string)* annotation = annotation class_modification #=============================================================== # BASIC #=============================================================== _ = ~'\s*' equals = '='_ assign = ':='_ semicolon = ';'_ lparen = '('_ rparen = ')'_ lbracket = '{'_ rbracket = '}'_ colon = ':'_ comma = ','_ double_quote = '"'_ single_quote = "'"_ lbrace = '{'_ rbrace = '}'_ period = '.'_ plus = '+'_ dot_plus = '.+'_ minus = '-'_ dot_minus = '.-'_ times = '*'_ dot_times = '.*'_ divide = '/'_ dot_divide = './'_ exp = '^'_ dot_exp = '.^'_ less_than = '<'_ less_than_or_equal = '<='_ greater_than = '>'_ greater_than_or_equal = '>='_ equality = '=='_ inequality = '<>'_ ident = (nondigit ( digit / nondigit )*_) / q_ident q_ident = single_quote (q_char / s_escape)+ single_quote string = double_quote (s_char/s_escape)* double_quote nondigit = ~'[_a-zA-Z]' s_char = ~r'[^"\\]*'u q_char = (nondigit/digit/~r'[#$%&()*+,-./:;<>=?@[]^\{}|~ ') s_escape = ~r'[\'"\?\\\a\b\f\n\r\t\v]' digit = ~'[0-9]' unsigned_integer = digit+ unsigned_number = unsigned_integer ( '.' unsigned_integer?)? (('e'/'E') ('+'/'-')? unsigned_integer)? 
#=============================================================== # KEYWORDS #=============================================================== algorithm = 'algorithm'_ and = 'and'_ annotation = 'annotation'_ assert = 'assert'_ block = 'block'_ break = 'break'_ class = 'class'_ connect = 'connect'_ connector = 'connector'_ constant = 'constant'_ constrainedby = 'constrainedby'_ der = 'der'_ discrete = 'discrete'_ each = 'each'_ else = 'else'_ elseif = 'elseif'_ elsewhen = 'elsewhen'_ encapsulated = 'encapsulated'_ end = 'end'_ enumeration = 'enumeration'_ equation = 'equation'_ expandable = 'expandable'_ extends = 'extends'_ external = 'external'_ false = 'false'_ final = 'final'_ flow = 'flow'_ for= 'for'_ function = 'function'_ if = 'if'_ import = 'import'_ impure = 'impure'_ in = 'in'_ initial = 'initial'_ inner = 'inner'_ input = 'input'_ initial = 'initial'_ loop = 'loop'_ model = 'model'_ not = 'not'_ operator = 'operator'_ or = 'or'_ outer = 'outer'_ output = 'output'_ package = 'package'_ parameter = 'parameter'_ partial = 'partial'_ protected = 'protected'_ public = 'public'_ pure = 'pure'_ record = 'record'_ redeclare = 'redeclare'_ replaceable = 'replaceable'_ return = 'return'_ stream = 'stream'_ then = 'then'_ true = 'true'_ type = 'type'_ when = 'when'_ while = 'while'_ within = 'within'_ """)
def test_parens_with_leading_whitespace(self):
    """Make sure a parenthesized expression is allowed to have leading
    whitespace when nested directly inside another."""
    grammar = Grammar("""foo = ( ("c") )""")
    # Passing means neither compiling nor parsing raised.
    grammar.parse('c')
def test_rule_ordering_is_preserved_on_shallow_copies(self):
    """``_copy()`` must keep the rules in their definition order."""
    indices = range(100)
    rule_lines = ['r%s = "something"' % i for i in indices]
    copied = Grammar('\n'.join(rule_lines))._copy()
    expected_order = ['r%s' % i for i in indices]
    self.assertEqual(list(copied.keys()), expected_order)
# PEG grammar (parsimonious) for a subset of SQL SELECT queries: select
# cores joined by UNION, optional FROM / WHERE / GROUP BY / HAVING clauses,
# then optional ORDER BY and LIMIT.  Keyword rules (SELECT, FROM, ...) embed
# their own leading whitespace.
# NOTE: PEG choices are ordered, so two-character operators must be listed
# before their one-character prefixes.  ``binaryop`` previously listed ">"
# twice and never ">=", which made ``a >= b`` unparseable; the first ">" is
# now ">=".
grammar = Grammar( r""" query = select_cores orderby? limit? select_cores = select_core (compound_op select_core)* select_core = SELECT wsp select_results from_clause? where_clause? gb_clause? select_results = select_result (ws "," ws select_result)* select_result = sel_res_all_star / sel_res_tab_star / sel_res_val / sel_res_col sel_res_tab_star = name ".*" sel_res_all_star = "*" sel_res_val = expr (AS wsp name)? sel_res_col = col_ref (AS wsp name) from_clause = FROM join_source join_source = ws single_source (ws "," ws single_source)* single_source = source_table / source_subq source_table = table_name (AS wsp name)? source_subq = "(" ws query ws ")" (AS ws name)? where_clause = WHERE wsp expr (AND expr)* gb_clause = GROUP BY group_clause having_clause? group_clause = grouping_term (ws "," grouping_term)* grouping_term = ws expr having_clause = HAVING expr orderby = ORDER BY ordering_term (ws "," ordering_term)* ordering_term = ws expr (ASC/DESC)? limit = LIMIT expr (OFFSET expr)? col_ref = (table_name ".")? column_name expr = biexpr / unexpr / value biexpr = value ws binaryop ws expr unexpr = unaryop expr value = parenval / number / boolean / col_ref / function / string / attr parenval = "(" ws expr ws ")" function = fname "(" ws arg_list? 
ws ")" arg_list = expr (ws "," ws expr)* number = ~"\d*\.?\d+"i string = ~"\'\w*\'"i attr = ~"\w[\w\d]*"i fname = ~"\w[\w\d]*"i boolean = "true" / "false" compound_op = "UNION" / "union" binaryop = "+" / "-" / "*" / "/" / "=" / "<>" / "<=" / ">=" / "<" / ">" / "and" / "or" unaryop = "+" / "-" / "not" ws = ~"\s*"i wsp = ~"\s+"i name = ~"[a-zA-Z]\w*"i table_name = name column_name = name ADD = wsp "ADD" ALL = wsp "ALL" ALTER = wsp "ALTER" AND = wsp "AND" AS = wsp "AS" ASC = wsp "ASC" BETWEEN = wsp "BETWEEN" BY = wsp "BY" CAST = wsp "CAST" COLUMN = wsp "COLUMN" DESC = wsp "DESC" DISTINCT = wsp "DISTINCT" E = "E" ESCAPE = wsp "ESCAPE" EXCEPT = wsp "EXCEPT" EXISTS = wsp "EXISTS" EXPLAIN = ws "EXPLAIN" EVENT = ws "EVENT" FORALL = wsp "FORALL" FROM = wsp "FROM" GLOB = wsp "GLOB" GROUP = wsp "GROUP" HAVING = wsp "HAVING" IN = wsp "IN" INNER = wsp "INNER" INSERT = ws "INSERT" INTERSECT = wsp "INTERSECT" INTO = wsp "INTO" IS = wsp "IS" ISNULL = wsp "ISNULL" JOIN = wsp "JOIN" KEY = wsp "KEY" LEFT = wsp "LEFT" LIKE = wsp "LIKE" LIMIT = wsp "LIMIT" MATCH = wsp "MATCH" NO = wsp "NO" NOT = wsp "NOT" NOTNULL = wsp "NOTNULL" NULL = wsp "NULL" OF = wsp "OF" OFFSET = wsp "OFFSET" ON = wsp "ON" OR = wsp "OR" ORDER = wsp "ORDER" OUTER = wsp "OUTER" PRIMARY = wsp "PRIMARY" QUERY = wsp "QUERY" RAISE = wsp "RAISE" REFERENCES = wsp "REFERENCES" REGEXP = wsp "REGEXP" RENAME = wsp "RENAME" REPLACE = ws "REPLACE" RETURN = wsp "RETURN" ROW = wsp "ROW" SAVEPOINT = wsp "SAVEPOINT" SELECT = ws "SELECT" SET = wsp "SET" TABLE = wsp "TABLE" TEMP = wsp "TEMP" TEMPORARY = wsp "TEMPORARY" THEN = wsp "THEN" TO = wsp "TO" UNION = wsp "UNION" USING = wsp "USING" VALUES = wsp "VALUES" VIRTUAL = wsp "VIRTUAL" WITH = wsp "WITH" WHERE = wsp "WHERE" """ )
def mod_grammar(grammar):
    """Add a ``baz`` rule to ``grammar`` by calling its ``update()`` method.

    :arg grammar: the grammar object to extend; it is passed a freshly built
        one-rule ``Grammar`` via ``grammar.update(...)``.

    Nothing is returned.
    """
    grammar.update(Grammar(r""" baz = 'biff' """))
def test_single_quoted_literals(self):
    """Single-quoted literals, including one holding a double quote, must
    compile and match.

    Passes as long as neither building the grammar nor parsing raises.
    """
    quoted = Grammar("""foo = 'a' '"'""")
    quoted.parse('a"')
# PEG grammar (parsimonious) for an interactive command line.  A ``command``
# is either a mutation (one or more key/value or option mutations, or a
# ``cd`` / ``rm`` command) or an immutation (a ``preview`` introduced by the
# "httpie"/"curl" tool name, or an ``action`` introduced by an HTTP method).
# Key/value mutations come in unquoted, fully quoted and value-quoted forms,
# each with single- and double-quote variants.
grammar = Grammar(r""" command = mutation / immutation mutation = concat_mut+ / nonconcat_mut immutation = preview / action concat_mut = option_mut / full_quoted_mut / value_quoted_mut / unquoted_mut nonconcat_mut = cd / rm preview = _ tool _ (method _)? (urlpath _)? concat_mut* action = _ method _ (urlpath _)? concat_mut* urlpath = (~r"https?://" unquoted_string) / (!concat_mut string) unquoted_mut = _ unquoted_mutkey mutop unquoted_mutval _ full_quoted_mut = full_squoted_mut / full_dquoted_mut value_quoted_mut = value_squoted_mut / value_dquoted_mut full_squoted_mut = _ "'" squoted_mutkey mutop squoted_mutval "'" _ full_dquoted_mut = _ '"' dquoted_mutkey mutop dquoted_mutval '"' _ value_squoted_mut = _ unquoted_mutkey mutop "'" squoted_mutval "'" _ value_dquoted_mut = _ unquoted_mutkey mutop '"' dquoted_mutval '"' _ mutop = ":" / "==" / "=" unquoted_mutkey = unquoted_mutkey_item+ unquoted_mutval = unquoted_stringitem* unquoted_mutkey_item = unquoted_mutkey_char / escapeseq unquoted_mutkey_char = ~r"[^\s'\"\\=:]" squoted_mutkey = squoted_mutkey_item+ squoted_mutval = squoted_stringitem* squoted_mutkey_item = squoted_mutkey_char / escapeseq squoted_mutkey_char = ~r"[^\r\n'\\=:]" dquoted_mutkey = dquoted_mutkey_item+ dquoted_mutval = dquoted_stringitem* dquoted_mutkey_item = dquoted_mutkey_char / escapeseq dquoted_mutkey_char = ~r'[^\r\n"\\=:]' option_mut = flag_option_mut / value_option_mut flag_option_mut = _ flag_optname _ flag_optname = "--json" / "-j" / "--form" / "-f" / "--verbose" / "-v" / "--headers" / "-h" / "--body" / "-b" / "--stream" / "-S" / "--download" / "-d" / "--continue" / "-c" / "--follow" / "--check-status" / "--ignore-stdin" / "--help" / "--version" / "--traceback" / "--debug" value_option_mut = _ value_optname ~r"(\s+|=)" string _ value_optname = "--pretty" / "--style" / "-s" / "--print" / "-p" / "--output" / "-o" / "--session" / "--session-read-only" / "--auth" / "-a" / "--auth-type" / "--proxy" / "--verify" / "--cert" / "--cert-key" / 
"--timeout" cd = _ "cd" _ string _ rm = _ "rm" _ ~r"\-(h|q|b|o)" _ mutkey _ tool = "httpie" / "curl" method = ~r"get"i / ~r"head"i / ~r"post"i / ~r"put"i / ~r"delete"i / ~r"patch"i mutkey = unquoted_mutkey / ("'" squoted_mutkey "'") / ('"' dquoted_mutkey '"') / flag_optname / value_optname string = quoted_string / unquoted_string quoted_string = ('"' dquoted_stringitem* '"') / ("'" squoted_stringitem* "'") unquoted_string = unquoted_stringitem+ dquoted_stringitem = dquoted_stringchar / escapeseq squoted_stringitem = squoted_stringchar / escapeseq unquoted_stringitem = unquoted_stringchar / escapeseq dquoted_stringchar = ~r'[^\r\n"\\]' squoted_stringchar = ~r"[^\r\n'\\]" unquoted_stringchar = ~r"[^\s'\"\\]" escapeseq = ~r"\\." _ = ~r"\s*" """)
# PEG grammar (parsimonious) for column type expressions.  ``type`` is a
# primitive (a basic type name, UInt/Float with a bit size, FixedString(n),
# or Enum8/Enum16 with 'name' = value pairs) or one of the wrappers
# LowCardinality(...), AggregateFunction(...), Nullable(...), Array(...).
# NOTE(review): the type names look like ClickHouse column types; confirm
# against the caller.
grammar = Grammar( r""" type = primitive / lowcardinality / agg / nullable / array primitive = basic_type / uint / float / fixedstring / enum # DateTime must come before Date basic_type = "DateTime" / "Date" / "IPv4" / "IPv6" / "String" / "UUID" uint = "UInt" uint_size uint_size = "8" / "16" / "32" / "64" float = "Float" float_size float_size = "32" / "64" fixedstring = "FixedString" open_paren space* fixedstring_size space* close_paren fixedstring_size = ~r"\d+" enum = "Enum" enum_size open_paren space* enum_pairs space* close_paren enum_size = "8" / "16" enum_pairs = (enum_pair (space* comma space*)?)* enum_pair = quote enum_str quote space* equal space* enum_val enum_str = ~r"([a-zA-Z0-9\-]+)" enum_val = ~r"\d+" agg = "AggregateFunction" open_paren space* agg_func space* comma space* agg_types space* close_paren agg_func = ~r"[a-zA-Z]+\([a-zA-Z0-9\,\.\s]+\)|[a-zA-Z]+" agg_types = (primitive (space* comma space*)?)* array = "Array" open_paren space* (array / primitive / lowcardinality / nullable) space* close_paren lowcardinality = "LowCardinality" open_paren space* (primitive / nullable) space* close_paren nullable = "Nullable" open_paren space* (primitive / basic_type) space* close_paren open_paren = "(" close_paren = ")" equal = "=" comma = "," space = " " quote = "'" """ )
def compile():
    """Build a JSON parser and return it as a one-argument callable.

    The returned callable parses its string argument with the PEG grammar
    below and turns the parse tree into Python values through
    ``JsonVisitor``: objects become ``dict``, arrays become ``list``,
    numbers become ``float``, and ``true`` / ``false`` / ``null`` become
    ``True`` / ``False`` / ``None``.  String tokens are decoded by
    ``json_unescape``, which is defined elsewhere.
    """
    json_peg = r''' Start = ~r"\s*" Value ~r"\s*" Object = ~r"{\s*" Members? ~r"\s*}" Members = Mapping (~r"\s*,\s*" Mapping)* Mapping = String ~r"\s*:\s*" Value Array = ~r"\[\s*" Items? ~r"\s*\]" Items = Value (~r"\s*,\s*" Value)* Value = Object / Array / String / TrueVal / FalseVal / NullVal / Number TrueVal = "true" FalseVal = "false" NullVal = "null" String = ~r"\"[ !#-\[\]-\U0010ffff]*(?:\\(?:[\"\\/bfnrt]|u[0-9A-Fa-f]{4})[ !#-\[\]-\U0010ffff]*)*\"" Number = ~r"-?(0|[1-9][0-9]*)(\.\d*)?([eE][-+]?\d+)?" '''
    json_grammar = Grammar(json_peg)

    class JsonVisitor(NodeVisitor):
        def generic_visit(self, node, children):
            # Nodes without a dedicated visitor: pass the child results
            # through when there are any, otherwise fall back to the raw text.
            if children:
                return children
            return node.text

        # helper functions for generic patterns

        def delimited(self, node, children):
            # children[0] is the first item; children[1] holds the
            # (separator, item) pairs that follow it.
            first, rest = children[0], children[1]
            return [first] + [item for _, item in rest]

        def atomic(self, node, children):
            return children[0]

        # visitors

        visit_Value = atomic
        visit_Members = visit_Items = delimited

        def visit_Start(self, node, children):
            # Middle child is the Value; the outer two are whitespace.
            return children[1]

        def visit_Object(self, node, children):
            _opening, members, _closing = children
            # A matched ``Members?`` arrives wrapped in a list; an unmatched
            # one arrives as a non-list, meaning an empty object.
            pairs = members[0] if isinstance(members, list) else []
            return dict(pairs)

        def visit_Array(self, node, children):
            _opening, values, _closing = children
            # Same wrapping convention as in visit_Object.
            return values[0] if isinstance(values, list) else []

        def visit_Mapping(self, node, children):
            key, _separator, value = children
            return key, value

        def visit_String(self, node, children):
            return json_unescape(node.text)

        def visit_Number(self, node, children):
            return float(node.text)

        def visit_TrueVal(self, node, children):
            return True

        def visit_FalseVal(self, node, children):
            return False

        def visit_NullVal(self, node, children):
            return None

    visitor = JsonVisitor()

    def parse_json(s):
        return visitor.visit(json_grammar.parse(s))

    return parse_json
def __init__(self, _grammar, _text):
    """Parse ``_text`` with the grammar source ``_grammar`` and visit the tree.

    :arg _grammar: grammar definition passed to ``Grammar``
    :arg _text: the text to parse

    The value returned by ``self.visit`` for the parse tree is stored in
    ``self.top_group_node``.
    """
    compiled = Grammar(_grammar)
    parse_tree = compiled.parse(_text)
    self.top_group_node = self.visit(parse_tree)
let a = 5 let b = 9 let c = 8 let d = "a string" """ # works so far! grammar = Grammar( r""" expr = (entry / emptyline)* entry = start definition* start = ws "#start#" ws definition = "let" ws key assign value ws? key = word+ value = (number / word / quoted)+ number = ~"[0-9]"+ word = ~r"[-\w]+" quoted = ~'"[^\"]+"' assign = ws? "=" ws? lpar = "[" rpar = "]" ws = ~"\s*" emptyline = ws+ """) # create the abstract syntax tree. tree = grammar.parse(data) va = VeraVisitor() out = va.visit(tree) print(out)
def build_grammar(grammar):
    """Return a ``Grammar`` built from ``grammar`` with ``BaseGrammar`` appended.

    :arg grammar: grammar source to which the module-level ``BaseGrammar``
        is added with ``+`` before compilation
    """
    return Grammar(grammar + BaseGrammar)
# PEG grammar (parsimonious) for a type-declaration language.  ``body`` is a
# sequence of newline-terminated ``type NAME <type_expr>`` declarations,
# where a type expression is an array, struct, switch-based union, enum/set,
# type reference with optional attributes, or a ``$filter(...)`` wrapper.
# NOTE: ``NL`` used to be ``[\r?\n]+``.  Inside a character class ``?`` is a
# literal, so a stray "?" was accepted as a line terminator.  The class is
# now ``[\r\n]+``, which matches exactly the same CR/LF runs as before.
grammar = Grammar( r''' body = ANY_WS? (type_decl NL ANY_WS?)+ ANY_WS? type_decl = "type" WS IDENT WS type_expr type_expr = array_expr / struct_expr / union_type / enum_type / type_ref / filtered_type array_expr = "array" WS? "[" WS? expr WS? "]" WS "of" WS type_expr type_ref = IDENT (WS? "(" WS? attribute_list? WS? ")")? attribute_list = attribute (WS? "," WS? attribute)* attribute = IDENT WS? "=" WS? INT filtered_type = "$" IDENT WS? "(" WS? type_expr WS? ")" union_type = "switch" WS expr WS? "{" (WS_WITH_NL union_item)+ WS_WITH_NL "}" union_item = union_case / union_default union_case = "case" WS simple_const WS? ":" WS? type_ref union_default = "default" WS? ":" WS? type_ref enum_type = ("enum" / "set") WS? "(" WS? type_ref WS? ")" WS? "{" (WS_WITH_NL enum_item)+ WS_WITH_NL "}" enum_item = IDENT WS? "=" WS? expr simple_const = enum_value / INT expr = bin_op / atom bin_op = atom WS? ("==" / ">" / "<<" / "-" / "+" / "*") WS? expr atom = field_ref / enum_value / INT field_ref = ("@" / "^"+) IDENT enum_value = IDENT "." IDENT struct_expr = "struct" WS "{" (((WS_WITH_NL struct_item)+ WS_WITH_NL) / ANY_WS?) "}" struct_item = field / if_stmt field = (IDENT / "_") WS type_expr if_stmt = "if" WS expr WS? "{" (WS_WITH_NL struct_item)+ WS_WITH_NL "}" IDENT = ~"[a-z][a-z0-9_]*"i NL = ~"[\r\n]+" WS = ~"[\t ]+" INT = ~"(0[xX][0-9a-fA-F]+|0|[1-9]\d*)" ANY_WS = ~"[\r\n\t ]+" WS_WITH_NL = ~"[\r\t ]*\n[\r\n\t ]*" ''')
# data = """ # A, art. ഒരു # Aback, ad. പുറകൊട്ട, പിന്നൊക്കം # Abaft, ad. പിമ്പുറത്തെക്ക, കപ്പലിൻറ അമരത്തെക്ക # Abandon, v. a. വിട്ടൊഴിയുന്നു, ത്യജിക്കുന്നു, പരിത്യാഗം ചെയ്യുന്നു; ഉപെക്ഷിക്കുന്നു, കൈവിടുന്നു # Abandoned, a. വിട്ടൊഴിയപ്പെട്ട,ത്യജിക്കപ്പെട്ട; ഉപെക്ഷിക്കപ്പെട്ട, കൈവിടപ്പെട്ട; മഹാ കെട്ട, ദുഷ്ടതയുള്ള, വഷളായുള്ള, മഹാ ചീത്ത # """ grammar = Grammar(r""" expr = (entry / emptyline )* entry = headword comma pos ws senses subentry emptyline headword = ~"[A-Z 0-9]*"i pos = (ws ~"[a-z]+\.")+ subentry = (semicolon ws senses)* senses = (sense comma)* sense sense = (ml ws ml)* ml ml = ~"[\u0d00-\u0d7f]*" semicolon = ~";" comma = ~"," ws = ~"\s*" emptyline = ws+ """) class DictVisitor(NodeVisitor): def visit_expr(self, node, visited_children): """ Returns the overall output. """ output = [] for child in visited_children: if type(child[0]) == dict: output.append(child[0])
# PEG grammar (parsimonious) for an encoding specification ("espec").
# An espec is one of four single-letter forms:
#   "n"            raw data
#   "z"[:args]     zipped, with an optional level or {level,bits} argument
#   "e":{k,n,e}    encrypted: hex key, hex nonce and a nested espec
#   "b":args       block table: one ``size_spec=espec`` subchunk, or a
#                  brace-delimited, comma-separated list of them
GRAMMAR = Grammar(""" espec = data_raw / data_zipped / data_encrypted / data_block data_raw = flag_raw data_zipped = flag_zip (COLON zip_args)? data_encrypted = flag_encrypted COLON encryption_args data_block = flag_block COLON block_args flag_raw = "n" flag_zip = "z" flag_encrypted = "e" flag_block = "b" mpq = "mpq" zip_level = NUMBER zip_bits = NUMBER / mpq zip_level_and_bits = BEGIN zip_level COMMA zip_bits END zip_args = zip_level / zip_level_and_bits encryption_key = HEX_NUMBER encryption_nonce = HEX_NUMBER encryption_args = BEGIN encryption_key COMMA encryption_nonce COMMA espec END unit_kilobyte = "K" unit_megabyte = "M" block_unit = unit_kilobyte / unit_megabyte block_count = NUMBER block_size = NUMBER (block_unit)? block_size_args = STAR (block_count)? block_size_spec = (block_size block_size_args?) / STAR block_subchunk_short = block_size_spec EQUALS espec block_subchunk_long = (BEGIN block_subchunk_short (COMMA block_subchunk_short)* END) block_args = block_subchunk_short / block_subchunk_long NUMBER = ~"[0-9]+" HEX_NUMBER = ~"[0-9A-F]+" COLON = ":" COMMA = "," EQUALS = "=" STAR = "*" BEGIN = "{" END = "}" """)
def test_unicode_crash(self):
    """Stringifying a node that matched non-ASCII text must not raise."""
    parsed = Grammar(r'string = ~r"\S+"u').parse('中文')
    str(parsed)
# PEG grammar (parsimonious) for an "enhancements" rule file.  The input is
# one or more lines, each a ``#`` comment, an empty line, or a rule.  A rule
# is one or more matchers (``[!]type:argument``, with the type and argument
# optionally double-quoted) followed by one or more actions: either a flag
# action (optional ``^``/``v`` range, ``+``/``-`` flag, then "group"/"app")
# or a variable assignment ("max-frames"/"min-frames" = integer).
enhancements_grammar = Grammar(r""" enhancements = line+ line = _ (comment / rule / empty) newline? rule = _ matchers actions matchers = matcher+ matcher = _ negation? matcher_type sep argument matcher_type = key / quoted_key key = ~r"[a-zA-Z0-9_\.-]+" quoted_key = ~r"\"([a-zA-Z0-9_\.:-]+)\"" actions = action+ action = flag_action / var_action var_action = _ var_name _ "=" _ expr var_name = "max-frames" / "min-frames" flag_action = _ range? flag flag_action_name flag_action_name = "group" / "app" flag = "+" / "-" range = "^" / "v" expr = int int = ~r"[0-9]+" comment = ~r"#[^\r\n]*" argument = quoted / unquoted quoted = ~r'"([^"\\]*(?:\\.[^"\\]*)*)"' unquoted = ~r"\S+" sep = ":" space = " " empty = "" negation = "!" newline = ~r"[\r\n]" _ = space* """)
# PEG grammar (parsimonious) for a search query string.  A query is zero or
# more structured search terms followed by an optional free-text raw search.
# Structured terms are a ``timestamp`` comparison against a date, a ``has:``
# filter, or a basic ``[!]key:value`` filter; keys and values may be
# double-quoted.
event_search_grammar = Grammar(r""" # raw_search must come at the end, otherwise other # search_terms will be treated as a raw query search = search_term* raw_search? search_term = space? (time_filter / has_filter / basic_filter) space? raw_search = ~r".+$" # standard key:val filter basic_filter = negation? search_key sep search_value # filter specifically for the timestamp time_filter = "timestamp" operator date_format # has filter for not null type checks has_filter = negation? "has" sep (search_key / search_value) search_key = key / quoted_key search_value = quoted_value / value value = ~r"\S*" quoted_value = ~r"\"(.*)\""s key = ~r"[a-zA-Z0-9_\.-]+" # only allow colons in quoted keys quoted_key = ~r"\"([a-zA-Z0-9_\.:-]+)\"" date_format = ~r"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,6})?)?" # NOTE: the order in which these operators are listed matters # because for example, if < comes before <= it will match that # even if the operator is <= operator = ">=" / "<=" / ">" / "<" / "=" / "!=" sep = ":" space = " " negation = "!" """)
# Imports, the PEG grammar for a bracket-based regex notation, and the
# namedtuple node types declared after it.  In the grammar a ``regex`` is a
# run of outer literals (any text without brackets) and ``[...]`` groups.
# A group holds operator tokens followed by matches, a ``|``-separated
# alternation, or a plain sequence of matches.  A match is a quoted literal,
# a ``#macro`` (token or ``a..b`` range), a ``#macro=[...]`` definition, or
# a nested group.
from parsimonious.grammar import Grammar from parsimonious.nodes import NodeVisitor from collections import namedtuple grammar = Grammar(r''' regex = ( outer_literal / braces )+ braces = '[' whitespace? ( ops_matches / either / matches )? whitespace? ']' ops_matches = op ( whitespace op )* ( whitespace matches )? op = token either = matches ( whitespace? '|' whitespace? matches )+ matches = match ( whitespace match )* match = inner_literal / def / macro / braces macro = '#' ( range_macro / token ) range_macro = range_endpoint '..' range_endpoint def = macro '=' braces outer_literal = ~r'[^\[\]]+' inner_literal = ( '\'' until_quote '\'' ) / ( '"' until_doublequote '"' ) until_quote = ~r"[^']*" until_doublequote = ~r'[^"]*' whitespace = ~r'[ \t\r\n]+' token = ~r'[A-Za-z0-9!$-&(-/:-<>-@\\^-`{}~]+' range_endpoint = ~r'[A-Za-z0-9]' ''') Concat = namedtuple('Concat', ['items']) Either = namedtuple('Either', ['items']) Def = namedtuple('Def', ['name', 'subregex']) Operator = namedtuple('Operator', ['name', 'subregex']) Macro = namedtuple('Macro', ['name'])
help = "help" / "h" / "?" exit = "exit" / "quit" / "q" ls = ("ls" / "ll") _ (grep)? cd = _ "cd" _ string _ grep = pipe _ "grep" _ ex_string pipe = "|" ex_string = string / "*" / "-" / "_" / "." string = char+ char = ~r"[^\s'\\]" _ = ~r"\s*" """ grammar = Grammar(RULES) class PrettyFile(object): def __init__(self, efile): """ :type efile: easywebdav.client.File """ self._file = efile self._name = unquote(path.basename(efile.name)).decode('utf-8') self.is_dir = efile.contenttype == 'httpd/unix-directory' self.name = self._name self.size = humanbytes(int(efile.size)) self.modify_time = dt_parse(efile.mtime).astimezone(tz.tzlocal()).strftime('%Y-%m-%d %H:%M:%S') if self.is_dir:
def __init__(self, grammar, text):
    """Parse ``text`` with the grammar source ``grammar`` and visit the tree.

    :arg grammar: grammar definition passed to ``Grammar``
    :arg text: the text to parse

    ``self.op`` is reset to an empty list before the visit starts; the
    return value of ``self.visit`` is discarded.
    """
    self.op = []
    parse_tree = Grammar(grammar).parse(text)
    self.visit(parse_tree)
# PEG grammar (parsimonious) for a search query string with boolean logic.
# A query is any mix of boolean terms (terms joined by OR / AND),
# parenthesized groups, and plain search terms.  A search term is a
# key/value filter (tag, time, relative time, specific time, numeric,
# aggregate, aggregate date, has, is, quoted basic or basic), a quoted raw
# search, or an unquoted raw search.
event_search_grammar = Grammar( r""" search = (boolean_term / paren_term / search_term)* boolean_term = (paren_term / search_term) space? (boolean_operator space? (paren_term / search_term) space?)+ paren_term = space? open_paren space? (paren_term / boolean_term)+ space? closed_paren space? search_term = key_val_term / quoted_raw_search / raw_search key_val_term = space? (tag_filter / time_filter / rel_time_filter / specific_time_filter / numeric_filter / aggregate_filter / aggregate_date_filter / has_filter / is_filter / quoted_basic_filter / basic_filter) space? raw_search = (!key_val_term ~r"\ *([^\ ^\n ()]+)\ *" )* quoted_raw_search = spaces quoted_value spaces # standard key:val filter basic_filter = negation? search_key sep search_value quoted_basic_filter = negation? search_key sep quoted_value # filter for dates time_filter = search_key sep? operator date_format # filter for relative dates rel_time_filter = search_key sep rel_date_format # exact time filter for dates specific_time_filter = search_key sep date_format # Numeric comparison filter numeric_filter = (function_key / search_key) sep operator? numeric_value # Aggregate numeric filter aggregate_filter = aggregate_key sep operator? numeric_value aggregate_date_filter = aggregate_key sep operator? (date_format / rel_date_format) # has filter for not null type checks has_filter = negation? "has" sep (search_key / search_value) is_filter = negation? "is" sep search_value tag_filter = negation? "tags[" search_key "]" sep search_value aggregate_key = key space? open_paren space? key space? closed_paren function_key = key space? open_paren space? 
closed_paren search_key = key / quoted_key search_value = quoted_value / value value = ~r"[^()\s]*" numeric_value = ~r"[0-9]+(?=\s|$)" quoted_value = ~r"\"((?:[^\"]|(?<=\\)[\"])*)?\""s key = ~r"[a-zA-Z0-9_\.-]+" # only allow colons in quoted keys quoted_key = ~r"\"([a-zA-Z0-9_\.:-]+)\"" date_format = ~r"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,6})?)?Z?(?=\s|$)" rel_date_format = ~r"[\+\-][0-9]+[wdhm](?=\s|$)" # NOTE: the order in which these operators are listed matters # because for example, if < comes before <= it will match that # even if the operator is <= boolean_operator = "OR" / "AND" operator = ">=" / "<=" / ">" / "<" / "=" / "!=" open_paren = "(" closed_paren = ")" sep = ":" space = " " negation = "!" spaces = ~r"\ *" """ )
def test_repr(self):
    """``repr()`` of a grammar must yield a non-empty (truthy) string."""
    rendered = repr(Grammar(r'foo = "a"'))
    self.assertTrue(rendered)
# PEG grammar (parsimonious) for an ``entity { ... }`` definition block.
# An entity body holds ``entityDef NAME { assignments }`` blocks,
# ``layers { "string" }`` blocks and assignments.  An assignment binds a
# variable (optionally indexed, ``name[n]``) either to a nested
# ``{ assignment+ }`` object or to a literal (number, string, NULL or bool)
# terminated by ``;``.  The two commented-out rules at the top (DOCUMENT,
# VERSION_LINES) are not part of the grammar.
entity_grammar = Grammar(r""" #DOCUMENT = VERSION_LINES? ENTITY* #VERSION_LINES = "Version" SPACE INTEGER SPACE "HierarchyVersion" SPACE INTEGER SPACE ENTITY = "entity" LBRACE ENTITY_PROPS* RBRACE ENTITY_PROPS = (ENTITYDEF_BLOCK / LAYERS_BLOCK / ASSIGNMENT) ENTITYDEF_BLOCK = "entityDef" SPACE VARNAME LBRACE ASSIGNMENT* RBRACE LAYERS_BLOCK = "layers" LBRACE STRING RBRACE ASSIGNMENT = VARIABLE EQUALS (OBJECT / LITERAL) OBJECT = LBRACE ASSIGNMENT+ RBRACE LITERAL = (NUMBER / STRING / NULL / BOOL) SEMICOLON VARIABLE = (INDEXED / VARNAME) INDEXED = VARNAME "[" INTEGER "]" LBRACE = SPACE? "{" SPACE? RBRACE = SPACE? "}" SPACE? EQUALS = SPACE? "=" SPACE? SEMICOLON = SPACE? ";" SPACE? VARNAME = ~r"\w+" STRING = '"' ~r"[^\"]*" '"' NUMBER = ~r"[+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?" INTEGER = ~r"[-]?\d+" BOOL = "true" / "false" NULL = "NULL" SPACE = ~r"\s+" """)