Ejemplo n.º 1
0
    def test_rewinding(self):
        """Make sure rewinding the stack and trying an alternative (which
        progresses farther) from a higher-level rule can blame an expression
        within the alternative on failure.

        There's no particular reason I suspect this wouldn't work, but it's a
        more real-world example than the no-alternative cases already tested.

        """
        grammar = Grammar("""
            formatted_text = bold_text / weird_text
            bold_text = open_parens text close_parens
            weird_text = open_parens text "!!" bork
            bork = "bork"
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        try:
            grammar.parse(text)
        except ParseError as error:
            # ``weird_text`` consumes all 8 characters and then needs ``bork``,
            # so the failure is expected at position 8 on that rule.
            eq_(error.pos, 8)
            eq_(error.expr, grammar['bork'])
            eq_(error.text, text)
        else:
            # Without this branch the test would pass vacuously if parse()
            # stopped raising.
            raise AssertionError('grammar.parse() did not raise ParseError')
Ejemplo n.º 2
0
 def test_right_recursive(self):
     """A rule that refers to itself on its right-hand side should resolve."""
     digits_grammar = Grammar("""
         digits = digit digits?
         digit = ~r"[0-9]"
         """)
     tree = digits_grammar.parse('12')
     ok_(tree is not None)
Ejemplo n.º 3
0
    def test_lookahead(self):
        """A ``&`` lookahead must hold for the rule to match; when it does it
        contributes a zero-width node."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, grammar.parse, 'burp')

        s = 'arp'
        parsed = grammar.parse(s)
        # First child is the lookahead (consumes nothing), second the regex.
        expected_children = [Node('', s, 0, 0), Node('', s, 0, 3)]
        eq_(parsed, Node('starts_with_a', s, 0, 3, children=expected_children))
Ejemplo n.º 4
0
  def __init__(self, code):
    """Parse *code* with the ``QUERY_PEG`` grammar and translate the result.

    Starts from an empty ``object_query`` dict and an empty ``steps`` list,
    keeps the parse tree in a private attribute, then calls ``_translate()``.
    """
    self.object_query = dict()
    self.steps = list()

    # Build the grammar and parse in one go; the tree is kept privately.
    self.__nodes = Grammar(QUERY_PEG).parse(code)
    self._translate()
Ejemplo n.º 5
0
 def test_parse_with_leftovers(self):
     """Make sure ``parse()`` reports where we started failing to match,
     even if a partial match was successful."""
     grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
     try:
         grammar.parse('chitty bangbang')
     except IncompleteParseError as error:
         eq_(str(error), "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12).")
     else:
         # Without this branch the test would pass vacuously if parse()
         # stopped raising.
         raise AssertionError(
             'grammar.parse() did not raise IncompleteParseError')
Ejemplo n.º 6
0
    def test_lookahead(self):
        """A ``&`` lookahead must hold for the rule to match; here a failed
        parse is reported as ``None``."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        no_match = grammar.parse('burp')
        eq_(no_match, None)

        s = 'arp'
        parsed = grammar.parse(s)
        # First child is the lookahead (consumes nothing), second the regex.
        expected_children = [Node('', s, 0, 0), Node('', s, 0, 3)]
        eq_(parsed, Node('starts_with_a', s, 0, 3, children=expected_children))
Ejemplo n.º 7
0
    def test_lookahead(self):
        """A ``&`` lookahead must hold for the rule to match; nodes are
        compared by the expressions that produced them."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, grammar.parse, 'burp')

        s = 'arp'
        parsed = grammar.parse(s)
        top_rule = grammar['starts_with_a']
        # The lookahead child is zero-width; the regex child spans the input.
        lookahead_child = Node(Lookahead(Literal('a')), s, 0, 0)
        regex_child = Node(Regex(r'[a-z]+'), s, 0, 3)
        eq_(parsed,
            Node(top_rule, s, 0, 3, children=[lookahead_child, regex_child]))
    def result(self):
        """The 'result' property.

        Parses ``self._condition`` with the condition grammar below, walks the
        parse tree with a ``ConditionVisitor`` built from ``self.data``, and
        returns the first element of what the visitor produces.
        """

        # Raw string: the grammar embeds regex escapes (\s, \d, \.) that are
        # invalid escape sequences in a normal literal. The resulting text is
        # unchanged.
        g = Grammar(r"""
            condition = always / never / comparison

            ws = ~"\s*"

            never = ~"never"i
            always = ~"always"i

            value = numeric / varname

            numeric = ~"[+-]?\d+(\.\d+)?"
            varname = ~"[a-z_][a-z0-9_]*"i

            range = percentage / numeric

            percentage = numeric percent_sign
            percent_sign = "%"

            comparison = range_eq_comparison / range_leftrocket_comparison / range_rightrocket_comparison / range_muchlessthan_comparison / range_muchgreaterthan_comparison / simple_comparison

            simple_comparison = value ws simple_comparator ws value

            simple_comparator = cmp_eq / cmp_neq / cmp_gte / cmp_gt / cmp_lte / cmp_lt
            cmp_eq = "=="
            cmp_neq = "!="
            cmp_gte = ">="
            cmp_gt = ">"
            cmp_lte = "<="
            cmp_lt = "<"

            range_muchlessthan_comparison = value ws range_lt_prev range range_lt_post ws value
            range_lt_prev = "<"
            range_lt_post = "<"

            range_leftrocket_comparison = value ws range_lr_prev range range_lr_post ws value
            range_lr_prev = "<"
            range_lr_post = "="

            range_eq_comparison = value ws range_eq_prev range range_eq_post ws value
            range_eq_prev = "="
            range_eq_post = "="

            range_rightrocket_comparison = value ws range_rr_prev range range_rr_post ws value
            range_rr_prev = "="
            range_rr_post = ">"

            range_muchgreaterthan_comparison = value ws range_gt_prev range range_gt_post ws value
            range_gt_prev = ">"
            range_gt_post = ">"
        """)

        tree = g.parse(self._condition)
        v = ConditionVisitor(self.data)

        return v.visit(tree)[0]
Ejemplo n.º 9
0
 def test_favoring_named_rules(self):
     """Named rules should be used in error messages in favor of anonymous
     ones, even if those are rightward-progressing-more, and even if the
     failure starts at position 0."""
     grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
     try:
         grammar.parse('burp')
     except ParseError as error:
         eq_(str(error), "Rule 'starts_with_a' trying to match (&(\"a\") ~\"[a-z]+\"u) didn't match at 'burp' (line 1, column 1).")
     else:
         # Without this branch the test would pass vacuously if parse()
         # stopped raising.
         raise AssertionError('grammar.parse() did not raise ParseError')
Ejemplo n.º 10
0
 def test_resolve_refs_order(self):
     """Smoke-test a chain of forward references (each rule refers to one
     defined after it), a case where lazy references may go unresolved."""
     rules = """
         expression = "(" terms ")"
         terms = term+
         term = number
         number = ~r"[0-9]+"
         """
     # Only checks that compiling and parsing complete without raising.
     Grammar(rules).parse('(34)')
Ejemplo n.º 11
0
 def test_no_named_rule_succeeding(self):
     """Make sure ParseErrors have sane printable representations even if we
     never succeeded in matching any named expressions."""
     grammar = Grammar('''bork = "bork"''')
     try:
         grammar.parse('snork')
     except ParseError as error:
         eq_(error.pos, 0)
         eq_(error.expr, grammar['bork'])
         eq_(error.text, 'snork')
     else:
         # Without this branch the test would pass vacuously if parse()
         # stopped raising.
         raise AssertionError('grammar.parse() did not raise ParseError')
Ejemplo n.º 12
0
    def test_lazy_default_rule(self):
        """The default rule must end up being a real rule even when the first
        rule in the grammar is only a forward reference (and so a
        LazyReference at some point during compilation).

        """
        source = 'hi'
        grammar = Grammar(r"""
            styled_text = text
            text        = "hi"
            """)
        parsed = grammar.parse(source)
        eq_(parsed, Node('text', source, 0, 2))
Ejemplo n.º 13
0
 def test_multi_line(self):
     """Rules may be split across lines, interleaved with comments, and
     even share a line with the next rule."""
     rules = """
         bold_text  = bold_open  # commenty comment
                      text  # more comment
                      bold_close
         text       = ~"[A-Z 0-9]*"i
         bold_open  = "((" bold_close =  "))"
         """
     tree = Grammar(rules).parse('((booyah))')
     ok_(tree is not None)
Ejemplo n.º 14
0
    def test_expressions_from_rules(self):
        """Check that the ``Grammar`` base class can compile an expression
        tree from rules.

        Whether the right ``Expression`` tree gets built is covered by
        ``RuleGrammarTests``; this only checks that the base class's
        ``_expressions_from_rules`` works.

        """
        source = "hi"
        parsed = Grammar('greeting = "hi" / "howdy"').parse(source)
        matched_literal = Node("", source, 0, 2)
        eq_(parsed, Node("greeting", source, 0, 2, children=[matched_literal]))
Ejemplo n.º 15
0
    def test_unconnected_custom_rules(self):
        """Custom rules that no other rule refers to must still be part of
        the grammar, and a lone one can be made the default.

        Also exercises constructing a Grammar without any rule text.

        """
        # The only rule is a 2-arg custom one that consumes a single character.
        bare_grammar = Grammar(one_char=lambda text, pos: pos + 1)
        grammar = bare_grammar.default('one_char')
        s = '4'
        parsed = grammar.parse(s)
        eq_(parsed, Node('one_char', s, 0, 1))
Ejemplo n.º 16
0
 def test_match(self):
     """``match()`` with ``pos`` should match from that offset and may leave
     trailing text unconsumed."""
     grammar = Grammar(r"""
                       bold_text  = bold_open text bold_close
                       text       = ~"[A-Z 0-9]*"i
                       bold_open  = "(("
                       bold_close = "))"
                       """)
     s = ' ((boo))yah'
     matched = grammar.match(s, pos=1)
     expected_children = [
         Node('bold_open', s, 1, 3),
         Node('text', s, 3, 6),
         Node('bold_close', s, 6, 8),
     ]
     eq_(matched, Node('bold_text', s, 1, 8, children=expected_children))
Ejemplo n.º 17
0
 def test_line_and_column(self):
     """Make sure we got the line and column computation right."""
     grammar = Grammar(r"""
         whee_lah = whee "\n" lah "\n"
         whee = "whee"
         lah = "lah"
         """)
     try:
         grammar.parse('whee\nlahGOO')
     except ParseError as error:
         # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
         # didn't match". That's not the greatest. Fix that, then fix this.
         ok_(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
     else:
         # Without this branch the test would pass vacuously if parse()
         # stopped raising.
         raise AssertionError('grammar.parse() did not raise ParseError')
Ejemplo n.º 18
0
    def result(self):
        """The 'result' property.

        Parses ``self._replacement`` with the replacement grammar below and
        returns whatever a ``ReplacementVisitor`` built from ``self._data``
        produces for the parse tree.
        """

        # Raw string: the grammar embeds regex escapes (\+, \d, \s) that are
        # invalid escape sequences in a normal literal. The text handed to
        # Grammar is unchanged; the former ``\\"`` is simply written ``\"``.
        g = Grammar(r"""
            replacement = ws replacevalue transformationlist ws

            replacevalue = expression / varname / literal

            transformationlist = transformation*
            transformation = ws comma ws transname transarglist

            transarglist = transarg*
            transarg = singlequotedstr / doublequotedstr / unquotedarg

            expression = term rws operator rws term

            term = numberliteral / varname

            varname = ~"[a-z_][a-z0-9_]*"i
            transname = ~"[a-z_][a-z0-9_]*"i

            literal = numberliteral / stringliteral
            numberliteral = ~"(\+|-)?\d+([.]\d+)?"
            stringliteral = singlequotedstr / doublequotedstr

            doublequotedstr = ws dblq notdblq dblq
            singlequotedstr = ws sngq notsngq sngq
            unquotedarg = ws notwsorcomma

            operator = plus / minus / times / divide

            plus = "+"
            minus = "-"
            times = "*"
            divide = "/"

            rws = ~"\s+"
            ws = ~"\s*"
            comma = ","
            notwsorcomma = ~"[^\s,]+"

            dblq = "\""
            notdblq = ~"[^\"]*"

            sngq = "'"
            notsngq = ~"[^']*"
        """)

        tree = g.parse(self._replacement)

        return ReplacementVisitor(self._data).visit(tree)
Ejemplo n.º 19
0
 def test_simple_custom_rules(self):
     """Exercise a custom rule written as a 2-argument callable."""
     def digit(text, pos):
         # Consume one character if it is a digit; otherwise report no match.
         if text[pos].isdigit():
             return pos + 1
         return None

     grammar = Grammar("""
         bracketed_digit = start digit end
         start = '['
         end = ']'""",
         digit=digit)
     s = '[6]'
     parsed = grammar.parse(s)
     expected_children = [
         Node('start', s, 0, 1),
         Node('digit', s, 1, 2),
         Node('end', s, 2, 3),
     ]
     eq_(parsed, Node('bracketed_digit', s, 0, 3, children=expected_children))
Ejemplo n.º 20
0
    def test_parens(self):
        """A parenthesized group should be quantified as a unit."""
        grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
        # Make sure it's not as if the parens aren't there:
        assert_raises(ParseError, grammar.parse, 'chitty bangbang')

        s = 'chitty bang bang'
        rendered = str(grammar.parse(s))
        expected = """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">"""
        eq_(rendered, expected)
Ejemplo n.º 21
0
def test(inFP):
    """Exercise the grammar read from *inFP* against sample inputs.

    The file's text is passed through ``clean()`` and compiled into a
    ``Grammar``. Every bad input must make ``parse(x, grammar)`` raise
    ``GFLError`` (the error is printed). Every good input is parsed with
    ``grammar.parse``, echoed, and its ``analyze(walk(...))`` result is
    pretty-printed.

    Raises:
        AssertionError: if a bad input is accepted or a good input yields no
            parse tree.
    """
    with open(inFP) as inF:
        grammar = Grammar(clean(inF.read()))

    good_inputs = ['{the quick brown} > fox > jumps < over < ({the lazy} > dog)',
                   'They > conspired < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)',
                   'a (** b c) d**', 'a (** b c**)', '::~1 :-)~1 ~(-: (0_0) ~(0_0)~2 *_*~3 )~1 ~( <*_*>',
                   '''
                      Found** < (the scarriest mystery door*)
                      Found < in < (my > school)
                      I’M** < (SO > CURIOUS)
                      D:**
                      my = I’M''',
                   '''
                      thers** < still
                      thers < ((1 1/2) > hours < till < (Biebs > bday))
                      (thers like 1 1/2 hours)
                      thers < here
                      (:P)**''',
                   '''
                      If < (it~1 > 's < restin')
                      I > 'll < [wake up] < it~2
                      If > 'll**
                      it~1 = it~2''',
                   '''
                      {Our three} > weapons > are < $a
                      $a :: {fear surprise efficiency} :: {and~1 and~2}
                      ruthless > efficiency''',
                   '''
                      We > are < knights < the
                      knights < (who > say < Ni)
                      who = knights''']
    bad_inputs = ['{the quick brown} > fox > jumps < over < {the lazy} > dog', 'the > {lazy dog}', 'the < lazy > dog',
                  'They > conspired* < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)',
                  'big > **', '{** happy} > days', '(my big** fat Greek wedding*)', 'big** > day',
                  'hi :: there', ':-)', '(-:', '(0_0)~1', '*_*', ') (']
    for x in bad_inputs:
        try:
            parse(x, grammar)
        except GFLError as ex:
            print(ex)
        else:
            # An explicit raise (rather than a bare ``assert False``) survives
            # ``python -O`` and names the offending input.
            raise AssertionError('Bad input was accepted: %r' % (x,))
    for x in good_inputs:
        p = grammar.parse(x)
        assert p is not None, 'Good input produced no parse tree: %r' % (x,)
        print(x)
        pprint(analyze(walk(p)))
Ejemplo n.º 22
0
    def test_parens(self):
        """A parenthesized group should be quantified as a unit."""
        grammar = Grammar(r"""sequence = "chitty" (" " "bang")+""")
        # Make sure it's not as if the parens aren't there:
        no_match = grammar.parse("chitty bangbang")
        eq_(no_match, None)

        s = "chitty bang bang"
        rendered = str(grammar.parse(s))
        expected = """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">"""
        eq_(rendered, expected)
Ejemplo n.º 23
0
    def test_lazy_custom_rules(self):
        """LazyReferences placed by hand inside a custom rule must get
        resolved.

        Also exercises passing a full Expression as a custom rule and making
        a custom rule the default.

        """
        forty_five_rule = Sequence(LazyReference('four'),
                                   LazyReference('five'),
                                   name='forty_five')
        compiled = Grammar("""
            four = '4'
            five = '5'""",
            forty_five=forty_five_rule)
        grammar = compiled.default('forty_five')
        s = '45'
        parsed = grammar.parse(s)
        expected_children = [Node('four', s, 0, 1), Node('five', s, 1, 2)]
        eq_(parsed, Node('forty_five', s, 0, 2, children=expected_children))
Ejemplo n.º 24
0
def lex(text):
    """Parse *text* with the term grammar below and return the parse tree.

    ``%`` comments (from ``%`` to end of line) are removed with a regex
    before parsing. A parsimonious ``ParseError`` is re-raised as the
    ``ParseError`` in scope here, wrapping the original exception.
    """
    term_grammar = Grammar("""\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map   = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\"|[^"])*' '"'
    binary = "<<" string ">>"
    boolean = "true" / "false"
    number = ~"[0-9]+\#[0-9a-zA-Z]+" / ~"[0-9]+(\.[0-9]+)?(e\-?[0-9]+)?"
    """)
    without_comments = re.sub("(?m)%.*?$", "", text)
    try:
        tree = term_grammar.parse(without_comments)
    except parsimonious.exceptions.ParseError as exc:
        raise ParseError(exc)
    return tree
Ejemplo n.º 25
0
def lex(text):
    """Parse *text* with the term grammar below and return the parse tree.

    ``%`` comments are handled by the grammar's ``_`` rule. A parsimonious
    ``ParseError`` is re-raised as the ``ParseError`` in scope here, wrapping
    the original exception.
    """
    term_grammar = Grammar("""\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*" (~"%[^\\r\\n]*\s*")*
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map   = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\.|[^"])*' '"'
    binary = ( "<<" _ binary_part ( _ "," _ binary_part)* _ ">>") / ("<<" _ ">>")
    binary_part = string / char_number
    char_number = ~"[0-9]+"
    boolean = "true" / "false"
    number = ~"\-?[0-9]+\#[0-9a-zA-Z]+" / ~"\-?[0-9]+(\.[0-9]+)?((e|E)(\-|\+)?[0-9]+)?"
    """)
    try:
        tree = term_grammar.parse(text)
    except parsimonious.exceptions.ParseError as exc:
        raise ParseError(exc)
    return tree
Ejemplo n.º 26
0
    def test_inner_rule_succeeding(self):
        """Make sure ``parse()`` fails and blames the
        rightward-progressing-most named Expression when an Expression isn't
        satisfied.

        Make sure ParseErrors have nice Unicode representations.

        """
        grammar = Grammar("""
            bold_text = open_parens text close_parens
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        try:
            grammar.parse(text)
        except ParseError as error:
            eq_(error.pos, 6)
            eq_(error.expr, grammar['close_parens'])
            eq_(error.text, text)
            eq_(str(error), "Rule 'close_parens' trying to match \"))\" didn't match at '!!' (line 1, column 7).")
        else:
            # Without this branch the test would pass vacuously if parse()
            # stopped raising.
            raise AssertionError('grammar.parse() did not raise ParseError')
Ejemplo n.º 27
0
def lex(text):
    """Parse *text* with the statement grammar below and return the tree.

    The text is first passed through ``add_indents()``; the grammar then
    looks for the literal ``_INDENT_`` / ``_DEDENT_`` markers. A parsimonious
    ``ParseError`` is re-raised as the ``ParseError`` in scope here, wrapping
    the original exception.
    """
    statement_grammar = Grammar("""\
    entry = _ (statement _)* _
    statement = multiline / single
    multiline = atom _ args _ ":" _ "_INDENT_" _ (statement _)+ "_DEDENT_"
    single = atom _ args
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*" (~"#[^\\r\\n]*\s*")*
    args = ( _ map ) / ( _ "(" _ term (_ "," _ term)* _ ")" ) / (_ "(" _ ")")
    map = "(" _ kv (_ "," _ kv)* _ ")"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    kv = term _ "=" _ term _
    term = unumber / logic_op / single / list / string / atom / number
    logic_op = (string / number) _ ("<=" / ">=" / "<" / ">" / "==") _ (string / number)
    string = '"' ~'[^"]*' '"'
    number = ~"[0-9]+(\.[0-9]+)?(e\-?[0-9]+)?[GKM]?"
    unumber = (number / single) _ atom
    """)
    try:
        # add_indents() stays inside the try, as in the original flow.
        tree = statement_grammar.parse(add_indents(text))
    except parsimonious.exceptions.ParseError as exc:
        raise ParseError(exc)
    return tree
Ejemplo n.º 28
0
    def test_complex_custom_rules(self):
        """Exercise a custom rule written as a 5-argument callable.

        Also covers a custom rule that returns an actual Node.

        """
        # In this particular implementation of the digit rule, no node is
        # generated for `digit`; it falls right through to `real_digit`.
        # I'm not sure if this could lead to problems; I can't think of
        # any, but it's probably not a great idea.
        def digit(text, pos, cache, error, grammar):
            return grammar['real_digit']._match(text, pos, cache, error)

        grammar = Grammar("""
            bracketed_digit = start digit end
            start = '['
            end = ']'
            real_digit = '6'""",
            digit=digit)
        s = '[6]'
        parsed = grammar.parse(s)
        expected_children = [
            Node('start', s, 0, 1),
            Node('real_digit', s, 1, 2),
            Node('end', s, 2, 3),
        ]
        eq_(parsed,
            Node('bracketed_digit', s, 0, 3, children=expected_children))
Ejemplo n.º 29
0
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor
from parsimonious.exceptions import VisitationError

from .encode import *

# PEG grammar in parsimonious syntax. A source is zero or more `line`s; each
# line is a `definition` (an id followed by a parenthesized argument list), an
# `assignment` (id = definition) or a `%board:` directive, optionally
# surrounded by whitespace. Arguments may themselves be nested definitions.
grammar = Grammar(r"""
start      = line* 
line       = ws? (definition / assignment / board) ws?
id         = ~"[a-z_][a-z0-9_]*"i
number     = ~"-?[0-9]+"
hex        = ~"0x[0-9a-f]+"i
ws         = ~"\s*"
lpar       = "("
rpar       = ")"
equal      =  ws? "=" ws?
str        = ~'"[^\"]+"'
arg        = ws? ( definition / id / str / hex/ number) ws?
args       =  (arg "," args) / arg 
definition = id "(" args? ")"
board      = "%board:" ws? id
assignment = id equal definition 
""")


class Assignment:
    def __init__(self, source, pos, id, call):
        self.source = source
        self.pos = pos
        self.id = id
Ejemplo n.º 30
0
# PEG grammar (parsimonious syntax) for a small expression language: an
# `exprstmt` is an expression with optional surrounding whitespace, built from
# binary and unary operators over values (parenthesized expressions, numbers,
# booleans, function calls, column references, strings, attributes).
#
# `binaryop` previously listed ">" twice and had no ">="; PEG choice is
# ordered, so the second ">" could never match. The first one is now ">=",
# placed ahead of ">" so the longer operator is tried first.
grammar = Grammar(r"""
  exprstmt = ws expr ws
  expr     = biexpr / unexpr / value
  biexpr   = value ws binaryop ws expr
  unexpr   = unaryop expr
  value    = parenval / 
             number /
             boolean /
             function /
             col_ref /
             string /
             attr
  parenval = "(" ws expr ws ")"
  function = fname "(" ws arg_list? ws ")"
  arg_list = expr (ws "," ws expr)*
  number   = ~"\d*\.?\d+"i
  string   = ~"\'\w*\'"i
  col_ref  = (name ".")? name
  attr     = ~"\w[\w\d]*"i
  name     = ~"[a-zA-Z]\w*"i
  fname    = ~"\w[\w\d]*"i
  boolean  = "true" / "false"
  compound_op = "UNION" / "union"
  binaryop = "+" / "-" / "*" / "/" / "=" / "<>" /
             "<=" / ">=" / "<" / ">" / "and" / "or"
  unaryop  = "+" / "-" / "not"
  ws       = ~"\s*"i
  wsp      = ~"\s+"i
  """)
Ejemplo n.º 31
0
# Grammar source for compat support text: the KumaScript grammar source,
# followed by the support-specific rules below (which define text_token and
# the cell_* rules it refers to), followed by the shared compat grammar source.
# The cell_version regex is split across two adjacent raw string literals,
# which Python joins into one string.
compat_support_grammar_source = kumascript_grammar_source + (
    r"""
#
# Add compat support strings to text_token
#
text_token = kumascript / cell_version / footnote_id / bracket_text /
    cell_removed / cell_noprefix / cell_partial / text_item
cell_version = _ ~r"(?P<version>\d+(\.\d+)*)"""
    r"""(\s+\((?P<eng_version>\d+(\.\d+)*)\))?\s*"s _
cell_removed = _ ~r"[Rr]emoved\s+[Ii]n\s*"s _
cell_noprefix = _ ("(unprefixed)" / "(no prefix)" / "without prefix" /
    "(without prefix)") _
cell_partial =  _ (", partial" / "(partial)") _
""") + compat_shared_grammar_source

# Compile the grammar sources into Grammar objects.
compat_feature_grammar = Grammar(compat_feature_grammar_source)
compat_support_grammar = Grammar(compat_support_grammar_source)
# Footnotes reuse the feature grammar object rather than compiling their own.
compat_footnote_grammar = compat_feature_grammar


class CompatSectionExtractor(Extractor):
    """Extracts data from elements parsed from a Browser Compatibility section.

    A Browser Compatibility section looks like this:

    <h2 id="Browser_compatibility">Browser compatibility</h2>
    <div>{{CompatibilityTable}}</div>
    <div id="compat-desktop">
      <table class="compat-table">
        <tbody>
          <tr><th>Feature</th><th>Chrome</th></tr>
Ejemplo n.º 32
0
from parsimonious.grammar import Grammar

# PEG grammar (parsimonious syntax): zero or more expressions, each optionally
# followed by newlines. An expression is a function call (a label followed by
# literals and then @-prefixed keyword arguments) or an integer.
#
# The `bool` rule previously read 'True' / 'fromalse', a typo that left
# `False` unmatched by `bool` (it fell through to `path`).
WooGrammar = Grammar('''
a = (expr nl?)*
expr = fun_call / int 
fun_call = fun_label ws? (literal ws?)* (kwarg ws?)*
literal = int / bool / path / string
int = ~"[0-9]+"i
bool = 'True' / 'False'
path = ~"[\/\.\~\-a-zA-Z0-9]+"i
string = "\\"" t "\\""
t = ~"[^\\"]*"i
kwarg = kwarg_colon / tf
kwarg_colon = tf '=' literal
tf = '@' ~"[a-z]" ~"[a-zA-Z0-9\_\-]*"
fun_label = ~"[a-z]" ~"[a-zA-Z0-9\_\-\~]*"
ws = ~"[ \\t]+"i
nl = ~"\\n+"i
''')
Ejemplo n.º 33
0
 def test_not(self):
     """A ``!`` predicate must reject input that starts with the negated
     literal and accept input that does not."""
     not_arp = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')
     assert_raises(ParseError, not_arp.parse, 'arp')
     tree = not_arp.parse('argle')
     ok_(tree is not None)
Ejemplo n.º 34
0
 def test_not(self):
     """A ``!`` predicate must reject input that starts with the negated
     literal and accept input that does not."""
     not_arp = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')
     with self.assertRaises(ParseError):
         not_arp.parse('arp')
     tree = not_arp.parse('argle')
     self.assertTrue(tree is not None)
Ejemplo n.º 35
0
# PEG grammar (parsimonious syntax) for queries of the form
# MATCH (...) [WHERE ...] [COLLECT ...] [BY ...] [HAVING ...] [ORDER BY ...].
#
# PEG choice is ordered and commits to the first alternative that matches, so
# in `condition_op` the two-character operators must come before their
# one-character prefixes. With ">" listed before ">=" (and "<" before "<="),
# ">=" and "<=" could never match; the alternatives are ordered longest first.
snql_grammar = Grammar(r"""
    query_exp             = match_clause where_clause? collect_clause? group_by_clause? having_clause? order_by_clause?

    match_clause          = space* "MATCH" space* open_paren clause close_paren space*
    where_clause          = space* "WHERE" or_expression space*
    collect_clause        = space* "COLLECT" collect_list space*
    group_by_clause       = space* "BY" group_list space*
    having_clause         = space* "HAVING" or_expression space*
    order_by_clause       = space* "ORDER BY" order_list space*

    main_condition        = low_pri_arithmetic condition_op (function_call / column_name / quoted_literal / numeric_literal) space*
    condition             = main_condition / parenthesized_cdn
    condition_op          = "!=" / ">=" / "<=" / "=" / ">" / "<"
    parenthesized_cdn     = space* open_paren or_expression close_paren space*

    and_expression        = space* condition space* (and_tuple)*
    or_expression         = space* and_expression space* (or_tuple)*
    and_tuple             = "AND" condition
    or_tuple              = "OR" and_expression

    collect_list          = collect_columns* (selected_expression)
    collect_columns       = selected_expression space* comma space*
    selected_expression   = low_pri_arithmetic space*

    group_list            = group_columns* (low_pri_arithmetic)
    group_columns         = low_pri_arithmetic space* comma space*
    order_list            = order_columns* low_pri_arithmetic ("ASC"/"DESC")
    order_columns         = low_pri_arithmetic ("ASC"/"DESC") space* comma space*

    clause                = space* ~r"[-=><\w]+" space*

    low_pri_arithmetic    = space* high_pri_arithmetic space* (low_pri_tuple)*
    high_pri_arithmetic   = space* arithmetic_term space* (high_pri_tuple)*
    low_pri_tuple         = low_pri_op high_pri_arithmetic
    high_pri_tuple        = high_pri_op arithmetic_term

    arithmetic_term       = space* (function_call / numeric_literal / column_name / parenthesized_arithm) space*
    parenthesized_arithm  = open_paren low_pri_arithmetic close_paren

    low_pri_op            = "+" / "-"
    high_pri_op           = "/" / "*"
    param_expression      = low_pri_arithmetic / quoted_literal
    parameters_list       = parameter* (param_expression)
    parameter             = param_expression space* comma space*
    function_call         = function_name open_paren parameters_list? close_paren (open_paren parameters_list? close_paren)?
    simple_term           = quoted_literal / numeric_literal / column_name
    literal               = ~r"[a-zA-Z0-9_\.:-]+"
    quoted_literal        = "'" string_literal "'"
    string_literal        = ~r"[a-zA-Z0-9_\.\+\*\/:-]*"
    numeric_literal       = ~r"-?[0-9]+(\.[0-9]+)?(e[\+\-][0-9]+)?"
    column_name           = ~r"[a-zA-Z_][a-zA-Z0-9_\.]*"
    function_name         = ~r"[a-zA-Z_][a-zA-Z0-9_]*"
    open_paren            = "("
    close_paren           = ")"
    space                 = " "
    comma                 = ","

""")
Ejemplo n.º 36
0
    # have to avoid using backslashes to escape chars here.
    grammar += r"""
        filepath = quoted_filepath / unquoted_filepath
        quoted_filepath = ('"' dquoted_filepath_char+ '"') /
                          ("'" squoted_filepath_char+ "'")
        dquoted_filepath_char = ~r'[^\r\n"]'
        squoted_filepath_char = ~r"[^\r\n']"
        unquoted_filepath = unquoted_filepath_char+
        unquoted_filepath_char = ~r"[^\s\"]"
    """
else:
    grammar += r"""
        filepath = string
    """

# Compile the grammar source text assembled above; the name `grammar` is
# rebound from the source string to the resulting Grammar object.
grammar = Grammar(grammar)


def urljoin2(base, path, **kwargs):
    """Join *path* onto *base*, treating *base* as a directory.

    A trailing slash is added to *base* when missing, so its last segment is
    kept rather than replaced. Extra keyword arguments are passed through to
    ``urljoin``. If the joined URL ends with a slash but *path* did not, that
    slash is removed.
    """
    directory = base if base.endswith('/') else base + '/'
    joined = urljoin(directory, path, **kwargs)
    if joined.endswith('/') and not path.endswith('/'):
        return joined[:-1]
    return joined


def generate_help_text():
    """Return a formatted string listing commands, HTTPie options, and HTTP
    actions.
    """
Ejemplo n.º 37
0
# PEG grammar (parsimonious syntax) for Modelica source text.  The sections
# mirror the rule groups below: stored/class definitions, extends, component
# clauses, modifications, equations, expressions, basic tokens and keywords.
# Punctuation and keyword rules end in `_`, so each one also consumes the
# whitespace that follows it.
#
# Points that are easy to get wrong when editing:
#   * Alternatives are tried in order and the first match wins, so
#     `function_arguments` tries `named_arguments` (which needs `ident =`)
#     before falling back to a positional `function_argument`.
#   * `lbracket`/`rbracket` are "[" / "]" (subscripts, matrix literals) and
#     `lbrace`/`rbrace` are "{" / "}" (array constructors, import lists).
#   * Ranges in `simple_expression` are separated by `colon`; `semicolon`
#     only terminates elements/equations/statements and separates matrix rows.
#   * `string_comment` is optional, so `comment` may match nothing.
#
# NOTE(review): `annotation` is defined twice -- once as
# `annotation class_modification` (which refers to itself) and once as the
# keyword literal.  Presumably only one definition takes effect, so the
# clause form is likely never used; splitting the keyword into its own rule
# would rename a rule that visitors may depend on -- confirm before changing.
# NOTE(review): `element_redeclaration` stops after `final?`; it looks
# unfinished -- confirm against the language specification.
modelica_parser = Grammar(r"""
    #===============================================================
    # STORED DEFINITION
    #===============================================================
    stored_definition = _ (within name? semicolon)?
        (_ final? class_definition semicolon)

    #===============================================================
    # CLASS DEFINITION
    #===============================================================
    class_definition = encapsulated? class_prefixes class_specifier

    class_prefixes = partial?
        (class/model/(operator? record)/block/
        (expandable? connector)/type/package/
        ((pure/impure)? operator? function)/
        operator)

    class_specifier = long_class_specifier/
        short_class_specifier/
        der_class_specifier/
        extends_class_specifier

    long_class_specifier = ident string_comment
        composition end ident

    short_class_specifier = (ident equals base_prefix name
        array_subscripts? class_modification? comment) /
        (ident  equals enumeration lparen (enum_list/colon) rparen
        comment)

    der_class_specifier = ident equals der lparen name
        (comma ident)+ rparen comment

    extends_class_specifier = extends ident class_modification?
        string_comment composition end ident

    base_prefix = type_prefix

    enum_list = enumeration_literal (comma enumeration_literal)*

    enumeration_literal = ident comment

    composition = element_list (((public/protected)
       element_list)/ equation_section/ algorithm_section)*
       (external language_specification?
       external_function_call? annotation? semicolon)?
       (annotation semicolon)?

    language_specification = string

    external_function_call = (component_reference equals)? ident
        lparen expression_list? rparen

    # notice we do a lookahead assertion here that is PEG but not EBNF
    # to ensure that end is not consumed as an ident
    element_list = (!(end/equation/algorithm)
        ((element semicolon)/(annotation semicolon)))*

    element = import_clause /  extends_clause
         / (redeclare? final? inner? outer? (
         (class_definition / component_clause)
         /(replaceable (class_definition / component_clause)
         constraining_clause comment)))

    import_clause = import ( (ident equals name) /
        (name (period (times / (lbrace import_list rbrace)) )?)) comment

    import_list = ident (comma import_list)?

    #===============================================================
    # EXTENDS
    #===============================================================
    extends_clause = extends name class_modification? annotation?

    constraining_clause = constrainedby name class_modification?

    #===============================================================
    # COMPONENT CLAUSE
    #===============================================================
    component_clause = type_prefix type_specifier array_subscripts?
       component_list

    type_prefix = (flow/ stream)? (discrete/parameter/constant)?
    (input/output)?

    type_specifier = name

    component_list = component_declaration (comma
      component_declaration)*

    component_declaration = declaration condition_attribute? comment?

    condition_attribute = if expression

    declaration = ident array_subscripts? modification?

    #===============================================================
    # MODIFICATION
    #===============================================================
    modification = (class_modification ( equals expression)?) /
        ( equals expression) / (assign expression)

    class_modification = lparen argument_list? rparen

    argument_list = argument (comma argument)*

    argument = element_modification_or_replaceable /
        element_redeclaration

    element_modification_or_replaceable =
        each? final? (element_modification / element_replaceable)

    element_modification = name modification? string_comment

    element_redeclaration = redeclare each? final?

    element_replaceable = replaceable (short_class_definition /
        component_clause1) constraining_clause?

    component_clause1 = type_prefix type_specifier
        component_declaration1

    component_declaration1 = declaration comment

    short_class_definition = class_prefixes ident equals ((
        base_prefix name array_subscripts? class_modification?
        comment) / (enumeration lparen ( enum_list? / colon )
        rparen comment ))


    #===============================================================
    # EQUATION
    #===============================================================
    equation_section = initial? equation (equation_expr semicolon)*

    algorithm_section = initial? algorithm (statement semicolon)*

    # note there is also an equation keywords so we call the
    # expression equation_expr
    equation_expr = ((simple_expression equals expression)
        / if_equation / for_equation
        / connect_clause / when_equation
        / (name function_call_args)) comment

    statement = ((component_reference ( (assign expression)
        / function_call_args )) / ( lparen output_expression_list
        rparen assign component_reference function_call_args)
        / break / return / if_statement
        / for_statement / while_statement / when_statement )

    if_equation = if expression then
            (equation_expr semicolon)*
        (elseif expression then
            (equation_expr semicolon)* )*
        (else
            (equation_expr semicolon)* )?
        end if

    if_statement = if expression then
            (statement semicolon)*
        (elseif expression then
            (statement semicolon)*
        )*
        (else
            (statement semicolon)*
        )?
        end if

    for_equation = for for_indices loop
        (equation_expr semicolon)*
        end for

    for_statement = for for_indices loop
        (statement semicolon)*
        end for

    for_indices = for_index (comma for_index)*

    for_index = ident (in expression)?

    while_statement = while expression loop
        (statement semicolon)*
        end while

    when_equation = when expression then
            (equation_expr semicolon)*
        (elsewhen expression then
            (equation_expr semicolon)*
        )*
        end when

    when_statement = when expression then
            (statement semicolon)*
        (elsewhen expression then
            (statement semicolon)*
        )*
        end when

    connect_clause = connect lparen component_reference comma
        component_reference rparen

    #===============================================================
    # EXPRESSION
    #===============================================================
    expression = simple_expression /
        (if expression then expression
        (elseif expression then expression)* else expression)

    simple_expression = logical_expression
        (colon logical_expression
        (colon logical_expression)?)?

    logical_expression = logical_term (or logical_term)*

    logical_term = logical_factor (and logical_factor)*

    logical_factor = not? relation

    relation = arithmetic_expression (rel_op arithmetic_expression)?

    rel_op = less_than / less_than_or_equal / greater_than
        / greater_than_or_equal / equality / inequality

    arithmetic_expression = add_op? term (add_op term)*

    add_op = plus / minus / dot_plus / dot_minus

    term = factor (mul_op factor)*

    mul_op = times / divide / dot_times / dot_divide

    factor = primary ( (exp / dot_exp) primary)?

    primary = unsigned_number / string / false / true
        / ((name / der / initial) function_call_args)
        / component_reference
        / (lparen output_expression_list rparen)
        / (lbracket expression_list
            ( semicolon expression_list )* rbracket)
        / (lbrace function_arguments rbrace)
        / end

    name = period ? ident (period ident)*

    component_reference = period? ident array_subscripts?
        (period ident array_subscripts?)*

    function_call_args = lparen function_arguments? rparen

    function_arguments = named_arguments /
        (function_argument
        ((comma function_arguments) / (for for_indices))?)

    named_arguments = named_argument (comma named_arguments)?

    named_argument = ident equals function_argument

    function_argument = (function name
        lparen named_arguments? rparen) / expression

    output_expression_list = expression? (comma expression?)*

    expression_list = expression (comma expression)*

    array_subscripts = lbracket subscript (comma subscript)* rbracket

    subscript = colon / expression

    comment = string_comment annotation?

    string_comment = (string ( plus string)*)?

    annotation = annotation class_modification

    #===============================================================
    # BASIC
    #===============================================================
    _ = ~'\s*'
    equals = '='_
    assign = ':='_
    semicolon = ';'_
    lparen = '('_
    rparen = ')'_
    lbracket = '['_
    rbracket = ']'_
    colon = ':'_
    comma = ','_
    double_quote = '"'_
    single_quote = "'"_
    lbrace = '{'_
    rbrace = '}'_
    period = '.'_
    plus = '+'_
    dot_plus = '.+'_
    minus = '-'_
    dot_minus = '.-'_
    times = '*'_
    dot_times = '.*'_
    divide = '/'_
    dot_divide = './'_
    exp = '^'_
    dot_exp = '.^'_
    less_than = '<'_
    less_than_or_equal = '<='_
    greater_than = '>'_
    greater_than_or_equal = '>='_
    equality = '=='_
    inequality = '<>'_
    ident = (nondigit ( digit / nondigit )*_) / q_ident
    q_ident = single_quote (q_char / s_escape)+ single_quote
    string = double_quote (s_char/s_escape)* double_quote
    nondigit = ~'[_a-zA-Z]'
    s_char = ~r'[^"\\]*'u
    q_char = (nondigit/digit/~r'[#$%&()*+,-./:;<>=?@[]^\{}|~ ')
    s_escape = ~r'[\'"\?\\\a\b\f\n\r\t\v]'
    digit = ~'[0-9]'
    unsigned_integer = digit+
    unsigned_number = unsigned_integer ( '.' unsigned_integer?)?
        (('e'/'E') ('+'/'-')? unsigned_integer)?

    #===============================================================
    # KEYWORDS
    #===============================================================
    algorithm = 'algorithm'_
    and = 'and'_
    annotation = 'annotation'_
    assert = 'assert'_
    block = 'block'_
    break = 'break'_
    class = 'class'_
    connect = 'connect'_
    connector = 'connector'_
    constant = 'constant'_
    constrainedby = 'constrainedby'_
    der = 'der'_
    discrete = 'discrete'_
    each = 'each'_
    else = 'else'_
    elseif = 'elseif'_
    elsewhen = 'elsewhen'_
    encapsulated = 'encapsulated'_
    end = 'end'_
    enumeration = 'enumeration'_
    equation = 'equation'_
    expandable = 'expandable'_
    extends = 'extends'_
    external = 'external'_
    false = 'false'_
    final = 'final'_
    flow = 'flow'_
    for= 'for'_
    function = 'function'_
    if = 'if'_
    import = 'import'_
    impure = 'impure'_
    in = 'in'_
    initial = 'initial'_
    inner = 'inner'_
    input = 'input'_
    loop = 'loop'_
    model = 'model'_
    not = 'not'_
    operator = 'operator'_
    or = 'or'_
    outer = 'outer'_
    output = 'output'_
    package = 'package'_
    parameter = 'parameter'_
    partial = 'partial'_
    protected = 'protected'_
    public = 'public'_
    pure = 'pure'_
    record = 'record'_
    redeclare = 'redeclare'_
    replaceable = 'replaceable'_
    return = 'return'_
    stream = 'stream'_
    then = 'then'_
    true = 'true'_
    type = 'type'_
    when = 'when'_
    while = 'while'_
    within = 'within'_
    """)
Ejemplo n.º 38
0
 def test_parens_with_leading_whitespace(self):
     """A parenthesized group nested directly inside another group may be
     preceded by whitespace; building and using such a grammar must not
     raise."""
     nested = Grammar("""foo = ( ("c") )""")
     nested.parse('c')
Ejemplo n.º 39
0
 def test_rule_ordering_is_preserved_on_shallow_copies(self):
     """``_copy()`` of a 100-rule grammar must list its rules in the order
     they were declared."""
     expected_names = ['r%s' % i for i in range(100)]
     source = '\n'.join('r%s = "something"' % i for i in range(100))
     copied = Grammar(source)._copy()
     self.assertEqual(list(copied.keys()), expected_names)
Ejemplo n.º 40
0
# PEG grammar for a subset of SQL SELECT queries: compound selects, FROM with
# tables or sub-queries, WHERE, GROUP BY / HAVING, ORDER BY and LIMIT / OFFSET.
#
# Keyword rules (upper-case names at the bottom) carry their own leading
# whitespace: most require at least one whitespace character (`wsp`), while
# statement-leading ones such as SELECT only allow optional whitespace (`ws`).
#
# Alternatives are tried in order and the first match wins, so in `binaryop`
# the two-character operators ("<>", "<=", ">=") are listed before their
# one-character prefixes ("<", ">").
grammar = Grammar(
    r"""
    query    = select_cores orderby? limit?
    select_cores   = select_core (compound_op select_core)*
    select_core    = SELECT wsp select_results from_clause? where_clause? gb_clause?
    select_results = select_result (ws "," ws select_result)*
    select_result  = sel_res_all_star / sel_res_tab_star / sel_res_val / sel_res_col
    sel_res_tab_star = name ".*"
    sel_res_all_star = "*"
    sel_res_val    = expr (AS wsp name)?
    sel_res_col    = col_ref (AS wsp name)

    from_clause    = FROM join_source
    join_source    = ws single_source (ws "," ws single_source)*
    single_source  = source_table / source_subq
    source_table   = table_name (AS wsp name)?
    source_subq    = "(" ws query ws ")" (AS ws name)?

    where_clause   = WHERE wsp expr (AND expr)*

    gb_clause      = GROUP BY group_clause having_clause?
    group_clause   = grouping_term (ws "," grouping_term)*
    grouping_term  = ws expr
    having_clause  = HAVING expr

    orderby        = ORDER BY ordering_term (ws "," ordering_term)*
    ordering_term  = ws expr (ASC/DESC)?

    limit          = LIMIT expr (OFFSET expr)?

    col_ref        = (table_name ".")? column_name



    expr     = biexpr / unexpr / value
    biexpr   = value ws binaryop ws expr
    unexpr   = unaryop expr
    value    = parenval /
               number /
               boolean /
               col_ref /
               function /
               string /
               attr
    parenval = "(" ws expr ws ")"
    function = fname "(" ws arg_list? ws ")"
    arg_list = expr (ws "," ws expr)*
    number   = ~"\d*\.?\d+"i
    string   = ~"\'\w*\'"i
    attr     = ~"\w[\w\d]*"i
    fname    = ~"\w[\w\d]*"i
    boolean  = "true" / "false"
    compound_op = "UNION" / "union"
    binaryop = "+" / "-" / "*" / "/" / "=" / "<>" /
               "<=" / ">=" / "<" / ">" / "and" / "or"
    unaryop  = "+" / "-" / "not"
    ws       = ~"\s*"i
    wsp      = ~"\s+"i

    name       = ~"[a-zA-Z]\w*"i
    table_name = name
    column_name = name

    ADD = wsp "ADD"
    ALL = wsp "ALL"
    ALTER = wsp "ALTER"
    AND = wsp "AND"
    AS = wsp "AS"
    ASC = wsp "ASC"
    BETWEEN = wsp "BETWEEN"
    BY = wsp "BY"
    CAST = wsp "CAST"
    COLUMN = wsp "COLUMN"
    DESC = wsp "DESC"
    DISTINCT = wsp "DISTINCT"
    E = "E"
    ESCAPE = wsp "ESCAPE"
    EXCEPT = wsp "EXCEPT"
    EXISTS = wsp "EXISTS"
    EXPLAIN = ws "EXPLAIN"
    EVENT = ws "EVENT"
    FORALL = wsp "FORALL"
    FROM = wsp "FROM"
    GLOB = wsp "GLOB"
    GROUP = wsp "GROUP"
    HAVING = wsp "HAVING"
    IN = wsp "IN"
    INNER = wsp "INNER"
    INSERT = ws "INSERT"
    INTERSECT = wsp "INTERSECT"
    INTO = wsp "INTO"
    IS = wsp "IS"
    ISNULL = wsp "ISNULL"
    JOIN = wsp "JOIN"
    KEY = wsp "KEY"
    LEFT = wsp "LEFT"
    LIKE = wsp "LIKE"
    LIMIT = wsp "LIMIT"
    MATCH = wsp "MATCH"
    NO = wsp "NO"
    NOT = wsp "NOT"
    NOTNULL = wsp "NOTNULL"
    NULL = wsp "NULL"
    OF = wsp "OF"
    OFFSET = wsp "OFFSET"
    ON = wsp "ON"
    OR = wsp "OR"
    ORDER = wsp "ORDER"
    OUTER = wsp "OUTER"
    PRIMARY = wsp "PRIMARY"
    QUERY = wsp "QUERY"
    RAISE = wsp "RAISE"
    REFERENCES = wsp "REFERENCES"
    REGEXP = wsp "REGEXP"
    RENAME = wsp "RENAME"
    REPLACE = ws "REPLACE"
    RETURN = wsp "RETURN"
    ROW = wsp "ROW"
    SAVEPOINT = wsp "SAVEPOINT"
    SELECT = ws "SELECT"
    SET = wsp "SET"
    TABLE = wsp "TABLE"
    TEMP = wsp "TEMP"
    TEMPORARY = wsp "TEMPORARY"
    THEN = wsp "THEN"
    TO = wsp "TO"
    UNION = wsp "UNION"
    USING = wsp "USING"
    VALUES = wsp "VALUES"
    VIRTUAL = wsp "VIRTUAL"
    WITH = wsp "WITH"
    WHERE = wsp "WHERE"
    """
)
Ejemplo n.º 41
0
 def mod_grammar(grammar):
     """Attempt to update ``grammar`` in place with a freshly built
     one-rule grammar (``baz``)."""
     grammar.update(Grammar(r"""
         baz = 'biff'
     """))
Ejemplo n.º 42
0
 def test_single_quoted_literals(self):
     """Single-quoted literals are accepted, including one whose content
     is a double quote."""
     quoted = Grammar("""foo = 'a' '"'""")
     quoted.parse('a"')
Ejemplo n.º 43
0
# PEG grammar for the interactive command line.  A command is either a
# "mutation" (option flags, `key<op>value` items, `cd`, `rm`) or an
# "immutation" (a preview via `httpie`/`curl`, or an HTTP method action).
#
# Alternatives are tried in order and the first match wins.  Whenever one
# option name is a prefix of another, the longer one must be listed first
# ("--session-read-only" before "--session", "--auth-type" before "--auth",
# "--cert-key" before "--cert"); otherwise the short name matches, the rest
# of the option fails to parse, and the longer option is never recognised.
# The same reasoning puts "==" before "=" in `mutop`.
grammar = Grammar(r"""
    command = mutation / immutation

    mutation = concat_mut+ / nonconcat_mut
    immutation = preview / action

    concat_mut = option_mut / full_quoted_mut / value_quoted_mut / unquoted_mut
    nonconcat_mut = cd / rm
    preview = _ tool _ (method _)? (urlpath _)? concat_mut*
    action = _ method _ (urlpath _)? concat_mut*
    urlpath = (~r"https?://" unquoted_string) / (!concat_mut string)

    unquoted_mut = _ unquoted_mutkey mutop unquoted_mutval _
    full_quoted_mut = full_squoted_mut / full_dquoted_mut
    value_quoted_mut = value_squoted_mut / value_dquoted_mut
    full_squoted_mut = _ "'" squoted_mutkey mutop squoted_mutval "'" _
    full_dquoted_mut = _ '"' dquoted_mutkey mutop dquoted_mutval '"' _
    value_squoted_mut = _ unquoted_mutkey mutop "'" squoted_mutval "'" _
    value_dquoted_mut = _ unquoted_mutkey mutop '"' dquoted_mutval '"' _
    mutop = ":" / "==" / "="
    unquoted_mutkey = unquoted_mutkey_item+
    unquoted_mutval = unquoted_stringitem*
    unquoted_mutkey_item = unquoted_mutkey_char / escapeseq
    unquoted_mutkey_char = ~r"[^\s'\"\\=:]"
    squoted_mutkey = squoted_mutkey_item+
    squoted_mutval = squoted_stringitem*
    squoted_mutkey_item = squoted_mutkey_char / escapeseq
    squoted_mutkey_char = ~r"[^\r\n'\\=:]"
    dquoted_mutkey = dquoted_mutkey_item+
    dquoted_mutval = dquoted_stringitem*
    dquoted_mutkey_item = dquoted_mutkey_char / escapeseq
    dquoted_mutkey_char = ~r'[^\r\n"\\=:]'

    option_mut = flag_option_mut / value_option_mut
    flag_option_mut = _ flag_optname _
    flag_optname = "--json" / "-j" / "--form" / "-f" / "--verbose" / "-v" /
                   "--headers" / "-h" / "--body" / "-b" / "--stream" / "-S" /
                   "--download" / "-d" / "--continue" / "-c" / "--follow" /
                   "--check-status" / "--ignore-stdin" / "--help" /
                   "--version" / "--traceback" / "--debug"
    value_option_mut = _ value_optname ~r"(\s+|=)" string _
    value_optname = "--pretty" / "--style" / "-s" / "--print" / "-p" /
                    "--output" / "-o" / "--session-read-only" / "--session" /
                    "--auth-type" / "--auth" / "-a" / "--proxy" / "--verify" /
                    "--cert-key" / "--cert" / "--timeout"

    cd = _ "cd" _ string _
    rm = _ "rm" _ ~r"\-(h|q|b|o)" _ mutkey _
    tool = "httpie" / "curl"
    method = ~r"get"i / ~r"head"i / ~r"post"i / ~r"put"i / ~r"delete"i /
             ~r"patch"i
    mutkey = unquoted_mutkey / ("'" squoted_mutkey "'") /
             ('"' dquoted_mutkey '"') / flag_optname / value_optname

    string = quoted_string / unquoted_string
    quoted_string = ('"' dquoted_stringitem* '"') /
                    ("'" squoted_stringitem* "'")
    unquoted_string = unquoted_stringitem+
    dquoted_stringitem = dquoted_stringchar / escapeseq
    squoted_stringitem = squoted_stringchar / escapeseq
    unquoted_stringitem = unquoted_stringchar / escapeseq
    dquoted_stringchar = ~r'[^\r\n"\\]'
    squoted_stringchar = ~r"[^\r\n'\\]"
    unquoted_stringchar = ~r"[^\s'\"\\]"
    escapeseq = ~r"\\."
    _ = ~r"\s*"
""")
Ejemplo n.º 44
0
# PEG grammar for column type strings such as "UInt8", "Nullable(String)" or
# "Array(LowCardinality(String))" -- presumably ClickHouse column types;
# confirm against the caller.  A `type` is a primitive or one of the wrapper
# forms (LowCardinality, AggregateFunction, Nullable, Array); wrappers take
# their inner type in parentheses with optional single spaces around it.
# Alternatives are tried in order, which is why longer literals that share a
# prefix come first (see the DateTime/Date note inside the grammar).
grammar = Grammar(
    r"""
    type             = primitive / lowcardinality / agg / nullable / array
    primitive        = basic_type / uint / float / fixedstring / enum
    # DateTime must come before Date
    basic_type       = "DateTime" / "Date" / "IPv4" / "IPv6" / "String" / "UUID"
    uint             = "UInt" uint_size
    uint_size        = "8" / "16" / "32" / "64"
    float            = "Float" float_size
    float_size       = "32" / "64"
    fixedstring      = "FixedString" open_paren space* fixedstring_size space* close_paren
    fixedstring_size = ~r"\d+"
    enum             = "Enum" enum_size open_paren space* enum_pairs space* close_paren
    enum_size        = "8" / "16"
    enum_pairs       = (enum_pair (space* comma space*)?)*
    enum_pair        = quote enum_str quote space* equal space* enum_val
    enum_str         = ~r"([a-zA-Z0-9\-]+)"
    enum_val         = ~r"\d+"
    agg              = "AggregateFunction" open_paren space* agg_func space* comma space* agg_types space* close_paren
    agg_func         = ~r"[a-zA-Z]+\([a-zA-Z0-9\,\.\s]+\)|[a-zA-Z]+"
    agg_types        = (primitive (space* comma space*)?)*
    array            = "Array" open_paren space* (array / primitive / lowcardinality / nullable) space* close_paren
    lowcardinality   = "LowCardinality" open_paren space* (primitive / nullable) space* close_paren
    nullable         = "Nullable" open_paren space* (primitive / basic_type) space* close_paren
    open_paren       = "("
    close_paren      = ")"
    equal            = "="
    comma            = ","
    space            = " "
    quote            = "'"
    """
)
Ejemplo n.º 45
0
def compile():
    """Build a JSON parser and return it as a one-argument callable.

    The returned callable parses its string argument with the grammar
    below and converts the parse tree into Python values: objects become
    ``dict``, arrays become ``list``, strings are decoded with
    ``json_unescape``, every number becomes a ``float``, and
    ``true``/``false``/``null`` become ``True``/``False``/``None``.
    """
    json_grammar = Grammar(r'''
        Start    = ~r"\s*" Value ~r"\s*"
        Object   = ~r"{\s*" Members? ~r"\s*}"
        Members  = Mapping (~r"\s*,\s*" Mapping)*
        Mapping  = String ~r"\s*:\s*" Value
        Array    = ~r"\[\s*" Items? ~r"\s*\]"
        Items    = Value (~r"\s*,\s*" Value)*
        Value    = Object / Array / String
                 / TrueVal / FalseVal / NullVal / Number
        TrueVal  = "true"
        FalseVal = "false"
        NullVal  = "null"
        String   = ~r"\"[ !#-\[\]-\U0010ffff]*(?:\\(?:[\"\\/bfnrt]|u[0-9A-Fa-f]{4})[ !#-\[\]-\U0010ffff]*)*\""
        Number   = ~r"-?(0|[1-9][0-9]*)(\.\d*)?([eE][-+]?\d+)?"
    ''')

    class JsonVisitor(NodeVisitor):
        """Turn the parse tree produced by the grammar into Python values."""

        def generic_visit(self, node, children):
            # Nodes without a dedicated visitor pass their visited children
            # through; when there are none, the matched text stands in.
            if children:
                return children
            return node.text

        # Shared handlers for recurring rule shapes.
        def delimited(self, node, children):
            # children[0] is the first element; children[1] holds the
            # (separator, element) pairs of the repetition that follows.
            head, tail = children[0], children[1]
            return [head] + [element for _, element in tail]

        def atomic(self, node, children):
            # A choice node: forward whichever alternative matched.
            return children[0]

        # Rule-specific visitors.
        visit_Value = atomic
        visit_Members = visit_Items = delimited

        def visit_Start(self, node, children):
            # Drop the surrounding whitespace matches.
            return children[1]

        def visit_Object(self, node, children):
            _opening, body, _closing = children
            # A matched optional arrives as a one-element list; an
            # unmatched one does not arrive as a list at all.
            pairs = body[0] if isinstance(body, list) else []
            return dict(pairs)

        def visit_Array(self, node, children):
            _opening, body, _closing = children
            # Same optional handling as in visit_Object.
            return body[0] if isinstance(body, list) else []

        def visit_Mapping(self, node, children):
            name, _colon, payload = children
            return name, payload

        def visit_String(self, node, children):
            return json_unescape(node.text)

        def visit_Number(self, node, children):
            return float(node.text)

        def visit_TrueVal(self, node, children):
            return True

        def visit_FalseVal(self, node, children):
            return False

        def visit_NullVal(self, node, children):
            return None

    visitor = JsonVisitor()
    return lambda s: visitor.visit(json_grammar.parse(s))
Ejemplo n.º 46
0
 def __init__(self, _grammar, _text):
     """Parse ``_text`` with a grammar built from ``_grammar`` and keep the
     result of visiting the parse tree in ``self.top_group_node``."""
     parser = Grammar(_grammar)
     self.top_group_node = self.visit(parser.parse(_text))
Ejemplo n.º 47
0
let a = 5
let b = 9
let c = 8
let d = "a string"
"""

# works so far!
# PEG grammar for the input text: a sequence of entries and blank runs, where
# an entry is a `#start#` marker followed by zero or more
# `let <key> = <value>` definitions.  A value is one or more numbers, words
# or double-quoted strings.  `lpar` and `rpar` are defined but not referenced
# by any other rule in this grammar.
grammar = Grammar( r"""
    expr        = (entry / emptyline)*
    entry       = start definition*

    start       = ws "#start#" ws
    definition  = "let" ws key assign value ws?
    key         = word+
    value       = (number / word / quoted)+ 
    number      = ~"[0-9]"+
    word        = ~r"[-\w]+"
    quoted      = ~'"[^\"]+"'
    assign       = ws? "=" ws?
    lpar        = "["
    rpar        = "]"
    ws          = ~"\s*"
    emptyline   = ws+
    """)

# create the abstract syntax tree.
tree = grammar.parse(data)

va = VeraVisitor()
out = va.visit(tree)
print(out)
Ejemplo n.º 48
0
def build_grammar(grammar):
    """Return a ``Grammar`` built from ``grammar`` with ``BaseGrammar``
    appended to it."""
    return Grammar(grammar + BaseGrammar)
Ejemplo n.º 49
0
# PEG grammar for a type-declaration language.  A `body` is a list of
# newline-terminated `type <name> <type_expr>` declarations; type expressions
# cover arrays, structs (with fields and `if` blocks), `switch` unions,
# enums/sets, filtered types (`$name(...)`) and references to other types.
#
# Upper-case rules are the lexical tokens.  `NL` is a run of CR/LF characters;
# it is written as the character class [\r\n] because a `?` inside a class is
# a literal question mark, not an "optional" marker.
grammar = Grammar(
r'''
body = ANY_WS? (type_decl NL ANY_WS?)+ ANY_WS?
type_decl = "type" WS IDENT WS type_expr

type_expr = array_expr / struct_expr / union_type / enum_type / type_ref / filtered_type
array_expr = "array" WS? "[" WS? expr WS? "]" WS "of" WS type_expr
type_ref = IDENT (WS? "(" WS? attribute_list? WS? ")")?
attribute_list = attribute (WS? "," WS? attribute)*
attribute = IDENT WS? "=" WS? INT

filtered_type = "$" IDENT WS? "(" WS? type_expr WS? ")"

union_type = "switch" WS expr WS? "{" (WS_WITH_NL union_item)+ WS_WITH_NL "}"
union_item = union_case / union_default
union_case = "case" WS simple_const WS? ":" WS? type_ref
union_default = "default" WS? ":" WS? type_ref

enum_type = ("enum" / "set") WS? "(" WS? type_ref WS? ")" WS? "{" (WS_WITH_NL enum_item)+ WS_WITH_NL "}"
enum_item = IDENT WS? "=" WS? expr

simple_const = enum_value / INT

expr = bin_op / atom
bin_op = atom WS? ("==" / ">" / "<<" / "-" / "+" / "*") WS? expr
atom = field_ref / enum_value / INT
field_ref = ("@" / "^"+) IDENT
enum_value = IDENT "." IDENT

struct_expr = "struct" WS "{" (((WS_WITH_NL struct_item)+ WS_WITH_NL) / ANY_WS?) "}"
struct_item = field / if_stmt
field = (IDENT / "_") WS type_expr
if_stmt = "if" WS expr WS? "{" (WS_WITH_NL struct_item)+ WS_WITH_NL "}"

IDENT = ~"[a-z][a-z0-9_]*"i
NL = ~"[\r\n]+"
WS = ~"[\t ]+"
INT = ~"(0[xX][0-9a-fA-F]+|0|[1-9]\d*)"
ANY_WS = ~"[\r\n\t ]+"
WS_WITH_NL = ~"[\r\t ]*\n[\r\n\t ]*"
''')
Ejemplo n.º 50
0
# data = """
# A, art. ഒരു
# Aback, ad. പുറകൊട്ട, പിന്നൊക്കം
# Abaft, ad. പിമ്പുറത്തെക്ക, കപ്പലിൻറ അമരത്തെക്ക
# Abandon, v. a. വിട്ടൊഴിയുന്നു, ത്യജിക്കുന്നു, പരിത്യാഗം ചെയ്യുന്നു; ഉപെക്ഷിക്കുന്നു, കൈവിടുന്നു
# Abandoned, a. വിട്ടൊഴിയപ്പെട്ട,ത്യജിക്കപ്പെട്ട; ഉപെക്ഷിക്കപ്പെട്ട, കൈവിടപ്പെട്ട; മഹാ കെട്ട, ദുഷ്ടതയുള്ള, വഷളായുള്ള, മഹാ ചീത്ത
# """

# PEG grammar for dictionary lines of the shape
# `headword, pos. sense, sense; sense ...`, as in the commented-out sample
# above.  `pos` is one or more lower-case abbreviations each ending in a
# period; senses are comma-separated, and `;` introduces further groups of
# senses (`subentry`).  `ml` matches a (possibly empty) run of characters in
# the range U+0D00-U+0D7F.
grammar = Grammar(r"""
    expr       = (entry / emptyline )*
    entry      = headword comma pos ws senses subentry emptyline
    headword   = ~"[A-Z 0-9]*"i
    pos        = (ws ~"[a-z]+\.")+
    subentry   = (semicolon ws senses)*
    senses     = (sense comma)* sense
    sense      = (ml ws ml)* ml
    ml         = ~"[\u0d00-\u0d7f]*"
    semicolon  = ~";"
    comma      = ~","
    ws         = ~"\s*"
    emptyline  = ws+
    """)


class DictVisitor(NodeVisitor):
    def visit_expr(self, node, visited_children):
        """ Returns the overall output. """
        output = []
        for child in visited_children:
            if type(child[0]) == dict:
                output.append(child[0])
Ejemplo n.º 51
0
Archivo: espec.py Proyecto: 0xf4b1/keg
# PEG grammar for "espec" strings.  A spec takes one of four forms, selected
# by a one-letter flag:
#   "n"  raw data, no arguments
#   "z"  zipped, optionally followed by ":" and a level or "{level,bits}"
#   "e"  encrypted, followed by ":{key,nonce,espec}" (the espec is nested)
#   "b"  block, followed by ":" and one "size=espec" sub-chunk or a
#        brace-enclosed, comma-separated list of them
# Upper-case rules at the bottom are the lexical tokens.
GRAMMAR = Grammar("""
espec = data_raw / data_zipped / data_encrypted / data_block

data_raw = flag_raw
data_zipped = flag_zip (COLON zip_args)?
data_encrypted = flag_encrypted COLON encryption_args
data_block = flag_block COLON block_args

flag_raw = "n"
flag_zip = "z"
flag_encrypted = "e"
flag_block = "b"

mpq = "mpq"
zip_level = NUMBER
zip_bits = NUMBER / mpq
zip_level_and_bits = BEGIN zip_level COMMA zip_bits END
zip_args = zip_level / zip_level_and_bits

encryption_key = HEX_NUMBER
encryption_nonce = HEX_NUMBER
encryption_args = BEGIN encryption_key COMMA encryption_nonce COMMA espec END

unit_kilobyte = "K"
unit_megabyte = "M"
block_unit = unit_kilobyte / unit_megabyte
block_count = NUMBER
block_size = NUMBER (block_unit)?
block_size_args = STAR (block_count)?
block_size_spec = (block_size block_size_args?) / STAR
block_subchunk_short = block_size_spec EQUALS espec
block_subchunk_long = (BEGIN block_subchunk_short (COMMA block_subchunk_short)* END)
block_args = block_subchunk_short / block_subchunk_long

NUMBER = ~"[0-9]+"
HEX_NUMBER = ~"[0-9A-F]+"
COLON = ":"
COMMA = ","
EQUALS = "="
STAR = "*"
BEGIN = "{"
END = "}"
""")
Ejemplo n.º 52
0
 def test_unicode_crash(self):
     """Calling ``str()`` on a parse tree whose matched text is non-ASCII
     must not raise."""
     tree = Grammar(r'string = ~r"\S+"u').parse('中文')
     str(tree)
Ejemplo n.º 53
0
# PEG grammar for an enhancement-rules text format.  The input is a series of
# lines; each line is a `#` comment, a rule, or empty.  A rule is one or more
# matchers (`[!]key:argument`, where the key or argument may be double-quoted)
# followed by one or more actions: either a flag action (an optional `^`/`v`
# range, then `+` or `-`, then `group` or `app`) or a variable assignment
# (`max-frames` / `min-frames` `=` integer).  `_` is optional run of spaces.
enhancements_grammar = Grammar(r"""

enhancements = line+

line = _ (comment / rule / empty) newline?

rule = _ matchers actions

matchers         = matcher+
matcher          = _ negation? matcher_type sep argument
matcher_type     = key / quoted_key

key              = ~r"[a-zA-Z0-9_\.-]+"
quoted_key       = ~r"\"([a-zA-Z0-9_\.:-]+)\""

actions          = action+
action           = flag_action / var_action
var_action       = _ var_name _ "=" _ expr
var_name         = "max-frames" / "min-frames"
flag_action      = _ range? flag flag_action_name
flag_action_name = "group" / "app"
flag             = "+" / "-"
range            = "^" / "v"
expr             = int
int              = ~r"[0-9]+"

comment          = ~r"#[^\r\n]*"

argument         = quoted / unquoted
quoted           = ~r'"([^"\\]*(?:\\.[^"\\]*)*)"'
unquoted         = ~r"\S+"

sep      = ":"
space    = " "
empty    = ""
negation = "!"
newline  = ~r"[\r\n]"
_        = space*

""")
Ejemplo n.º 54
0
# PEG grammar for a search query string: zero or more filters, each optionally
# surrounded by a single space, followed by optional free text that is
# captured whole by `raw_search`.  A filter is one of:
#   timestamp<operator><date>      (time_filter)
#   [!]has:<key or value>          (has_filter)
#   [!]<key>:<value>               (basic_filter)
# Keys and values may be double-quoted; only quoted keys may contain colons.
event_search_grammar = Grammar(r"""
# raw_search must come at the end, otherwise other
# search_terms will be treated as a raw query
search          = search_term* raw_search?
search_term     = space? (time_filter / has_filter / basic_filter) space?
raw_search      = ~r".+$"

# standard key:val filter
basic_filter    = negation? search_key sep search_value
# filter specifically for the timestamp
time_filter     = "timestamp" operator date_format
# has filter for not null type checks
has_filter      = negation? "has" sep (search_key / search_value)

search_key      = key / quoted_key
search_value    = quoted_value / value
value           = ~r"\S*"
quoted_value    = ~r"\"(.*)\""s
key             = ~r"[a-zA-Z0-9_\.-]+"
# only allow colons in quoted keys
quoted_key      = ~r"\"([a-zA-Z0-9_\.:-]+)\""

date_format    = ~r"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,6})?)?"

# NOTE: the order in which these operators are listed matters
# because for example, if < comes before <= it will match that
# even if the operator is <=
operator        = ">=" / "<=" / ">" / "<" / "=" / "!="
sep             = ":"
space           = " "
negation        = "!"
""")
Ejemplo n.º 55
0
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor
from collections import namedtuple

# PEG grammar for a bracket-based pattern notation.  A `regex` is a run of
# literal text (anything outside square brackets) and `[...]` groups.  Inside
# a group there may be operator tokens optionally followed by matches, a list
# of `|`-separated alternatives, or a whitespace-separated list of matches.
# A match is a quoted literal, a definition (`#name=[...]`), a macro
# (`#name` or a range such as `#a..z`) or a nested group.  `def` is tried
# before `macro` because every definition starts with a macro.
grammar = Grammar(r'''
regex           = ( outer_literal / braces )+
braces          = '[' whitespace? ( ops_matches / either / matches )? whitespace? ']'
ops_matches     = op ( whitespace op )* ( whitespace matches )?
op              = token
either          = matches ( whitespace? '|' whitespace? matches )+
matches         = match ( whitespace match )*
match           = inner_literal / def / macro / braces
macro           = '#' ( range_macro / token )
range_macro     = range_endpoint '..' range_endpoint
def             = macro '=' braces

outer_literal   = ~r'[^\[\]]+'
inner_literal   = ( '\'' until_quote '\'' ) / ( '"' until_doublequote '"' )
until_quote     = ~r"[^']*"
until_doublequote = ~r'[^"]*'

whitespace      = ~r'[ \t\r\n]+'
token           = ~r'[A-Za-z0-9!$-&(-/:-<>-@\\^-`{}~]+'
range_endpoint  = ~r'[A-Za-z0-9]'
''')

# Lightweight immutable record types.  ``Concat`` and ``Either`` each wrap a
# single ``items`` field, ``Def`` and ``Operator`` pair a ``name`` with a
# ``subregex``, and ``Macro`` carries only a ``name``.
Concat = namedtuple('Concat', ('items',))
Either = namedtuple('Either', ('items',))
Def = namedtuple('Def', ('name', 'subregex'))
Operator = namedtuple('Operator', ('name', 'subregex'))
Macro = namedtuple('Macro', ('name',))
Ejemplo n.º 56
0
    help        = "help" / "h" / "?"
    exit        = "exit" / "quit" / "q"
    ls          = ("ls" / "ll") _ (grep)?
    cd          = _ "cd" _ string _
    
    grep        = pipe _ "grep" _ ex_string
    
    pipe        = "|"

    ex_string   = string / "*" / "-" / "_" / "."
    string      = char+
    char        = ~r"[^\s'\\]"
    _           = ~r"\s*"
"""

grammar = Grammar(RULES)


class PrettyFile(object):
    def __init__(self, efile):
        """
        :type efile: easywebdav.client.File
        """
        self._file = efile
        self._name = unquote(path.basename(efile.name)).decode('utf-8')

        self.is_dir = efile.contenttype == 'httpd/unix-directory'
        self.name = self._name
        self.size = humanbytes(int(efile.size))
        self.modify_time = dt_parse(efile.mtime).astimezone(tz.tzlocal()).strftime('%Y-%m-%d %H:%M:%S')
        if self.is_dir:
Ejemplo n.º 57
0
 def __init__(self, grammar, text):
     """Parse ``text`` and immediately walk the resulting tree.

     ``grammar`` is handed straight to ``Grammar(...)`` and ``text`` is
     the input parsed with it. ``self.op`` starts out as an empty list
     before the tree is passed to ``self.visit``.
     """
     self.op = []
     # Compile the grammar and parse in one step, then visit the tree.
     parse_tree = Grammar(grammar).parse(text)
     self.visit(parse_tree)
Ejemplo n.º 58
0
# PEG grammar (parsimonious syntax) for an event search query language.
#
# * The start rule is `search`: zero or more boolean terms (terms joined by
#   "OR"/"AND"), parenthesised terms, or plain search terms.
# * A `search_term` is tried as a `key_val_term` first, then as a quoted
#   free-text value, then as unquoted free text (`raw_search`, which uses a
#   negative lookahead so it never swallows something that parses as a
#   key:value filter).
# * The alternatives inside `key_val_term` are ordered from most specific
#   (`tag_filter`, the time filters, numeric/aggregate filters) down to the
#   generic `basic_filter`; PEG choice is ordered, so this order matters.
# * `numeric_value`, `date_format` and `rel_date_format` end with a
#   lookahead requiring whitespace or end of input after the value.
# * The trailing `s` after the `quoted_value` regex is a regex flag.
# * The in-grammar NOTE about operator ordering sits directly above
#   `boolean_operator`, but the ordering it describes ("<=" before "<")
#   is that of the `operator` rule on the following line.
event_search_grammar = Grammar(
    r"""
search               = (boolean_term / paren_term / search_term)*
boolean_term         = (paren_term / search_term) space? (boolean_operator space? (paren_term / search_term) space?)+
paren_term           = space? open_paren space? (paren_term / boolean_term)+ space? closed_paren space?
search_term          = key_val_term / quoted_raw_search / raw_search
key_val_term         = space? (tag_filter / time_filter / rel_time_filter / specific_time_filter
                       / numeric_filter / aggregate_filter / aggregate_date_filter / has_filter
                       / is_filter / quoted_basic_filter / basic_filter)
                       space?
raw_search           = (!key_val_term ~r"\ *([^\ ^\n ()]+)\ *" )*
quoted_raw_search    = spaces quoted_value spaces

# standard key:val filter
basic_filter         = negation? search_key sep search_value
quoted_basic_filter  = negation? search_key sep quoted_value
# filter for dates
time_filter          = search_key sep? operator date_format
# filter for relative dates
rel_time_filter      = search_key sep rel_date_format
# exact time filter for dates
specific_time_filter = search_key sep date_format
# Numeric comparison filter
numeric_filter       = (function_key / search_key) sep operator? numeric_value
# Aggregate numeric filter
aggregate_filter        = aggregate_key sep operator? numeric_value
aggregate_date_filter   = aggregate_key sep operator? (date_format / rel_date_format)

# has filter for not null type checks
has_filter           = negation? "has" sep (search_key / search_value)
is_filter            = negation? "is" sep search_value
tag_filter           = negation? "tags[" search_key "]" sep search_value

aggregate_key        = key space? open_paren space? key space? closed_paren
function_key         = key space? open_paren space? closed_paren
search_key           = key / quoted_key
search_value         = quoted_value / value
value                = ~r"[^()\s]*"
numeric_value        = ~r"[0-9]+(?=\s|$)"
quoted_value         = ~r"\"((?:[^\"]|(?<=\\)[\"])*)?\""s
key                  = ~r"[a-zA-Z0-9_\.-]+"
# only allow colons in quoted keys
quoted_key           = ~r"\"([a-zA-Z0-9_\.:-]+)\""

date_format          = ~r"\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(\.\d{1,6})?)?Z?(?=\s|$)"
rel_date_format      = ~r"[\+\-][0-9]+[wdhm](?=\s|$)"

# NOTE: the order in which these operators are listed matters
# because for example, if < comes before <= it will match that
# even if the operator is <=
boolean_operator     = "OR" / "AND"
operator             = ">=" / "<=" / ">" / "<" / "=" / "!="
open_paren           = "("
closed_paren         = ")"
sep                  = ":"
space                = " "
negation             = "!"
spaces               = ~r"\ *"
"""
)
Ejemplo n.º 59
0
 def test_repr(self):
     """``repr()`` of a compiled grammar must be a non-empty string."""
     compiled = Grammar(r'foo = "a"')
     rendered = repr(compiled)
     self.assertTrue(rendered)
# PEG grammar (parsimonious syntax) for a brace-delimited "entity" format.
#
# * DOCUMENT and VERSION_LINES are commented out inside the grammar, so the
#   first live rule -- and therefore the start rule -- is ENTITY: the word
#   "entity" followed by a braced list of properties.
# * A property is an `entityDef <name> { ... }` block, a `layers { "..." }`
#   block, or an assignment; the block forms are tried before ASSIGNMENT.
# * An assignment target is a plain name or an indexed name (`name[int]`);
#   its value is either a nested braced object containing at least one
#   assignment, or a literal terminated by ';'.
# * The punctuation rules (LBRACE, RBRACE, EQUALS, SEMICOLON) absorb
#   optional whitespace on both sides, so most other rules need no
#   explicit SPACE.
# * STRING is a double-quoted run of non-quote characters; there is no
#   escape sequence for embedding a double quote.
entity_grammar = Grammar(r"""
    #DOCUMENT = VERSION_LINES? ENTITY*

    #VERSION_LINES = "Version" SPACE INTEGER SPACE "HierarchyVersion" SPACE INTEGER SPACE
    ENTITY        = "entity" LBRACE ENTITY_PROPS* RBRACE
    ENTITY_PROPS  = (ENTITYDEF_BLOCK / LAYERS_BLOCK / ASSIGNMENT)

    ENTITYDEF_BLOCK = "entityDef" SPACE VARNAME LBRACE ASSIGNMENT* RBRACE
    LAYERS_BLOCK    = "layers" LBRACE STRING RBRACE
    ASSIGNMENT      = VARIABLE EQUALS (OBJECT / LITERAL)

    OBJECT     = LBRACE ASSIGNMENT+ RBRACE
    LITERAL    = (NUMBER / STRING / NULL / BOOL) SEMICOLON

    VARIABLE = (INDEXED / VARNAME)
    INDEXED    = VARNAME "[" INTEGER "]"

    LBRACE    = SPACE? "{" SPACE?
    RBRACE    = SPACE? "}" SPACE?
    EQUALS    = SPACE? "=" SPACE?
    SEMICOLON = SPACE? ";" SPACE?

    VARNAME = ~r"\w+"
    STRING  = '"' ~r"[^\"]*" '"'
    NUMBER  = ~r"[+\-]?(?:0|[1-9]\d*)(?:\.\d*)?(?:[eE][+\-]?\d+)?"
    INTEGER = ~r"[-]?\d+"
    BOOL    = "true" / "false"
    NULL    = "NULL"
    SPACE   = ~r"\s+"
""")