Example #1
0
    def test_inner_rule_succeeding(self):
        """Make sure ``parse()`` fails and blames the
        rightward-progressing-most named Expression when an Expression isn't
        satisfied.

        Make sure ParseErrors have nice Unicode representations.

        The test fails explicitly when ``parse()`` raises nothing, so a
        silently successful parse cannot be mistaken for a pass.

        """
        grammar = Grammar("""
            bold_text = open_parens text close_parens
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        try:
            grammar.parse(text)
        except ParseError as error:
            eq_(error.pos, 6)
            eq_(error.expr, grammar['close_parens'])
            eq_(error.text, text)
            eq_(
                unicode(error),
                u"Rule 'close_parens' didn't match at '!!' (line 1, column 7)."
            )
        else:
            # Without this branch the assertions above would simply be skipped.
            raise AssertionError('parse() was expected to raise ParseError')
Example #2
0
    def test_rewinding(self):
        """Make sure rewinding the stack and trying an alternative (which
        progresses farther) from a higher-level rule can blame an expression
        within the alternative on failure.

        There's no particular reason I suspect this wouldn't work, but it's a
        more real-world example than the no-alternative cases already tested.

        The test fails explicitly when ``parse()`` raises nothing.

        """
        grammar = Grammar("""
            formatted_text = bold_text / weird_text
            bold_text = open_parens text close_parens
            weird_text = open_parens text "!!" bork
            bork = "bork"
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        try:
            grammar.parse(text)
        except ParseError as error:
            eq_(error.pos, 8)
            eq_(error.expr, grammar['bork'])
            eq_(error.text, text)
        else:
            # Without this branch the assertions above would simply be skipped.
            raise AssertionError('parse() was expected to raise ParseError')
    def test_rewinding(self):
        """Make sure rewinding the stack and trying an alternative (which
        progresses farther) from a higher-level rule can blame an expression
        within the alternative on failure.

        There's no particular reason I suspect this wouldn't work, but it's a
        more real-world example than the no-alternative cases already tested.

        The test fails explicitly when ``parse()`` raises nothing.

        """
        grammar = Grammar("""
            formatted_text = bold_text / weird_text
            bold_text = open_parens text close_parens
            weird_text = open_parens text "!!" bork
            bork = "bork"
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        try:
            grammar.parse(text)
        except ParseError as error:
            eq_(error.pos, 8)
            eq_(error.expr, grammar['bork'])
            eq_(error.text, text)
        else:
            # Without this branch the assertions above would simply be skipped.
            raise AssertionError('parse() was expected to raise ParseError')
    def test_lookahead(self):
        """Check the ``&`` lookahead: 'burp' must yield ``None`` and 'arp'
        must parse with a zero-width first child."""
        lookahead_grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        no_match = lookahead_grammar.parse('burp')
        eq_(no_match, None)

        source = 'arp'
        tree = lookahead_grammar.parse(source)
        # First child spans 0-0 (the lookahead), second spans the whole word.
        children = [Node('', source, 0, 0), Node('', source, 0, 3)]
        eq_(tree, Node('starts_with_a', source, 0, 3, children=children))
Example #5
0
 def test_parse_with_leftovers(self):
     """Make sure ``parse()`` reports where we started failing to match,
     even if a partial match was successful.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
     try:
         grammar.parse('chitty bangbang')
     except IncompleteParseError as error:
         eq_(str(error), "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12).")
     else:
         # Without this branch the assertion above would simply be skipped.
         raise AssertionError('parse() was expected to raise IncompleteParseError')
 def test_favoring_named_rules(self):
     """Named rules should be used in error messages in favor of anonymous
     ones, even if those are rightward-progressing-more, and even if the
     failure starts at position 0.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
     try:
         grammar.parse('burp')
     except ParseError as error:
         eq_(str(error), "Rule 'starts_with_a' trying to match (&(\"a\") ~\"[a-z]+\"u) didn't match at 'burp' (line 1, column 1).")
     else:
         # Without this branch the assertion above would simply be skipped.
         raise AssertionError('parse() was expected to raise ParseError')
Example #7
0
 def test_resolve_refs_order(self):
     """Smoke test: rules that refer to rules defined further down the
     grammar text must still produce a grammar that can parse."""
     rules = """
         expression = "(" terms ")"
         terms = term+
         term = number
         number = ~r"[0-9]+"
         """
     # Only checks that compiling and parsing do not blow up.
     Grammar(rules).parse('(34)')
Example #8
0
 def test_resolve_refs_order(self):
     """Smoke test: rules that refer to rules defined further down the
     grammar text must still produce a grammar that can parse."""
     rules = """
         expression = "(" terms ")"
         terms = term+
         term = number
         number = ~r"[0-9]+"
         """
     # Only checks that compiling and parsing do not blow up.
     Grammar(rules).parse('(34)')
Example #9
0
 def test_favoring_named_rules(self):
     """Named rules should be used in error messages in favor of anonymous
     ones, even if those are rightward-progressing-more, and even if the
     failure starts at position 0.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
     try:
         grammar.parse('burp')
     except ParseError as error:
         self.assertEqual(str(error), u"Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1).")
     else:
         # Without this branch the assertion above would simply be skipped.
         self.fail('parse() was expected to raise ParseError')
Example #10
0
 def test_no_named_rule_succeeding(self):
     """Make sure ParseErrors have sane printable representations even if we
     never succeeded in matching any named expressions.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar('''bork = "bork"''')
     try:
         grammar.parse('snork')
     except ParseError as error:
         eq_(error.pos, 0)
         eq_(error.expr, grammar['bork'])
         eq_(error.text, 'snork')
     else:
         # Without this branch the assertions above would simply be skipped.
         raise AssertionError('parse() was expected to raise ParseError')
 def test_no_named_rule_succeeding(self):
     """Make sure ParseErrors have sane printable representations even if we
     never succeeded in matching any named expressions.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar('''bork = "bork"''')
     try:
         grammar.parse('snork')
     except ParseError as error:
         eq_(error.pos, 0)
         eq_(error.expr, grammar['bork'])
         eq_(error.text, 'snork')
     else:
         # Without this branch the assertions above would simply be skipped.
         raise AssertionError('parse() was expected to raise ParseError')
Example #12
0
 def test_line_and_column(self):
     """Make sure we got the line and column computation right.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar(r"""
         whee_lah = whee "\n" lah "\n"
         whee = "whee"
         lah = "lah"
         """)
     try:
         grammar.parse('whee\nlahGOO')
     except ParseError as error:
         # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
         # didn't match". That's not the greatest. Fix that, then fix this.
         self.assertTrue(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
     else:
         # Without this branch the assertion above would simply be skipped.
         self.fail('parse() was expected to raise ParseError')
 def test_line_and_column(self):
     """Make sure we got the line and column computation right.

     The test fails explicitly when ``parse()`` raises nothing."""
     grammar = Grammar(r"""
         whee_lah = whee "\n" lah "\n"
         whee = "whee"
         lah = "lah"
         """)
     try:
         grammar.parse('whee\nlahGOO')
     except ParseError as error:
         # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
         # didn't match". That's not the greatest. Fix that, then fix this.
         ok_(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
     else:
         # Without this branch the assertion above would simply be skipped.
         raise AssertionError('parse() was expected to raise ParseError')
Example #14
0
    def test_lazy_custom_rules(self):
        """LazyReferences placed by hand inside a custom rule must be
        resolved.

        Also exercises passing a ready-made Expression as a custom rule and
        selecting that custom rule as the default.

        """
        # The custom rule refers to two rules that only exist in the text.
        forty_five_rule = Sequence(LazyReference('four'),
                                   LazyReference('five'),
                                   name='forty_five')
        compiled = Grammar("""
            four = '4'
            five = '5'""",
                           forty_five=forty_five_rule)
        grammar = compiled.default('forty_five')

        source = '45'
        tree = grammar.parse(source)
        kids = [
            Node(grammar['four'], source, 0, 1),
            Node(grammar['five'], source, 1, 2),
        ]
        eq_(tree, Node(grammar['forty_five'], source, 0, 2, children=kids))
Example #15
0
def convert(wikitext):
    """Parse text with the grammar in ``wiki1.peg`` and format it via
    ``HtmlFormatter``, printing the inputs and per-stage timings on the way.

    Returns the formatter's result encoded as UTF-8.

    NOTE(review): the ``wikitext`` parameter is never read; the text that is
    parsed comes from ``atext``, which is not defined in this function.
    Presumably a module-level global -- confirm, or switch to ``wikitext``.
    """

    # read the grammar PEG file; the with-block closes it even if read() fails
    with open("wiki1.peg") as f:
        peg = f.read()

    start = time.clock()
    grammar = Grammar(peg)
    elapsed1 = time.clock() - start

    print("+++++++++grammar+++++++++")
    print(peg)
    print("+++++++++input+++++++++")
    print(atext.decode('utf8'))
    print('*********************')
    start = time.clock()
    parsed = grammar.parse(atext.decode('utf8'))
    elapsed2 = time.clock() - start
    print(dir(parsed))
    print(parsed)
    start = time.clock()
    result = HtmlFormatter().visit(parsed)
    elapsed3 = time.clock() - start

    print("timeto create grammar:", elapsed1)
    print("timeto parse text:", elapsed2)
    print("timeto convert to xml:", elapsed3)

    return result.encode('utf8')
    def test_parens(self):
        """Parenthesised groups must be honoured, and the resulting tree must
        print with one nested level per grouping."""
        chitty = Grammar(r'''sequence = "chitty" (" " "bang")+''')
        # Make sure it's not as if the parens aren't there:
        eq_(chitty.parse('chitty bangbang'), None)

        source = 'chitty bang bang'
        rendered = str(chitty.parse(source))
        expected = """<Node called "sequence" matching "chitty bang bang">
    <Node matching "chitty">
    <Node matching " bang bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">
        <Node matching " bang">
            <Node matching " ">
            <Node matching "bang">"""
        eq_(rendered, expected)
Example #17
0
def lex(text):
    """Run ``text`` through ``add_indents`` and parse the result with the
    grammar below, returning the parse tree.

    A parsimonious ``ParseError`` is re-raised as this module's own
    ``ParseError`` wrapping the original exception.
    """
    peg = Grammar("""\
    entry = _ (statement _)* _
    statement = multiline / single
    multiline = atom _ args _ ":" _ "_INDENT_" _ (statement _)+ "_DEDENT_"
    single = atom _ args
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\s*" (~"#[^\\r\\n]*\s*")*
    args = ( _ map ) / ( _ "(" _ term (_ "," _ term)* _ ")" ) / (_ "(" _ ")")
    map = "(" _ kv (_ "," _ kv)* _ ")"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    kv = term _ "=" _ term _
    term = unumber / logic_exp / single / list / string / atom / number
    logic_exp = logic_priority / logic_unary / logic_plain
    logic_priority = "(" _ logic_exp _ ")" _ (logic_binary _ logic_exp _)*
    logic_unary = "not" _ logic_exp _
    logic_binary = "and" / "or"
    logic_plain = logic_op _ (logic_binary _ logic_exp _)*
    logic_op = (string / number) _ ("<=" / ">=" / "<" / ">" / "==" / "!=" / "<>" / "/=") _ (string / number)
    string = '"' ~r'(\\\\.|[^\\\\"])*' '"'
    number = ~"[0-9]+(\.[0-9]+)?(e\-?[0-9]+)?[GKM]?"
    unumber = (number / single) _ atom
    """)
    try:
        # Indentation is turned into explicit markers before parsing.
        indented = add_indents(text)
        tree = peg.parse(indented)
    except parsimonious.exceptions.ParseError as parse_failure:
        raise ParseError(parse_failure)
    return tree
Example #18
0
 def test_right_recursive(self):
     """A rule that refers to itself at its right end must resolve and
     parse."""
     recursive = Grammar("""
         digits = digit digits?
         digit = ~r"[0-9]"
         """)
     tree = recursive.parse('12')
     ok_(tree is not None)
Example #19
0
def load_props(file_name):
    """Parse the file at ``file_name`` with the module-level ``grammar`` and
    collect its properties.

    Returns a dict with two keys: ``'props'`` maps property names to values
    (for ``cdfData`` parameters, to ``{'value': ...}`` dicts) and ``'cbs'``
    maps parameter names to their ``callback`` entries.
    """
    g = Grammar(grammar)
    retd = {}
    cbs = {}
    # The with-block guarantees the file is closed; the original leaked it.
    with open(file_name) as source:
        text = source.read()
    tree = g.parse(text)
    entries = Visitor().visit(tree)
    for e in entries:
        o = toDict(e, 1)

        if o['name'] == "cdfData":
            # cdfData carries a list of parameters rather than a single value.
            o = toDict(o['value'], 0)['parameters']
            for p in o:
                d = toDict(p, 0)
                v = d['defValue']
                if v.startswith("iPar"):
                    v = "1"  #TODO: hack
                if d['type'] == "boolean":
                    v = True if v == 't' else None
                retd[d['name']] = {'value': v}
                if 'callback' in d:
                    cbs[d['name']] = d['callback']
            continue
        t = ""
        if "valueType" in o:
            t = o["valueType"]
        if t == "float":
            o['value'] = float(o['value'])
        retd[o['name']] = o['value']
        if o['value'] == "valueType":
            # Debug output; the call form prints the same on Python 2 and 3.
            print(o)
            print(e)
    return {'props': retd, 'cbs': cbs}
Example #20
0
def parse_config_lines(lines: List[str],
                       name: Optional[str] = None) -> tuple:
    """Parse settings-file lines and return ``(filename, params)``.

    The lines are joined with newlines, parsed with the grammar below and
    handed to ``tree_to_filename_and_dict``. The temporary bookkeeping keys
    ``_current_module`` and ``_filename`` are removed from ``params`` before
    returning.

    ``name`` is accepted but not used by this function.

    The return annotation previously claimed ``SimpleNamespace`` although a
    2-tuple is returned; it now says ``tuple``.
    """
    grammar = Grammar(r"""
        section    = opt_header? (module / definition / comment / endl)*
        opt_header = '#SETTINGS_FILE ' filename
        filename   = ~r"[^\n]+"
        module     = '% ' module_name
        module_name = ~r"[a-zA-Z_]+"
        definition = sp? name sp? '=' sp? value? comment? endl?
        comment    = sp? '#' vartype? anychar*
        vartype    = '(' ('bool' / 'string' / 'double' / 'int') ')'
        name       = ~r"[a-zA-Z_]+"
        value      = ~r"[^#\n]+"
        sp         = ' '*
        endl       = sp? '\n'
        anychar    = ~r"[^\n]"
        """)
    output = grammar.parse('\n'.join(lines))
    filename, params = tree_to_filename_and_dict(output)
    # remove temporary state entries
    for temp_key in ('_current_module', '_filename'):
        if temp_key in params:
            del params[temp_key]
    return filename, params
Example #21
0
 def test_right_recursive(self):
     """A rule that refers to itself at its right end must resolve and
     parse."""
     recursive = Grammar("""
         digits = digit digits?
         digit = ~r"[0-9]"
         """)
     tree = recursive.parse('12')
     ok_(tree is not None)
Example #22
0
    def test_complex_custom_rules(self):
        """Exercise a custom rule that takes five arguments.

        The custom rule hands back a real Node, so that path is covered too.

        """
        # In this particular implementation of the digit rule, no node is
        # generated for `digit`; it falls right through to `real_digit`.
        # I'm not sure if this could lead to problems; I can't think of
        # any, but it's probably not a great idea.
        def digit_rule(text, pos, cache, error, grammar):
            return grammar['real_digit'].match_core(text, pos, cache, error)

        grammar = Grammar(
            """
            bracketed_digit = start digit end
            start = '['
            end = ']'
            real_digit = '6'""",
            digit=digit_rule)
        source = '[6]'
        tree = grammar.parse(source)
        kids = [
            Node(grammar['start'], source, 0, 1),
            Node(grammar['real_digit'], source, 1, 2),
            Node(grammar['end'], source, 2, 3),
        ]
        eq_(tree, Node(grammar['bracketed_digit'], source, 0, 3, children=kids))
def parse_jil(input_file):
    """Read ``input_file``, parse it with the JIL grammar below and return
    whatever ``JilVisitor`` produces for the resulting tree."""
    jil_grammar = Grammar(r"""
        expr        = (entry / emptyline)*
        entry       = job pair*

        job         = jobstart colon jobname ws
        pair        = key colon value ws?

        key         = !jobstart word+
        value       = (word / quoted)+
        word        = ~r"[- ,\w\(\)\@\.\/\$\*\'\&\<\>]+"
        wordwild    = ~r"(.*)"
        quoted      = ~'"+[^\"]+"+'
        colon       = ws? ":" ws?
        jobname     = ~r"[\w]+"
        jobstart    = "insert_job"
        ws          = ~"\s*"
        emptyline   = ws+
        """)
    with open(input_file, 'r') as handle:
        contents = handle.read()

    # Parse first, then let the visitor turn the tree into the result.
    parsed = jil_grammar.parse(contents)
    return JilVisitor().visit(parsed)
Example #24
0
    def test_lookahead(self):
        """Check the ``&`` lookahead: 'burp' must raise ParseError and 'arp'
        must parse with a zero-width first child."""
        lookahead_grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, lookahead_grammar.parse, 'burp')

        source = 'arp'
        tree = lookahead_grammar.parse(source)
        # First child spans 0-0 (the lookahead), second spans the whole word.
        children = [Node('', source, 0, 0), Node('', source, 0, 3)]
        eq_(tree, Node('starts_with_a', source, 0, 3, children=children))
Example #25
0
    def test_lookahead(self):
        """Check the ``&`` lookahead: 'burp' must raise ParseError and 'arp'
        must parse with a zero-width first child."""
        lookahead_grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, lookahead_grammar.parse, 'burp')

        source = 'arp'
        tree = lookahead_grammar.parse(source)
        # First child spans 0-0 (the lookahead), second spans the whole word.
        children = [
            Node(Lookahead(Literal('a')), source, 0, 0),
            Node(Regex(r'[a-z]+'), source, 0, 3),
        ]
        eq_(tree, Node(lookahead_grammar['starts_with_a'], source, 0, 3,
                       children=children))
Example #26
0
    def test_lookahead(self):
        """Check the ``&`` lookahead: 'burp' must raise ParseError and 'arp'
        must parse with a zero-width first child."""
        lookahead_grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, lookahead_grammar.parse, 'burp')

        source = 'arp'
        tree = lookahead_grammar.parse(source)
        # First child spans 0-0 (the lookahead), second spans the whole word.
        children = [Node('', source, 0, 0), Node('', source, 0, 3)]
        eq_(tree, Node('starts_with_a', source, 0, 3, children=children))
Example #27
0
  def __init__(self, code):
    """Parse ``code`` with the ``QUERY_PEG`` grammar, keep the resulting
    tree, then run ``_translate()``."""
    # Start from an empty query and no steps.
    self.steps = []
    self.object_query = {}

    # parsing:
    query_grammar = Grammar(QUERY_PEG)
    self.__nodes = query_grammar.parse(code)
    self._translate()
Example #28
0
    def test_lookahead(self):
        """Check the ``&`` lookahead: 'burp' must raise ParseError and 'arp'
        must parse with a zero-width first child."""
        lookahead_grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        self.assertRaises(ParseError, lookahead_grammar.parse, 'burp')

        source = 'arp'
        tree = lookahead_grammar.parse(source)
        # First child spans 0-0 (the lookahead), second spans the whole word.
        children = [
            Node(Lookahead(Literal('a')), source, 0, 0),
            Node(Regex(r'[a-z]+'), source, 0, 3),
        ]
        expected = Node(lookahead_grammar['starts_with_a'], source, 0, 3,
                        children=children)
        self.assertEqual(tree, expected)
    def result(self):
        """Parse ``self._condition`` with the condition grammar and return
        the first element of what ``ConditionVisitor`` yields for the tree."""

        condition_grammar = Grammar("""
            condition = always / never / comparison

            ws = ~"\s*"

            never = ~"never"i
            always = ~"always"i

            value = numeric / varname

            numeric = ~"[+-]?\d+(\.\d+)?"
            varname = ~"[a-z_][a-z0-9_]*"i

            range = percentage / numeric

            percentage = numeric percent_sign
            percent_sign = "%"

            comparison = range_eq_comparison / range_leftrocket_comparison / range_rightrocket_comparison / range_muchlessthan_comparison / range_muchgreaterthan_comparison / simple_comparison

            simple_comparison = value ws simple_comparator ws value

            simple_comparator = cmp_eq / cmp_neq / cmp_gte / cmp_gt / cmp_lte / cmp_lt
            cmp_eq = "=="
            cmp_neq = "!="
            cmp_gte = ">="
            cmp_gt = ">"
            cmp_lte = "<="
            cmp_lt = "<"

            range_muchlessthan_comparison = value ws range_lt_prev range range_lt_post ws value
            range_lt_prev = "<"
            range_lt_post = "<"

            range_leftrocket_comparison = value ws range_lr_prev range range_lr_post ws value
            range_lr_prev = "<"
            range_lr_post = "="

            range_eq_comparison = value ws range_eq_prev range range_eq_post ws value
            range_eq_prev = "="
            range_eq_post = "="

            range_rightrocket_comparison = value ws range_rr_prev range range_rr_post ws value
            range_rr_prev = "="
            range_rr_post = ">"

            range_muchgreaterthan_comparison = value ws range_gt_prev range range_gt_post ws value
            range_gt_prev = ">"
            range_gt_post = ">"
        """)

        parsed = condition_grammar.parse(self._condition)
        visitor = ConditionVisitor(self.data)
        visited = visitor.visit(parsed)

        return visited[0]
Example #30
0
def parse(source):
    """Parse tbon ``source`` with the grammar below and return the tree."""
    tbon_grammar = Grammar("""
        score = wsc* music*
        music = (partswitch*  bar+)+ wsc*
        partswitch = "P=" partnum
        wsc = comment / ws+
        comment = ws* ~r"/\*.*?\*/"s ws*
        bar = (wsc* (meta / beat) wsc+)+ barline
        meta = beatspec / key / tempo /
               relativetempo / velocity /
               de_emphasis / channel / instrument
        beatspec = "B=" ("2." / "2" / "4." / "4" / "8." / "8")
        key = "K=" keyname
        keyname = ~r"[a-gA-G](@|#)?"
        tempo = "T=" floatnum
        relativetempo = "t=" floatnum
        velocity = "V=" floatnum
        de_emphasis = "D=" floatnum
        channel = "C=" chnum
        partnum = ~r"[1-9][0-9]*"i
        instrument = "I=" inum
        inum = ~r"[1-9][0-9]*"i
        floatnum = ~r"\d*\.?\d+"i
        chnum = ~r"\d*\.?\d+"i
        beat = subbeat+
        barline = "|" / ":"
        extendable = chord / roll / ornament / pitch / rest
        pitch = octave* alteration? pitchname
        chord = chordstart chorditem chorditem* rparen
        chordstart = "("
        chorditem = chordpitch / chordhold / chordrest
        chordpitch = octave* alteration? pitchname
        chordhold = '-'
        chordrest = "_" / "z"
        rparen = ")"
        roll = rollstart pitch pitch+ rparen
        rollstart = "(:"
        ornament = ornamentstart pitch pitch+ rparen
        ornamentstart = "(~"
        subbeat = extendable / hold
        rest = "_" / "z"
        hold = "-"
        octave = octave_up / octave_down
        alteration = doublesharp / sharp / doubleflat / flat / natural
        doublesharp = "𝄪" / "##"
        sharp = "♯" / "#"
        doubleflat = "𝄫" / "@@"
        flat = "♭" / "@"
        natural = "♮" / "%"
        octave_up = "^"
        octave_down = "/"
        pitchname = ~"[a-g1-7]"i
        ws = ~r"\s*"i
        """)
    tree = tbon_grammar.parse(source)
    return tree
Example #31
0
    def test_lazy_default_rule(self):
        """The default rule must end up as a real rule even though the first
        rule in the text refers forward to one defined after it.

        """
        forward_ref = Grammar(r"""
            styled_text = text
            text        = "hi"
            """)
        tree = forward_ref.parse('hi')
        eq_(tree, Node('text', 'hi', 0, 2))
Example #32
0
 def test_multi_line(self):
     """Rules broken across lines, interleaved with comments, or sharing a
     line with another rule must still compile and parse."""
     messy = Grammar("""
         bold_text  = bold_open  # commenty comment
                      text  # more comment
                      bold_close
         text       = ~"[A-Z 0-9]*"i
         bold_open  = "((" bold_close =  "))"
         """)
     tree = messy.parse('((booyah))')
     ok_(tree is not None)
Example #33
0
    def test_lazy_default_rule(self):
        """The default rule must end up as a real rule even though the first
        rule in the text refers forward to one defined after it.

        """
        forward_ref = Grammar(r"""
            styled_text = text
            text        = "hi"
            """)
        tree = forward_ref.parse('hi')
        expected = Node(forward_ref['text'], 'hi', 0, 2)
        self.assertEqual(tree, expected)
Example #34
0
 def test_multi_line(self):
     """Rules broken across lines, interleaved with comments, or sharing a
     line with another rule must still compile and parse."""
     messy = Grammar("""
         bold_text  = bold_open  # commenty comment
                      text  # more comment
                      bold_close
         text       = ~"[A-Z 0-9]*"i
         bold_open  = "((" bold_close =  "))"
         """)
     tree = messy.parse('((booyah))')
     ok_(tree is not None)
Example #35
0
    def test_unconnected_custom_rules(self):
        """A custom rule that no other rule refers to must still be part of
        the grammar, and usable as the default.

        Also covers constructing a Grammar without any rule text.

        """
        def one_char(text, pos):
            # Always consumes exactly one character.
            return pos + 1

        grammar = Grammar(one_char=one_char).default('one_char')
        source = '4'
        tree = grammar.parse(source)
        eq_(tree, Node('one_char', source, 0, 1))
Example #36
0
class TexnoMagicLanguage:
    """Holds a grammar compiled once from ``TEXNO_MAGIC_GRAMMAR``."""

    def __init__(self):
        self.grammar = Grammar(TEXNO_MAGIC_GRAMMAR)

    def parse(self, text):
        """
        Parse a TexnoMagic spell and return the data representation that
        ``TexnoVisitor`` builds from the parse tree.
        """
        parsed = self.grammar.parse(text)
        visitor = TexnoVisitor()
        return visitor.visit(parsed)
Example #37
0
def Assume(*args):
    """Parse the assumption expression in ``args[0]`` and walk it with an
    ``assume2logic.AssumptionVisitor``.

    With two arguments ``args[1]`` is passed to ``storeInd`` first; with
    three, ``args[2]`` is additionally passed to ``storeArr``. With more than
    three arguments the text is parsed but the tree is not visited.
    """
    grammar = Grammar(r"""
    
        expr        = expr1 / expr2 / expr3 /expr4 /expr5 / expr6 /expr7
        expr1       = expr_dist1 logic_op num_log
        expr2       = expr_dist2 logic_op num_log
        expr3       = classVar ws logic_op ws value
        expr4       = classVarArr ws logic_op ws value
        expr5       = classVar ws logic_op ws classVar
        expr6       = classVarArr ws logic_op ws classVarArr
        expr7       = "True"
        expr_dist1  = op_beg?abs?para_open classVar ws arith_op ws classVar para_close op_end?
        expr_dist2  = op_beg?abs?para_open classVarArr ws arith_op ws classVarArr para_close op_end?
        classVar    = variable brack_open number brack_close
        classVarArr = variable brack_open variable brack_close
        para_open   = "("
        para_close  = ")"
        brack_open  = "["
        brack_close = "]"
        variable    = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        logic_op    = ws (geq / leq / eq / neq / and / lt / gt) ws
        op_beg      = number arith_op
        op_end      = arith_op number
        arith_op    = (add/sub/div/mul)
        abs         = "abs"
        add         = "+"
        sub         = "-"
        div         = "/"
        mul         = "*"
        lt          = "<"
        gt          = ">"
        geq         = ">="
        leq         = "<="
        eq          = "="
        neq         = "!="
        and         = "&"
        ws          = ~"\s*"
        value       = ~"\d+"
        num_log     = ~"[+-]?([0-9]*[.])?[0-9]+"
        number      = ~"[+-]?([0-9]*[.])?[0-9]+"
        """)

    parsed = grammar.parse(args[0])
    visitor = assume2logic.AssumptionVisitor()
    arg_count = len(args)
    if arg_count > 3:
        # More than three arguments: nothing further happens.
        return
    if arg_count >= 2:
        visitor.storeInd(args[1])
    if arg_count == 3:
        visitor.storeArr(args[2])
    visitor.visit(parsed)
Example #38
0
    def test_unconnected_custom_rules(self):
        """A custom rule that no other rule refers to must still be part of
        the grammar, and usable as the default.

        Also covers constructing a Grammar without any rule text.

        """
        def one_char(text, pos):
            # Always consumes exactly one character.
            return pos + 1

        grammar = Grammar(one_char=one_char).default('one_char')
        source = '4'
        tree = grammar.parse(source)
        expected = Node(grammar['one_char'], source, 0, 1)
        self.assertEqual(tree, expected)
    def test_expressions_from_rules(self):
        """Check that ``Grammar`` compiles rule text into something that
        parses.

        Whether the right ``Expression`` tree is built is covered by
        ``RuleGrammarTests``; this only exercises the ``Grammar`` base
        class's ``_expressions_from_rules``.

        """
        greetings = Grammar('greeting = "hi" / "howdy"')
        parsed = greetings.parse("hi")
        # The alternation wraps the literal that matched.
        matched_literal = Node("", "hi", 0, 2)
        eq_(parsed, Node("greeting", "hi", 0, 2, children=[matched_literal]))
Example #40
0
    def funcReadXml(self):
        """Parse ``self.fileName`` line by line with the grammar below,
        collect feature names, types, minimums and maximums from a
        ``dataFrameCreate`` visitor, and hand them to ``generateData``."""
        grammar = Grammar(r"""
    
        expr             = name / type / minimum / maximum / xmlStartDoc / xmlStartInps / xmlEndInps / xmlStartInp /
                                                                    xmlEndInp / xmlStartValTag /xmlEndValTag
        name             = xmlStartNameTag feName xmlEndNameTag
        type             = xmlStartTypeTag feType xmlEndTypeTag
        minimum          = xmlStartMinTag number xmlEndMinTag
        maximum          = xmlStartMaxTag number xmlEndMaxTag
        xmlStartDoc      = '<?xml version="1.0" encoding="UTF-8"?>'
        xmlStartInps     = "<Inputs>"
        xmlEndInps       = "<\Inputs>"
        xmlStartInp      = "<Input>"
        xmlEndInp        = "<\Input>"
        xmlStartNameTag  = "<Feature-name>"
        xmlEndNameTag    = "<\Feature-name>"
        xmlStartTypeTag  = "<Feature-type>"
        xmlEndTypeTag    = "<\Feature-type>"
        xmlStartValTag   = "<Value>"
        xmlEndValTag     = "<\Value>"
        xmlStartMinTag   = "<minVal>"
        xmlEndMinTag     = "<\minVal>"
        xmlStartMaxTag   = "<maxVal>"
        xmlEndMaxTag     = "<\maxVal>"
        feName           = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        feType           = ~"[A-Z 0-9]*"i
        number           = ~"[+-]?([0-9]*[.])?[0-9]+"
        """)

        with open(self.fileName) as xml_file:
            raw_lines = xml_file.readlines()
        stripped_lines = [raw.strip() for raw in raw_lines]

        names, types, minimums, maximums = [], [], [], []
        for line in stripped_lines:
            parsed = grammar.parse(line)
            # A fresh visitor per line; its fields say what the line held.
            visitor = dataFrameCreate()
            visitor.visit(parsed)
            if visitor.feName is not None:
                names.append(visitor.feName)
            if visitor.feType is not None:
                types.append(visitor.feType)
            # -99999 and 0 are the values the code treats as "not set".
            if visitor.feMinVal != -99999:
                minimums.append(visitor.feMinVal)
            if visitor.feMaxVal != 0:
                maximums.append(visitor.feMaxVal)

        generator = generateData(names, types, minimums, maximums)
        generator.funcGenerateTestData()
    def test_inner_rule_succeeding(self):
        """Make sure ``parse()`` fails and blames the
        rightward-progressing-most named Expression when an Expression isn't
        satisfied.

        Make sure ParseErrors have nice Unicode representations.

        The test fails explicitly when ``parse()`` raises nothing, so a
        silently successful parse cannot be mistaken for a pass.

        """
        grammar = Grammar("""
            bold_text = open_parens text close_parens
            open_parens = "(("
            text = ~"[a-zA-Z]+"
            close_parens = "))"
            """)
        text = '((fred!!'
        try:
            grammar.parse(text)
        except ParseError as error:
            eq_(error.pos, 6)
            eq_(error.expr, grammar['close_parens'])
            eq_(error.text, text)
            eq_(str(error), "Rule 'close_parens' trying to match \"))\" didn't match at '!!' (line 1, column 7).")
        else:
            # Without this branch the assertions above would simply be skipped.
            raise AssertionError('parse() was expected to raise ParseError')
Example #42
0
    def test_expressions_from_rules(self):
        """Check that ``Grammar`` compiles rule text into something that
        parses.

        Whether the right ``Expression`` tree is built is covered by
        ``RuleGrammarTests``; this only exercises the ``Grammar`` base
        class's ``_expressions_from_rules``.

        """
        greetings = Grammar('greeting = "hi" / "howdy"')
        parsed = greetings.parse('hi')
        # The alternation wraps the literal that matched.
        matched_literal = Node(Literal('hi'), 'hi', 0, 2)
        expected = Node(greetings['greeting'], 'hi', 0, 2,
                        children=[matched_literal])
        self.assertEqual(parsed, expected)
    def result(self):
        """Parse ``self._replacement`` with the replacement grammar and
        return what ``ReplacementVisitor`` produces for the tree."""

        replacement_grammar = Grammar("""
            replacement = ws replacevalue transformationlist ws

            replacevalue = expression / varname / literal

            transformationlist = transformation*
            transformation = ws comma ws transname transarglist

            transarglist = transarg*
            transarg = singlequotedstr / doublequotedstr / unquotedarg

            expression = term rws operator rws term

            term = numberliteral / varname

            varname = ~"[a-z_][a-z0-9_]*"i
            transname = ~"[a-z_][a-z0-9_]*"i

            literal = numberliteral / stringliteral
            numberliteral = ~"(\+|-)?\d+([.]\d+)?"
            stringliteral = singlequotedstr / doublequotedstr

            doublequotedstr = ws dblq notdblq dblq
            singlequotedstr = ws sngq notsngq sngq
            unquotedarg = ws notwsorcomma

            operator = plus / minus / times / divide

            plus = "+"
            minus = "-"
            times = "*"
            divide = "/"

            rws = ~"\s+"
            ws = ~"\s*"
            comma = ","
            notwsorcomma = ~"[^\s,]+"

            dblq = "\\""
            notdblq = ~"[^\\"]*"

            sngq = "'"
            notsngq = ~"[^']*"
        """)

        parsed = replacement_grammar.parse(self._replacement)
        visitor = ReplacementVisitor(self._data)

        return visitor.visit(parsed)
Example #44
0
    def result(self):
        """Parse ``self._replacement`` with the replacement grammar and
        return what ``ReplacementVisitor`` produces for the tree."""

        replacement_grammar = Grammar("""
            replacement = ws replacevalue transformationlist ws

            replacevalue = expression / varname / literal

            transformationlist = transformation*
            transformation = ws comma ws transname transarglist

            transarglist = transarg*
            transarg = singlequotedstr / doublequotedstr / unquotedarg

            expression = term rws operator rws term

            term = numberliteral / varname

            varname = ~"[a-z_][a-z0-9_]*"i
            transname = ~"[a-z_][a-z0-9_]*"i

            literal = numberliteral / stringliteral
            numberliteral = ~"(\+|-)?\d+([.]\d+)?"
            stringliteral = singlequotedstr / doublequotedstr

            doublequotedstr = ws dblq notdblq dblq
            singlequotedstr = ws sngq notsngq sngq
            unquotedarg = ws notwsorcomma

            operator = plus / minus / times / divide

            plus = "+"
            minus = "-"
            times = "*"
            divide = "/"

            rws = ~"\s+"
            ws = ~"\s*"
            comma = ","
            notwsorcomma = ~"[^\s,]+"

            dblq = "\\""
            notdblq = ~"[^\\"]*"

            sngq = "'"
            notsngq = ~"[^']*"
        """)

        parsed = replacement_grammar.parse(self._replacement)
        visitor = ReplacementVisitor(self._data)

        return visitor.visit(parsed)
Example #45
0
 def test_simple_custom_rules(self):
     """Exercise a custom rule written as a two-argument callable."""
     def digit_rule(text, pos):
         # Consume one character when it is a digit, otherwise report no match.
         if text[pos].isdigit():
             return pos + 1
         return None

     grammar = Grammar("""
         bracketed_digit = start digit end
         start = '['
         end = ']'""",
         digit=digit_rule)
     source = '[6]'
     tree = grammar.parse(source)
     kids = [
         Node(grammar['start'], source, 0, 1),
         Node(grammar['digit'], source, 1, 2),
         Node(grammar['end'], source, 2, 3),
     ]
     expected = Node(grammar['bracketed_digit'], source, 0, 3, children=kids)
     self.assertEqual(tree, expected)
Example #46
0
 def test_simple_custom_rules(self):
     """Exercise a custom rule written as a two-argument callable."""
     def digit_rule(text, pos):
         # Consume one character when it is a digit, otherwise report no match.
         if text[pos].isdigit():
             return pos + 1
         return None

     grammar = Grammar("""
         bracketed_digit = start digit end
         start = '['
         end = ']'""",
         digit=digit_rule)
     source = '[6]'
     tree = grammar.parse(source)
     kids = [
         Node('start', source, 0, 1),
         Node('digit', source, 1, 2),
         Node('end', source, 2, 3),
     ]
     eq_(tree, Node('bracketed_digit', source, 0, 3, children=kids))
Example #47
0
 def test_callability_custom_rules(self):
     """Custom grammar rules may be supplied as a plain function, a bound
     method, or a method descriptor.
     """
     custom_rules = dict(
         function=function_rule,
         method=self.method_rule,
         descriptor=self.rules['descriptor_rule'],
     )
     grammar = Grammar("""
         default = function method descriptor
         """,
         **custom_rules
     )
     tree = grammar.parse('functionmethoddescriptor')
     # Each child should carry the name of the rule that produced it.
     seen = []
     for child in tree.children:
         seen.append(child.expr.name)
     self.assertEqual(seen, ['function', 'method', 'descriptor'])
Example #48
0
class PEGSyntaxRule(BaseSyntaxRule):
    """Syntax rule backed by a grammar compiled from ``params['peg']``."""

    def __init__(self, params: dict):
        """Initialise the base rule and compile the grammar text.

        Args:
            params: rule parameters; ``params['peg']`` is handed to
                ``Grammar`` and ``params['name']`` is used in the error
                message.

        Raises:
            InvalidSchemaError: if building the grammar raises any
                ``Exception``; the original error is chained as the cause.
        """
        BaseSyntaxRule.__init__(self, params)
        try:
            self._grammar = Grammar(params['peg'])
        except Exception as exc:
            # Chain explicitly so the underlying grammar error stays visible
            # in the traceback as the direct cause.
            raise InvalidSchemaError(
                f'Failed to parse PEG grammar for {params["name"]}') from exc

    def parse(self, value):
        """Parse ``value`` with the compiled grammar and return the result.

        Raises:
            SyntaxParseError: if the grammar raises ``ParseError``; the
                original error is chained as the cause.
        """
        try:
            return self._grammar.parse(value)
        except ParseError as exc:
            raise SyntaxParseError() from exc
Example #49
0
    def test_parens(self):
        """Check that a parenthesized group is quantified as a unit."""
        grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
        # Make sure it's not as if the parens aren't there:
        assert_raises(ParseError, grammar.parse, 'chitty bangbang')

        # Expected rendering of the tree, one line per node.
        expected_lines = [
            '<Node called "sequence" matching "chitty bang bang">',
            '    <Node called "__Literal__" matching "chitty">',
            '    <Node called "__OneOrMore__" matching " bang bang">',
            '        <Node called "__Sequence__" matching " bang">',
            '            <Node called "__Literal__" matching " ">',
            '            <Node called "__Literal__" matching "bang">',
            '        <Node called "__Sequence__" matching " bang">',
            '            <Node called "__Literal__" matching " ">',
            '            <Node called "__Literal__" matching "bang">',
        ]
        rendered = str(grammar.parse('chitty bang bang'))
        eq_(rendered, '\n'.join(expected_lines))
Example #50
0
def test(inFP):
    """Check the grammar read from the file ``inFP`` against sample inputs.

    Every entry of ``bad_inputs`` must make ``parse`` raise ``GFLError``
    (the error is printed). Every entry of ``good_inputs`` must be parsed by
    the grammar to a non-None tree, which is then printed after being passed
    through ``walk`` and ``analyze``.

    Raises:
        AssertionError: if a bad input is accepted, or a good input parses
            to None.
    """
    with open(inFP) as inF:
        grammar = Grammar(clean(inF.read()))

    good_inputs = ['{the quick brown} > fox > jumps < over < ({the lazy} > dog)',
                   'They > conspired < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)',
                   'a (** b c) d**', 'a (** b c**)', '::~1 :-)~1 ~(-: (0_0) ~(0_0)~2 *_*~3 )~1 ~( <*_*>',
                   '''
                      Found** < (the scarriest mystery door*)
                      Found < in < (my > school)
                      I’M** < (SO > CURIOUS)
                      D:**
                      my = I’M''',
                   '''
                      thers** < still
                      thers < ((1 1/2) > hours < till < (Biebs > bday))
                      (thers like 1 1/2 hours)
                      thers < here
                      (:P)**''',
                   '''
                      If < (it~1 > 's < restin')
                      I > 'll < [wake up] < it~2
                      If > 'll**
                      it~1 = it~2''',
                   '''
                      {Our three} > weapons > are < $a
                      $a :: {fear surprise efficiency} :: {and~1 and~2}
                      ruthless > efficiency''',
                   '''
                      We > are < knights < the
                      knights < (who > say < Ni)
                      who = knights''']
    bad_inputs = ['{the quick brown} > fox > jumps < over < {the lazy} > dog', 'the > {lazy dog}', 'the < lazy > dog',
                  'They > conspired* < to < defenestrate < themselves\n(conspired* to defenestrate on < Tuesday)',
                  'big > **', '{** happy} > days', '(my big** fat Greek wedding*)', 'big** > day',
                  'hi :: there', ':-)', '(-:', '(0_0)~1', '*_*', ') (']
    for x in bad_inputs:
        try:
            parse(x, grammar)
        except GFLError as ex:
            print(ex)
        else:
            # Explicit raise instead of ``assert False``: assert statements
            # are removed under ``python -O``, which would let an accepted
            # bad input slip through unnoticed.
            raise AssertionError('bad input was accepted: %r' % (x,))
    for x in good_inputs:
        p = grammar.parse(x)
        if p is None:
            raise AssertionError('good input parsed to None: %r' % (x,))
        print(x)
        pprint(analyze(walk(p)))
Example #51
0
    def test_lazy_custom_rules(self):
        """Check that LazyReferences placed inside a custom rule get resolved.

        Along the way this passes a ready-made Expression as a custom rule
        and selects that custom rule as the grammar's default.

        """
        # Custom rule built by hand; its members refer to textual rules by name.
        forty_five_rule = Sequence(LazyReference('four'),
                                   LazyReference('five'),
                                   name='forty_five')
        grammar = Grammar("""
            four = '4'
            five = '5'""",
            forty_five=forty_five_rule).default('forty_five')
        source = '45'
        expected = Node('forty_five', source, 0, 2, children=[
            Node('four', source, 0, 1),
            Node('five', source, 1, 2),
        ])
        eq_(grammar.parse(source), expected)
Example #52
0
def lex(text):
    """Parse ``text`` with the term grammar defined inline below.

    Anything from a ``%`` to the end of its line is removed from ``text``
    before parsing.

    Returns:
        Whatever ``grammar.parse`` returns for the stripped text.

    Raises:
        ParseError: wrapping the parsimonious parse error when the text does
            not match the grammar.
    """
    # Backslashes destined for the grammar's regexes are doubled so Python
    # itself never sees an invalid escape sequence (\s, \#, \., \-), which
    # newer interpreters warn about. The resulting string value is the same
    # as before.
    grammar = Grammar("""\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\\s*"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map   = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\"|[^"])*' '"'
    binary = "<<" string ">>"
    boolean = "true" / "false"
    number = ~"[0-9]+\\#[0-9a-zA-Z]+" / ~"[0-9]+(\\.[0-9]+)?(e\\-?[0-9]+)?"
    """)
    # Strip %-comments line by line ((?m) makes $ match at each line end).
    nocomments = re.sub("(?m)%.*?$", "", text)
    try:
        return grammar.parse(nocomments)
    except parsimonious.exceptions.ParseError as e:
        # Re-raise as this module's own ParseError type.
        raise ParseError(e)
Example #53
0
def lex(text):
    """Parse ``text`` with the term grammar defined inline below.

    In this variant ``%`` comments are consumed by the grammar's ``_``
    (whitespace) rule rather than being stripped beforehand.

    Returns:
        Whatever ``grammar.parse`` returns for ``text``.

    Raises:
        ParseError: wrapping the parsimonious parse error when the text does
            not match the grammar.
    """
    # Backslashes destined for the grammar's regexes are doubled so Python
    # itself never sees an invalid escape sequence (\s, \-, \#, \., \+),
    # which newer interpreters warn about. The resulting string value is the
    # same as before.
    grammar = Grammar("""\
    entry = (term _ "." _)* _
    term = boolean / atom / list / tuple / map / string / binary / number
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\\s*" (~"%[^\\r\\n]*\\s*")*
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    tuple = ( _ "{" _ term (_ "," _ term)* _ "}" ) / ( _ "{" _ "}")
    map   = ( _ "#{" _ keyvalue (_ "," _ keyvalue)* _ "}" ) / ( _ "#{" _ "}")
    keyvalue = term _ "=>" _ term _
    string = '"' ~r'(\\\\.|[^"])*' '"'
    binary = ( "<<" _ binary_part ( _ "," _ binary_part)* _ ">>") / ("<<" _ ">>")
    binary_part = string / char_number
    char_number = ~"[0-9]+"
    boolean = "true" / "false"
    number = ~"\\-?[0-9]+\\#[0-9a-zA-Z]+" / ~"\\-?[0-9]+(\\.[0-9]+)?((e|E)(\\-|\\+)?[0-9]+)?"
    """)
    try:
        return grammar.parse(text)
    except parsimonious.exceptions.ParseError as e:
        # Re-raise as this module's own ParseError type.
        raise ParseError(e)
Example #54
0
def lex(text):
    """Parse ``text`` with the statement grammar defined inline below.

    The text is first passed through ``add_indents``; the grammar then
    refers to literal ``_INDENT_`` / ``_DEDENT_`` markers in its
    ``multiline`` rule. ``#`` comments are consumed by the ``_`` rule.

    Returns:
        Whatever ``grammar.parse`` returns for the preprocessed text.

    Raises:
        ParseError: wrapping the parsimonious parse error when the text does
            not match the grammar.
    """
    # Backslashes destined for the grammar's regexes are doubled so Python
    # itself never sees an invalid escape sequence (\s, \., \-), which newer
    # interpreters warn about. The resulting string value is the same as
    # before.
    grammar = Grammar("""\
    entry = _ (statement _)* _
    statement = multiline / single
    multiline = atom _ args _ ":" _ "_INDENT_" _ (statement _)+ "_DEDENT_"
    single = atom _ args
    atom = ~"[a-z][0-9a-zA-Z_]*" / ("'" ~"[^']*" "'")
    _ = ~"\\s*" (~"#[^\\r\\n]*\\s*")*
    args = ( _ map ) / ( _ "(" _ term (_ "," _ term)* _ ")" ) / (_ "(" _ ")")
    map = "(" _ kv (_ "," _ kv)* _ ")"
    list = ( _ "[" _ term (_ "," _ term)* _ "]" ) / ( _ "[" _ "]")
    kv = term _ "=" _ term _
    term = unumber / logic_op / single / list / string / atom / number
    logic_op = (string / number) _ ("<=" / ">=" / "<" / ">" / "==") _ (string / number)
    string = '"' ~'[^"]*' '"'
    number = ~"[0-9]+(\\.[0-9]+)?(e\\-?[0-9]+)?[GKM]?"
    unumber = (number / single) _ atom
    """)
    try:
        return grammar.parse(add_indents(text))
    except parsimonious.exceptions.ParseError as e:
        # Re-raise as this module's own ParseError type.
        raise ParseError(e)
Example #55
0
    def test_complex_custom_rules(self):
        """Exercise a custom rule written as a five-argument callable.

        The callable returns a real Node, so that path is covered as well.

        """
        def digit(text, pos, cache, error, grammar):
            # This implementation creates no node of its own for `digit`;
            # it simply hands back whatever `real_digit` matched. Whether
            # that can cause trouble is unclear -- none is known -- but it
            # is probably not a pattern to imitate.
            return grammar['real_digit']._match(text, pos, cache, error)

        grammar = Grammar("""
            bracketed_digit = start digit end
            start = '['
            end = ']'
            real_digit = '6'""",
            digit=digit)
        source = '[6]'
        expected = Node('bracketed_digit', source, 0, 3, children=[
            Node('start', source, 0, 1),
            Node('real_digit', source, 1, 2),
            Node('end', source, 2, 3),
        ])
        eq_(grammar.parse(source), expected)
Example #56
0
                out('}}\n\n')
            except GenerationImpossible:
                print get_go_type(type)
                result = result[:old_len]

    return ''.join(result)


# Positional command-line arguments: input path, package name (passed to
# convert_to_go), and output path.
input_file = sys.argv[1]
package_name = sys.argv[2]
output_file = sys.argv[3]

# Read the input inside a ``with`` block so the file handle is closed right
# away instead of being left open until garbage collection.
# NOTE(review): ``.decode()`` on the read result and ``iteritems()`` below
# are Python 2 idioms -- confirm the interpreter this script targets.
with open(input_file) as source_file:
    text = source_file.read().decode('utf-8')
# Remove ``//`` comments up to the end of the line, keeping the line break.
text = re.sub(r'//.*($|[\r\n])', '\\1', text)

nodes = grammar.parse(text + '\n')
if nodes is not None:
    types = Compiler().visit(nodes)
    main_type, types = resolve_types(types)

    # Reverse lookup from each type to its name, filled in while each type's
    # attributes are resolved.
    type_names = {}
    for name, type in types.iteritems():
        resolve_attributes(type)
        type_names[type] = name

    out = convert_to_go(types, package_name)
    with open(output_file, 'w') as of:
        of.write(out)

    # template = open('test.template.cpp').read()
    # template = template.replace('{STRUCTS}', structs)
Example #57
0
def test_not_really_json_parsing():
    """Time the parsing of a JSON document as a rough speed baseline.

    JSON is not claimed to be a representative or revealing grammar, and the
    grammar below is naive, unoptimized and incorrect, so the numbers are no
    basis for comparison with other parsers. They are only meant for
    comparing one version of Parsimonious with another.

    """
    # These number and repetition values seem to keep results within 5% of the
    # difference between min and max. Running several single-parse timings and
    # taking the min gives more consistent results than raising NUMBER and
    # averaging the outliers away.
    NUMBER = 1
    REPEAT = 5

    one_father = """{
        "id" : 1,
        "married" : true,
        "name" : "Larry Lopez",
        "sons" : null,
        "daughters" : [
          {
            "age" : 26,
            "name" : "Sandra"
            },
          {
            "age" : 25,
            "name" : "Margaret"
            },
          {
            "age" : 6,
            "name" : "Mary"
            }
          ]
        }"""
    # Sixty copies of the same record make up the document to parse.
    document = '{"fathers" : [' + ','.join([one_father] * 60) + ']}'
    grammar = Grammar(r"""
        value = space (string / number / object / array / true_false_null)
                space

        object = "{" members "}"
        members = (pair ("," pair)*)?
        pair = string ":" value
        array = "[" elements "]"
        elements = (value ("," value)*)?
        true_false_null = "true" / "false" / "null"

        string = space "\"" chars "\"" space
        chars = ~"[^\"]*"  # TODO implement the real thing
        number = (int frac exp) / (int exp) / (int frac) / int
        int = "-"? ((digit1to9 digits) / digit)
        frac = "." digits
        exp = e digits
        digits = digit+
        e = "e+" / "e-" / "e" / "E+" / "E-" / "E"

        digit1to9 = ~"[1-9]"
        digit = ~"[0-9]"
        space = ~"\s*"
        """)

    def parse_document():
        grammar.parse(document)

    def enable_gc():
        # Setup step: keep the GC on so its cost is part of the measurement.
        gc.enable()

    timings = repeat(parse_document, enable_gc, repeat=REPEAT, number=NUMBER)
    total_seconds = min(timings)
    seconds_each = total_seconds / NUMBER

    kb = len(document) / 1024.0
    report = (seconds_each, kb, kb / seconds_each)
    print('Took %.3fs to parse %.1fKB: %.0fKB/s.' % report)
Example #58
0
 def test_not(self):
     """Check that a ``!`` ("not") predicate is parsed and takes effect."""
     grammar = Grammar(r'''not_arp = !"arp" ~"[a-z]+"''')
     parse = grammar.parse
     # Input that the predicate excludes must be rejected...
     assert_raises(ParseError, parse, 'arp')
     # ...while other lowercase input still produces a tree.
     tree = parse('argle')
     ok_(tree is not None)