Example #1
0
 def test_badly_circular(self):
     """Pointlessly circular rule references ought to be rejected by the
     grammar compiler."""
     # Detection is not implemented yet, so the test is skipped up front;
     # nothing below this raise is executed for now.
     raise SkipTest(
         'We have yet to make the grammar compiler detect these.')
     circular_rules = """
         foo = bar
         bar = foo
         """
     grammar = Grammar(circular_rules)
Example #2
0
 def test_resolve_refs_order(self):
     """Smoke test for a rule ordering in which lazy references don't get
     resolved: building the grammar and parsing with it must not blow up."""
     rules = """
         expression = "(" terms ")"
         terms = term+
         term = number
         number = ~r"[0-9]+"
         """
     # No assertion on the tree itself; completing without an exception is
     # the whole check.
     Grammar(rules).parse('(34)')
Example #3
0
def parse(source):
    """Parse tbon source text.

    Args:
        source: The tbon text to parse.

    Returns:
        The value returned by ``Grammar.parse`` for ``source``.

    Any exception raised by ``Grammar.parse`` propagates to the caller.
    Note that the grammar is compiled anew on every call.
    """
    # The grammar is a raw string on purpose: its regexes contain backslash
    # sequences (\*, \d, \., \s) that are not valid Python escapes.  In a
    # non-raw literal they trigger invalid-escape warnings; as a raw literal
    # the resulting text is exactly the same, without the warnings.
    grammar = Grammar(r"""
        score = wsc* music*
        music = (partswitch*  bar+)+ wsc*
        partswitch = "P=" partnum
        wsc = comment / ws+
        comment = ws* ~r"/\*.*?\*/"s ws*
        bar = (wsc* (meta / beat) wsc+)+ barline
        meta = beatspec / key / tempo /
               relativetempo / velocity /
               de_emphasis / channel / instrument
        beatspec = "B=" ("2." / "2" / "4." / "4" / "8." / "8")
        key = "K=" keyname
        keyname = ~r"[a-gA-G](@|#)?"
        tempo = "T=" floatnum
        relativetempo = "t=" floatnum
        velocity = "V=" floatnum
        de_emphasis = "D=" floatnum
        channel = "C=" chnum
        partnum = ~r"[1-9][0-9]*"i
        instrument = "I=" inum
        inum = ~r"[1-9][0-9]*"i
        floatnum = ~r"\d*\.?\d+"i
        chnum = ~r"\d*\.?\d+"i
        beat = subbeat+
        barline = "|" / ":"
        extendable = chord / roll / ornament / pitch / rest
        pitch = octave* alteration? pitchname
        chord = chordstart chorditem chorditem* rparen
        chordstart = "("
        chorditem = chordpitch / chordhold / chordrest
        chordpitch = octave* alteration? pitchname
        chordhold = '-'
        chordrest = "_" / "z"
        rparen = ")"
        roll = rollstart pitch pitch+ rparen
        rollstart = "(:"
        ornament = ornamentstart pitch pitch+ rparen
        ornamentstart = "(~"
        subbeat = extendable / hold
        rest = "_" / "z"
        hold = "-"
        octave = octave_up / octave_down
        alteration = doublesharp / sharp / doubleflat / flat / natural
        doublesharp = "𝄪" / "##"
        sharp = "♯" / "#"
        doubleflat = "𝄫" / "@@"
        flat = "♭" / "@"
        natural = "♮" / "%"
        octave_up = "^"
        octave_down = "/"
        pitchname = ~"[a-g1-7]"i
        ws = ~r"\s*"i
        """)
    return grammar.parse(source)
    def _grammar(self):
        """Build and return the PEG ``Grammar`` for ground Problog."""
        # The rules are deliberately laid out in this rather odd shape so
        # that visiting the resulting parse tree works.
        problog_rules = r"""
            program           = _ clauses
            clauses           = clause*
            clause            = predicate dot
            predicate         = prob_ann / rule / term

            rule              = term turnstile conjunction
            conjunction       = term conjunction_opt
            conjunction_opt   = conjunction_more?
            conjunction_more  = comma conjunction

            prob_ann          = prob_ann_heads prob_ann_rule_opt
            prob_ann_heads    = prob_fact prob_fact_opt
            prob_fact_opt     = prob_fact_more?
            prob_fact_more    = semicolon prob_ann_heads
            prob_ann_rule_opt = prob_ann_rule?
            prob_ann_rule     = turnstile conjunction
            prob_fact         = probability doublecolon term

            term              = negation_opt word_or_num arguments_opt
            word_or_num       = decimal_or_frac / word
            negation_opt      = negation?
            arguments_opt     = arguments?
            arguments         = lparen arguments_list rparen
            arguments_list    = term arguments_more_o
            arguments_more_o  = arguments_more?
            arguments_more    = comma arguments_list

            probability       = prob_num / prob_tunable_num / prob_tunable_none
            prob_num          = decimal_or_frac _
            prob_tunable_num  = _ tunable lparen decimal_or_frac rparen _
            prob_tunable_none = _ tunable_empty _
            decimal_or_frac   = decimal / fraction
            fraction          = number slash number
            # could to this in a better way, but this works and we are way over time already
            word              = ~r"([a-zA-Z0-9_\[\]]+|\"[a-zA-Z0-9_\-\'\.\/\=\<\>\+\[\]]*\"|\'[a-zA-Z0-9_\-\'\.\/\=\<\>\+\[\]]*\')"
            number            = ~r"[0-9]*"
            decimal           = ~r"[0-9]*\.[0-9]*"
            dot               = _ "." _
            comma             = _ "," _
            semicolon         = _ ";" _
            lparen            = _ "(" _
            rparen            = _ ")" _
            slash             = _ "/" _
            doublecolon       = _ "::" _
            turnstile         = _ ":-" _
            negation          = _ ~r"\\\+" _
            tunable           = _ "t" _
            tunable_empty     = "t(_)"

            _                 = meaninglessness*
            meaninglessness   = ~r"\s+"
        """
        return Grammar(problog_rules)
Example #5
0
 def test_no_named_rule_succeeding(self):
     """Make sure ParseErrors have sane printable representations even if we
     never succeeded in matching any named expressions."""
     grammar = Grammar('''bork = "bork"''')
     try:
         grammar.parse('snork')
     except ParseError as error:
         # The failure should be reported at the very start of the text and
         # attributed to the grammar's only rule.
         eq_(error.pos, 0)
         eq_(error.expr, grammar['bork'])
         eq_(error.text, 'snork')
     else:
         # Without this branch the test would pass silently if parse()
         # stopped raising, and none of the checks above would ever run.
         raise AssertionError(
             "Expected grammar.parse('snork') to raise ParseError.")
Example #6
0
 def __init__(self, utterances: List[str], tokenizer=None) -> None:
     """Store the utterances, tokenize them, and build the grammar.

     ``tokenizer`` falls back to a fresh ``WordTokenizer()`` when the
     argument is omitted (or otherwise falsy).
     """
     self.utterances: List[str] = utterances
     self.tokenizer = tokenizer or WordTokenizer()
     # One tokenized entry per utterance, in the same order.
     self.tokenized_utterances = [
         self.tokenizer.tokenize(text) for text in self.utterances
     ]
     # The remaining attributes are derived by helper methods; the order
     # of these calls is kept as-is.
     self.valid_actions: Dict[str, List[str]] = \
         self.init_all_valid_actions()
     self.grammar_str: str = self.get_grammar_str()
     self.grammar_with_context: Grammar = Grammar(self.grammar_str)
Example #7
0
    def test_lazy_default_rule(self):
        """The default rule must end up being a real rule even when the
        first rule contains forward references and is therefore a
        LazyReference at some point during grammar compilation.

        """
        grammar = Grammar(r"""
            styled_text = text
            text        = "hi"
            """)
        parsed = grammar.parse('hi')
        expected = Node(grammar['text'], 'hi', 0, 2)
        self.assertEqual(parsed, expected)
Example #8
0
    def __init__(self, utterances, tokenizer=None):
        """Store the utterances, tokenize them, and build the grammar.

        ``tokenizer`` falls back to a fresh ``WordTokenizer()`` when the
        argument is omitted (or otherwise falsy).
        """
        self.utterances = utterances
        self.tokenizer = tokenizer if tokenizer else WordTokenizer()
        # One tokenized entry per utterance, in the same order.
        self.tokenized_utterances = [self.tokenizer.tokenize(utterance) for utterance in self.utterances]
        valid_actions, linking_scores = self.init_all_valid_actions()
        self.valid_actions = valid_actions

        # This has shape (num_entities, num_utterance_tokens).
        self.linking_scores: numpy.ndarray = linking_scores
        # Annotated as ``str``: variable annotations require Python 3, where
        # the ``unicode`` builtin no longer exists.
        self.grammar_str: str = self.get_grammar_str()
        self.grammar_with_context: Grammar = Grammar(self.grammar_str)
Example #9
0
    def test_unicode_keep_parens(self):
        """Converting a grammar to text must not strip the parentheses
        around grouped sub-expressions.

        """
        # (grammar source, expected text rendering) pairs, checked in order.
        cases = (
            # ZeroOrMore
            ('foo = "bar" ("baz" "eggs")* "spam"',
             u"foo = 'bar' ('baz' 'eggs')* 'spam'"),
            # OneOf
            ('foo = "bar" ("baz" / "eggs") "spam"',
             u"foo = 'bar' ('baz' / 'eggs') 'spam'"),
            # Lookahead
            ('foo = "bar" &("baz" "eggs") "spam"',
             u"foo = 'bar' &('baz' 'eggs') 'spam'"),
            # Multiple sequences
            ('foo = ("bar" "baz") / ("baff" "bam")',
             u"foo = ('bar' 'baz') / ('baff' 'bam')"),
        )
        for rule_source, rendered in cases:
            self.assertEqual(str(Grammar(rule_source)), rendered)
Example #10
0
 def test_parse_with_leftovers(self):
     """Make sure ``parse()`` reports where we started failing to match,
     even if a partial match was successful."""
     grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
     try:
         # Only 'chitty bang' fits the rule; the trailing 'bang' has no
         # separating space and is left over.
         grammar.parse('chitty bangbang')
     except IncompleteParseError as error:
         eq_(
             str(error),
             "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12)."
         )
     else:
         # Without this branch the test would pass silently if parse()
         # stopped raising, and the message check would never run.
         raise AssertionError(
             "Expected grammar.parse('chitty bangbang') to raise "
             "IncompleteParseError.")
Example #11
0
    def test_unicode_keep_parens(self):
        """Converting a grammar to text must not strip the parentheses
        around grouped sub-expressions.

        """
        # (grammar source, expected text rendering) pairs, checked in order.
        round_trips = (
            # ZeroOrMore
            ('foo = "bar" ("baz" "eggs")* "spam"',
             u'foo = "bar" ("baz" "eggs")* "spam"'),
            # OneOf
            ('foo = "bar" ("baz" / "eggs") "spam"',
             u'foo = "bar" ("baz" / "eggs") "spam"'),
            # Lookahead
            ('foo = "bar" &("baz" "eggs") "spam"',
             u'foo = "bar" &("baz" "eggs") "spam"'),
            # Multiple sequences
            ('foo = ("bar" "baz") / ("baff" "bam")',
             u'foo = ("bar" "baz") / ("baff" "bam")'),
        )
        for rule_source, rendered in round_trips:
            eq_(text_type(Grammar(rule_source)), rendered)
Example #12
0
 def test_multi_line(self):
     """Rules may be broken across lines, interleaved with comments, and
     laid out in all sorts of crazy ways; the grammar must still work."""
     # NOTE(review): the last line holds two rules (bold_open and
     # bold_close) with no newline between them -- presumably deliberate,
     # given what this test is about.
     rules = """
         bold_text  = bold_open  # commenty comment
                      text  # more comment
                      bold_close
         text       = ~"[A-Z 0-9]*"i
         bold_open  = "((" bold_close =  "))"
         """
     tree = Grammar(rules).parse('((booyah))')
     ok_(tree is not None)
    def test_repr_special_character_escaping(self):
        """Check that special characters come out properly escaped in the
        repr of a grammar.

        """
        # (grammar source, expected repr) pairs, checked in order.
        cases = (
            # backslash
            (r'''foo = "\\" ''',
             u'''Grammar("foo = '\\\\\\\\'")'''),
            # single quote
            (r'''foo = "'" ''',
             u'''Grammar('foo = "\\'"')'''),
            # escaped single quote inside a single quoted string
            (r'''foo = '\'' ''',
             u'''Grammar('foo = "\\'"')'''),
            # double quote
            (r'''foo = '"' ''',
             u'''Grammar('foo = \\'"\\'')'''),
            # newline
            (r'''foo = "\n" ''',
             u'''Grammar("foo = '\\\\n'")'''),
        )
        for rule_source, expected_repr in cases:
            eq_(repr(Grammar(rule_source)), expected_repr)
Example #14
0
def Assume(*args):
    """Parse an assumption expression and run the assumption visitor on it.

    Positional arguments:
        args[0]: the expression text handed to ``grammar.parse``.
        args[1]: optional; passed to the visitor's ``storeInd``.
        args[2]: optional; passed to the visitor's ``storeArr``.

    With more than three arguments the expression is still parsed but the
    visitor is never run.  Nothing is returned.
    """
    assumption_rules = r"""
    
        expr        = expr1 / expr2 / expr3 /expr4 /expr5 / expr6 /expr7
        expr1       = expr_dist1 logic_op num_log
        expr2       = expr_dist2 logic_op num_log
        expr3       = classVar ws logic_op ws value
        expr4       = classVarArr ws logic_op ws value
        expr5       = classVar ws logic_op ws classVar
        expr6       = classVarArr ws logic_op ws classVarArr
        expr7       = "True"
        expr_dist1  = op_beg?abs?para_open classVar ws arith_op ws classVar para_close op_end?
        expr_dist2  = op_beg?abs?para_open classVarArr ws arith_op ws classVarArr para_close op_end?
        classVar    = variable brack_open number brack_close
        classVarArr = variable brack_open variable brack_close
        para_open   = "("
        para_close  = ")"
        brack_open  = "["
        brack_close = "]"
        variable    = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        logic_op    = ws (geq / leq / eq / neq / and / lt / gt) ws
        op_beg      = number arith_op
        op_end      = arith_op number
        arith_op    = (add/sub/div/mul)
        abs         = "abs"
        add         = "+"
        sub         = "-"
        div         = "/"
        mul         = "*"
        lt          = "<"
        gt          = ">"
        geq         = ">="
        leq         = "<="
        eq          = "="
        neq         = "!="
        and         = "&"
        ws          = ~"\s*"
        value       = ~"\d+"
        num_log     = ~"[+-]?([0-9]*[.])?[0-9]+"
        number      = ~"[+-]?([0-9]*[.])?[0-9]+"
        """
    grammar = Grammar(assumption_rules)

    parsed = grammar.parse(args[0])
    visitor = assume2logic.AssumptionVisitor()

    arg_count = len(args)
    if arg_count > 3:
        # Unsupported arity: nothing is stored and nothing is visited.
        return
    if arg_count >= 2:
        visitor.storeInd(args[1])
    if arg_count == 3:
        visitor.storeArr(args[2])
    visitor.visit(parsed)
Example #15
0
    def test_unconnected_custom_rules(self):
        """Custom rules that no other rule refers to must still become part
        of the grammar, and a lone one must be usable as the default.

        This also exercises Grammar's `rules` default argument, since no
        rule text is passed at all.

        """
        def consume_one(text, pos):
            return pos + 1

        base = Grammar(one_char=consume_one)
        grammar = base.default('one_char')
        s = '4'
        parsed = grammar.parse(s)
        self.assertEqual(parsed, Node(grammar['one_char'], s, 0, 1))
Example #16
0
 def test_favoring_named_rules(self):
     """Named rules should be used in error messages in favor of anonymous
     ones, even if those are rightward-progressing-more, and even if the
     failure starts at position 0."""
     grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
     try:
         grammar.parse('burp')
     except ParseError as error:
         eq_(
             str(error),
             "Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1)."
         )
     else:
         # Without this branch the test would pass silently if parse()
         # stopped raising, and the message check would never run.
         raise AssertionError(
             "Expected grammar.parse('burp') to raise ParseError.")
Example #17
0
    def funcReadXml(self):
        """Parse ``self.fileName`` line by line and generate test data.

        Each stripped line is parsed with the grammar below and visited by
        a fresh ``dataFrameCreate``; the feature names, types, minimum and
        maximum values it reports are accumulated and finally handed to
        ``generateData(...).funcGenerateTestData()``.
        """
        tag_rules = r"""
    
        expr             = name / type / minimum / maximum / xmlStartDoc / xmlStartInps / xmlEndInps / xmlStartInp /
                                                                    xmlEndInp / xmlStartValTag /xmlEndValTag
        name             = xmlStartNameTag feName xmlEndNameTag
        type             = xmlStartTypeTag feType xmlEndTypeTag
        minimum          = xmlStartMinTag number xmlEndMinTag
        maximum          = xmlStartMaxTag number xmlEndMaxTag
        xmlStartDoc      = '<?xml version="1.0" encoding="UTF-8"?>'
        xmlStartInps     = "<Inputs>"
        xmlEndInps       = "<\Inputs>"
        xmlStartInp      = "<Input>"
        xmlEndInp        = "<\Input>"
        xmlStartNameTag  = "<Feature-name>"
        xmlEndNameTag    = "<\Feature-name>"
        xmlStartTypeTag  = "<Feature-type>"
        xmlEndTypeTag    = "<\Feature-type>"
        xmlStartValTag   = "<Value>"
        xmlEndValTag     = "<\Value>"
        xmlStartMinTag   = "<minVal>"
        xmlEndMinTag     = "<\minVal>"
        xmlStartMaxTag   = "<maxVal>"
        xmlEndMaxTag     = "<\maxVal>"
        feName           = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        feType           = ~"[A-Z 0-9]*"i
        number           = ~"[+-]?([0-9]*[.])?[0-9]+"
        """
        grammar = Grammar(tag_rules)

        with open(self.fileName) as xml_file:
            stripped_lines = [raw.strip() for raw in xml_file.readlines()]

        names, types, minimums, maximums = [], [], [], []
        for line in stripped_lines:
            tree = grammar.parse(line)
            collector = dataFrameCreate()
            collector.visit(tree)
            if collector.feName is not None:
                names.append(collector.feName)
            if collector.feType is not None:
                types.append(collector.feType)
            # -99999 and 0 are compared against as "nothing found" markers
            # for the minimum and maximum -- presumably the collector's
            # initial values; verify against dataFrameCreate.
            if collector.feMinVal != -99999:
                minimums.append(collector.feMinVal)
            if collector.feMaxVal != 0:
                maximums.append(collector.feMaxVal)

        generator = generateData(names, types, minimums, maximums)
        generator.funcGenerateTestData()
Example #18
0
    def __init__(self, grammar, unwrapped_exceptions=None):
        '''
        Builds a parser from the given grammar definition.

        The grammar may define a special rule called '__ignored'.  It
        names the productions that should be left out of the output
        tokens.  This is useful for stripping out whitespace productions,
        comments, and other "noisy" filler that would otherwise make the
        AST hard to process.

        ``unwrapped_exceptions`` is stored on the compiled grammar; an
        empty list is used when it is omitted (or otherwise falsy).
        '''

        self.grammar = Grammar(grammar)
        if unwrapped_exceptions:
            self.grammar.unwrapped_exceptions = unwrapped_exceptions
        else:
            self.grammar.unwrapped_exceptions = []
Example #19
0
    def test_lookahead(self):
        """A lookahead must reject text that fails it and must not consume
        any text when it succeeds."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, grammar.parse, 'burp')

        s = 'arp'
        parsed = grammar.parse('arp')
        # The first child spans 0-0 (the lookahead), the second spans the
        # whole three-character text.
        lookahead_node = Node('', s, 0, 0)
        regex_node = Node('', s, 0, 3)
        expected = Node('starts_with_a', s, 0, 3,
                        children=[lookahead_node, regex_node])
        eq_(parsed, expected)
Example #20
0
    def test_expressions_from_rules(self):
        """Check that the ``Grammar`` base class can compile an expression
        tree from rules.

        Whether the *correct* ``Expression`` tree gets built is covered by
        ``RuleGrammarTests`` already; this only checks that the base
        class's ``_expressions_from_rules`` works.

        """
        greeting_grammar = Grammar('greeting = "hi" / "howdy"')
        tree = greeting_grammar.parse('hi')
        matched_literal = Node(Literal('hi'), 'hi', 0, 2)
        expected = Node(greeting_grammar['greeting'], 'hi', 0, 2,
                        children=[matched_literal])
        self.assertEqual(tree, expected)
Example #21
0
 def test_line_and_column(self):
     """Make sure we got the line and column computation right."""
     grammar = Grammar(r"""
         whee_lah = whee "\n" lah "\n"
         whee = "whee"
         lah = "lah"
         """)
     try:
         grammar.parse('whee\nlahGOO')
     except ParseError as error:
         # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
         # didn't match". That's not the greatest. Fix that, then fix this.
         self.assertTrue(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
     else:
         # Without this branch the test would pass silently if parse()
         # stopped raising, and the position check would never run.
         raise AssertionError(
             "Expected grammar.parse('whee\\nlahGOO') to raise ParseError.")
Example #22
0
 def test_match(self):
     """Partial matching that starts at an explicit ``pos`` should work."""
     rules = r"""
                       bold_text  = bold_open text bold_close
                       text       = ~"[A-Z 0-9]*"i
                       bold_open  = "(("
                       bold_close = "))"
                       """
     grammar = Grammar(rules)
     s = ' ((boo))yah'
     matched = grammar.match(s, pos=1)
     # The match starts after the leading space and stops before 'yah'.
     expected_children = [
         Node(grammar['bold_open'], s, 1, 3),
         Node(grammar['text'], s, 3, 6),
         Node(grammar['bold_close'], s, 6, 8),
     ]
     expected = Node(grammar['bold_text'], s, 1, 8,
                     children=expected_children)
     self.assertEqual(matched, expected)
Example #23
0
    def result(self):
        """The 'result' property: parse and evaluate the replacement.

        Parses ``self._replacement`` with the grammar below and returns
        whatever ``ReplacementVisitor(self._data).visit`` produces for the
        resulting tree.  Exceptions raised while parsing or visiting
        propagate to the caller.
        """

        # The grammar is a raw string on purpose: its regexes contain
        # backslash sequences (\+, \d, \s) that are not valid Python
        # escapes and trigger invalid-escape warnings in a non-raw literal.
        # The two spots that relied on escape processing (a backslash
        # before a double quote) are written as \" here, so the resulting
        # grammar text is exactly what the non-raw literal produced.
        g = Grammar(r"""
            replacement = ws replacevalue transformationlist ws

            replacevalue = expression / varname / literal

            transformationlist = transformation*
            transformation = ws comma ws transname transarglist

            transarglist = transarg*
            transarg = singlequotedstr / doublequotedstr / unquotedarg

            expression = term rws operator rws term

            term = numberliteral / varname

            varname = ~"[a-z_][a-z0-9_]*"i
            transname = ~"[a-z_][a-z0-9_]*"i

            literal = numberliteral / stringliteral
            numberliteral = ~"(\+|-)?\d+([.]\d+)?"
            stringliteral = singlequotedstr / doublequotedstr

            doublequotedstr = ws dblq notdblq dblq
            singlequotedstr = ws sngq notsngq sngq
            unquotedarg = ws notwsorcomma

            operator = plus / minus / times / divide

            plus = "+"
            minus = "-"
            times = "*"
            divide = "/"

            rws = ~"\s+"
            ws = ~"\s*"
            comma = ","
            notwsorcomma = ~"[^\s,]+"

            dblq = "\""
            notdblq = ~"[^\"]*"

            sngq = "'"
            notsngq = ~"[^']*"
        """)

        tree = g.parse(self._replacement)

        return ReplacementVisitor(self._data).visit(tree)
Example #24
0
    def test_infinite_loop(self):
        """Smoke test: building this grammar used to loop forever.

        The "int" rule never got marked as resolved, so the compiler kept
        trying to resolve it again and again.

        """
        # Merely constructing the grammar is the test; it has to finish.
        looping_rules = """
            digits = digit+
            int = digits
            digit = ~"[0-9]"
            number = int
            main = number
            """
        Grammar(looping_rules)
Example #25
0
 def test_callability_custom_rules(self):
     """Functions, methods and method descriptors must all be accepted as
     custom grammar rules.
     """
     custom_rules = {
         'function': function_rule,
         'method': self.method_rule,
         'descriptor': self.rules['descriptor_rule'],
     }
     grammar = Grammar("""
         default = function method descriptor
         """, **custom_rules)
     result = grammar.parse('functionmethoddescriptor')
     # Each child of the top-level node should carry the name of the
     # custom rule that produced it, in sequence order.
     rule_names = []
     for node in result.children:
         rule_names.append(node.expr.name)
     self.assertEqual(rule_names, ['function', 'method', 'descriptor'])
Example #26
0
 def test_simple_custom_rules(self):
     """Exercise a custom rule written as a 2-argument callable."""
     def digit_rule(text, pos):
         # Return the position just past a digit, or None when the
         # character at ``pos`` is not a digit.
         return (pos + 1) if text[pos].isdigit() else None

     grammar = Grammar("""
         bracketed_digit = start digit end
         start = '['
         end = ']'""",
         digit=digit_rule)
     s = '[6]'
     parsed = grammar.parse(s)
     expected_children = [
         Node(grammar['start'], s, 0, 1),
         Node(grammar['digit'], s, 1, 2),
         Node(grammar['end'], s, 2, 3),
     ]
     expected = Node(grammar['bracketed_digit'], s, 0, 3,
                     children=expected_children)
     self.assertEqual(parsed, expected)
Example #27
0
 def test_unicode(self):
     """A ``Grammar`` should convert into a string-formatted series of
     rules, one rule per line."""
     rules = r"""
                       bold_text  = bold_open text bold_close
                       text       = ~"[A-Z 0-9]*"i
                       bold_open  = "(("
                       bold_close = "))"
                       """
     grammar = Grammar(rules)
     lines = unicode(grammar).splitlines()
     # The first line is pinned to bold_text; the other rules only need
     # to be present somewhere in the output.
     eq_(lines[0], 'bold_text = bold_open text bold_close')
     for rendered_rule in ('text = ~"[A-Z 0-9]*"i',
                           'bold_open = "(("',
                           'bold_close = "))"'):
         ok_(rendered_rule in lines)
     eq_(len(lines), 4)
Example #28
0
    def test_lookahead(self):
        """A lookahead must reject text that fails it and must not consume
        any text when it succeeds."""
        grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
        assert_raises(ParseError, grammar.parse, 'burp')

        s = 'arp'
        parsed = grammar.parse('arp')
        # The lookahead child spans 0-0; the regex child spans the whole
        # three-character text.
        lookahead_node = Node(Lookahead(Literal('a')), s, 0, 0)
        regex_node = Node(Regex(r'[a-z]+'), s, 0, 3)
        expected = Node(grammar['starts_with_a'], s, 0, 3,
                        children=[lookahead_node, regex_node])
        eq_(parsed, expected)
Example #29
0
    def __init__(self,
                 all_tables: Dict[str, List[str]] = None,
                 tables_with_strings: Dict[str, List[str]] = None,
                 database_directory: str = None) -> None:
        """Record the table descriptions and build the grammar.

        When ``database_directory`` is truthy, a sqlite3 connection to it
        and a cursor on that connection are opened and kept on the
        instance; otherwise ``database_directory``, ``connection`` and
        ``cursor`` are not set at all.
        """
        self.all_tables, self.tables_with_strings = all_tables, tables_with_strings
        if database_directory:
            self.database_directory = database_directory
            connection = sqlite3.connect(database_directory)
            self.connection = connection
            self.cursor = connection.cursor()

        # The grammar text is produced first, then compiled, and only
        # afterwards are the valid actions initialized.
        grammar_text: str = self.initialize_grammar_str()
        self.grammar_str = grammar_text
        self.grammar: Grammar = Grammar(self.grammar_str)
        self.valid_actions: Dict[str, List[str]] = \
            self.initialize_valid_actions()
Example #30
0
    def test_immutable_grammar(self):
        """Make sure that a Grammar is immutable after being created."""
        grammar = Grammar(r"""
            foo = 'bar'
        """)

        # NOTE(review): both assertRaises calls below pass ``[grammar]`` --
        # a one-element list -- as the single positional argument, so the
        # helpers operate on a list rather than on the Grammar.  A list
        # raises TypeError for a string index and has no ``update``
        # attribute, so these checks pass regardless of how Grammar
        # behaves.  Confirm whether ``grammar`` itself was meant here.
        def assign_rule(target):
            target['foo'] = 1
        self.assertRaises(TypeError, assign_rule, [grammar])

        def merge_rules(target):
            new_grammar = Grammar(r"""
                baz = 'biff'
            """)
            target.update(new_grammar)
        self.assertRaises(AttributeError, merge_rules, [grammar])