def test_badly_circular(self): """Uselessly circular references should be detected by the grammar compiler.""" raise SkipTest( 'We have yet to make the grammar compiler detect these.') grammar = Grammar(""" foo = bar bar = foo """)
def test_resolve_refs_order(self):
    """Smoke-test a rule ordering in which lazy references once went
    unresolved: building the grammar and parsing must not raise."""
    rules = """
        expression = "(" terms ")"
        terms = term+
        term = number
        number = ~r"[0-9]+"
        """
    Grammar(rules).parse('(34)')
def parse(source):
    """Parse tbon source text and return the resulting parse tree.

    ``source`` is handed unchanged to ``Grammar.parse``; whatever that call
    returns is returned, and whatever it raises propagates to the caller.
    """
    # Raw string: the regex escapes below (\*, \d, \., \s) must reach the
    # grammar compiler verbatim.  In a non-raw literal they are invalid escape
    # sequences, which Python warns about.  The string value is unchanged.
    grammar = Grammar(r"""
        score = wsc* music*
        music = (partswitch* bar+)+ wsc*
        partswitch = "P=" partnum
        wsc = comment / ws+
        comment = ws* ~r"/\*.*?\*/"s ws*
        bar = (wsc* (meta / beat) wsc+)+ barline
        meta = beatspec / key / tempo / relativetempo / velocity / de_emphasis / channel / instrument
        beatspec = "B=" ("2." / "2" / "4." / "4" / "8." / "8")
        key = "K=" keyname
        keyname = ~r"[a-gA-G](@|#)?"
        tempo = "T=" floatnum
        relativetempo = "t=" floatnum
        velocity = "V=" floatnum
        de_emphasis = "D=" floatnum
        channel = "C=" chnum
        partnum = ~r"[1-9][0-9]*"i
        instrument = "I=" inum
        inum = ~r"[1-9][0-9]*"i
        floatnum = ~r"\d*\.?\d+"i
        chnum = ~r"\d*\.?\d+"i
        beat = subbeat+
        barline = "|" / ":"
        extendable = chord / roll / ornament / pitch / rest
        pitch = octave* alteration? pitchname
        chord = chordstart chorditem chorditem* rparen
        chordstart = "("
        chorditem = chordpitch / chordhold / chordrest
        chordpitch = octave* alteration? pitchname
        chordhold = '-'
        chordrest = "_" / "z"
        rparen = ")"
        roll = rollstart pitch pitch+ rparen
        rollstart = "(:"
        ornament = ornamentstart pitch pitch+ rparen
        ornamentstart = "(~"
        subbeat = extendable / hold
        rest = "_" / "z"
        hold = "-"
        octave = octave_up / octave_down
        alteration = doublesharp / sharp / doubleflat / flat / natural
        doublesharp = "𝄪" / "##"
        sharp = "♯" / "#"
        doubleflat = "𝄫" / "@@"
        flat = "♭" / "@"
        natural = "♮" / "%"
        octave_up = "^"
        octave_down = "/"
        pitchname = ~"[a-g1-7]"i
        ws = ~r"\s*"i
        """)
    return grammar.parse(source)
def _grammar(self):
    """Build and return the PEG grammar for ground Problog.

    The rule layout is deliberately unusual (many ``*_opt`` / ``*_more``
    helper rules) so that visiting the resulting parse tree works.
    """
    rules = r"""
        program = _ clauses
        clauses = clause*
        clause = predicate dot
        predicate = prob_ann / rule / term
        rule = term turnstile conjunction
        conjunction = term conjunction_opt
        conjunction_opt = conjunction_more?
        conjunction_more = comma conjunction
        prob_ann = prob_ann_heads prob_ann_rule_opt
        prob_ann_heads = prob_fact prob_fact_opt
        prob_fact_opt = prob_fact_more?
        prob_fact_more = semicolon prob_ann_heads
        prob_ann_rule_opt = prob_ann_rule?
        prob_ann_rule = turnstile conjunction
        prob_fact = probability doublecolon term
        term = negation_opt word_or_num arguments_opt
        word_or_num = decimal_or_frac / word
        negation_opt = negation?
        arguments_opt = arguments?
        arguments = lparen arguments_list rparen
        arguments_list = term arguments_more_o
        arguments_more_o = arguments_more?
        arguments_more = comma arguments_list
        probability = prob_num / prob_tunable_num / prob_tunable_none
        prob_num = decimal_or_frac _
        prob_tunable_num = _ tunable lparen decimal_or_frac rparen _
        prob_tunable_none = _ tunable_empty _
        decimal_or_frac = decimal / fraction
        fraction = number slash number
        # could to this in a better way, but this works and we are way over time already
        word = ~r"([a-zA-Z0-9_\[\]]+|\"[a-zA-Z0-9_\-\'\.\/\=\<\>\+\[\]]*\"|\'[a-zA-Z0-9_\-\'\.\/\=\<\>\+\[\]]*\')"
        number = ~r"[0-9]*"
        decimal = ~r"[0-9]*\.[0-9]*"
        dot = _ "." _
        comma = _ "," _
        semicolon = _ ";" _
        lparen = _ "(" _
        rparen = _ ")" _
        slash = _ "/" _
        doublecolon = _ "::" _
        turnstile = _ ":-" _
        negation = _ ~r"\\\+" _
        tunable = _ "t" _
        tunable_empty = "t(_)"
        _ = meaninglessness*
        meaninglessness = ~r"\s+"
        """
    return Grammar(rules)
def test_no_named_rule_succeeding(self):
    """Make sure ParseErrors have sane printable representations even if we
    never succeeded in matching any named expressions.

    The attributes of the raised error are checked; the test also fails when
    no ``ParseError`` is raised at all, so it cannot pass vacuously.
    """
    grammar = Grammar('''bork = "bork"''')
    try:
        grammar.parse('snork')
    except ParseError as error:
        eq_(error.pos, 0)
        eq_(error.expr, grammar['bork'])
        eq_(error.text, 'snork')
    else:
        # Without this branch a parse that unexpectedly succeeds would skip
        # every assertion above and the test would still pass.
        raise AssertionError("parse('snork') should have raised ParseError")
def __init__(self, utterances: List[str], tokenizer=None) -> None:
    """Store the utterances, tokenize them, and build the contextual grammar.

    ``tokenizer`` falls back to a fresh ``WordTokenizer`` when a falsy value
    is given.  The valid actions and the grammar string are produced by
    ``init_all_valid_actions`` and ``get_grammar_str``, in that order.
    """
    self.utterances: List[str] = utterances
    self.tokenizer = tokenizer or WordTokenizer()

    tokenized = []
    for utterance in self.utterances:
        tokenized.append(self.tokenizer.tokenize(utterance))
    self.tokenized_utterances = tokenized

    self.valid_actions: Dict[str, List[str]] = self.init_all_valid_actions()
    self.grammar_str: str = self.get_grammar_str()
    self.grammar_with_context: Grammar = Grammar(self.grammar_str)
def test_lazy_default_rule(self):
    """The default rule must end up as a real rule even when the first rule
    is only a forward reference (and therefore a LazyReference for part of
    grammar compilation)."""
    grammar = Grammar(r"""
        styled_text = text
        text = "hi"
        """)
    parsed = grammar.parse('hi')
    expected = Node(grammar['text'], 'hi', 0, 2)
    self.assertEqual(parsed, expected)
def __init__(self, utterances, tokenizer=None) -> None:
    """Store the utterances, tokenize them, and build the contextual grammar.

    ``tokenizer`` falls back to a fresh ``WordTokenizer`` when a falsy value
    is given.  ``init_all_valid_actions`` supplies both the valid actions and
    the linking scores; ``get_grammar_str`` supplies the grammar text that is
    compiled into ``grammar_with_context``.
    """
    self.utterances = utterances
    self.tokenizer = tokenizer if tokenizer else WordTokenizer()
    self.tokenized_utterances = [self.tokenizer.tokenize(utterance)
                                 for utterance in self.utterances]
    valid_actions, linking_scores = self.init_all_valid_actions()
    self.valid_actions = valid_actions

    # This has shape (num_entities, num_utterance_tokens).
    self.linking_scores: numpy.ndarray = linking_scores
    # Annotated as ``str``: ``unicode`` is not a Python 3 name, and the
    # variable-annotation syntax used here only exists on Python 3.6+.
    self.grammar_str: str = self.get_grammar_str()
    self.grammar_with_context: Grammar = Grammar(self.grammar_str)
def test_unicode_keep_parens(self):
    """Converting a grammar to text must keep the parentheses around grouped
    sub-expressions."""
    cases = (
        # ZeroOrMore
        ('foo = "bar" ("baz" "eggs")* "spam"',
         u"foo = 'bar' ('baz' 'eggs')* 'spam'"),
        # OneOf
        ('foo = "bar" ("baz" / "eggs") "spam"',
         u"foo = 'bar' ('baz' / 'eggs') 'spam'"),
        # Lookahead
        ('foo = "bar" &("baz" "eggs") "spam"',
         u"foo = 'bar' &('baz' 'eggs') 'spam'"),
        # Multiple sequences
        ('foo = ("bar" "baz") / ("baff" "bam")',
         u"foo = ('bar' 'baz') / ('baff' 'bam')"),
    )
    for rules, rendered in cases:
        self.assertEqual(str(Grammar(rules)), rendered)
def test_parse_with_leftovers(self):
    """Make sure ``parse()`` reports where we started failing to match, even
    if a partial match was successful.

    The test also fails when no ``IncompleteParseError`` is raised at all, so
    it cannot pass vacuously.
    """
    grammar = Grammar(r'''sequence = "chitty" (" " "bang")+''')
    try:
        grammar.parse('chitty bangbang')
    except IncompleteParseError as error:
        eq_(
            str(error),
            "Rule 'sequence' matched in its entirety, but it didn't consume all the text. The non-matching portion of the text begins with 'bang' (line 1, column 12)."
        )
    else:
        # Without this branch a parse that unexpectedly succeeds would skip
        # the assertion above and the test would still pass.
        raise AssertionError(
            "parse('chitty bangbang') should have raised IncompleteParseError")
def test_unicode_keep_parens(self):
    """Converting a grammar to text must keep the parentheses around grouped
    sub-expressions."""
    cases = (
        # ZeroOrMore
        ('foo = "bar" ("baz" "eggs")* "spam"',
         u'foo = "bar" ("baz" "eggs")* "spam"'),
        # OneOf
        ('foo = "bar" ("baz" / "eggs") "spam"',
         u'foo = "bar" ("baz" / "eggs") "spam"'),
        # Lookahead
        ('foo = "bar" &("baz" "eggs") "spam"',
         u'foo = "bar" &("baz" "eggs") "spam"'),
        # Multiple sequences
        ('foo = ("bar" "baz") / ("baff" "bam")',
         u'foo = ("bar" "baz") / ("baff" "bam")'),
    )
    for rules, rendered in cases:
        eq_(text_type(Grammar(rules)), rendered)
def test_multi_line(self):
    """Rules may be broken across lines and interleaved with comments, and
    two rules may share one line; the grammar must still parse input."""
    rules = """
        bold_text = bold_open  # commenty comment
                    text  # more comment
                    bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "((" bold_close = "))"
        """
    tree = Grammar(rules).parse('((booyah))')
    ok_(tree is not None)
def test_repr_special_character_escaping(self):
    """``repr()`` of a grammar must escape special characters correctly."""
    # backslash
    actual = repr(Grammar(r'''foo = "\\" '''))
    eq_(actual, u'''Grammar("foo = '\\\\\\\\'")''')

    # single quote
    actual = repr(Grammar(r'''foo = "'" '''))
    eq_(actual, u'''Grammar('foo = "\\'"')''')

    # escaped single quote inside a single quoted string
    actual = repr(Grammar(r'''foo = '\'' '''))
    eq_(actual, u'''Grammar('foo = "\\'"')''')

    # double quote
    actual = repr(Grammar(r'''foo = '"' '''))
    eq_(actual, u'''Grammar('foo = \\'"\\'')''')

    # newline
    actual = repr(Grammar(r'''foo = "\n" '''))
    eq_(actual, u'''Grammar("foo = '\\\\n'")''')
def Assume(*args):
    """Parse an assumption expression and feed it to an AssumptionVisitor.

    ``args[0]`` is the expression text.  An optional second argument is
    passed to the visitor's ``storeInd`` and an optional third one to its
    ``storeArr`` before the parse tree is visited.  Calls with more than
    three arguments parse the text but visit nothing.  Returns ``None``.
    """
    grammar = Grammar(r"""
        expr = expr1 / expr2 / expr3 /expr4 /expr5 / expr6 /expr7
        expr1 = expr_dist1 logic_op num_log
        expr2 = expr_dist2 logic_op num_log
        expr3 = classVar ws logic_op ws value
        expr4 = classVarArr ws logic_op ws value
        expr5 = classVar ws logic_op ws classVar
        expr6 = classVarArr ws logic_op ws classVarArr
        expr7 = "True"
        expr_dist1 = op_beg?abs?para_open classVar ws arith_op ws classVar para_close op_end?
        expr_dist2 = op_beg?abs?para_open classVarArr ws arith_op ws classVarArr para_close op_end?
        classVar = variable brack_open number brack_close
        classVarArr = variable brack_open variable brack_close
        para_open = "("
        para_close = ")"
        brack_open = "["
        brack_close = "]"
        variable = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        logic_op = ws (geq / leq / eq / neq / and / lt / gt) ws
        op_beg = number arith_op
        op_end = arith_op number
        arith_op = (add/sub/div/mul)
        abs = "abs"
        add = "+"
        sub = "-"
        div = "/"
        mul = "*"
        lt = "<"
        gt = ">"
        geq = ">="
        leq = "<="
        eq = "="
        neq = "!="
        and = "&"
        ws = ~"\s*"
        value = ~"\d+"
        num_log = ~"[+-]?([0-9]*[.])?[0-9]+"
        number = ~"[+-]?([0-9]*[.])?[0-9]+"
        """)

    tree = grammar.parse(args[0])
    visitor = assume2logic.AssumptionVisitor()

    extras = args[1:]
    if len(extras) > 2:
        # More than three arguments in total: nothing is stored or visited.
        return
    if extras:
        visitor.storeInd(extras[0])
    if len(extras) == 2:
        visitor.storeArr(extras[1])
    visitor.visit(tree)
def test_unconnected_custom_rules(self):
    """A custom rule that no other rule references must still be part of the
    grammar and usable as the default rule.

    Also exercises ``Grammar``'s default for its ``rules`` argument, since no
    rule text is passed at all.
    """
    consume_one = lambda text, pos: pos + 1
    grammar = Grammar(one_char=consume_one).default('one_char')
    s = '4'
    expected = Node(grammar['one_char'], s, 0, 1)
    self.assertEqual(grammar.parse(s), expected)
def test_favoring_named_rules(self):
    """Named rules should be used in error messages in favor of anonymous
    ones, even if those are rightward-progressing-more, and even if the
    failure starts at position 0.

    The test also fails when no ``ParseError`` is raised at all, so it cannot
    pass vacuously.
    """
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    try:
        grammar.parse('burp')
    except ParseError as error:
        eq_(
            str(error),
            "Rule 'starts_with_a' didn't match at 'burp' (line 1, column 1)."
        )
    else:
        # Without this branch a parse that unexpectedly succeeds would skip
        # the assertion above and the test would still pass.
        raise AssertionError("parse('burp') should have raised ParseError")
def funcReadXml(self):
    """Read ``self.fileName`` line by line and generate test data from it.

    Every stripped line is parsed with the grammar below and visited by a
    fresh ``dataFrameCreate``.  The feature names, feature types, minimum and
    maximum values collected that way are handed to ``generateData``, whose
    ``funcGenerateTestData`` is then called.  Returns ``None``.
    """
    # NOTE(review): the closing tags are matched with a backslash
    # ("<\Inputs>") rather than "</Inputs>" -- confirm that this is really
    # the format of the input files.
    grammar = Grammar(r"""
        expr = name / type / minimum / maximum / xmlStartDoc / xmlStartInps / xmlEndInps / xmlStartInp / xmlEndInp / xmlStartValTag /xmlEndValTag
        name = xmlStartNameTag feName xmlEndNameTag
        type = xmlStartTypeTag feType xmlEndTypeTag
        minimum = xmlStartMinTag number xmlEndMinTag
        maximum = xmlStartMaxTag number xmlEndMaxTag
        xmlStartDoc = '<?xml version="1.0" encoding="UTF-8"?>'
        xmlStartInps = "<Inputs>"
        xmlEndInps = "<\Inputs>"
        xmlStartInp = "<Input>"
        xmlEndInp = "<\Input>"
        xmlStartNameTag = "<Feature-name>"
        xmlEndNameTag = "<\Feature-name>"
        xmlStartTypeTag = "<Feature-type>"
        xmlEndTypeTag = "<\Feature-type>"
        xmlStartValTag = "<Value>"
        xmlEndValTag = "<\Value>"
        xmlStartMinTag = "<minVal>"
        xmlEndMinTag = "<\minVal>"
        xmlStartMaxTag = "<maxVal>"
        xmlEndMaxTag = "<\maxVal>"
        feName = ~"([a-zA-Z_][a-zA-Z0-9_]*)"
        feType = ~"[A-Z 0-9]*"i
        number = ~"[+-]?([0-9]*[.])?[0-9]+"
        """)

    with open(self.fileName) as f1:
        stripped_lines = [raw.strip() for raw in f1.readlines()]

    feNameArr, feTypeArr = [], []
    minValArr, maxValArr = [], []
    for text in stripped_lines:
        visitor = dataFrameCreate()
        visitor.visit(grammar.parse(text))
        if visitor.feName is not None:
            feNameArr.append(visitor.feName)
        if visitor.feType is not None:
            feTypeArr.append(visitor.feType)
        # -99999 and 0 are the values compared against to decide whether a
        # minimum / maximum was found on this line.
        if visitor.feMinVal != -99999:
            minValArr.append(visitor.feMinVal)
        if visitor.feMaxVal != 0:
            maxValArr.append(visitor.feMaxVal)

    generator = generateData(feNameArr, feTypeArr, minValArr, maxValArr)
    generator.funcGenerateTestData()
def __init__(self, grammar, unwrapped_exceptions=None):
    '''
    Build a parser around the given grammar definition.

    The grammar may define a special rule called '__ignored'. It names the
    productions that are to be left out of the output tokens, which is
    useful for stripping whitespace productions, comments, and other
    "noisy" filler that otherwise makes the AST hard to process.

    ``unwrapped_exceptions`` is stored on the compiled grammar; a falsy
    value is replaced by an empty list.
    '''
    compiled = Grammar(grammar)
    compiled.unwrapped_exceptions = unwrapped_exceptions or []
    self.grammar = compiled
def test_lookahead(self):
    """A lookahead must reject input that lacks the prefix and must consume
    nothing when the prefix is present."""
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    assert_raises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    lookahead_node = Node('', s, 0, 0)
    regex_node = Node('', s, 0, 3)
    expected = Node('starts_with_a', s, 0, 3,
                    children=[lookahead_node, regex_node])
    eq_(grammar.parse('arp'), expected)
def test_expressions_from_rules(self):
    """Check that the ``Grammar`` base class can compile an expression tree
    from rules.

    Whether the *correct* ``Expression`` tree is built is covered by
    ``RuleGrammarTests``; this only checks that ``_expressions_from_rules``
    works at all.
    """
    greeting_grammar = Grammar('greeting = "hi" / "howdy"')
    literal_node = Node(Literal('hi'), 'hi', 0, 2)
    expected = Node(greeting_grammar['greeting'], 'hi', 0, 2,
                    children=[literal_node])
    self.assertEqual(greeting_grammar.parse('hi'), expected)
def test_line_and_column(self):
    """Make sure we got the line and column computation right.

    The test also fails when no ``ParseError`` is raised at all, so it cannot
    pass vacuously.
    """
    grammar = Grammar(r"""
        whee_lah = whee "\n" lah "\n"
        whee = "whee"
        lah = "lah"
        """)
    try:
        grammar.parse('whee\nlahGOO')
    except ParseError as error:
        # TODO: Right now, this says "Rule <Literal "\n" at 0x4368250432>
        # didn't match". That's not the greatest. Fix that, then fix this.
        self.assertTrue(str(error).endswith(r"""didn't match at 'GOO' (line 2, column 4)."""))
    else:
        # Without this branch a parse that unexpectedly succeeds would skip
        # the assertion above and the test would still pass.
        self.fail("parse('whee\\nlahGOO') should have raised ParseError")
def test_match(self):
    """``match()`` with an explicit ``pos`` must match from that offset and
    may leave trailing text unconsumed."""
    grammar = Grammar(r"""
        bold_text = bold_open text bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "(("
        bold_close = "))"
        """)
    s = ' ((boo))yah'
    expected_children = [
        Node(grammar['bold_open'], s, 1, 3),
        Node(grammar['text'], s, 3, 6),
        Node(grammar['bold_close'], s, 6, 8),
    ]
    expected = Node(grammar['bold_text'], s, 1, 8, children=expected_children)
    self.assertEqual(grammar.match(s, pos=1), expected)
def result(self):
    """The 'result' property.

    Parses ``self._replacement`` with the replacement grammar below and
    returns what ``ReplacementVisitor(self._data)`` produces when visiting
    the parse tree.  Errors raised while parsing or visiting propagate.
    """
    # Raw string: the regex escapes (\+, \d, \s) must reach the grammar
    # compiler verbatim.  In a non-raw literal they are invalid escape
    # sequences, which Python warns about.  The two quote rules are written
    # with a single backslash here, which is the same text the previous
    # non-raw "\\" spelling produced.
    g = Grammar(r"""
        replacement = ws replacevalue transformationlist ws
        replacevalue = expression / varname / literal
        transformationlist = transformation*
        transformation = ws comma ws transname transarglist
        transarglist = transarg*
        transarg = singlequotedstr / doublequotedstr / unquotedarg
        expression = term rws operator rws term
        term = numberliteral / varname
        varname = ~"[a-z_][a-z0-9_]*"i
        transname = ~"[a-z_][a-z0-9_]*"i
        literal = numberliteral / stringliteral
        numberliteral = ~"(\+|-)?\d+([.]\d+)?"
        stringliteral = singlequotedstr / doublequotedstr
        doublequotedstr = ws dblq notdblq dblq
        singlequotedstr = ws sngq notsngq sngq
        unquotedarg = ws notwsorcomma
        operator = plus / minus / times / divide
        plus = "+"
        minus = "-"
        times = "*"
        divide = "/"
        rws = ~"\s+"
        ws = ~"\s*"
        comma = ","
        notwsorcomma = ~"[^\s,]+"
        dblq = "\""
        notdblq = ~"[^\"]*"
        sngq = "'"
        notsngq = ~"[^']*"
        """)
    tree = g.parse(self._replacement)
    return ReplacementVisitor(self._data).visit(tree)
def test_infinite_loop(self):
    """Smoke-test a grammar that used to make grammar construction loop
    forever.

    The "int" rule was never marked as resolved, so the compiler kept trying
    to resolve it again and again.  Building the grammar must simply return.
    """
    rules = """
        digits = digit+
        int = digits
        digit = ~"[0-9]"
        number = int
        main = number
        """
    Grammar(rules)
def test_callability_custom_rules(self):
    """Plain functions, bound methods and method descriptors must all be
    accepted as custom grammar rules."""
    custom_rules = {
        'function': function_rule,
        'method': self.method_rule,
        'descriptor': self.rules['descriptor_rule'],
    }
    grammar = Grammar("""
        default = function method descriptor
        """, **custom_rules)

    result = grammar.parse('functionmethoddescriptor')
    rule_names = []
    for node in result.children:
        rule_names.append(node.expr.name)
    self.assertEqual(rule_names, ['function', 'method', 'descriptor'])
def test_simple_custom_rules(self):
    """Exercise a custom rule written as a two-argument callable."""
    def digit(text, pos):
        # Consume exactly one character when it is a digit; otherwise
        # report no match.
        if text[pos].isdigit():
            return pos + 1
        return None

    grammar = Grammar("""
        bracketed_digit = start digit end
        start = '['
        end = ']'""",
        digit=digit)
    s = '[6]'
    expected_children = [
        Node(grammar['start'], s, 0, 1),
        Node(grammar['digit'], s, 1, 2),
        Node(grammar['end'], s, 2, 3),
    ]
    expected = Node(grammar['bracketed_digit'], s, 0, 3,
                    children=expected_children)
    self.assertEqual(grammar.parse(s), expected)
def test_unicode(self):
    """A ``Grammar`` must render as a series of rule lines: the first rule
    comes first, every other rule appears somewhere, and nothing extra is
    emitted."""
    grammar = Grammar(r"""
        bold_text = bold_open text bold_close
        text = ~"[A-Z 0-9]*"i
        bold_open = "(("
        bold_close = "))"
        """)
    lines = unicode(grammar).splitlines()
    eq_(lines[0], 'bold_text = bold_open text bold_close')
    for rendered_rule in ('text = ~"[A-Z 0-9]*"i',
                          'bold_open = "(("',
                          'bold_close = "))"'):
        ok_(rendered_rule in lines)
    eq_(len(lines), 4)
def test_lookahead(self):
    """A lookahead must reject input that lacks the prefix and must consume
    nothing when the prefix is present."""
    grammar = Grammar(r'''starts_with_a = &"a" ~"[a-z]+"''')
    assert_raises(ParseError, grammar.parse, 'burp')

    s = 'arp'
    lookahead_node = Node(Lookahead(Literal('a')), s, 0, 0)
    regex_node = Node(Regex(r'[a-z]+'), s, 0, 3)
    expected = Node(grammar['starts_with_a'], s, 0, 3,
                    children=[lookahead_node, regex_node])
    eq_(grammar.parse('arp'), expected)
def __init__(self,
             all_tables: Dict[str, List[str]] = None,
             tables_with_strings: Dict[str, List[str]] = None,
             database_directory: str = None) -> None:
    """Record the table descriptions, optionally open the database, and build
    the grammar and the valid actions.

    ``database_directory``, ``connection`` and ``cursor`` are only set when a
    truthy ``database_directory`` is given; it is passed as-is to
    ``sqlite3.connect``.
    """
    self.all_tables = all_tables
    self.tables_with_strings = tables_with_strings

    if database_directory:
        connection = sqlite3.connect(database_directory)
        self.database_directory = database_directory
        self.connection = connection
        self.cursor = connection.cursor()

    self.grammar_str: str = self.initialize_grammar_str()
    self.grammar: Grammar = Grammar(self.grammar_str)
    self.valid_actions: Dict[str, List[str]] = self.initialize_valid_actions()
def test_immutable_grammar(self):
    """Intended to check that a Grammar cannot be modified once created."""
    grammar = Grammar(r"""
        foo = 'bar'
        """)

    # NOTE(review): both calls below pass ``[grammar]`` -- a one-element
    # list -- as the single argument.  The TypeError / AttributeError
    # therefore come from indexing a list with a string and from the missing
    # ``list.update``, not from the Grammar itself.  Passing ``grammar``
    # directly looks like the intent; confirm Grammar's actual behaviour
    # before changing the call, since the outcome may differ.
    def assign_rule(target):
        target['foo'] = 1
    self.assertRaises(TypeError, assign_rule, [grammar])

    def merge_rules(target):
        replacement = Grammar(r"""
            baz = 'biff'
            """)
        target.update(replacement)
    self.assertRaises(AttributeError, merge_rules, [grammar])