def testHTMLTable(self):
    """Check the TrueHTMLTable grammar against balanced and unbalanced markup.

    A table whose ``<tr>`` tags match must give a truthy ``get_trees``
    result; the same table without the opening ``<tr>`` must give a falsy one.
    """
    integer_rule = RegularExpression("^[0123456789]*$")
    grammar = load_bnf_file("pydsl/contrib/grammar/TrueHTMLTable.bnf",
                            {'integer': integer_rule})
    table_parser = BacktracingErrorRecursiveDescentParser(grammar)

    # Balanced markup: both the lexer output and the parse result are truthy.
    good_tokens = lex(grammar.alphabet, ascii_encoding,
                      "<table><tr><td>1</td></tr></table>")
    self.assertTrue(good_tokens)
    good_trees = table_parser.get_trees(good_tokens)
    self.assertTrue(good_trees)

    # Opening <tr> removed: the lexer output is materialised into a list
    # and the parser is expected to return nothing truthy.
    bad_tokens = list(lex(grammar.alphabet, ascii_encoding,
                          "<table><td>1</td></tr></table>"))
    bad_trees = table_parser.get_trees(bad_tokens)
    self.assertFalse(bad_trees)
def testLogLine(self):
    """Check the logline grammar against one accepted and one rejected line.

    The accepted line is lexed with the "general" lexer before being
    checked; the rejected line (``"GOT"`` instead of ``"GET"``) is handed to
    the checker as a plain string.
    """
    repository = {
        'space': String(' '),
        'integer': RegularExpression("^[0123456789]*$"),
        # Raw string: "\." inside a normal literal is an invalid escape
        # sequence and triggers a warning on modern Python. The pattern
        # text itself is unchanged.
        'ipv4': RegularExpression(
            r"^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"),
        # NOTE(review): [A-z] also spans the punctuation between 'Z' and
        # 'a' in ASCII; left as-is because the grammar may rely on it.
        'characters': RegularExpression("^[A-z]+$"),
    }
    grammar = load_bnf_file("pydsl/contrib/grammar/logline.bnf", repository)
    checker = checker_factory(grammar)

    original_string = '1.2.3.4 - - [1/1/2003:11:11:11 +2] "GET" 1 1 "referer" "useragent"'
    tokenized = lex(grammar.alphabet, ascii_encoding, original_string,
                    force_lexer="general")
    self.assertTrue(checker.check(tokenized))

    # Same line with "GOT" as the quoted method; passed without lexing,
    # exactly as the test has always done.
    rejected_string = '1.2.3.4 - - [1/1/2003:11:11:11 +2] "GOT" 1 1 "referer" "useragent"'
    self.assertFalse(checker.check(rejected_string))
def test_calculator_simple(self):
    """Parse two additions with an LL(1) parser and evaluate the trees.

    The first expression is given to the parser as a raw string, the
    second as the first element of each item produced by ``lex``.
    """
    from pydsl.file.BNF import strlist_to_production_set
    from pydsl.grammar import RegularExpression

    rules = [
        "S ::= E",
        "E ::= number operator number",
        "number := Word,integer,max",
        "operator := String,+",
    ]
    digits = {'integer': RegularExpression("^[0123456789]*$")}
    rdp = LL1RecursiveDescentParser(strlist_to_production_set(rules, digits))
    parse_tree = rdp("1+2")

    def parse_tree_walker(node):
        """Evaluate an ``S``/``E`` node; any other symbol raises."""
        from pydsl.grammar.symbol import NonTerminalSymbol
        if node.symbol == NonTerminalSymbol("S"):
            # S wraps a single E child.
            return parse_tree_walker(node.childlist[0])
        if node.symbol == NonTerminalSymbol("E"):
            # Children 0 and 2 are the operands; child 1 is the operator.
            left = int(str(node.childlist[0].content))
            right = int(str(node.childlist[2].content))
            return left + right
        raise Exception

    self.assertEqual(parse_tree_walker(parse_tree[0]), 3)

    from pydsl.grammar.PEG import Choice
    from pydsl.grammar.definition import String, RegularExpression
    from pydsl.encoding import ascii_encoding
    number_rule = RegularExpression("^[0123456789]*$")
    math_alphabet = Choice([number_rule, Choice([String('+')])])
    from pydsl.lex import lex

    # Second round: feed pre-lexed tokens instead of a string.
    tokens = [item[0] for item in lex(math_alphabet, ascii_encoding, "11+2")]
    parse_tree = rdp(tokens)
    self.assertEqual(parse_tree_walker(parse_tree[0]), 13)
def testHTMLTable(self):
    """Exercise the TrueHTMLTable grammar with a valid and an invalid table.

    The valid table must lex and parse to truthy values; the table lacking
    its opening ``<tr>`` must not produce a truthy parse result.
    """
    bnf_path = "pydsl/contrib/grammar/TrueHTMLTable.bnf"
    rule_repository = {'integer': RegularExpression("^[0123456789]*$")}
    production_rules = load_bnf_file(bnf_path, rule_repository)
    html_parser = BacktracingErrorRecursiveDescentParser(production_rules)

    # Valid document.
    valid_markup = "<table><tr><td>1</td></tr></table>"
    valid_lexed = lex(production_rules.alphabet, ascii_encoding, valid_markup)
    self.assertTrue(valid_lexed)
    self.assertTrue(html_parser.get_trees(valid_lexed))

    # Invalid document: <tr> is closed but never opened. The lexer output
    # is turned into a list before being parsed.
    invalid_markup = "<table><td>1</td></tr></table>"
    invalid_lexed = list(
        lex(production_rules.alphabet, ascii_encoding, invalid_markup))
    self.assertFalse(html_parser.get_trees(invalid_lexed))
def testLogicalExpression(self):
    """Check the LogicalExpression grammar on ``True&&False`` and ``True&|False``.

    The first expression is lexed and must yield a truthy parse result; the
    second is passed to the parser as a raw string and must yield a falsy one.
    """
    true_false = load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
    grammar = load_bnf_file("pydsl/contrib/grammar/LogicalExpression.bnf",
                            {'TrueFalse': true_false})
    logic_parser = BacktracingErrorRecursiveDescentParser(grammar)

    # Only the first element of each lexer item is forwarded to the parser.
    lexer_output = lex(grammar.alphabet, ascii_encoding, "True&&False")
    tokens = [item[0] for item in lexer_output]
    self.assertTrue(logic_parser.get_trees(tokens))

    # "&|" is the operator used for the rejected case.
    self.assertFalse(logic_parser.get_trees("True&|False"))
def testRecursiveDescentParserNullBad(self):
    """Check that ``string4`` is rejected by the ``productionset2`` parser.

    The parser is called twice: once with the first element of every item
    produced by ``lex``, and once with the plain list of characters of
    ``string4``. Both calls must return a falsy result.
    """
    descentparser = BacktracingErrorRecursiveDescentParser(productionset2)
    # The former ``ascii_encoding = ascii_encoding`` self-assignment was a
    # no-op on the freshly imported local name and has been dropped.
    from pydsl.encoding import ascii_encoding

    lexed_string4 = [
        x[0] for x in lex(productionset2.alphabet, ascii_encoding, string4)
    ]
    self.assertFalse(descentparser(lexed_string4))
    self.assertFalse(descentparser(list(string4)))
def testRecursiveDescentParserNullBad(self):
    """Check that ``string4`` is rejected by the ``productionset2`` parser.

    The parser is called with the direct output of ``lex`` and then with
    the plain list of characters of ``string4``. Both calls must return a
    falsy result.
    """
    descentparser = BacktracingErrorRecursiveDescentParser(productionset2)
    # The former ``ascii_encoding = ascii_encoding`` self-assignment was a
    # no-op on the freshly imported local name and has been dropped.
    from pydsl.encoding import ascii_encoding

    lexed_string4 = lex(productionset2.alphabet, ascii_encoding, string4)
    self.assertFalse(descentparser(lexed_string4))
    self.assertFalse(descentparser(list(string4)))
def testLogicalExp(self):
    """Parse a single hand-built ``True`` token with the LogicalExpression grammar.

    ``self.tokelist5`` is first lexed with the TrueFalse alphabet and must
    give exactly one token; the parser is then fed a manually constructed
    token and must return a truthy result.
    """
    true_false = load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
    repository = {'TrueFalse': true_false}
    grammar = load_bnf_file("pydsl/contrib/grammar/LogicalExpression.bnf",
                            repository)
    logic_parser = BacktracingErrorRecursiveDescentParser(grammar)

    lexed = list(lex(repository['TrueFalse'].alphabet, ascii_encoding,
                     self.tokelist5))
    self.assertEqual(len(lexed), 1)

    # FIXME: the lexed tokens are not passed on to the parser; a hand-built
    # token replaces them. The disabled second lexing pass is kept below.
    #tokens = [x[0] for x in lex(productionrulesetlogical.alphabet, Encoding('ascii'), tokens)] #FIXME
    manual_tokens = [Token('True', repository['TrueFalse'])]
    self.assertTrue(logic_parser.get_trees(manual_tokens))
def testLogLine(self):
    """Validate one accepted and one rejected line against the logline grammar.

    The accepted line goes through ``lex`` with ``force_lexer="general"``
    before being checked. The rejected line differs only in the quoted
    method (``"GOT"``) and is given to the checker as a plain string.
    """
    # Raw string for the ipv4 pattern: "\." in a normal literal is an
    # invalid escape sequence and triggers a warning on modern Python.
    # The regex text is identical.
    ipv4_pattern = r"^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$"
    repository = {
        'space': String(' '),
        'integer': RegularExpression("^[0123456789]*$"),
        'ipv4': RegularExpression(ipv4_pattern),
        # NOTE(review): [A-z] also covers the ASCII punctuation between
        # 'Z' and 'a'; kept unchanged in case the grammar depends on it.
        'characters': RegularExpression("^[A-z]+$"),
    }
    grammar = load_bnf_file("pydsl/contrib/grammar/logline.bnf", repository)
    checker = checker_factory(grammar)

    original_string = '1.2.3.4 - - [1/1/2003:11:11:11 +2] "GET" 1 1 "referer" "useragent"'
    tokenized = lex(grammar.alphabet, ascii_encoding, original_string,
                    force_lexer="general")
    self.assertTrue(checker.check(tokenized))

    # The rejected line is checked without lexing, as before.
    self.assertFalse(checker.check(
        '1.2.3.4 - - [1/1/2003:11:11:11 +2] "GOT" 1 1 "referer" "useragent"'))
def testTrueFalse(self):
    """Lex ``self.tokelist5`` with the TrueFalse grammar and expect a parse.

    The parser's ``get_trees`` result for the lexed tokens must be truthy.
    """
    grammar = load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
    bool_parser = BacktracingErrorRecursiveDescentParser(grammar)

    # Materialise the lexer output before handing it to the parser.
    tokens = list(lex(grammar.alphabet, ascii_encoding, self.tokelist5))
    self.assertTrue(bool_parser.get_trees(tokens))
def testLogicalExpression(self):
    """Accept ``True&&False`` and reject ``True&|False``.

    The accepted expression is lexed first and only the first element of
    each lexer item is parsed. The rejected expression is handed to the
    parser directly as a string.
    """
    nested_grammar = load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
    repository = {'TrueFalse': nested_grammar}
    production_rules = load_bnf_file(
        "pydsl/contrib/grammar/LogicalExpression.bnf", repository)
    expression_parser = BacktracingErrorRecursiveDescentParser(
        production_rules)

    accepted = [
        entry[0]
        for entry in lex(production_rules.alphabet, ascii_encoding,
                         "True&&False")
    ]
    accepted_trees = expression_parser.get_trees(accepted)
    self.assertTrue(accepted_trees)

    rejected_trees = expression_parser.get_trees("True&|False")
    self.assertFalse(rejected_trees)
def testLogicalExp(self):
    """Check that one ``True`` token parses under the LogicalExpression grammar.

    Lexing ``self.tokelist5`` with the TrueFalse alphabet must produce
    exactly one item. The parser itself receives a manually built token
    and its ``get_trees`` result must be truthy.
    """
    repository = {
        'TrueFalse': load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
    }
    production_rules = load_bnf_file(
        "pydsl/contrib/grammar/LogicalExpression.bnf", repository)
    expression_parser = BacktracingErrorRecursiveDescentParser(
        production_rules)

    true_false_alphabet = repository['TrueFalse'].alphabet
    lexed_items = list(lex(true_false_alphabet, ascii_encoding,
                           self.tokelist5))
    self.assertEqual(len(lexed_items), 1)

    # FIXME: the lexer output is discarded and a hand-built token is used
    # in its place; the disabled re-lexing step is preserved below.
    #tokens = [x[0] for x in lex(productionrulesetlogical.alphabet, Encoding('ascii'), tokens)] #FIXME
    hand_built = [Token('True', repository['TrueFalse'])]
    trees = expression_parser.get_trees(hand_built)
    self.assertTrue(trees)
def test_calculator_simple(self):
    """Evaluate ``1+2`` and ``11+2`` through an LL(1) recursive descent parser.

    The first expression is passed as a string. The second is lexed with a
    small alphabet (digits or ``+``) and the lexer items are passed as-is.
    """
    from pydsl.file.BNF import strlist_to_production_set
    from pydsl.grammar import RegularExpression

    grammar_lines = [
        "S ::= E",
        "E ::= number operator number",
        "number := Word,integer,max",
        "operator := String,+",
    ]
    integer_repository = {'integer': RegularExpression("^[0123456789]*$")}
    productions = strlist_to_production_set(grammar_lines,
                                            integer_repository)
    rdp = LL1RecursiveDescentParser(productions)
    parse_tree = rdp("1+2")

    def parse_tree_walker(subtree):
        """Return the sum encoded by an ``S`` or ``E`` node; raise otherwise."""
        from pydsl.grammar.symbol import NonTerminalSymbol
        if subtree.symbol == NonTerminalSymbol("S"):
            # Descend into the only child of S.
            return parse_tree_walker(subtree.childlist[0])
        if subtree.symbol == NonTerminalSymbol("E"):
            # Operands sit at positions 0 and 2 of the E node.
            first = int(str(subtree.childlist[0].content))
            second = int(str(subtree.childlist[2].content))
            return first + second
        raise Exception

    first_result = parse_tree_walker(parse_tree[0])
    self.assertEqual(first_result, 3)

    from pydsl.grammar.PEG import Choice
    from pydsl.grammar.definition import String, RegularExpression
    from pydsl.encoding import ascii_encoding
    plus_choice_members = [RegularExpression("^[0123456789]*$"),
                           Choice([String('+')])]
    math_alphabet = Choice(plus_choice_members)
    from pydsl.lex import lex

    # Lexer items are forwarded unchanged to the parser.
    tokens = list(lex(math_alphabet, ascii_encoding, "11+2"))
    parse_tree = rdp(tokens)
    second_result = parse_tree_walker(parse_tree[0])
    self.assertEqual(second_result, 13)
def testTrueFalse(self):
    """Check that ``self.tokelist5`` parses under the TrueFalse grammar.

    The input is lexed with the grammar's own alphabet, collected into a
    list, and ``get_trees`` must return a truthy result for it.
    """
    production_rules = load_bnf_file("pydsl/contrib/grammar/TrueFalse.bnf")
    true_false_parser = BacktracingErrorRecursiveDescentParser(
        production_rules)

    lexed_items = lex(production_rules.alphabet, ascii_encoding,
                      self.tokelist5)
    token_list = list(lexed_items)

    trees = true_false_parser.get_trees(token_list)
    self.assertTrue(trees)