def test_Concept(self):
    """Translate lexed colour tokens into concept strings.

    The word "red" is handed to the lexer once as a ``str`` and once as a
    list of single characters; in both cases the translator must produce
    ``["color red"]``.
    """
    alphabet = Choice([String("red"), String("green"), String("blue")])
    lexer = lexer_factory(alphabet)

    # (word, concept) pairs, checked in this order for every token.
    concept_table = (
        ("red", "color red"),
        ("green", "color green"),
        ("blue", "color blue"),
    )

    def concept_translator_fun(inputtokens):
        concepts = []
        for content, _ in inputtokens:
            for word, concept in concept_table:
                # The content may be the whole word or its character list.
                if content == word or content == list(word):
                    concepts.append(concept)
                    break
            else:
                # No known colour matched this token.
                raise Exception(
                    "%s,%s" % (content, content.__class__.__name__))
        return concepts

    self.assertListEqual(
        concept_translator_fun(lexer("red")), ["color red"])
    self.assertListEqual(
        concept_translator_fun(lexer(list("red"))), ["color red"])
def test_Concept(self):
    """Check that lexed colour words map onto their concept labels.

    The input "red" is lexed both as a plain string and as a list of its
    characters; each run is expected to translate to ``["color red"]``.
    """
    colour_grammars = [String("red"), String("green"), String("blue")]
    lexer = lexer_factory(Choice(colour_grammars))

    def concept_translator_fun(inputtokens):
        def spelled(content, word):
            # True when the content equals the word itself or the list of
            # its characters.
            return content == word or content == list(word)

        translated = []
        for content, _ in inputtokens:
            if spelled(content, "red"):
                translated.append("color red")
            elif spelled(content, "green"):
                translated.append("color green")
            elif spelled(content, "blue"):
                translated.append("color blue")
            else:
                raise Exception(
                    "%s,%s" % (content, content.__class__.__name__))
        return translated

    expected = ["color red"]
    self.assertListEqual(concept_translator_fun(lexer("red")), expected)
    self.assertListEqual(
        concept_translator_fun(lexer(list("red"))), expected)
def testOverlappingLexing(self):
    """Lex digits directly followed by a date into two tokens.

    The input is supplied as a string and as a list of characters; both
    must yield the integer token followed by the date token.
    """
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month})
    lexer = lexer_factory(GrammarCollection([integer, date]))

    text = "123411/11/2001"
    expected = [("1234", integer), ("11/11/2001", date)]
    self.assertListEqual(lexer(text), expected)
    self.assertListEqual(lexer(list(text)), expected)
def extract_alphabet(alphabet, inputdata, fixed_start = False):
    """Return the parts of ``inputdata`` that the alphabet's lexer accepts.

    Every slice ``inputdata[i:j]`` is tried against a lexer built from
    ``alphabet``; slices that lex to a truthy result are collected, passed
    through ``filter_subsets`` and returned as ``PositionToken`` objects.

    :param alphabet: alphabet object; its ``alphabet`` attribute is passed
        to ``lexer_factory`` as the base alphabet.
    :param inputdata: sequence to scan. If its first element is a ``Token``
        or ``PositionToken``, each element is replaced by its ``content``.
    :param fixed_start: when true, only slices starting at index 0 are
        considered.
    :returns: list of ``PositionToken(content, None, left, right)``; an
        empty list when ``inputdata`` is empty.
    """
    if not inputdata:
        return []
    base_alphabet = alphabet.alphabet
    if isinstance(inputdata[0], (Token, PositionToken)):
        inputdata = [x.content for x in inputdata]
    lexer = lexer_factory(alphabet, base_alphabet)
    totallen = len(inputdata)
    max_start = 1 if fixed_start else totallen
    result = []
    for i in range(max_start):
        # Slices have at least one element and may run to the very end.
        for j in range(i + 1, totallen + 1):
            try:
                piece = inputdata[i:j]
                lexed = lexer(piece)
            except Exception:
                # A slice that cannot be lexed is simply not a match.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                continue
            if lexed:
                result.append((i, j, piece))
    result = filter_subsets(result)
    return [PositionToken(content, None, left, right)
            for (left, right, content) in result]
def testSimpleLexing(self):
    """Lex a bare integer, then an integer directly followed by a date."""
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month})
    lexer = lexer_factory(Choice([integer, date]))

    self.assertListEqual(lexer("1234"), [("1234", integer)])
    self.assertListEqual(
        lexer("123411/11/2001"),
        [("1234", integer), ("11/11/2001", date)])
def testSimpleLexing(self):
    """Lex "1234" given as a string and as a list of characters.

    Both calls are expected to return a single token whose content is the
    list of digit characters, paired with the integer grammar.
    """
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month})
    lexer = lexer_factory(GrammarCollection([integer, date]))

    digits = "1234"
    self.assertListEqual(lexer(digits), [(list(digits), integer)])
    self.assertListEqual(lexer(list(digits)), [(list(digits), integer)])
def testAlphabetChain(self):
    """Lex "abac" through a two-level alphabet chain.

    The first level is a choice of the letters a/b/c; the second level is
    a choice of the sequences "ab" and "ac" built on the first. The chain
    must expose the first level as ``first`` and the lexer must report the
    two sequences as the grammars of the resulting tokens.
    """
    letters = Choice([String('a'), String('b'), String('c')])
    ab_sequence = Sequence([String('a'), String('b')])
    ac_sequence = Sequence([String('a'), String('c')])
    pairs = Choice([ab_sequence, ac_sequence], base_alphabet=letters)
    chain = AlphabetChain([letters, pairs])

    self.assertEqual(letters.first, chain.first)

    lexer = lexer_factory(chain)
    tokens = lexer("abac", include_gd=True)
    # Index 1 of each token is compared against the expected grammars.
    self.assertListEqual(
        [token[1] for token in tokens],
        [ab_sequence, ac_sequence])
def test_main_case(self):
    """Run "1+2" through lexing, concept translation, parsing and evaluation.

    Steps: lex the text with an ascii encoding, map each token content to
    a concept word, parse the concept list with a backtracking recursive
    descent parser, then walk the first parse tree to compute the sum.
    """
    input_data = "1+2"
    ascii_lexer = lexer_factory(Encoding("ascii"))
    ascii_tokens = [token.content for token in ascii_lexer(input_data)]
    self.assertListEqual(
        [str(token) for token in ascii_tokens], ['1', '+', '2'])

    # (symbol, concept) pairs, checked in this order for every token.
    symbol_to_concept = (("1", "one"), ("2", "two"), ("+", "addition"))

    def concept_translator_fun(inputtokens):
        concepts = []
        for token in inputtokens:
            for symbol, concept in symbol_to_concept:
                if token == symbol:
                    concepts.append(concept)
                    break
            else:
                # Unknown token: report its class name.
                raise Exception(token.__class__.__name__)
        return concepts

    def to_number(number):
        # Anything other than "one"/"two" yields None.
        if number == "one":
            return 1
        return 2 if number == "two" else None

    math_expression_concepts = concept_translator_fun(ascii_tokens)
    self.assertListEqual(
        math_expression_concepts, ['one', 'addition', 'two'])

    grammar_def = [
        "S ::= E",
        "E ::= one addition two",
        "one := String,one",
        "two := String,two",
        "addition := String,addition",
    ]
    from pydsl.File.BNF import strlist_to_production_set
    production_set = strlist_to_production_set(grammar_def, {})
    from pydsl.Parser.Backtracing import BacktracingErrorRecursiveDescentParser
    rdp = BacktracingErrorRecursiveDescentParser(production_set)
    parse_tree = rdp(math_expression_concepts)
    from pydsl.Grammar.Symbol import NonTerminalSymbol

    def parse_tree_walker(tree):
        if tree.symbol == NonTerminalSymbol("S"):
            # S has a single child: descend into it.
            return parse_tree_walker(tree.childlist[0])
        if tree.symbol == NonTerminalSymbol("E"):
            # Children 0 and 2 are the operands; child 1 is the operator.
            left_operand = to_number(tree.childlist[0].symbol.gd)
            right_operand = to_number(tree.childlist[2].symbol.gd)
            return left_operand + right_operand
        raise Exception

    self.assertEqual(parse_tree_walker(parse_tree[0]), 3)
def testEmptyInput(self):
    """Lexing the empty string must give a falsy result."""
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    grammar_repository = {'integer': integer, 'DayOfMonth': day_of_month}
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_repository)
    lexer = lexer_factory(GrammarCollection([integer, date]))
    tokens = lexer("")
    self.assertFalse(tokens)
def test_main_case(self):
    """Run "1+2" through lexing, concept translation, parsing and evaluation.

    Steps: lex the text with an ascii encoding, map each token to a
    concept word, parse the concept list with a backtracking recursive
    descent parser, then walk the first parse tree to compute the sum.
    """
    input_data = "1+2"
    from pydsl.Grammar.Alphabet import Encoding
    ascii_lexer = lexer_factory(Encoding("ascii"))
    ascii_tokens = list(ascii_lexer(input_data))
    self.assertListEqual(
        [str(token) for token in ascii_tokens], ['1', '+', '2'])

    # (symbol, concept) pairs, checked in this order for every token.
    symbol_to_concept = (("1", "one"), ("2", "two"), ("+", "addition"))

    def concept_translator_fun(inputtokens):
        concepts = []
        for token in inputtokens:
            for symbol, concept in symbol_to_concept:
                if token == symbol:
                    concepts.append(concept)
                    break
            else:
                # Unknown token: report its class name.
                raise Exception(token.__class__.__name__)
        return concepts

    def to_number(number):
        # Anything other than "one"/"two" yields None.
        if number == "one":
            return 1
        return 2 if number == "two" else None

    math_expression_concepts = concept_translator_fun(ascii_tokens)
    self.assertListEqual(
        math_expression_concepts, ['one', 'addition', 'two'])

    grammar_def = [
        "S ::= E",
        "E ::= one addition two",
        "one := String,one",
        "two := String,two",
        "addition := String,addition",
    ]
    from pydsl.File.BNF import strlist_to_production_set
    production_set = strlist_to_production_set(grammar_def, {})
    from pydsl.Parser.Backtracing import BacktracingErrorRecursiveDescentParser
    rdp = BacktracingErrorRecursiveDescentParser(production_set)
    parse_tree = rdp(math_expression_concepts)
    from pydsl.Grammar.Symbol import NonTerminalSymbol

    def parse_tree_walker(tree):
        if tree.symbol == NonTerminalSymbol("S"):
            # S has a single child: descend into it.
            return parse_tree_walker(tree.childlist[0])
        if tree.symbol == NonTerminalSymbol("E"):
            # Children 0 and 2 are the operands; child 1 is the operator.
            left_operand = to_number(tree.childlist[0].symbol.gd)
            right_operand = to_number(tree.childlist[2].symbol.gd)
            return left_operand + right_operand
        raise Exception

    self.assertEqual(parse_tree_walker(parse_tree[0]), 3)
def testSimpleLexing(self):
    """Lex "1234" from a string and from a character list.

    Each call must return one token: the list of digit characters paired
    with the integer grammar.
    """
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    grammar_repository = {'integer': integer, 'DayOfMonth': day_of_month}
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_repository)
    lexer = lexer_factory(GrammarCollection([integer, date]))

    characters = ["1", "2", "3", "4"]
    self.assertListEqual(lexer("1234"), [(characters, integer)])
    self.assertListEqual(lexer(list("1234")), [(characters, integer)])
def testOverlappingLexing(self):
    """Split "123411/11/2001" into an integer token and a date token.

    The same expectation is checked for string input and for input given
    as a list of characters.
    """
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    grammar_repository = {'integer': integer, 'DayOfMonth': day_of_month}
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_repository)
    collection = GrammarCollection([integer, date])
    lexer = lexer_factory(collection)

    source = "123411/11/2001"
    integer_token = ("1234", integer)
    date_token = ("11/11/2001", date)
    self.assertListEqual(lexer(source), [integer_token, date_token])
    self.assertListEqual(lexer(list(source)), [integer_token, date_token])
def testSecondLevelGrammar(self):
    """Check a sequence built on a choice, then lex over two choices.

    First a checker for the sequence a-b (based on the a/b/c choice) must
    accept ``[a, b]``. Then a lexer over the choice of both letter groups
    must tag each character of "ab" with the a/b/c choice.
    """
    a, b, c, x, y, z = [String(letter) for letter in "abcxyz"]
    abc_choice = Choice([a, b, c])
    xyz_choice = Choice([x, y, z])
    ab_sequence = Sequence([a, b], base_alphabet=abc_choice)

    from pydsl.Check import checker_factory
    checker = checker_factory(ab_sequence)
    self.assertTrue(checker([a, b]))

    both_choices = Choice([abc_choice, xyz_choice])
    lexer = lexer_factory(both_choices, base=abc_choice + xyz_choice)
    self.assertListEqual(
        lexer("ab"),
        [("a", abc_choice), ("b", abc_choice)])
def testSecondLevelGrammar(self):
    """Exercise a second-level sequence and a lexer over two choices.

    The checker built from the a-b sequence must accept ``[a, b]``; the
    lexer built from the choice of the two letter groups must return each
    character of "ab" paired with the first letter group.
    """
    a = String("a")
    b = String("b")
    lower_group = Choice([a, b, String("c")])
    upper_group = Choice([String("x"), String("y"), String("z")])
    pair = Sequence([a, b], base_alphabet=lower_group)

    from pydsl.Check import checker_factory
    accepts = checker_factory(pair)
    self.assertTrue(accepts([a, b]))

    group_choice = Choice([lower_group, upper_group])
    lexer = lexer_factory(group_choice, base=lower_group + upper_group)
    expected = [("a", lower_group), ("b", lower_group)]
    self.assertListEqual(lexer("ab"), expected)
def extract_alphabet(alphabet, inputdata, fixed_start=False):
    """Return the parts of ``inputdata`` that the alphabet's lexer accepts.

    Every slice ``inputdata[i:j]`` is tried against a lexer built from
    ``alphabet``; slices that lex to a truthy result are collected, passed
    through ``filter_subsets`` and returned as ``PositionToken`` objects.

    :param alphabet: alphabet object. For an ``Encoding`` no base alphabet
        is given to ``lexer_factory``; otherwise its ``alphabet`` attribute
        is used as the base alphabet.
    :param inputdata: sequence to scan. If its first element is a ``Token``
        or ``PositionToken``, each element is replaced by its ``content``.
    :param fixed_start: when true, only slices starting at index 0 are
        considered.
    :returns: list of ``PositionToken(content, None, left, right)``; an
        empty list when ``inputdata`` is empty.
    """
    if not inputdata:
        return []
    if isinstance(alphabet, Encoding):
        base_alphabet = None
    else:
        base_alphabet = alphabet.alphabet
    if isinstance(inputdata[0], (Token, PositionToken)):
        inputdata = [x.content for x in inputdata]
    lexer = lexer_factory(alphabet, base_alphabet)
    totallen = len(inputdata)
    max_start = 1 if fixed_start else totallen
    result = []
    for i in range(max_start):
        # Slices have at least one element and may run to the very end.
        for j in range(i + 1, totallen + 1):
            try:
                piece = inputdata[i:j]
                lexed = lexer(piece)
            except Exception:
                # A slice that cannot be lexed is simply not a match.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt and SystemExit propagate.
                continue
            if lexed:
                result.append((i, j, piece))
    result = filter_subsets(result)
    return [
        PositionToken(content, None, left, right)
        for (left, right, content) in result
    ]
def testLexerGenerator(self):
    """Push text chunks through the lexer's coroutine into a collector.

    Four chunks are sent to ``lexer_generator`` wrapped around a collecting
    coroutine; the collector must end up with the same four chunks in the
    order they were sent.
    """
    abc = String("abc")
    numbers = String("123")
    mylexer = lexer_factory(Choice([abc, numbers]))

    def text_generator(receiver):
        # Prime the coroutine, feed it every chunk, then shut it down.
        next(receiver)
        for chunk in ("123", "abc", "abc", "123"):
            receiver.send(chunk)
        receiver.close()

    result = []

    def collector():
        # Store every value sent in; stop quietly when closed.
        while True:
            try:
                received = yield
            except GeneratorExit:
                return
            result.append(received)

    text_generator(mylexer.lexer_generator(collector()))
    self.assertListEqual(result, ["123", "abc", "abc", "123"])
def test_Concept(self):
    """Translate the lexed word "red" into its concept string."""
    alphabet = Choice([String("red"), String("green"), String("blue")])
    lexer = lexer_factory(alphabet)

    # (word, concept) pairs, checked in this order for every token.
    concept_table = (
        ("red", "color red"),
        ("green", "color green"),
        ("blue", "color blue"),
    )

    def concept_translator_fun(inputtokens):
        concepts = []
        for content, _ in inputtokens:
            for word, concept in concept_table:
                if content == word:
                    concepts.append(concept)
                    break
            else:
                # Unknown token content: report its class name.
                raise Exception(content.__class__.__name__)
        return concepts

    self.assertListEqual(
        concept_translator_fun(lexer("red")), ["color red"])
def testLexer(self):
    """Lexing ``string1`` with the production set's alphabet yields tokens."""
    alphabet = productionset1.alphabet
    lexer = lexer_factory(alphabet)
    tokens = list(lexer(string1))
    # A non-empty token list is the only requirement here.
    self.assertTrue(tokens)
def __init__(self, bnfgrammar):
    """Store the grammar and build a lexer from its alphabet.

    :param bnfgrammar: grammar object; its ``alphabet`` attribute is passed
        to ``lexer_factory`` and the object itself is kept as the
        production set.
    """
    grammar_alphabet = bnfgrammar.alphabet
    self._lexer = lexer_factory(grammar_alphabet)
    self._productionset = bnfgrammar
def testLexer(self):
    """Lex a bare integer and an integer directly followed by a date.

    Uses the ``alphabet``, ``integer`` and ``date`` attributes of the test
    case.
    """
    lexer = lexer_factory(self.alphabet)

    only_integer = lexer("1234")
    self.assertListEqual(only_integer, [("1234", self.integer)])

    integer_then_date = lexer("123411/11/2001")
    self.assertListEqual(
        integer_then_date,
        [("1234", self.integer), ("11/11/2001", self.date)])
def testEmptyInput(self):
    """An empty string must lex to a falsy result."""
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month})
    collection = GrammarCollection([integer, date])
    lexer = lexer_factory(collection)
    self.assertFalse(lexer(""))