def extract_alphabet(alphabet, inputdata, fixed_start=False):
    """
    Receives a sequence and an alphabet,
    returns a list of PositionTokens with all of the parts of the sequence
    that are a subset of the alphabet

    Every slice ``inputdata[i:j]`` is handed to a lexer built from
    ``alphabet``; slices whose lexing result is truthy become candidates.
    The candidates are passed through ``filter_subsets`` (presumably this
    drops candidates covered by larger ones -- confirm against its
    definition) and then wrapped as PositionTokens.

    :param alphabet: object with an ``alphabet`` attribute; that attribute
        is used as the base alphabet of the lexer.
    :param inputdata: sliceable sequence. If its first element is a Token
        or PositionToken, every element is replaced by its ``content``.
    :param fixed_start: when true only slices starting at index 0 are tried.
    :return: list of ``PositionToken(content, None, left, right)``; an
        empty list when ``inputdata`` is empty.
    """
    if not inputdata:
        return []
    base_alphabet = alphabet.alphabet
    if isinstance(inputdata[0], (Token, PositionToken)):
        inputdata = [x.content for x in inputdata]
    lexer = lexer_factory(alphabet, base_alphabet)
    totallen = len(inputdata)
    # With a fixed start only index 0 is a valid left boundary.
    max_start = 1 if fixed_start else totallen
    result = []
    for i in range(max_start):
        # Slices are at least one element long and may reach the end of
        # the input.
        for j in range(i + 1, totallen + 1):
            segment = inputdata[i:j]
            try:
                lexed = lexer(segment)
            except Exception:
                # A slice the lexer rejects is simply not a candidate.
                # Only Exception is caught so that KeyboardInterrupt and
                # SystemExit still propagate.
                continue
            if lexed:
                result.append((i, j, segment))
    result = filter_subsets(result)
    return [PositionToken(content, None, left, right) for (left, right, content) in result]
def test_Concept(self):
    # Lex the word "red" (as a str and as a list of PositionTokens) and
    # translate the resulting tokens into concept strings.
    colour_words = [String(word) for word in ("red", "green", "blue")]
    lexer = lexer_factory(Choice(colour_words, ascii_encoding), ascii_encoding)
    concepts = {
        "red": "color red",
        "green": "color green",
        "blue": "color blue",
    }

    def concept_translator_fun(inputtokens):
        translated = []
        for token in inputtokens:
            text = str(token)
            if text not in concepts:
                raise Exception("%s,%s" % (text, text.__class__.__name__))
            translated.append(concepts[text])
        return translated

    self.assertListEqual(concept_translator_fun(lexer("red")), ["color red"])

    # Same input, one PositionToken per character.
    red_list = []
    for position, character in enumerate("red"):
        red_list.append(
            PositionToken(content=character, gd=ascii_encoding,
                          left=position, right=position + 1))
    self.assertListEqual(concept_translator_fun(lexer(red_list)), ["color red"])
def testOverlappingLexing(self):
    # An integer immediately followed by a date must be split into two
    # lexed items, for both str and list-of-characters input.
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month},
    )
    lexer = lexer_factory(Alphabet([integer, date]))

    text = "123411/11/2001"
    expected = [("1234", integer), ("11/11/2001", date)]
    self.assertListEqual(lexer(text), expected)
    self.assertListEqual(lexer(list(text)), expected)
def test_Concept(self):
    # Lex "red" from a str and from a list of characters, then translate
    # the (content, grammar) pairs into concept strings.
    words = [Sequence.from_string(name) for name in ("red", "green", "blue")]
    lexer = lexer_factory(Choice(words))

    # (string form, character-list form, resulting concept)
    spellings = (
        ("red", ["r", "e", "d"], "color red"),
        ("green", ["g", "r", "e", "e", "n"], "color green"),
        ("blue", ["b", "l", "u", "e"], "color blue"),
    )

    def concept_translator_fun(inputtokens):
        concepts = []
        for content, _ in inputtokens:
            for word, letters, concept in spellings:
                # Explicit == on both forms: content may be a str or a list.
                if content == word or content == letters:
                    concepts.append(concept)
                    break
            else:
                raise Exception("%s,%s" % (content, content.__class__.__name__))
        return concepts

    self.assertListEqual(concept_translator_fun(lexer("red")), ["color red"])
    self.assertListEqual(concept_translator_fun(lexer(list("red"))), ["color red"])
def test_Concept(self):
    # The colour alphabet is lexed over ascii_encoding; the tokens are
    # then mapped to concept strings via their str() form.
    red, green, blue = (String(name) for name in ("red", "green", "blue"))
    alphabet = Choice([red, green, blue], ascii_encoding)
    lexer = lexer_factory(alphabet, ascii_encoding)

    known = (
        ("red", "color red"),
        ("green", "color green"),
        ("blue", "color blue"),
    )

    def concept_translator_fun(inputtokens):
        def translate(token):
            x = str(token)
            for word, concept in known:
                if x == word:
                    return concept
            raise Exception("%s,%s" % (x, x.__class__.__name__))

        result = []
        for token in inputtokens:
            result.append(translate(token))
        return result

    # Plain string input.
    self.assertListEqual(concept_translator_fun(lexer("red")), ["color red"])

    # Input already split into one PositionToken per character.
    red_list = [
        PositionToken(content=char, gd=ascii_encoding, left=index, right=index + 1)
        for index, char in enumerate("red")
    ]
    self.assertListEqual(concept_translator_fun(lexer(red_list)), ["color red"])
def extract_alphabet(alphabet, inputdata, fixed_start=False):
    """
    Receives a sequence and an alphabet,
    returns a list of PositionTokens with all of the parts of the sequence
    that are a subset of the alphabet

    Every slice ``inputdata[i:j]`` is handed to a lexer built from
    ``alphabet``. A slice becomes a candidate only when the lexer returns
    exactly one item; that item's ``gd`` is recorded with the slice. The
    candidates are passed through ``filter_subsets`` (presumably this drops
    candidates covered by larger ones -- confirm against its definition)
    and then wrapped as PositionTokens.

    :param alphabet: object with an ``alphabet`` attribute; that attribute
        is used as the base alphabet of the lexer.
    :param inputdata: sliceable sequence to scan.
    :param fixed_start: when true only slices starting at index 0 are tried.
    :return: list of ``PositionToken(content, gd, left, right)``; an empty
        list when ``inputdata`` is empty.
    """
    if not inputdata:
        return []
    base_alphabet = alphabet.alphabet
    lexer = lexer_factory(alphabet, base_alphabet)
    totallen = len(inputdata)
    # With a fixed start only index 0 is a valid left boundary.
    max_start = 1 if fixed_start else totallen
    result = []
    for i in range(max_start):
        # Slices are at least one element long and may reach the end of
        # the input.
        for j in range(i + 1, totallen + 1):
            segment = inputdata[i:j]
            try:
                lexed = lexer(segment)
                # Slices that lex into zero or several items are skipped.
                if lexed and len(lexed) == 1:
                    result.append((i, j, segment, lexed[0].gd))
            except Exception:
                # A slice the lexer rejects (or whose result cannot be
                # inspected) is not a candidate. Only Exception is caught
                # so KeyboardInterrupt and SystemExit still propagate.
                continue
    result = filter_subsets(result)
    return [
        PositionToken(content, gd, left, right)
        for (left, right, content, gd) in result
    ]
def testSimpleLexing(self):
    """Lex a digit string, given as a str and as a list of Tokens"""
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month},
    )
    lexer = lexer_factory(Choice([integer, date]), ascii_encoding)

    digits = "1234"
    self.assertListEqual(lexer(digits), [Token("1234", integer)])

    # Same characters, each wrapped in a Token over ascii_encoding.
    token_input = [Token(char, ascii_encoding) for char in digits]
    self.assertListEqual(lexer(token_input), [Token("1234", integer)])
def testSimpleLexing(self):
    """Lex a digit string, given as a str and as a list of characters"""
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    date = load_bnf_file(
        "pydsl/contrib/grammar/Date.bnf",
        {'integer': integer, 'DayOfMonth': day_of_month},
    )
    lexer = lexer_factory(Alphabet([integer, date]))

    digits = "1234"
    self.assertListEqual(lexer(digits), [(["1", "2", "3", "4"], integer)])
    self.assertListEqual(lexer(list(digits)), [(["1", "2", "3", "4"], integer)])
def testEmptyInput(self):
    # Lexing the empty string must give a falsy result.
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    grammar_args = {'integer': integer, 'DayOfMonth': day_of_month}
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_args)
    lexer = lexer_factory(Choice([integer, date]), ascii_encoding)
    lexed = lexer("")
    self.assertFalse(lexed)
def test_main_case(self):
    # End-to-end: lex "1+2", translate tokens to concept words, parse the
    # words with a small BNF grammar and evaluate the parse tree.
    input_data = "1+2"
    ascii_lexer = lexer_factory(ascii_encoding, None)
    ascii_tokens = list(ascii_lexer(input_data))
    self.assertListEqual([str(token) for token in ascii_tokens], ['1', '+', '2'])

    concept_names = {"1": "one", "2": "two", "+": "addition"}

    def concept_translator_fun(inputtokens):
        words = []
        for token in inputtokens:
            text = str(token)
            if text not in concept_names:
                raise Exception(token.__class__.__name__)
            words.append(concept_names[text])
        return words

    def to_number(number):
        # Falls through (returning None) for anything but "one" / "two".
        for word, value in (("one", 1), ("two", 2)):
            if number == word:
                return value
        return None

    math_expression_concepts = concept_translator_fun(ascii_tokens)
    self.assertListEqual(math_expression_concepts, ['one', 'addition', 'two'])

    grammar_def = [
        "S ::= E",
        "E ::= one addition two",
        "one := String,one",
        "two := String,two",
        "addition := String,addition",
    ]
    from pydsl.file.BNF import strlist_to_production_set
    production_set = strlist_to_production_set(grammar_def, {})
    from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
    rdp = BacktracingErrorRecursiveDescentParser(production_set)
    parse_tree = rdp(math_expression_concepts)
    from pydsl.grammar.symbol import NonTerminalSymbol

    def parse_tree_walker(tree):
        if tree.symbol == NonTerminalSymbol("S"):
            # S has a single child: the expression.
            return parse_tree_walker(tree.childlist[0])
        if tree.symbol == NonTerminalSymbol("E"):
            # Children 0 and 2 are the operands; child 1 is the operator.
            first = to_number(tree.childlist[0].symbol.gd)
            second = to_number(tree.childlist[2].symbol.gd)
            return first + second
        raise Exception

    self.assertEqual(parse_tree_walker(parse_tree[0]), 3)
def test_main_case(self):
    # End-to-end: lex "1+2", translate token contents to concept words,
    # parse the words with a small BNF grammar and evaluate the parse tree.
    input_data = "1+2"
    ascii_lexer = lexer_factory(ascii_encoding)
    ascii_tokens = [token.content for token in ascii_lexer(input_data)]
    self.assertListEqual([str(content) for content in ascii_tokens], ['1', '+', '2'])

    concept_names = (("1", "one"), ("2", "two"), ("+", "addition"))

    def concept_translator_fun(inputtokens):
        words = []
        for content in inputtokens:
            for symbol, word in concept_names:
                if content == symbol:
                    words.append(word)
                    break
            else:
                raise Exception(content.__class__.__name__)
        return words

    def to_number(number):
        # The compared value is a list of characters; anything else falls
        # through and returns None.
        for word, value in (("one", 1), ("two", 2)):
            if number == list(word):
                return value
        return None

    math_expression_concepts = concept_translator_fun(ascii_tokens)
    self.assertListEqual(math_expression_concepts, ['one', 'addition', 'two'])

    grammar_def = [
        "S ::= E",
        "E ::= one addition two",
        "one := String,one",
        "two := String,two",
        "addition := String,addition",
    ]
    from pydsl.file.BNF import strlist_to_production_set
    production_set = strlist_to_production_set(grammar_def, {})
    from pydsl.parser.backtracing import BacktracingErrorRecursiveDescentParser
    rdp = BacktracingErrorRecursiveDescentParser(production_set)
    parse_tree = rdp(math_expression_concepts)
    from pydsl.grammar.symbol import NonTerminalSymbol

    def parse_tree_walker(tree):
        if tree.symbol == NonTerminalSymbol("S"):
            # S has a single child: the expression.
            return parse_tree_walker(tree.childlist[0])
        if tree.symbol == NonTerminalSymbol("E"):
            # Children 0 and 2 are the operands; child 1 is the operator.
            first = to_number(tree.childlist[0].symbol.gd)
            second = to_number(tree.childlist[2].symbol.gd)
            return first + second
        raise Exception

    self.assertEqual(parse_tree_walker(parse_tree[0]), 3)
def testSimpleLexing(self):
    """Lex a digit string, given as a str and as a list of Tokens"""
    integer = RegularExpression("^[0123456789]*$")
    grammar_args = {
        'integer': integer,
        'DayOfMonth': load_python_file('pydsl/contrib/grammar/DayOfMonth.py'),
    }
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_args)
    mydef = Choice([integer, date])
    lexer = lexer_factory(mydef, ascii_encoding)

    from_string = lexer("1234")
    self.assertListEqual(from_string, [Token("1234", integer)])

    # Same characters, each wrapped in a Token over ascii_encoding.
    wrapped = []
    for character in "1234":
        wrapped.append(Token(character, ascii_encoding))
    from_tokens = lexer(wrapped)
    self.assertListEqual(from_tokens, [Token("1234", integer)])
def testOverlappingLexing(self):
    # An integer immediately followed by a date must be split into two
    # lexed items, for both str and list-of-characters input.
    integer = RegularExpression("^[0123456789]*$")
    grammar_args = {
        'integer': integer,
        'DayOfMonth': load_python_file('pydsl/contrib/grammar/DayOfMonth.py'),
    }
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_args)
    lexer = lexer_factory(Choice([integer, date]), ascii_encoding)

    text = "123411/11/2001"
    expected = [("1234", integer), ("11/11/2001", date)]
    self.assertListEqual(lexer(text), expected)
    self.assertListEqual(lexer(list(text)), expected)
def testSecondLevelGrammar(self):
    # Build two first-level alphabets, check a sequence defined over one
    # of them, then lex "ab" with a lexer over the choice of both.
    a, b, c = (String(letter) for letter in "abc")
    x, y, z = (String(letter) for letter in "xyz")
    first_level = Choice([a, b, c])
    first_levelb = Choice([x, y, z])
    second_level = Sequence([a, b], base_alphabet=first_level)

    from pydsl.Check import checker_factory
    checker = checker_factory(second_level)
    self.assertTrue(checker([a, b]))

    second_level_alphabet = Choice([first_level, first_levelb])
    combined_base = first_level + first_levelb
    lexer = lexer_factory(second_level_alphabet, base=combined_base)
    expected = [("a", first_level), ("b", first_level)]
    self.assertListEqual(lexer("ab"), expected)
def testSecondLevelGrammar(self):
    # Build two first-level alphabets, check a sequence defined over one
    # of them, then lex "ab" with a lexer over the choice of both.
    a, b, c = (String(letter) for letter in "abc")
    x, y, z = (String(letter) for letter in "xyz")
    first_level = Choice([a, b, c])
    first_levelb = Choice([x, y, z])
    second_level = Sequence([a, b], base_alphabet=first_level)

    from pydsl.check import checker_factory
    checker = checker_factory(second_level)
    self.assertTrue(checker([a, b]))

    second_level_alphabet = Choice([first_level, first_levelb])
    combined_base = first_level + first_levelb
    lexer = lexer_factory(second_level_alphabet, base=combined_base)
    expected = [Token("a", first_level), Token("b", first_level)]
    self.assertListEqual(lexer("ab"), expected)
def __init__(self, bnfgrammar):
    # Build the lexer from the grammar's alphabet over ascii_encoding
    # before delegating the rest of the setup to the parent class.
    grammar_alphabet = bnfgrammar.alphabet
    self._lexer = lexer_factory(grammar_alphabet, ascii_encoding)
    super().__init__(bnfgrammar)
def testLexer(self):
    """Lexing string1 with productionset1's alphabet yields a non-empty result"""
    grammar_alphabet = productionset1.alphabet
    lexer = lexer_factory(grammar_alphabet, ascii_encoding)
    tokens = list(lexer(string1))
    self.assertTrue(tokens)
def testEmptyInput(self):
    # Lexing the empty string must give a falsy result.
    integer = RegularExpression("^[0123456789]*$")
    day_of_month = load_python_file('pydsl/contrib/grammar/DayOfMonth.py')
    grammar_args = {'integer': integer, 'DayOfMonth': day_of_month}
    date = load_bnf_file("pydsl/contrib/grammar/Date.bnf", grammar_args)
    lexer = lexer_factory(Alphabet([integer, date]))
    lexed = lexer("")
    self.assertFalse(lexed)
def testLexer(self):
    """Lexing string1 with productionset1's alphabet yields a non-empty result"""
    grammar_alphabet = productionset1.alphabet
    lexer = lexer_factory(grammar_alphabet)
    tokens = list(lexer(string1))
    self.assertTrue(tokens)
def __init__(self, bnfgrammar):
    # Keep a lexer built from the grammar's alphabet, then the grammar
    # itself as the production set.
    grammar_alphabet = bnfgrammar.alphabet
    self._lexer = lexer_factory(grammar_alphabet)
    self._productionset = bnfgrammar