def test_triplequotes1(self):
    """Relex three adjacent nodes that together form a triple-quoted string.

    The first relex must yield a single ``triplestring`` token. The token
    text is then edited twice: the first edit is expected to raise
    ``LexingError``; the second is only required to relex without raising.
    """
    grammar = """ "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """
    lexer = IncrementalLexer(grammar)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    opening = TextNode(Terminal("\"\"\""))
    content = TextNode(Terminal("abc"))
    closing = TextNode(Terminal("\"\"\""))
    # Link the nodes behind BOS in document order.
    bos.insert_after(opening)
    opening.insert_after(content)
    content.insert_after(closing)

    lexer.relex(opening)
    assert bos.next_term.symbol == Terminal("\"\"\"abc\"\"\"")
    assert bos.next_term.lookup == "triplestring"

    # Edit the merged token in place; relexing this text must be rejected.
    bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\"\""
    edited = bos.next_term
    with pytest.raises(LexingError):
        lexer.relex(edited)

    # A second edit of the token text must relex without raising.
    bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\""
    lexer.relex(bos.next_term)
def test_backwards_lexing(self):
    """Insert ``:``, ``:`` and ``=`` one node at a time, relexing after each.

    The two colons are expected to remain separate ``:`` tokens until the
    ``=`` is added; then a single ``::=`` token directly followed by EOS
    is expected.
    """
    lexer = IncrementalLexer(""" "::=":doublecolon "=":equal ":":singlecolon """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    # Step 1: a lone colon.
    first_colon = TextNode(Terminal(":"))
    bos.insert_after(first_colon)
    lexer.relex(first_colon)
    head = bos.next_term
    assert head.symbol.name == ":"
    assert head.lookup == "singlecolon"
    assert first_colon.lookahead == 1

    # Step 2: a second colon right behind the first.
    second_colon = TextNode(Terminal(":"))
    first_colon.insert_after(second_colon)
    lexer.relex(second_colon)
    assert second_colon.lookahead == 1
    head = bos.next_term
    assert head.symbol.name == ":"
    assert head.next_term.symbol.name == ":"

    # Step 3: the equals sign, relexed starting from the new node itself.
    equals = TextNode(Terminal("="))
    second_colon.insert_after(equals)
    lexer.relex(equals)
    head = bos.next_term
    assert head.symbol.name == "::="
    assert isinstance(head.next_term, EOS)
def test_multitoken_real_lbox_relex(self):
    """Relex a quoted string that spans a language box, then edit it.

    The text around the ``<SQL>`` box is expected to become one ``str``
    multi-node. After a carriage return is written into the first child,
    relexing is expected to give the carriage return a child of its own.
    """
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    head = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    tail = TextNode(Terminal("def\""))
    bos.insert_after(head)
    head.insert_after(box)
    box.insert_after(tail)

    lexer.relex(head)
    token = bos.next_term
    assert token.lookup == "str"
    assert token == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])

    # Put a carriage return into the first child and relex the multi-node.
    bos.next_term.children[0].symbol.name = "\"ab\rc"
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode([
        Terminal("\"ab"),
        Terminal("\r"),
        Terminal("c"),
        MagicTerminal("<SQL>"),
        Terminal("def\""),
    ])
def test_multitoken_real_lbox_relex_cut_off_string(self):
    """Edit the last child of a multi-node string so the string ends early.

    With an extra quote and a trailing quote left over, relexing is
    expected to raise ``LexingError``. With only the early quote, the
    multi-node is expected to end at ``d"`` and be followed by ``ef``.
    """
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    head = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    tail = TextNode(Terminal("def\""))
    bos.insert_after(head)
    head.insert_after(box)
    box.insert_after(tail)

    lexer.relex(head)
    token = bos.next_term
    assert token.lookup == "str"
    assert token == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    assert bos.next_term.lookahead == 0

    # First edit: relexing this text is expected to fail.
    bos.next_term.children[2].symbol.name = "d\"ef\""
    broken = bos.next_term
    with pytest.raises(LexingError):
        lexer.relex(broken)

    # Second edit: the string now closes early and "ef" is left behind it.
    bos.next_term.children[2].symbol.name = "d\"ef"
    lexer.relex(bos.next_term)
    token = bos.next_term
    assert token == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert token.next_term.symbol.name == "ef"
def test_normal_and_multi_to_normal_and_multi(self):
    """Relex a plain node that is followed by a multi-node.

    The plain node is expected to end up holding ``abcd`` and the
    multi-node the remaining quoted text; both must still be the very
    same node objects, and EOS must follow the multi-node.
    """
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str "[a-z]+":var """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    plain = TextNode(Terminal("ab"))
    multi = mk_multitextnode(
        [Terminal("cd\"e"), Terminal("\r"), Terminal("fg\"")])
    bos.insert_after(plain)
    plain.insert_after(multi)

    lexer.relex(plain)

    first = bos.next_term
    assert first.symbol.name == "abcd"
    assert first is plain

    second = bos.next_term.next_term
    assert second.lookup == "str"
    assert second == mk_multitextnode(
        [Terminal("\"e"), Terminal("\r"), Terminal("fg\"")])
    assert second is multi
    assert bos.next_term.next_term.next_term is eos
def test_multitoken_real_lbox_cut_off_string(self):
    """Relex a string over a language box whose tail has text left over.

    The first relex is expected to raise ``LexingError`` while still
    leaving a ``MultiTextNode`` built from the original node objects,
    followed by ``ef`` and a leftover node. After the leftover quote is
    removed by hand, relexing the leftover is expected to succeed.
    """
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    text1 = TextNode(Terminal("\"abc"))
    lbox = TextNode(MagicTerminal("<SQL>"))
    text2 = TextNode(Terminal("d\"ef\"g"))
    bos.insert_after(text1)
    text1.insert_after(lbox)
    lbox.insert_after(text2)

    with pytest.raises(LexingError):
        lexer.relex(text1)

    # State of the tree after the failed relex.
    assert type(bos.next_term) is MultiTextNode
    for index, original in enumerate((text1, lbox, text2)):
        assert bos.next_term.children[index] is original
    assert text2.symbol.name == "d\""
    assert bos.next_term.next_term.symbol.name == "ef"
    leftover = bos.next_term.next_term.next_term
    assert leftover.symbol.name == "\"g"

    # Drop the stray quote, flag the node as changed and relex from it.
    leftover.symbol.name = "g"
    leftover.changed = True
    lexer.relex(leftover)

    token = bos.next_term
    assert token.lookup == "str"
    assert token == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert token.next_term.symbol.name == "efg"
def test_normal_and_multi_to_multi3(self):
    """Relex a plain node plus a multi-node into a single multi-node.

    The plain node opens a quoted string and ends in a carriage return;
    the multi-node that follows closes the string. One five-child
    multi-node directly followed by EOS is expected.
    """
    lexer = IncrementalLexer(""" "\"[a-z\r\x80]*\"":str "[a-z]+":var """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    plain = TextNode(Terminal("\"abc\r"))
    multi = mk_multitextnode(
        [Terminal("def"), Terminal("\r"), Terminal("gh\"")])
    bos.insert_after(plain)
    plain.insert_after(multi)

    lexer.relex(plain)

    merged = bos.next_term
    assert merged == mk_multitextnode([
        Terminal("\"abc"),
        Terminal("\r"),
        Terminal("def"),
        Terminal("\r"),
        Terminal("gh\""),
    ])
    assert bos.next_term.next_term is eos
def test_backwards_lexing(self):
    """Insert ``:``, ``:`` and ``=`` one node at a time, relexing after each.

    The two colons are expected to remain separate ``:`` tokens until the
    ``=`` is added; then a single ``::=`` token directly followed by EOS
    is expected.
    """
    lexer = IncrementalLexer(""" "::=":doublecolon "=":equal ":":singlecolon """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    # Step 1: a lone colon.
    first_colon = TextNode(Terminal(":"))
    bos.insert_after(first_colon)
    lexer.relex(first_colon)
    head = bos.next_term
    assert head.symbol.name == ":"
    assert head.lookup == "singlecolon"
    assert first_colon.lookahead == 1

    # Step 2: a second colon right behind the first.
    second_colon = TextNode(Terminal(":"))
    first_colon.insert_after(second_colon)
    lexer.relex(second_colon)
    assert second_colon.lookahead == 1
    head = bos.next_term
    assert head.symbol.name == ":"
    assert head.next_term.symbol.name == ":"

    # Step 3: the equals sign, relexed starting from the new node itself.
    equals = TextNode(Terminal("="))
    second_colon.insert_after(equals)
    lexer.relex(equals)
    head = bos.next_term
    assert head.symbol.name == "::="
    assert isinstance(head.next_term, EOS)
def test_relex_stop(self):
    """Relex ``1+2`` placed in front of two existing nodes ``*`` and ``3``.

    The expected token sequence after relexing is ``1 + 2 * 3`` followed
    by EOS, with the tree root and its BOS/EOS children intact.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    fresh = TextNode(Terminal("1+2"))
    star = TextNode(Terminal("*"))
    three = TextNode(Terminal("3"))
    three.lookup = "INT"
    bos.insert_after(fresh)
    fresh.insert_after(star)
    star.insert_after(three)

    self.relex(fresh)

    root = tree.parent
    assert root.symbol == Nonterminal("Root")
    assert isinstance(tree.parent.children[0], BOS)
    assert isinstance(tree.parent.children[-1], EOS)

    # Walk the terminals left to right and compare them one by one.
    cursor = bos
    for text in ("1", "+", "2", "*", "3"):
        cursor = cursor.next_term
        assert cursor.symbol == Terminal(text)
    cursor = cursor.next_term
    assert isinstance(cursor, EOS)
def test_multitoken_real_lbox_multiple(self):
    """Relex a quoted string that contains two language boxes.

    All five nodes (text, ``<SQL>``, text, ``<Calc>``, text) are expected
    to be combined into one ``str`` multi-node.
    """
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    pieces = [
        TextNode(Terminal("\"abc")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("def")),
        TextNode(MagicTerminal("<Calc>")),
        TextNode(Terminal("ghi\"")),
    ]
    # Chain the nodes behind BOS in list order.
    anchor = bos
    for piece in pieces:
        anchor.insert_after(piece)
        anchor = piece

    lexer.relex(pieces[0])

    token = bos.next_term
    assert token.lookup == "str"
    assert token == mk_multitextnode([
        Terminal("\"abc"),
        MagicTerminal("<SQL>"),
        Terminal("def"),
        MagicTerminal("<Calc>"),
        Terminal("ghi\""),
    ])
def test_stringwrapper(self):
    """Check ``StringWrapper`` indexing and slicing over a chain of nodes.

    The wrapper is created on the first of five chained nodes and is
    compared against the plain string ``abc+1*3456``.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    nodes = [
        TextNode(Terminal("abc")),
        TextNode(Terminal("+")),
        TextNode(Terminal("1")),
        TextNode(Terminal("*")),
        TextNode(Terminal("3456")),
    ]
    anchor = bos
    for node in nodes:
        anchor.insert_after(node)
        anchor = node

    wrapper = StringWrapper(nodes[0])

    # Spot checks of single-character access, including node boundaries.
    spot_checks = ((0, "a"), (2, "c"), (3, "+"), (4, "1"),
                   (5, "*"), (6, "3"), (9, "6"))
    for index, char in spot_checks:
        assert wrapper[index] == char

    # Every slice must agree with slicing the equivalent plain string.
    s = "abc+1*3456"
    size = len(s)
    for start in range(size):
        for stop in range(size):
            assert wrapper[start:stop] == s[start:stop]
            print(start, stop, wrapper[start:stop])
def test_token_iter_newline_lbox(self):
    """Iterate tokens over a string containing a newline and a language box.

    Five chained nodes (quoted text, carriage return, text, ``<SQL>`` box,
    closing text) are expected to come back as one ``string`` token whose
    text is a list with ``lbph`` standing in for the box. The iterator
    must then be exhausted.
    """
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]

    new1 = TextNode(Terminal("'a"))
    new2 = TextNode(Terminal("\r"))
    new3 = TextNode(Terminal("b"))
    new4 = TextNode(MagicTerminal("<SQL>"))
    new5 = TextNode(Terminal("c'"))
    bos.insert_after(new1)
    new1.insert_after(new2)
    new2.insert_after(new3)
    new3.insert_after(new4)
    new4.insert_after(new5)

    it = self.lexer.get_token_iter(new1)
    # Use the built-in next() instead of the iterator's .next() method:
    # the method only exists on Python 2 iterators, while next() works on
    # both Python 2.6+ and Python 3.
    assert next(it) == (["'a", "\r", "b", lbph, "c'"], "string", 1, [
        TextNode(Terminal("'a")),
        TextNode(Terminal("\r")),
        TextNode(Terminal("b")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("c'"))
    ])
    with pytest.raises(StopIteration):
        next(it)
def test_relex3(self):
    """Relex ``1+2``, ``345``, ``6+`` in front of two further nodes.

    The expected tokens are ``1 + 23456 +``; the node holding ``789``
    must afterwards still be the same object with the same symbol.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    new1 = TextNode(Terminal("1+2"))
    new2 = TextNode(Terminal("345"))
    new3 = TextNode(Terminal("6+"))
    new4 = TextNode(Terminal("789"))  # this should never be touched
    new5 = TextNode(Terminal("+"))  # this should never be touched
    anchor = bos
    for fresh in (new1, new2, new3, new4, new5):
        anchor.insert_after(fresh)
        anchor = fresh

    self.relex(new1)

    assert tree.parent.symbol == Nonterminal("Root")
    assert isinstance(tree.parent.children[0], BOS)
    assert isinstance(tree.parent.children[-1], EOS)

    # Walk the relexed tokens from the start.
    node = bos
    for text in ("1", "+", "23456", "+"):
        node = node.next_term
        assert node.symbol == Terminal(text)

    # check that 789 hasn't been relexed
    untouched = node.next_term
    assert untouched is new4
    assert node.next_term.symbol is new4.symbol
def test_relex3(self):
    """Relex ``1+2``, ``345``, ``6+`` in front of two already-lexed nodes.

    The trailing nodes ``789`` and ``+`` carry a lookup before relexing.
    The expected tokens are ``1 + 23456 +``; the ``789`` node must
    afterwards still be the same object with the same symbol.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    new1 = TextNode(Terminal("1+2"))
    new2 = TextNode(Terminal("345"))
    new3 = TextNode(Terminal("6+"))
    new4 = TextNode(Terminal("789"))  # this should never be touched
    new4.lookup = "INT"
    new5 = TextNode(Terminal("+"))  # this should never be touched
    new5.lookup = "plus"
    anchor = bos
    for fresh in (new1, new2, new3, new4, new5):
        anchor.insert_after(fresh)
        anchor = fresh

    self.relex(new1)

    assert tree.parent.symbol == Nonterminal("Root")
    assert isinstance(tree.parent.children[0], BOS)
    assert isinstance(tree.parent.children[-1], EOS)

    # Walk the relexed tokens from the start.
    node = bos
    for text in ("1", "+", "23456", "+"):
        node = node.next_term
        assert node.symbol == Terminal(text)

    # check that 789 hasn't been relexed
    untouched = node.next_term
    assert untouched is new4
    assert node.next_term.symbol is new4.symbol
def test_token_iter2(self):
    """Iterate tokens over two adjacent digit nodes.

    The nodes ``12`` and ``34`` are expected to be reported as a single
    ``INT`` token ``1234`` that lists both nodes as its source.
    """
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]

    new = TextNode(Terminal("12"))
    new2 = TextNode(Terminal("34"))
    bos.insert_after(new)
    new.insert_after(new2)

    # Advance with the built-in next() rather than the iterator's bound
    # .next attribute, which exists only on Python 2 iterators.
    token_iter = self.lexer.lexer.get_token_iter(new)
    assert next(token_iter) == ("1234", "INT", 1, [
        TextNode(Terminal("12")),
        TextNode(Terminal("34"))
    ], 0)
def test_stringwrapper(self):
    """Check ``StringWrapper`` indexing and slicing over a chain of nodes.

    The wrapper is created on the first of five chained nodes and is
    compared against the plain string ``abc+1*3456``.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    nodes = [
        TextNode(Terminal("abc")),
        TextNode(Terminal("+")),
        TextNode(Terminal("1")),
        TextNode(Terminal("*")),
        TextNode(Terminal("3456")),
    ]
    anchor = bos
    for node in nodes:
        anchor.insert_after(node)
        anchor = node

    wrapper = StringWrapper(nodes[0])

    # Spot checks of single-character access, including node boundaries.
    spot_checks = ((0, "a"), (2, "c"), (3, "+"), (4, "1"),
                   (5, "*"), (6, "3"), (9, "6"))
    for index, char in spot_checks:
        assert wrapper[index] == char

    # Every slice must agree with slicing the equivalent plain string.
    s = "abc+1*3456"
    size = len(s)
    for start in range(size):
        for stop in range(size):
            assert wrapper[start:stop] == s[start:stop]
            print(start, stop, wrapper[start:stop])
def test_token_iter_newline(self):
    """Iterate tokens over a quoted string that contains a carriage return.

    The three chained nodes are expected to be reported as one ``string``
    token whose text is a three-element list; the iterator must then be
    exhausted.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    opening = TextNode(Terminal("'a"))
    newline = TextNode(Terminal("\r"))
    closing = TextNode(Terminal("b'"))
    bos.insert_after(opening)
    opening.insert_after(newline)
    newline.insert_after(closing)

    tokens = self.lexer.get_token_iter(opening)
    token = next(tokens)
    expected_nodes = [
        TextNode(Terminal("'a")),
        TextNode(Terminal("\r")),
        TextNode(Terminal("b'")),
    ]
    assert token == (["'a", "\r", "b'"], "string", 0, expected_nodes, 0)

    # Nothing may follow the string token.
    with pytest.raises(StopIteration):
        next(tokens)
def test_lookback_wagner(self):
    """Check ``update_lookback`` on six nodes with hand-set lookaheads.

    All six nodes are registered in ``lexer.relexed`` and chained behind
    BOS; their lookahead values are assigned manually before
    ``update_lookback`` is called on the first node.
    """
    grammar = """ "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """
    lexer = IncrementalLexer(grammar)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    text1 = TextNode(Terminal("\n"))
    text2 = TextNode(Terminal(" "))
    text3 = TextNode(Terminal("/*aaaaaaaaaaaaaaaaaaaaa*/"))
    text4 = TextNode(Terminal(" "))
    text5 = TextNode(Terminal("if"))
    text6 = TextNode(Terminal("("))
    nodes = (text1, text2, text3, text4, text5, text6)

    for node in nodes:
        lexer.relexed.add(node)

    anchor = bos
    for node in nodes:
        anchor.insert_after(node)
        anchor = node

    # Lookahead values are set by hand, one per node in chain order.
    for node, lookahead in zip(nodes, (27, 1, 0, 1, 1, 0)):
        node.lookahead = lookahead

    lexer.update_lookback(text1, text1)

    expected_lookbacks = ((text2, 1), (text3, 2), (text4, 3),
                          (text5, 1), (text6, 1))
    for node, lookback in expected_lookbacks:
        assert node.lookback == lookback
def test_token_iter_lbox(self):
    """Iterate tokens over two numbers separated by a language box.

    Three tokens are expected in order: ``INT`` for ``12``, a token with
    empty type whose text is ``lbph`` for the box, and ``INT`` for ``34``.
    A further ``next`` call must raise.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    left = TextNode(Terminal("12"))
    box = TextNode(MagicTerminal("<SQL>"))
    right = TextNode(Terminal("34"))
    bos.insert_after(left)
    left.insert_after(box)
    box.insert_after(right)

    tokens = self.lexer.get_token_iter(left)

    token = next(tokens)
    assert token == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)

    token = next(tokens)
    assert token == (lbph, "", 0, [TextNode(MagicTerminal("<SQL>"))], 0)

    token = next(tokens)
    assert token == ("34", "INT", 1, [TextNode(Terminal("34"))], 0)

    with pytest.raises(Exception):
        next(tokens)
def test_token_iter_lbox(self):
    """Iterate tokens over two numbers separated by a language box.

    Uses a lexer that only knows ``INT``. Three tokens are expected in
    order: ``INT`` for ``12``, a token with empty type whose text is
    ``lbph`` for the box, and ``INT`` for ``34``.
    """
    lexer = IncrementalLexer(""" "[0-9]+":INT """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]

    new = TextNode(Terminal("12"))
    new2 = TextNode(MagicTerminal("<SQL>"))
    new3 = TextNode(Terminal("34"))
    bos.insert_after(new)
    new.insert_after(new2)
    new2.insert_after(new3)

    # Advance with the built-in next() rather than the iterator's bound
    # .next attribute, which exists only on Python 2 iterators.
    token_iter = lexer.lexer.get_token_iter(new)
    assert next(token_iter) == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)
    assert next(token_iter) == (lbph, "", 0, [TextNode(MagicTerminal("<SQL>"))], 0)
    assert next(token_iter) == ("34", "INT", 1, [TextNode(Terminal("34"))], 0)
def test_token_iter_lbox_multi(self):
    """Iterate tokens over a quoted string that contains a language box.

    The three chained nodes are expected to be reported as one ``STRING``
    token whose text is a list with ``lbph`` standing in for the box.
    """
    lexer = IncrementalLexer(""" "[0-9]+":INT "\"[^\"]*\"":STRING """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]

    new = TextNode(Terminal("\"abc"))
    new2 = TextNode(MagicTerminal("<SQL>"))
    new3 = TextNode(Terminal("def\""))
    bos.insert_after(new)
    new.insert_after(new2)
    new2.insert_after(new3)

    # Advance with the built-in next() rather than the iterator's bound
    # .next attribute, which exists only on Python 2 iterators.
    token_iter = lexer.lexer.get_token_iter(new)
    assert next(token_iter) == (["\"abc", lbph, "def\""], "STRING", 1, [
        TextNode(Terminal("\"abc")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("def\""))
    ])
def test_token_iter_lbox_x80(self):
    """Iterate tokens over a quoted string containing a plain ``\\x80`` node.

    The middle node is an ordinary ``Terminal`` (not a ``MagicTerminal``),
    so the three nodes are expected to be reported as one ``STRING``
    token whose text is a single plain string.
    """
    lexer = IncrementalLexer(""" "[0-9]+":INT "\"[^\"]*\"":STRING """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]

    new = TextNode(Terminal("\"abc"))
    new2 = TextNode(Terminal("\x80"))
    new3 = TextNode(Terminal("def\""))
    bos.insert_after(new)
    new.insert_after(new2)
    new2.insert_after(new3)

    # Advance with the built-in next() rather than the iterator's bound
    # .next attribute, which exists only on Python 2 iterators.
    token_iter = lexer.lexer.get_token_iter(new)
    assert next(token_iter) == ("\"abc\x80def\"", "STRING", 0, [
        TextNode(Terminal("\"abc")),
        TextNode(Terminal("\x80")),
        TextNode(Terminal("def\""))
    ], 0)
def test_relex_update_insert(self):
    """Relex ``1``, ``2``, ``+3`` and check which node objects are reused.

    The first node is expected to be reused for ``12``, the second to be
    flagged as deleted, and the third to be reused for ``+`` with ``3``
    following it.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    one = TextNode(Terminal("1"))
    two = TextNode(Terminal("2"))
    plus_three = TextNode(Terminal("+3"))
    bos.insert_after(one)
    one.insert_after(two)
    two.insert_after(plus_three)

    self.relex(one)

    # "1" and "2" collapse into the first node object.
    twelve = bos.next_term
    assert twelve.symbol == Terminal("12")
    assert twelve is one
    assert two.deleted is True

    # "+3" is split; the original node object keeps the "+".
    plus = twelve.next_term
    assert plus.symbol == Terminal("+")
    assert plus is plus_three
    assert plus.next_term.symbol == Terminal("3")
def test_relex_altered_comment(self):
    """Relex ``1+`` followed by a ``#`` comment and a language box.

    The expected result is ``1``, ``+`` and then a multi-node holding
    the comment text together with the ``<SQL>`` box.
    """
    # The plus rule is spelled "\\+" so the grammar still receives the
    # two characters backslash-plus: a bare "\+" in a non-raw literal is
    # an invalid escape sequence and triggers a warning on modern Python.
    lexer = IncrementalLexer(""" "#[^\r]*":comment "[0-9]+":INT "\\+":PLUS """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]

    text1 = TextNode(Terminal("1+"))
    text2 = TextNode(Terminal("#abc"))
    text3 = TextNode(MagicTerminal("<SQL>"))
    bos.insert_after(text1)
    text1.insert_after(text2)
    text2.insert_after(text3)

    lexer.relex(text1)

    assert bos.next_term.symbol == Terminal("1")
    assert bos.next_term.next_term.symbol == Terminal("+")
    assert bos.next_term.next_term.next_term == mk_multitextnode(
        [Terminal("#abc"), MagicTerminal("<SQL>")])
def test_lexer_returns_nodes(self):
    """Relex a quoted string spanning a language box into one multi-node.

    The resulting token is expected to have lookup ``str``, to equal the
    three-part multi-node, and to have a lookahead of 0.
    """
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    head = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    tail = TextNode(Terminal("def\""))
    bos.insert_after(head)
    head.insert_after(box)
    box.insert_after(tail)

    lexer.relex(head)

    token = bos.next_term
    assert token.lookup == "str"
    assert token == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    assert bos.next_term.lookahead == 0
def test_relex4(self):
    """Relex six chained nodes whose token boundaries cross node borders.

    The node texts ``1``, ``2``, ``+``, ``3+4``, ``+4``, ``5`` are
    expected to relex into ``12 + 3 + 4 + 45`` followed by EOS.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    new1 = TextNode(Terminal("1"))
    new2 = TextNode(Terminal("2"))
    new3 = TextNode(Terminal("+"))
    new4 = TextNode(Terminal("3+4"))
    new5 = TextNode(Terminal("+4"))
    new6 = TextNode(Terminal("5"))
    anchor = bos
    for fresh in (new1, new2, new3, new4, new5, new6):
        anchor.insert_after(fresh)
        anchor = fresh

    self.relex(new1)

    assert tree.parent.symbol == Nonterminal("Root")
    assert isinstance(tree.parent.children[0], BOS)
    assert isinstance(tree.parent.children[-1], EOS)

    # Walk the terminals left to right and compare them one by one.
    cursor = bos
    for text in ("12", "+", "3", "+", "4", "+", "45"):
        cursor = cursor.next_term
        assert cursor.symbol == Terminal(text)
    cursor = cursor.next_term
    assert isinstance(cursor, EOS)
def test_token_iter_lbox2(self):
    """Iterate tokens over a number followed by a string with a language box.

    Two tokens are expected: ``INT`` for ``12`` and one ``string`` token
    whose text is a list with ``lbph`` standing in for the ``<SQL>`` box.
    The iterator must then be exhausted.
    """
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]

    number = TextNode(Terminal("12"))
    str_open = TextNode(Terminal("'string with"))
    box = TextNode(MagicTerminal("<SQL>"))
    str_close = TextNode(Terminal("inside'"))
    bos.insert_after(number)
    number.insert_after(str_open)
    str_open.insert_after(box)
    box.insert_after(str_close)

    tokens = self.lexer.get_token_iter(number)

    token = next(tokens)
    assert token == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)

    token = next(tokens)
    expected_nodes = [
        TextNode(Terminal("'string with")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("inside'")),
    ]
    assert token == (["'string with", lbph, "inside'"], "string", 0,
                     expected_nodes, 0)

    with pytest.raises(StopIteration):
        next(tokens)
def test_lookback(self):
    """Check ``update_lookback`` on seven nodes with hand-set lookaheads.

    Seven nodes are chained behind BOS; only the first six are registered
    in ``lexer.relexed``. Lookahead values are assigned manually before
    ``update_lookback`` is called on the first node.
    """
    grammar = """ "\"\"\"[^\"]*\"\"\"":triplestring "\"[^\"]*\"":string "[a-z]+":var """
    lexer = IncrementalLexer(grammar)
    tree = AST()
    tree.init()
    bos = tree.parent.children[0]
    eos = tree.parent.children[1]

    text1 = TextNode(Terminal("abcd"))
    text2 = TextNode(Terminal("ef"))
    text3 = TextNode(Terminal("ghij"))
    text4 = TextNode(Terminal("k"))
    text5 = TextNode(Terminal("lm"))
    text6 = TextNode(Terminal("nopqr"))
    text7 = TextNode(Terminal("stu"))
    nodes = (text1, text2, text3, text4, text5, text6, text7)

    anchor = bos
    for node in nodes:
        anchor.insert_after(node)
        anchor = node

    # The last node is deliberately left out of the relexed set.
    for node in nodes[:-1]:
        lexer.relexed.add(node)

    # Lookahead values are set by hand, one per node in chain order.
    for node, lookahead in zip(nodes, (7, 7, 1, 0, 0, 3, 0)):
        node.lookahead = lookahead

    lexer.update_lookback(text1, text1)

    for node, lookback in zip(nodes, (0, 1, 2, 3, 3, 0, 1)):
        assert node.lookback == lookback