def test_token_iter_newline_lbox(self):
    """A string containing a raw newline and a language box is yielded
    as one multitoken, after which the iterator is exhausted."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    pieces = [
        TextNode(Terminal("'a")),
        TextNode(Terminal("\r")),
        TextNode(Terminal("b")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("c'")),
    ]
    # Chain all pieces into the tree right after BOS.
    prev = bos
    for node in pieces:
        prev.insert_after(node)
        prev = node
    it = self.lexer.get_token_iter(pieces[0])
    assert it.next() == (["'a", "\r", "b", lbph, "c'"], "string", 1, [
        TextNode(Terminal("'a")),
        TextNode(Terminal("\r")),
        TextNode(Terminal("b")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("c'"))
    ])
    # No further tokens after the combined multitoken.
    with pytest.raises(StopIteration):
        it.next()
def test_multitoken_real_lbox_relex(self):
    """A string literal interrupted by a language box relexes to a
    single multitext node; injecting a newline into it splits the
    token around the newline on the next relex."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    opener = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    closer = TextNode(Terminal("def\""))
    # Chain the three nodes in after BOS.
    prev = bos
    for node in (opener, box, closer):
        prev.insert_after(node)
        prev = node
    lexer.relex(opener)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    # Inject a carriage return into the first child and relex: the
    # multitoken must be re-split around the newline.
    bos.next_term.children[0].symbol.name = "\"ab\rc"
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode([
        Terminal("\"ab"),
        Terminal("\r"),
        Terminal("c"),
        MagicTerminal("<SQL>"),
        Terminal("def\"")
    ])
def test_multitoken_real_lbox_cut_off_string(self):
    """Relexing content whose string token terminates early raises a
    LexingError but still leaves a partially built multitext node;
    fixing the leftover text and relexing again succeeds."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    n_open = TextNode(Terminal("\"abc"))
    n_box = TextNode(MagicTerminal("<SQL>"))
    n_tail = TextNode(Terminal("d\"ef\"g"))
    prev = bos
    for node in (n_open, n_box, n_tail):
        prev.insert_after(node)
        prev = node
    # The trailing '"g' cannot be lexed into a complete token.
    pytest.raises(LexingError, lexer.relex, n_open)
    assert type(bos.next_term) is MultiTextNode
    # The original nodes were reused as children of the multitext node.
    assert bos.next_term.children[0] is n_open
    assert bos.next_term.children[1] is n_box
    assert bos.next_term.children[2] is n_tail
    assert n_tail.symbol.name == "d\""
    assert bos.next_term.next_term.symbol.name == "ef"
    leftover = bos.next_term.next_term.next_term
    assert leftover.symbol.name == "\"g"
    # Remove the stray quote and relex only the leftover node.
    leftover.symbol.name = "g"
    leftover.changed = True
    lexer.relex(leftover)
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert bos.next_term.next_term.symbol.name == "efg"
def test_multitoken_real_lbox_relex_cut_off_string(self):
    """After merging a boxed string into a multitext node, altering its
    tail so the string cannot terminate raises a LexingError; removing
    the offending quote lets the relex succeed with a split token."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[a-z]+":var """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    head = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    tail = TextNode(Terminal("def\""))
    prev = bos
    for node in (head, box, tail):
        prev.insert_after(node)
        prev = node
    lexer.relex(head)
    merged = bos.next_term
    assert merged.lookup == "str"
    assert merged == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    assert merged.lookahead == 0
    # An extra closing quote makes the remainder unlexable.
    bos.next_term.children[2].symbol.name = "d\"ef\""
    pytest.raises(LexingError, lexer.relex, bos.next_term)
    # Dropping the trailing quote lets the relex succeed and splits
    # the token after 'd"'.
    bos.next_term.children[2].symbol.name = "d\"ef"
    lexer.relex(bos.next_term)
    assert bos.next_term == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("d\"")])
    assert bos.next_term.next_term.symbol.name == "ef"
def test_multitoken_real_lbox_multiple(self):
    """A string spanning two different language boxes lexes into a
    single five-child multitext node."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    parts = [
        TextNode(Terminal("\"abc")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("def")),
        TextNode(MagicTerminal("<Calc>")),
        TextNode(Terminal("ghi\"")),
    ]
    prev = bos
    for node in parts:
        prev.insert_after(node)
        prev = node
    lexer.relex(parts[0])
    assert bos.next_term.lookup == "str"
    assert bos.next_term == mk_multitextnode([
        Terminal("\"abc"),
        MagicTerminal("<SQL>"),
        Terminal("def"),
        MagicTerminal("<Calc>"),
        Terminal("ghi\"")
    ])
def heuristic_line(self, errornode):
    """Search backwards through the error node's line for positions
    where a language box of one of the known sublanguages could be
    inserted so that the resulting parse swallows `errornode`.

    Walks `prev_term` links from the error node back to the previous
    `<return>` token (or BOS), and at every position asks the syntax
    table whether the language-box terminal could be shifted/reduced
    there.  Candidate ends are provided by the sublanguage recogniser.

    Returns a list of (start, end, language, total_distance, split)
    tuples describing viable box insertions.
    """
    valid = []
    pv = self.op.prev_version
    for sub in self.langs:
        lbox = MagicTerminal("<{}>".format(sub))
        node = errornode.prev_term
        while True:
            # Can `lbox` be shifted/reduced in the parse state at `node`?
            element = self.op.syntaxtable.lookup(node.state, lbox)
            if type(element) in [Reduce, Shift]:
                r = self.langs[sub]
                r.mode_limit_tokens_new = self.mode_limit_tokens_new
                # The box would start at the terminal following `node`.
                start = node.next_term
                result = r.parse(start)
                if r.possible_ends:
                    for e, enddist, split in r.possible_ends:
                        # Boxes ending in pure whitespace are not useful.
                        if e.lookup == "<ws>" or e.lookup == "<return>":
                            continue
                        if self.contains_errornode(start, e, errornode):
                            if self.parse_after_lbox_h2(
                                    lbox, e, start, pv, split):
                                # NOTE(review): assumes parse_after_lbox_h2
                                # updates self.abs_parse_distance as a side
                                # effect before it is read here -- confirm.
                                total_distance = self.abs_parse_distance + enddist
                                valid.append(
                                    (start, e, sub, total_distance, split))
            # Stop at the beginning of the line or of the file.
            if node.lookup == "<return>" or type(node) is BOS:
                break
            node = node.prev_term
    return valid
def heuristic_history(self, errornode):
    """Walk up the parse tree from `errornode`, consulting the previous
    tree version (`pv`), and try to insert a language box after each
    ancestor's left sibling's rightmost terminal.

    Collects every candidate insertion whose recognised box would
    contain the error node and after which the remaining input still
    parses.  Returns a list of
    (term, end, language, total_distance, split) tuples.
    """
    valid = []
    ws = ["<ws>", "<return>"]
    # Terminals already tried as box starts, to avoid duplicate work.
    searched = set()
    pv = self.op.prev_version
    for sub in self.langs:
        lbox = MagicTerminal("<{}>".format(sub))
        parent = errornode.parent
        while parent is not None:
            if parent.get_attr("parent", pv) is None: # Root
                # If we've reached the root, try inserting the box after
                # BOS, i.e. the beginning of the file
                left = parent.get_attr("children", pv)[0] # bos
            else:
                left = parent.get_attr("left", pv)
            while left and type(left.symbol) is Nonterminal and len(
                    left.get_attr("children", pv)) == 0:
                # If left is an empty nonterminal, keep going left until we
                # find a non-empty nonterminal or a terminal
                left = left.get_attr("left", pv)
            if left:
                state = left.state
                # Can the box terminal be shifted/reduced after `left`?
                element = self.op.syntaxtable.lookup(state, lbox)
                if type(element) in [Reduce, Shift]:
                    term = self.find_terminal(left, pv)
                    if term and term not in searched:
                        tleft = term.prev_term # left's most right terminal
                        if type(term) is EOS:
                            # Nothing can follow EOS; move up the tree.
                            parent = parent.get_attr("parent", pv)
                            continue
                        while term and term.lookup in ws:
                            # skip whitespace
                            term = term.next_term
                        element = self.op.syntaxtable.lookup(
                            tleft.state, lbox)
                        if type(element) not in [Reduce, Shift]:
                            # Usually if `lbox` can be shifted after `left`
                            # this means it should also be shiftable after
                            # `left`'s most right terminal. However, that
                            # terminal might have changed and caused an error
                            # which was isolated, which means that `lbox` isn't
                            # valid after all.
                            parent = parent.get_attr("parent", pv)
                            continue
                        r = self.langs[sub]
                        r.mode_limit_tokens_new = self.mode_limit_tokens_new
                        result = r.parse(term)
                        if r.possible_ends:
                            for e, enddist, split in r.possible_ends:
                                # Whitespace-only ends are not useful.
                                if e.lookup in ws:
                                    continue
                                if (self.contains_errornode(term, e, errornode) \
                                        and self.parse_after_lbox_h2(lbox, e, parent, pv, split)):
                                    # NOTE(review): assumes parse_after_lbox_h2
                                    # updates self.abs_parse_distance before it
                                    # is read here -- confirm.
                                    total_distance = self.abs_parse_distance + enddist
                                    valid.append((term, e, sub, total_distance, split))
                        searched.add(term)
            parent = parent.get_attr("parent", pv)
    return valid
def test_multitoken_relex_to_normal(self):
    """Relexing a multitext node whose text now lexes as an ordinary
    token turns it back into a plain node followed by the box."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str "[0-9]+":INT """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    multi = mk_multitextnode([Terminal("123"), MagicTerminal("<SQL>")])
    bos.insert_after(multi)
    lexer.relex(multi)
    token = bos.next_term
    # "123" is a complete INT token again; the box follows separately.
    assert token.lookup == "INT"
    assert token.symbol == Terminal("123")
    assert token.lookahead == 1
    assert token.next_term.symbol == MagicTerminal("<SQL>")
def test_token_iter_lbox(self):
    """The token iterator yields language boxes as standalone tokens
    (placeholder symbol, empty lookup) between ordinary tokens.

    NOTE(review): a test with this same name appears again further
    down in this file; if both are methods of the same class the later
    definition shadows this one -- verify.
    """
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    left = TextNode(Terminal("12"))
    box = TextNode(MagicTerminal("<SQL>"))
    right = TextNode(Terminal("34"))
    prev = bos
    for node in (left, box, right):
        prev.insert_after(node)
        prev = node
    it = self.lexer.get_token_iter(left)
    assert next(it) == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)
    assert next(it) == (lbph, "", 0, [TextNode(MagicTerminal("<SQL>"))], 0)
    assert next(it) == ("34", "INT", 1, [TextNode(Terminal("34"))], 0)
    # The iterator is exhausted afterwards.
    with pytest.raises(Exception):
        next(it)
def test_token_iter_lbox(self):
    """Language boxes appear as placeholder tokens in the token
    iterator's output stream."""
    lexer = IncrementalLexer(""" "[0-9]+":INT """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    first = TextNode(Terminal("12"))
    box = TextNode(MagicTerminal("<SQL>"))
    second = TextNode(Terminal("34"))
    prev = bos
    for node in (first, box, second):
        prev.insert_after(node)
        prev = node
    # Bind the iterator's `next` method once and call it repeatedly.
    fetch = lexer.lexer.get_token_iter(first).next
    assert fetch() == ("12", "INT", 1, [TextNode(Terminal("12"))], 0)
    assert fetch() == (lbph, "", 0, [TextNode(MagicTerminal("<SQL>"))], 0)
    assert fetch() == ("34", "INT", 1, [TextNode(Terminal("34"))], 0)
def test_token_iter_lbox_multi(self):
    """A string opened before a language box and closed after it is
    delivered as one multitoken by the token iterator."""
    lexer = IncrementalLexer(""" "[0-9]+":INT "\"[^\"]*\"":STRING """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    opener = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    closer = TextNode(Terminal("def\""))
    prev = bos
    for node in (opener, box, closer):
        prev.insert_after(node)
        prev = node
    # Bind the iterator's `next` method once and call it.
    fetch = lexer.lexer.get_token_iter(opener).next
    assert fetch() == (["\"abc", lbph, "def\""], "STRING", 0, [
        TextNode(Terminal("\"abc")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("def\""))
    ], 0)
def parse_symbol(self, node):
    """Convert a parse-tree symbol node into the corresponding grammar
    symbol: a Nonterminal, Terminal (quotes stripped), MagicTerminal
    for a language box, or the result of parse_function for
    "function" nodes.  Returns None for anything else."""
    child = node.children[0]
    kind = child.lookup
    if kind == "nonterminal":
        return Nonterminal(child.symbol.name)
    if kind == "terminal":
        # Strip the surrounding quote characters from the name.
        stripped = child.symbol.name[1:-1]
        # "<eos>" is special and is not registered as a terminal.
        if child.symbol.name != "\"<eos>\"":
            self.terminals.add(stripped)
        return Terminal(stripped)
    if kind == "languagebox":
        return MagicTerminal(child.symbol.name)
    if child.symbol.name == "function":
        return self.parse_function(child)
def test_relex_altered_comment(self):
    """A comment that runs into a language box is merged with the box
    into a multitext node when relexed."""
    lexer = IncrementalLexer(""" "#[^\r]*":comment "[0-9]+":INT "\+":PLUS """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    expr = TextNode(Terminal("1+"))
    comment = TextNode(Terminal("#abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    prev = bos
    for node in (expr, comment, box):
        prev.insert_after(node)
        prev = node
    lexer.relex(expr)
    # "1+" is split into an INT and a PLUS token ...
    assert bos.next_term.symbol == Terminal("1")
    assert bos.next_term.next_term.symbol == Terminal("+")
    # ... and the comment swallows the following language box.
    assert bos.next_term.next_term.next_term == mk_multitextnode(
        [Terminal("#abc"), MagicTerminal("<SQL>")])
def test_lexer_returns_nodes(self):
    """Relexing a string interrupted by a language box produces a
    multitext node with zero lookahead."""
    lexer = IncrementalLexer(""" "\"[^\"]*\"":str """)
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    eos = ast.parent.children[1]
    head = TextNode(Terminal("\"abc"))
    box = TextNode(MagicTerminal("<SQL>"))
    tail = TextNode(Terminal("def\""))
    prev = bos
    for node in (head, box, tail):
        prev.insert_after(node)
        prev = node
    lexer.relex(head)
    merged = bos.next_term
    assert merged.lookup == "str"
    assert merged == mk_multitextnode(
        [Terminal("\"abc"), MagicTerminal("<SQL>"), Terminal("def\"")])
    assert merged.lookahead == 0
def test_token_iter_lbox2(self):
    """An ordinary token followed by a box-spanning string comes out
    of the token iterator as a plain token plus one multitoken."""
    ast = AST()
    ast.init()
    bos = ast.parent.children[0]
    num = TextNode(Terminal("12"))
    str_open = TextNode(Terminal("'string with"))
    box = TextNode(MagicTerminal("<SQL>"))
    str_close = TextNode(Terminal("inside'"))
    prev = bos
    for node in (num, str_open, box, str_close):
        prev.insert_after(node)
        prev = node
    it = self.lexer.get_token_iter(num)
    assert it.next() == ("12", "INT", 1, [TextNode(Terminal("12"))])
    assert it.next() == (["'string with", lbph, "inside'"], "string", 1, [
        TextNode(Terminal("'string with")),
        TextNode(MagicTerminal("<SQL>")),
        TextNode(Terminal("inside'"))
    ])
    # No further tokens after the multitoken.
    with pytest.raises(StopIteration):
        it.next()
def parse_rule(self, node):
    """Parse a grammar rule node into a Rule and register it.

    Builds a Rule for the nonterminal named by the rule's first child,
    adds one alternative per entry returned by parse_alternatives, and
    appends any externally registered extra alternatives (grammar
    extension feature, e.g. language boxes).  Also records the
    grammar's start symbol: the first rule seen, or the rule whose
    name matches `self.change_startrule`.
    """
    name = node.children[0].symbol.name
    self.current_rulename = name
    alternatives = self.parse_alternatives(node.children[4])
    symbol = Nonterminal(name)
    if self.start_symbol is None:
        self.start_symbol = symbol
    if self.change_startrule and symbol.name == self.change_startrule:
        self.start_symbol = symbol
    r = Rule(symbol)
    for a in alternatives:
        r.add_alternative(a[0], a[1], a[2])
    # Add additional alternatives to the grammar (grammar extension
    # feature, e.g. languageboxes).  `in` replaces the deprecated,
    # Python-2-only dict.has_key() with identical semantics.
    if symbol.name in self.extra_alternatives:
        for n in self.extra_alternatives[symbol.name]:
            r.add_alternative([MagicTerminal(n), Nonterminal("WS")], None)
    self.rules[symbol] = r
def heuristic_stack(self, errornode):
    """Scan the parser stack top-down for states in which a language
    box terminal could be shifted/reduced, and collect candidate box
    insertions that either swallow `errornode` or let it be shifted.

    Returns a list of (node, end, language, total_distance, split)
    tuples.
    """
    # Find position on stack where lbox would be valid
    valid = []
    for sub in self.langs:
        lbox = MagicTerminal("<{}>".format(sub))
        cut = len(self.op.stack) - 1
        while cut >= 0:
            top = self.op.stack[cut]
            if isinstance(top, EOS):
                top = top.parent.children[0] # bos
                state = 0
            else:
                state = self.op.stack[cut].state
            # get all possible sublangs
            element = self.op.syntaxtable.lookup(state, lbox)
            if type(element) in [Reduce, Shift]:
                term = self.find_terminal(top)
                if type(term) is EOS:
                    # Nothing can start after EOS; try the next stack slot.
                    cut = cut - 1
                    continue
                if term:
                    n = term
                    # See if we can get a valid language box using the Recogniser
                    r = self.langs[sub]
                    r.mode_limit_tokens_new = self.mode_limit_tokens_new
                    result = r.parse(n)
                    if r.possible_ends:
                        # Filter results and test if remaining file can be
                        # parsed after shifting the language box
                        for e, enddist, split in r.possible_ends:
                            # Whitespace-only box ends are not useful.
                            if e.lookup == "<ws>" or e.lookup == "<return>":
                                continue
                            # NOTE(review): the second parse_after_lbox_h1
                            # call passes `errornode` as the fourth
                            # positional argument, while the first call
                            # passes `split=` by keyword for that position
                            # range -- confirm the fourth parameter is
                            # really meant to receive errornode here.
                            if (self.contains_errornode(n, e, errornode) \
                                and self.parse_after_lbox_h1(lbox, e, cut, split=split, distance=10)) \
                                    or self.parse_after_lbox_h1(lbox, e, cut, errornode):
                                # Either the error was solved by
                                # moving it into the box or a box
                                # was created before it, allowing
                                # the error to be shifted
                                total_distance = self.abs_parse_distance + enddist
                                valid.append(
                                    (n, e, sub, total_distance, split))
            cut = cut - 1
    return valid
def heuristic_line(self, errornode):
    """Scan backwards through the error node's line for parse states
    in which a language box token could be shifted, and collect every
    candidate box found by the sublanguage recognisers.

    Returns a list of
    (start, end, language, enddist, split, lbox, errornode) tuples.
    """
    candidates = []
    for lang_name in self.langs:
        box_sym = MagicTerminal("<{}>".format(lang_name))
        cur = errornode.prev_term
        while True:
            # Is the box terminal shiftable/reducible at this state?
            action = self.op.syntaxtable.lookup(cur.state, box_sym)
            if type(action) in [Reduce, Shift]:
                recogniser = self.langs[lang_name]
                recogniser.mode_limit_tokens_new = self.mode_limit_tokens_new
                box_start = cur.next_term
                recogniser.parse(box_start)
                if recogniser.possible_ends:
                    for end, dist, split in recogniser.possible_ends:
                        # Skip whitespace-only box ends.
                        if end.lookup in ("<ws>", "<return>"):
                            continue
                        candidates.append((box_start, end, lang_name,
                                           dist, split, box_sym, errornode))
            # Stop at line start, file start, or a multinode boundary.
            if (cur.lookup == "<return>" or type(cur) is BOS
                    or cur.ismultinode()):
                break
            cur = cur.prev_term
    return candidates