Beispiel #1
0
    def test_relex_altered_string(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
"[0-9]+":INT
"\+":PLUS
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("123+\"\""))
        bos.insert_after(text1)
        lexer.relex(text1)
        assert bos.next_term.symbol == Terminal("123")
        assert bos.next_term.lookup == "INT"
        assert bos.next_term.lookahead == 1
        assert bos.next_term.next_term.symbol == Terminal("+")
        assert bos.next_term.next_term.lookup == "PLUS"
        assert bos.next_term.next_term.lookahead == 0
        assert bos.next_term.next_term.next_term.symbol == Terminal("\"\"")
        assert bos.next_term.next_term.next_term.lookup == "str"
        assert bos.next_term.next_term.next_term.lookahead == 0

        string = bos.next_term.next_term.next_term
        string.symbol.name = "\"abc\""
        lexer.relex(string)
Beispiel #2
0
    def test_multitoken_real_lbox_cut_off_string(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
"[a-z]+":var
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("\"abc"))
        lbox = TextNode(MagicTerminal("<SQL>"))
        text2 = TextNode(Terminal("d\"ef\"g"))
        bos.insert_after(text1)
        text1.insert_after(lbox)
        lbox.insert_after(text2)
        pytest.raises(LexingError, lexer.relex, text1)
        assert type(bos.next_term) is MultiTextNode
        assert bos.next_term.children[0] is text1
        assert bos.next_term.children[1] is lbox
        assert bos.next_term.children[2] is text2
        assert text2.symbol.name == "d\""
        assert bos.next_term.next_term.symbol.name == "ef"
        leftover = bos.next_term.next_term.next_term
        assert leftover.symbol.name == "\"g"

        leftover.symbol.name = "g"
        leftover.changed = True
        lexer.relex(leftover)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             MagicTerminal("<SQL>"),
             Terminal("d\"")])
        assert bos.next_term.next_term.symbol.name == "efg"
Beispiel #3
0
 def test_relex(self):
     """Relex ``1 + 2 * 3`` and walk the resulting token chain from BOS to
     EOS, checking each symbol and the lookahead of the first three tokens."""
     ast = AST()
     ast.init()
     bos = ast.parent.children[0]
     inserted = TextNode(Terminal("1 + 2 * 3"))
     bos.insert_after(inserted)
     self.relex(inserted)

     assert ast.parent.symbol == Nonterminal("Root")
     assert isinstance(ast.parent.children[0], BOS)
     assert isinstance(ast.parent.children[-1], EOS)

     # (token text, expected lookahead); None means lookahead is not checked.
     expected = [
         ("1", 1),
         (" ", 1),
         ("+", 0),
         (" ", None),
         ("2", None),
         (" ", None),
         ("*", None),
         (" ", None),
         ("3", None),
     ]
     node = bos
     for text, lookahead in expected:
         node = node.next_term
         assert node.symbol == Terminal(text)
         if lookahead is not None:
             assert node.lookahead == lookahead

     # Nothing but EOS may follow the last token.
     node = node.next_term
     assert isinstance(node, EOS)
Beispiel #4
0
    def test_triplequotes1(self):
        lexer = IncrementalLexer("""
"\"\"\"[^\"]*\"\"\"":triplestring
"\"[^\"]*\"":string
"[a-z]+":var
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text1 = TextNode(Terminal("\"\"\""))
        text2 = TextNode(Terminal("abc"))
        text3 = TextNode(Terminal("\"\"\""))
        bos.insert_after(text1)
        text1.insert_after(text2)
        text2.insert_after(text3)
        lexer.relex(text1)
        assert bos.next_term.symbol == Terminal("\"\"\"abc\"\"\"")
        assert bos.next_term.lookup == "triplestring"

        bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\"\""
        pytest.raises(LexingError, lexer.relex, bos.next_term)

        bos.next_term.symbol.name = "\"\"\"ab\"\"\"c\"\""
        lexer.relex(bos.next_term)
Beispiel #5
0
 def test_multi(self):
     """The first token of a multi-line comment containing a carriage return
     is reported with its text split into a list of pieces."""
     tree = AST()
     tree.init()
     node = TextNode(Terminal('--[[test\rtest]]'))
     tree.parent.children[0].insert_after(node)
     tokens = self.lexer.get_token_iter(node)
     first_token = next(tokens)
     assert first_token == (['--[[test', '\r', 'test]]'], "mcomment", 0, [TextNode(Terminal('--[[test\rtest]]'))], 0)
Beispiel #6
0
    def test_simple(self):
        """Lex and incrementally parse ``1+2``, then compare the parse tree
        with an expected tree that is built by hand."""
        first = self.ast.parent.children[0]
        text = TextNode(Terminal("1+2"))
        first.insert_after(text)
        self.lexer.relex(text)
        assert self.parser.inc_parse([]) == True
        assert self.ast.parent.symbol == Nonterminal("Root")
        assert isinstance(self.ast.parent.children[0], BOS)
        assert isinstance(self.ast.parent.children[-1], EOS)

        # Expected tree: Root -> [BOS, Startrule, EOS].
        root = TextNode(Nonterminal("Root"))
        bos = BOS(Terminal(""))
        eos = EOS(FinishSymbol())
        start = TextNode(Nonterminal("Startrule"))
        root.set_children([bos, start, eos])

        # Startrule -> [WS, E]; E -> [E, "+", WS, T].
        plus_expr = TextNode(Nonterminal("E"))
        start.set_children([TextNode(N("WS")), plus_expr])
        plus_expr.set_children(
            self.make_nodes([N("E"), T("+"), N("WS"), N("T")]))

        # Left operand: E -> T -> P -> ["1", WS].
        left_e = plus_expr.children[0]
        left_e.set_children(self.make_nodes([N("T")]))
        left_t = left_e.children[0]
        left_t.set_children(self.make_nodes([N("P")]))
        left_p = left_t.children[0]
        left_p.set_children(self.make_nodes([T("1"), N("WS")]))

        # Right operand: T -> P -> ["2", WS].
        right_t = plus_expr.children[3]
        right_t.set_children(self.make_nodes([N("P")]))
        right_p = right_t.children[0]
        right_p.set_children(self.make_nodes([T("2"), N("WS")]))

        self.compare_trees(self.ast.parent, root)
Beispiel #7
0
 def test_simple(self):
     """A complete single-line ``--[[ ... ]]`` comment is returned as one
     ``mcomment`` token."""
     tree = AST()
     tree.init()
     node = TextNode(Terminal('--[[testtest]]'))
     tree.parent.children[0].insert_after(node)
     tokens = self.lexer.get_token_iter(node)
     first_token = next(tokens)
     assert first_token == ('--[[testtest]]', "mcomment", 0, [TextNode(Terminal('--[[testtest]]'))], 0)
Beispiel #8
0
 def test_lookahead(self):
     """For an unterminated ``--[[`` comment followed by a carriage return,
     the first token is the ``scomment`` up to the carriage return; the
     whole token tuple is compared."""
     tree = AST()
     tree.init()
     node = TextNode(Terminal('--[[test\rtest'))
     tree.parent.children[0].insert_after(node)
     tokens = self.lexer.get_token_iter(node)
     first_token = next(tokens)
     assert first_token == ('--[[test', "scomment", 6, [TextNode(Terminal('--[[test\rtest'))], -5)
Beispiel #9
0
 def test_simple3(self):
     """Three double quotes in a row yield a two-character ``dstring`` as
     the first token; the whole token tuple is compared."""
     tree = AST()
     tree.init()
     node = TextNode(Terminal('"""'))
     tree.parent.children[0].insert_after(node)
     tokens = self.lexer.get_token_iter(node)
     first_token = next(tokens)
     assert first_token == ('""', "dstring", 2, [TextNode(Terminal('"""'))], -1)
Beispiel #10
0
 def test_simple(self):
     """A lowercase identifier is returned as a single ``NAME`` token."""
     tree = AST()
     tree.init()
     node = TextNode(Terminal("asd"))
     tree.parent.children[0].insert_after(node)
     tokens = self.lexer.get_token_iter(node)
     first_token = next(tokens)
     assert first_token == ("asd", "NAME", 1, [TextNode(Terminal("asd"))], 0)
Beispiel #11
0
    def test_normal_and_multi_to_multi2(self):
        lexer = IncrementalLexer("""
"\"[a-z\r\x80]*\"":str
"[a-z]+":var
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        n1 = TextNode(Terminal("\"abc"))
        n2 = TextNode(Terminal("\r"))
        n3 = mk_multitextnode(
            [Terminal("def"),
             Terminal("\r"), Terminal("gh\"")])
        bos.insert_after(n1)
        n1.insert_after(n2)
        n2.insert_after(n3)
        lexer.relex(n1)
        assert bos.next_term == mk_multitextnode([
            Terminal("\"abc"),
            Terminal("\r"),
            Terminal("def"),
            Terminal("\r"),
            Terminal("gh\"")
        ])
        assert bos.next_term.next_term is eos
Beispiel #12
0
    def test_stringwrapper(self):
        """StringWrapper indexes and slices across a chain of text nodes as
        if their texts were one concatenated string."""
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]

        # Five nodes whose texts concatenate to "abc+1*3456".
        text1 = TextNode(Terminal("abc"))
        text2 = TextNode(Terminal("+"))
        text3 = TextNode(Terminal("1"))
        text4 = TextNode(Terminal("*"))
        text5 = TextNode(Terminal("3456"))
        bos.insert_after(text1)
        text1.insert_after(text2)
        text2.insert_after(text3)
        text3.insert_after(text4)
        text4.insert_after(text5)

        wrapper = StringWrapper(text1)

        # Spot-check single characters, including ones at node boundaries.
        samples = [(0, "a"), (2, "c"), (3, "+"), (4, "1"), (5, "*"),
                   (6, "3"), (9, "6")]
        for index, char in samples:
            assert wrapper[index] == char

        # Every (start, stop) slice must agree with plain string slicing.
        s = "abc+1*3456"
        length = len(s)
        for start in range(length):
            for stop in range(length):
                assert wrapper[start:stop] == s[start:stop]
                print(start, stop, wrapper[start:stop])
Beispiel #13
0
    def test_backwards_lexing(self):
        lexer = IncrementalLexer("""
"::=":doublecolon
"=":equal
":":singlecolon
        """)
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text = TextNode(Terminal(":"))
        bos.insert_after(text)
        lexer.relex(text)

        assert bos.next_term.symbol.name == ":"
        assert bos.next_term.lookup == "singlecolon"
        assert text.lookahead == 1

        text2 = TextNode(Terminal(":"))
        text.insert_after(text2)
        lexer.relex(text2)
        assert text2.lookahead == 1

        assert bos.next_term.symbol.name == ":"
        assert bos.next_term.next_term.symbol.name == ":"

        text3 = TextNode(Terminal("="))
        text2.insert_after(text3)
        lexer.relex(text3)

        assert bos.next_term.symbol.name == "::="
        assert isinstance(bos.next_term.next_term, EOS)
Beispiel #14
0
 def parse(self, tokens):
     """Run the table-driven shift/reduce loop over ``tokens``.

     Each token is indexed with ``token[1]`` to build the lookahead
     Terminal. ``self.log`` is reset and receives the current token once
     per loop iteration. When the token stream is exhausted the lookahead
     becomes ``FinishSymbol()``.

     Returns True when the table lookup yields an Accept, and False when it
     yields anything other than Shift, Reduce or Accept.

     Raises StopIteration if ``tokens`` is empty.
     """
     self.log = []
     tokens = iter(tokens)
     # Builtin next() works with both Python 2.6+ and Python 3 iterators,
     # unlike the Python-2-only ``.next()`` method.
     token = next(tokens)
     la = Terminal(token[1])
     while True:
         self.log.append(token)
         elem = self.stable.lookup(self.state, la)
         if type(elem) is Shift:
             self.state = elem.action
             self.stack.append((la, self.state))
             try:
                 token = next(tokens)
             except StopIteration:
                 # Input exhausted: continue with the end-of-input symbol.
                 la = FinishSymbol()
             else:
                 la = Terminal(token[1])
         elif type(elem) is Reduce:
             # Pop one stack entry per symbol reported by elem.amount().
             for _ in range(elem.amount()):
                 self.stack.pop()
             self.state = self.stack[-1][1]
             goto = self.stable.lookup(self.state, elem.action.left)
             assert goto is not None
             self.state = goto.action
             self.stack.append((elem.action.left, self.state))
         elif type(elem) is Accept:
             return True
         else:
             return False
 def test_simple(self):
     """A complete triple-quoted string is returned as one ``MLS`` token."""
     ast = AST()
     ast.init()
     new = TextNode(Terminal('"""abc"""'))
     ast.parent.children[0].insert_after(new)
     it = self.lexer.get_token_iter(new)
     # Builtin next() instead of the Python-2-only ``it.next()`` method.
     assert next(it) == ('"""abc"""', "MLS", 0,
                         [TextNode(Terminal('"""abc"""'))], 0)
Beispiel #16
0
def test_terminal():
    """Two symbols of the same kind built from the same name compare equal."""
    # Nonterminals with equal names.
    assert Nonterminal("E") == Nonterminal("E")

    # Terminals with equal names.
    assert Terminal("a") == Terminal("a")
Beispiel #17
0
 def is_finished(self):
     """Return True if, starting from a copy of ``self.state``, a NEWLINE can
     be parsed and afterwards either the syntax table has an entry for
     FinishSymbol or a DEDENT can be parsed as well; otherwise False."""
     # Work on a copy of the state list instead of passing self.state itself.
     states = list(self.state)
     if not self.temp_parse(states, Terminal("NEWLINE")):
         return False
     if self.syntaxtable.lookup(states[-1], FinishSymbol()):
         return True
     if self.temp_parse(states, Terminal("DEDENT")):
         return True
     return False
Beispiel #18
0
 def get_lookup(self, la):
     """Return the symbol used for table lookups for node ``la``: a Terminal
     built from ``la.lookup`` when that is non-empty, else ``la.symbol``."""
     lookup_symbol = la.symbol if la.lookup == "" else Terminal(la.lookup)
     if isinstance(lookup_symbol, IndentationTerminal):
         #XXX hack: change parsing table to accept IndentationTerminals
         return Terminal(lookup_symbol.name)
     return lookup_symbol
Beispiel #19
0
def get_lookup(la):
    """Return the lookup symbol of a node.

    A Terminal built from ``la.lookup`` is used when that string is
    non-empty; otherwise the node's own symbol is used. An
    IndentationTerminal is converted to a plain Terminal of the same name.
    """
    if la.lookup == "":
        candidate = la.symbol
    else:
        candidate = Terminal(la.lookup)
    if not isinstance(candidate, IndentationTerminal):
        return candidate
    return Terminal(candidate.name)
Beispiel #20
0
    def test_multitoken_relex_merge(self):
        lexer = IncrementalLexer("""
"\"[a-z\r\x80]*\"":str
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text = TextNode(Terminal("\"abc\rde\rf\""))
        bos.insert_after(text)
        lexer.relex(text)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode([
            Terminal("\"abc"),
            Terminal("\r"),
            Terminal("de"),
            Terminal("\r"),
            Terminal("f\"")
        ])

        bos.next_term.children.pop(3)  # remove a newline
        bos.next_term.update_children()
        child0 = bos.next_term.children[0]
        child1 = bos.next_term.children[1]
        child2 = bos.next_term.children[2]

        lexer.relex(bos.next_term)
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             Terminal("\r"),
             Terminal("def\"")])
        assert bos.next_term.children[0] is child0
        assert bos.next_term.children[1] is child1
        assert bos.next_term.children[2] is child2
Beispiel #21
0
 def test_relex3(self):
     """Relexing merges adjacent digit nodes into one token and leaves the
     trailing, already-lexed ``789`` node untouched."""
     ast = AST()
     ast.init()
     bos = ast.parent.children[0]
     new1 = TextNode(Terminal("1+2"))
     new2 = TextNode(Terminal("345"))
     new3 = TextNode(Terminal("6+"))
     new4 = TextNode(Terminal("789"))  # this should never be touched
     new4.lookup = "INT"
     new5 = TextNode(Terminal("+"))  # this should never be touched
     new5.lookup = "plus"

     # Chain the nodes after BOS in order.
     previous = bos
     for current in (new1, new2, new3, new4, new5):
         previous.insert_after(current)
         previous = current

     self.relex(new1)
     assert ast.parent.symbol == Nonterminal("Root")
     assert isinstance(ast.parent.children[0], BOS)
     assert isinstance(ast.parent.children[-1], EOS)

     node = bos
     for text in ("1", "+", "23456", "+"):
         node = node.next_term
         assert node.symbol == Terminal(text)

     # check that 789 hasn't been relexed
     untouched = node.next_term
     assert untouched is new4
     assert untouched.symbol is new4.symbol
Beispiel #22
0
    def test_lexingerror(self):
        """The token iterator yields the leading ``1`` as an INT and raises
        LexingError when asked for the next token."""
        tree = AST()
        tree.init()
        start = tree.parent.children[0]
        node = TextNode(Terminal("1b"))
        start.insert_after(node)

        tokens = self.lexer.get_token_iter(node)
        first_token = next(tokens)
        assert first_token == ("1", "INT", 1, [TextNode(Terminal("1b"))], -1)
        # Advancing past the INT token must fail.
        pytest.raises(LexingError, next, tokens)
Beispiel #23
0
def test_option_rule():
    """An optional ``[ ... ]`` part is moved into a generated ``A_option``
    rule with one alternative including it and one without it."""
    parser = Parser("""
        A ::= "a" [ "b" ] "g"
    """)
    parser.parse()
    print(parser.rules)

    main_alternatives = parser.rules[Nonterminal("A")].alternatives
    assert main_alternatives == [[Terminal("a"), Nonterminal("A_option")]]

    option_alternatives = parser.rules[Nonterminal("A_option")].alternatives
    assert option_alternatives == [
        [Terminal("b"), Terminal("g")],
        [Terminal("g")],
    ]
Beispiel #24
0
def test_loop_rule():
    """A repeated ``{ ... }`` part is moved into a generated ``A_loop`` rule
    that either refers to itself again or continues with the rest."""
    parser = Parser("""
        A ::= "a" { "b" } "g"
    """)
    parser.parse()
    print(parser.rules)

    main_alternatives = parser.rules[Nonterminal("A")].alternatives
    assert main_alternatives == [[Terminal("a"), Nonterminal("A_loop")]]

    loop_alternatives = parser.rules[Nonterminal("A_loop")].alternatives
    assert loop_alternatives == [
        [Terminal("b"), Nonterminal("A_loop")],
        [Terminal("g")],
    ]
Beispiel #25
0
def test_more_complex_grammar():
    """A rule with four alternatives spread over several lines is parsed
    into four alternatives in source order."""
    parser = Parser("""
    name ::= "ID"
           | "&" "ID"
           | splice
           | insert
    """)
    parser.parse()

    alternatives = parser.rules[Nonterminal("name")].alternatives
    assert alternatives == [
        [Terminal("ID")],
        [Terminal("&"), Terminal("ID")],
        [Nonterminal("splice")],
        [Nonterminal("insert")],
    ]
Beispiel #26
0
def test_group_rule():
    """A ``( ... | ... )`` group is moved into a generated ``A_group1`` rule
    whose alternatives each continue with ``A_group2`` for the rest."""
    parser = Parser("""
        A ::= "a" ( "b" | "c" ) "g"
    """)
    parser.parse()
    print(parser.rules)

    main_alternatives = parser.rules[Nonterminal("A")].alternatives
    assert main_alternatives == [[Terminal("a"), Nonterminal("A_group1")]]

    group1_alternatives = parser.rules[Nonterminal("A_group1")].alternatives
    assert group1_alternatives == [
        [Terminal("b"), Nonterminal("A_group2")],
        [Terminal("c"), Nonterminal("A_group2")],
    ]

    group2_alternatives = parser.rules[Nonterminal("A_group2")].alternatives
    assert group2_alternatives == [[Terminal("g")]]
Beispiel #27
0
    def test_token_iter2(self):
        """Two adjacent digit nodes are returned by the token iterator as a
        single INT token that lists both nodes."""
        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        new = TextNode(Terminal("12"))
        new2 = TextNode(Terminal("34"))
        bos.insert_after(new)
        new.insert_after(new2)

        # Builtin next() instead of the Python-2-only bound ``.next`` method.
        token_iter = self.lexer.lexer.get_token_iter(new)
        assert next(token_iter) == ("1234", "INT", 1, [
            TextNode(Terminal("12")),
            TextNode(Terminal("34"))
        ], 0)
Beispiel #28
0
    def test_multitoken_reuse1(self):
        lexer = IncrementalLexer("""
"\"[a-z\r\x80]*\"":str
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text = TextNode(Terminal("\"abc\rdef\""))
        bos.insert_after(text)
        lexer.relex(text)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             Terminal("\r"),
             Terminal("def\"")])
        assert bos.next_term.children[0] is text

        bos.next_term.children[
            2].symbol.name = "de\rf\""  # insert another newline
        child0 = bos.next_term.children[0]
        child1 = bos.next_term.children[1]
        child2 = bos.next_term.children[2]

        mt = bos.next_term

        lexer.relex(bos.next_term)
        assert bos.next_term == mk_multitextnode([
            Terminal("\"abc"),
            Terminal("\r"),
            Terminal("de"),
            Terminal("\r"),
            Terminal("f\"")
        ])
        # test if nodes within a MultiTextNode are reused
        assert bos.next_term.children[0] is child0
        assert bos.next_term.children[1] is child1
        assert bos.next_term.children[2] is child2

        child3 = bos.next_term.children[3]
        child4 = bos.next_term.children[4]

        assert child0.prev_term is None
        assert child0.next_term is child1
        assert child1.prev_term is child0
        assert child1.next_term is child2
        assert child2.prev_term is child1
        assert child2.next_term is child3
        assert child3.prev_term is child2
        assert child3.next_term is child4
        assert child4.prev_term is child3
        assert child4.next_term is None

        assert bos.next_term is mt  # reused the MultiTextNode
Beispiel #29
0
    def test_normal_to_normal_and_multi(self):
        lexer = IncrementalLexer("""
"\"[^\"]*\"":str
"[a-z]+":var
        """)

        ast = AST()
        ast.init()
        bos = ast.parent.children[0]
        eos = ast.parent.children[1]
        text = TextNode(Terminal("\"abc\rdef\""))
        bos.insert_after(text)
        lexer.relex(text)
        assert bos.next_term.lookup == "str"
        assert bos.next_term == mk_multitextnode(
            [Terminal("\"abc"),
             Terminal("\r"),
             Terminal("def\"")])

        bos.next_term.children[0].symbol.name = "ab\"c"
        lexer.relex(bos.next_term)

        assert bos.next_term == TextNode(Terminal("ab"))
        assert bos.next_term.next_term == mk_multitextnode(
            [Terminal("\"c"),
             Terminal("\r"),
             Terminal("def\"")])
Beispiel #30
0
 def test_relex_stop(self):
     """Relex ``1+2`` in front of two existing nodes and check the full
     token chain from BOS to EOS."""
     ast = AST()
     ast.init()
     bos = ast.parent.children[0]
     new = TextNode(Terminal("1+2"))
     old1 = TextNode(Terminal("*"))
     old2 = TextNode(Terminal("3"))
     old2.lookup = "INT"
     bos.insert_after(new)
     new.insert_after(old1)
     old1.insert_after(old2)
     self.relex(new)

     assert ast.parent.symbol == Nonterminal("Root")
     assert isinstance(ast.parent.children[0], BOS)
     assert isinstance(ast.parent.children[-1], EOS)

     # Walk the token chain and compare each symbol in order.
     node = bos
     for text in ("1", "+", "2", "*", "3"):
         node = node.next_term
         assert node.symbol == Terminal(text)

     # Nothing but EOS may follow the last token.
     node = node.next_term
     assert isinstance(node, EOS)