Exemple #1
0
    def test_non_empty_handler_is_true(self):
        """A handler that holds at least one error is truthy."""
        #arrange
        handler = ErrorHandler()

        #act
        handler.push(1, "foo")

        #assert
        self.assertTrue(bool(handler))
Exemple #2
0
    def test_attempt_push_on_non_full_queue_is_true(self):
        """push() reports success while the handler still has capacity."""
        #arrange
        handler = ErrorHandler(2)
        handler.push(1, "foo")

        #act
        ok = handler.push(2, "SUT")

        #assert
        self.assertTrue(ok)
Exemple #3
0
 def __init__(self):
     """\
     builds an abstract syntax tree out of a token stream
     @i: current position within self.tokens
     @ast: tree produced by self.expression()
     """
     self.ast = None
     self.i = 0
     self.tokens = []
     self.err = ErrorHandler()
Exemple #4
0
    def test_pushed_error_is_recovered_on_iteration(self):
        """Iterating the handler yields the formatted error message."""
        #arrange
        expected = "line 1: test error"
        handler = ErrorHandler()

        #act
        handler.push(1, "test error")

        #assert
        for item in handler:
            self.assertEqual(expected, item)
Exemple #5
0
    def test_pushed_mulitple_errors_are_recovered_on_iteration(self):
        """Errors come back in push order when iterating the handler."""
        #arrange
        expected = ["line 1: test error 1", "line 2: test error 2"]
        handler = ErrorHandler()

        #act
        handler.push(1, "test error 1")
        handler.push(2, "test error 2")

        #assert
        for pos, msg in enumerate(handler):
            self.assertEqual(expected[pos], msg)
Exemple #6
0
    def parse(self, tokens, limit=10):
        """\
        entry point for recursive descent

        @tokens: token list produced by lexical analysis, tokens[-1] == EOF
        @limit: capacity of the internal ErrorHandler
        Returns: (abstract syntax tree or None, error handler) tuple; the top
        level of the tree is a list of statements
        """
        self.i = 0
        self.tokens = tokens
        self.err = ErrorHandler(limit)

        try:
            tree = self.program()
        except ParseError:
            return (None, self.err)

        return (tree, self.err)
Exemple #7
0
    def __init__(self, env):
        """\
        @env: enclosing environment; when falsy, a fresh root Environment
        is created instead
        """
        self.err = ErrorHandler(1)
        self.env = env if env else Environment(None)
Exemple #8
0
    def test_reset_then_push_results_in_true_handler(self):
        """A handler reused after reset() becomes truthy again on push."""
        #arrange
        handler = ErrorHandler(1)

        #act
        handler.push(1, "foo")
        handler.reset()
        handler.push(1, "bar")

        #assert
        self.assertTrue(bool(handler))
Exemple #9
0
    def test_reset_results_in_false_handler(self):
        """reset() empties the handler, leaving it falsy."""
        #arrange
        handler = ErrorHandler(1)

        #act
        handler.push(1, "foo")
        handler.reset()

        #assert
        self.assertFalse(bool(handler))
Exemple #10
0
    def tokenize(self, src, limit=3):
        """\
        scan source text into self.tokens
        @src: source code, newline terminated so that src[i + 1] is valid
        @limit: internal ErrorHandler limit
        """
        #reset attrs on multiple calls
        self.src = src
        self.err = ErrorHandler(limit)
        self.line = 1
        self.i = 0
        self.tokens = []

        while self.i < len(self.src):
            #bug fix: "assert (cond and 'msg')" passes/fails on cond exactly
            #as before, but the old form never displayed the message on
            #failure; the message belongs after a comma
            assert self.i >= 0, "index is not strictly increasing"
            assert self.line >= 1, "line is not strictly increasing"

            if (char := self.src[self.i]) in self.single_map:
                tok = Token(self.single_map[char], self.line, char, None)
                self.tokens.append(tok)
            elif self.src[self.i] in self.double_map:
                self.handle_double()
Exemple #11
0
    def test_initialized_handler_is_false(self):
        """A freshly constructed handler holds no errors and is falsy."""
        #arrange
        handler = ErrorHandler()

        #assert
        self.assertFalse(bool(handler))
Exemple #12
0
    def __init__(self):
        """\
        convert input source string into a List[Token]
        """
        self.src = " "
        self.err = ErrorHandler()
        self.line = 0
        self.i = 0  #src index
        self.tokens = []

        #lexeme -> TokenType for single-character tokens; slash is excluded
        #because it also introduces comments
        self.single_map = {
            '(': TokenType.LEFT_PAREN,
            ')': TokenType.RIGHT_PAREN,
            '{': TokenType.LEFT_BRACE,
            '}': TokenType.RIGHT_BRACE,
            ',': TokenType.COMMA,
            '.': TokenType.DOT,
            '-': TokenType.MINUS,
            '+': TokenType.PLUS,
            ';': TokenType.SEMICOLON,
            '*': TokenType.STAR
        }

        #lexeme -> TokenType for one- and two-character operator tokens
        self.double_map = {
            '!': TokenType.BANG,
            '!=': TokenType.BANG_EQUAL,
            '=': TokenType.EQUAL,
            '==': TokenType.EQUAL_EQUAL,
            '>': TokenType.GREATER,
            '>=': TokenType.GREATER_EQUAL,
            '<': TokenType.LESS,
            '<=': TokenType.LESS_EQUAL,
        }

        #lexeme -> TokenType for reserved words
        self.keywords_map = {
            'and': TokenType.AND,
            'class': TokenType.CLASS,
            'else': TokenType.ELSE,
            'false': TokenType.FALSE,
            'fun': TokenType.FUN,
            'for': TokenType.FOR,
            'if': TokenType.IF,
            'nil': TokenType.NIL,
            'or': TokenType.OR,
            'print': TokenType.PRINT,
            'return': TokenType.RETURN,
            'super': TokenType.SUPER,
            'this': TokenType.THIS,
            'true': TokenType.TRUE,
            'var': TokenType.VAR,
            'while': TokenType.WHILE
        }

        #whitespace characters excluding newline and comments
        self.whitespace_set = {' ', '\t', '\r', '\f', '\v'}

        #digit characters
        self.digit_set = set("0123456789")
Exemple #13
0
class Parser():
    """\
    recursive descent parser: converts a list of tokens into an abstract
    syntax tree, collecting errors into an ErrorHandler along the way.
    """
    def __init__(self):
        """\
        convert a list of tokens to an abstract syntax tree
        @i: tokens index
        @ast: abstract syntax tree generated by self.expression()
        """
        self.err = ErrorHandler()
        self.tokens = []
        self.i = 0
        self.ast = None

    def parse(self, tokens, limit=10):
        """\
        recursive descent entry point

        @tokens: list of tokens provided by lexical analysis, tokens[-1] == EOF
        @limit: internal ErrorHandler limit
        Returns: abstract syntax tree, top level is a list of statements
        """
        self.err = ErrorHandler(limit)
        self.tokens = tokens
        self.i = 0

        try:
            return (self.program(), self.err)
        except ParseError:
            return (None, self.err)

    def curr_type(self):
        """\
        helper function: returns token type of token at current list index
        """
        token = self.tokens[self.i]
        return token.type

    def curr_token(self):
        """\
        helper function: syntactic sugar to fetch current token
        """
        return self.tokens[self.i]

    def advance(self):
        """\
        helper function: syntactic sugar for iteration over the tokens list
        """
        assert (self.i + 1 < len(self.tokens))
        self.i += 1

    def prev_token(self):
        """\
        helper function: syntactic sugar to fetch previous token
        """
        assert (self.i - 1 >= 0)
        return self.tokens[self.i - 1]

    def check_semicolon(self):
        """\
        consume a semicolon at the current index; on failure, trap with a
        user-friendly message and return False
        """
        if self.curr_type() == TokenType.SEMICOLON:
            self.advance()
            return True
        else:
            tok = self.curr_token()
            if tok.type == TokenType.EOF:
                self.trap("expected ';' at end of file")
            else:
                self.trap("expected ';' before {}".format(tok.lexeme))

            return False

    def program(self):
        """\
        <program> := <declaration>* EOF
        """
        tree = []

        while self.curr_type() != TokenType.EOF:
            tree.append(self.declaration())

        assert (self.curr_type() == TokenType.EOF)
        return tree

    def declaration(self):
        """\
        <declaration> := <variable declaration> | <statement>
        """
        if self.curr_type() == TokenType.VAR:
            self.advance()
            return self.var_declaration()

        return self.statement()

    def var_declaration(self):
        """\
        <var_declaration> := "var" IDENTIFIER ("=" <expression>)? ";"
        """
        name = None

        #if no initializer is present, assume that there was actually
        #an intializer to nil, i.e., var x = nil; instead of var x;
        initializer = Literal(Token(TokenType.NIL, -1, "nil", None))

        if self.curr_type() == TokenType.IDENTIFIER:
            name = self.curr_token()
            self.advance()

            if self.curr_type() == TokenType.EQUAL:
                self.advance()
                initializer = self.expression()

            self.check_semicolon()
        else:
            self.trap("missing variable identifier")

        return VariableDeclaration(name, initializer)

    def statement(self):
        """\
        <statement> := <expression statement> | <print statement> |
                       <block statement> | <if statement> | <while statement> |
                       <for statement>
        """
        # this isn't the world's fastest code, a jump table or dictionary-based
        # switch would be better, but hey we're writing an interpreter in
        # python! This is hardly the bottleneck!

        if self.curr_type() == TokenType.PRINT:
            self.advance()
            stmt = self.print_stmt()
        elif self.curr_type() == TokenType.LEFT_BRACE:
            self.advance()
            stmt_list = self.block_stmt()
            stmt = Block(stmt_list)
        elif self.curr_type() == TokenType.IF:
            self.advance()
            stmt = self.branch_stmt()
        elif self.curr_type() == TokenType.WHILE:
            self.advance()
            stmt = self.while_stmt()
        elif self.curr_type() == TokenType.FOR:
            self.advance()
            stmt = self.for_stmt()
        else:
            stmt = self.generic_stmt()

        return stmt

    def print_stmt(self):
        """\
        <print statement> := "print" <expression> ";"
        """
        stmt = Printer(self.expression())
        self.check_semicolon()
        return stmt

    def block_stmt(self):
        """\
        <block statement> := "{" <declaration>* "}"
        this method returns a list of statements rather than a block node, b/c
        it is used for both generic block statements and function blocks. The
        caller must wrap the list into the appropriate node class.
        """
        stmt_list = []

        while self.curr_type() != TokenType.RIGHT_BRACE:
            expr = self.declaration()
            stmt_list.append(expr)

        if self.curr_type() == TokenType.EOF:
            self.trap("expected '}' at end of file")
        elif self.curr_type() != TokenType.RIGHT_BRACE:
            tok = self.curr_token()
            self.trap("expected '}' at {}".format(tok.lexeme))
        else:
            self.advance()

        return stmt_list

    def branch_stmt(self):
        """\
        <branch> := "if" "(" <expr> ")" <stmt> ("else" <stmt>)?
        """
        if self.curr_type() != TokenType.LEFT_PAREN:
            self.trap("expected open parenthesis after 'if'")
            return Branch(None, None, None)

        self.advance()

        condition = self.expression()

        if self.curr_type() != TokenType.RIGHT_PAREN:
            self.trap("expected close parenthesis after condition")
            return Branch(None, None, None)

        self.advance()

        then_branch = self.statement()
        else_branch = None

        if self.curr_type() == TokenType.ELSE:
            self.advance()
            else_branch = self.statement()

        return Branch(condition, then_branch, else_branch)

    def while_stmt(self):
        """
        <while> := "while" "(" <expression> ")" <statement>
        """
        if self.curr_type() != TokenType.LEFT_PAREN:
            #bug fix: message previously said 'if' (copy-paste from
            #branch_stmt) which misreported the offending keyword
            self.trap("expected open parenthesis after 'while'")
            return Loop(None, None)

        self.advance()

        condition = self.expression()

        if self.curr_type() != TokenType.RIGHT_PAREN:
            self.trap("expected close parenthesis after condition")
            return Loop(None, None)

        self.advance()

        body = self.statement()

        return Loop(condition, body)

    def for_stmt(self):
        """\
        <for statement> := "for" "(" (<var decl> | <expr stmt> | ";")
                            <expression>? ";" <expression>? ")" <statement>

        for statements are desugared into an equivalent while statement.
        """
        if self.curr_type() != TokenType.LEFT_PAREN:
            self.trap("expected '(' after 'for'")
            return Loop(None, None)

        self.advance()

        initializer = None

        if self.curr_type() == TokenType.SEMICOLON:
            self.advance()
        elif self.curr_type() == TokenType.VAR:
            self.advance()
            initializer = self.var_declaration()
        else:
            initializer = self.generic_stmt()

        condition = None

        if self.curr_type() == TokenType.SEMICOLON:
            self.advance()
        else:
            condition = self.expression()
            if not self.check_semicolon():
                return Loop(None, None)

        increment = None

        if self.curr_type() != TokenType.RIGHT_PAREN:
            increment = self.expression()

        if self.curr_type() == TokenType.RIGHT_PAREN:
            self.advance()
        else:
            self.trap("expected ')' after a for loop clause")
            return Loop(None, None)

        body = self.statement()

        #desugar the for loop by nesting the disjoint nodes into blocks
        if increment is not None:
            body = Block([body, increment])

        if condition is not None:
            body = Loop(condition, body)
        else:
            #a little different from standard lox
            #if you want an infinite loop, make it clear with a while(true)
            #so in reality, this isn't a by-the-book lox implementation
            self.trap("infinite loop detected, use while(true) instead")
            return Loop(None, None)

        if initializer is not None:
            body = Block([initializer, body])

        return body

    def generic_stmt(self):
        """\
        <expression statement> := <expression> ";"
        """
        stmt = Generic(self.expression())
        self.check_semicolon()
        return stmt

    def expression(self):
        """\
        dummy method used to encode the lox grammar explicity in the source.
        <expression> := <assignment>
        """
        return self.assignment()

    def assignment(self):
        """\
        assign rvalue to lvalue
        <assignment> := (IDENTIFIER "=" <assignment>) | <logical or>
        """
        lval = self.logical_or()

        if self.curr_type() == TokenType.EQUAL:
            self.advance()
            rval = self.assignment()

            if isinstance(lval, Variable):
                #extract token from variable node as a valid lvalue
                return Assignment(lval.name, rval)

            self.trap("assignment target is not an l-value")

        #if trap was initiated, this return node is just a dummy.
        #trap synchronized to the next statement anyways so its no risk.
        #on the other hand, if the branch was skipped entirely, then lval
        #is just some expression.
        return lval

    def logical_or(self):
        """\
        <logical or> := <logical and> ("or" <logical and>)*
        """
        expr = self.logical_and()

        if self.curr_type() == TokenType.OR:
            self.advance()

            left = expr
            tok = self.prev_token()
            #bug fix: recursing into logical_and left any further "or"
            #operators unconsumed, so "a or b or c" failed to parse.
            #recurse into logical_or instead, mirroring logical_and's own
            #self-recursion (chains parse right-associatively; evaluation
            #order and short-circuiting are unaffected)
            right = self.logical_or()

            return Logical(left, tok, right)

        return expr

    def logical_and(self):
        """\
        <logical and> := <equality> ("and" <equality>)*
        """
        expr = self.equality()

        if self.curr_type() == TokenType.AND:
            self.advance()

            left = expr
            tok = self.prev_token()
            #self-recursion handles chained "and" (right-associatively)
            right = self.logical_and()

            return Logical(left, tok, right)

        return expr

    def equality(self):
        """\
        <equality> := <comparison> (("==" | "!=") <comparison>)*
        """
        expr = self.comparison()

        types = {TokenType.EQUAL_EQUAL, TokenType.BANG_EQUAL}

        while self.curr_type() in types:
            self.advance()

            left = expr
            operator = self.prev_token()
            right = self.comparison()

            expr = Binary(left, operator, right)

        return expr

    def comparison(self):
        """\
        <comparison> := <term> ((">" | "<" | "<=" | ">=") <term>)*
        """
        expr = self.term()

        types = {TokenType.GREATER, TokenType.GREATER_EQUAL,
                 TokenType.LESS, TokenType.LESS_EQUAL}

        while self.curr_type() in types:
            self.advance()

            left = expr
            operator = self.prev_token()
            right = self.term()

            expr = Binary(left, operator, right)

        return expr

    def term(self):
        """\
        <term> := <factor> (("+" | "-") <factor>)*
        """
        expr = self.factor()

        #hoisted: the set used to be rebuilt on every loop iteration
        types = {TokenType.PLUS, TokenType.MINUS}

        while self.curr_type() in types:
            self.advance()

            left = expr
            operator = self.prev_token()
            right = self.factor()

            expr = Binary(left, operator, right)

        return expr

    def factor(self):
        """\
        <factor> := <unary> (("*" | "/") <unary>)*
        """
        expr = self.unary()

        #hoisted: the set used to be rebuilt on every loop iteration
        types = {TokenType.STAR, TokenType.SLASH}

        while self.curr_type() in types:
            self.advance()

            left = expr
            operator = self.prev_token()
            right = self.unary()

            expr = Binary(left, operator, right)

        return expr

    def unary(self):
        """\
        <unary> := ("!" | "-") <unary> | <primary>
        """
        if self.curr_type() in {TokenType.BANG, TokenType.MINUS}:
            self.advance()
            return Unary(self.prev_token(), self.unary())

        return self.primary()

    def primary(self):
        """\
        <primary> := NUMBER | STRING | "true" | "false" | "nil"
        <primary> := "(" <expression> ")"
        """
        types = {TokenType.NUMBER, TokenType.STRING, TokenType.NIL,
                 TokenType.TRUE, TokenType.FALSE}

        if self.curr_type() in types:
            expr = Literal(self.curr_token())
            self.advance()
        elif self.curr_type() == TokenType.LEFT_PAREN:
            self.advance()
            expr = Grouping(self.expression())

            if self.curr_type() == TokenType.RIGHT_PAREN:
                self.advance()
            else:
                self.trap("missing right parenthesis for grouped expression")
        elif self.curr_type() == TokenType.IDENTIFIER:
            expr = Variable(self.curr_token())
            self.advance()
        elif self.curr_type() == TokenType.EOF:
            #this situation occurs when the user has a grammar error at the
            #end of file such as "3-". In this situation, the parser has been
            #passing the EOF token along the call stack. The else branch can
            #handle this issue, but its not user friendly because it presents
            #a EOF:"None" lexeme to the user.
            #
            #trap is at EOF so no need to create a dummy expr for return
            tok = self.prev_token()
            self.trap("misplaced symbol '{}' at end of file".format(
                tok.lexeme))
        else:
            lexeme = self.curr_token().lexeme
            self.trap("misplaced symbol '{}'".format(lexeme))
            #dummy statement will be added to program tree
            expr = None

        return expr

    def trap(self, msg):
        """\
        push parameters to error handler then enter panic mode to reset at the
        next sequence point.
        """
        tok = self.tokens[self.i]
        line = tok.line

        if not self.err.push(line, msg):
            #handler is full: grow one slot so the overflow notice fits
            self.err.grow(1)
            #bug fix: was self.error.push, which raised AttributeError
            #whenever the handler overflowed (no attribute named "error")
            self.err.push(line, "additional errors found (hidden)")

        #synchronize parser to continue at next program statement
        types = {TokenType.CLASS, TokenType.FUN, TokenType.VAR,
                 TokenType.FOR, TokenType.IF, TokenType.WHILE,
                 TokenType.PRINT, TokenType.RETURN}

        while self.curr_type() not in types:
            if self.curr_type() == TokenType.EOF:
                #no statements left in program so no need to continue parsing
                #unwind call stack back to self.program and let it handle return
                raise ParseError
            else:
                self.advance()