Beispiel #1
0
    def read_token(self):
        """Read and return the next token from the source stream.

        Leading whitespace is skipped first; if that whitespace run
        contained at least one newline, the whole run collapses into a
        single NEWLINE token. Otherwise characters are consumed
        greedily, longest-match style: each new character is appended
        to the lexeme and handed to create_token() until adding one
        more character no longer yields a token. Returns an EOF token
        at end of input, and reports an error via mshl.error() for a
        character sequence that never forms a token.
        """
        # Skip whitespace, remembering whether a newline was crossed.
        char = self.peek_char()
        newline = False
        while char and char.isspace():
            if char == '\n':
                newline = True
            self.read_char()
            char = self.peek_char()

        lexeme = ''
        # Capture the position BEFORE consuming token characters so the
        # token points at its first character.
        row = self.row
        column = self.column

        if newline:
            # Any whitespace run containing a newline becomes one NEWLINE.
            return Token(NEWLINE, '\n', row=row, column=column)

        num_attempts = 0
        token = None
        while True:
            char = self.peek_char()

            # Maybe we have reached the end of the source?
            if not char:
                break

            lexeme += char

            # Attempt to create a token from the lexeme.
            temp = self.create_token(lexeme)
            if not temp:
                if not token:
                    # No token formed yet: tolerate up to 10 leading
                    # characters before giving up on this sequence.
                    num_attempts += 1
                    if num_attempts > 10:
                        break
                else:
                    # A longer lexeme stopped matching; keep the last
                    # successful token (longest match wins).
                    break

            # NOTE(review): when temp is None and no token exists yet,
            # this re-assigns None to token — harmless here, but
            # presumably only the truthy case is intentional.
            token = temp

            # Consume the character that made the token creation possible.
            self.read_char()

        if len(lexeme) == 0:
            # We've reached the end of the source.
            token = Token(EOF)

        if token:
            token.row = row
            token.column = column
            #print token
            return token

        mshl.error('sequence not understood: {}'.format(lexeme))
Beispiel #2
0
def parse_str(parser):
    """Parse a string literal token into a STRING node.

    Verifies the lexeme is wrapped in a matching pair of quotes
    ("..." or '...'), strips them, and resolves the two supported
    escape sequences (backslash and single quote).
    """
    tok = parser.expect(lexemes.STR)
    raw = str(tok.lexeme)

    # A well-formed literal is delimited by a matching quote pair.
    double_quoted = raw.startswith('"') and raw.endswith('"')
    single_quoted = raw.startswith('\'') and raw.endswith('\'')
    if not (double_quoted or single_quoted):
        mshl.error("unterminated string", tok)

    # Drop the surrounding quotes, then unescape.
    inner = raw[1:-1]
    inner = inner.replace('\\\\', '\\').replace('\\\'', '\'')

    return Node(STRING, inner, tok)
    def expect(self, *args):
        """Read one token per expected lexeme category, in order.

        Reports an error through mshl.error() for each token whose
        category does not match the expectation, but keeps reading so
        later mismatches surface too. Returns the single token when
        exactly one category was requested, otherwise the list of
        tokens in request order.
        """
        collected = []

        for expected in args:
            tok = self.read_token()

            if tok.category != expected:
                mshl.error('expected {}'.format(expected), tok)

            collected.append(tok)

        # A single expectation unwraps to a bare token for convenience.
        return collected[0] if len(collected) == 1 else collected
Beispiel #4
0
def parse_int(parser):
    """Parse an integer literal into an INTEGER node.

    Supports three spellings: "0x"-prefixed hexadecimal, binary with a
    trailing "b", and plain decimal. A bare "0x" with no digits is
    reported as an error and treated as zero.
    """
    tok = parser.expect(lexemes.INT)
    text = tok.lexeme

    if text.startswith("0x"):
        digits = text[2:]
        if digits:
            number = int(digits, 16)
        else:
            # "0x" with nothing after it is not a valid literal.
            mshl.error("invalid hex value", tok)
            number = 0
    elif text.endswith("b"):
        # Binary literal: digits followed by a trailing 'b'.
        number = int(text[:-1], 2)
    else:
        number = text

    return Node(INTEGER, int(number), tok)
Beispiel #5
0
def parse_expr4(parser):
    """Parse a primary (level-4) expression and return its Node.

    Dispatches on the category of the next token: parenthesized
    expressions, array literals, identifiers, signed/unsigned integer
    and string literals, the keywords for/func/if/while/return,
    true/false, logical not, break and continue. Returns None when the
    next token is EOF, or when no branch produced a node (an error has
    already been reported in that case).
    """
    parser.eat_whitespace()
    expr = None

    tok = parser.peek_token()

    # (<expr>)
    if tok.category == lexemes.L_PAREN:
        parser.expect(lexemes.L_PAREN)
        expr = parse_expr(parser)
        parser.expect(lexemes.R_PAREN)

    # [<expr>[, <expr> ...]]
    elif tok.category == lexemes.L_BRACK:
        parser.expect(lexemes.L_BRACK)

        items = []

        while True:
            # NOTE(review): `tok` is reassigned here, so the ARRAY node
            # below (and the final `expr.token = tok`) carries the
            # last-peeked token, not the opening bracket — presumably
            # acceptable for diagnostics; verify.
            tok = parser.peek_token()
            if tok.category == lexemes.R_BRACK:
                break

            parser.eat_whitespace()
            items.append(parse_expr(parser))
            parser.eat_whitespace()

            # A closing bracket may also follow an item (trailing
            # element without a comma).
            tok = parser.peek_token()
            if tok.category == lexemes.R_BRACK:
                break

            parser.expect(lexemes.COMMA)

        parser.eat_whitespace()
        parser.expect(lexemes.R_BRACK)

        expr = Node(ARRAY, token=tok, children=items)

    # <identifier>
    elif tok.category == lexemes.IDENT:
        expr = parse_ident(parser)

    # -<integer>
    elif tok.category == lexemes.MINUS_SIGN:
        # Consume the sign, parse the literal, then negate its data.
        parser.read_token()
        expr = parse_int(parser)
        expr.data = -int(expr.data)

    # <integer>
    elif tok.category == lexemes.INT:
        expr = parse_int(parser)

    # <string>
    elif tok.category == lexemes.STR:
        expr = parse_str(parser)

    # for ([<expr>]; [<expr>]; [<expr>]) ...
    elif tok.category == lexemes.FOR:
        expr = parse_for(parser)

    # func <identifier>(...)
    elif tok.category == lexemes.FUNC:
        expr = parse_func(parser)

    # if (<expr>) ...
    elif tok.category == lexemes.IF:
        expr = parse_if(parser)

    # while (<expr>) ...
    elif tok.category == lexemes.WHILE:
        expr = parse_while(parser)

    # return <expr>
    elif tok.category == lexemes.RETURN:
        parser.read_token()

        # Remember whether a newline separates `return` from its
        # expression so we can warn about the style below.
        newline_after_return = False
        if parser.peek_token().category == lexemes.NEWLINE:
            newline_after_return = True

        parser.eat_whitespace()

        # A bare return (immediately followed by '}' or ';')
        # implicitly returns integer 0.
        if parser.peek_token().category in (lexemes.R_BRACE, lexemes.SEMICOLON):
            expr = Node(RETURN, children=[Node(INTEGER, 0, tok)])
        else:
            if newline_after_return:
                mshl.warning('prefer semicolon or expression on same row as return keyword', tok)
            expr = Node(RETURN, children=[parse_expr(parser)])

    # true
    elif tok.category == lexemes.TRUE:
        parser.read_token()
        expr = Node(INTEGER, 1)

    # false
    elif tok.category == lexemes.FALSE:
        parser.read_token()
        expr = Node(INTEGER, 0)

    # undefined (should not be used this way, see == operator)
    #elif tok.category == lexemes.UNDEFINED:
    #    parser.read_token()
    #    expr = Node(INTEGER, 0)
    #    mshl.warning('undefined should only be used in equality tests')

    elif tok.category == lexemes.NOT:
        # `!x` desugars to the ternary `x ? 0 : 1`.
        parser.read_token()
        parser.eat_whitespace()
        expr = parse_expr(parser)
        expr = Node(IF_TERNARY, children=[expr, Node(INTEGER, 0, tok), Node(INTEGER, 1, tok)])

    elif tok.category == lexemes.BREAK:
        parser.read_token()
        expr = Node(BREAK)

    elif tok.category == lexemes.CONTINUE:
        parser.read_token()
        expr = Node(CONTINUE)

    # <eof> | <newline>
    #elif tok.category in (lexemes.EOF, lexemes.NEWLINE):
    #    pass

    elif tok.category != lexemes.EOF:
        # Unknown token: consume it so parsing can continue, and report.
        parser.read_token()
        mshl.error("unexpected token: {}".format(tok.category), tok)

    if expr:
        expr.token = tok

    return expr