Example #1
0
    def _string(self) -> t.Token:
        """Consume a double-quoted string and return a STRING token.

        Assumes the lexer is positioned on the opening quote.

        Raises:
            err.LexerError: with RS_EXPECTED_DOUBLE_QUOTE if the input
                ends before a closing quote is found.
        """
        line_no = self._line_no
        column = self._column

        # Step past the opening double quote.
        self._advance()

        chars = []
        while self._current_char is not None and self._current_char != '"':
            chars.append(self._current_char)
            self._advance()
        contents = ''.join(chars)

        if self._current_char is None:
            # Ran off the end of the input without a closing quote.
            raise err.LexerError(
                error_code=err.ErrorCode.RS_EXPECTED_DOUBLE_QUOTE,
                token=t.Token(type=t.TokenType.INVALID,
                              value=contents,
                              line_no=line_no,
                              column=column))

        # Step past the closing double quote.
        self._advance()

        return t.Token(type=t.TokenType.STRING,
                       value=contents,
                       line_no=line_no,
                       column=column)
Example #2
0
    def _skip_multi_line_comment(self) -> None:
        """Skip over a block comment, assuming the lexer is positioned on
        its two-character opener.

        Raises:
            err.LexerError: with RS_EOF_IN_BLOCK_COMMENT if the input ends
                before the closing ``|#`` is seen.
        """
        # Step past the two-character comment opener.
        self._advance()
        self._advance()

        # Position of the comment body, used for error reporting.
        line_no = self._line_no
        column = self._column

        def eof_error() -> err.LexerError:
            # Shared error for hitting end-of-input inside the comment.
            return err.LexerError(
                error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                token=t.Token(type=t.TokenType.INVALID,
                              value=None,
                              line_no=line_no,
                              column=column))

        while True:
            char = self._current_char
            if char == '|':
                following = self._peek()
                if following == '#':
                    # Found the closing '|#': consume it and stop.
                    self._advance()
                    self._advance()
                    return
                if following is None:
                    raise eof_error()
            elif char is None:
                raise eof_error()

            self._advance()
Example #3
0
    def _boolean(self) -> t.Token:
        """Handles booleans: ``#T``, ``#t``, ``#true``, ``#F``, ``#f``, ``#false``.

        Assumes the lexer is positioned on the leading ``#``.  Consumes
        characters until whitespace, a delimiter, or until the accumulated
        text can no longer grow into ``#true``/``#false``, then validates
        the result.

        Raises:
            err.LexerError: with RS_BAD_SYNTAX if the text is not one of
                the accepted spellings, or if the input ended during the
                scan.  NOTE(review): the ``self._current_char is None``
                test below also rejects an otherwise-valid boolean such as
                ``#t`` sitting at the very end of the input — confirm that
                is intended.
        """
        line_no = self._line_no
        column = self._column

        boolean = self._current_char  # the leading '#'
        self._advance()
        while self._current_char is not None and not self._current_char.isspace(
        ):
            current_char = self._current_char
            if current_char in ['"', "'", '`', '#']:
                # Include the offending character so the error displays it.
                boolean += self._current_char
                break
            elif current_char in ['(', ')', '{', '}', '[', ']']:
                # Parens/brackets end the token but are not part of it.
                break

            boolean += self._current_char
            self._advance()

            # Substring test doubles as a prefix check here: since the
            # accumulated text always starts with '#', it is a substring of
            # '#true'/'#false' exactly when it is a prefix of one of them.
            lowered = boolean.lower()
            if lowered not in '#true' and lowered not in '#false':
                break

        if self._current_char is None or boolean not in [
                '#T', '#t', '#true', '#F', '#f', '#false'
        ]:
            raise err.LexerError(error_code=err.ErrorCode.RS_BAD_SYNTAX,
                                 token=t.Token(type=t.TokenType.INVALID,
                                               value=boolean,
                                               line_no=line_no,
                                               column=column),
                                 text=boolean)

        if boolean in ['#T', '#t', '#true']:
            return t.Token(type=t.TokenType.BOOLEAN,
                           value=True,
                           line_no=line_no,
                           column=column)
        elif boolean in ['#F', '#f', '#false']:
            return t.Token(type=t.TokenType.BOOLEAN,
                           value=False,
                           line_no=line_no,
                           column=column)
Example #4
0
    def _identifier(self, initial: str = '') -> t.Token:
        """Consume an identifier (including builtin function names).

        :param initial: characters already consumed that belong to the name.
        :return: a NAME token covering the full identifier text.
        """
        line_no = self._line_no
        column = self._column

        pieces = [initial]
        while True:
            char = self._current_char
            # Stop at end of input, whitespace, or any delimiter character.
            if char is None or char.isspace() or char in self.NON_ID_CHARS:
                break
            pieces.append(char)
            self._advance()

        return t.Token(t.TokenType.NAME, ''.join(pieces), line_no, column)
Example #5
0
    def _process_all_tokens(self) -> None:
        """Break the input text apart into tokens.

        Appends every token (including a final EOF token) to
        ``self.tokens`` and feeds parenthesis/EOF events to the paren
        analyzer as they are encountered.
        """
        paren_analyzer = self.paren_analyzer

        while True:
            token = self._process_next_token()
            if token is None:
                break

            self.tokens.append(token)
            # Let the analyzer track parenthesis balance as we go.
            if token.type in (t.TokenType.LPAREN, t.TokenType.RPAREN):
                paren_analyzer.received_paren(token)

        # Input exhausted: record an explicit EOF token.
        eof_token = t.Token(t.TokenType.EOF, None, self._line_no, self._column)
        self.tokens.append(eof_token)
        paren_analyzer.reached_eof(eof_token)
Example #6
0
    def _process_next_token(self) -> tp.Optional[t.Token]:
        """Produce the next token from the input.

        Skips whitespace/comments, then dispatches on the current character
        to the specialized handlers or builds the single-character tokens
        inline.

        :return: the next token, or None when the input is exhausted
            (the caller is responsible for emitting EOF).

        Raises:
            err.LexerError: for unsupported '|' input or from the handlers.
            err.IllegalStateError: if the current character matches no rule.
        """
        token = None

        while self._current_char:
            try:
                self._skip_whitespace_or_comments()
            except err.ReachedEOF:
                break

            # Numbers: a digit, a leading '.', or a '-' immediately followed
            # by a digit or '.'.  Guard the peek against end-of-input: the
            # original unconditionally called .isdigit() on the peeked value,
            # which raised AttributeError when '-' was the final character.
            # With the guard, a trailing '-' is lexed as an identifier.
            next_char = self._peek()
            if (self._current_char.isdigit() or self._current_char == '.'
                    or (self._current_char == '-' and next_char is not None
                        and (next_char.isdigit() or next_char == '.'))):
                token = self._number()
                break

            if self._current_char not in self.NON_ID_CHARS:
                token = self._identifier()
                break

            if self._current_char == '#':
                token = self._boolean()
                break

            if self._current_char == '"':
                token = self._string()
                break

            if self._current_char == "'":
                token = t.Token(type=t.TokenType.QUOTE,
                                value=self._current_char,
                                line_no=self._line_no,
                                column=self._column)
                self._advance()
                break

            if self._current_char in ['(', '{', '[']:
                token = t.Token(type=t.TokenType.LPAREN,
                                value=self._current_char,
                                line_no=self._line_no,
                                column=self._column)
                self._advance()
                break

            if self._current_char in [')', '}', ']']:
                token = t.Token(type=t.TokenType.RPAREN,
                                value=self._current_char,
                                line_no=self._line_no,
                                column=self._column)
                self._advance()
                break

            if self._current_char == '|':
                # Bare '|' (outside a '#|...|#' comment) is not supported.
                raise err.LexerError(
                    error_code=err.ErrorCode.FEATURE_NOT_IMPLEMENTED,
                    token=t.Token(type=t.TokenType.INVALID,
                                  value="''",
                                  line_no=self._line_no,
                                  column=self._column))

            raise err.IllegalStateError

        return token
Example #7
0
    def _number(self) -> t.Token:
        """Return a number token from a number consumed from the input (or an ID if not a valid number).

        Recognizes integers, decimals, and rationals of the form ``N/D``.
        Text that fails to parse as any of those falls back to a NAME token.

        Raises:
            err.LexerError: with DIVISION_BY_ZERO for a rational whose
                denominator parses to 0.
        """
        line_no = self._line_no
        column = self._column

        # Optional leading minus sign.
        if self._current_char == '-':
            number = '-'
            self._advance()
        else:
            number = ''

        is_rational = False
        numerator = ''
        denominator = ''

        # Consume every character that could belong to the number; stop at
        # end of input, whitespace, or a delimiter character.
        while (self._current_char is not None
               and not self._current_char.isspace()
               and self._current_char not in self.NON_ID_CHARS):
            if self._current_char == '/':
                # A second '/' re-captures everything before it (including
                # the first '/') as the "numerator", which then fails int()
                # below and yields a NAME token.
                is_rational = True
                numerator = number
                number += self._current_char
                self._advance()
                continue

            if is_rational:
                denominator += self._current_char

            number += self._current_char
            self._advance()

        if is_rational:
            try:
                numerator = int(numerator)
                denominator = int(denominator)

                # A negative denominator is deliberately rejected; the whole
                # text falls back to a NAME token via the except clause.
                if denominator < 0:
                    raise ValueError

            except ValueError:
                return t.Token(type=t.TokenType.NAME,
                               value=number,
                               line_no=line_no,
                               column=column)
            else:
                token = t.Token(type=t.TokenType.RATIONAL,
                                value=(numerator, denominator),
                                line_no=line_no,
                                column=column)

                # Build the token first so the error can point at it.
                if denominator == 0:
                    raise err.LexerError(
                        error_code=err.ErrorCode.DIVISION_BY_ZERO, token=token)

                return token
        else:
            # Try int, then float, then give up and treat it as a name
            # (e.g. '-', '1.2.3').
            try:
                number = int(number)
            except ValueError:
                try:
                    number = float(number)
                except ValueError:
                    return t.Token(type=t.TokenType.NAME,
                                   value=number,
                                   line_no=line_no,
                                   column=column)
                else:
                    return t.Token(type=t.TokenType.DECIMAL,
                                   value=number,
                                   line_no=line_no,
                                   column=column)
            else:
                return t.Token(type=t.TokenType.INTEGER,
                               value=number,
                               line_no=line_no,
                               column=column)
Example #8
0
    def visit_NameAssign(self, node: ast.NameAssign) -> None:
        """Semantic checks for a name definition.

        Validates the shape of the definition (exactly one name followed by
        exactly one expression), rejects builtin and previously-defined
        names, visits the bound expression, and finally records the new
        name in the current scope.

        Raises:
            err.SemanticError: for any malformed or disallowed definition.
            err.IllegalStateError: if a keyword reaches the final branch
                without a dedicated error message.
        """
        token = node.token
        exprs = node.exprs
        exprs_len = len(exprs)

        if exprs_len != 0 and type(exprs[0]) is ast.Sym:
            # a little bit hacky..., not sure why Racket has this behavior
            raise err.SemanticError(
                error_code=err.ErrorCode.BUILTIN_OR_IMPORTED_NAME,
                token=t.Token(type=t.TokenType.INVALID,
                              value='quote',
                              line_no=token.line_no,
                              column=token.column))

        # The thing being defined must be a plain, non-keyword name.
        if exprs_len == 0 or type(
                exprs[0]) is not ast.Name or exprs[0].value in t.KEYWORDS:
            next_token = exprs[0].token if exprs_len > 0 else None
            raise err.SemanticError(error_code=err.ErrorCode.D_EXPECTED_A_NAME,
                                    token=token,
                                    next_token=next_token)

        const_token = exprs[0].token
        const_name = const_token.value
        if exprs_len == 1:
            # Name with no expression: nothing to bind.
            raise err.SemanticError(
                error_code=err.ErrorCode.D_V_MISSING_AN_EXPRESSION,
                token=token,
                name=const_name)
        elif exprs_len > 2:
            # More than one expression after the name.
            extra_count = exprs_len - 2
            raise err.SemanticError(
                error_code=err.ErrorCode.D_V_EXPECTED_ONE_EXPRESSION,
                token=token,
                extra_count=extra_count,
                name=const_name)
        elif type(exprs[1]) is ast.Name and exprs[1].value in t.KEYWORDS:
            # Bound expression is a bare keyword: pick a keyword-specific
            # error message.
            keyword = exprs[1].value
            token = exprs[1].token
            if keyword == t.Keyword.COND.value:
                raise err.SemanticError(
                    error_code=err.ErrorCode.C_EXPECTED_OPEN_PARENTHESIS,
                    token=token)
            elif keyword == t.Keyword.DEFINE.value:
                raise err.SemanticError(
                    error_code=err.ErrorCode.D_EXPECTED_OPEN_PARENTHESIS,
                    token=token)
            elif keyword == t.Keyword.DEFINE_STRUCT.value:
                raise err.SemanticError(
                    error_code=err.ErrorCode.DS_EXPECTED_OPEN_PARENTHESIS,
                    token=token)
            elif keyword == t.Keyword.ELSE.value:
                raise err.SemanticError(error_code=err.ErrorCode.E_NOT_ALLOWED,
                                        token=token)
            else:
                raise err.IllegalStateError

        node.identifier = node.exprs[0].value
        node.expr = node.exprs[1]

        var_name = node.identifier
        var_symbol = sym.AmbiguousSymbol(var_name)

        # Builtins can never be shadowed by a definition.
        if var_name in BUILT_IN_PROCS:
            raise err.SemanticError(
                error_code=err.ErrorCode.BUILTIN_OR_IMPORTED_NAME,
                token=const_token)

        # A name may only be defined once per scope.
        if self.current_scope.lookup(var_name,
                                     current_scope_only=True) is not None:
            raise err.SemanticError(
                error_code=err.ErrorCode.PREVIOUSLY_DEFINED_NAME,
                token=const_token)

        # Check the bound expression before registering the new name.
        self.visit(node.expr)

        self.current_scope.define(var_symbol)
    def data(
        self
    ) -> Union[ast.Bool, ast.Dec, ast.Int, ast.List, ast.Rat, ast.Str,
               ast.Sym]:
        """Parse a single datum and return its AST node.

        Quoted parenthesized forms are parsed iteratively with an explicit
        stack so nested lists need no recursion; quoted names become
        symbols.

        data: BOOLEAN
            | DECIMAL
            | INTEGER
            | LIST
            | RATIONAL
            | STRING
            | SYMBOL

        Raises:
            err.LexerError: if EOF is reached inside a quoted list.
            err.IllegalStateError: if a quoted token has no handling here.
        """
        token = self.current_token

        if token.type is t.TokenType.BOOLEAN:
            self.eat(t.TokenType.BOOLEAN)
            return ast.Bool(token)
        elif token.type is t.TokenType.DECIMAL:
            self.eat(t.TokenType.DECIMAL)
            return ast.Dec(token)
        elif token.type is t.TokenType.INTEGER:
            self.eat(t.TokenType.INTEGER)
            return ast.Int(token)
        elif token.type is t.TokenType.RATIONAL:
            self.eat(t.TokenType.RATIONAL)
            return ast.Rat(token)
        elif token.type is t.TokenType.STRING:
            self.eat(t.TokenType.STRING)
            return ast.Str(token)
        elif token.type is t.TokenType.QUOTE:
            self.eat(t.TokenType.QUOTE)

            next_token = self.current_token

            if next_token.type is t.TokenType.LPAREN:
                # Quoted list: parse iteratively, one list of children per
                # currently-open paren.
                self.eat(t.TokenType.LPAREN)

                prims_stack = [[]]

                open_parens = 1
                while open_parens > 0:
                    curr_token = self.current_token
                    if curr_token.type is t.TokenType.EOF:
                        # Input ended before the quoted list was closed.
                        raise err.LexerError(
                            error_code=err.ErrorCode.RS_SYMBOL_FOUND_EOF,
                            token=t.Token(type=t.TokenType.INVALID,
                                          value="'",
                                          line_no=curr_token.line_no,
                                          column=curr_token.column))

                    elif curr_token.type is t.TokenType.LPAREN:
                        # Nested list: start a fresh children collection.
                        open_parens += 1
                        self.eat(t.TokenType.LPAREN)

                        prims = []
                        prims_stack.append(prims)

                        continue

                    elif curr_token.type is t.TokenType.RPAREN:
                        open_parens -= 1
                        self.eat(t.TokenType.RPAREN)

                        if open_parens > 0:
                            # Close the innermost list and attach it to
                            # its parent's children.
                            expr = ast.List(token, prims_stack[-1])
                            prims_stack = prims_stack[:-1]
                            prims_stack[-1].append(expr)

                        continue

                    prims = prims_stack[-1]
                    if curr_token.type in [
                            t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                            t.TokenType.INTEGER, t.TokenType.RATIONAL,
                            t.TokenType.STRING
                    ]:
                        # Plain literal inside the quoted list.
                        prims.append(self.data())
                    elif curr_token.type is t.TokenType.NAME:
                        # Inside a quote, a name is a symbol.
                        self.eat(t.TokenType.NAME)
                        name_token = t.Token(type=t.TokenType.SYMBOL,
                                             value=curr_token.value,
                                             line_no=curr_token.line_no,
                                             column=curr_token.column)
                        prims.append(ast.Sym(name_token))
                    else:
                        raise err.IllegalStateError

                node = ast.List(token, prims_stack[0])
                return node
            elif next_token.type in [
                    t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                    t.TokenType.INTEGER, t.TokenType.RATIONAL,
                    t.TokenType.STRING
            ]:
                # Quoting a literal yields the literal itself.
                return self.data()
            elif next_token.type is t.TokenType.NAME:
                # Quoted name becomes a symbol, positioned at the quote.
                self.eat(t.TokenType.NAME)
                name_token = t.Token(type=t.TokenType.SYMBOL,
                                     value=next_token.value,
                                     line_no=token.line_no,
                                     column=token.column)
                return ast.Sym(name_token)
            else:
                raise err.IllegalStateError

        else:
            self.error_unexpected_token(token=token)