Esempio n. 1
0
    def _skip_multi_line_comment(self) -> None:
        self._advance()
        self._advance()

        line_no = self._line_no
        column = self._column

        while True:
            if self._current_char == '|':
                next_char = self._peek()
                if next_char == '#':
                    self._advance()
                    self._advance()
                    break
                elif next_char is None:
                    raise err.LexerError(
                        error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                        token=t.Token(type=t.TokenType.INVALID,
                                      value=None,
                                      line_no=line_no,
                                      column=column))

            elif self._current_char is None:
                raise err.LexerError(
                    error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                    token=t.Token(type=t.TokenType.INVALID,
                                  value=None,
                                  line_no=line_no,
                                  column=column))

            self._advance()
Esempio n. 2
0
    def _string(self) -> t.Token:
        """Consume a double-quoted string and return a STRING token.

        Assumes the lexer sits on the opening ``"``.  No escape sequences
        are recognized; the literal runs to the next ``"``.  Raises
        err.LexerError (RS_EXPECTED_DOUBLE_QUOTE) when the input ends
        before a closing quote is found.
        """
        start_line = self._line_no
        start_column = self._column

        # Step past the opening quote.
        self._advance()

        chars = []
        while self._current_char is not None and self._current_char != '"':
            chars.append(self._current_char)
            self._advance()
        contents = ''.join(chars)

        if self._current_char is None:
            # Ran off the end of the input with the string still open.
            raise err.LexerError(
                error_code=err.ErrorCode.RS_EXPECTED_DOUBLE_QUOTE,
                token=t.Token(type=t.TokenType.INVALID,
                              value=contents,
                              line_no=start_line,
                              column=start_column))

        # Step past the closing quote.
        self._advance()

        return t.Token(type=t.TokenType.STRING,
                       value=contents,
                       line_no=start_line,
                       column=start_column)
Esempio n. 3
0
    def _boolean(self) -> t.Token:
        """Consume a ``#``-prefixed boolean literal and return a BOOLEAN token.

        Accepts ``#T``, ``#t``, ``#true`` (value True) and ``#F``, ``#f``,
        ``#false`` (value False).  Any other ``#``-prefixed text raises
        err.LexerError with RS_BAD_SYNTAX.
        """
        line_no = self._line_no
        column = self._column

        boolean = self._current_char  # the leading '#'
        self._advance()
        while self._current_char is not None and not self._current_char.isspace():
            current_char = self._current_char
            if current_char in ['"', "'", '`', '#']:
                # A new literal starts immediately after; fold the char in
                # so the text fails validation below and reports an error.
                boolean += self._current_char
                break
            elif current_char in ['(', ')', '{', '}', '[', ']']:
                # Parens delimit the literal; leave them for the caller.
                break

            boolean += self._current_char
            self._advance()

            # Stop as soon as the text can no longer grow into a valid
            # boolean spelling.  (startswith replaces the original
            # substring test `lowered in '#true'`, which only worked as a
            # prefix check by accident of '#' appearing at index 0.)
            lowered = boolean.lower()
            if not ('#true'.startswith(lowered) or '#false'.startswith(lowered)):
                break

        # NOTE: a boolean may legally be the last token of the input, so
        # reaching EOF here is not an error by itself.  The original code
        # also raised when self._current_char was None, which rejected a
        # valid '#t' at end of input while '#t ' was accepted.
        if boolean not in ['#T', '#t', '#true', '#F', '#f', '#false']:
            raise err.LexerError(error_code=err.ErrorCode.RS_BAD_SYNTAX,
                                 token=t.Token(type=t.TokenType.INVALID,
                                               value=boolean,
                                               line_no=line_no,
                                               column=column),
                                 text=boolean)

        return t.Token(type=t.TokenType.BOOLEAN,
                       value=boolean in ['#T', '#t', '#true'],
                       line_no=line_no,
                       column=column)
Esempio n. 4
0
    def _process_next_token(self) -> tp.Optional[t.Token]:
        """Produce the next token from the input, or None at end of input.

        Skips whitespace/comments, then dispatches on the current
        character: numbers, identifiers, booleans and strings go to the
        specialised scanners; quote and paren tokens are built inline.
        Raises err.LexerError for unsupported '|' syntax and
        err.IllegalStateError if no rule matches.
        """
        token = None

        while self._current_char:
            try:
                self._skip_whitespace_or_comments()
            except err.ReachedEOF:
                break

            # A '-' only starts a number when a digit or '.' follows.
            # _peek() may return None at end of input, so guard it: the
            # original called self._peek().isdigit() unguarded, crashing
            # with AttributeError on a lone trailing '-'.
            next_char = self._peek()
            if (self._current_char.isdigit() or self._current_char == '.'
                    or (self._current_char == '-' and next_char is not None
                        and (next_char.isdigit() or next_char == '.'))):
                token = self._number()
                break

            if self._current_char not in self.NON_ID_CHARS:
                token = self._identifier()
                break

            if self._current_char == '#':
                token = self._boolean()
                break

            if self._current_char == '"':
                token = self._string()
                break

            if self._current_char == "'":
                token = t.Token(type=t.TokenType.QUOTE,
                                value=self._current_char,
                                line_no=self._line_no,
                                column=self._column)
                self._advance()
                break

            if self._current_char in ['(', '{', '[']:
                token = t.Token(type=t.TokenType.LPAREN,
                                value=self._current_char,
                                line_no=self._line_no,
                                column=self._column)
                self._advance()
                break

            if self._current_char in [')', '}', ']']:
                token = t.Token(type=t.TokenType.RPAREN,
                                value=self._current_char,
                                line_no=self._line_no,
                                column=self._column)
                self._advance()
                break

            if self._current_char == '|':
                # Standalone '|' syntax is deliberately unsupported.
                raise err.LexerError(
                    error_code=err.ErrorCode.FEATURE_NOT_IMPLEMENTED,
                    token=t.Token(type=t.TokenType.INVALID,
                                  value="''",
                                  line_no=self._line_no,
                                  column=self._column))

            raise err.IllegalStateError

        return token
Esempio n. 5
0
    def _number(self) -> t.Token:
        """Return a number token from a number consumed from the input (or an ID if not a valid number).

        Produces INTEGER, DECIMAL or RATIONAL tokens; text that does not
        parse as any of those becomes a NAME token.  A rational with a
        zero denominator raises err.LexerError (DIVISION_BY_ZERO).
        """
        line_no = self._line_no
        column = self._column

        # Optional leading minus sign.
        if self._current_char == '-':
            text = '-'
            self._advance()
        else:
            text = ''

        saw_slash = False
        numer_text = ''
        denom_text = ''

        while (self._current_char is not None
               and not self._current_char.isspace()
               and self._current_char not in self.NON_ID_CHARS):
            char = self._current_char
            if char == '/':
                # Everything accumulated so far is the numerator candidate.
                saw_slash = True
                numer_text = text
            elif saw_slash:
                denom_text += char
            text += char
            self._advance()

        if not saw_slash:
            # Plain number: try integer, then float, else identifier.
            try:
                as_int = int(text)
            except ValueError:
                pass
            else:
                return t.Token(type=t.TokenType.INTEGER,
                               value=as_int,
                               line_no=line_no,
                               column=column)
            try:
                as_float = float(text)
            except ValueError:
                return t.Token(type=t.TokenType.NAME,
                               value=text,
                               line_no=line_no,
                               column=column)
            return t.Token(type=t.TokenType.DECIMAL,
                           value=as_float,
                           line_no=line_no,
                           column=column)

        # Rational: both parts must be integers and the denominator
        # non-negative; otherwise the whole text is an identifier.
        try:
            numer = int(numer_text)
            denom = int(denom_text)
            if denom < 0:
                raise ValueError
        except ValueError:
            return t.Token(type=t.TokenType.NAME,
                           value=text,
                           line_no=line_no,
                           column=column)

        token = t.Token(type=t.TokenType.RATIONAL,
                        value=(numer, denom),
                        line_no=line_no,
                        column=column)

        # Zero denominator is a lexical error, reported with the token.
        if denom == 0:
            raise err.LexerError(
                error_code=err.ErrorCode.DIVISION_BY_ZERO, token=token)

        return token
Esempio n. 6
0
    def data(
        self
    ) -> Union[ast.Bool, ast.Dec, ast.Int, ast.List, ast.Rat, ast.Str,
               ast.Sym]:
        """
        data: BOOLEAN
            | DECIMAL
            | INTEGER
            | LIST
            | RATIONAL
            | STRING
            | SYMBOL

        Plain literals map directly to their AST nodes.  LIST and SYMBOL
        are introduced by a QUOTE token: quote followed by LPAREN parses a
        (possibly nested) ast.List, quote followed by NAME parses an
        ast.Sym, and quote followed by a literal yields the literal.
        """
        token = self.current_token

        if token.type is t.TokenType.BOOLEAN:
            self.eat(t.TokenType.BOOLEAN)
            return ast.Bool(token)
        elif token.type is t.TokenType.DECIMAL:
            self.eat(t.TokenType.DECIMAL)
            return ast.Dec(token)
        elif token.type is t.TokenType.INTEGER:
            self.eat(t.TokenType.INTEGER)
            return ast.Int(token)
        elif token.type is t.TokenType.RATIONAL:
            self.eat(t.TokenType.RATIONAL)
            return ast.Rat(token)
        elif token.type is t.TokenType.STRING:
            self.eat(t.TokenType.STRING)
            return ast.Str(token)
        elif token.type is t.TokenType.QUOTE:
            self.eat(t.TokenType.QUOTE)

            next_token = self.current_token

            if next_token.type is t.TokenType.LPAREN:
                self.eat(t.TokenType.LPAREN)

                # Stack of element lists, one per currently-open paren;
                # prims_stack[-1] collects the innermost list being built.
                prims_stack = [[]]

                open_parens = 1
                while open_parens > 0:
                    curr_token = self.current_token
                    if curr_token.type is t.TokenType.EOF:
                        # Input ended before the quoted list was closed.
                        raise err.LexerError(
                            error_code=err.ErrorCode.RS_SYMBOL_FOUND_EOF,
                            token=t.Token(type=t.TokenType.INVALID,
                                          value="'",
                                          line_no=curr_token.line_no,
                                          column=curr_token.column))

                    elif curr_token.type is t.TokenType.LPAREN:
                        # Nested list opens: push a fresh element list.
                        open_parens += 1
                        self.eat(t.TokenType.LPAREN)

                        prims = []
                        prims_stack.append(prims)

                        continue

                    elif curr_token.type is t.TokenType.RPAREN:
                        open_parens -= 1
                        self.eat(t.TokenType.RPAREN)

                        if open_parens > 0:
                            # A nested list closed: wrap its elements and
                            # append the node to the enclosing list.  All
                            # nested lists reuse the QUOTE token for
                            # position information.
                            expr = ast.List(token, prims_stack[-1])
                            prims_stack = prims_stack[:-1]
                            prims_stack[-1].append(expr)

                        continue

                    prims = prims_stack[-1]
                    if curr_token.type in [
                            t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                            t.TokenType.INTEGER, t.TokenType.RATIONAL,
                            t.TokenType.STRING
                    ]:
                        # Literal element: recurse to build its AST node.
                        prims.append(self.data())
                    elif curr_token.type is t.TokenType.NAME:
                        # Inside a quoted list a bare name is a symbol,
                        # positioned at the NAME token itself.
                        self.eat(t.TokenType.NAME)
                        name_token = t.Token(type=t.TokenType.SYMBOL,
                                             value=curr_token.value,
                                             line_no=curr_token.line_no,
                                             column=curr_token.column)
                        prims.append(ast.Sym(name_token))
                    else:
                        # Anything else (e.g. a nested QUOTE) is unsupported.
                        raise err.IllegalStateError

                node = ast.List(token, prims_stack[0])
                return node
            elif next_token.type in [
                    t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                    t.TokenType.INTEGER, t.TokenType.RATIONAL,
                    t.TokenType.STRING
            ]:
                # Quoting a plain literal yields the literal itself.
                return self.data()
            elif next_token.type is t.TokenType.NAME:
                self.eat(t.TokenType.NAME)
                # NOTE(review): here the symbol's position comes from the
                # QUOTE token, while symbols inside lists use the NAME
                # token's position — confirm this asymmetry is intended.
                name_token = t.Token(type=t.TokenType.SYMBOL,
                                     value=next_token.value,
                                     line_no=token.line_no,
                                     column=token.column)
                return ast.Sym(name_token)
            else:
                raise err.IllegalStateError

        else:
            self.error_unexpected_token(token=token)