def _string(self) -> t.Token:
    """Tokenize a double-quoted string literal.

    Assumes the lexer is positioned on the opening '"'. Consumes
    characters up to the closing '"', then consumes the closing quote
    itself.

    Returns:
        A STRING token whose value is the literal's contents
        (without the surrounding quotes).

    Raises:
        err.LexerError: RS_EXPECTED_DOUBLE_QUOTE if end of input is
            reached before a closing '"' is found.
    """
    # Remember where the literal started for the token position.
    line_no = self._line_no
    column = self._column
    self._advance()  # skip the opening quote
    # Accumulate characters in a list and join once at the end:
    # avoids the quadratic cost of repeated `string += ch`.
    chars = []
    while self._current_char is not None and self._current_char != '"':
        chars.append(self._current_char)
        self._advance()
    string = ''.join(chars)
    if self._current_char is None:
        # Ran off the end of input before the closing quote.
        raise err.LexerError(
            error_code=err.ErrorCode.RS_EXPECTED_DOUBLE_QUOTE,
            token=t.Token(type=t.TokenType.INVALID,
                          value=string,
                          line_no=line_no,
                          column=column))
    self._advance()  # skip the closing quote
    return t.Token(type=t.TokenType.STRING,
                   value=string,
                   line_no=line_no,
                   column=column)
def _skip_multi_line_comment(self) -> None:
    """Skip over a `#| ... |#` block comment.

    Assumes the lexer is positioned on the opening '#' of '#|'.
    Consumes everything through the closing '|#'.

    NOTE(review): nested `#| |#` comments are not tracked here — the
    first '|#' always terminates the comment. Confirm whether nesting
    (as in full Racket) is meant to be supported.

    Raises:
        err.LexerError: RS_EOF_IN_BLOCK_COMMENT if the input ends
            before a closing '|#' is found.
    """
    # Consume the opening '#|'.
    self._advance()
    self._advance()
    # Position just inside the comment, reported if it is unterminated.
    line_no = self._line_no
    column = self._column
    while True:
        if self._current_char == '|':
            next_char = self._peek()
            if next_char == '#':
                # Found the closing '|#': consume both and stop.
                self._advance()
                self._advance()
                break
            elif next_char is None:
                # '|' is the last character of the input: unterminated.
                raise err.LexerError(
                    error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                    token=t.Token(type=t.TokenType.INVALID,
                                  value=None,
                                  line_no=line_no,
                                  column=column))
        elif self._current_char is None:
            # End of input reached inside the comment body: unterminated.
            raise err.LexerError(
                error_code=err.ErrorCode.RS_EOF_IN_BLOCK_COMMENT,
                token=t.Token(type=t.TokenType.INVALID,
                              value=None,
                              line_no=line_no,
                              column=column))
        self._advance()
def _boolean(self) -> t.Token:
    """Tokenize a `#...` boolean literal (#T/#t/#true/#F/#f/#false).

    Assumes the lexer is positioned on the leading '#'. Consumes
    characters until whitespace, end of input, a paren/brace/bracket,
    a quote-like character, or until the accumulated text can no
    longer be a prefix of '#true' or '#false'.

    Returns:
        A BOOLEAN token with a Python bool value.

    Raises:
        err.LexerError: RS_BAD_SYNTAX if the consumed text is not one
            of the six accepted boolean spellings.
    """
    line_no = self._line_no
    column = self._column
    boolean = self._current_char  # the leading '#'
    self._advance()
    while self._current_char is not None and not self._current_char.isspace(
    ):
        current_char = self._current_char
        if current_char in ['"', "'", '`', '#']:
            # Include the offending character in the error text, but do
            # not advance past it, so the caller still sees it.
            boolean += self._current_char
            break
        elif current_char in ['(', ')', '{', '}', '[', ']']:
            # Paren-like characters terminate the literal and are not
            # consumed.
            break
        boolean += self._current_char
        self._advance()
        lowered = boolean.lower()
        # Stop as soon as the text can no longer be a prefix of a valid
        # boolean (substring containment against '#true' / '#false').
        if lowered not in '#true' and lowered not in '#false':
            break
    # BUG FIX: the check previously also raised when
    # `self._current_char is None`, which rejected a complete, valid
    # boolean at end of input (e.g. a program ending in '#t').
    # Validity depends only on the consumed text.
    if boolean not in ['#T', '#t', '#true', '#F', '#f', '#false']:
        raise err.LexerError(error_code=err.ErrorCode.RS_BAD_SYNTAX,
                             token=t.Token(type=t.TokenType.INVALID,
                                           value=boolean,
                                           line_no=line_no,
                                           column=column),
                             text=boolean)
    if boolean in ['#T', '#t', '#true']:
        return t.Token(type=t.TokenType.BOOLEAN,
                       value=True,
                       line_no=line_no,
                       column=column)
    else:
        return t.Token(type=t.TokenType.BOOLEAN,
                       value=False,
                       line_no=line_no,
                       column=column)
def _identifier(self, initial: str = '') -> t.Token:
    """Consume an identifier (builtin functions included) and return
    a NAME token starting from the optional `initial` prefix."""
    line_no = self._line_no
    column = self._column
    pieces = [initial]
    while True:
        ch = self._current_char
        # An identifier ends at end of input, a delimiter character,
        # or whitespace.
        if ch is None or ch in self.NON_ID_CHARS or ch.isspace():
            break
        pieces.append(ch)
        self._advance()
    return t.Token(t.TokenType.NAME, ''.join(pieces), line_no, column)
def _process_all_tokens(self) -> None:
    """Break the input text apart into tokens until end of input,
    appending each to self.tokens and feeding parens to the paren
    analyzer."""
    analyzer = self.paren_analyzer
    paren_types = (t.TokenType.LPAREN, t.TokenType.RPAREN)
    while True:
        token = self._process_next_token()
        if token is None:
            # Out of input: append a synthetic EOF token, notify the
            # paren analyzer, and stop.
            eof_token = t.Token(t.TokenType.EOF, None, self._line_no,
                                self._column)
            self.tokens.append(eof_token)
            analyzer.reached_eof(eof_token)
            return
        self.tokens.append(token)
        if token.type in paren_types:
            analyzer.received_paren(token)
def _process_next_token(self) -> tp.Optional[t.Token]:
    """Produce the next token from the input, or None at end of input.

    Skips whitespace/comments, then dispatches on the current character
    to the specialized tokenizers (_number, _identifier, _boolean,
    _string) or builds the single-character tokens (quote, parens)
    inline.

    Raises:
        err.LexerError: FEATURE_NOT_IMPLEMENTED for '|'.
        err.IllegalStateError: if no rule matches the current character.
    """
    token = None
    while self._current_char:
        try:
            self._skip_whitespace_or_comments()
        except err.ReachedEOF:
            break
        # A '-' only starts a number when followed by a digit or '.'.
        # BUG FIX: guard against _peek() returning None at end of
        # input — a lone trailing '-' previously crashed with
        # AttributeError (None.isdigit()); it now falls through to the
        # identifier rule like any other '-' name.
        next_char = self._peek()
        if (self._current_char.isdigit() or self._current_char == '.'
                or (self._current_char == '-' and next_char is not None
                    and (next_char.isdigit() or next_char == '.'))):
            token = self._number()
            break
        if self._current_char not in self.NON_ID_CHARS:
            token = self._identifier()
            break
        if self._current_char == '#':
            token = self._boolean()
            break
        if self._current_char == '"':
            token = self._string()
            break
        if self._current_char == "'":
            token = t.Token(type=t.TokenType.QUOTE,
                            value=self._current_char,
                            line_no=self._line_no,
                            column=self._column)
            self._advance()
            break
        if self._current_char in ['(', '{', '[']:
            token = t.Token(type=t.TokenType.LPAREN,
                            value=self._current_char,
                            line_no=self._line_no,
                            column=self._column)
            self._advance()
            break
        if self._current_char in [')', '}', ']']:
            token = t.Token(type=t.TokenType.RPAREN,
                            value=self._current_char,
                            line_no=self._line_no,
                            column=self._column)
            self._advance()
            break
        if self._current_char == '|':
            raise err.LexerError(
                error_code=err.ErrorCode.FEATURE_NOT_IMPLEMENTED,
                token=t.Token(type=t.TokenType.INVALID,
                              value="''",
                              line_no=self._line_no,
                              column=self._column))
        raise err.IllegalStateError
    return token
def _number(self) -> t.Token:
    """Return a number token from a number consumed from the input
    (or an ID if not a valid number).

    Consumes characters until whitespace, end of input, or a
    non-identifier character, then classifies the text as INTEGER,
    DECIMAL, or RATIONAL (split on '/'), falling back to a NAME token
    when the text does not parse as a number.

    Raises:
        err.LexerError: DIVISION_BY_ZERO for a rational literal with a
            zero denominator (e.g. '1/0').
    """
    line_no = self._line_no
    column = self._column
    # Optional leading minus sign.
    if self._current_char == '-':
        number = '-'
        self._advance()
    else:
        number = ''
    is_rational = False
    numerator = ''
    denominator = ''
    while (self._current_char is not None
           and not self._current_char.isspace()
           and self._current_char not in self.NON_ID_CHARS):
        if self._current_char == '/':
            # Start rational parsing. If a second '/' appears, numerator
            # becomes e.g. '1/2', which later fails int() and makes the
            # whole text fall back to a NAME token.
            is_rational = True
            numerator = number
            number += self._current_char
            self._advance()
            continue
        if is_rational:
            denominator += self._current_char
        number += self._current_char
        self._advance()
    if is_rational:
        try:
            # Reuse the string variables as ints once parsed.
            numerator = int(numerator)
            denominator = int(denominator)
            if denominator < 0:
                # A negative denominator (e.g. '1/-2') is not a valid
                # rational literal; route it to the NAME fallback.
                raise ValueError
        except ValueError:
            return t.Token(type=t.TokenType.NAME,
                           value=number,
                           line_no=line_no,
                           column=column)
        else:
            token = t.Token(type=t.TokenType.RATIONAL,
                            value=(numerator, denominator),
                            line_no=line_no,
                            column=column)
            if denominator == 0:
                # Built the token first so the error can carry its
                # position.
                raise err.LexerError(
                    error_code=err.ErrorCode.DIVISION_BY_ZERO,
                    token=token)
            return token
    else:
        # Try the narrowest numeric type first: int, then float, then
        # give up and treat the text as an identifier.
        try:
            number = int(number)
        except ValueError:
            try:
                number = float(number)
            except ValueError:
                # Not numeric at all (e.g. '-', '1.2.3'): identifier.
                return t.Token(type=t.TokenType.NAME,
                               value=number,
                               line_no=line_no,
                               column=column)
            else:
                return t.Token(type=t.TokenType.DECIMAL,
                               value=number,
                               line_no=line_no,
                               column=column)
        else:
            return t.Token(type=t.TokenType.INTEGER,
                           value=number,
                           line_no=line_no,
                           column=column)
def visit_NameAssign(self, node: ast.NameAssign) -> None:
    """Validate a `(define name expr)` form and define the name.

    Checks that the form has exactly one plain, non-keyword name and
    exactly one value expression, visits the expression, then defines
    the name in the current scope.

    Raises:
        err.SemanticError: for each malformed variant (quoted symbol or
            keyword in name position, missing/extra expressions, bare
            keyword in expression position, redefining a builtin or an
            already-defined name).
        err.IllegalStateError: if a keyword in expression position is
            not one of the specifically handled ones.
    """
    token = node.token
    exprs = node.exprs
    exprs_len = len(exprs)
    if exprs_len != 0 and type(exprs[0]) is ast.Sym:
        # a little bit hacky..., not sure why Racket has this behavior
        raise err.SemanticError(
            error_code=err.ErrorCode.BUILTIN_OR_IMPORTED_NAME,
            token=t.Token(type=t.TokenType.INVALID,
                          value='quote',
                          line_no=token.line_no,
                          column=token.column))
    if exprs_len == 0 or type(
            exprs[0]) is not ast.Name or exprs[0].value in t.KEYWORDS:
        # The first operand must be a plain name that is not a keyword.
        next_token = exprs[0].token if exprs_len > 0 else None
        raise err.SemanticError(error_code=err.ErrorCode.D_EXPECTED_A_NAME,
                                token=token,
                                next_token=next_token)
    const_token = exprs[0].token
    const_name = const_token.value
    if exprs_len == 1:
        # `(define name)` with no value expression.
        raise err.SemanticError(
            error_code=err.ErrorCode.D_V_MISSING_AN_EXPRESSION,
            token=token,
            name=const_name)
    elif exprs_len > 2:
        # `(define name e1 e2 ...)`: report how many extras were given.
        extra_count = exprs_len - 2
        raise err.SemanticError(
            error_code=err.ErrorCode.D_V_EXPECTED_ONE_EXPRESSION,
            token=token,
            extra_count=extra_count,
            name=const_name)
    elif type(exprs[1]) is ast.Name and exprs[1].value in t.KEYWORDS:
        # A bare keyword in expression position: raise the specific
        # error each keyword calls for.
        keyword = exprs[1].value
        token = exprs[1].token
        if keyword == t.Keyword.COND.value:
            raise err.SemanticError(
                error_code=err.ErrorCode.C_EXPECTED_OPEN_PARENTHESIS,
                token=token)
        elif keyword == t.Keyword.DEFINE.value:
            raise err.SemanticError(
                error_code=err.ErrorCode.D_EXPECTED_OPEN_PARENTHESIS,
                token=token)
        elif keyword == t.Keyword.DEFINE_STRUCT.value:
            raise err.SemanticError(
                error_code=err.ErrorCode.DS_EXPECTED_OPEN_PARENTHESIS,
                token=token)
        elif keyword == t.Keyword.ELSE.value:
            raise err.SemanticError(error_code=err.ErrorCode.E_NOT_ALLOWED,
                                    token=token)
        else:
            raise err.IllegalStateError
    node.identifier = node.exprs[0].value
    node.expr = node.exprs[1]
    var_name = node.identifier
    var_symbol = sym.AmbiguousSymbol(var_name)
    if var_name in BUILT_IN_PROCS:
        # Builtins may not be shadowed by a user definition.
        raise err.SemanticError(
            error_code=err.ErrorCode.BUILTIN_OR_IMPORTED_NAME,
            token=const_token)
    if self.current_scope.lookup(var_name,
                                 current_scope_only=True) is not None:
        # Redefinition within the same scope is an error.
        raise err.SemanticError(
            error_code=err.ErrorCode.PREVIOUSLY_DEFINED_NAME,
            token=const_token)
    # Analyze the value expression, then record the name in scope.
    self.visit(node.expr)
    self.current_scope.define(var_symbol)
def data(
    self
) -> Union[ast.Bool, ast.Dec, ast.Int, ast.List, ast.Rat, ast.Str, ast.Sym]:
    """
    data: BOOLEAN | DECIMAL | INTEGER | LIST | RATIONAL | STRING | SYMBOL

    Parses a single datum. Literal tokens map directly to their AST
    nodes. A QUOTE token introduces either a quoted list (parsed
    iteratively with an explicit stack of element lists), a quoted
    literal (delegated back to data(), so quoting a literal yields the
    literal itself), or a quoted name, which becomes a symbol.
    """
    token = self.current_token
    if token.type is t.TokenType.BOOLEAN:
        self.eat(t.TokenType.BOOLEAN)
        return ast.Bool(token)
    elif token.type is t.TokenType.DECIMAL:
        self.eat(t.TokenType.DECIMAL)
        return ast.Dec(token)
    elif token.type is t.TokenType.INTEGER:
        self.eat(t.TokenType.INTEGER)
        return ast.Int(token)
    elif token.type is t.TokenType.RATIONAL:
        self.eat(t.TokenType.RATIONAL)
        return ast.Rat(token)
    elif token.type is t.TokenType.STRING:
        self.eat(t.TokenType.STRING)
        return ast.Str(token)
    elif token.type is t.TokenType.QUOTE:
        self.eat(t.TokenType.QUOTE)
        next_token = self.current_token
        if next_token.type is t.TokenType.LPAREN:
            # Quoted list: parse iteratively, keeping one element list
            # per still-open paren on prims_stack.
            self.eat(t.TokenType.LPAREN)
            prims_stack = [[]]
            open_parens = 1
            while open_parens > 0:
                curr_token = self.current_token
                if curr_token.type is t.TokenType.EOF:
                    # Input ended inside the quoted list.
                    raise err.LexerError(
                        error_code=err.ErrorCode.RS_SYMBOL_FOUND_EOF,
                        token=t.Token(type=t.TokenType.INVALID,
                                      value="'",
                                      line_no=curr_token.line_no,
                                      column=curr_token.column))
                elif curr_token.type is t.TokenType.LPAREN:
                    # Nested list opens: push a fresh element list.
                    open_parens += 1
                    self.eat(t.TokenType.LPAREN)
                    prims = []
                    prims_stack.append(prims)
                    continue
                elif curr_token.type is t.TokenType.RPAREN:
                    open_parens -= 1
                    self.eat(t.TokenType.RPAREN)
                    if open_parens > 0:
                        # A nested list closed: wrap its elements and
                        # append to the enclosing list's elements.
                        # NOTE(review): the quote token's position is
                        # reused for nested lists — confirm intended.
                        expr = ast.List(token, prims_stack[-1])
                        prims_stack = prims_stack[:-1]
                        prims_stack[-1].append(expr)
                    continue
                prims = prims_stack[-1]
                if curr_token.type in [
                        t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                        t.TokenType.INTEGER, t.TokenType.RATIONAL,
                        t.TokenType.STRING
                ]:
                    # Literal element: recurse for its AST node.
                    prims.append(self.data())
                elif curr_token.type is t.TokenType.NAME:
                    # Names inside a quoted list become symbols.
                    self.eat(t.TokenType.NAME)
                    name_token = t.Token(type=t.TokenType.SYMBOL,
                                         value=curr_token.value,
                                         line_no=curr_token.line_no,
                                         column=curr_token.column)
                    prims.append(ast.Sym(name_token))
                else:
                    raise err.IllegalStateError
            node = ast.List(token, prims_stack[0])
            return node
        elif next_token.type in [
                t.TokenType.BOOLEAN, t.TokenType.DECIMAL,
                t.TokenType.INTEGER, t.TokenType.RATIONAL,
                t.TokenType.STRING
        ]:
            # Quoting a literal yields the literal itself.
            return self.data()
        elif next_token.type is t.TokenType.NAME:
            # Quoted bare name: produce a symbol.
            # NOTE(review): position is taken from the quote token, not
            # the name token (unlike the in-list branch) — confirm.
            self.eat(t.TokenType.NAME)
            name_token = t.Token(type=t.TokenType.SYMBOL,
                                 value=next_token.value,
                                 line_no=token.line_no,
                                 column=token.column)
            return ast.Sym(name_token)
        else:
            raise err.IllegalStateError
    else:
        self.error_unexpected_token(token=token)