def _pop_comment(self, start_pos: Pos) -> Token:
  """Consumes characters through end-of-line, producing a COMMENT token."""
  text = []
  while not self._at_char_eof():
    # A consumed newline terminates the comment (and is not part of its text).
    if self._try_dropc('\n'):
      break
    text.append(self._popc())
  return Token(TokenKind.COMMENT, Span(start_pos, self.pos), value=''.join(text))
def test_format_binop(self):
  """A binary <= expression stringifies with parenthesized operands."""
  module = ast.Module('test')
  span = Span(self.fake_pos, self.fake_pos)
  le_token = Token(TokenKind.OANGLE_EQUALS, span=span)
  le = ast.Binop(module, le_token, self.five, self.five)
  self.assertEqual('(5) <= (5)', str(le))
def _pop_whitespace(self, start_pos: Pos) -> Token:
  """Consumes a maximal run of whitespace into a WHITESPACE token.

  Precondition: the scanner must currently be positioned at whitespace.
  """
  assert self._at_whitespace()
  pieces = []
  while not self._at_char_eof() and self._at_whitespace():
    pieces.append(self._popc())
  return Token(TokenKind.WHITESPACE, Span(start_pos, self.pos),
               value=''.join(pieces))
def test_let_destructure_nested(self):
  """Nested tuple destructuring in a let binding parses with correct spans."""
  e = self.parse_expression(
      'let (w, (x, (y)), z): (u32,(u32,(u32)),u32) = (1, (2, (3,)), 4); y')
  self.assertIsInstance(e.rhs, ast.XlsTuple)
  # RHS tuple has three top-level members; the middle one nests a two-member
  # tuple whose second member is a singleton tuple.
  self.assertLen(e.rhs.members, 3)
  self.assertLen(e.rhs.members[1], 2)
  self.assertLen(e.rhs.members[1].members[1], 1)
  # Span of the whole name-def tree, then of its middle subtree.
  self.assertEqual(
      e.name_def_tree.span,
      Span(Pos('/fake/fake.x', 0, 4), Pos('/fake/fake.x', 0, 20)))
  self.assertEqual(
      e.name_def_tree.tree[1].span,
      Span(Pos('/fake/fake.x', 0, 8), Pos('/fake/fake.x', 0, 16)))
def test_parse_name_def_tree(self):
  """Parses a nested name-def tree and checks leaf spans are distinct."""
  text = '(a, (b, (c, d), e), f)'
  fparse = lambda p, b: p._parse_name_def_tree(b)
  bindings = parser.Bindings()
  ndt = self._parse_internal(text, bindings, fparse)
  self.assertIsInstance(ndt, ast.NameDefTree)
  self.assertLen(ndt.tree, 3)
  # The first and last top-level entries ('a' and 'f') are leaves.
  for leaf_index in (0, 2):
    self.assertIsInstance(ndt.tree[leaf_index], ast.NameDefTree)
    self.assertTrue(ndt.tree[leaf_index].is_leaf())
  self.assertEqual(
      ndt.tree[0].span,
      Span(Pos(self.fake_filename, 0, 1), Pos(self.fake_filename, 0, 2)))
  self.assertEqual(
      ndt.tree[2].span,
      Span(Pos(self.fake_filename, 0, 20), Pos(self.fake_filename, 0, 21)))
  self.assertNotEqual(ndt.tree[2].span, ndt.tree[0].span)
def resolve_or_none(self, name: Text) -> Optional[NameDefNode]:
  """Like resolve(), but returns None instead of raising on failure."""
  # The span is only used for error reporting inside resolve(), so a
  # placeholder position suffices here.
  pos = Pos('<fake>', 0, 0)
  try:
    return self.resolve(name, Span(pos, pos))
  except ParseError:
    return None
def test_visit_match_multi_pattern(self):
  """A multi-pattern match arm yields its expression once when visited."""
  span = Span(self.fake_pos, self.fake_pos)
  e = ast.Number(span, u'0xf00')
  # Two patterns wrapping the same expression node.
  patterns = tuple(ast.NameDefTree(span, e) for _ in range(2))
  arm = ast.MatchArm(patterns=patterns, expr=e)
  collector = _Collector()
  arm.accept(collector)
  self.assertEqual(collector.collected, [e])
def test_import(self):
  """An import statement binds the imported name as an ast.Import node."""
  program = """
  import thing
  """
  bindings = parser.Bindings(None)
  fparse = lambda p, b: p.parse_module(b)
  m = self._parse_internal(program, bindings, fparse)
  self.assertIsInstance(m.top[0], ast.Import)
  probe_pos = Pos(self.fake_filename, 0, 0)
  probe_span = Span(probe_pos, probe_pos)
  self.assertIsInstance(bindings.resolve_node('thing', probe_span), ast.Import)
def _try_pop_whitespace_or_comment(self) -> Optional[Token]:
  """Attempts to pop a whitespace or a newline-delimited comment.

  Returns:
    An EOF, WHITESPACE, or COMMENT token as appropriate, or None when the
    next characters start neither whitespace nor a '//' comment.
  """
  start_pos = self.pos
  if self._at_char_eof():
    return Token(TokenKind.EOF, Span(start_pos, start_pos))
  if self._at_whitespace():
    return self._pop_whitespace(start_pos)
  if self._peekc() == '/' and self._peekc2() == '/':
    # Drop the '//' marker itself; the comment token holds only the text.
    self._dropc(2)
    return self._pop_comment(start_pos)
  return None
def _scan_number(self, startc: Text, start_pos: Pos) -> Token:
  """Scans a number token out of the character stream and returns it.

  Handles hex (0x...), binary (0b...), and decimal literals, each optionally
  preceded by a '-' sign; '_' separators are accepted inside the digits.

  Args:
    startc: First (already popped) character of the number token.
    start_pos: Start position for the number token.

  Returns:
    The scanned NUMBER token.

  Raises:
    ScanError: If a radix prefix has no digits after it, or a binary literal
      is followed by an out-of-range decimal digit.
  """
  negative = startc == '-'
  if negative:
    # Consume the character after the sign; it determines the radix.
    startc = self._popc()
  if startc == '0' and self._try_dropc('x'):  # Hex prefix.
    s = self._scan_while(
        '0x', lambda c: '0' <= c <= '9' or 'a' <= c.lower() <= 'f' or c == '_')
    if s == '0x':  # No digits followed the prefix.
      raise ScanError(
          start_pos, 'Expected hex characters following 0x prefix.')
    if negative:
      s = '-' + s
    return Token(TokenKind.NUMBER, Span(start_pos, self.pos), s)
  if startc == '0' and self._try_dropc('b'):  # Bin prefix.
    s = self._scan_while('0b', lambda c: '0' <= c and c <= '1' or c == '_')
    if s == '0b':  # No digits followed the prefix.
      raise ScanError(
          start_pos, 'Expected binary characters following 0b prefix.')
    # Any remaining decimal digit must be 2-9 (0/1 were consumed above), which
    # is an invalid binary digit rather than the start of a new token.
    if not self.at_eof() and '0' <= self._peekc() <= '9':
      raise ScanError(
          self.pos,
          'Invalid digit for binary number: {}'.format(self._peekc()))
    if negative:
      s = '-' + s
    return Token(TokenKind.NUMBER, Span(start_pos, self.pos), s)
  # Plain decimal number.
  s = self._scan_while(startc, lambda c: c.isdigit())
  assert s, 'Must have seen numerical values to attempt to scan a number.'
  if negative:
    s = '-' + s
  return Token(TokenKind.NUMBER, Span(start_pos, self.pos), s)
def __init__(self,
             rng: Random,
             options: AstGeneratorOptions,
             codegen_ops_only: bool = True):
  """Initializes AST-generation state from the RNG and options."""
  self.options = options
  self.rng = rng
  # Should we only generate ops that can be codegenned?
  self._codegen_ops_only = codegen_ops_only
  self.fake_pos = Pos('<fake>', 0, 0)
  self.fake_span = Span(self.fake_pos, self.fake_pos)
  self.name_generator = self._name_generator()
  if options.binop_allowlist:
    assert all(
        binop in ast.Binop.SAME_TYPE_KIND_LIST
        for binop in options.binop_allowlist
    ), 'Contains invalid TokenKinds for same-type binop allowlist: {}'.format(
        options.binop_allowlist)
    self._binops = options.binop_allowlist
  else:
    self._binops = list(ast.Binop.SAME_TYPE_KIND_LIST)
    if options.disallow_divide:
      self._binops.remove(scanner.TokenKind.SLASH)

  # Concrete scalar type keywords, excluding the parameterized constructors.
  candidate_kws = set(scanner.TYPE_KEYWORD_STRINGS) - set(['bits', 'uN', 'sN'])
  if not options.emit_signed_types:
    candidate_kws = {kw for kw in candidate_kws if not kw.startswith('s')}

  def keyword_width(kw):
    if kw == 'bool':
      return 1
    # Keyword should be of uN or sN form.
    return int(kw[1:])

  candidate_kws = {
      kw for kw in candidate_kws
      if keyword_width(kw) <= self.options.max_width_bits_types
  }
  self._kw_identifiers = sorted(list(candidate_kws))
  # Set of functions created during generation.
  self._functions = []
  # Set of types defined during module generation.
  self._type_defs = []
  # Widths of the aggregate types, indexed by str(TypeAnnotation).
  self._type_bit_counts = {}
def _scan_char(self, start_pos: Pos) -> Token:
  """Scans a TokenKind.CHARACTER token (a single character in quotes)."""
  open_quote = self._popc()
  assert open_quote == '\'', 'Must be called at starting quote.'
  if self._at_char_eof():
    raise ScanError(
        self.pos, 'Expected character after single quote, saw end of file')
  value = self._popc()
  # Exactly one character must be followed by the closing quote.
  if self._at_char_eof() or self._peekc() != '\'':
    raise ScanError(
        self.pos,
        'Expected closing single quote for character literal; got {!r}'.format(
            'end of file' if self._at_char_eof() else self._peekc()))
  self._dropc()
  return Token(TokenKind.CHARACTER, Span(start_pos, self.pos), value)
def _scan_identifier_or_keyword(self, startc: Text, start_pos: Pos) -> Token:
  """Scans the identifier-looking entity beginning with startc.

  Args:
    startc: First (already popped) character of the identifier/keyword token.
    start_pos: Start position for the identifier/keyword token.

  Returns:
    A KEYWORD token when the scanned text is a known keyword, otherwise an
    IDENTIFIER token.
  """
  text = self._scan_while(startc,
                          lambda c: c.isalpha() or c.isdigit() or c in '_!')
  span = Span(start_pos, self.pos)
  if text in KEYWORDS:
    return Token(TokenKind.KEYWORD, span, Keyword(text))
  return Token(TokenKind.IDENTIFIER, span, text)
def test_pprint_parse_error(self):
  """Pretty-printing a ParseError underlines the error span with context."""
  output = io.StringIO()
  filename = '/fake/test_file.x'
  text = 'oh\nwhoops\nI did an\nerror somewhere\nthat is bad'
  with fakefs_util.scoped_fakefs(filename, text):
    # Error span covers the first two columns of line index 2 ("I did an").
    pos = scanner.Pos(filename, lineno=2, colno=0)
    span = Span(pos, pos.bump_col())
    error = parser.ParseError(span, 'This is bad')
    parser_helpers.pprint_positional_error(
        error,
        output=cast(io.IOBase, output),
        color=False,
        error_context_line_count=3)
  # NOTE(review): exact caret-line indentation reconstructed from the span
  # (carets under columns 0-1 of the marked line) — confirm against output.
  expected = textwrap.dedent("""\
      /fake/test_file.x:2-4
      0002: whoops
      * 0003: I did an
              ^^ This is bad @ /fake/test_file.x:3:1
      0004: error somewhere
      """)
  self.assertMultiLineEqual(expected, output.getvalue())
def test_ndt_preorder(self):
  """do_preorder visits interior nodes before leaves with level/index info."""
  span = Span(self.fake_pos, self.fake_pos)
  t = ast.NameDef(span, 't')
  u = ast.NameDef(span, 'u')
  wrapped_t = ast.NameDefTree(span, t)
  wrapped_u = ast.NameDefTree(span, u)
  interior = ast.NameDefTree(span, (wrapped_t, wrapped_u))
  outer = ast.NameDefTree(span, (interior,))

  visited = []

  def record(item: ast.NameDefTree, level: int, i: int):
    visited.append((item, level, i))

  outer.do_preorder(record)
  # Interior first (level 1), then its two leaves (level 2, indices 0 and 1).
  self.assertLen(visited, 3)
  self.assertEqual(visited[0], (interior, 1, 0))
  self.assertEqual(visited[1], (wrapped_t, 2, 0))
  self.assertEqual(visited[2], (wrapped_u, 2, 1))
def test_bindings_stack(self):
  """Child bindings see parent names; siblings/parents cannot see children."""
  top = parser.Bindings(None)
  leaf0 = parser.Bindings(top)
  leaf1 = parser.Bindings(top)
  a = ast.BuiltinNameDef('a')
  b = ast.BuiltinNameDef('b')
  c = ast.BuiltinNameDef('c')
  top.add('a', a)
  leaf0.add('b', b)
  leaf1.add('c', c)
  pos = scanner.Pos(self.fake_filename, lineno=0, colno=0)
  span = Span(pos, pos)
  # 'a' lives in the shared parent, so every scope resolves it.
  for scope in (leaf0, leaf1, top):
    self.assertEqual(scope.resolve('a', span), a)
  # Names in a leaf are invisible to the parent and to sibling leaves.
  with self.assertRaises(parser.ParseError):
    top.resolve('b', span)
  with self.assertRaises(parser.ParseError):
    leaf1.resolve('b', span)
  with self.assertRaises(parser.ParseError):
    leaf0.resolve('c', span)
  self.assertEqual(leaf0.resolve('b', span), b)
  self.assertEqual(leaf1.resolve('c', span), c)
def test_binary_number_with_underscores(self):
  """Underscore separators in a binary literal are ignored for its value."""
  span = Span(self.fake_pos, self.fake_pos)
  number = ast.Number(self.m, span, u'0b1_0_0_1')
  self.assertEqual(9, number.get_value_as_int())
def test_hex_number_with_underscores(self):
  """Underscore separators in a hex literal are ignored for its value."""
  span = Span(self.fake_pos, self.fake_pos)
  number = ast.Number(self.m, span, '0xf_abcde_1234')
  self.assertEqual(0xfabcde1234, number.get_value_as_int())
def test_unicode_hex_number(self):
  """A hex literal given as a unicode string still yields the right value."""
  span = Span(self.fake_pos, self.fake_pos)
  number = ast.Number(self.m, span, u'0xf00')
  self.assertEqual(0xf00, number.get_value_as_int())
def test_stringify_single_member_tuple(self):
  """A one-element tuple stringifies with the trailing comma."""
  pos = Pos('<fake>', 0, 0)
  single = ast.XlsTuple(self.m, Span(pos, pos), (self.five,))
  self.assertEqual('(5,)', str(single))
def span(self) -> Span:
  """Span from the first pattern through the end of the arm's expression."""
  start = self.patterns[0].span.start
  limit = self.expr.span.limit
  return Span(start, limit)
def __init__(self, owner: AstNodeOwner, type_: TypeAnnotation, expr: Expr):
  """Initializes the node with a span covering both the type and expression."""
  # Cover the annotation and the expression regardless of source order.
  span = Span(min(type_.span.start, expr.span.start),
              max(type_.span.limit, expr.span.limit))
  super().__init__(owner, span)
  self.type_ = type_
  self.expr = expr
def __init__(self, pos: Pos, message: Text):
  """Creates a scan error at a single point (the span degenerates to pos)."""
  super().__init__(message, Span(pos, pos))
  self.pos = pos
def peek(self) -> Token:
  """Peeks at a scanned token at the head of the stream.

  Returns:
    The scanned token at the head of the stream. Note this may be an EOF token
    if the character stream is extinguished.

  Raises:
    ScanError: If an unknown character sequence is encountered (that cannot be
      converted into a token).

  This does not destructively update the scan state (i.e. the caller can peek
  at the same token again after this call returns).
  """
  if self._include_whitespace_and_comments:
    # In whitespace/comment-preserving mode those come back as tokens.
    tok = self._try_pop_whitespace_or_comment()
    if tok:
      return tok
  else:
    self._drop_comments_and_leading_whitespace()
  # If there's a lookahead token already, we return that as the result of the
  # peek.
  if self._lookahead:
    return self._lookahead
  # Record the position the token starts at.
  start_pos = self.pos
  # Helper that makes a span from start_pos to the current point.
  mk_span = lambda: Span(start_pos, self.pos)
  # After dropping whitespace this may be EOF.
  if self._at_char_eof():
    return Token(TokenKind.EOF, mk_span())
  # Peek at one character for prefix scanning.
  startc = self._peekc()
  assert self._lookahead is None, self._lookahead
  if startc == '\'':
    # Character literal, e.g. 'a'.
    lookahead = self._scan_char(start_pos)
  elif startc == '#':
    self._dropc()
    lookahead = Token(TokenKind('#'), mk_span())
  elif startc == '!':
    # '!' or '!='.
    self._dropc()
    if self._try_dropc('='):
      lookahead = Token(TokenKind('!='), mk_span())
    else:
      lookahead = Token(TokenKind('!'), mk_span())
  elif startc == '=':
    # '=', '==', or '=>'.
    self._dropc()
    if self._try_dropc('='):
      lookahead = Token(TokenKind('=='), mk_span())
    elif self._try_dropc('>'):
      lookahead = Token(TokenKind('=>'), mk_span())
    else:
      lookahead = Token(TokenKind('='), mk_span())
  elif startc in SIMPLE_TOKEN_KINDS:
    # Single-character tokens that never pair with a following character.
    c = self._popc()
    assert startc == c
    lookahead = Token(TokenKind(c), mk_span())
  elif startc in DOUBLED_SIMPLE_TOKEN_KINDS:
    self._dropc()
    if self._try_dropc(startc):  # Doubled up.
      kind = TokenKind(startc * 2)
      lookahead = Token(kind, mk_span())
    else:  # Not doubled up.
      lookahead = Token(TokenKind(startc), mk_span())
  elif startc == '+':
    # '+', '++', or '+:'.
    self._dropc()
    if self._try_dropc('+'):
      lookahead = Token(TokenKind('++'), mk_span())
    elif self._try_dropc(':'):
      lookahead = Token(TokenKind('+:'), mk_span())
    else:
      lookahead = Token(TokenKind('+'), mk_span())
  elif startc == '<':
    # '<', '<<', or '<='.
    self._dropc()
    if self._try_dropc('<'):
      lookahead = Token(TokenKind('<<'), mk_span())
    elif self._try_dropc('='):
      lookahead = Token(TokenKind('<='), mk_span())
    else:
      lookahead = Token(TokenKind('<'), mk_span())
  elif startc == '>':
    # '>', '>>', '>>>', or '>='.
    self._dropc()
    if self._try_dropc('>'):
      if self._try_dropc('>'):
        lookahead = Token(TokenKind('>>>'), mk_span())
      else:
        lookahead = Token(TokenKind('>>'), mk_span())
    elif self._try_dropc('='):
      lookahead = Token(TokenKind('>='), mk_span())
    else:
      lookahead = Token(TokenKind('>'), mk_span())
  elif startc.isalpha() or startc == '_':
    lookahead = self._scan_identifier_or_keyword(self._popc(), start_pos)
  elif startc.isdigit(
  ) or startc == '-' and self._peekc2_is(lambda c: c.isdigit()):
    # A digit, or '-' immediately followed by a digit, starts a number.
    lookahead = self._scan_number(self._popc(), start_pos)
  elif startc == '-':
    self._dropc()
    if self._try_dropc('>'):  # '->' token
      lookahead = Token(TokenKind.ARROW, mk_span())
    else:  # Simply '-' token.
      lookahead = Token(TokenKind.MINUS, mk_span())
  elif startc == '.':
    # '.', '..', or '...'.
    self._dropc()
    if self._try_dropc('.'):
      if self._try_dropc('.'):
        lookahead = Token(TokenKind.ELLIPSIS, mk_span())
      else:
        lookahead = Token(TokenKind.DOUBLE_DOT, mk_span())
    else:
      lookahead = Token(TokenKind.DOT, mk_span())
  else:
    raise ScanError(start_pos,
                    'Unrecognized character: {!r}'.format(startc))
  assert lookahead is not None
  assert self._lookahead is None
  self._lookahead = lookahead
  return self._lookahead
def __init__(self, type_: TypeAnnotation, expr: Expr):
  """Initializes a cast node.

  Args:
    type_: Type annotation being cast to.
    expr: Expression being cast.
  """
  # Cover both the annotation and the expression regardless of source order.
  start_pos = min(type_.span.start, expr.span.start)
  limit_pos = max(type_.span.limit, expr.span.limit)
  # Zero-argument super() for consistency with the other node constructors in
  # this file (behavior is identical under Python 3).
  super().__init__(Span(start_pos, limit_pos))
  self.type_ = type_
  self.expr = expr
def get_span_or_fake(self) -> Span:
  """Returns this node's span, or a placeholder span when it has none."""
  if hasattr(self, 'span'):
    assert isinstance(self.span, Span)
    return self.span
  placeholder = Pos('<no-file>', 0, 0)
  return Span(placeholder, placeholder)
def fake_span(self) -> Span:
  """Returns a degenerate span at the fabricated position."""
  pos = self.fake_pos
  return Span(pos, pos)
def span(self) -> Span:
  """Span from the start of the name through the end of its type."""
  start = self.name.span.start
  limit = self.type_.span.limit
  return Span(start, limit)