def merge_tokens(*tokenlists, typename=None):
    tokens = flatten_tokens(*tokenlists)
    if typename is not None:
        merged = TokenInfo(
            type=getattr(tokenmod, typename),
            string=tokens[0].string,
            start=tokens[0].start,
            end=tokens[0].end,
            line=tokens[0].line,
        )
    else:
        merged = tokens[0]
    for token in tokens[1:]:
        # contiguous - disable for now as bad tokenization
        # assert token.start[1] == (merged.end[1] + 1) or token.start[0] == (
        #     merged.end[0] + 1
        # )
        merged = TokenInfo(
            type=merged.type,
            string=merged.string + token.string,
            start=merged.start,
            end=token.end,
            line=merged.line,
        )
    return merged

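# A minimal standalone sketch of the same merging idea, assuming only the stdlib
# tokenize module (flatten_tokens/tokenmod above belong to the original module and
# are not needed here): adjacent tokens are folded into a single TokenInfo whose
# string is the concatenation and whose span covers both.
import io
import tokenize
from tokenize import TokenInfo


def fold_adjacent(tokens):
    merged = tokens[0]
    for tok in tokens[1:]:
        merged = TokenInfo(
            type=merged.type,
            string=merged.string + tok.string,
            start=merged.start,
            end=tok.end,
            line=merged.line,
        )
    return merged


toks = list(tokenize.generate_tokens(io.StringIO("foo.bar\n").readline))[:3]
print(fold_adjacent(toks))  # a single NAME token "foo.bar" spanning (1, 0)-(1, 7)
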
def test_peek_getnext():
    source = io.StringIO("# test\n1")
    t = Tokenizer(generate_tokens(source.readline))
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1")
    assert t.peek() == TokenInfo(NEWLINE, "", (2, 1), (2, 2), "")
    assert t.getnext() == TokenInfo(NEWLINE, "", (2, 1), (2, 2), "")

def merge_operators(tokens: Iterable[TokenInfo]) -> List[TokenInfo]:
    result = []
    for toknum, tokval, (srow, scol), (erow, ecol), linenum in tokens:
        if tokval == ">" and result[-1].string == "|":
            # |>
            token_info = TokenInfo(token.OP, "|>", result[-1][2], (erow, ecol), linenum)
            del result[-1]
            result.append(token_info)
            continue
        elif tokval == "?":
            if result[-1].string == "?":
                # ??
                token_info = TokenInfo(token.OP, "??", result[-1][2], (erow, ecol), linenum)
                del result[-1]
                result.append(token_info)
                continue
            else:
                token_info = TokenInfo(token.OP, "?", (srow, scol), (erow, ecol), linenum)
                result.append(token_info)
                continue
        elif tokval == ">" and result[-1].string == "=":
            # =>
            token_info = TokenInfo(token.OP, "=>", result[-1][2], (erow, ecol), linenum)
            del result[-1]
            result.append(token_info)
            continue
        result.append(TokenInfo(toknum, tokval, (srow, scol), (erow, ecol), linenum))
    return result

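# Hypothetical usage of merge_operators above, assuming its module also provides
# the usual imports (token, typing, TokenInfo): "a |> b" is not valid Python, so
# the stdlib tokenizer emits "|" and ">" as separate OP tokens, and the pass above
# glues them back together into one "|>" token.
import io
import token
import tokenize

raw = list(tokenize.generate_tokens(io.StringIO("a |> b\n").readline))
merged = merge_operators(raw)
print([t.string for t in merged if t.type == token.OP])  # ['|>']
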
def test_repeat_1_simple() -> None:
    grammar = """
    start: thing thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    assert node == [
        TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
        [
            TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"),
            TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
    ]
    with pytest.raises(SyntaxError):
        parse_string("1\n", parser_class)

def test_repeat_with_sep_simple() -> None:
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1, 2, 3\n", parser_class)
    assert node == [
        [
            TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"),
            TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"),
            TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"),
    ]

def test_optional_literal():
    grammar = """
    start: sum NEWLINE
    sum: term '+' ?
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1+\n", parser_class)
    assert node == [
        [
            [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")],
            TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
    ]
    node = parse_string("1\n", parser_class)
    assert node == [
        [[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None],
        TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
    ]

def test_combine_tokens():
    from tokenize import TokenInfo, generate_tokens, ERRORTOKEN, OP, NUMBER, NAME
    from asttokens.util import combine_tokens, patched_generate_tokens

    text = "℘·2=1"
    original_tokens = list(generate_tokens(io.StringIO(text).readline))[:4]
    assert original_tokens == [
        TokenInfo(ERRORTOKEN, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
        TokenInfo(ERRORTOKEN, string='·', start=(1, 1), end=(1, 2), line='℘·2=1'),
        TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='℘·2=1'),
        TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
    ]
    assert combine_tokens(original_tokens[:1]) == [
        TokenInfo(NAME, string='℘', start=(1, 0), end=(1, 1), line='℘·2=1'),
    ]
    assert combine_tokens(original_tokens[:2]) == [
        TokenInfo(NAME, string='℘·', start=(1, 0), end=(1, 2), line='℘·2=1'),
    ]
    assert combine_tokens(original_tokens[:3]) == [
        TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
    ]
    assert list(patched_generate_tokens(iter(original_tokens))) == [
        TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
        TokenInfo(OP, string='=', start=(1, 3), end=(1, 4), line='℘·2=1'),
    ]
    assert list(patched_generate_tokens(iter(original_tokens[:-1]))) == [
        TokenInfo(NAME, string='℘·2', start=(1, 0), end=(1, 3), line='℘·2=1'),
    ]

def test_repeat_1_simple():
    grammar = """
    start: thing thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    assert node == [
        [TokenInfo(NUMBER, string='1', start=(1, 0), end=(1, 1), line='1 2 3\n')],
        [[[TokenInfo(NUMBER, string='2', start=(1, 2), end=(1, 3), line='1 2 3\n')]],
         [[TokenInfo(NUMBER, string='3', start=(1, 4), end=(1, 5), line='1 2 3\n')]]],
        TokenInfo(NEWLINE, string='\n', start=(1, 5), end=(1, 6), line='1 2 3\n'),
    ]
    with pytest.raises(SyntaxError):
        parse_string("1\n", parser_class)

def test_tokens_expr():
    src, names = expr_fix
    assert list(util.tokenize(src)) == [
        TokenInfo(type=2, string='2', start=(1, 0), end=(1, 1), line='2 + 3'),
        TokenInfo(type=14, string='+', start=(1, 2), end=(1, 3), line='2 + 3'),
        TokenInfo(type=2, string='3', start=(1, 4), end=(1, 5), line='2 + 3'),
        TokenInfo(type=0, string='', start=(2, 0), end=(2, 0), line=''),
    ]

def test_last_non_whitespace():
    source = io.StringIO("\n1\n2")
    t = Tokenizer(generate_tokens(source.readline))
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1\n")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1\n")
    assert t.getnext() == TokenInfo(NEWLINE, "\n", (2, 1), (2, 2), "1\n")
    assert t.get_last_non_whitespace_token() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1\n")

def test_mark_reset():
    source = io.StringIO("\n1 2")
    t = Tokenizer(generate_tokens(source.readline))
    index = t.mark()
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
    t.reset(index)
    assert t.peek() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")
    assert t.getnext() == TokenInfo(NUMBER, "1", (2, 0), (2, 1), "1 2")

def test_cut(self) -> None:
    grammar = """
    start: '(' ~ expr ')'
    expr: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("(1)", parser_class)
    self.assertEqual(node, [
        TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
        [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
        TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
    ])

def test_cut():
    grammar = """
    start: '(' ~ expr ')'
    expr: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("(1)", parser_class, verbose=True)
    assert node == [
        TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
        [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
        TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
    ]

def test_expr_grammar() -> None:
    grammar = """
    start: sum NEWLINE
    sum: term '+' term | term
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("42\n", parser_class)
    assert node == [
        TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
        TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
    ]

def test_gather(self) -> None:
    grammar = """
    start: ','.thing+ NEWLINE
    thing: NUMBER
    """
    rules = parse_string(grammar, GrammarParser).rules
    self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
    self.assertTrue(repr(rules["start"]).startswith(
        "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
    ))
    self.assertEqual(str(rules["thing"]), "thing: NUMBER")
    parser_class = make_parser(grammar)
    node = parse_string("42\n", parser_class)
    assert node == [
        [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
        TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
    ]
    node = parse_string("1, 2\n", parser_class)
    assert node == [
        [
            [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")],
            [TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"),
    ]

def test_lookahead():
    grammar = """
    start: (expr_stmt | assign_stmt) &'.'
    expr_stmt: !(target '=') expr
    assign_stmt: target '=' expr
    expr: term ('+' term)*
    target: NAME
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("foo = 12 + 12 .", parser_class)
    assert node == [[[
        [TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")],
        TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."),
        [
            [TokenInfo(NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 .")],
            [[[
                TokenInfo(OP, string="+", start=(1, 9), end=(1, 10), line="foo = 12 + 12 ."),
                [TokenInfo(NUMBER, string="12", start=(1, 11), end=(1, 13), line="foo = 12 + 12 .")],
            ]]],
        ],
    ]]]

def pre_parse(code):
    result = []
    replace_mode = None
    try:
        code = code.encode('utf-8')
        g = tokenize(io.BytesIO(code).readline)
        for token in g:
            toks = [token]
            line = token.line
            start = token.start
            end = token.end
            string = token.string
            if token.type == COMMENT and "@version" in token.string:
                parse_version_pragma(token.string[1:])
            if token.type == NAME and string == "class" and start[1] == 0:
                raise StructureException(
                    "The `class` keyword is not allowed. Perhaps you meant `contract` or `struct`?",
                    token.start)
            # `contract xyz` -> `class xyz(__VYPER_ANNOT_CONTRACT__)`
            # `struct xyz` -> `class xyz(__VYPER_ANNOT_STRUCT__)`
            if token.type == NAME and replace_mode:
                toks.extend([
                    TokenInfo(OP, "(", end, end, line),
                    TokenInfo(NAME, replace_mode, end, end, line),
                    TokenInfo(OP, ")", end, end, line),
                ])
                replace_mode = None
            if token.type == NAME and string == "contract" and start[1] == 0:
                replace_mode = "__VYPER_ANNOT_CONTRACT__"
                toks = [TokenInfo(NAME, "class", start, end, line)]
            # In the future, may relax the start-of-line restriction
            if token.type == NAME and string == "struct" and start[1] == 0:
                replace_mode = "__VYPER_ANNOT_STRUCT__"
                toks = [TokenInfo(NAME, "class", start, end, line)]
            # Prevent semi-colon line statements.
            if (token.type, token.string) == (OP, ";"):
                raise StructureException("Semi-colon statements not allowed.", token.start)
            result.extend(toks)
    except TokenError as e:
        raise StructureException(e.args[0], e.args[1]) from e
    return untokenize(result).decode('utf-8')

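# A hedged illustration of what pre_parse above produces, assuming the
# surrounding vyper module supplies its imports (tokenize, untokenize,
# TokenInfo, the NAME/OP/COMMENT constants, StructureException, ...):
src = (
    "contract Foo:\n"
    "    owner: address\n"
)
print(pre_parse(src))
# the header comes back roughly as "class Foo(__VYPER_ANNOT_CONTRACT__):",
# i.e. python-valid source that the rest of the vyper front end can parse
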
def __replace_operators(self, tokens):
    VALID_OPS = {"(", ")", "~", "|", "^", "&"}
    OP_TRANSLATE = {
        'and': "&",
        'or': "|",
        'xor': "^",
        'not': "~",
    }
    for token in tokens:
        if token.type == OP:
            if token.string in VALID_OPS:
                yield token
            else:
                raise InvalidExpression(f"Operator '{token.string}' is not allowed")
        elif token.type == NAME:
            try:
                op = OP_TRANSLATE[token.string.lower()]
                yield TokenInfo(OP, op, token.start, token.end, token.line)
            except KeyError:
                yield token
        elif token.type == ERRORTOKEN:
            raise InvalidExpression("Invalid syntax")
        elif token.type not in {ENCODING, ENDMARKER, NEWLINE}:
            raise InvalidExpression(f"Invalid syntax '{token.string}'")
        else:
            yield token

def test_expr_grammar():
    grammar = """
    start: sum NEWLINE
    sum: term '+' term | term
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("42\n", parser_class)
    assert node == [
        [[TokenInfo(NUMBER, string='42', start=(1, 0), end=(1, 2), line='42\n')]],
        TokenInfo(NEWLINE, string='\n', start=(1, 2), end=(1, 3), line='42\n'),
    ]

def to_token_info(self):
    """
    Convert to TokenInfo object used by Python's tokenizer.
    """
    return TokenInfo(self.type, self.string, self.start, self.end, self.line)

def vyperize(code, *, class_types=None):
    """Recovers the vyper contract source code from its python-valid representation.

    This more or less undoes what ``vyper.ast.pre_parser.pre_parse`` does.

    Parameters
    ----------
    code : str
        The python-valid formatted vyper source code to be "un-formatted"
        back into "pure" vyper code aka "vyperized".
    class_types: dict
        Mapping of class types contained in the contract.
    """
    tokens = []
    previous_token = None
    try:
        code_bytes = code.encode("utf-8")
        g = tokenize(io.BytesIO(code_bytes).readline)
        for token in g:
            # if previous token was "class" then restore it to its vyper form
            if token.type == NAME and previous_token is not None:
                prev_token_dict = previous_token._asdict()
                prev_token_dict["string"] = class_types[token.string]
                vyper_restored_token = TokenInfo(**prev_token_dict)
                tokens[-1] = vyper_restored_token
            if (token.type == OP and token.string in ("(", ")")
                    and previous_token is not None):
                continue
            if token.type == OP and token.string == ":" and previous_token is not None:
                token_dict = token._asdict()
                token_dict["start"] = (token.start[0], token.start[1] - 2)
                token_dict["end"] = (token.end[0], token.end[1] - 2)
                token = TokenInfo(**token_dict)
                previous_token = None
            if token.type == NAME and token.string == "class" and token.start[1] == 0:
                previous_token = token
            tokens.append(token)
    except TokenError as e:
        raise SyntaxException(e.args[0], code, e.args[1][0], e.args[1][1]) from e
    return untokenize(tokens).decode("utf-8")

def infix(left, right, callback):
    a, b = left.pop(), right.pop(0)
    # "supistettu" (Finnish for "reduced") holds the single NUMBER token
    # that replaces the two operands around the infix operator
    supistettu = TokenInfo(type=NUMBER,
                           string=callback(literal(a), literal(b)),
                           start=a.start,
                           end=b.end,
                           line=a.line)
    return left + [supistettu] + right

def test_repeat_1_simple(self) -> None:
    grammar = """
    start: thing thing+ NEWLINE
    thing: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1 2 3\n", parser_class)
    self.assertEqual(node, [
        [TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")],
        [
            [[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]],
            [[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]],
        ],
        TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"),
    ])
    with self.assertRaises(SyntaxError):
        parse_string("1\n", parser_class)

async def post(self):
    path = configuration.PROJECT_PATH / self.request.match_info.get('file_name', '')
    body = await self.request.json()
    print('INPUT BODY:', body)
    body['data']['lineText'] = body['data']['lineText'].strip().rstrip()
    t_struct_adjusted = []
    result = []
    for i in tokenize.tokenize(BytesIO(body['data']['lineText'].encode('utf8')).readline):
        if i.type == token.ENDMARKER:
            continue
        t_struct_adjusted.append(
            TokenInfo(type=i.type,
                      string=i.string,
                      start=(body['data']['lineNumber'], i.start[1]),
                      end=(body['data']['lineNumber'], i.end[1]),
                      line=i.line))
    # pprint(t_struct_adjusted)
    token_string = ''
    if t_struct_adjusted[-1].type == token.NEWLINE:
        t_struct_adjusted.pop(-1)
    if t_struct_adjusted[-1].string == '.':
        # look up names after the trailing dot
        result += AST_PARSER[path]['ast_tree'].get_autocomlete(
            token_string='',
            owner_attribute_string=t_struct_adjusted[-2].string,
            line_number=t_struct_adjusted[-1].start[0],
            col_offset=t_struct_adjusted[-1].start[1])
        token_string = ''
    elif t_struct_adjusted[-2].string == '.':
        # look up attributes of the preceding name
        result += AST_PARSER[path]['ast_tree'].get_autocomlete(
            t_struct_adjusted[-1].string,
            owner_attribute_string=t_struct_adjusted[-3].string,
            line_number=t_struct_adjusted[-1].start[0],
            col_offset=t_struct_adjusted[-1].start[1])
        token_string = t_struct_adjusted[-1].string
    elif t_struct_adjusted[-1].type == token.NAME:
        # look up completions for the current name
        result += AST_PARSER[path]['ast_tree'].get_autocomlete(
            t_struct_adjusted[-1].string,
            line_number=t_struct_adjusted[-1].start[0],
            col_offset=t_struct_adjusted[-1].start[1])
        token_string = t_struct_adjusted[-1].string
    elif t_struct_adjusted[-1].type != token.NAME:
        # the trailing characters are not a name, so there is nothing to complete
        pass
    return aiohttp.web.json_response({
        'type': 'autocomplete',
        'data': {
            'lineNumber': body['data']['lineNumber'],
            'result': result,
            'prefix': token_string
        }
    })

def test_iter_comments():
    source = "\n".join(["# foo", "assert True # E: bar"])
    actual = list(generate_per_line_token_lists(source))
    # fmt: off
    expected = [
        [],  # line 0
        [
            TokenInfo(type=COMMENT, string="# foo", start=(1, 0), end=(1, 5), line="# foo\n"),
            TokenInfo(type=NL, string="\n", start=(1, 5), end=(1, 6), line="# foo\n"),
        ],
        [
            TokenInfo(type=NAME, string="assert", start=(2, 0), end=(2, 6), line="assert True # E: bar"),
            TokenInfo(type=NAME, string="True", start=(2, 7), end=(2, 11), line="assert True # E: bar"),
            TokenInfo(type=COMMENT, string="# E: bar", start=(2, 12), end=(2, 20), line="assert True # E: bar"),
            TokenInfo(type=NEWLINE, string="", start=(2, 20), end=(2, 21), line=""),
        ],
        [
            TokenInfo(type=ENDMARKER, string="", start=(3, 0), end=(3, 0), line=""),
        ],
    ]
    # fmt: on
    assert actual == expected

def test_optional_literal(self) -> None:
    grammar = """
    start: sum NEWLINE
    sum: term '+' ?
    term: NUMBER
    """
    parser_class = make_parser(grammar)
    node = parse_string("1+\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"),
                TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
        ],
    )
    node = parse_string("1\n", parser_class)
    self.assertEqual(
        node,
        [
            [
                TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
                None,
            ],
            TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
        ],
    )

def _transform(
    self, tokens: Iterable[TokenInfo]
) -> Iterable[Tuple[TokenInfo, TokenInfo]]:
    for t in tokens:
        if t.type == STRING and 'f' in t.string.partition(t.string[-1])[0].lower():
            yield t, TokenInfo(STRING, self._transform_fstring(t.string),
                               t.start, t.end, t.line)
        else:
            yield t, t

def _token_iter(tokens):
    """Collapse tokens considered a single token in Danish into one.

    We need this function, because the tokenizer considers `ellers-hvis`
    and `for-hver` as "ellers" MINUS "hvis" and "for" MINUS "hver"
    respectively.

    Send the tokenized Danish code through this guy.
    """
    if len(tokens) < 3:
        yield from tokens
        return

    def _is_elif_token(t1, t2, t3):
        return (t1.type == NAME and t1.string == "ellers"
                and t2.type == OP and t2.string == "-"
                and t3.type == NAME and t3.string == "hvis")

    def _is_for_token(t1, t2, t3):
        return (t1.type == NAME and t1.string == "for"
                and t2.type == OP and t2.string == "-"
                and t3.type == NAME and t3.string == "hver")

    skip = 0  # skip this many tokens before proceeding
    for t1, t2, t3 in zip(tokens, islice(tokens, 1, None), islice(tokens, 2, None)):
        if skip > 0:
            skip -= 1
            continue
        if _is_elif_token(t1, t2, t3):
            yield TokenInfo(NAME, "ellers-hvis", t1.start, t1.end, t1.line)
            skip = 2
        elif _is_for_token(t1, t2, t3):
            yield TokenInfo(NAME, "for-hver", t1.start, t1.end, t1.line)
            skip = 2
        else:
            yield t1
    yield tokens[-2]
    yield tokens[-1]

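# Quick check of the collapsing pass above, assuming its module has the usual
# imports (itertools.islice, TokenInfo and the NAME/OP constants): the Danish
# keyword "ellers-hvis" lexes as three stdlib tokens and comes back as one.
import io
import tokenize

toks = list(tokenize.generate_tokens(io.StringIO("ellers-hvis x:\n    pass\n").readline))
collapsed = [t.string for t in _token_iter(toks) if t.string == "ellers-hvis"]
print(collapsed)  # ['ellers-hvis']
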
def _token(self, type):
    i = self.pos
    line = self.index[i].line
    start = self.index[i].col
    pattern = TOKENS[type]
    string = self._matchre(pattern)
    if not string:
        return
    e = self.pos
    end = self.index[e].col
    return TokenInfo(type, string, start, end, line)

def suppress(block):
    tok = block.tokens[1]
    new_func_name = 'test_' + tok.string
    block.tokens[1] = TokenInfo(type=NAME,
                                string=new_func_name,
                                start=tok.start,
                                end=tok.end,
                                line=tok.line)
    return block