def main(): py_input = """exec admin 'show info' print 'hello' exec sql 'select * from namespace1'\n""" print py_input py_stream = cStringIO.StringIO(py_input) print tokenize.untokenize(tarantool_translate(py_stream.readline))
def check_roundtrip(self, f): """ Test roundtrip for `untokenize`. `f` is an open file or a string. The source code in f is tokenized to both 5- and 2-tuples. Both sequences are converted back to source code via tokenize.untokenize(), and the latter tokenized again to 2-tuples. The test fails if the 3 pair tokenizations do not match. When untokenize bugs are fixed, untokenize with 5-tuples should reproduce code that does not contain a backslash continuation following spaces. A proper test should test this. """ # Get source code and original tokenizations if isinstance(f, str): code = f.encode('utf-8') else: code = f.read() f.close() readline = iter(code.splitlines(keepends=True)).__next__ tokens5 = list(tokenize(readline)) tokens2 = [tok[:2] for tok in tokens5] # Reproduce tokens2 from pairs bytes_from2 = untokenize(tokens2) readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] self.assertEqual(tokens2_from2, tokens2) # Reproduce tokens2 from 5-tuples bytes_from5 = untokenize(tokens5) readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] self.assertEqual(tokens2_from5, tokens2)
def get_context(source, position): lines, lineno = get_block(source, position) tokens = TokenGenerator(lines) ctype, ctx, match, fctx = 'expr', '', '', '' while True: tid, value = tokens.next() if not tid: break if tid == NAME and value == 'import': ctype, fctx = 'import', None ctx, match = parse_import(tokens) elif tid == NAME and value == 'from': fctx = None ctype, ctx, match = parse_from(tokens) elif tid == NAME or value in BRACKETS.keys(): ctype = 'expr' tokens.hold(tid, value) ctx, match, fctx = parse_expr(tokens) ctx = untokenize(prep_tokens(ctx)).strip().rstrip('.') fctx = untokenize(prep_tokens(fctx)).strip().rstrip('.') else: ctype, ctx, match, fctx = 'expr', '', '', '' return ctype, lineno, ctx, match, fctx
def __init__(self, tokens, filename='<unknown>', line_offset=0): """Create an executor for a token stream Arguments: tokens (List[TokenInfo]): The tokens to execute. filename (Optional[str]): The filename where the tokens originated (default: ``'<unknown>'``). Used in error handling, but never opened. line_offset (Optional[str]): An offset of tokens within the input file (default: zero). Raises: RuleExecutionError: Raised if the token stream is invalid or if it could not be compiled. """ self.input_tokens = tokens self.input_lines = tokenize.untokenize(self.input_tokens).split('\n') self.filename = filename self.line_offset = line_offset self._validate_paren_levels(tokens) self.eval_tokens = self._gen_eval_tokens(tokens) self.eval_str = tokenize.untokenize(self.eval_tokens) self.codeobj = self._compile(self.eval_str)
def dealwith(self, readline, **kwargs): """ Replace the contents of spec file with the translated version readline should be a callable object , which provides the same interface as the readline() method of built-in file objects """ data = [] try: # We pass in the data variable as an argument so that we # get partial output even in the case of an exception. self.tokeniser.translate(readline, data, **kwargs) except Exception as e: # Comment out partial output so that it doesn't result in # a syntax error when received by the interpreter. lines = [] for line in untokenize(data).split('\n'): lines.append("# %s" % line) # Create exception to put into code to announce error exception = 'raise Exception("""--- internal spec codec error --- %s""")' % e # Need to make sure the exception doesn't add a new line and put out line numberes if len(lines) == 1: data = "%s%s" % (exception, lines[0]) else: lines.append(exception) first_line = lines.pop() lines[0] = "%s %s" % (first_line, lines[0]) data = '\n'.join(lines) else: # At this point, data is a list of tokens data = untokenize(data) return data
def main(): """Executed when script is run as-is.""" # magic_files = {} for filename in locate_files(ROOT_DIR): print("Processing %s" % filename) with open(filename, "rt") as f: tokens = list(tokenize.generate_tokens(f.readline)) text1 = tokenize.untokenize(tokens) ntokens = normalize_tokens(tokens) text2 = tokenize.untokenize(ntokens) assert text1 == text2
def remove_comments(src): """ This reads tokens using tokenize.generate_tokens and recombines them using tokenize.untokenize, and skipping comment/docstring tokens in between """ f = cStringIO.StringIO(src) class SkipException(Exception): pass processed_tokens = [] last_token = None # go thru all the tokens and try to skip comments and docstrings for tok in tokenize.generate_tokens(f.readline): t_type, t_string, t_srow_scol, t_erow_ecol, t_line = tok try: if t_type == tokenize.COMMENT: raise SkipException() elif t_type == tokenize.STRING: if last_token is None or last_token[0] in [tokenize.INDENT]: # FIXEME: this may remove valid strings too? #raise SkipException() pass except SkipException: pass else: processed_tokens.append(tok) last_token = tok return tokenize.untokenize(processed_tokens)
def __init__(self, *args, **kwargs): utf_8.StreamReader.__init__(self, *args, **kwargs) try: data = tokenize.untokenize(tarantool_translate(self.stream.readline)) self.stream = cStringIO.StringIO(data) except Exception: self.stream.seek(0)
def transform_source_code(text): '''Input text is assumed to contain some French equivalent words to normal Python keywords and a few builtin functions. These are transformed into normal Python keywords and functions. ''' # continue, def, global, lambda, nonlocal remain unchanged by choice dictionary = {'Faux': 'False', 'Aucun': 'None', 'Vrai': 'True', 'et': 'and', 'comme': 'as', 'affirme': 'assert', 'sortir': 'break', 'classe': 'class', 'élimine': 'del', 'ousi': 'elif', 'autrement': 'else', 'exception': 'except', 'finalement': 'finally', 'pour': 'for', 'de': 'from', 'si': 'if', 'importe': 'import', 'dans': 'in', 'est': 'is', 'non': 'not', 'ou': 'or', 'passe': 'pass', 'soulever': 'raise', 'retourne': 'return', 'essayer': 'try', 'pendant': 'while', 'avec': 'with', 'céder': 'yield', 'imprime': 'print', 'intervalle': 'range'} toks = tokenize.generate_tokens(StringIO(text).readline) result = [] for toktype, tokvalue, _, _, _ in toks: if toktype == tokenize.NAME and tokvalue in dictionary: result.append((toktype, dictionary[tokvalue])) else: result.append((toktype, tokvalue)) return tokenize.untokenize(result)
def commandline(): """zhpy3, the python language in Traditional Chinese usage: twpy file.twpy """ if len(sys.argv) != 2: print(commandline.__doc__) sys.exit(1) file_path = sys.argv[1] if not os.path.exists(file_path): print("twpy: file '%s' does not exists" % file_path) sys.exit(1) #sys.meta_path = [ImportHook()] sys.path[0] = os.path.dirname(os.path.join(os.getcwd(), file_path)) source = tokenize.untokenize( list(translate_code(open(file_path).readline, translations))) #translate_module(__builtins__) code = compile(source, file_path, "exec") runpy._run_module_code(code, mod_name="__main__")
def fixLazyJson (in_text): tokengen = tokenize.generate_tokens(StringIO(in_text).readline) result = [] for tokid, tokval, _, _, _ in tokengen: # fix unquoted strings if (tokid == token.NAME): if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']: tokid = token.STRING tokval = u'"%s"' % tokval # fix single-quoted strings elif (tokid == token.STRING): if tokval.startswith ("'"): tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"') # remove invalid commas elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')): if (len(result) > 0) and (result[-1][1] == ','): result.pop() # fix single-quoted strings elif (tokid == token.STRING): if tokval.startswith ("'"): tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"') result.append((tokid, tokval)) return tokenize.untokenize(result)
def globals_from_file(filename): _file = open(filename) data = tokenize.untokenize(translate(_file.readline)) compiled = compile(data, filename, "exec") globals_ = {} exec(compiled, globals_) return globals_
def decistmt(s): """Substitute Decimals for floats in a string of statements. >>> from decimal import Decimal >>> s = 'print +21.3e-5*-.1234/81.7' >>> decistmt(s) "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" The format of the exponent is inherited from the platform C library. Known cases are "e-007" (Windows) and "e-07" (not Windows). Since we're only showing 12 digits, and the 13th isn't close to 5, the rest of the output should be platform-independent. >>> exec(s) #doctest: +ELLIPSIS -3.21716034272e-0...7 Output from calculations with Decimal should be identical across all platforms. >>> exec(decistmt(s)) -3.217160342717258261933904529E-7 """ result = [] g = generate_tokens(StringIO(s).readline) # tokenize the string for toknum, tokval, _, _, _ in g: if toknum == NUMBER and "." in tokval: # replace NUMBER tokens result.extend([(NAME, "Decimal"), (OP, "("), (STRING, repr(tokval)), (OP, ")")]) else: result.append((toknum, tokval)) return untokenize(result)
def _get_trait_definition(self): """ Retrieve the Trait attribute definition """ # Get the class source and tokenize it. source = inspect.getsource(self.parent) string_io = StringIO.StringIO(source) tokens = tokenize.generate_tokens(string_io.readline) # find the trait definition start trait_found = False name_found = False while not trait_found: item = tokens.next() if name_found and item[:2] == (token.OP, '='): trait_found = True continue if item[:2] == (token.NAME, self.object_name): name_found = True # Retrieve the trait definition. definition_tokens = [] for type, name, start, stop, line in tokens: if type == token.NEWLINE: break item = (type, name, (0, start[1]), (0, stop[1]), line) definition_tokens.append(item) return tokenize.untokenize(definition_tokens).strip()
def Untokenize(offset_tokens): """Return the string representation of an iterable of OffsetTokens.""" # Make a copy. Don't modify the original. offset_tokens = collections.deque(offset_tokens) # Strip leading NL tokens. while offset_tokens[0].type == tokenize.NL: offset_tokens.popleft() # Strip leading vertical whitespace. first_token = offset_tokens.popleft() # Take care not to modify the existing token. Create a new one in its place. first_token = OffsetToken(first_token.type, first_token.string, (0, first_token.offset[1])) offset_tokens.appendleft(first_token) # Convert OffsetTokens to tokenize tokens. tokenize_tokens = [] row = 1 col = 0 for t in offset_tokens: offset_row, offset_col = t.offset if offset_row == 0: col += offset_col else: row += offset_row col = offset_col tokenize_tokens.append((t.type, t.string, (row, col), (row, col), None)) # tokenize can't handle whitespace before line continuations. # So add a space. return tokenize.untokenize(tokenize_tokens).replace('\\\n', ' \\\n')
def main(): import tempfile if sys.argv[1] == '-p': file = sys.argv[2] print_script = True tree = maketree(Tokens(file), preamble=True) else: file = sys.argv[1] print_script = False tree = maketree(Tokens(file)) try: code = tokenize.untokenize(flatten(tree)).decode() except: pprint(tree, indent=4) raise if print_script: print(code) sys.exit() del sys.argv[0] tf = tempfile.NamedTemporaryFile('w') tf.write(code) tf.flush() ns = {'__name__': '__main__'} exec(PREAMBLE, ns) try: exec(compile(code, tf.name, 'exec'), ns) except Exception as e: # pprint(tree, indent=4) print(code) raise
def _preparse(source, f=compose(_replace_locals, _replace_booleans, _rewrite_assign)): """Compose a collection of tokenization functions Parameters ---------- source : str A Python source code string f : callable This takes a tuple of (toknum, tokval) as its argument and returns a tuple with the same structure but possibly different elements. Defaults to the composition of ``_rewrite_assign``, ``_replace_booleans``, and ``_replace_locals``. Returns ------- s : str Valid Python source code Notes ----- The `f` parameter can be any callable that takes *and* returns input of the form ``(toknum, tokval)``, where ``toknum`` is one of the constants from the ``tokenize`` module and ``tokval`` is a string. """ assert callable(f), 'f must be callable' return tokenize.untokenize(lmap(f, tokenize_string(source)))
def decistmt(s): """Substitute Decimals for floats in a string of statements. >>> from decimal import Decimal >>> s = 'print +21.3e-5*-.1234/81.7' >>> decistmt(s) "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" >>> exec(s) -3.21716034272e-007 >>> exec(decistmt(s)) -3.217160342717258261933904529E-7 """ result = [] # tokenize the string g = tokenize.generate_tokens(StringIO(s).readline) for toknum, tokval, _, _, _ in g: # replace NUMBER tokens if toknum == tokenize.NUMBER and '.' in tokval: result.extend([ (tokenize.NAME, 'Decimal'), (tokenize.OP, '('), (tokenize.STRING, repr(tokval)), (tokenize.OP, ')') ]) else: result.append((toknum, tokval)) return tokenize.untokenize(result)
def test_DeleteStatement_valid(input, expected_type, expected_expr): smt = parser.ExpressionStatement.try_parse(tok(input)) str_expr = tokenize.untokenize(smt.expr).strip() assert smt.type == expected_type assert str_expr == expected_expr
def feedInput(code, test) : # Initial variable declaration temp = 0 i = 0 limit = len(test) # Tokenize the code g = tokenize.generate_tokens(io.BytesIO("\n".join(code)).readline) result = [] # Traverse for each token for toknum, tokval, _, _, _ in g: # True if an input statement wasnt found 3 tokens prior if(temp==0) : # True is there are test cases to be inputed and token found happens to be input if(i<limit and tokval=="input") : # replace token with value result.append((toknum, test[i])) i += 1 temp = 3 else : result.append((toknum, tokval)) else : # Input was found temp -= 1 # Return the untokenized form of code in form of list return tokenize.untokenize(result).split("\n")
def fix_lazy_json(in_text): """ This function modifies JS-contained JSON to be valid. Posted in http://stackoverflow.com/questions/4033633/handling-lazy-json-\ in-python-expecting-property-name by Pau Sánchez (codigomanso.com) """ tokengen = tokenize.generate_tokens(io.StringIO(in_text).readline) valid_tokens = ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN'] result = [] for tokid, tokval, _, _, _ in tokengen: # fix unquoted strings if tokid == token.NAME: tokid, tokval = fix_unquoted((tokid, tokval), valid_tokens) # fix single-quoted strings elif tokid == token.STRING: tokval = fix_single_quoted(tokval) # remove invalid commas elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')): result = remove_invalid_commas(result) result.append((tokid, tokval)) return tokenize.untokenize(result)
def convert(readline): result = [] in_repeat = False for ttype, tval, _, _, _ in tokenize.generate_tokens(readline): if ttype == token.NAME and tval == "repeat": result.extend([ (token.NAME, "for"), (token.NAME, "_"), (token.NAME, "in"), (token.NAME, "range"), (token.OP, "(") ]) in_repeat = True elif in_repeat and ttype == token.OP and tval == ":": result.extend([ (token.NAME, ")"), (token.OP, ":") ]) else: result.append((ttype, tval)) return tokenize.untokenize(result)
def preprocess(source): # Syntax tree has whitespace & comments stripped, so use the tokenizer # to get them instead & strip out any pydoc. import tokenize, token, io line_indents = [] comments = [] def _preprocess(tokens): import token lineno = 0 indent = comment = "" for t in tokens: if t.type == token.INDENT: indent = t.string if t.type == tokenize.COMMENT: comment = "//" + t.string[1:] if t.type in (token.NEWLINE, tokenize.NL): line_indents.append(indent) comments.append(comment) # indent = '' - only counts new indents? comment = "" lineno += 1 if t.type == token.STRING: continue yield (t.type, t.string) stream = io.StringIO(source).readline stream = _preprocess(tokenize.generate_tokens(stream)) source = tokenize.untokenize(stream) return source, line_indents, comments
def __substituteVars(self, code, env): ''' Expand any variables that exist in the given environment to their corresponding values ''' # tokenize the given expression code gtoks = tokenize.generate_tokens(StringIO.StringIO(code).readline) # iterate over each token and replace any matching token with its corresponding value tokens = [] for toknum, tokval, _, _, _ in gtoks: if toknum == tokenize.NAME and tokval in env: ntoks = tokenize.generate_tokens(StringIO.StringIO(str(env[tokval])).readline) tokens.extend(ntoks) else: tokens.append((toknum, tokval)) # convert the tokens back to a string code = tokenize.untokenize(tokens) # remove all the leading and trailing spaces code = code.strip() # return the modified string return code
def visit(self): modified = [] for toknum, tokval, tokbegin, tokend, tokline in self.tokens: # print (token.tok_name[toknum], tokval) if toknum != tokenize.COMMENT: modified.append((toknum, tokval)) else: tokval_1 = tokval.strip(" \t#") tokbegin = tokbegin[0] + self.offset, tokbegin[1] tokend = tokend[0] + self.offset, tokend[1] handler_name = "%s_handler" % tokval_1.split()[0].lower() handler = getattr(self, handler_name, None) if handler: dedents_new = len(modified) new_tokens = handler(toknum, tokval_1, tokbegin, tokend, tokline) self.offset += sum([1 for x in new_tokens if x[0] == tokenize.NEWLINE]) modified.extend(new_tokens) dedents_old = len(modified) + 1 self.dedents_patch_loc.append((dedents_new, dedents_old)) else: modified.append((toknum, tokval)) # for x,y in modified: # print (token.tok_name[x], y) # print self.dedents_patch_loc # print modified for x, y in self.dedents_patch_loc: # print modified[x], token.tok_name[modified[y][0]] if modified[y][0] in [tokenize.INDENT, tokenize.DEDENT]: modified.insert(x, modified[y]) del modified[y + 1] # print modified return tokenize.untokenize(modified)
def _get_trait_definition(self): """ Retrieve the Trait attribute definition """ # Get the class source and tokenize it. source = inspect.getsource(self.parent) string_io = StringIO.StringIO(source) tokens = tokenize.generate_tokens(string_io.readline) # find the trait definition start trait_found = False name_found = False while not trait_found: item = next(tokens) if name_found and item[:2] == (token.OP, '='): trait_found = True continue if item[:2] == (token.NAME, self.object_name): name_found = True # Retrieve the trait definition. definition_tokens = _get_definition_tokens(tokens) definition = tokenize.untokenize(definition_tokens).strip() if not IS_PY3: definition = unicode(definition, 'utf-8') return definition
def tostring(tokens): '''Converte lista de tokens para string''' last_pos = tokens[0].start while tokens[-1].type == DEDENT: tokens.pop() if tokens[-1].type != ENDMARKER: start = end = tokens[-1].end tokens.append(tknew(ENDMARKER, '', start, end, line='')) # tkprint(tokens) tokens = [tk.to_token_info() for tk in tokens] try: return tokenize.untokenize(tokens) except ValueError: for idx, tk in enumerate(tokens): a, b = tk.start c, d = last_pos if (a < c) or (a == c and d > b): fmt = idx, tokens[idx - 1], tk print(tokens) raise ValueError( 'tokens sobrepõe a partir de #%s:\n\t%s\n\t%s)' % fmt) last_pos = tk.end else: raise
def gen_lambdas(): def gen(): yield src + "\n" g = gen() step = 0 tokens = [] for tok in tokenize.generate_tokens(getattr(g, "next", getattr(g, "__next__", None))): if step == 0: if tok[0] == tokenize.NAME and tok[1] == "lambda": step = 1 tokens = [tok] level = 0 elif step == 1: if tok[0] == tokenize.NAME: tokens.append(tok) step = 2 else: step = 0 elif step == 2: if tok[0] == tokenize.OP and tok[1] == ":": tokens.append(tok) step = 3 else: step = 0 elif step == 3: if level == 0 and (tok[0] == tokenize.OP and tok[1] in ",)" or tok[0] == tokenize.ENDMARKER): yield tokenize.untokenize(tokens).strip() step = 0 else: tokens.append(tok) if tok[0] == tokenize.OP: if tok[1] in "[({": level += 1 if tok[1] in "])}": level -= 1 assert not tokens
def __init__(self, *args, **kwargs): codecs.StreamReader.__init__(self, *args, **kwargs) data = tokenize.untokenize(translate(self.stream.readline)) logging.debug('START RESULT') logging.debug(data) logging.debug('END RESULT') self.stream = StringIO.StringIO(data)
def nocomment(s): result = [] g = tokenize.generate_tokens(io.BytesIO(s).readline) for toknum, tokval, _, _, _ in g: if toknum != tokenize.COMMENT: result.append((toknum, tokval)) return tokenize.untokenize(result)
def from_string(cls, input_string): """Parse linear expression mathematical units and return a quantity object. """ if not input_string: return cls() input_string = string_preprocessor(input_string) if '[' in input_string: input_string = input_string.replace('[', '__obra__').replace(']', '__cbra__') reps = True else: reps = False gen = ptok(input_string) result = [] for toknum, tokval, _, _, _ in gen: if toknum == NAME: if not tokval: continue result.extend([ (NAME, 'L_'), (OP, '('), (STRING, '"' + tokval + '"'), (OP, ')') ]) else: result.append((toknum, tokval)) ret = eval(untokenize(result), {'__builtins__': None}, {'L_': cls.from_word}) if isinstance(ret, Number): return ParserHelper(ret) if not reps: return ret return ParserHelper(ret.scale, {key.replace('__obra__', '[').replace('__cbra__', ']'): value for key, value in ret.items()})
def _change_text(text): """Pre-processing of the input text. - Wrap constant parameters: ``a = 1`` is converted as ``a = _CONVERT_VARIABLE(EXPR="1")`` - Wrap comments: ``# line of comment.`` is converted as ``_CONVERT_COMMENT(EXPR="# line of comment.")`` Returns: list[int]: list of line numbers of end of instruction. str: changed text. """ generator = tokenize.generate_tokens(StringIO(text).readline) result = [] buff = [] eoi = [] started = False for ret in generator: num, val = ret[:2] started = started or num == token.NAME # _debug_parse(num, val, ret[4]) if num == token.NEWLINE: eoi.append(ret[2][0]) buff.append((num, val)) if num in (token.NEWLINE, token.ENDMARKER): buff = _replace_variable(buff) started = False elif num == tokenize.COMMENT and len(buff) == 1: # ignore inline comment buff = _replace_comment(buff) started = False if not started: result.extend(buff) # _debug_parse(tokenize.COMMENT, "> > > new buffer > > >", "???") buff = [] changed = tokenize.untokenize(result) debug_message("Pre-processed text:\n", changed) return eoi, changed
def pre_parse(code): result = [] try: g = tokenize(io.BytesIO(code.encode('utf-8')).readline) for token in g: # Alias contract definition to class definition. if token.type == COMMENT and "@version" in token.string: parse_version_pragma(token.string[1:]) if (token.type, token.string, token.start[1]) == (NAME, "contract", 0): token = TokenInfo(token.type, "class", token.start, token.end, token.line) # Prevent semi-colon line statements. elif (token.type, token.string) == (OP, ";"): raise StructureException("Semi-colon statements not allowed.", token.start) result.append(token) except TokenError as e: raise StructureException(e.args[0], e.args[1]) from e return untokenize(result).decode('utf-8')
def prg2py_after_preproc(data, parser_start, input_filename): input_stream = antlr4.InputStream(data) lexer = VisualFoxpro9Lexer(input_stream) stream = antlr4.CommonTokenStream(lexer) parser = VisualFoxpro9Parser(stream) tree = run_parser(stream, parser, parser_start) TreeCleanVisitor().visit(tree) output_tree = PythonConvertVisitor(input_filename).visit(tree) if not isinstance(output_tree, list): return output_tree output = add_indents(output_tree, 0) options = autopep8.parse_args(['--max-line-length', '100000', '-']) output = autopep8.fix_code(output, options) tokens = list(tokenize.generate_tokens(io.StringIO(output).readline)) for i, token in enumerate(tokens): token = list(token) if token[0] == tokenize.STRING and token[1].startswith('u'): token[1] = token[1][1:] tokens[i] = tuple(token) return tokenize.untokenize(tokens)
def fixLazyJson (self, in_text): tokengen = tokenize.generate_tokens(StringIO(in_text).readline) result = [] for tokid, tokval, _, _, _ in tokengen: if (tokid == token.NAME): if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']: tokid = token.STRING tokval = u'"%s"' % tokval elif (tokid == token.STRING): if tokval.startswith ("'"): tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"') elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')): if (len(result) > 0) and (result[-1][1] == ','): result.pop() elif (tokid == token.STRING): if tokval.startswith ("'"): tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"') result.append((tokid, tokval)) return tokenize.untokenize(result)
def inspect_signature(obj): """ Custom signature inspection primarily for cython generated callables. Cython puts the signatures to the first line of the docstrings, which we can reuse to parse the python signature from, but some gymnastics are required, like removing the cython typehints. It converts the cython signature: array(obj, type=None, mask=None, size=None, from_pandas=None, bool safe=True, MemoryPool memory_pool=None) To: <Signature (obj, type=None, mask=None, size=None, from_pandas=None, safe=True, memory_pool=None)> """ cython_signature = obj.__doc__.splitlines()[0] cython_tokens = _tokenize_signature(cython_signature) python_tokens = _convert_typehint(cython_tokens) python_signature = tokenize.untokenize(python_tokens) return inspect._signature_fromstr(inspect.Signature, obj, python_signature)
def 中翻英檔案(存放目錄, 待翻譯檔案, 新檔案名= None): if 存放目錄== "": pass elif not os.path.exists(存放目錄): os.mkdir(存放目錄) f= open(待翻譯檔案, 'r', encoding='utf-8') 程式碼= f.read() f.close() 英文化程式碼= 中翻英後處理翻譯(程式碼) f= open('temp12321.py', 'w', encoding='utf-8') f.write(英文化程式碼) f.close() ## 關鍵處理,把程式分塊,(tokenize), # # 切出所有 變數,函數,物類,方法 及它們的形態(type)。 # 程式碼, Token表= 剖析程式碼('temp12321.py') os.remove("temp12321.py") Token表= 中翻英名稱翻譯(Token表) 英文化程式碼= tn.untokenize(Token表) # 就這樣一行搞定! # 後處理,大多是暴力法 字串取代。 # # 把翻譯過的程式 個別 存起來, # print(新檔案名) if 新檔案名== None: 新檔案名= 'te_'+ os.path.basename(待翻譯檔案) if 存放目錄!= "": 新檔案名= 存放目錄 + os.path.sep + 新檔案名 print(新檔案名) 翻譯後檔案= open(新檔案名,'w', encoding= 'utf-8') 翻譯後檔案.write(英文化程式碼) 翻譯後檔案.close() return 程式碼,英文化程式碼,Token表
def compile_as_decimal(expr): '''This function takes as expression give as an argument to one of the verbs like arr or filter or sort or tap, and compiles it so that we can execute it more efficiently. Two little bits of syntactic sugar are applied to the expression: First we make all tokens that look like floats (NUMBER and contains '.') into Decimals, so that we avoid the normal FP accuracy & rounding issues. Second we translate '?' into a (decimal) random number. There are two bits of syntax sugar to help when calling tab from Vi, to avoid the need to escape ! and % you can write <> for != and ' mod ' for %. ''' clean_expression = expr.replace('<>', '!=') clean_expression = re.sub(r'\bmod\b', '%', clean_expression) clean_expression = re.sub(r'(?<![<>!])=+', '==', clean_expression) # also allow a=b out = [] try: for tn, tv, _, _, _ in tokenize.generate_tokens( io.StringIO(clean_expression).readline): if tn == tokenize.NUMBER and '.' in tv: out.append((tokenize.NAME, 'Decimal')) out.append((tokenize.OP, '(')) out.append((tokenize.STRING, repr(tv))) out.append((tokenize.OP, ')')) elif tv == '?': out.append((tokenize.NAME, 'randomd')) out.append((tokenize.OP, '(')) out.append((tokenize.OP, ')')) else: out.append((tn, tv)) except tokenize.TokenError: return (False, '?! tokens ' + expr) try: cc = compile(tokenize.untokenize(out), "<string>", 'eval') except (SyntaxError, ValueError): return (False, '?! syntax ' + expr) else: return (True, cc)
def eval_arguments(args): args = args.strip() if not args or (args == '()'): return () tokens = list(tokenize.generate_tokens(StringIO(args).readline)) def remap(): for type, name, _, _, _ in tokens: if type == tokenize.NAME and name not in REMAPPINGS: yield tokenize.STRING, '"%s"' % name else: yield type, name untok = tokenize.untokenize(remap()) if untok[1:-1].strip(): untok = untok[:-1] + ',)' # Force a tuple. try: return eval(untok, REMAPPINGS) except Exception as e: raise ValueError('Couldn\'t evaluate expression "%s" (became "%s"), ' 'error "%s"' % (args, untok, str(e)))
def generate_ctypes(header_file, py_file, cpp_flags): logging.info("Generating %s from %s", py_file, header_file) buffer = io.StringIO() ctypeslib.codegen.codegenerator.generate_code([header_file], buffer, types=(ctypeslib.codegen.typedesc.Alias, ctypeslib.codegen.typedesc.Structure, ctypeslib.codegen.typedesc.Variable, ctypeslib.codegen.typedesc.Enumeration, ctypeslib.codegen.typedesc.Function, ctypeslib.codegen.typedesc.Macro, ctypeslib.codegen.typedesc.Typedef, ctypeslib.codegen.typedesc.Union), filter_location=True, flags=cpp_flags) bytes_buffer = io.BytesIO(buffer.getvalue().encode()) bytes = tokenize.untokenize(rewrite_ctypes_little_endian(bytes_buffer.readline)) with open(py_file, 'wb') as outfile: outfile.write(bytes)
def indent(code, indentation=4, count=1): """Remove indentation at the beginning and end.""" tokens = [(x[0], x[1]) for x in _tokenize(code)] for _ in range(count): tokens_ = [] for token in tokens: if token[0] == INDENT: token = (INDENT, indentation * " " + token[1]) tokens_.append(token) tokens = tokens_ if tokens[0][0] != INDENT: tokens = [(INDENT, indentation * " ") ] + tokens[:-1] + [(DEDENT, "")] + [tokens[-1]] logger.debug(tokens) tokens = [(NEWLINE, "\n")] + tokens result = untokenize(tokens) result = "\n".join(result.split("\n")[1:]) return result
def parse_expression(self, input_string): """Parse a mathematical expression including units and return a quantity object. """ if not input_string: return self.Quantity(1) input_string = string_preprocessor(input_string) gen = ptok(input_string) result = [] unknown = set() for toknum, tokval, _, _, _ in gen: if toknum in (STRING, NAME): # replace NUMBER tokens # TODO: Integrate math better, Replace eval if tokval == 'pi': result.append((toknum, str(math.pi))) continue try: tokval = self.get_name(tokval) except UndefinedUnitError as ex: unknown.add(ex.unit_names) if tokval: result.extend([(NAME, 'Q_'), (OP, '('), (NUMBER, '1'), (OP, ','), (NAME, 'U_'), (OP, '('), (STRING, tokval), (OP, '='), (NUMBER, '1'), (OP, ')'), (OP, ')')]) else: result.extend([(NAME, 'Q_'), (OP, '('), (NUMBER, '1'), (OP, ','), (NAME, 'U_'), (OP, '('), (OP, ')'), (OP, ')')]) else: result.append((toknum, tokval)) if unknown: raise UndefinedUnitError(unknown) return eval(untokenize(result), {'__builtins__': None}, { 'REGISTRY': self._units, 'Q_': self.Quantity, 'U_': UnitsContainer })
def code_analysis_py(program_contents): "count lines and words in python" f = io.BytesIO(program_contents.encode()) g = tokenize.tokenize(f.readline) processed_tokens = [] for tok in g: t_type = tok[0] if t_type not in [tokenize.COMMENT]: processed_tokens.append(tok) # remove the docstring i = 0 while processed_tokens[i][0] == tokenize.NL: i = i + 1 if processed_tokens[i][0] == tokenize.STRING: processed_tokens = processed_tokens[i + 1:] # remove strings newtok = [] i = 0 while (i < len(processed_tokens) - 2): if processed_tokens[i][0] == tokenize.INDENT: pass #print('a',processed_tokens[i],processed_tokens[i+1],processed_tokens[i+2]) #print('b',tokenize.INDENT,tokenize.STRING,tokenize.NEWLINE) if processed_tokens[i][0] == tokenize.INDENT \ and processed_tokens[i+1][0] == tokenize.STRING \ and processed_tokens[i+2][0] == tokenize.NEWLINE: i += 3 newtok.append(processed_tokens[i]) i += 1 newtok = newtok + processed_tokens[i:i + 2] #for t in newtok: # print(t) src = "\n".join(x for x in tokenize.untokenize(newtok).decode().splitlines() if x.strip() and x != "\\") return {'lines': len(src.splitlines()), 'words': len(src.split())}
def get_code_str_and_surrounding(frame) -> Tuple[str, Surrounding]: """Gets code string and surrounding information for line event. The reason to record both code_str and surrounding is because code_str is not guaranteed to be unique, for example "a = true" appeared twice. While (frame_id, surrounding) is distinct, therefore we can detect duplicate computations by checking their (frame_id, surrounding). Both lineno and surrounding are 1-based, aka the smallest lineno is 1. """ lineno = _get_lineno(frame) groups: List[List[tokenize.TokenInfo]] = _get_module_token_groups(frame) # Given a lineno, locates the logical line that contains this line. if len(groups) == 1: return ( inspect.getsource(frame), Surrounding(start_lineno=lineno, end_lineno=lineno), ) for group, next_group in zip(groups[:-1], groups[1:]): start_lineno, end_lineno = group[0].start[0], group[-1].end[0] if start_lineno <= lineno <= end_lineno: break else: # Reachs end of groups group = next_group # Removes leading NL and DEDENT as they cause untokenize to fail. while group[0].type in {token_NL, token.DEDENT, token.INDENT}: group.pop(0) # When untokenizing, Python adds \\\n for absent lines(because lineno in # group doesn't start from 1), removes them. # Note that since we've removed the leading ENCODING token, untokenize will return # a str instead of encoded bytes. return ( tokenize.untokenize(group).lstrip("\\\n"), Surrounding(start_lineno=group[0].start[0], end_lineno=group[-1].end[0]), )
def tiefighter(readline):
    """Rewrite every project-defined TIEFIGHTER operator expression.

    For each token whose exact type is ``tokens.TIEFIGHTER`` (a custom
    operator registered elsewhere in this project -- TODO confirm its
    spelling), the surrounding ``left <op> right`` tokens are replaced by
    tokens for ``abs(left) == abs(right)`` and the stream is untokenized
    back into source bytes.
    """
    source_tokens = list(tokenize.tokenize(readline))
    # Scan the original list while mutating a copy, so `index` stays valid.
    modified_source_tokens = source_tokens.copy()

    def inc(token, by=1, page=0):
        # Shift a token's start/end coordinate: page 0 = row, page 1 = column.
        start = list(token.start)
        end = list(token.end)
        start[page] += by
        end[page] += by
        return token._replace(start=tuple(start), end=tuple(end))

    for index, token in enumerate(source_tokens):
        if token.exact_type == tokens.TIEFIGHTER:
            cxx = index - 1
            # Pop the left operand, the operator itself, and the right
            # operand (all at the same index after each pop).
            left = modified_source_tokens.pop(cxx)
            __op = modified_source_tokens.pop(cxx)
            right = modified_source_tokens.pop(cxx)
            stmt_start = modified_source_tokens[cxx - 1]
            stmt_end = modified_source_tokens.pop(cxx)
            new_line = modified_source_tokens.pop(cxx)
            # Tokenize the replacement expression; slice off the leading
            # ENCODING token and trailing NEWLINE/ENDMARKER.
            pattern = io.BytesIO(
                f"abs({left.string}) == abs({right.string})\n".encode("utf8"))
            absolute_comp = list(tokenize.tokenize(pattern.readline))[1:-2]
            # Re-anchor column positions of the statement tail after the
            # (longer) replacement text.
            stmt_end = inc(stmt_end, absolute_comp[-1].end[1], 1)
            new_line = inc(new_line, stmt_end.end[1] - new_line.start[1], 1)
            modified_source_tokens.insert(cxx, new_line)
            modified_source_tokens.insert(cxx, stmt_end)
            # Insert the replacement tokens (in reverse so index cxx works),
            # shifted onto the row/column where the statement begins.
            for token in reversed(absolute_comp):
                token = inc(token, by=stmt_start.end[0] - 1)
                token = inc(token, by=stmt_start.end[1] + 1, page=1)
                modified_source_tokens.insert(cxx, token)
    return tokenize.untokenize(modified_source_tokens)
def decistmt(s):
    """Substitute Decimals for floats in a string of statements.

    >>> from decimal import Decimal
    >>> s = 'print(+21.3e-5*-.1234/81.7)'
    >>> decistmt(s)
    "print (+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7'))"

    The format of the exponent is inherited from the platform C library.
    Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
    we're only showing 12 digits, and the 13th isn't close to 5, the
    rest of the output should be platform-independent.

    >>> exec(s) #doctest: +ELLIPSIS
    -3.21716034272e-0...7

    Output from calculations with Decimal should be identical across all
    platforms.

    >>> exec(decistmt(s))
    -3.217160342717258261933904529E-7
    """
    out = []
    # Walk the token stream; floats (NUMBER tokens containing '.') become
    # Decimal('<literal>') calls, everything else passes through.
    for toknum, tokval, _, _, _ in tokenize(BytesIO(s.encode('utf-8')).readline):
        if toknum == NUMBER and '.' in tokval:
            out += [(NAME, 'Decimal'),
                    (OP, '('),
                    (STRING, repr(tokval)),
                    (OP, ')')]
        else:
            out.append((toknum, tokval))
    return untokenize(out).decode('utf-8')
def unescape(code_str):
    """Substitutes '{{' by indents and '}}' by dedents.

    Args:
        code_str: The 1-line Python snippet.

    Returns:
        Standard valid Python as a string.

    Raises:
        CannotTokenize: The snippet could not be tokenized or was
            mis-indented after expansion.
    """
    reader = io.StringIO(code_str).readline
    token_stream = tokenize.generate_tokens(reader)
    try:
        expanded = list(_unescape_tokens(token_stream))
        return tokenize.untokenize(expanded)
    except tokenize.TokenError as e:
        raise CannotTokenize(message=e.args[0], position=e.args[1])
    except IndentationError as e:
        # IndentationError packs (filename?, lineno, offset, ...) in args[1];
        # forward (lineno, offset).
        raise CannotTokenize(message=e.args[0],
                             position=(e.args[1][1], e.args[1][2]))
def _filter_header(s): """Clean up 'L' in npz header ints. Cleans up the 'L' in strings representing integers. Needed to allow npz headers produced in Python2 to be read in Python3. Parameters ---------- s : string Npy file header. Returns ------- header : str Cleaned up header. """ import tokenize if sys.version_info[0] >= 3: from io import StringIO else: from StringIO import StringIO tokens = [] last_token_was_number = False # adding newline as python 2.7.5 workaround string = s + "\n" for token in tokenize.generate_tokens(StringIO(string).readline): token_type = token[0] token_string = token[1] if (last_token_was_number and token_type == tokenize.NAME and token_string == "L"): continue else: tokens.append(token) last_token_was_number = (token_type == tokenize.NUMBER) # removing newline (see above) as python 2.7.5 workaround return tokenize.untokenize(tokens)[:-1]
def untokenize_abstract(self, whole_tokens):
    """Rebuild Python source from abstract whole tokens.

    Each whole token is mapped back to a ``(token_type, spelling)`` pair
    that Python's ``tokenize.untokenize`` understands, then the pairs are
    untokenized into a source string.
    """
    pairs: List[Tuple[int, str]] = []
    indent_marker = cubert_tokenizer.token_from_token_type(tokenize.INDENT)
    for tok in whole_tokens:
        if tok in PythonTokenizer._EXACT_TOKEN_TYPES:
            pairs.append((tokenize.OP, tok))
        elif indent_marker in tok:
            # Type and spelling were baked into one token; split them apart.
            pairs.append((tokenize.INDENT, tok.replace(indent_marker, '')))
        elif tok in PythonTokenizer._REVERSE_TOKEN_MAP:
            kind = PythonTokenizer._REVERSE_TOKEN_MAP[tok]
            if kind in (tokenize.DEDENT, tokenize.ENDMARKER,
                        tokenize.ERRORTOKEN):
                spelling = ''
            else:  # kind in (tokenize.NEWLINE, tokenize.NL)
                spelling = '\n'
            pairs.append((kind, spelling))
        elif keyword.iskeyword(tok):
            pairs.append((tokenize.NAME, tok))
        elif PythonTokenizer._NUMBERS.match(tok):
            pairs.append((tokenize.NUMBER, tok))
        elif PythonTokenizer._SINGLE_STRINGS.match(tok):
            pairs.append((tokenize.STRING, tok))
        elif PythonTokenizer._TRIPLE_STRING_BEGINNINGS.match(tok):
            pairs.append((tokenize.STRING, tok))
        elif PythonTokenizer._COMMENTS.match(tok):
            pairs.append((tokenize.COMMENT, tok))
        else:
            # Everything else (identifiers and unknowns) maps back to NAME.
            pairs.append((tokenize.NAME, tok))
    return tokenize.untokenize(typing.cast(Any, pairs))
def parse_python(path):
    """
    Look though a python file and extract the specified `LANG_FILES`
    constant value and return it.

    `LANG_FILES` must be defined at the module level, and can be a string
    or list of strings.  Empty entries are removed.

    Raises ValueError if the file does not exist.
    """
    value_tokens = []
    in_lang = False
    in_lang_val = False
    with codecs.open(path, encoding='utf-8') as src_f:
        for tok in generate_tokens(src_f.readline):
            t_type, t_val, (t_row, t_col) = tok[:3]
            # Start of the constant declaration: a top-level NAME at col 0.
            if t_type == NAME and t_col == 0 and t_val == 'LANG_FILES':
                in_lang = True
                continue
            if in_lang:
                # Only record tokens after the '=' sign.
                if t_type == OP and t_val == '=':
                    in_lang_val = True
                    continue
                # Stop at the logical newline; continuation newlines inside
                # a multiline list are NL tokens, so those pass through.
                if t_type == NEWLINE:
                    break
                if in_lang_val:
                    value_tokens.append((t_type, t_val))
    if value_tokens:
        # NOTE(review): eval of file content -- acceptable only for trusted
        # project settings modules, never for untrusted input.
        new_lang_files = eval(untokenize(value_tokens))
        # Bug fix: `basestring` is Python 2 only; this file otherwise uses
        # Python 3 features, so test against `str`.
        if isinstance(new_lang_files, str):
            new_lang_files = [new_lang_files]
        # remove empties
        return [lf for lf in new_lang_files if lf]
    return []
def gen_lambdas():
    """Yield the source text of each single-argument ``lambda`` in ``src``.

    ``src`` is expected to be defined in the enclosing scope.  A small
    state machine collects tokens from ``lambda`` through the comma, close
    paren, or end of input that terminates it at bracket-nesting level 0,
    then yields the untokenized snippet.
    """
    def gen():
        yield src + "\n"
    g = gen()
    step = 0  # 0: scanning, 1: saw 'lambda', 2: saw parameter, 3: in body
    tokens = []
    for tok in tokenize.generate_tokens(
            # py2 generators expose .next, py3 .__next__
            getattr(g, "next", getattr(g, "__next__", None))):
        if step == 0:
            if tok[0] == tokenize.NAME and tok[1] == "lambda":
                step = 1
                tokens = [tok]
                level = 0
        elif step == 1:
            # Expect exactly one parameter name.
            if tok[0] == tokenize.NAME:
                tokens.append(tok)
                step = 2
            else:
                step = 0
                tokens = []
        elif step == 2:
            if tok[0] == tokenize.OP and tok[1] == ":":
                tokens.append(tok)
                step = 3
            else:
                step = 0
                tokens = []
        elif step == 3:
            if level == 0 and (tok[0] == tokenize.OP and tok[1] in ",)"
                               or tok[0] == tokenize.ENDMARKER):
                yield tokenize.untokenize(tokens).strip()
                step = 0
                # Bug fix: clear the buffer after yielding; previously the
                # final `assert not tokens` fired whenever any lambda had
                # been emitted.
                tokens = []
            else:
                tokens.append(tok)
                if tok[0] == tokenize.OP:
                    if tok[1] in "[({":
                        # Bug fix: was `level += 2`, so a matched bracket
                        # pair never brought the nesting count back to 0
                        # and the lambda was never terminated.
                        level += 1
                    if tok[1] in "])}":
                        level -= 1
    # All collected lambdas must have been terminated and yielded.
    assert not tokens
def remove_comments(src):
    """
    Return ``src`` with all comment tokens removed.

    The source is split with tokenize.generate_tokens and recombined with
    tokenize.untokenize, skipping COMMENT tokens.  Because untokenize
    preserves token positions, removed comments leave whitespace padding.
    """
    # Fixes: cStringIO is Python 2 only (the file already imports io), and
    # the original raised/caught a local SkipException as a disguised `if`.
    f = io.StringIO(src)
    processed_tokens = [tok for tok in tokenize.generate_tokens(f.readline)
                        if tok[0] != tokenize.COMMENT]
    return tokenize.untokenize(processed_tokens)
def decode(byteslike, errors="replace", *, ignore_first_line):
    """Decode viking-flavored source bytes back to plain Python.

    Tokenizes the input, swaps every token spelling found in
    ``_viking_to_english`` for its English equivalent, and returns the
    pair ``(decoded_source, number_of_bytes_consumed)``.
    """
    read_code = io.BytesIO(bytes(byteslike)).readline
    if ignore_first_line:
        read_code()  # skip the coding-declaration comment line
    translated = []
    for tok in _token_iter(list(tokenize(read_code))):
        if tok.string in _viking_to_english:
            # Copy of the token with only .string replaced.
            translated.append(
                TokenInfo(
                    tok.type,
                    _viking_to_english[tok.string],
                    tok.start,
                    tok.end,
                    tok.line,
                )
            )
        else:
            translated.append(tok)
    return str(untokenize(translated), "utf-8"), len(byteslike)
def _decorate_variables(expression, variable_store): variable_started = False variable_found = False tokens = [] for toknum, tokval, _, _, _ in generate_tokens( StringIO(expression).readline): if variable_started: if toknum == token.NAME: if tokval not in variable_store: variable_not_found( '$%s' % tokval, variable_store.as_dict(decoration=False), deco_braces=False) tokval = 'RF_VAR_' + tokval variable_found = True else: tokens.append((token.ERRORTOKEN, '$')) variable_started = False if toknum == token.ERRORTOKEN and tokval == '$': variable_started = True else: tokens.append((toknum, tokval)) return untokenize(tokens).strip() if variable_found else expression
def __init__(self, source_code_path: str):
    """Parse *source_code_path* and write the expanded result next to it.

    Comments are treated as instructions: each one is stripped of its '#'
    and expanded via ``Parser.parse_and_expand_instruction``; all other
    tokens pass through.  The rewritten source is saved as
    ``<file_name>_parsed.py``.
    """
    out_tokens = []
    file_name = os.path.basename(source_code_path)
    with open(source_code_path, "r") as source_code:
        raw_source = source_code.read()
    byte_stream = BytesIO(raw_source.encode("UTF-8"))
    for tok_type, tok_val, *_ in tokenize.tokenize(byte_stream.readline):
        if tok_type == tokenize.COMMENT:
            # Drop the leading '#' before expanding the instruction.
            out_tokens.extend(
                Parser.parse_and_expand_instruction(tok_val[1:]))
        else:
            out_tokens.append((tok_type, tok_val))
    rebuilt = tokenize.untokenize(out_tokens)
    with open("{}_parsed.py".format(file_name), "wb") as s:
        s.write(rebuilt)
def _get_trait_definition(self):
    """ Retrieve the Trait attribute definition """
    # Tokenize the source of the class that owns this trait.
    source = inspect.getsource(self.parent)
    token_stream = tokenize.generate_tokens(
        StringIO.StringIO(source).readline)

    # Advance until `<object_name> ... =` has been consumed.
    name_found = False
    while True:
        item = next(token_stream)
        if name_found and item[:2] == (token.OP, '='):
            break
        if item[:2] == (token.NAME, self.object_name):
            name_found = True

    # Everything from here to the end of the statement is the definition.
    definition_tokens = _get_definition_tokens(token_stream)
    return tokenize.untokenize(definition_tokens).strip()
def transform_source(text):
    '''Replaces instances of

        repeat n:

    by

        for __VAR_i in range(n):

    where __VAR_i is a string that does not appear elsewhere
    in the code sample.
    '''
    loop_keyword = 'repeat'
    occurrences = text.count(loop_keyword)
    if not occurrences:
        return text

    fresh_names = get_unique_variable_names(text, occurrences)
    new_tokens = []
    pending_lines = []  # line numbers of 'repeat' keywords awaiting ':'
    token_stream = tokenize.generate_tokens(StringIO(text).readline)
    for tok_type, tok_val, tok_start, _, _ in token_stream:
        if tok_type == tokenize.NAME and tok_val == loop_keyword:
            new_tokens += [(tokenize.NAME, 'for'),
                           (tokenize.NAME, fresh_names.pop()),
                           (tokenize.NAME, 'in'),
                           (tokenize.NAME, 'range'),
                           (tokenize.OP, '(')]
            pending_lines.append(tok_start[0])
        elif pending_lines and tok_val == ':':
            if tok_start[0] != pending_lines.pop():
                raise SyntaxError("colon and 'repeat' must be on same line")
            new_tokens += [(tokenize.OP, ')'), (tokenize.OP, ':')]
        else:
            new_tokens.append((tok_type, tok_val))
    return tokenize.untokenize(new_tokens)
def transform_source(src):
    """Rewrite ``name ++`` into ``name = name + 1``.

    Tokenizes *src*, buffering each NAME (and a single following ``+``) so
    the two-plus increment pattern can be recognized; all other tokens pass
    through untouched.

    Bug fixed: the original dropped a pending ``+`` when the next token was
    a NAME, so ``a + b`` untokenized to ``a b``; the pending operator is
    now flushed before the new name is buffered.
    """
    toks = tokenize.generate_tokens(StringIO(src).readline)
    result = []
    last_name = None    # buffered NAME value, not yet emitted
    last_plus = False   # one '+' seen right after the buffered NAME
    for toktype, tokvalue, _, _, _ in toks:
        if toktype == tokenize.NAME:
            if last_name is not None:
                # Flush the previous name and any pending '+' before
                # buffering the new name (which may itself be incremented).
                result.append((tokenize.NAME, last_name))
                if last_plus:
                    result.append((tokenize.OP, '+'))
                    last_plus = False
            last_name = tokvalue
        elif last_name is not None:
            if toktype == tokenize.OP and tokvalue == '+':
                if last_plus:
                    # NAME '+' '+'  ->  NAME = NAME + 1
                    result.extend([(tokenize.NAME, last_name),
                                   (tokenize.OP, '='),
                                   (tokenize.NAME, last_name),
                                   (tokenize.OP, '+'),
                                   (tokenize.NUMBER, '1')])
                    last_plus = False
                    last_name = None
                else:
                    last_plus = True
            else:
                # Any other token ends the pattern; flush the buffer.
                result.append((tokenize.NAME, last_name))
                if last_plus:
                    result.append((tokenize.OP, '+'))
                    last_plus = False
                result.append((toktype, tokvalue))
                last_name = None
        else:
            result.append((toktype, tokvalue))
    if last_name:
        result.append((tokenize.NAME, last_name))
        if last_plus:
            result.append((tokenize.OP, '+'))
    return tokenize.untokenize(result)
def transform_settings(app_name, filename, key, transformer):
    """Encrypt the secure settings of an app's settings file, in place.

    Reads the comma separated list of setting names from the config file's
    `security` section (`secure_settings` option), feeds the settings file
    through the tokenizer with a `parse_settings` callback that collects
    transformed tokens, and rewrites the file with the result.  Does
    nothing when the `secure_settings` option is absent.

    Params
        app_name: Application name. The settings of this app are the ones
            parsed.
        filename: Name of the file holding the settings.
        key: Passed through to `parse_settings` -- presumably the cipher
            key used by `transformer`; confirm against its definition.
        transformer: Passed through to `parse_settings`; applied to the
            secure setting values.

    Raises
        ValueError if no settings file is found for that app name.
    """
    settings_path = os.path.join(get_current_path(), app_name, filename)
    if not os.path.isfile(settings_path):
        msg = "No settings found for {0!r} app".format(app_name)
        raise ValueError(msg)
    secure_settings_string = config("secure_settings", section="security")
    if secure_settings_string is not None:
        secure_settings = set(s.strip()
                              for s in secure_settings_string.split(","))
        with open(settings_path) as f:
            # `parse_settings` builds a token-eater callback that fills
            # cipher_tokens as the file is tokenized.
            cipher_tokens = []
            callback = parse_settings(cipher_tokens, secure_settings, key,
                                      transformer)
            # NOTE(review): tokenize.tokenize(readline, tokeneater) is the
            # Python 2 API; on Python 3 this call signature does not exist
            # -- confirm the intended runtime before porting.
            tokenize.tokenize(f.readline, callback)
            cipher_settings = tokenize.untokenize(cipher_tokens)
        # Overwrite the settings file with the transformed source.
        with open(settings_path, "w") as f:
            f.write(cipher_settings)
def _filter_header(s):
    """Clean up 'L' in npz header ints.

    Cleans up the 'L' in strings representing integers. Needed to allow npz
    headers produced in Python2 to be read in Python3.

    Parameters
    ----------
    s : byte string
        Npy file header.

    Returns
    -------
    header : str
        Cleaned up header.
    """
    import tokenize
    if sys.version_info[0] >= 3:
        from io import StringIO
    else:
        from StringIO import StringIO

    tokens = []
    last_token_was_number = False
    # Bug fix: generate_tokens() expects a *readline* callable; the original
    # passed the bound `.read` method, which hands the tokenizer the whole
    # header as a single unterminated "line".  The appended newline also
    # works around a tokenizer quirk in Python 2.7.5 (this mirrors the
    # sibling _filter_header implementation earlier in this file).
    string = asstr(s) + "\n"
    for token in tokenize.generate_tokens(StringIO(string).readline):
        token_type = token[0]
        token_string = token[1]
        if (last_token_was_number and
                token_type == tokenize.NAME and
                token_string == "L"):
            # Drop the Python 2 long-integer suffix.
            continue
        else:
            tokens.append(token)
        last_token_was_number = (token_type == tokenize.NUMBER)
    # Strip the newline added above so the result matches the header text.
    return tokenize.untokenize(tokens)[:-1]