def __substituteVars(self, code, env):
    ''' Expand any variables that exist in the given environment to their corresponding values '''
    # tokenize the given expression code
    gtoks = tokenize.generate_tokens(StringIO.StringIO(code).readline)
    # iterate over each token and replace any matching token with its corresponding value
    tokens = []
    for toknum, tokval, _, _, _ in gtoks:
        if toknum == tokenize.NAME and tokval in env:
            # re-tokenize the substituted value, keeping only its real tokens:
            # the nested stream also emits NEWLINE/ENDMARKER tokens, which
            # would corrupt the final untokenize if copied across
            ntoks = tokenize.generate_tokens(StringIO.StringIO(str(env[tokval])).readline)
            tokens.extend((ntoknum, ntokval) for ntoknum, ntokval, _, _, _ in ntoks
                          if ntoknum not in (tokenize.NEWLINE, tokenize.ENDMARKER))
        else:
            tokens.append((toknum, tokval))
    # convert the tokens back to a string
    code = tokenize.untokenize(tokens)
    # remove all the leading and trailing spaces
    code = code.strip()
    # return the modified string
    return code
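# Usage sketch (not part of the original class): the same NAME-for-value
# substitution technique, standalone, for numeric environments. Python 3
# here; untokenize's compat mode decides the exact output spacing.
import tokenize
from io import StringIO

def substitute_vars(code, env):
    """Replace NAME tokens that appear in env with their values."""
    result = []
    for toknum, tokval, _, _, _ in tokenize.generate_tokens(StringIO(code).readline):
        if toknum == tokenize.NAME and tokval in env:
            result.append((tokenize.NUMBER, repr(env[tokval])))
        else:
            result.append((toknum, tokval))
    return tokenize.untokenize(result).strip()

print(substitute_vars("x + y * 2", {"x": 3, "y": 4}))  # e.g. '3 +4 *2'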
def generate_tokenizer(self, linenoList=None):
    """
    Build a token generator for the source file this object refers to
    (self.filename), optionally restricted to the given line numbers.
    """
    if linenoList is not None:
        # Generate a tokenizer over the listed lines only
        max_lineno = file_utils.count_lineno(self.filename)
        if not all((isinstance(num, int) and 1 <= num <= max_lineno) for num in linenoList):
            raise Exception("Invalid line number specification: %s" % linenoList)
        elif linecache.getline(self.filename, linenoList[-1]).strip().endswith('\\'):
            # the last line ends with a backslash continuation, so pull in the next line too
            return self.generate_tokenizer(linenoList + [linenoList[-1] + 1])
        gen = (linecache.getline(self.filename, lineno) for lineno in linenoList)

        def readline():
            try:
                line = gen.next()
            except StopIteration:
                return ""
            return line

        tokenizer = tokenize.generate_tokens(readline)
    else:
        # Generate a tokenizer over the whole file
        f = open(self.filename)
        tokenizer = tokenize.generate_tokens(f.readline)
    return tokenizer
def do_viprcli(self, command):
    # Command to be executed
    command = "viprcli " + command
    # Tokenize the command; each token tuple holds its string at index 1
    STRING = 1
    L2 = list(token[STRING] for token in
              generate_tokens(StringIO(command).readline) if token[STRING])
    # Check if this was a command other than authenticate
    if(L2[1] != "authenticate"):
        # If cf is set then append it to the command
        if(len(self.cf) != 0):
            command = command + " -cf " + self.cf
    # run the command
    output = commands.getoutput(command)
    # Find the cf information
    if(L2[1] == "authenticate"):
        self.cf = ""
        L1 = list(token[STRING] for token in
                  generate_tokens(StringIO(output).readline) if token[STRING])
        cf_length = len(L1) - 8
        for i in range(0, cf_length-1):
            self.cf = self.cf + str(L1[5 + i])
    print output
def test_check_dict_formatting_in_string(self): bad = [ '"%(a)s" % d', '"Split across "\n"multiple lines: %(a)f" % d', '"%(a)X split across "\n"multiple lines" % d', '"%(a)-5.2f: Split %("\n"a)#Lu stupidly" % d', '"Comment between " # wtf\n"split lines: %(a) -6.2f" % d', '"Two strings" + " added: %(a)-6.2f" % d', '"half legit (%(a)s %(b)s)" % d + " half bogus: %(a)s" % d', '("Parenthesized: %(a)s") % d', '("Parenthesized "\n"concatenation: %(a)s") % d', '("Parenthesized " + "addition: %(a)s") % d', '"Complete %s" % ("foolisness: %(a)s%(a)s" % d)', '"Modulus %(a)s" % {"a": (5 % 3)}', ] for sample in bad: sample = "print(%s)" % sample tokens = tokenize.generate_tokens(six.moves.StringIO(sample).readline) self.assertEqual(1, len(list(checks.check_dict_formatting_in_string(sample, tokens)))) sample = 'print("%(a)05.2lF" % d + " added: %(a)s" % d)' tokens = tokenize.generate_tokens(six.moves.StringIO(sample).readline) self.assertEqual(2, len(list(checks.check_dict_formatting_in_string(sample, tokens)))) good = ['"This one is okay: %(a)s %(b)s" % d', '"So is %(a)s"\n"this one: %(b)s" % d'] for sample in good: sample = "print(%s)" % sample tokens = tokenize.generate_tokens(six.moves.StringIO(sample).readline) self.assertEqual([], list(checks.check_dict_formatting_in_string(sample, tokens)))
def extract_docstring(self): """ Extract a module-level docstring """ lines = open(self.filename).readlines() start_row = 0 if lines[0].startswith('#!'): lines.pop(0) start_row = 1 docstring = '' first_par = '' if sys.version_info[0] >= 3: tokens = tokenize.generate_tokens(lines.__iter__().__next__) else: tokens = tokenize.generate_tokens(lines.__iter__().next) for tok_type, tok_content, _, (erow, _), _ in tokens: tok_type = token.tok_name[tok_type] if tok_type in ('NEWLINE', 'COMMENT', 'NL', 'INDENT', 'DEDENT'): continue elif tok_type == 'STRING': docstring = eval(tok_content) # If the docstring is formatted with several paragraphs, extract # the first one: paragraphs = '\n'.join(line.rstrip() for line in docstring.split('\n')).split('\n\n') if len(paragraphs) > 0: first_par = paragraphs[0] break self.docstring = docstring self.short_desc = first_par self.end_line = erow + 1 + start_row
def find_fold_points(block):
    """
    Returns a list of (start_row, end_row, indent) tuples that denote fold
    locations. Basically anywhere that there's an indent.
    """
    token_whitelist = (tokenize.NL,
                       tokenize.NEWLINE,
                       tokenize.INDENT,
                       tokenize.DEDENT,
                       tokenize.COMMENT,
                       )
    # temporary code that allows for running a block or a full file
    if os.path.isfile(block):
        # materialize the token list while the file is still open;
        # generate_tokens is lazy and needs a readline callable
        with open(block) as open_file:
            token_block = list(tokenize.generate_tokens(open_file.readline))
    else:
        token_block = tokenize.generate_tokens(StringIO(block).readline)

    indent_level = 0
    nl_counter = 0
    comment_counter = 0
    indents = []
    result = []

    for toknum, _, srowcol, _, _ in token_block:
        # Account for comments at the start of a block and newlines at the
        # end of a block.
        if toknum == tokenize.NL:
            nl_counter += 1
        if toknum == tokenize.COMMENT:
            comment_counter += 1

        if toknum == tokenize.INDENT:
            indent_level += 1
            indents.append(srowcol[0] - 1 - comment_counter)

        if toknum == tokenize.DEDENT:
            # the next DEDENT belongs to the most recent INDENT, so we pop off
            # the last indent from the stack
            indent_level -= 1
            matched_indent = indents.pop()
            result.append((matched_indent,
                           srowcol[0] - 1 - nl_counter,
                           indent_level + 1))

        if toknum not in token_whitelist:
            nl_counter = 0
            comment_counter = 0

    if len(indents) != 0:
        raise ValueError("Number of DEDENTs does not match number of INDENTs.")

    return result
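# A quick exercise of the fold logic on an inline block (assumes the
# function above and its `os`/`tokenize`/`StringIO` imports are in scope):
sample = ("def outer():\n"
          "    if True:\n"
          "        x = 1\n"
          "    return x\n")
for start, end, indent in find_fold_points(sample):
    print("fold rows %d-%d at depth %d" % (start, end, indent))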
def test_roundtrip(f): ## print 'Testing:', f fobj = open(f) try: fulltok = list(generate_tokens(fobj.readline)) finally: fobj.close() t1 = [tok[:2] for tok in fulltok] newtext = untokenize(t1) readline = iter(newtext.splitlines(1)).next t2 = [tok[:2] for tok in generate_tokens(readline)] if t1 != t2: raise TestFailed("untokenize() roundtrip failed for %r" % f)
def check_roundtrip(self, f): """ Test roundtrip for `untokenize`. `f` is an open file or a string. The source code in f is tokenized, converted back to source code via tokenize.untokenize(), and tokenized again from the latter. The test fails if the second tokenization doesn't match the first. """ if isinstance(f, str): f = StringIO(f) token_list = list(generate_tokens(f.readline)) f.close() tokens1 = [tok[:2] for tok in token_list] new_text = untokenize(tokens1) readline = iter(new_text.splitlines(1)).next tokens2 = [tok[:2] for tok in generate_tokens(readline)] self.assertEqual(tokens2, tokens1)
def columns_in_filters(filters): """ Returns a list of the columns used in a set of query filters. Parameters ---------- filters : list of str or str List of the filters as passed passed to ``apply_filter_query``. Returns ------- columns : list of str List of all the strings mentioned in the filters. """ if not filters: return [] if not isinstance(filters, str): filters = ' '.join(filters) columns = [] reserved = {'and', 'or', 'in', 'not'} for toknum, tokval, _, _, _ in generate_tokens(StringIO(filters).readline): if toknum == NAME and tokval not in reserved: columns.append(tokval) return list(tz.unique(columns))
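# Usage sketch, assuming the module-level imports this helper relies on
# (`from tokenize import generate_tokens, NAME`, `from io import StringIO`,
# and `import toolz as tz`):
filters = ["income > 40000", "tenure in (1, 2)"]
print(columns_in_filters(filters))  # ['income', 'tenure']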
def tokenize_python_to_unmatched_close_curly(source_text, start, line_starts): """Apply Python's tokenize to source_text starting at index start while matching open and close curly braces. When an unmatched close curly brace is found, return its index. If not found, return len(source_text). If there's a tokenization error, return the position of the error. """ stream = StringIO(source_text) stream.seek(start) nesting = 0 try: for kind, text, token_start, token_end, line_text \ in tokenize.generate_tokens(stream.readline): if text == '{': nesting += 1 elif text == '}': nesting -= 1 if nesting < 0: return token_pos_to_index(token_start, start, line_starts) except tokenize.TokenError as error: (message, error_pos) = error.args return token_pos_to_index(error_pos, start, line_starts) return len(source_text)
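# The brace-balancing core of the function above, sketched standalone (the
# token_pos_to_index bookkeeping is left out; returning as soon as nesting
# goes negative stops the lazy token stream before it can hit EOF errors):
import tokenize
from io import StringIO

def has_unmatched_close_curly(source):
    nesting = 0
    for _, text, _, _, _ in tokenize.generate_tokens(StringIO(source).readline):
        if text == '{':
            nesting += 1
        elif text == '}':
            nesting -= 1
            if nesting < 0:
                return True
    return False

print(has_unmatched_close_curly("d = {'a': 1}}"))  # True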
def tokenize_select(expression):
    '''This function returns the list of tokens present in a
    selection. The expression can contain parenthesis.
    It will use a subclass of str with the attribute level, which
    will specify the nesting level of the token into parenthesis.'''
    g = generate_tokens(StringIO(str(expression)).readline)
    l = list(token[1] for token in g)
    l.remove('')

    # Changes the 'a','.','method' token group into a single 'a.method' token
    try:
        while True:
            dot = l.index('.')
            l[dot] = '%s.%s' % (l[dot - 1], l[dot + 1])
            l.pop(dot + 1)
            l.pop(dot - 1)
    except ValueError:
        # no '.' token left to merge
        pass

    level = 0
    for i in range(len(l)):
        l[i] = level_string(l[i])
        l[i].level = level
        if l[i] == '(':
            level += 1
        elif l[i] == ')':
            level -= 1
    return l
def __init__(self, buffers): # type: (List[str]) -> None lines = iter(buffers) self.buffers = buffers self.tokens = tokenize.generate_tokens(lambda: next(lines)) self.current = None # type: Token self.previous = None # type: Token
def fixLazyJson (in_text):
    tokengen = tokenize.generate_tokens(StringIO(in_text).readline)

    result = []
    for tokid, tokval, _, _, _ in tokengen:
        # fix unquoted strings
        if (tokid == token.NAME):
            if tokval not in ['true', 'false', 'null', '-Infinity', 'Infinity', 'NaN']:
                tokid = token.STRING
                tokval = u'"%s"' % tokval

        # fix single-quoted strings
        elif (tokid == token.STRING):
            if tokval.startswith ("'"):
                tokval = u'"%s"' % tokval[1:-1].replace ('"', '\\"')

        # remove invalid commas
        elif (tokid == token.OP) and ((tokval == '}') or (tokval == ']')):
            if (len(result) > 0) and (result[-1][1] == ','):
                result.pop()

        result.append((tokid, tokval))

    return tokenize.untokenize(result)
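# Example input (assumes the same `tokenize`/`token`/`StringIO` imports the
# snippet uses); untokenize leaves some odd spacing, but whitespace is
# insignificant in JSON, so the result parses:
lazy = "{foo: 'bar', baz: [1, 2,]}"
print(fixLazyJson(lazy))  # roughly {"foo":"bar","baz":[1 ,2 ]}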
def set_url_param(parser, token):
    """
    Creates a URL (containing only the querystring [including "?"]) based on
    the current URL, but updated with the provided keyword arguments.

    Example::

        {% set_url_param name="help" age=20 %}
        ?name=help&age=20

    **Deprecated** as of 0.7.0, use `querystring`.
    """
    bits = token.contents.split()
    qschanges = {}
    for i in bits[1:]:
        try:
            key, value = i.split('=', 1)
            key = key.strip()
            value = value.strip()
            key_line_iter = StringIO.StringIO(key).readline
            keys = list(tokenize.generate_tokens(key_line_iter))
            if keys[0][0] == tokenize.NAME:
                # workaround bug #5270
                value = Variable(value) if value == '""' else parser.compile_filter(value)
                qschanges[str(key)] = value
            else:
                raise ValueError
        except ValueError:
            raise TemplateSyntaxError("Argument syntax wrong: should be "
                                      "key=value")
    return SetUrlParamNode(qschanges)
def is_mlab_example(filename): tokens = tokenize.generate_tokens(open(filename).readline) code_only = ''.join([tok_content for tok_type, tok_content, _, _, _ in tokens if not token.tok_name[tok_type] in ('COMMENT', 'STRING')]) return ('mlab.show()' in code_only)
def extract_docstring(filename): # Extract a module-level docstring, if any lines = open(filename).readlines() start_row = 0 if lines[0].startswith('#!'): lines.pop(0) start_row = 1 docstring = '' first_par = '' li = lines.__iter__() li_next = li.__next__ if hasattr(li, '__next__') else li.next tokens = tokenize.generate_tokens(li_next) for tok_type, tok_content, _, (erow, _), _ in tokens: tok_type = token.tok_name[tok_type] if tok_type in ('NEWLINE', 'COMMENT', 'NL', 'INDENT', 'DEDENT'): continue elif tok_type == 'STRING': docstring = eval(tok_content) # If the docstring is formatted with several paragraphs, extract # the first one: paragraphs = '\n'.join(line.rstrip() for line in docstring.split('\n')).split('\n\n') if len(paragraphs) > 0: first_par = paragraphs[0] break return docstring, first_par, erow+1+start_row
def tokenize(text): if not hasattr(text, 'readline'): readline = StringIO.StringIO(text).readline else: readline = text.readline for token in tokenizer.generate_tokens(readline): yield Token(*token)
def readFile(): global curId script = StringIO(QuestScripts.SCRIPT) def readLine(): return script.readline().replace('\r', '') gen = tokenize.generate_tokens(readLine) line = getLineOfTokens(gen) while line is not None: if line == []: line = getLineOfTokens(gen) continue if line[0] == 'ID': parseId(line) elif curId is None: notify.error('Every script must begin with an ID') else: lineDict[curId].append(line) line = getLineOfTokens(gen) script.close()
def analyse_file_by_tokens(filename, ignore_errors):
    """This function analyses a file and produces a dict with these members:

    - 'tokens': number of tokens;
    - 'bad_indentation': list of lines with a bad indentation;
    """
    stats = {'tokens': 0}
    plugins = [ cls() for cls in token_plugins ]
    for plugin in plugins:
        stats[plugin.key] = []

    tokens = generate_tokens(file(filename).readline)
    try:
        for token, value, (srow, scol), _, _ in tokens:
            # Tokens number
            stats['tokens'] += 1
            for plugin in plugins:
                if plugin.analyse_token(token, value, srow, scol):
                    stats[plugin.key].append(srow)
    except TokenError, e:
        if ignore_errors is False:
            raise e
        print e
        return {'tokens': 0}
    return stats
def _template_decorator(self, func): """Registers template as expected by _create_template_function. The template data consists of: - the function object as it comes from the sandbox evaluation of the template declaration. - its code, modified as described in the comments of this method. - the path of the file containing the template definition. """ if not inspect.isfunction(func): raise Exception('`template` is a function decorator. You must ' 'use it as `@template` preceding a function declaration.') name = func.func_name if name in self.templates: raise KeyError( 'A template named "%s" was already declared in %s.' % (name, self.templates[name][2])) if name.islower() or name.isupper() or name[0].islower(): raise NameError('Template function names must be CamelCase.') lines, firstlineno = inspect.getsourcelines(func) first_op = None generator = tokenize.generate_tokens(iter(lines).next) # Find the first indent token in the source of this template function, # which corresponds to the beginning of the function body. for typ, s, begin, end, line in generator: if typ == tokenize.OP: first_op = True if first_op and typ == tokenize.INDENT: break if typ != tokenize.INDENT: # This should never happen. raise Exception('Could not find the first line of the template %s' % func.func_name) # The code of the template in moz.build looks like this: # m def Foo(args): # n FOO = 'bar' # n+1 (...) # # where, # - m is firstlineno - 1, # - n is usually m + 1, but in case the function signature takes more # lines, is really m + begin[0] - 1 # # We want that to be replaced with: # m if True: # n FOO = 'bar' # n+1 (...) # # (this is simpler than trying to deindent the function body) # So we need to prepend with n - 1 newlines so that line numbers # are unchanged. code = '\n' * (firstlineno + begin[0] - 3) + 'if True:\n' code += ''.join(lines[begin[0] - 1:]) self.templates[name] = func, code, self._context.current_path
def get_quotes_errors(self, file_contents): tokens = [Token(t) for t in tokenize.generate_tokens(lambda L=iter(file_contents): next(L))] for token in tokens: if token.type != tokenize.STRING: # ignore non strings continue if not token.string.startswith(self.quotes['bad_single']): # ignore strings that do not start with our quotes continue if token.string.startswith(self.quotes['bad_multiline']): # ignore multiline strings continue if self.quotes['good_single'] in token.string: # ignore alternate quotes wrapped in our quotes (e.g. `'` in `"it's"`) continue start_row, start_col = token.start yield { 'message': 'Q000 Remove bad quotes.', 'line': start_row, 'col': start_col, }
def decistmt(s): """Substitute Decimals for floats in a string of statements. >>> from decimal import Decimal >>> s = 'print +21.3e-5*-.1234/81.7' >>> decistmt(s) "print +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')" The format of the exponent is inherited from the platform C library. Known cases are "e-007" (Windows) and "e-07" (not Windows). Since we're only showing 12 digits, and the 13th isn't close to 5, the rest of the output should be platform-independent. >>> exec(s) #doctest: +ELLIPSIS -3.21716034272e-0...7 Output from calculations with Decimal should be identical across all platforms. >>> exec(decistmt(s)) -3.217160342717258261933904529E-7 """ result = [] g = generate_tokens(StringIO(s).readline) # tokenize the string for toknum, tokval, _, _, _ in g: if toknum == NUMBER and "." in tokval: # replace NUMBER tokens result.extend([(NAME, "Decimal"), (OP, "("), (STRING, repr(tokval)), (OP, ")")]) else: result.append((toknum, tokval)) return untokenize(result)
def extract (self, arg): arg = arg.strip('\r\n\t ') arg_type = [] arg_name = '' if not arg: return None, None import tokenize import StringIO g = tokenize.generate_tokens(StringIO.StringIO(arg).readline) tokens = [] for t in g: if not t[0] in (tokenize.NL, tokenize.ENDMARKER): tokens.append(t) p = -1 for i in xrange(len(tokens)): t = tokens[i] if t[0] == tokenize.NAME: p = i if p < 0: return None, None arg_name = tokens[p][1] g1 = ' '.join([ tokens[x][1] for x in xrange(p) ]) g2 = ' '.join([ tokens[x][1] for x in xrange(p + 1, len(tokens)) ]) g1 = g1.strip('\r\n\t ') g2 = g2.strip('\r\n\t ') if g2 == '[ ]': g2 = '[]' elif g2 == '[ ] [ ]': gs = '[][]' return (g1, g2), arg_name
def get_block_edges(source_file):
    """Return starting line numbers of code and text blocks

    Returns
    -------
    block_edges : list of int
        Line number for the start of each block. One extra line number is
        recorded after the end of each text block (for later slicing).
    idx_first_text_block : {0 | 1}
        0 if the first block is a text block, else 1 (in which case the
        second block is expected to be the text block).
    """
    block_edges = []
    with open(source_file) as f:
        token_iter = tokenize.generate_tokens(f.readline)
        for token_tuple in token_iter:
            t_id, t_str, (srow, scol), (erow, ecol), src_line = token_tuple
            if (token.tok_name[t_id] == 'STRING' and scol == 0):
                # Add one point to line after text (for later slicing)
                block_edges.extend((srow, erow+1))
    idx_first_text_block = 0
    # when example doesn't start with text block.
    if not block_edges[0] == 1:
        block_edges.insert(0, 1)
        idx_first_text_block = 1
    # when example doesn't end with text block.
    if not block_edges[-1] == erow:  # iffy: I'm using end state of loop
        block_edges.append(erow)
    return block_edges, idx_first_text_block
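# Exercising it on a throwaway file (the tempfile usage is illustrative):
import tempfile

src = '"""First text block."""\nx = 1\n"""Second text block."""\ny = 2\n'
with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as tmp:
    tmp.write(src)
print(get_block_edges(tmp.name))  # e.g. ([1, 2, 3, 4, 5], 0)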
def tokenize(readline):
    """ Tokenizer for the quasiquotes language extension. """
    tokens = generate_tokens(readline)
    tok_stream = PeekableIterator(itertools.starmap(TokenInfo, tokens))
    for t in tok_stream:
        if t == with_tok:
            # `with` quasiquote handling is not implemented yet
            continue
        elif t == left_bracket_tok:
            try:
                dol, name, pipe = tok_stream.peek(3)
            except ValueError:
                continue
            if dol == dollar_tok and pipe == pipe_tok:
                # consume the '$', name, and '|' tokens we just peeked
                tuple(islice(tok_stream, None, 3))
                for val in quote_expr_tokenizer(name, t, tok_stream):
                    yield val
                continue
        yield t
    return
def test_with_correct_code(self, MockNannyNag): """A python source code without any whitespace related problems.""" with TemporaryPyFile(SOURCE_CODES["error_free"]) as file_path: with open(file_path) as f: tabnanny.process_tokens(tokenize.generate_tokens(f.readline)) self.assertFalse(MockNannyNag.called)
def get_description(filename): """ #doc... For now, only the filename option is supported. Someday, support the other ones mentioned in our caller, ParameterDialog.__init__.__doc__. """ assert type(filename) == type(""), "get_description only supports filenames for now (and not even unicode filenames, btw)" file = open(filename, 'rU') gentok = generate_tokens(file.readline) res, newrest = parse_top(Whole, list(gentok)) if debug_parse: print len(` res `), 'chars in res' #3924 ## print res # might be an error message if newrest and debug_parse: # boolean test, since normal value is [] print "res is", res # assume it is an error message print "newrest is", newrest print "res[0].pprint() :" print res[0].pprint() #k if debug_parse: print "parse done" desc = res[0] #k class ThingData in parse_utils - move to another file? it stays with the toplevel grammar... return desc # from get_description
def fromstring(src, start=None):
    '''Return the list of tokens extracted from a string'''

    # Re-align the tokens if start is given
    if start is not None:
        line_no, col = start
        tokens = fromstring(src)

        # Shift the tokens on the first line horizontally
        for idx, tk in enumerate(tokens):
            if tk.start[0] != 1:
                break
            tokens[idx] = tkcopy(tk, hshift=col)

        # Shift all tokens vertically
        return [tkcopy(tk, vshift=line_no - 1) for tk in tokens]

    # Create new tokens starting at the first line
    current_string = src

    def iterlines():
        nonlocal current_string

        if current_string:
            line, sep, current_string = current_string.partition('\n')
            return line + sep
        else:
            raise StopIteration

    tokens = TokenStream(tokenize.generate_tokens(iterlines))
    tokens = list(map(Token, tokens))
    while tokens[-1].type == ENDMARKER:
        tokens.pop()
    return tokens
def _parse_line(self, line): """Parses a single line consisting of a tag-value pair and optional modifiers. Returns the tag name and the value as a `Value` object.""" match = self.line_re.match(line) if not match: return False tag, value_and_mod = match.group("tag"), match.group("value") # If the value starts with a quotation mark, we parse it as a # Python string -- luckily this is the same as an OBO string if value_and_mod and value_and_mod[0] == '"': stringio = StringIO(value_and_mod) gen = tokenize.generate_tokens(stringio.readline) for toknum, tokval, _, (_, ecol), _ in gen: if toknum == tokenize.STRING: value = eval(tokval) mod = (value_and_mod[ecol:].strip(),) break raise ParseError("cannot parse string literal", self.lineno) else: value = value_and_mod mod = None value = Value(value, mod) return tag, value
def check(file): """check(file_or_dir) If file_or_dir is a directory and not a symbolic link, then recursively descend the directory tree named by file_or_dir, checking all .py files along the way. If file_or_dir is an ordinary Python source file, it is checked for whitespace related problems. The diagnostic messages are written to standard output using the print statement. """ if os.path.isdir(file) and not os.path.islink(file): if verbose: print("%r: listing directory" % (file,)) names = os.listdir(file) for name in names: fullname = os.path.join(file, name) if (os.path.isdir(fullname) and not os.path.islink(fullname) or os.path.normcase(name[-3:]) == ".py"): check(fullname) return try: f = tokenize.open(file) except IOError as msg: errprint("%r: I/O Error: %s" % (file, msg)) return if verbose > 1: print("checking %r ..." % file) try: process_tokens(tokenize.generate_tokens(f.readline)) except tokenize.TokenError as msg: errprint("%r: Token Error: %s" % (file, msg)) return except IndentationError as msg: errprint("%r: Indentation Error: %s" % (file, msg)) return except NannyNag as nag: badline = nag.get_lineno() line = nag.get_line() if verbose: print("%r: *** Line %d: trouble in tab city! ***" % (file, badline)) print("offending line: %r" % (line,)) print(nag.get_msg()) else: if ' ' in file: file = '"' + file + '"' if filename_only: print(file) else: print(file, badline, repr(line)) return finally: f.close() if verbose: print("%r: Clean bill of health." % (file,))
def parse_file(src_filepy, dst_filepy, ctx): # pdb.set_trace() if ctx['opt_verbose']: print "Reading %s" % src_filepy fd = open(src_filepy, 'rb') tokenize.generate_tokens(fd.readline, ) fd.close() fd = open(src_filepy, 'rb') source = fd.read() fd.close() lines = source.split('\n') ctx = init_parse(ctx) if ctx['opt_gpl']: lines = write_license_info(lines, ctx) LAST_RID = -1 lineno = 0 del_empty_line = True ignore = None while lineno < len(lines): if ctx['open_doc'] != 2 and re.match('.*"""', lines[lineno]): if len(lines[lineno]) > 79: ln1, ln2 = split_line(lines[lineno]) if ln2: lines[lineno] = ln2 lines.insert(lineno, ln1) if ctx['open_doc'] == 1: ctx['open_doc'] = 0 elif re.match('.*""".*"""', lines[lineno]): pass else: ctx['open_doc'] = 1 lines, meta, rid = update_4_api(lines, lineno, ctx) del_empty_line = False elif ctx['open_doc'] != 1 and re.match('\s*"""', lines[lineno]): if len(lines[lineno]) > 79: ln1, ln2 = split_line(lines[lineno]) if ln2: lines[lineno] = ln2 lines.insert(lineno, ln1) if ctx['open_doc'] == 2: ctx['open_doc'] = 0 elif re.match(".*'''.*'''", lines[lineno]): pass else: ctx['open_doc'] = 2 lines, meta, rid = update_4_api(lines, lineno, ctx) del_empty_line = False elif ctx['open_doc']: lines, meta, rid = update_4_api(lines, lineno, ctx) del_empty_line = False elif lines[lineno] == "": if del_empty_line: del lines[lineno] lineno -= 1 else: ctx['empty_line'] += 1 else: if lines[lineno][0] != '#': del_empty_line = False lines, meta, rid = update_4_api(lines, lineno, ctx, ignore=ignore) ignore = None if meta: if meta in ('+B', '-B', '+b', '-b', '#'): pass elif meta[0] == '+': nebef = eval(meta[1]) lines, lineno, ctx = set_empty_lines( lines, lineno, nebef, True, ctx) elif meta[0] == '*': nebef = eval(meta[1]) lines, lineno, ctx = set_empty_lines( lines, lineno, nebef, rid != LAST_RID, ctx) elif meta == '&&': ignore = meta tk = "and" move_tk_line_up(lines, lineno, tk) elif meta == '||': ignore = meta tk = "or" move_tk_line_up(lines, lineno, tk) elif meta == '^+': ignore = meta tk = "+" move_tk_line_up(lines, lineno, tk) elif meta == '^-': ignore = meta tk = "-" move_tk_line_up(lines, lineno, tk) elif meta == 'del1': del lines[lineno + 1] elif meta == '-u': ignore = meta nebef = 2 lines, lineno, ctx = set_empty_lines( lines, lineno, nebef, True, ctx) lineno += 1 lines.insert(lineno, ' def env7(self, model):') lineno += 1 lines.insert(lineno, ' return self.registry(model)') elif meta in ('-u0', '-u1', '-u2', '-u3'): ignore = meta line = lines[lineno] tabstop, line_ctrs = parse_tokens_line(line) if line_ctrs['any_paren'] >= 0: # lm = ' ' * line_ctrs['lm'] while line_ctrs['any_paren'] > 0 or \ line_ctrs['cont_line']: if line_ctrs['cont_line']: line = line[0:-1] del lines[lineno] tabstop, line_ctrs = parse_tokens_line( lines[lineno], ctrs=line_ctrs) line = line + ' ' + lines[lineno].strip() del lines[lineno] tabstop, line_ctrs = parse_tokens_line(line) # print "<%s>" % (line) #debug ipos = -1 states = {} tabstop_rule = {} tabstop_beg = {} tabstop_end = {} paren_ctrs = {} line_ctrs['paren'] = 0 line_ctrs['brace'] = 0 line_ctrs['bracket'] = 0 paren_ctrs['paren'] = -1 paren_ctrs['brace'] = -1 paren_ctrs['bracket'] = -1 for inxt in sorted(tabstop): if tabstop[inxt] == 'space': continue elif ipos < 0: ipos = inxt continue istkn = tabstop[ipos] tok = line[ipos:inxt].strip() if istkn == 'rparen': line_ctrs['paren'] -= 1 elif istkn == 'rbrace': line_ctrs['brace'] -= 1 elif istkn == 'rbracket': line_ctrs['bracket'] -= 1 for ir in SPEC_SYNTAX.keys(): irule = SPEC_SYNTAX[ir] if ir not in 
states: states[ir] = 0 tabstop_rule[ir] = ipos if states[ir] < 0: pass elif isinstance(irule[states[ir]], bool): if irule[states[ir]]: if states[ir] == 0: tabstop_rule[ir] = ipos states[ir] += 1 else: states[ir] = 0 elif tok == irule[states[ir]]: if states[ir] == 0: tabstop_rule[ir] = ipos states[ir] += 1 else: tabstop_rule[ir] = ipos if ir[0:3] == 'equ': states[ir] = -1 else: states[ir] = 0 if states[ir] >= len(irule): if istkn == 'rparen' and \ paren_ctrs['paren'] < \ line_ctrs['paren']: states[ir] = 0 elif istkn == 'rbrace' and \ paren_ctrs['brace'] < \ line_ctrs['brace']: states[ir] = 0 elif istkn == 'rbracket' and \ paren_ctrs['bracket'] < \ line_ctrs['bracket']: states[ir] = 0 elif ir == 'clo1': ir1 = paren_ctrs['-paren'] if ir == 'icr1' or ir1 == 'env2': states[ir] = 0 if states[ir] >= len(irule): if ir[0:3] == 'clo': if ir == 'clo1' or ir == 'clo4': ir1 = paren_ctrs['-paren'] elif ir == 'clo2': ir1 = paren_ctrs['-brace'] elif ir == 'clo3': ir1 = paren_ctrs['-bracket'] ir1 = '-' + ir1 tabstop_beg[ir1] = tabstop_rule[ir] tabstop_end[ir1] = inxt if ir1 == '-icr1' or ir1 == '-env2': tabstop_beg[ir1] += 1 elif ir[0:3] == 'equ': tabstop_beg[ir] = tabstop_rule[ir] tabstop_end[ir] = ipos elif ir[0:3] == 'icr': tabstop_beg[ir] = ipos + 1 tabstop_end[ir] = inxt else: tabstop_beg[ir] = tabstop_rule[ir] tabstop_end[ir] = inxt if ir[0:3] == 'equ': states[ir] = -1 else: states[ir] = 0 if istkn == 'lparen': paren_ctrs['paren'] = \ line_ctrs['paren'] paren_ctrs['-paren'] = ir elif istkn == 'lbrace': paren_ctrs['brace'] = \ line_ctrs['brace'] paren_ctrs['-brace'] = ir elif istkn == 'lbracket': paren_ctrs['bracket'] = \ line_ctrs['bracket'] paren_ctrs['-bracket'] = ir if istkn == 'lparen': line_ctrs['paren'] += 1 elif istkn == 'lbrace': line_ctrs['brace'] += 1 elif istkn == 'lbracket': line_ctrs['bracket'] += 1 ipos = inxt tabstop_rule = {} line1 = '' found_srch = False for ir in tabstop_beg: ipos = tabstop_beg[ir] tabstop_rule[ipos] = ir for ipos in sorted(tabstop_rule, reverse=True): ir = tabstop_rule[ipos] if ir == '-icr1': found_srch = True line1 = line[ipos:] line = line[0:ipos] + line[tabstop_end[ir]:] elif ir == '-env2': line = line[0:ipos] + line[tabstop_end[ir]:] elif ir[0:4] == '-env': line = line[0:ipos] + ')' + line[ipos + 1:] elif ir[0:3] == 'icr': line = line[0:ipos] + 'self.cr, self.uid, ' + \ line[ipos:] elif ir == 'env1': tok = line[tabstop_beg[ir]:tabstop_end[ir]] tok = tok.replace('env[', 'env7(') line = line[0:ipos] + tok + \ line[tabstop_end[ir]:] elif ir[0:3] == 'env': tok = line[tabstop_beg[ir]:tabstop_end[ir]] tok = tok.replace('self.env.ref(', 'self.ref(') line = line[0:ipos] + tok + \ line[tabstop_end[ir]:] elif ir[0:3] == 'equ' and found_srch: line1 = line[0:tabstop_beg['icr1'] - 7] + \ 'browse(ids[0])' + line1 line = line[0:ipos] + 'ids ' + \ line[tabstop_end[ir]:] lines.insert(lineno, line) if line1: lines.insert(lineno + 1, line1) ignore = None ctx['empty_line'] = 0 if len(lines[lineno]) > 79: ln1, ln2 = split_line(lines[lineno]) if ln2: lines[lineno] = ln2 lines.insert(lineno, ln1) if not ignore or not lines[lineno]: lineno += 1 LAST_RID = rid lineno = len(lines) - 1 while lineno > 2 and lines[lineno] == "": del lines[lineno] lineno = len(lines) - 1 lineno = 0 if not ctx['dry_run'] and len(lines): if ctx['opt_verbose']: print "Writing %s" % dst_filepy fd = open(dst_filepy, 'w') fd.write(''.join('%s\n' % l for l in lines)) fd.close() return 0
def parse(query): """ Parse query of the form: ra, dec, u , g, r, sdss.u, sdss.r, tmass.*, func(ra,dec) as xx WHERE (expr) """ g = tokenize.generate_tokens(StringIO.StringIO(query).readline) where_clause = 'True' select_clause = [] from_clause = [] into_clause = None first = True try: for (id, token, _, _, _) in g: if first: # Optional "SELECT" first = False if token.lower() == "select": continue if id == tokenize.ENDMARKER: break col = '' while token.lower() not in ['', ',', 'where', 'as', 'from']: col = col + token if token == '(': # find matching ')' pt = 1 while pt: (_, token, _, _, _) = next(g) if token == '(': pt = pt + 1 if token == ')': pt = pt - 1 col = col + token (_, token, _, _, _) = next(g) if col == '': raise Exception('Syntax error') # Parse column for the simple case of col='*' or col='table.*' if col == '*' or len(col) > 2 and col[-2:] == '.*': # wildcards tbl = col[:-2] newcols = [([], col)] else: # as token is disallowed after wildcards if token.lower() == 'as': # expect: # ... as COLNAME # ... as (COL1, COL2, ...) (_, name, _, _, _) = next(g) if name == '(': token = ',' names = [] while token != ')': assert token == ',' (_, name, _, _, _) = next(g) # Expect the column name names.append(name) (_, token, _, _, _) = next(g) # Expect , or ')' else: names = [name] (_, token, _, _, _) = next(g) else: names = [] newcols = [(names, col)] # Column delimiter or end of SELECT clause if token.lower() in ['', ',', 'from']: select_clause += newcols if token.lower() == "from": # FROM clause while token.lower() not in ['', 'where', 'into']: # Slurp the table path, allowing for db.tabname constructs (_, table, _, _, _) = next(g) # table path token = next(g)[1] if token == '.': table += '.' + next(g)[1] token = next(g)[1] table = unquote(table) # At this point we expect: # ... [EOL] # <-- end of line # ... WHERE # ... (inner/outer) # ... AS asname # ... 
(inner/outer) AS asname join_args = [] astable = table for _ in xrange(2): if token == '(': args, token = parse_args( g, token, valid_keys_from) if 'inner' in args and 'outer' in args: raise Exception( 'Cannot simultaneously have both "inner" and "outer" as join type' ) if len(args): join_args.append(args) elif token.lower() == 'as': # table rename (_, astable, _, _, _) = next(g) (_, token, _, _, _) = next(g) # next token break elif token.lower() in ['', ',', 'where', 'into']: break (_, token, _, _, _) = next(g) if not join_args: join_args.append(dict()) from_clause += [(astable, table, join_args)] # WHERE clause (optional) if token.lower() == 'where': # WHERE clause where_clause = '' (_, token, _, _, _) = next(g) while token.lower() not in ['', 'into']: where_clause = where_clause + token (_, token, _, _, _) = next(g) # INTO clause (optional) if token.lower() == 'into': (_, table, _, _, _) = next(g) (_, token, _, _, _) = next(g) into_col = keyexpr = None into_args = {} kind = 'append' # Look for explicit into_args in parenthesis if token == '(': into_args, token = parse_args( g, token, valid_keys_into) #dtype = '' #(_, token, _, _, _) = next(g) #while token not in [')']: # dtype += token # (_, token, _, _, _) = next(g) (_, token, _, _, _) = next(g) # Look for WHERE xx = expr clause (update key specification) # or for AT idexpr clause (insert with given IDs) if token.lower() in ['where', 'at']: if token.lower() == 'where': # WHERE xx = expr construct (_, into_col, _, _, _) = next( g ) # column against which to mach in the INTO table (_, token, _, _, _) = next(g) # must be '=' if token == '==': kind = 'update/ignore' # update if exists, ignore otherwise elif token == '|=': kind = 'update/insert' # update if exists, insert otherwise else: raise Exception( 'Syntax error in INTO clause near "%s" (expected "==")', token) else: # AT expr construct into_col = '_ID' kind = 'insert' # slurp up everything to the end -- this will be the expr giving the keys tokens = [] while token != '': (_, token, _, _, _) = next(g) tokens.append(token) keyexpr = ''.join(tokens) into_clause = (table, into_args, into_col, keyexpr, kind) if token != '': raise Exception('Syntax error near "%s"', token) break except list as dummy: #except StopIteration: pass return (select_clause, where_clause, from_clause, into_clause)
def normalize_token_spacing(code): tokens = [(t[0], t[1]) for t in tokenize.generate_tokens(StringIO(code).readline)] return pretty_untokenize(tokens)
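# Usage sketch; pretty_untokenize is the helper defined alongside this
# function in patsy, which rebuilds source text with normalized spacing:
print(normalize_token_spacing("f( x ,y= 1 )"))  # e.g. 'f(x, y=1)'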
import tokenize reader = open("endless_func.py").readline print(reader) print(type(reader)) tokens = tokenize.generate_tokens(reader) #tokens is a generator. print(tokens) print(next(tokens)) print(next(tokens)) print(next(tokens))
ERRORCODE_REGEX = re.compile(r'[EW]\d{3}')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
COMPARE_TYPE_REGEX = re.compile(r'([=!]=|is|is\s+not)\s*type(?:s\.(\w+)Type'
                                r'|\(\s*(\(\s*\)|[^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(?:[^\s])(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
OPERATOR_REGEX = re.compile(r'(?:[^\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)')
LAMBDA_REGEX = re.compile(r'\blambda\b')
HUNK_REGEX = re.compile(r'^@@ -\d+,\d+ \+(\d+),(\d+) @@.*$')

# Work around Python < 2.6 behaviour, which does not generate NL after
# a comment which is on a line by itself.
COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'

# Define an instance-options holder for the settings to be analyzed
class Options:
    pass
options = Options()

# Set the options to be analyzed
options.show_source = True
options.repeat = True
options.show_pep8 = False
options.messages = {}
options.select = []
options.ignore = []
def _get_tokens(statement): return [ x[1] for x in generate_tokens(StringIO(statement).readline) if x[1] not in ("", "\n") ]
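# Usage sketch (assumes `from tokenize import generate_tokens` and
# `from io import StringIO` at module level):
print(_get_tokens("a = b + 1"))  # ['a', '=', 'b', '+', '1']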
def __init__(self, text): self.text = text i = iter([text]) readline = lambda: next(i) self.tokens = tokenize.generate_tokens(readline) self.index = 0
def __init__(self, filelike): self._generator = tk.generate_tokens(filelike.readline) self.current = Token(*next(self._generator, None)) self.line = self.current.start[0] self.log = log self.got_logical_newline = True
        a.pop()
    elif s.strip() == '':  # whitespace
        pass
    else:  # operators down here
        try:
            opDetails = operators[s]
        except KeyError:
            print('Unknown operator:', s)
            return  # skip tokens we do not know how to handle
        # shunting-yard: pop operators of higher or equal precedence into
        # the output queue `o` before pushing the new operator onto the
        # stack `a` (names matching the handle(t, s, o, a) call below)
        while len(a) > 0 and not a[-1] == '(' and a[-1][0] <= opDetails[0]:
            o.append(a.pop())
        a.append(opDetails + (s, ))
else:
    pass

for token in generate_tokens(StringIO(dataz).readline):  # Main tokenizer loop
    t, s = token.type, token.string
    handle(t, s, output, stack)
output += reversed(stack)

result, prints = [], []
for var in output:  # Computations loop
    if isinstance(var, int):
        result.append(var)
        prints.append(str(var))
    else:
        o1, o2 = result.pop(), result.pop()
        result.append(var[1](o2, o1))
        prints.append(var[2])
def Exec(content, filename='<unknown>', vars_override=None, builtin_vars=None): """Safely execs a set of assignments.""" def _validate_statement(node, local_scope): if not isinstance(node, ast.Assign): raise ValueError('unexpected AST node: %s %s (file %r, line %s)' % (node, ast.dump(node), filename, getattr(node, 'lineno', '<unknown>'))) if len(node.targets) != 1: raise ValueError( 'invalid assignment: use exactly one target (file %r, line %s)' % (filename, getattr(node, 'lineno', '<unknown>'))) target = node.targets[0] if not isinstance(target, ast.Name): raise ValueError( 'invalid assignment: target should be a name (file %r, line %s)' % (filename, getattr(node, 'lineno', '<unknown>'))) if target.id in local_scope: raise ValueError( 'invalid assignment: overrides var %r (file %r, line %s)' % (target.id, filename, getattr(node, 'lineno', '<unknown>'))) node_or_string = ast.parse(content, filename=filename, mode='exec') if isinstance(node_or_string, ast.Expression): node_or_string = node_or_string.body if not isinstance(node_or_string, ast.Module): raise ValueError('unexpected AST node: %s %s (file %r, line %s)' % (node_or_string, ast.dump(node_or_string), filename, getattr(node_or_string, 'lineno', '<unknown>'))) statements = {} for statement in node_or_string.body: _validate_statement(statement, statements) statements[statement.targets[0].id] = statement.value # The tokenized representation needs to end with a newline token, otherwise # untokenization will trigger an assert later on. # In Python 2.7 on Windows we need to ensure the input ends with a newline # for a newline token to be generated. # In other cases a newline token is always generated during tokenization so # this has no effect. # TODO: Remove this workaround after migrating to Python 3. content += '\n' tokens = { token[2]: list(token) for token in tokenize.generate_tokens(StringIO(content).readline) } local_scope = _NodeDict({}, tokens) # Process vars first, so we can expand variables in the rest of the DEPS file. vars_dict = {} if 'vars' in statements: vars_statement = statements['vars'] value = _gclient_eval(vars_statement, filename) local_scope.SetNode('vars', value, vars_statement) # Update the parsed vars with the overrides, but only if they are already # present (overrides do not introduce new variables). vars_dict.update(value) if builtin_vars: vars_dict.update(builtin_vars) if vars_override: vars_dict.update( {k: v for k, v in vars_override.items() if k in vars_dict}) for name, node in statements.items(): value = _gclient_eval(node, filename, vars_dict) local_scope.SetNode(name, value, node) try: return _GCLIENT_SCHEMA.validate(local_scope) except schema.SchemaError as e: raise gclient_utils.Error(str(e))
def tokenize_str(code): return list(tokenize.generate_tokens(StringIO.StringIO(code).readline))
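# A Python 3 equivalent, for reference (io.StringIO replaces the removed
# StringIO module; the hypothetical helper name is otherwise unchanged):
import tokenize
from io import StringIO

def tokenize_str_py3(code):
    return list(tokenize.generate_tokens(StringIO(code).readline))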
def __init__(self, term, is_blob=False): """ Pass in a string (or possibly another term object), and is parsed. If is_blob is True, we do not do any parsing (other than squeezing out internal spaces). An equation is allowed one "blob" term, which is the first term. It may be followed by non-blob terms. As parsing improves, terms can be peeled off of the "blob." :param term: str :param is_blob: False """ if type(term) == Term: self.Constant = term.Constant self.Term = term.Term self.IsSimple = term.IsSimple # Ignore the is_blob input self.IsBlob = term.IsBlob return # Force to be a string; remove whitespace term_s = str(term).strip() # internal spaces do not matter term_s = term_s.replace(' ', '') if is_blob: # If we are a "blob", don't do any parsing. self.Constant = 1.0 self.Term = term_s self.IsSimple = True self.IsBlob = True return self.IsBlob = False # Rule #1: Eliminate '+' or '-' at front self.Constant = 1.0 if term_s.startswith('+'): term_s = term_s[1:] elif term_s.startswith('-'): self.Constant = -1.0 term_s = term_s[1:] # Rule #2: Allow matched "(" if term_s.startswith('('): if not term_s.endswith(')'): raise SyntaxError('Term does not have matching ) - ' + str(term)) # Remove brackets term_s = term_s[1:-1] # If we peeled the brackets, remove '+' or '-' again if term_s.startswith('+'): term_s = term_s[1:] elif term_s.startswith('-'): # Flip the sign self.Constant *= -1.0 term_s = term_s[1:] # We now cannot have embedded '+' or '-' signs. if '+' in term_s: raise LogicError('Term cannot contain interior "+" :' + str(term)) if '-' in term_s: raise LogicError('Term cannot contain interior "-" :' + str(term)) # Do we consist of anything besides a single name token? # If so, we are not simple. # (Will eventually allow for things like '2*x'.) if len(term_s) == 0: raise LogicError('Attempting to create an empty term object.') if is_python_3: g = tokenize.tokenize(BytesIO( term_s.encode('utf-8')).readline) # tokenize the string else: # pragma: no cover [Do my coverage on Python 3] g = tokenize.generate_tokens( BytesIO( term_s.encode('utf-8')).readline) # tokenize the string self.IsSimple = True g = tuple(g) if is_python_3: if not g[0][0] == ENCODING: # pragma: no cover raise LogicError('Internal error: tokenize behaviour changed') if not g[-1][0] == ENDMARKER: # pragma: no cover raise LogicError('Internal error: tokenize behaviour changed') if len(g) > 3: if len(g) == 5: # Allow variable*variable as a "simple" Variable. if g[1][0] == NAME and g[3][0] == NAME and g[2][0] == OP: if g[2][1] in ('*', '/'): self.Term = term_s return raise NotImplementedError('Non-simple parsing not done') # self.IsSimple = False else: if not g[1][0] == NAME: raise NotImplementedError('Non-simple parsing not done') # self.IsSimple = False self.Term = term_s else: # Python 2.7 # pragma: no cover # Missing the first term - augh if not g[-1][0] == ENDMARKER: # pragma: no cover raise LogicError('Internal error: tokenize behaviour changed') if len(g) > 3: if len(g) == 4: # Allow variable*variable as a "simple" Variable. if g[0][0] == NAME and g[2][0] == NAME and g[1][0] == OP: if g[1][1] in ('*', '/'): self.Term = term_s return raise NotImplementedError('Non-simple parsing not done') # self.IsSimple = False else: if not g[0][0] == NAME: raise NotImplementedError('Non-simple parsing not done') # self.IsSimple = False self.Term = term_s
def main(): global default_keywords try: opts, args = getopt.getopt(sys.argv[1:], 'ad:DEhk:Kno:p:S:Vvw:x:X:', [ 'extract-all', 'default-domain=', 'escape', 'help', 'keyword=', 'no-default-keywords', 'ngettext-keyword=', 'add-location', 'no-location', 'output=', 'output-dir=', 'style=', 'verbose', 'version', 'width=', 'exclude-file=', 'docstrings', 'no-docstrings', ]) except getopt.error as msg: usage(1, msg) # for holding option values class Options: # constants GNU = 1 SOLARIS = 2 # defaults extractall = 0 # FIXME: currently this option has no effect at all. escape = 0 keywords = [] ngettext_keywords = [] outpath = '' outfile = 'messages.pot' writelocations = 1 locationstyle = GNU verbose = 0 width = 78 excludefilename = '' docstrings = 0 nodocstrings = {} options = Options() locations = { 'gnu': options.GNU, 'solaris': options.SOLARIS, } # parse options for opt, arg in opts: if opt in ('-h', '--help'): usage(0) elif opt in ('-a', '--extract-all'): options.extractall = 1 elif opt in ('-d', '--default-domain'): options.outfile = arg + '.pot' elif opt in ('-E', '--escape'): options.escape = 1 elif opt in ('-D', '--docstrings'): options.docstrings = 1 elif opt in ('-k', '--keyword'): options.keywords.append(arg) elif opt in ('--ngettext-keyword'): options.ngettext_keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): default_keywords = [] elif opt in ('-n', '--add-location'): options.writelocations = 1 elif opt in ('--no-location', ): options.writelocations = 0 elif opt in ('-S', '--style'): options.locationstyle = locations.get(arg.lower()) if options.locationstyle is None: usage(1, _('Invalid value for --style: %s') % arg) elif opt in ('-o', '--output'): options.outfile = arg elif opt in ('-p', '--output-dir'): options.outpath = arg elif opt in ('-v', '--verbose'): options.verbose = 1 elif opt in ('-V', '--version'): print(_('pygettext.py (xgettext for Python) %s') % __version__) sys.exit(0) elif opt in ('-w', '--width'): try: options.width = int(arg) except ValueError: usage(1, _('--width argument must be an integer: %s') % arg) elif opt in ('-x', '--exclude-file'): options.excludefilename = arg elif opt in ('-X', '--no-docstrings'): fp = open(arg) try: while 1: line = fp.readline() if not line: break options.nodocstrings[line[:-1]] = 1 finally: fp.close() # calculate escapes make_escapes(options.escape) # calculate all keywords options.keywords.extend(default_keywords) options.ngettext_keywords.extend(default_ngettext_keywords) options.keywords.extend(options.ngettext_keywords) # initialize list of strings to exclude if options.excludefilename: try: fp = open(options.excludefilename) options.toexclude = fp.readlines() fp.close() except IOError: print_(_("Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr) sys.exit(1) else: options.toexclude = [] # resolve args to module lists expanded = [] for arg in args: if arg == '-': expanded.append(arg) else: expanded.extend(getFilesForName(arg)) args = expanded # slurp through all the files eater = TokenEater(options) for filename in args: if filename == '-': if options.verbose: print(_('Reading standard input')) fp = sys.stdin closep = 0 else: if options.verbose: print(_('Working on %s') % filename) fp = open(filename, 'rb') closep = 1 try: eater.set_filename(filename) try: if PY2: for token_info in tokenize.generate_tokens(fp.readline): eater(*token_info) else: for token_info in tokenize.tokenize(fp.readline): eater(*token_info) except tokenize.TokenError as e: print_('%s: %s, line %d, column %d' % (e[0], 
filename, e[1][0], e[1][1]), file=sys.stderr) except tokenize.StopTokenizing: pass finally: if closep: fp.close() # write the output if options.outfile == '-': fp = sys.stdout closep = 0 else: if options.outpath: options.outfile = os.path.join(options.outpath, options.outfile) fp = open(options.outfile, 'w') closep = 1 try: eater.write(fp) finally: if closep: fp.close()
def process_file(self, file): """ Process a file object. """ for token in tokenize.generate_tokens(file.next): self.process_token(*token) self.make_index()
import tokenize f = open("test_token.txt") tk = tokenize.generate_tokens(f.readline) for toknum, tokvalue, _, _, _ in tk: print toknum, tokvalue
def splitGybLines(sourceLines): r"""Return a list of lines at which to split the incoming source These positions represent the beginnings of python line groups that will require a matching %end construct if they are to be closed. >>> src = splitLines('''\ ... if x: ... print x ... if y: # trailing comment ... print z ... if z: # another comment\ ... ''') >>> s = splitGybLines(src) >>> len(s) 2 >>> src[s[0]] ' print z\n' >>> s[1] - len(src) 0 >>> src = splitLines('''\ ... if x: ... if y: print 1 ... if z: ... print 2 ... pass\ ... ''') >>> s = splitGybLines(src) >>> len(s) 1 >>> src[s[0]] ' if y: print 1\n' >>> src = splitLines('''\ ... if x: ... if y: ... print 1 ... print 2 ... ''') >>> s = splitGybLines(src) >>> len(s) 2 >>> src[s[0]] ' if y:\n' >>> src[s[1]] ' print 1\n' """ lastTokenText, lastTokenKind = None, None unmatchedIndents = [] dedents = 0 try: for tokenKind, tokenText, tokenStart, (tokenEndLine, tokenEndCol), lineText \ in tokenize.generate_tokens(lambda i=iter(sourceLines): next(i)): if tokenKind in (tokenize.COMMENT, tokenize.ENDMARKER): continue if tokenText == '\n' and lastTokenText == ':': unmatchedIndents.append(tokenEndLine) # The tokenizer appends dedents at EOF; don't consider # those as matching indentations. Instead just save them # up... if lastTokenKind == tokenize.DEDENT: dedents += 1 # And count them later, when we see something real. if tokenKind != tokenize.DEDENT and dedents > 0: unmatchedIndents = unmatchedIndents[:-dedents] dedents = 0 lastTokenText, lastTokenKind = tokenText, tokenKind except tokenize.TokenError: # Let the later compile() call report the error return [] if lastTokenText == ':': unmatchedIndents.append(len(sourceLines)) return unmatchedIndents
def process(filename, list): print("-"*70) assert list # if this fails, readwarnings() is broken try: fp = open(filename) except IOError as msg: sys.stderr.write("can't open: %s\n" % msg) return 1 print("Index:", filename) f = FileContext(fp) list.sort() index = 0 # list[:index] has been processed, list[index:] is still to do g = tokenize.generate_tokens(f.readline) while 1: startlineno, endlineno, slashes = lineinfo = scanline(g) if startlineno is None: break assert startlineno <= endlineno is not None orphans = [] while index < len(list) and list[index][0] < startlineno: orphans.append(list[index]) index += 1 if orphans: reportphantomwarnings(orphans, f) warnings = [] while index < len(list) and list[index][0] <= endlineno: warnings.append(list[index]) index += 1 if not slashes and not warnings: pass elif slashes and not warnings: report(slashes, "No conclusive evidence") elif warnings and not slashes: reportphantomwarnings(warnings, f) else: if len(slashes) > 1: if not multi_ok: rows = [] lastrow = None for (row, col), line in slashes: if row == lastrow: continue rows.append(row) lastrow = row assert rows if len(rows) == 1: print("*** More than one / operator in line", rows[0]) else: print("*** More than one / operator per statement", end=' ') print("in lines %d-%d" % (rows[0], rows[-1])) intlong = [] floatcomplex = [] bad = [] for lineno, what in warnings: if what in ("int", "long"): intlong.append(what) elif what in ("float", "complex"): floatcomplex.append(what) else: bad.append(what) lastrow = None for (row, col), line in slashes: if row == lastrow: continue lastrow = row line = chop(line) if line[col:col+1] != "/": print("*** Can't find the / operator in line %d:" % row) print("*", line) continue if bad: print("*** Bad warning for line %d:" % row, bad) print("*", line) elif intlong and not floatcomplex: print("%dc%d" % (row, row)) print("<", line) print("---") print(">", line[:col] + "/" + line[col:]) elif floatcomplex and not intlong: print("True division / operator at line %d:" % row) print("=", line) elif intlong and floatcomplex: print("*** Ambiguous / operator (%s, %s) at line %d:" % ( "|".join(intlong), "|".join(floatcomplex), row)) print("?", line) fp.close()
def analyse(exctyp, value, tb): import tokenize import keyword import platform import application from gui.meta import get_libs_version_string app = application.get_app() trace = StringIO() nlines = 3 frecs = inspect.getinnerframes(tb, nlines) trace.write('Mypaint version: %s\n' % app.version) trace.write('System information: %s\n' % platform.platform()) trace.write('Using: %s\n' % (get_libs_version_string(), )) trace.write('Traceback (most recent call last):\n') for frame, fname, lineno, funcname, context, cindex in frecs: trace.write(' File "%s", line %d, ' % (fname, lineno)) args, varargs, varkw, lcls = inspect.getargvalues(frame) def readline(lno=[lineno], *args): if args: print args try: return linecache.getline(fname, lno[0]) finally: lno[0] += 1 all, prev, name, scope = {}, None, '', None for ttype, tstr, stup, etup, line in tokenize.generate_tokens( readline): if ttype == tokenize.NAME and tstr not in keyword.kwlist: if name: if name[-1] == '.': try: val = getattr(prev, tstr) except AttributeError: # XXX skip the rest of this identifier only break name += tstr else: assert not name and not scope scope, val = lookup(tstr, frame, lcls) name = tstr if val is not None: prev = val elif tstr == '.': if prev: name += '.' else: if name: all[name] = (scope, prev) prev, name, scope = None, '', None if ttype == tokenize.NEWLINE: break try: details = inspect.formatargvalues( args, varargs, varkw, lcls, formatvalue=lambda v: '=' + pydoc.text.repr(v)) except: # seen that one on Windows (actual exception was KeyError: self) details = '(no details)' trace.write(funcname + details + '\n') if context is None: context = ['<source context missing>\n'] trace.write(''.join([ ' ' + x.replace('\t', ' ') for x in filter(lambda a: a.strip(), context) ])) if len(all): trace.write(' variables: %s\n' % str(all)) trace.write('%s: %s' % (exctyp.__name__, value)) return trace
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    It returns an iterator yielding tuples in the following form
    ``(lineno, funcname, message, comments)``.

    Adapted from the corresponding pybabel built-in function, so that it
    understands the syntax of our custom `trans`/`trans_lazy` function and
    correctly parses the default message and the context.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :rtype: ``iterator``
    """
    funcname = lineno = message_lineno = None
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) or options.get("encoding", "UTF-8")
    future_flags = parse_future_flags(fileobj, encoding)

    if PY2:
        next_line = fileobj.readline
    else:
        next_line = lambda: fileobj.readline().decode(encoding)

    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ("def", "class"):
            in_def = True
        elif tok == OP and value == "(":
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ":":
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            if PY2:
                value = value.decode(encoding)
            value = value[1:].strip()
            if in_translator_comments and translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            nested = tok == NAME and value in keywords
            if (tok == OP and value == ")") or nested:
                if buf:
                    messages.append("".join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if (
                    translator_comments
                    and translator_comments[-1][0] < message_lineno - 1
                ):
                    translator_comments = []

                ### HERE start our modifications to pybabel's script
                if funcname in ["trans", "trans_lazy"]:
                    # `messages` will have all the string parameters to our function
                    # As we specify in the documentation of `trans`,
                    # the first will be the message ID, the second will be the default message
                    # and the (optional) third will be the message context
                    if len(messages) > 1 and messages[1]:
                        # If we have a default, add it as a special comment
                        # that will be processed by our `merge_catalogs` script
                        translator_comments.append(
                            (message_lineno, "default-message: " + messages[1])
                        )

                    if len(messages) > 2 and isinstance(messages[2], str):
                        context = messages[2]
                    else:
                        context = None

                    if context:
                        # if we have a context, trick pybabel to use `pgettext`
                        # so that it adds the context to the translation file
                        funcname = "pgettext"
                        messages = [context, messages[0]]
                else:
                    funcname = None
                ### HERE end our modifications to pybabel's script

                yield (
                    message_lineno,
                    funcname,
                    messages,
                    [comment[1] for comment in translator_comments],
                )

                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
                if nested:
                    funcname = value
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                code = compile(
                    "# coding=%s\n%s" % (str(encoding), value),
                    "<string>",
                    "eval",
                    future_flags,
                )
                value = eval(code, {"__builtins__": {}}, {})
                if PY2 and not isinstance(value, text_type):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ",":
                if buf:
                    messages.append("".join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ")":
            call_stack -= 1
        elif funcname and call_stack == -1:
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value
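# Hedged usage sketch for extract_python above: drive the extractor directly
# over a source file. The filename "module.py", the "NOTE:" comment tag and
# the _demo_extract_python name are hypothetical; the function expects a
# binary file object because it decodes each line itself (unless under PY2).
def _demo_extract_python():
    with open("module.py", "rb") as fileobj:
        for lineno, funcname, messages, comments in extract_python(
                fileobj, ("trans", "trans_lazy"), ("NOTE:",), {}):
            print(lineno, funcname, messages, comments)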
def analyse(exctyp, value, tb): import tokenize, keyword trace = StringIO() nlines = 3 frecs = inspect.getinnerframes(tb, nlines) trace.write('Traceback (most recent call last):\n') for frame, fname, lineno, funcname, context, cindex in frecs: trace.write(' File "%s", line %d, ' % (fname, lineno)) args, varargs, varkw, lcls = inspect.getargvalues(frame) def readline(lno=[lineno], *args): if args: print args try: return linecache.getline(fname, lno[0]) finally: lno[0] += 1 all, prev, name, scope = {}, None, '', None for ttype, tstr, stup, etup, line in tokenize.generate_tokens( readline): if ttype == tokenize.NAME and tstr not in keyword.kwlist: if name: if name[-1] == '.': try: val = getattr(prev, tstr) except AttributeError: # XXX skip the rest of this identifier only break name += tstr else: assert not name and not scope scope, val = lookup(tstr, frame, lcls) name = tstr if val: prev = val #print ' found', scope, 'name', name, 'val', val, 'in', prev, 'for token', tstr elif tstr == '.': if prev: name += '.' else: if name: all[name] = (scope, prev) prev, name, scope = None, '', None if ttype == tokenize.NEWLINE: break trace.write(funcname + inspect.formatargvalues( args, varargs, varkw, lcls, formatvalue=lambda v: '=' + pydoc.text.repr(v)) + '\n') trace.write(''.join([ ' ' + x.replace('\t', ' ') for x in filter(lambda a: a.strip(), context) ])) if len(all): trace.write(' variables: %s\n' % str(all)) trace.write('%s: %s' % (exctyp.__name__, value)) return trace
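# Hedged wiring sketch for the analyse() helpers above: install one of them
# as the process-wide exception hook. The name _excepthook is hypothetical,
# and sys/StringIO/inspect/linecache/pydoc plus the lookup() helper are
# assumed to be importable, as the functions themselves require.
def _excepthook(exctyp, value, tb):
    sys.stderr.write(analyse(exctyp, value, tb).getvalue() + '\n')

# sys.excepthook = _excepthook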
def parse_assignments(source):
    tokens = tokenize.generate_tokens(StringIO(source).readline)
    assignments = []
    state = 'need_variable'
    variable_name = None
    for token_type, token_string, start, end, line in tokens:
        if token_type == tokenize.NL:
            if state == 'need_variable':
                continue
            raise ParserSyntaxError("Newline not expected", start, end)
        if token_type == tokenize.COMMENT:
            continue
        if token_type == tokenize.ENDMARKER:
            break
        if (state == 'need_value' and token_type == tokenize.NAME
                and token_string in ('True', 'False', 'None')):
            token_type = 'SPECIAL_VALUE'
        if token_type == tokenize.NAME:
            if state != 'need_variable':
                raise ParserSyntaxError(
                    "Variable not expected (got %s)" % token_string,
                    start, end)
            variable_name = token_string
            state = 'need_assignment'
        if token_type == tokenize.OP:
            if token_string != '=':
                raise ParserSyntaxError(
                    "Only assignments are allowed (got operator %s)"
                    % token_string, start, end)
            if state != 'need_assignment':
                raise ParserSyntaxError("Assignment not expected", start, end)
            state = 'need_value'
        if token_type in (tokenize.STRING, tokenize.NUMBER, 'SPECIAL_VALUE'):
            if token_type == 'SPECIAL_VALUE':
                if token_string == 'True':
                    value = True
                elif token_string == 'False':
                    value = False
                elif token_string == 'None':
                    value = None
                else:
                    assert 0, 'Unknown value: %r' % token_string
            elif token_type == tokenize.STRING:
                value = parse_string(token_string, start, end)
            elif token_type == tokenize.NUMBER:
                # Check the radix prefixes before looking for '.' or 'e',
                # so that hex literals such as 0xdead (which contains an
                # 'e') are not mistaken for floats.
                lowered = token_string.lower()
                if lowered.startswith('0x'):
                    value = int(token_string[2:], 16)
                elif lowered.startswith('0b'):
                    value = int(token_string[2:], 2)
                elif lowered.startswith('0o'):
                    value = int(token_string[2:], 8)
                elif '.' in lowered or 'e' in lowered:
                    value = float(token_string)
                elif len(token_string) > 1 and token_string.startswith('0'):
                    # Legacy octal literal, e.g. 0755
                    value = int(token_string[1:], 8)
                else:
                    value = int(token_string)
            else:
                raise ParserSyntaxError(
                    "Unknown value type: %s" % token_string, start, end)
            if state != 'need_value':
                raise ParserSyntaxError(
                    "Value not expected (got value %s)" % token_string,
                    start, end)
            assert variable_name
            assignments.append((variable_name, value))
            variable_name = None
            state = 'need_variable'
    if state != 'need_variable':
        raise ParserSyntaxError(
            "Unfinished assignment (of variable %s)" % variable_name,
            start, end)
    return assignments
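# Hedged usage sketch for parse_assignments: only flat "name = literal" pairs
# are accepted. Assumes ParserSyntaxError and parse_string are defined in the
# surrounding module, as the function above requires.
#
# parse_assignments("x = 1\nflag = True\nname = 'demo'")
# -> [('x', 1), ('flag', True), ('name', 'demo')]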
def partition_source(src: str) -> List[CodePartition]: """Partitions source into a list of `CodePartition`s for import refactoring. """ ast_obj = ast.parse(src.encode()) visitor = TopLevelImportVisitor() visitor.visit(ast_obj) line_offsets = get_line_offsets_by_line_no(src) chunks = [] startpos = 0 pending_chunk_type = None possible_ending_tokens = None seen_import = False for ( token_type, token_text, (srow, scol), (erow, ecol), _, ) in tokenize.generate_tokens(io.StringIO(src).readline): # Searching for a start of a chunk if pending_chunk_type is None: if not seen_import and token_type == tokenize.COMMENT: if 'noreorder' in token_text: chunks.append(CodePartition(CodeType.CODE, src[startpos:])) break else: pending_chunk_type = CodeType.PRE_IMPORT_CODE possible_ending_tokens = TERMINATES_COMMENT elif not seen_import and token_type == tokenize.STRING: pending_chunk_type = CodeType.PRE_IMPORT_CODE possible_ending_tokens = TERMINATES_DOCSTRING elif scol == 0 and srow in visitor.top_level_import_line_numbers: seen_import = True pending_chunk_type = CodeType.IMPORT possible_ending_tokens = TERMINATES_IMPORT elif token_type == tokenize.NL: # A NL token is a non-important newline, we'll immediately # append a NON_CODE partition endpos = line_offsets[erow] + ecol srctext = src[startpos:endpos] startpos = endpos chunks.append(CodePartition(CodeType.NON_CODE, srctext)) elif token_type == tokenize.COMMENT: if 'noreorder' in token_text: chunks.append(CodePartition(CodeType.CODE, src[startpos:])) break else: pending_chunk_type = CodeType.CODE possible_ending_tokens = TERMINATES_COMMENT elif token_type == tokenize.ENDMARKER: # Token ended right before end of file or file was empty pass else: # We've reached a `CODE` block, which spans the rest of the # file (intentionally timid). Let's append that block and be # done chunks.append(CodePartition(CodeType.CODE, src[startpos:])) break # Attempt to find ending of token elif token_type in possible_ending_tokens: endpos = line_offsets[erow] + ecol srctext = src[startpos:endpos] startpos = endpos chunks.append(CodePartition(pending_chunk_type, srctext)) pending_chunk_type = None possible_ending_tokens = None elif token_type == tokenize.COMMENT and 'noreorder' in token_text: chunks.append(CodePartition(CodeType.CODE, src[startpos:])) break chunks = [chunk for chunk in chunks if chunk.src] # Make sure we're not removing any code assert _partitions_to_src(chunks) == src return chunks
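# Hedged sketch of partition_source's output for a tiny module, assuming the
# CodeType/CodePartition helpers from the surrounding module: a module
# docstring becomes PRE_IMPORT_CODE, the import block becomes IMPORT, and
# everything from the first non-import statement on is one CODE chunk.
#
# partition_source('"""doc"""\nimport os\nprint(os)\n')
# -> [CodePartition(CodeType.PRE_IMPORT_CODE, '"""doc"""\n'),
#     CodePartition(CodeType.IMPORT, 'import os\n'),
#     CodePartition(CodeType.CODE, 'print(os)\n')]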
def remove_docstrings(source):
    """
    Return 'source' minus docstrings.

    Parameters
    ----------
    source : str
        Original source code.

    Returns
    -------
    str
        Source with docstrings removed.
    """
    io_obj = StringIO(source)
    out = ""
    prev_toktype = tokenize.INDENT
    last_lineno = -1
    last_col = 0
    for tok in tokenize.generate_tokens(io_obj.readline):
        token_type = tok[0]
        token_string = tok[1]
        start_line, start_col = tok[2]
        end_line, end_col = tok[3]
        # ltext = tok[4]  # in original code but not used here
        # The following two conditionals preserve indentation.
        # This is necessary because we're not using tokenize.untokenize()
        # (because it spits out code with copious amounts of oddly-placed
        # whitespace).
        if start_line > last_lineno:
            last_col = 0
        if start_col > last_col:
            out += (" " * (start_col - last_col))
        # This series of conditionals removes docstrings:
        if token_type == tokenize.STRING:
            if prev_toktype != tokenize.INDENT:
                # This is likely a docstring; double-check we're not inside
                # an operator:
                if prev_toktype != tokenize.NEWLINE:
                    # Note regarding NEWLINE vs NL: The tokenize module
                    # differentiates between newlines that end a statement
                    # and newlines inside of operators such as parens,
                    # brackets, and curly braces. Newlines that end a
                    # statement are NEWLINE; newlines inside of operators
                    # (and blank lines) are NL.
                    # Catch whole-module docstrings:
                    if start_col > 0:
                        # Unlabelled indentation means we're inside an
                        # operator
                        out += token_string
                    # Note regarding the INDENT token: The tokenize module
                    # does not label indentation inside of an operator
                    # (parens, brackets, and curly braces) as actual
                    # indentation. For example:
                    # def foo():
                    #     "The spaces before this docstring are tokenize.INDENT"
                    #     test = [
                    #         "The spaces before this string do not get a token"
                    #     ]
        else:
            out += token_string
        prev_toktype = token_type
        last_col = end_col
        last_lineno = end_line
    return out
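# Hedged usage sketch for remove_docstrings (assumes the Python 3 spelling
# `from io import StringIO` for the StringIO used above):
#
# sample = 'def f():\n    """Docstring."""\n    return 1\n'
# print(remove_docstrings(sample))
#
# The docstring line is dropped while the indentation and the `return 1`
# body are preserved.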
def generate_edges(formula):
    """Parses an edge specification from the head of the given formula part
    and yields the following:

    - startpoint(s) of the edge by vertex names
    - endpoint(s) of the edge by names or an empty list if the vertices are
      isolated
    - a pair of bools to denote whether we had arrowheads at the start and
      end vertices
    """
    if formula == "":
        yield [], [""], [False, False]
        return

    name_tokens = set([token.NAME, token.NUMBER, token.STRING])
    edge_chars = "<>-+"
    start_names, end_names, arrowheads = [], [], [False, False]
    parsing_vertices = True

    # Tokenize the formula
    token_gen = tokenize.generate_tokens(StringIO(formula).__next__)
    for token_info in token_gen:
        # Do the state transitions
        token_type, tok, _, _, _ = token_info
        if parsing_vertices:
            if all(ch in edge_chars for ch in tok) and token_type == token.OP:
                parsing_vertices = False
                # Check the edge we currently have
                if start_names and end_names:
                    # We have a whole edge
                    yield start_names, end_names, arrowheads
                start_names, end_names = end_names, []
                arrowheads = [False, False]
        else:
            if any(ch not in edge_chars for ch in tok):
                parsing_vertices = True

        if parsing_vertices:
            # We are parsing vertex names at the moment
            if token_type in name_tokens:
                # We found a vertex name
                if token_type == token.STRING:
                    end_names.append(eval(tok))
                else:
                    end_names.append(str(tok))
            elif tok == ":" and token_type == token.OP:
                # Separating colon between vertex names, we just go on
                continue
            elif token_type == token.NEWLINE:
                # Newlines are fine
                pass
            elif token_type == token.ENDMARKER:
                # End markers are fine
                pass
            else:
                msg = ("invalid token found in edge specification: %s; "
                       "token_type=%r; tok=%r" % (formula, token_type, tok))
                raise SyntaxError(msg)
        else:
            # We are parsing an edge operator
            if "<" in tok:
                if ">" in tok:
                    arrowheads = [True, True]
                else:
                    arrowheads[0] = True
            elif ">" in tok:
                arrowheads[1] = True
            elif "+" in tok:
                if tok[0] == "+":
                    arrowheads[0] = True
                if len(tok) > 1 and tok[-1] == "+":
                    arrowheads[1] = True

    # The final edge
    yield start_names, end_names, arrowheads
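# Hedged usage sketch for generate_edges, tracing one directed edge. "-->"
# tokenizes as the operators '-' and '->'; the '>' sets the arrowhead flag
# on the target side:
#
# list(generate_edges("A --> B"))
# -> [(['A'], ['B'], [False, True])]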
def listified_tokenizer(source): """Tokenizes *source* and returns the tokens as a list of lists.""" io_obj = io.StringIO(source) return [list(a) for a in tokenize.generate_tokens(io_obj.readline)]
def initialize(self):
    self.algdict = {}
    filterNames = []
    # go through filters, creating if necessary
    for filterTypeAndName in self.filters:
        l = filterTypeAndName.split('/')
        filterType = l[0]
        filterName = l[1]
        filterNames += [filterName]
        _alg = PyAthena.py_alg(filterName, 'IAlgorithm')
        if not _alg:
            # try to create
            algmgr = PyAthena.py_svc('ApplicationMgr', iface='IAlgManager')
            if not algmgr:
                error = 'could not retrieve IAlgManager/ApplicationMgr'
                self.msg.error(error)
                raise RuntimeError(error)
            import PyCintex
            _alg = PyCintex.libPyROOT.MakeNullPointer("IAlgorithm")
            if algmgr.createAlgorithm(filterType, filterName, _alg).isFailure() or not _alg:
                self.msg.error('could not create alg: ' + filterTypeAndName)
                raise RuntimeError('could not create alg: ' + filterTypeAndName)
            # we are responsible for initializing it too
            if _alg.sysInitialize().isFailure():
                self.msg.error('Failed to initialize alg: ' + filterTypeAndName)
                raise RuntimeError('Failed to initialize alg: ' + filterTypeAndName)
            self.ownedAlgs += [_alg]
        self.algdict[filterName] = _alg

    if self.Expression == "":
        # do a simple and of all the filters given
        self.Expression = " and ".join(filterNames)

    self.msg.debug("Filter Expression = " + self.Expression)

    # check that parentheses are balanced
    if self.Expression.count("(") != self.Expression.count(")"):
        self.msg.fatal("Mismatched parentheses in filter string: %s"
                       % self.Expression)
        return StatusCode.Failure

    # braces and brackets have no meaning in a filter expression
    if self.Expression.count("{") != 0 or \
       self.Expression.count("}") != 0 or \
       self.Expression.count("[") != 0 or \
       self.Expression.count("]") != 0:
        self.msg.fatal("Wrong type of parentheses in filter string: %s"
                       % self.Expression)
        return StatusCode.Failure

    try:
        tokenobj = tokenize.generate_tokens(
            StringIO(self.Expression).readline)
        result = []
        for toknum, tokval, _, _, _ in tokenobj:
            if toknum == tokenize.NAME and \
               tokval not in ('or', 'not', 'and', 'True', 'False'):
                # check the token is a known alg
                if tokval not in self.algdict:
                    self.msg.error("Unknown alg : " + tokval)
                    return StatusCode.Failure
                result.extend([(tokenize.STRING, 'self'),
                               (tokenize.OP, '.'),
                               (tokenize.STRING, 'evalFilter'),
                               (tokenize.OP, '('),
                               (tokenize.OP, '"%s"' % tokval),
                               (tokenize.OP, ')')])
            else:
                result.append((toknum, tokval))
        self.cmd = tokenize.untokenize(result)
        self.msg.debug("String changed internally to:\n%s", self.cmd)
        # execute command once to validate
        # response = bool(eval(self.cmd))
    except Exception as e:
        self.msg.fatal(
            "%s is not a valid Python expression string. Exception: %s"
            % (self.Expression, e))
        return StatusCode.Failure

    # If needed, set up a random number generator
    if self.Sampling >= 0:
        random.seed(1234)

    return StatusCode.Success
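# Minimal standalone sketch of the token-rewriting trick used in initialize()
# above: every NAME token that is not a boolean keyword gets wrapped in an
# evalFilter() call, so that eval() later dispatches to the filter lookup
# instead of reading a raw variable. The function name and the demo
# expression below are hypothetical.
import tokenize
from io import StringIO

def rewrite_filter_expression(expression):
    keep = {'and', 'or', 'not', 'True', 'False'}
    result = []
    for toknum, tokval, _, _, _ in tokenize.generate_tokens(
            StringIO(expression).readline):
        if toknum == tokenize.NAME and tokval not in keep:
            # Replace the bare name with: self.evalFilter("<name>")
            result.extend([(tokenize.NAME, 'self'), (tokenize.OP, '.'),
                           (tokenize.NAME, 'evalFilter'), (tokenize.OP, '('),
                           (tokenize.STRING, '"%s"' % tokval),
                           (tokenize.OP, ')')])
        else:
            result.append((toknum, tokval))
    return tokenize.untokenize(result)

# rewrite_filter_expression("FilterA and not FilterB") returns something like
# 'self .evalFilter ("FilterA")and not self .evalFilter ("FilterB")'
# (untokenize's 2-tuple mode inserts its own spacing), which still evaluates
# correctly under eval().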
def __init__(self, text): self.text = text readline = iter([text]).next self.tokens = tokenize.generate_tokens(readline) self.index = 0
def python_tokens(self, text): readline = iter([text]).next tokens = tokenize.generate_tokens(readline) return [t[1] for t in tokens]
def python_tokens(self, text): i = iter([text]) readline = lambda: next(i) tokens = tokenize.generate_tokens(readline) return [t[1] for t in tokens]
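# Hedged usage sketch for the Python 3 variant above: tokenize appends
# empty-string NEWLINE and ENDMARKER tokens, so one or two trailing ''
# entries are expected in the result.
#
# self.python_tokens("a + b")  ->  ['a', '+', 'b', '', '']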
def simple_parser_main(parser_class: Type[Parser]) -> None:
    argparser = argparse.ArgumentParser()
    argparser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Print timing stats; repeat for more debug output",
    )
    argparser.add_argument(
        "-q", "--quiet", action="store_true", help="Don't print the parsed program"
    )
    argparser.add_argument("filename", help="Input file ('-' to use stdin)")

    args = argparser.parse_args()
    verbose = args.verbose
    verbose_tokenizer = verbose >= 3
    verbose_parser = verbose == 2 or verbose >= 4

    t0 = time.time()

    filename = args.filename
    if filename == "" or filename == "-":
        filename = "<stdin>"
        file = sys.stdin
    else:
        file = open(args.filename)
    try:
        tokengen = tokenize.generate_tokens(file.readline)
        tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
        parser = parser_class(tokenizer, verbose=verbose_parser)
        tree = parser.start()
        try:
            if file.isatty():
                endpos = 0
            else:
                endpos = file.tell()
        except IOError:
            endpos = 0
    finally:
        if file is not sys.stdin:
            file.close()

    t1 = time.time()

    if not tree:
        err = parser.make_syntax_error(filename)
        traceback.print_exception(err.__class__, err, None)
        sys.exit(1)

    if not args.quiet:
        print(tree)

    if verbose:
        dt = t1 - t0
        diag = tokenizer.diagnose()
        nlines = diag.end[0]
        if diag.type == token.ENDMARKER:
            nlines -= 1
        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
        if endpos:
            print(f" ({endpos} bytes)", end="")
        if dt:
            print(f"; {nlines / dt:.0f} lines/sec")
        else:
            print()
        print("Cache sizes:")
        print(f"  token array : {len(tokenizer._tokens):10}")
        print(f"        cache : {len(parser._cache):10}")