def _format_code(source,
                 summary_wrap_length=79,
                 description_wrap_length=72,
                 pre_summary_newline=False,
                 make_summary_multi_line=False,
                 post_description_blank=False,
                 force_wrap=False,
                 line_range=None):
    """Return source code with docstrings formatted."""
    if not source:
        return source

    if line_range is not None:
        assert line_range[0] > 0 and line_range[1] > 0

    def in_range(start, end):
        """Return True if start/end is in line_range."""
        if line_range is None:
            return True
        return any(line_range[0] <= line_no <= line_range[1]
                   for line_no in range(start, end + 1))

    modified_tokens = []

    sio = io.StringIO(source)
    previous_token_string = ''
    previous_token_type = None
    only_comments_so_far = True

    for (token_type,
         token_string,
         start,
         end,
         line) in tokenize.generate_tokens(sio.readline):

        if (token_type == tokenize.STRING and
                token_string.startswith(('"', "'")) and
                (previous_token_type == tokenize.INDENT or
                 only_comments_so_far) and
                in_range(start[0], end[0])):

            if only_comments_so_far:
                indentation = ''
            else:
                indentation = previous_token_string

            token_string = format_docstring(
                indentation,
                token_string,
                summary_wrap_length=summary_wrap_length,
                description_wrap_length=description_wrap_length,
                pre_summary_newline=pre_summary_newline,
                make_summary_multi_line=make_summary_multi_line,
                post_description_blank=post_description_blank,
                force_wrap=force_wrap)

        if token_type not in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
            only_comments_so_far = False

        previous_token_string = token_string
        previous_token_type = token_type
        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
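# Illustrative driver sketch, not taken from the original sources: one way a
# _format_code() like the one above is typically applied to a file on disk.
# It assumes the detect_encoding() and open_with_encoding() helpers used in
# run() below; format_file_in_place is a hypothetical name introduced only for
# this example.
import io


def format_file_in_place(filename):
    """Format docstrings in filename; return True if the file changed."""
    encoding = detect_encoding(filename)
    with open_with_encoding(filename, encoding=encoding) as input_file:
        original = input_file.read()

    formatted = _format_code(original)
    if formatted != original:
        with io.open(filename, mode='w', encoding=encoding) as output_file:
            output_file.write(formatted)
    return formatted != original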
def run(filename):
    """Check untokenize with file.

    Return True on success.
    """
    with open_with_encoding(filename,
                            encoding=detect_encoding(filename)) as input_file:
        source_code = input_file.read()

        # Handle files with trailing whitespace, but no final newline.
        # tokenize.generate_tokens() will not report the trailing whitespace
        # in such a case.
        if source_code.endswith((' ', '\t')):
            source_code = source_code.rstrip()

        string_io = io.StringIO(source_code)
        generated = untokenize.untokenize(
            tokenize.generate_tokens(string_io.readline))

        if source_code == generated:
            return True
        else:
            print('untokenize failed on ' + filename, file=sys.stderr)
            print(diff(source_code, generated), file=sys.stderr)
def stripCommentsAndNewlines(s):
    readline = StringIO(s).readline
    tokens = [
        token for token in tokenize.generate_tokens(readline)
        if token[0] not in [tokenize.NL, tokenize.COMMENT]
    ]
    stripped = untokenize.untokenize(tokens)
    return stripped
def _format_code(source, summary_wrap_length, description_wrap_length,
                 pre_summary_newline, post_description_blank, force_wrap):
    """Return source code with docstrings formatted."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    previous_token_string = ''
    previous_token_type = None
    only_comments_so_far = True

    for (token_type,
         token_string,
         start,
         end,
         line) in tokenize.generate_tokens(sio.readline):

        if (
            token_type == tokenize.STRING and
            token_string.startswith(('"', "'")) and
            (previous_token_type == tokenize.INDENT or only_comments_so_far)
        ):
            if only_comments_so_far:
                indentation = ''
            else:
                indentation = previous_token_string

            token_string = format_docstring(
                indentation,
                token_string,
                summary_wrap_length=summary_wrap_length,
                description_wrap_length=description_wrap_length,
                pre_summary_newline=pre_summary_newline,
                post_description_blank=post_description_blank,
                force_wrap=force_wrap)

        if token_type not in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
            only_comments_so_far = False

        previous_token_string = token_string
        previous_token_type = token_type
        modified_tokens.append(
            (token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
def _format_code(source, preferred_quote):
    """Return source code with quotes unified."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):
        if token_type == tokenize.STRING:
            token_string = unify_quotes(token_string,
                                        preferred_quote=preferred_quote)

        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
def _format_code(source, rules):
    """Return source code with quotes unified."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):
        editable_string = get_editable_string(token_type, token_string)
        editable_string.reformat(rules)
        token_string = editable_string.token

        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
def _format_code(source, preferred_quote):
    """Return source code with quotes unified."""
    if not source:
        return source

    modified_tokens = []

    sio = io.StringIO(source)
    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):
        if token_type == tokenize.STRING:
            token_string = unify_quotes(token_string,
                                        preferred_quote=preferred_quote)

        modified_tokens.append(
            (token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
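# Hypothetical minimal stand-in for the unify_quotes() helper called by the two
# quote-unifying _format_code() variants above; it is not the real
# implementation from the unify project, only a sketch of the conservative
# idea: swap the quote character only when doing so cannot change the literal.
def unify_quotes(token_string, preferred_quote="'"):
    """Return token_string rewritten to use preferred_quote when safe."""
    bad_quote = '"' if preferred_quote == "'" else "'"
    # Skip literals with prefixes (b'', f'', r'') or that already use the
    # preferred quote character as their delimiter.
    if not (token_string.startswith(bad_quote) and
            token_string.endswith(bad_quote)):
        return token_string
    # Skip triple-quoted strings, escaped quotes, and literals whose body
    # contains the preferred quote; rewriting those could change meaning.
    if token_string.count(bad_quote) != 2 or preferred_quote in token_string:
        return token_string
    return preferred_quote + token_string[1:-1] + preferred_quote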
def _format_code(source: str, args: SimpleNamespace, filename: str) -> Any:
    """Return source code with quotes unified."""
    if not source:
        return source

    if args._dev_debug_level >= 20:
        print("_format_code: start", "format_file: ", args._read_filename)

    for ontoken_dict in args._plugin_order_ontoken_order:
        try:
            ontoken_plugin = args._plugins_manager.filter(
                name=ontoken_dict.name).index(0).plugin()
            if not ontoken_plugin.is_parse:
                if args._dev_debug_level >= 25:
                    print("_format_code: not show_or_save plugin",
                          ontoken_dict.name)
                continue
            if not ontoken_plugin.check_is_enabled(args):
                continue

            args._modified_tokens = []
            for token, line_tokens in prepare_tokens(source):
                if args._dev_debug_level >= 25:
                    print("_format_code: read token", token)
                if search_comment_code(line_tokens, search="noqa",
                                       filename=filename):
                    pass  # no check/reformat line
                else:
                    if args._dev_debug_level >= 25:
                        print("_format_code: apply plugin:", ontoken_dict.name)
                    token_dict = get_token_dict(token.type, token.string,
                                                token.start, token.end,
                                                token.line, filename)
                    token = ontoken_plugin.parse(token=token,
                                                 line_tokens=line_tokens,
                                                 args=args,
                                                 token_dict=token_dict,
                                                 _args=ontoken_dict.args,
                                                 kwargs=ontoken_dict.kwargs)
                args._modified_tokens.append(
                    (token.type, token.string, token.start, token.end,
                     token.line))

            source = untokenize.untokenize(args._modified_tokens)
            del args._modified_tokens
        except BaseException as e:
            raise e

    return source
def _format_code(
    source,
    summary_wrap_length=79,
    description_wrap_length=72,
    pre_summary_newline=False,
    post_description_blank=True,
    force_wrap=False,
    line_range=None,
):
    """Return source code with docstrings formatted."""
    if not source:
        return source

    if line_range is not None:
        assert line_range[0] > 0 and line_range[1] > 0

    def in_range(start, end):
        """Return True if start/end is in line_range."""
        if line_range is None:
            return True
        return any(line_range[0] <= line_no <= line_range[1]
                   for line_no in range(start, end + 1))

    modified_tokens = []

    sio = io.StringIO(source)
    previous_token_string = ""
    previous_token_type = None
    only_comments_so_far = True

    for (token_type, token_string, start, end,
         line) in tokenize.generate_tokens(sio.readline):
        if (
            token_type == tokenize.STRING
            and token_string.startswith(('"', "'"))
            and (previous_token_type == tokenize.INDENT
                 or only_comments_so_far)
            and in_range(start[0], end[0])
        ):
            if only_comments_so_far:
                indentation = ""
            else:
                indentation = previous_token_string

            token_string = format_docstring(
                indentation,
                token_string,
                summary_wrap_length=summary_wrap_length,
                description_wrap_length=description_wrap_length,
                pre_summary_newline=pre_summary_newline,
                post_description_blank=post_description_blank,
                force_wrap=force_wrap,
            )

        if token_type not in [tokenize.COMMENT, tokenize.NEWLINE, tokenize.NL]:
            only_comments_so_far = False

        previous_token_string = token_string
        previous_token_type = token_type
        modified_tokens.append((token_type, token_string, start, end, line))

    return untokenize.untokenize(modified_tokens)
def test_untokenize_with_encoding(self):
    source = '0'
    bytes_io = io.BytesIO(source.encode('us-ascii'))
    self.assertEqual(
        source,
        untokenize.untokenize(tokenize.tokenize(bytes_io.readline)))
def check(self, source_code):
    string_io = io.StringIO(source_code)
    self.assertEqual(
        source_code,
        untokenize.untokenize(
            tokenize.generate_tokens(string_io.readline)))
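# Illustrative round-trip sketch, not from the original sources: the property
# that run() and check() above verify.  Given an unmodified token stream, the
# third-party untokenize package reproduces the input exactly, including
# comments, blank lines, and intra-line whitespace.
import io
import tokenize

import untokenize

sample = "x = 1  # keep this comment\n\n\ndef f( a ):\n    return a\n"
tokens = tokenize.generate_tokens(io.StringIO(sample).readline)
assert untokenize.untokenize(tokens) == sample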
# find highest ranked sentences and return in the correct order
final_list = dict([])
for i in range(0, 6):
    max1 = [0, 0]
    for j in range(len(tokens_sentences)):
        if tokens_sentences[j][0] > max1[1]:
            max1[1] = tokens_sentences[j][0]
            # store the index
            max1[0] = j
    # insert at index - we preserve the position by storing the index as the key
    final_list[max1[0]] = tokens_sentences[max1[0]]
    # remove at index
    tokens_sentences.pop(max1[0])

print(sorted(final_list))

# detokenize - and order
for i in range(0, len(sorted(final_list))):
    print(sorted(final_list)[i])
    statement = final_list[sorted(final_list)[i]]
    # remove the score
    statement.pop(0)
    condensed = untokenize(statement)
    print(condensed)