def format_lines(statements, lines, arcs=None):
    """Nicely format a list of line numbers.

    Format a list of line numbers for printing by coalescing groups of lines as
    long as the lines represent consecutive statements.  This will coalesce
    even if there are gaps between statements.

    For example, if `statements` is [1,2,3,4,5,10,11,12,13,14] and
    `lines` is [1,2,5,10,11,13,14] then the result will be "1-2, 5-11, 13-14".

    Both `lines` and `statements` can be any iterable. All of the elements of
    `lines` must be in `statements`, and all of the values must be positive
    integers.

    If `arcs` is provided, they are (start,[end,end,end]) pairs that will be
    included in the output as long as start isn't in `lines`.

    Returns the formatted items joined with ", ", ordered by line number.

    """
    # `lines` is consumed twice: once to build the ranges and once for the
    # membership test on arc starts.  Materialize it so that a one-shot
    # iterator is not already exhausted when the arcs are filtered.
    lines = list(lines)
    line_items = [(pair[0], nice_pair(pair)) for pair in _line_ranges(statements, lines)]
    if arcs:
        known_lines = set(lines)
        for line, exits in sorted(arcs):
            # Whether an arc is reported depends only on its start line, so
            # decide once per start instead of once per exit.
            if line in known_lines:
                continue
            for ex in sorted(exits):
                # Non-positive destinations are rendered as "exit".
                dest = ex if ex > 0 else "exit"
                line_items.append((line, "%d->%s" % (line, dest)))

    return ', '.join(t[-1] for t in sorted(line_items))
def _raw_parse(self):
    """Parse the source to find the interesting facts about its lines.

    A handful of member fields are updated: `excluded`, `classdefs`,
    `docstrings`, `multiline` and `statement_starts`.

    """
    # Find lines which match an exclusion pattern.
    if self.exclude:
        self.excluded = self.lines_matching(self.exclude)

    # Tokenize, to find excluded suites, to find docstrings, and to find
    # multi-line statements.
    indent = 0
    exclude_indent = 0
    excluding = False
    prev_toktype = token.INDENT
    first_line = None
    empty = True

    tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:
            # A parenthesized single argument is valid both as the Python 2
            # print statement and as the Python 3 print function.
            print('%10s %5s %-20r %r' % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
            ))
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.NAME and ttext == 'class':
            # Remember the lines that carry the 'class' keyword.
            self.classdefs.add(slineno)
        elif toktype == token.OP and ttext == ':':
            if not excluding and elineno in self.excluded:
                # Start excluding a suite.  The colon is the trigger, so the
                # line holding the colon is the one checked for exclusion.
                exclude_indent = indent
                excluding = True
        elif toktype == token.STRING and prev_toktype == token.INDENT:
            # A string that directly follows an INDENT token is recorded as
            # a docstring, on every line it spans.
            self.docstrings.update(range(slineno, elineno + 1))
        elif toktype == token.NEWLINE:
            if first_line is not None and elineno != first_line:
                # The statement ended on a different line than it started
                # on, so record a multi-line range for each of its lines.
                rng = (first_line, elineno)
                for lineno in range(first_line, elineno + 1):
                    self.multiline[lineno] = rng
            first_line = None

        if ttext.strip() and toktype != tokenize.COMMENT:
            # A non-whitespace token.
            empty = False
            if first_line is None:
                # The token is the first in a statement.
                first_line = slineno
                # Check whether to end an excluded suite.
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.excluded.add(elineno)

        prev_toktype = toktype

    # Find the starts of the executable statements.
    if not empty:
        self.statement_starts.update(self.byte_parser._find_statements())
def _raw_parse(self):
    """Tokenize `self.text` and record facts about its lines.

    The following attributes are updated in place: `excluded` (lines matching
    an exclusion pattern plus the first line of each statement in an excluded
    suite), `classdefs` (lines with a `class` keyword), `docstrings`,
    `multiline` (line -> (first, last) range of its statement) and
    `statement_starts`.  Nothing is returned.

    """
    # Find lines which match an exclusion pattern.
    if self.exclude:
        self.excluded = self.lines_matching(self.exclude)

    indent = 0
    exclude_indent = 0
    excluding = False
    prev_toktype = token.INDENT
    first_line = None
    empty = True

    tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:
            # Written as a call with one argument so that it is valid under
            # both the Python 2 print statement and the Python 3 function.
            print('%10s %5s %-20r %r' % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
            ))
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.NAME and ttext == 'class':
            self.classdefs.add(slineno)
        elif toktype == token.OP and ttext == ':':
            if not excluding and elineno in self.excluded:
                # An excluded line ends in a colon: the suite it introduces
                # is excluded too, until the indentation drops back.
                exclude_indent = indent
                excluding = True
        elif toktype == token.STRING and prev_toktype == token.INDENT:
            # A string immediately after an INDENT token is a docstring.
            self.docstrings.update(range(slineno, elineno + 1))
        elif toktype == token.NEWLINE:
            if first_line is not None and elineno != first_line:
                # The logical line spans several physical lines.
                rng = (first_line, elineno)
                for lineno in range(first_line, elineno + 1):
                    self.multiline[lineno] = rng
            first_line = None

        if ttext.strip() and toktype != tokenize.COMMENT:
            # A token with visible text that is not a comment.
            empty = False
            if first_line is None:
                # First such token of a statement.
                first_line = slineno
                # A new statement at or above the excluding indent level ends
                # the excluded suite; otherwise the statement is excluded.
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.excluded.add(elineno)

        prev_toktype = toktype

    # Only ask the byte parser for statements if some code token was seen.
    if not empty:
        self.statement_starts.update(self.byte_parser._find_statements())
def _raw_parse(self):
    """Scan the source text and record line-level facts on this parser.

    Updates `raw_excluded`, `raw_classdefs`, `raw_docstrings`, `_multiline`
    and `raw_statements` in place; nothing is returned.

    """
    # Seed the excluded set with every line matching an exclusion pattern.
    if self.exclude:
        self.raw_excluded = self.lines_matching(self.exclude)

    # State carried across tokens while looking for excluded suites,
    # docstrings and statements that span several lines.
    depth = 0
    excluded_depth = 0
    in_excluded_suite = False
    excluding_decorated = False
    last_type = token.INDENT
    stmt_start = None
    saw_code = False
    at_line_start = True

    for toktype, ttext, (start_line, _), (end_line, _), ltext in generate_tokens(self.text):
        if self.show_tokens:                # pragma: debugging
            type_name = tokenize.tok_name.get(toktype, toktype)
            span = nice_pair((start_line, end_line))
            print("%10s %5s %-20r %r" % (type_name, span, ttext, ltext))

        if toktype == token.INDENT:
            depth += 1
        elif toktype == token.DEDENT:
            depth -= 1
        elif toktype == token.NAME:
            if ttext == 'class':
                # Class definitions look like branches in the bytecode, so
                # the lines holding the 'class' keyword are noted in order
                # to exclude them later.
                self.raw_classdefs.add(start_line)
        elif toktype == token.OP:
            if ttext == ':':
                if not in_excluded_suite and (
                    end_line in self.raw_excluded or excluding_decorated
                ):
                    # Begin an excluded suite.  Keying off the colon means
                    # a pragma comment on the colon's own line is honored.
                    self.raw_excluded.add(end_line)
                    excluded_depth = depth
                    in_excluded_suite = True
                    excluding_decorated = False
            elif ttext == '@' and at_line_start:
                # A decorator: once one decorator line is excluded, keep
                # excluding the lines of the decorators that follow.
                excluding_decorated = (
                    excluding_decorated or end_line in self.raw_excluded
                )
                if excluding_decorated:
                    self.raw_excluded.add(end_line)
        elif toktype == token.STRING and last_type == token.INDENT:
            # A string that comes first on an indented line is taken to be
            # a docstring (the same trick as trace.py in the stdlib).  It is
            # right for nearly every case; for the exceptions see
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            self.raw_docstrings.update(range(start_line, end_line + 1))
        elif toktype == token.NEWLINE:
            if stmt_start is not None and end_line != stmt_start:
                # The statement finished on a later line than it began on:
                # map each of its lines back to the first one.
                for covered in range(stmt_start, end_line + 1):
                    self._multiline[covered] = stmt_start
            stmt_start = None
            at_line_start = True

        if ttext.strip() and toktype != tokenize.COMMENT:
            # Some real, non-comment text.
            saw_code = True
            if stmt_start is None:
                # This token opens a new statement.
                stmt_start = start_line
                if in_excluded_suite:
                    if depth <= excluded_depth:
                        # Back out at (or above) the level where the
                        # exclusion began: the excluded suite is over.
                        in_excluded_suite = False
                    else:
                        self.raw_excluded.add(end_line)
                at_line_start = False

        last_type = toktype

    # Collect the starts of the executable statements.
    if saw_code:
        self.raw_statements.update(self.byte_parser._find_statements())

    # Modules can claim their first line is 1 even when the first line of
    # code comes later; if so, point line 1 at the real first statement.
    if env.PYBEHAVIOR.module_firstline_1 and self._multiline:
        self._multiline[1] = min(self.raw_statements)
def _raw_parse(self):
    """Walk the token stream of `self.text` and record per-line facts.

    The `excluded`, `classdefs`, `docstrings`, `multiline` and
    `statement_starts` attributes are updated in place.

    """
    if self.exclude:
        # Start from the lines that directly match an exclusion pattern.
        self.excluded = self.lines_matching(self.exclude)

    # Running state for spotting excluded suites, docstrings and
    # statements that continue over several lines.
    depth = 0
    suite_depth = 0
    skipping = False
    previous = token.INDENT
    stmt_first = None
    has_code = False

    for kind, text, (begin, _), (finish, _), line in generate_tokens(self.text):
        if self.show_tokens:                # pragma: not covered
            label = tokenize.tok_name.get(kind, kind)
            print("%10s %5s %-20r %r" % (label, nice_pair((begin, finish)), text, line))

        if kind == token.INDENT:
            depth += 1
        elif kind == token.DEDENT:
            depth -= 1
        elif kind == token.NAME:
            if text == 'class':
                # Class definitions look like branches in the byte code, so
                # they have to be excluded; noting the lines with the
                # 'class' keyword is the simplest way.
                self.classdefs.add(begin)
        elif kind == token.OP:
            if text == ':' and not skipping and finish in self.excluded:
                # An excluded suite starts here.  The colon token is the
                # trigger so that a #pragma comment on the colon's line is
                # recognized.
                suite_depth = depth
                skipping = True
        elif kind == token.STRING and previous == token.INDENT:
            # The first string on an indented line is treated as a
            # docstring (a trick from trace.py in the stdlib).  That holds
            # for nearly all code; for the remainder see
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            self.docstrings.update(range(begin, finish + 1))
        elif kind == token.NEWLINE:
            if stmt_first is not None and finish != stmt_first:
                # The line ended somewhere other than where the statement
                # began: record every line of it against the first line.
                for covered in range(stmt_first, finish + 1):
                    self.multiline[covered] = stmt_first
            stmt_first = None

        if text.strip() and kind != tokenize.COMMENT:
            # A token that is neither whitespace nor a comment.
            has_code = True
            if stmt_first is None:
                # ...and it is the first one of its statement.
                stmt_first = begin
                if skipping:
                    if depth <= suite_depth:
                        # Indentation is back to where the exclusion
                        # started, so the excluded suite has ended.
                        skipping = False
                    else:
                        self.excluded.add(finish)

        previous = kind

    # Gather the starts of the executable statements.
    if has_code:
        self.statement_starts.update(self.byte_parser._find_statements())
def _raw_parse(self):
    """Parse the source to find the interesting facts about its lines.

    A handful of member fields are updated: `excluded`, `classdefs`,
    `docstrings`, `multiline` and `statement_starts`.

    """
    # Find lines which match an exclusion pattern.
    if self.exclude:
        self.excluded = self.lines_matching(self.exclude)

    # Tokenize, to find excluded suites, to find docstrings, and to find
    # multi-line statements.
    indent = 0
    exclude_indent = 0
    excluding = False
    prev_toktype = token.INDENT
    first_line = None
    empty = True

    tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:                # pragma: no cover
            print("%10s %5s %-20r %r" % (
                tokenize.tok_name.get(toktype, toktype),
                nice_pair((slineno, elineno)), ttext, ltext
            ))
        if toktype == token.INDENT:
            indent += 1
        elif toktype == token.DEDENT:
            indent -= 1
        elif toktype == token.NAME and ttext == 'class':
            # Class definitions look like branches in the byte code, so
            # we need to exclude them.  The simplest way is to note the
            # lines with the 'class' keyword.
            self.classdefs.add(slineno)
        elif toktype == token.OP and ttext == ':':
            if not excluding and elineno in self.excluded:
                # Start excluding a suite.  We trigger off of the colon
                # token so that the #pragma comment will be recognized on
                # the same line as the colon.
                exclude_indent = indent
                excluding = True
        elif toktype == token.STRING and prev_toktype == token.INDENT:
            # Strings that are first on an indented line are docstrings.
            # (a trick from trace.py in the stdlib.) This works for
            # 99.9999% of cases.  For the rest (!) see:
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            # Add every spanned line in one call instead of one at a time.
            self.docstrings.update(range(slineno, elineno + 1))
        elif toktype == token.NEWLINE:
            if first_line is not None and elineno != first_line:
                # We're at the end of a line, and we've ended on a
                # different line than the first line of the statement,
                # so record a multi-line range.
                rng = (first_line, elineno)
                for lineno in range(first_line, elineno + 1):
                    self.multiline[lineno] = rng
            first_line = None

        if ttext.strip() and toktype != tokenize.COMMENT:
            # A non-whitespace token.
            empty = False
            if first_line is None:
                # The token is not whitespace, and is the first in a
                # statement.
                first_line = slineno
                # Check whether to end an excluded suite.
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.excluded.add(elineno)

        prev_toktype = toktype

    # Find the starts of the executable statements.
    if not empty:
        self.statement_starts.update(self.byte_parser._find_statements())
def _raw_parse(self):
    """Parse the source to find the interesting facts about its lines.

    A handful of attributes are updated: `raw_excluded`, `raw_classdefs`,
    `raw_docstrings`, `_multiline` and `raw_statements`.

    """
    # Find lines which match an exclusion pattern.
    if self.exclude:
        self.raw_excluded = self.lines_matching(self.exclude)

    # Tokenize, to find excluded suites, to find docstrings, and to find
    # multi-line statements.
    indent = 0
    exclude_indent = 0
    excluding = False
    excluding_decorators = False
    # NOTE(review): the loop below compares prev_toktype against the string
    # 'INDENT'; whether the value returned here can ever equal it is not
    # visible from this function - confirm against the lexer.
    prev_toktype = lexer.indent(1)
    first_line = None
    empty = True
    first_on_line = True
    # Second element of every lexer operator entry; token types found in
    # this set are handled as operators below.
    operators = {op[1] for op in lexer.operators}

    def generate_tokens(text):
        """Use enaml lexer to generate the tokens.

        Yields (type, value, (start_line, 0), (end_line, 0), '') tuples so
        the loop below can unpack them like stdlib tokenize output.

        """
        lexer.input(text)
        while True:
            tok = lexer.token()
            if tok is None:
                break
            length = 0
            if tok.type == 'STRING':
                # HINT use a string when counting to avoid encoding issues.
                length = tok.value.count(str('\n'))
            yield (tok.type, tok.value, (tok.lineno, 0),
                   (tok.lineno + length, 0), '')

    tokgen = generate_tokens(self.text)
    for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
        if self.show_tokens:                # pragma: not covered
            print("%10s %5s %-20r %r" % (
                toktype, nice_pair((slineno, elineno)), ttext, ltext
            ))
        if toktype == 'INDENT':
            indent += 1
        elif toktype == 'DEDENT':
            indent -= 1
        elif toktype == 'NAME':
            if ttext == 'class' or ttext == 'enamldef':
                # Class definitions look like branches in the bytecode, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.raw_classdefs.add(slineno)
        elif toktype in operators:
            if ttext == ':':
                should_exclude = ((elineno in self.raw_excluded) or
                                  excluding_decorators)
                if not excluding and should_exclude:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized
                    # on the same line as the colon.
                    self.raw_excluded.add(elineno)
                    exclude_indent = indent
                    excluding = True
                    excluding_decorators = False
            elif ttext == '@' and first_on_line:
                # A decorator.
                if elineno in self.raw_excluded:
                    excluding_decorators = True
                if excluding_decorators:
                    self.raw_excluded.add(elineno)
        elif toktype == 'STRING' and prev_toktype == 'INDENT':
            # Strings that are first on an indented line are docstrings.
            # (a trick from trace.py in the stdlib.) This works for
            # 99.9999% of cases.  For the rest (!) see:
            # http://stackoverflow.com/questions/1769332/x/1769794#1769794
            # The literal must be exactly 'STRING': with a stray trailing
            # space this branch could never match any token type.
            self.raw_docstrings.update(range(slineno, elineno + 1))
        elif toktype == 'NEWLINE':
            if first_line is not None and elineno != first_line:
                # We're at the end of a line, and we've ended on a
                # different line than the first line of the statement,
                # so record a multi-line range.
                for lineno in range(first_line, elineno + 1):
                    self._multiline[lineno] = first_line
            first_line = None
            first_on_line = True

        if ttext is not None:
            # A token that carries a value.
            empty = False
            if first_line is None:
                # The token is the first in a statement.
                first_line = slineno
                # Check whether to end an excluded suite.
                if excluding and indent <= exclude_indent:
                    excluding = False
                if excluding:
                    self.raw_excluded.add(elineno)
                first_on_line = False

        prev_toktype = toktype

    # Find the starts of the executable statements.
    if not empty:
        self.raw_statements.update(self.byte_parser._find_statements())