class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """Set up a parser for one piece of source code.

        The source is taken from `text` when it is non-empty; otherwise it
        is read from `filename` via `open_source`.  `exclude` is a regex:
        lines matching it are treated as excluded.

        Raises `NoSource` if reading the file fails with an IOError.

        """
        self.filename = filename or '<code>'
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                # NOTE(review): sys.exc_info() is presumably used instead of
                # "except ... as err" to keep one spelling valid on both
                # Python 2 and Python 3 -- confirm supported versions.
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                )

        # Drop a leading byte-order mark (U+FEFF) if there is one.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude

        # Debugging aid: when true, _raw_parse prints every token it sees.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of lines holding a 'class' keyword.
        self.classdefs = set()

        # A dict mapping line numbers to (lo, hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser, see _get_byte_parser.
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand, and reuse it afterwards."""
        if not self._byte_parser:
            self._byte_parser = ByteParser(
                text=self.text, filename=self.filename
            )
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of 1-based line numbers: the lines of `self.lines`
        in which the combined regex is found anywhere (`search`, not
        `match`).

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                matches.add(i + 1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        Updates `excluded`, `classdefs`, `docstrings`, `multiline` and
        `statement_starts` in place.  Tokenizer errors are not caught here.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, docstrings, and multi-line
        # statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        # Starting as if an INDENT was just seen lets a string that is the
        # very first token be recorded as a docstring.
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                # A single parenthesized argument is valid both as a
                # Python 2 print statement and as a Python 3 print() call.
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Remember where 'class' keywords are: exit_counts removes
                # one exit for each of these lines.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # A colon on an excluded line starts an excluded suite.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # A string that is the first token after an INDENT is
                # treated as a docstring; record every line it covers.
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # The statement ended on a different line than it
                    # started on, so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace, non-comment token.
                empty = False
                if first_line is None:
                    # This token is the first one in a statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the first line of their statement.

        Skips any line mentioned in `ignore`, and any line whose statement
        starts on a line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        # A set makes each membership test O(1); callers pass lists that
        # can hold every excluded and docstring line of the file.
        ignore = frozenset(ignore) if ignore else frozenset()
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of
        multi-line statements.

        Raises `NotPython` if tokenizing fails with a `tokenize.TokenError`
        or an `IndentationError`.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno)
            )

        excluded_lines = self.first_lines(self.excluded)
        # Docstring lines and excluded lines are not reported as executable.
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return (lines, excluded_lines)

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multi-line statements; arcs whose
        two ends normalize to the same line are dropped.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Arcs that start at a negative line number, or that start or end on
        an excluded line, are not counted.

        """
        # Set membership keeps the per-arc checks O(1).
        excluded_lines = set(self.first_lines(self.excluded))
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report a negative number as a line number.
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Remove one exit for each line holding a 'class' keyword.
        for l in self.classdefs:
            # classdefs can include lines that never got a count.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
class CodeParser(object):
    """Analyze Python source: executable lines, excluded lines, and arcs."""

    def __init__(self, text=None, filename=None, exclude=None):
        """Build a parser from `text`, or from the contents of `filename`.

        When `text` is empty or missing, the source is read from `filename`
        with `open_source`; a failed read is reported as `NoSource`.
        `exclude` is a regex selecting the lines to treat as excluded.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                source_file = open_source(self.filename)
                try:
                    self.text = source_file.read()
                finally:
                    source_file.close()
            except IOError:
                io_err = sys.exc_info()[1]
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, io_err)
                )

        # A byte-order mark at the start of the text is discarded.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude
        self.show_tokens = False

        # Source split into individual lines.
        self.lines = self.text.split('\n')

        # Line-number bookkeeping, filled in by _raw_parse:
        #   excluded          lines that are excluded
        #   docstrings        lines that belong to docstrings
        #   classdefs         lines that hold a 'class' keyword
        #   statement_starts  lines on which statements begin
        #   multiline         line number -> (lo, hi) of its statement
        self.excluded = set()
        self.docstrings = set()
        self.classdefs = set()
        self.statement_starts = set()
        self.multiline = {}

        # The ByteParser is only built when first asked for.
        self._byte_parser = None

    def _get_byte_parser(self):
        """Return the ByteParser for this source, building it if needed."""
        if not self._byte_parser:
            self._byte_parser = ByteParser(
                text=self.text, filename=self.filename
            )
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Return the set of line numbers on which any of `regexes` is found.

        Line numbers are 1-based.  A partial match anywhere in the line is
        enough; the regex does not have to cover the whole line.

        """
        pattern = re.compile(join_regex(regexes))
        return set(
            index + 1
            for index, line_text in enumerate(self.lines)
            if pattern.search(line_text)
        )

    def _raw_parse(self):
        """Walk the token stream and record facts about the source lines.

        Several member fields are updated along the way.

        """
        # Lines that match the exclusion pattern are the starting point.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # State for the token walk, which finds excluded suites, docstrings
        # and multi-line statements.
        depth = 0
        suite_depth = 0
        in_excluded_suite = False
        last_kind = token.INDENT
        stmt_start = None
        saw_code = False

        tokens = tokenize.generate_tokens(StringIO(self.text).readline)
        for kind, tok_text, (start_line, _), (end_line, _), src_line in tokens:
            if self.show_tokens:                # pragma: no cover
                kind_name = tokenize.tok_name.get(kind, kind)
                span = nice_pair((start_line, end_line))
                print("%10s %5s %-20r %r" % (
                    kind_name, span, tok_text, src_line
                ))

            if kind == token.INDENT:
                depth += 1
            elif kind == token.DEDENT:
                depth -= 1
            elif kind == token.NAME and tok_text == 'class':
                # In the byte code a class definition looks like a branch,
                # so the lines carrying the 'class' keyword are noted here
                # and compensated for later.
                self.classdefs.add(start_line)
            elif kind == token.OP and tok_text == ':':
                # The colon is the trigger so that a pragma comment on the
                # same line as the colon is honored.
                if end_line in self.excluded and not in_excluded_suite:
                    suite_depth = depth
                    in_excluded_suite = True
            elif kind == token.STRING and last_kind == token.INDENT:
                # A string that opens an indented line is taken to be a
                # docstring (the same trick trace.py in the stdlib uses).
                # It is right in nearly every case; for the exceptions see
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                self.docstrings.update(range(start_line, end_line + 1))
            elif kind == token.NEWLINE:
                if stmt_start is not None and stmt_start != end_line:
                    # The statement finished on a later line than it began
                    # on: remember the whole range for each of its lines.
                    span = (stmt_start, end_line)
                    for lineno in range(stmt_start, end_line + 1):
                        self.multiline[lineno] = span
                stmt_start = None

            if kind != tokenize.COMMENT and tok_text.strip():
                # Real content: neither whitespace nor a comment.
                saw_code = True
                if stmt_start is None:
                    # First real token of a new statement.
                    stmt_start = start_line
                    # Leaving the excluded suite once we are back at (or
                    # above) the indentation where it began.
                    if in_excluded_suite and depth <= suite_depth:
                        in_excluded_suite = False
                    if in_excluded_suite:
                        self.excluded.add(end_line)

            last_kind = kind

        # Ask the byte parser where executable statements begin.
        if saw_code:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Give the line on which the statement containing `line` starts."""
        span = self.multiline.get(line)
        if span:
            return span[0]
        return line

    def first_lines(self, lines, ignore=None):
        """Normalize `lines` to the first lines of their statements.

        Lines listed in `ignore` are dropped, as are lines whose statement
        begins on a line listed in `ignore`.  The result is a sorted list.

        """
        skip = ignore or []
        starts = set()
        for lineno in lines:
            if lineno not in skip:
                start = self.first_line(lineno)
                if start not in skip:
                    starts.add(start)
        return sorted(starts)

    def parse_source(self):
        """Find the executable and the excluded lines of the source.

        Returns a pair of sorted lists: executable line numbers first,
        excluded line numbers second.  Every reported number refers to the
        first line of its (possibly multi-line) statement.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            tok_err = sys.exc_info()[1]
            msg, lineno = tok_err.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                    (self.filename, msg, lineno)
                )

        excluded = self.first_lines(self.excluded)
        not_reported = excluded + list(self.docstrings)
        executable = self.first_lines(self.statement_starts, not_reported)
        return executable, excluded

    def arcs(self):
        """Return the arcs available in the code as a sorted list of pairs.

        Both ends of every arc are normalized to the first line of their
        statement, and arcs that collapse onto a single line are left out.

        """
        found = []
        for origin, target in self.byte_parser._all_arcs():
            pair = (self.first_line(origin), self.first_line(target))
            if pair[0] != pair[1]:
                found.append(pair)
        found.sort()
        return found
    arcs = expensive(arcs)

    def exit_counts(self):
        """Map each line number to the number of exits from that line.

        Excluded lines do not take part in the counting.

        """
        skipped = self.first_lines(self.excluded)
        counts = {}
        for origin, target in self.arcs():
            # Never report a negative line number, never report an
            # excluded line, and never count an arc into an excluded line.
            if origin < 0 or origin in skipped or target in skipped:
                continue
            counts[origin] = counts.get(origin, 0) + 1

        # Each class definition carries one extra exit; take it off again.
        # Only lines that were counted are adjusted, because classdefs may
        # contain excluded lines.
        for class_line in self.classdefs:
            if class_line in counts:
                counts[class_line] -= 1

        return counts
    exit_counts = expensive(exit_counts)
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        Raises `NoSource` if reading `filename` fails with an IOError.

        """
        self.filename = filename or '<code>'
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    # Close the file even when the read fails.
                    sourcef.close()
            except IOError:
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                )

        # Scrap the BOM (U+FEFF) if it exists.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude

        # When true, _raw_parse prints each token as it is read.
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of lines holding a 'class' keyword.
        self.classdefs = set()

        # A dict mapping line numbers to (lo, hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser.
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = ByteParser(
                text=self.text, filename=self.filename
            )
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.  Line numbers are 1-based.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                matches.add(i + 1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated: `excluded`, `classdefs`,
        `docstrings`, `multiline` and `statement_starts`.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                # Written as a call with one argument so the line parses on
                # Python 3 as well as Python 2; the old print statement form
                # is a syntax error on Python 3.
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Note the lines with the 'class' keyword; exit_counts
                # subtracts one exit for each of them.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite, triggered by a colon that
                    # sits on an excluded line.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that come directly after an INDENT token are
                # recorded as docstrings, line by line.
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.  Source with no
        # real tokens at all is never handed to the byte parser.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        # Membership is tested twice per line; a set keeps that cheap even
        # when `ignore` lists every excluded and docstring line.
        ignore = frozenset(ignore) if ignore else frozenset()
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        Raises `NotPython` when tokenizing raises `tokenize.TokenError` or
        `IndentationError`.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno)
            )

        excluded_lines = self.first_lines(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return (lines, excluded_lines)

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            # An arc inside a single statement is not reported.
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        # A set gives O(1) membership tests inside the arc loop.
        excluded_lines = set(self.first_lines(self.excluded))
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report a negative number as a line number.
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Remove one exit for each line holding a 'class' keyword.
        for l in self.classdefs:
            # Ensure key is there: classdefs can hold lines with no count.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)