Example #1
# Stdlib imports this example relies on.  The remaining names
# (open_source, join_regex, nice_pair, expensive, NoSource, NotPython,
# ByteParser) are coverage.py's own internal helpers, defined in its
# other modules.
import re
import sys
import token
import tokenize
from io import StringIO

class CodeParser(object):
    def __init__(self, text=None, filename=None, exclude=None):
        self.filename = filename or '<code>'
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()

            except IOError:
                _, err, _ = sys.exc_info()
                raise NoSource("No source for code: '%s': %s" %
                               (self.filename, err))

        # Strip a leading Unicode BOM (U+FEFF) if present.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]
        self.exclude = exclude
        self.show_tokens = False
        self.lines = self.text.split('\n')
        self.excluded = set()
        self.docstrings = set()
        self.classdefs = set()
        self.multiline = {}
        self.statement_starts = set()
        self._byte_parser = None

    def _get_byte_parser(self):
        if not self._byte_parser:
            self._byte_parser = ByteParser(text=self.text,
                                           filename=self.filename)
        return self._byte_parser

    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                matches.add(i + 1)

        return matches

    def _raw_parse(self):
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True
        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng

                first_line = None
            if ttext.strip() and toktype != tokenize.COMMENT:
                empty = False
                if first_line is None:
                    first_line = slineno
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)
            prev_toktype = toktype

        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)

        return sorted(lset)

    def parse_source(self):
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno))

        excluded_lines = self.first_lines(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)
        return (lines, excluded_lines)

    def arcs(self):
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))

        return sorted(all_arcs)

    arcs = expensive(arcs)

    def exit_counts(self):
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                continue
            if l1 in excluded_lines:
                continue
            if l2 in excluded_lines:
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        for l in self.classdefs:
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts

    exit_counts = expensive(exit_counts)
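
For context, here is a minimal usage sketch of this class. It is illustrative only: it assumes the rest of coverage.py's internals (in particular ByteParser) are importable alongside CodeParser, and the source snippet and exclude regex are made up for the demonstration.

# Minimal usage sketch (illustrative; assumes coverage.py's internals
# such as ByteParser are available alongside this class).
source = (
    "def fib(n):\n"
    "    if n < 0:  # pragma: no cover\n"
    "        raise ValueError\n"
    "    return n if n < 2 else fib(n - 1) + fib(n - 2)\n"
)
parser = CodeParser(text=source, exclude=r"#\s*pragma:\s*no cover")
statements, excluded = parser.parse_source()
# statements: executable line numbers; excluded: lines matched by the
# exclude regex (and their suites), both normalized to statement starts.
print(statements, excluded)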
Example #2
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""

    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read.  Excluded lines are those that match
        `exclude`, a regex.

        """
        assert text or filename, "CodeParser needs either text or filename"
        self.filename = filename or "<code>"
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()
            except IOError:
                _, err, _ = sys.exc_info()
                raise NoSource(
                    "No source for code: '%s': %s" % (self.filename, err)
                    )

        # Scrap the BOM if it exists.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]

        self.exclude = exclude

        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = self.text.split('\n')

        # The line numbers of excluded lines of code.
        self.excluded = set()

        # The line numbers of docstring lines.
        self.docstrings = set()

        # The line numbers of class definitions.
        self.classdefs = set()

        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}

        # The line numbers that start statements.
        self.statement_starts = set()

        # Lazily-created ByteParser
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = ByteParser(
                text=self.text, filename=self.filename)
        return self._byte_parser
    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.

        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.

        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                matches.add(i+1)
        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.

        A handful of member fields are updated.

        """
        # Find lines which match an exclusion pattern.
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)

        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True

        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:                # pragma: no cover
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                # Class definitions look like branches in the byte code, so
                # we need to exclude them.  The simplest way is to note the
                # lines with the 'class' keyword.
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.) This works for
                # 99.9999% of cases.  For the rest (!) see:
                # http://stackoverflow.com/questions/1769332/x/1769794#1769794
                for i in range(slineno, elineno+1):
                    self.docstrings.add(i)
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None

            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                empty = False
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)

            prev_toktype = toktype

        # Find the starts of the executable statements.
        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.

        Skip any line mentioned in `ignore`.

        Returns a sorted list of the first lines.

        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)
        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.

        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.

        Reported line numbers are normalized to the first line of multi-line
        statements.

        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                    (self.filename, msg, lineno)
                )

        excluded_lines = self.first_lines(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)

        return lines, excluded_lines

    def arcs(self):
        """Get information about the arcs available in the code.

        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.

        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))
        return sorted(all_arcs)
    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.

        Excluded lines are excluded.

        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                # Don't ever report -1 as a line number
                continue
            if l1 in excluded_lines:
                # Don't report excluded lines as line numbers.
                continue
            if l2 in excluded_lines:
                # Arcs to excluded lines shouldn't count.
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        # Class definitions have one extra exit, so remove one for each:
        for l in self.classdefs:
            # Ensure key is there: classdefs can include excluded lines.
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts
    exit_counts = expensive(exit_counts)
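
The multiline bookkeeping in _raw_parse above is easiest to see in isolation. The following standalone sketch (stdlib only, not part of coverage.py) reproduces just that piece of the token loop: every physical line of a multi-line statement is mapped to the statement's (first, last) line pair, which is what first_line later uses to normalize line numbers.

import token
import tokenize
from io import StringIO

src = "x = (1 +\n     2 +\n     3)\n"
multiline = {}
first_line = None
for toktype, ttext, (slineno, _), (elineno, _), _ in \
        tokenize.generate_tokens(StringIO(src).readline):
    if toktype == token.NEWLINE:
        # End of a logical line: if it spanned several physical lines,
        # map each one to the whole (first, last) range.
        if first_line is not None and elineno != first_line:
            for l in range(first_line, elineno + 1):
                multiline[l] = (first_line, elineno)
        first_line = None
    elif ttext.strip() and toktype != tokenize.COMMENT:
        # First non-whitespace token of a statement: remember its line.
        if first_line is None:
            first_line = slineno

print(multiline)   # {1: (1, 3), 2: (1, 3), 3: (1, 3)}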
Example #3
class CodeParser(object):
    """Parse code to find executable lines, excluded lines, etc."""
    def __init__(self, text=None, filename=None, exclude=None):
        """
        Source can be provided as `text`, the text itself, or `filename`, from
        which the text will be read.  Excluded lines are those that match
        `exclude`, a regex.
        
        """
        self.filename = filename or '<code>'
        self.text = text
        if not self.text:
            try:
                sourcef = open_source(self.filename)
                try:
                    self.text = sourcef.read()
                finally:
                    sourcef.close()

            except IOError:
                _, err, _ = sys.exc_info()
                raise NoSource("No source for code: '%s': %s" %
                               (self.filename, err))

        # Strip a leading Unicode BOM (U+FEFF) if present.
        if self.text and ord(self.text[0]) == 0xfeff:
            self.text = self.text[1:]
        self.exclude = exclude
        self.show_tokens = False
        self.lines = self.text.split('\n')
        self.excluded = set()
        self.docstrings = set()
        self.classdefs = set()
        self.multiline = {}
        self.statement_starts = set()
        self._byte_parser = None

    def _get_byte_parser(self):
        """Create a ByteParser on demand."""
        if not self._byte_parser:
            self._byte_parser = ByteParser(text=self.text,
                                           filename=self.filename)
        return self._byte_parser

    byte_parser = property(_get_byte_parser)

    def lines_matching(self, *regexes):
        """Find the lines matching one of a list of regexes.
        
        Returns a set of line numbers, the lines that contain a match for one
        of the regexes in `regexes`.  The entire line needn't match, just a
        part of it.
        
        """
        regex_c = re.compile(join_regex(regexes))
        matches = set()
        for i, ltext in enumerate(self.lines):
            if regex_c.search(ltext):
                matches.add(i + 1)

        return matches

    def _raw_parse(self):
        """Parse the source to find the interesting facts about its lines.
        
        A handful of member fields are updated.
        
        """
        if self.exclude:
            self.excluded = self.lines_matching(self.exclude)
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None
        empty = True
        tokgen = tokenize.generate_tokens(StringIO(self.text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.NAME and ttext == 'class':
                self.classdefs.add(slineno)
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                self.docstrings.update(range(slineno, elineno + 1))
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno + 1):
                        self.multiline[l] = rng

                first_line = None
            if ttext.strip() and toktype != tokenize.COMMENT:
                empty = False
                if first_line is None:
                    first_line = slineno
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)
            prev_toktype = toktype

        if not empty:
            self.statement_starts.update(self.byte_parser._find_statements())

    def first_line(self, line):
        """Return the first line number of the statement including `line`."""
        rng = self.multiline.get(line)
        if rng:
            first_line = rng[0]
        else:
            first_line = line
        return first_line

    def first_lines(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.
        
        Skip any line mentioned in `ignore`.
        
        Returns a sorted list of the first lines.
        
        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            new_l = self.first_line(l)
            if new_l not in ignore:
                lset.add(new_l)

        return sorted(lset)

    def parse_source(self):
        """Parse source text to find executable lines, excluded lines, etc.
        
        Return values are 1) a sorted list of executable line numbers, and
        2) a sorted list of excluded line numbers.
        
        Reported line numbers are normalized to the first line of multi-line
        statements.
        
        """
        try:
            self._raw_parse()
        except (tokenize.TokenError, IndentationError):
            _, tokerr, _ = sys.exc_info()
            msg, lineno = tokerr.args
            raise NotPython(
                "Couldn't parse '%s' as Python source: '%s' at %s" %
                (self.filename, msg, lineno))

        excluded_lines = self.first_lines(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self.first_lines(self.statement_starts, ignore)
        return (lines, excluded_lines)

    def arcs(self):
        """Get information about the arcs available in the code.
        
        Returns a sorted list of line number pairs.  Line numbers have been
        normalized to the first line of multiline statements.
        
        """
        all_arcs = []
        for l1, l2 in self.byte_parser._all_arcs():
            fl1 = self.first_line(l1)
            fl2 = self.first_line(l2)
            if fl1 != fl2:
                all_arcs.append((fl1, fl2))

        return sorted(all_arcs)

    arcs = expensive(arcs)

    def exit_counts(self):
        """Get a mapping from line numbers to count of exits from that line.
        
        Excluded lines are excluded.
        
        """
        excluded_lines = self.first_lines(self.excluded)
        exit_counts = {}
        for l1, l2 in self.arcs():
            if l1 < 0:
                continue
            if l1 in excluded_lines:
                continue
            if l2 in excluded_lines:
                continue
            if l1 not in exit_counts:
                exit_counts[l1] = 0
            exit_counts[l1] += 1

        for l in self.classdefs:
            if l in exit_counts:
                exit_counts[l] -= 1

        return exit_counts

    exit_counts = expensive(exit_counts)
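
lines_matching depends on join_regex from coverage's utility code. A plausible stand-in (an assumption for demonstration, not coverage's actual implementation) simply ORs the patterns into one alternation, which is enough to show how the exclusion matching works:

import re

def join_regex(regexes):
    # Assumed behavior: combine the patterns into one alternation.
    return "|".join("(?:%s)" % r for r in regexes)

lines = [
    "x = 1",
    "if debug:  # pragma: no cover",
    "    dump_state()",
]
regex_c = re.compile(join_regex([r"#\s*pragma:\s*no cover"]))
matches = set(i + 1 for i, ltext in enumerate(lines) if regex_c.search(ltext))
print(matches)   # {2}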