Example 1

import logging
from datetime import datetime, timedelta, timezone

# Git, HyperBlameCommit and BlameLine are assumed to be provided elsewhere;
# one possible sketch of them follows the listing.
logger = logging.getLogger(__name__)

class GitHyperBlame:
    def __init__(self, path: str):
        self.g = Git(path)
        self.diff_hunks_cache = {}

    def parse_blame(self, blameoutput):
        """Parses the output of git blame -p into a data structure."""
        lines = blameoutput.split('\n')
        i = 0
        commits = {}

        while i < len(lines):
            # Read a commit line and parse it.
            line = lines[i]
            i += 1
            if not line.strip():
                continue
            commitline = line.split()
            commithash = commitline[0]
            lineno_then = int(commitline[1])
            lineno_now = int(commitline[2])

            try:
                commit = commits[commithash]
            except KeyError:
                commit = HyperBlameCommit(commithash)
                commits[commithash] = commit

            # Read commit details until we find a context line.
            while i < len(lines):
                line = lines[i]
                i += 1
                if line.startswith('\t'):
                    break

                try:
                    key, value = line.split(' ', 1)
                except ValueError:
                    key = line
                    value = True
                setattr(commit, key.replace('-', '_'), value)

            context = line[1:]

            yield BlameLine(commit, context, lineno_then, lineno_now, False)

    def get_parsed_blame(self, filename, revision='HEAD'):
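        # '-p' requests the porcelain format that parse_blame understands.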
        blame = self.g.blame('-p', revision, '--', filename)
        return list(self.parse_blame(blame))

    def hyper_blame(self, ignored, filename, revision='HEAD'):
        # Map from commit to parsed blame from that commit.
        blame_from = {}

        def cache_blame_from(filename, commithash):
            try:
                return blame_from[commithash]
            except KeyError:
                parsed = self.get_parsed_blame(filename, commithash)
                blame_from[commithash] = parsed
                return parsed

        parsed = cache_blame_from(filename, revision)

        new_parsed = []
        for line in parsed:
            # If a line references an ignored commit, blame that commit's
            # parent repeatedly until we find a non-ignored commit.
            while line.commit.commithash in ignored:
                if line.commit.previous is None:
                    # You can't ignore the commit that added this file.
                    break

                # The 'previous' blame header has the form
                # '<parent hash> <path at that parent>'.
                previouscommit, previousfilename = line.commit.previous.split(
                    ' ', 1)
                parent_blame = cache_blame_from(previousfilename,
                                                previouscommit)

                if len(parent_blame) == 0:
                    # The previous version of this file was empty,
                    # therefore, you can't ignore this commit.
                    break

                # line.lineno_then is the line number in question at
                # line.commit. We need to translate that line number so that
                # it refers to the position of the same line on previouscommit.
                lineno_previous = self.approx_lineno_across_revs(
                    line.commit.filename, previousfilename,
                    line.commit.commithash, previouscommit, line.lineno_then)
                logger.debug('ignore commit %s on line p%d/t%d/n%d',
                             line.commit.commithash, lineno_previous,
                             line.lineno_then, line.lineno_now)

                # Get the line at lineno_previous in the parent commit.
                assert 1 <= lineno_previous <= len(parent_blame)
                newline = parent_blame[lineno_previous - 1]

                # Replace the commit and lineno_then, but not the lineno_now
                # or context.
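                # The final True argument marks the line as modified, which
                # build_result later renders as a trailing '*'.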
                line = BlameLine(newline.commit, line.context,
                                 newline.lineno_then, line.lineno_now, True)
                logger.debug('replacing with %r', line)

            new_parsed.append(line)

        return self.build_result(new_parsed)

    def approx_lineno_across_revs(self, filename, newfilename, revision,
                                  newrevision, lineno):
        """Computes the approximate movement of a line number between two
        revisions.

        Consider line |lineno| in |filename| at |revision|. This function
        computes the line number of that line in |newfilename| at
        |newrevision|. This is necessarily approximate.

        Args:
          filename: The file (within the repo) at |revision|.
          newfilename: The name of the same file at |newrevision|.
          revision: A git revision.
          newrevision: Another git revision. Note: Can be ahead or behind
          |revision|.
          lineno: Line number within |filename| at |revision|.

        Returns:
          Line number within |newfilename| at |newrevision|.
        """
        # This doesn't work that well if there are a lot of line changes
        # within the hunk (demonstrated by
        # GitHyperBlameLineMotionTest.testIntraHunkLineMotion). A fuzzy
        # heuristic that takes the text of the new line and tries to find a
        # deleted line within the hunk that mostly matches the new line
        # could help.

        # Use the <revision>:<filename> syntax to diff between two blobs.
        # This is the only way to diff a file that has been renamed.
        old = '%s:%s' % (revision, filename)
        new = '%s:%s' % (newrevision, newfilename)
        hunks = self.cache_diff_hunks(old, new)

        cumulative_offset = 0

        # Find the hunk containing lineno (if any).
        for (oldstart, oldlength), (newstart, newlength) in hunks:
            cumulative_offset += newlength - oldlength
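            # e.g. a hunk '-10,2 +10,5' turns two old lines into five new
            # ones, so every later line shifts down by three.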

            if lineno >= oldstart + oldlength:
                # Not there yet.
                continue

            if lineno < oldstart:
                # Gone too far.
                break

            # lineno is in [oldstart, oldlength] at revision; [newstart,
            # newlength] at newrevision.

            # If newlength == 0, newstart will be the line before the deleted
            # hunk. Since the line must have been deleted, just return that as
            # the nearest line in the new file. Caution: newstart can be 0 in
            # this case.
            if newlength == 0:
                return max(1, newstart)

            newend = newstart + newlength - 1

            # Move lineno based on the amount the entire hunk shifted.
            lineno = lineno + newstart - oldstart
            # Constrain the output within the range [newstart, newend].
            return min(newend, max(newstart, lineno))

        # Wasn't in a hunk. Figure out the line motion based on the difference
        # in length between the hunks seen so far.
        return lineno + cumulative_offset

    def cache_diff_hunks(self, oldrev, newrev):
        def parse_start_length(s):
            # Chop the '-' or '+'.
            s = s[1:]
            # Length is optional (defaults to 1).
            try:
                start, length = s.split(',')
            except ValueError:
                start = s
                length = 1
            return int(start), int(length)

        try:
            return self.diff_hunks_cache[(oldrev, newrev)]
        except KeyError:
            pass

        # Use -U0 to get the smallest possible hunks.
        diff = self.g.diff(oldrev, newrev, '-U0')

        # Get all the hunks.
        hunks = []
        for line in diff.split('\n'):
            if not line.startswith('@@'):
                continue
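            # A hunk header looks like '@@ -12,3 +14,5 @@ ...'; fields 1 and 2
            # of the split are the old and new '<start>,<length>' ranges.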
            ranges = line.split(' ', 3)[1:3]
            ranges = tuple(parse_start_length(r) for r in ranges)
            hunks.append(ranges)

        self.diff_hunks_cache[(oldrev, newrev)] = hunks
        return hunks

    def build_result(self, parsedblame):
        table = []
        for line in parsedblame:
            # author_tz is a UTC offset such as '+0200' or '-0430'; keep the
            # sign on the minutes as well so negative offsets work.
            offset = line.commit.author_tz
            hours = int(offset[:-2])
            minutes = int(offset[-2:])
            if offset.startswith('-'):
                minutes = -minutes
            tz = timezone(timedelta(hours=hours, minutes=minutes))
            author_time = datetime.utcfromtimestamp(
                int(line.commit.author_time)) + timedelta(hours=hours,
                                                          minutes=minutes)
            author_time = author_time.replace(tzinfo=tz)
            row = [
                line.commit.commithash[:8], '(' + line.commit.author,
                author_time.strftime('%Y-%m-%d %H:%M:%S %z'),
                str(line.lineno_now) + ('*' if line.modified else '') + ')',
                line.context
            ]
            row.insert(1, line.commit.filename)
            row = ' '.join(row)
            table.append(row)
        return table
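
The listing relies on a Git command wrapper and two small helper types that
are not shown above. Below is a minimal, illustrative sketch of what they
could look like, plus a usage example; GitPython's git.Git is assumed as one
possible backend, and the helper definitions and paths are placeholders, not
part of the original code.

import collections

from git import Git  # GitPython; any wrapper exposing .blame()/.diff() works


class HyperBlameCommit:
    """Per-commit blame headers; parse_blame fills in attributes via setattr."""

    def __init__(self, commithash):
        self.commithash = commithash
        self.previous = None  # becomes '<parent hash> <path>' when present


BlameLine = collections.namedtuple(
    'BlameLine', 'commit context lineno_then lineno_now modified')


# Usage: skip over the listed commits (e.g. bulk reformats) when blaming.
hb = GitHyperBlame('/path/to/repo')
ignored = {'<full 40-character hash of a commit to ignore>'}
for row in hb.hyper_blame(ignored, 'src/main.py', revision='HEAD'):
    print(row)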