예제 #1
0
    def blame_incremental(self, rev, file, **kwargs):
        """Iterator for blame information for the given file at the given revision.

        Unlike .blame(), this does not return the actual file's contents, only
        a stream of BlameEntry tuples.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :param file: path of the file to blame; handed to git after a ``--``
                     separator.
        :param kwargs: additional keyword arguments forwarded to
                       ``self.git.blame``.
        :return: lazy iterator of BlameEntry tuples, where the commit
                 indicates the commit to blame for the line, and range
                 indicates a span of line numbers in the resulting file.
                 Each entry is built from, in order: the Commit, the range of
                 line numbers in the resulting file, the decoded original
                 file name and the range of line numbers in the original file.

        If you combine all line number ranges outputted by this command, you
        should get a continuous range spanning all line numbers in the file.

        The iterator ends cleanly once the output is exhausted. If the output
        stops in the middle of an entry, iteration simply stops and that
        incomplete entry is not yielded.
        """
        data = self.git.blame(rev,
                              '--',
                              file,
                              p=True,
                              incremental=True,
                              stdout_as_string=False,
                              **kwargs)
        commits = {}

        # Blank lines carry no information, so they are filtered out up front.
        stream = (line for line in data.split(b'\n') if line)

        # NOTE: a bare next(stream) must not be used inside this generator.
        # Since PEP 479 a StopIteration escaping a generator body is turned
        # into a RuntimeError, so the end of the stream has to be detected
        # explicitly: by the for loop here, and by next(stream, None) below.
        for header in stream:
            # Entry header: <hexsha> <orig_lineno> <lineno> <num_lines>
            hexsha, orig_lineno, lineno, num_lines = header.split()
            lineno = int(lineno)
            num_lines = int(num_lines)
            orig_lineno = int(orig_lineno)

            if hexsha not in commits:
                # First time we see this commit: read the following lines
                # and build up a dict of properties for it.
                props = {}
                while True:
                    line = next(stream, None)
                    if line is None:
                        # Output ended in the middle of an entry
                        return
                    if line == b'boundary':
                        # "boundary" indicates a root commit and occurs
                        # instead of the "previous" tag
                        continue

                    # partition() tolerates a tag that has no value at all,
                    # where split(b' ', 1) would fail to unpack
                    tag, _sep, value = line.partition(b' ')
                    props[tag] = value
                    if tag == b'filename':
                        # "filename" formally terminates the entry for --incremental
                        orig_filename = value
                        break

                c = Commit(
                    self,
                    hex_to_bin(hexsha),
                    author=Actor(
                        safe_decode(props[b'author']),
                        safe_decode(
                            props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                    authored_date=int(props[b'author-time']),
                    committer=Actor(
                        safe_decode(props[b'committer']),
                        safe_decode(props[b'committer-mail'].lstrip(
                            b'<').rstrip(b'>'))),
                    committed_date=int(props[b'committer-time']))
                commits[hexsha] = c
            else:
                # Known commit: discard all lines until we find "filename",
                # which is guaranteed to be the last line of the entry.
                while True:
                    line = next(stream, None)
                    if line is None:
                        # Output ended in the middle of an entry
                        return
                    tag, _sep, value = line.partition(b' ')
                    if tag == b'filename':
                        orig_filename = value
                        break

            yield BlameEntry(commits[hexsha],
                             range(lineno, lineno + num_lines),
                             safe_decode(orig_filename),
                             range(orig_lineno, orig_lineno + num_lines))
예제 #2
0
파일: base.py 프로젝트: zofuthan/GitPython
    def blame(self, rev, file):
        """Return the blame information for the given file at the given revision.

        Calls ``self.git.blame`` with ``p=True`` and ``stdout_as_string=False``
        and parses the returned bytes line by line (presumably git's porcelain
        blame format, judging by the lines handled below).

        :param rev: revision specifier, see git-rev-parse for viable options.
        :param file: path of the file to blame; handed to git after a ``--``
            separator.
        :return:
            list: [git.Commit, list: [<line>]]
            A list of two-element lists associating a Commit object with a list of
            lines that changed within the given commit. The Commit objects will be
            given in order of appearance. Lines that could be decoded are returned
            as text with trailing whitespace and one leading tab removed; lines that
            could not be decoded are returned unchanged as raw bytes."""
        data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
        # Cache of already created Commit objects, keyed by hexsha
        commits = dict()
        # Result list of [commit, [lines]] entries
        blames = list()
        # Properties collected so far for the commit currently being parsed
        info = None

        keepends = True
        for line in data.splitlines(keepends):
            try:
                line = line.rstrip().decode(defenc)
            except UnicodeDecodeError:
                # The line cannot be decoded, so it is treated as binary file content.
                # An empty firstpart routes it to the content branch below (assuming
                # neither regex matches an empty string); 'line' keeps its raw bytes
                # and 'parts' keeps its value from the previous iteration, which that
                # branch never reads.
                firstpart = ''
                is_binary = True
            else:
                # We have no idea where binary data ends, as it may itself contain
                # newlines, so we rely on being able to decode the line to tell us
                # what it is. This can fail even on text files, but if it does we
                # should be fine treating the line as binary instead.
                parts = self.re_whitespace.split(line, 1)
                firstpart = parts[0]
                is_binary = False
            # end handle decode of line

            if self.re_hexsha_only.search(firstpart):
                # handles
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7        - indicates blame-data start
                # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2          - indicates
                # another line of blame with the same data
                digits = parts[-1].split(" ")
                if len(digits) == 3:
                    # Three numbers: start of a new blame entry. Its commit slot is
                    # filled in once the first content line has been seen.
                    info = {'id': firstpart}
                    blames.append([None, []])
                elif info['id'] != firstpart:
                    # A different commit than the one being tracked: start a new
                    # entry, reusing the cached Commit if there is one.
                    info = {'id': firstpart}
                    blames.append([commits.get(firstpart), []])
                # END blame data initialization
            else:
                m = self.re_author_committer_start.search(firstpart)
                if m:
                    # handles:
                    # author Tom Preston-Werner
                    # author-mail <*****@*****.**>
                    # author-time 1192271832
                    # author-tz -0700
                    # committer Tom Preston-Werner
                    # committer-mail <*****@*****.**>
                    # committer-time 1192271832
                    # committer-tz -0700  - IGNORED BY US
                    # role is the matched text - presumably 'author' or 'committer'
                    role = m.group(0)
                    if firstpart.endswith('-mail'):
                        info["%s_email" % role] = parts[-1]
                    elif firstpart.endswith('-time'):
                        info["%s_date" % role] = int(parts[-1])
                    elif role == firstpart:
                        info[role] = parts[-1]
                    # END distinguish mail,time,name
                else:
                    # handle
                    # filename lib/grit.rb
                    # summary add Blob
                    # <and rest>
                    if firstpart.startswith('filename'):
                        info['filename'] = parts[-1]
                    elif firstpart.startswith('summary'):
                        info['summary'] = parts[-1]
                    elif firstpart == '':
                        # An empty first part marks file content: either an
                        # undecodable line (see above) or, presumably, a line
                        # starting with whitespace such as the tab that precedes
                        # each content line.
                        if info:
                            sha = info['id']
                            c = commits.get(sha)
                            if c is None:
                                # First content line of this commit: build the Commit
                                # from the collected properties and cache it.
                                c = Commit(
                                    self,
                                    hex_to_bin(sha),
                                    author=Actor._from_string(
                                        info['author'] + ' ' +
                                        info['author_email']),
                                    authored_date=info['author_date'],
                                    committer=Actor._from_string(
                                        info['committer'] + ' ' +
                                        info['committer_email']),
                                    committed_date=info['committer_date'],
                                    message=info['summary'])
                                commits[sha] = c
                            # END if commit objects needs initial creation
                            if not is_binary:
                                # Drop the single leading tab in front of the content
                                if line and line[0] == '\t':
                                    line = line[1:]
                            else:
                                # NOTE: We are actually parsing lines out of binary data, which can lead to the
                                # binary being split up along the newline separator. We will append this to the blame
                                # we are currently looking at, even though it should be concatenated with the last line
                                # we have seen.
                                pass
                            # end handle line contents
                            blames[-1][0] = c
                            blames[-1][1].append(line)
                            # Keep only the id so that properties of this commit
                            # do not leak into the next one
                            info = {'id': sha}
                        # END if we collected commit info
                    # END distinguish filename,summary,rest
                # END distinguish author|committer vs filename,summary,rest
            # END distinguish hexsha vs other information
        return blames
예제 #3
0
    def blame_incremental(self, rev, file, **kwargs):
        """Iterator for blame information for the given file at the given revision.

        Unlike .blame(), this does not return the actual file's contents, only
        a stream of (commit, range) tuples.

        :param rev: revision specifier, see git-rev-parse for viable options.
        :param file: path of the file to blame; handed to git after a ``--``
                     separator.
        :param kwargs: additional keyword arguments forwarded to
                       ``self.git.blame``.
        :return: lazy iterator of (git.Commit, range) tuples, where the commit
                 indicates the commit to blame for the line, and range
                 indicates a span of line numbers in the resulting file.

        If you combine all line number ranges outputted by this command, you
        should get a continuous range spanning all line numbers in the file.

        The iterator ends cleanly once the output is exhausted. If the output
        stops in the middle of an entry, iteration simply stops and that
        incomplete entry is not yielded.
        """
        data = self.git.blame(rev,
                              '--',
                              file,
                              p=True,
                              incremental=True,
                              stdout_as_string=False,
                              **kwargs)
        commits = {}

        stream = iter(data.splitlines())

        # NOTE: a bare next(stream) must not be used inside this generator.
        # Since PEP 479 a StopIteration escaping a generator body is turned
        # into a RuntimeError, so the end of the stream has to be detected
        # explicitly: by the for loop here, and by next(stream, None) below.
        for header in stream:
            # Entry header: <hexsha> <orig_lineno> <lineno> <num_lines>
            hexsha, _, lineno, num_lines = header.split()
            lineno = int(lineno)
            num_lines = int(num_lines)

            if hexsha not in commits:
                # First time we see this commit: read the following lines
                # and build up a dict of properties for it.
                props = {}
                while True:
                    line = next(stream, None)
                    if line is None:
                        # Output ended in the middle of an entry
                        return
                    if line == b'boundary':
                        # "boundary" indicates a root commit and occurs
                        # instead of the "previous" tag
                        continue

                    # partition() tolerates a tag that has no value at all,
                    # where split(b' ', 1) would fail to unpack
                    tag, _sep, value = line.partition(b' ')
                    props[tag] = value
                    if tag == b'filename':
                        # "filename" formally terminates the entry for --incremental
                        break

                c = Commit(
                    self,
                    hex_to_bin(hexsha),
                    author=Actor(
                        safe_decode(props[b'author']),
                        safe_decode(
                            props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                    authored_date=int(props[b'author-time']),
                    committer=Actor(
                        safe_decode(props[b'committer']),
                        safe_decode(props[b'committer-mail'].lstrip(
                            b'<').rstrip(b'>'))),
                    committed_date=int(props[b'committer-time']),
                    message=safe_decode(props[b'summary']))
                commits[hexsha] = c
            else:
                # Known commit: skip everything up to and including the
                # "filename" line that terminates the entry. Other lines
                # (e.g. "previous ...") may precede it, so the next line
                # cannot be assumed to be the filename tag.
                while True:
                    line = next(stream, None)
                    if line is None:
                        # Output ended in the middle of an entry
                        return
                    tag, _sep, _value = line.partition(b' ')
                    if tag == b'filename':
                        break

            yield commits[hexsha], range(lineno, lineno + num_lines)