def blame_incremental(self, rev, file, **kwargs):
    """Iterator for blame information for the given file at the given revision.

    Unlike .blame(), this does not return the actual file's contents, only
    a stream of BlameEntry tuples.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame; passed to ``git blame`` after ``--``.
    :param kwargs: additional keyword arguments forwarded to ``git blame``.
    :return: lazy iterator of BlameEntry tuples, where the commit
             indicates the commit to blame for the line, and range
             indicates a span of line numbers in the resulting file.
             Each entry is built from (commit, line range in the resulting
             file, original filename, line range in the original file).

    If you combine all line number ranges outputted by this command, you
    should get a continuous range spanning all line numbers in the file.

    Iteration simply ends when the blame output is exhausted. If the output
    stops in the middle of an entry (before its ``filename`` line), the
    incomplete entry is dropped and iteration ends as well.
    """
    data = self.git.blame(rev, '--', file, p=True, incremental=True,
                          stdout_as_string=False, **kwargs)
    # Commit objects are created once per sha and reused for later entries.
    commits = {}

    # Blank lines carry no information in the incremental format.
    stream = (line for line in data.split(b'\n') if line)

    # NOTE: the stream is consumed with ``for`` loops rather than bare
    # ``next()`` calls. A StopIteration escaping a generator body is turned
    # into a RuntimeError (PEP 479), so exhaustion must never leak out.
    for header in stream:
        # Entry header: <sha> <original line> <final line> <number of lines>
        hexsha, orig_lineno, lineno, num_lines = header.split()
        lineno = int(lineno)
        num_lines = int(num_lines)
        orig_lineno = int(orig_lineno)

        if hexsha not in commits:
            # First time we see this commit: read the following lines and
            # build up a dict of its properties.
            props = {}
            for detail in stream:
                if detail == b'boundary':
                    # "boundary" indicates a root commit and occurs
                    # instead of the "previous" tag
                    continue

                tag, value = detail.split(b' ', 1)
                props[tag] = value
                if tag == b'filename':
                    # "filename" formally terminates the entry for --incremental
                    orig_filename = value
                    break
            else:
                # Output ended before the entry was terminated.
                return

            c = Commit(
                self,
                hex_to_bin(hexsha),
                author=Actor(
                    safe_decode(props[b'author']),
                    # The mail address is wrapped in angle brackets.
                    safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                authored_date=int(props[b'author-time']),
                committer=Actor(
                    safe_decode(props[b'committer']),
                    safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                committed_date=int(props[b'committer-time']))
            commits[hexsha] = c
        else:
            # Commit already known: discard all lines until we find
            # "filename", which is guaranteed to be the last line of an entry.
            for detail in stream:
                tag, value = detail.split(b' ', 1)
                if tag == b'filename':
                    orig_filename = value
                    break
            else:
                # Output ended before the entry was terminated.
                return

        yield BlameEntry(commits[hexsha],
                         range(lineno, lineno + num_lines),
                         safe_decode(orig_filename),
                         range(orig_lineno, orig_lineno + num_lines))
def blame(self, rev, file):
    """Return the blame information for the given file at the given revision.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame; passed to ``git blame`` after ``--``.
    :return: list: [git.Commit, list: [<line>]]
        A list of two-element lists, each associating a Commit object with
        the lines attributed to it. The groups are given in order of
        appearance in the blame output. Lines that could not be decoded
        are stored as the undecoded bytes.
    """
    raw = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
    known_commits = {}   # sha -> Commit, so each commit is only built once
    result = []          # [[commit, [line, ...]], ...]
    info = None          # properties collected for the commit being parsed

    for line in raw.splitlines(True):
        try:
            line = line.rstrip().decode(defenc)
        except UnicodeDecodeError:
            # We cannot know where binary data ends, as it may itself contain
            # newlines, so a failed decode is what marks a line as binary.
            # `line` stays the raw bytes; `parts` is deliberately left as is.
            head = ''
            is_binary = True
        else:
            # Decoding can in principle fail on text files as well, in which
            # case treating the line as binary (above) is still fine.
            parts = self.re_whitespace.split(line, 1)
            head = parts[0]
            is_binary = False

        # --- blame header ------------------------------------------------
        # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7   - blame-data start
        # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2     - another line of
        #                                                    blame with the same data
        if self.re_hexsha_only.search(head):
            if len(parts[-1].split(" ")) == 3:
                info = {'id': head}
                result.append([None, []])
            elif info['id'] != head:
                info = {'id': head}
                result.append([known_commits.get(head), []])
            continue

        # --- author / committer properties -------------------------------
        # author Tom Preston-Werner
        # author-mail <*****@*****.**>
        # author-time 1192271832
        # author-tz -0700
        # committer Tom Preston-Werner
        # committer-mail <*****@*****.**>
        # committer-time 1192271832
        # committer-tz -0700  - IGNORED BY US
        role_match = self.re_author_committer_start.search(head)
        if role_match:
            role = role_match.group(0)
            if head.endswith('-mail'):
                info["%s_email" % role] = parts[-1]
            elif head.endswith('-time'):
                info["%s_date" % role] = int(parts[-1])
            elif role == head:
                info[role] = parts[-1]
            continue

        # --- filename / summary ------------------------------------------
        # filename lib/grit.rb
        # summary add Blob
        if head.startswith('filename'):
            info['filename'] = parts[-1]
            continue
        if head.startswith('summary'):
            info['summary'] = parts[-1]
            continue

        # --- everything else ---------------------------------------------
        # Only lines with an empty first part (file content, or binary data)
        # are of interest, and only once commit info has been collected.
        if head != '' or not info:
            continue

        sha = info['id']
        commit = known_commits.get(sha)
        if commit is None:
            # First content line for this commit: create the Commit object.
            commit = Commit(
                self,
                hex_to_bin(sha),
                author=Actor._from_string(
                    info['author'] + ' ' + info['author_email']),
                authored_date=info['author_date'],
                committer=Actor._from_string(
                    info['committer'] + ' ' + info['committer_email']),
                committed_date=info['committer_date'],
                message=info['summary'])
            known_commits[sha] = commit

        # Decoded content lines lose their leading tab. For binary data
        # nothing is stripped. NOTE: binary content may have been split up
        # along the newline separator; each piece is appended to the current
        # blame as its own line, even though it really belongs to the
        # previous one.
        if not is_binary and line and line[0] == '\t':
            line = line[1:]

        result[-1][0] = commit
        result[-1][1].append(line)
        info = {'id': sha}

    return result
def blame_incremental(self, rev, file, **kwargs):
    """Iterator for blame information for the given file at the given revision.

    Unlike .blame(), this does not return the actual file's contents, only
    a stream of (commit, range) tuples.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame; passed to ``git blame`` after ``--``.
    :param kwargs: additional keyword arguments forwarded to ``git blame``.
    :return: lazy iterator of (git.Commit, range) tuples, where the commit
             indicates the commit to blame for the line, and range
             indicates a span of line numbers in the resulting file.

    If you combine all line number ranges outputted by this command, you
    should get a continuous range spanning all line numbers in the file.

    Iteration simply ends when the blame output is exhausted. If the output
    stops in the middle of an entry (before its ``filename`` line), the
    incomplete entry is dropped and iteration ends as well.
    """
    data = self.git.blame(rev, '--', file, p=True, incremental=True,
                          stdout_as_string=False, **kwargs)
    # Commit objects are created once per sha and reused for later entries.
    commits = {}

    # NOTE: the stream is consumed with ``for`` loops rather than bare
    # ``next()`` calls. A StopIteration escaping a generator body is turned
    # into a RuntimeError (PEP 479), so exhaustion must never leak out.
    stream = iter(data.splitlines())
    for header in stream:
        # Entry header: <sha> <original line> <final line> <number of lines>
        hexsha, _, lineno, num_lines = header.split()
        lineno = int(lineno)
        num_lines = int(num_lines)

        if hexsha not in commits:
            # First time we see this commit: read the following lines and
            # build up a dict of its properties.
            props = {}
            for detail in stream:
                if detail == b'boundary':
                    # "boundary" indicates a root commit and occurs
                    # instead of the "previous" tag
                    continue

                tag, value = detail.split(b' ', 1)
                props[tag] = value
                if tag == b'filename':
                    # "filename" formally terminates the entry for --incremental
                    break
            else:
                # Output ended before the entry was terminated.
                return

            c = Commit(
                self,
                hex_to_bin(hexsha),
                author=Actor(
                    safe_decode(props[b'author']),
                    # The mail address is wrapped in angle brackets.
                    safe_decode(props[b'author-mail'].lstrip(b'<').rstrip(b'>'))),
                authored_date=int(props[b'author-time']),
                committer=Actor(
                    safe_decode(props[b'committer']),
                    safe_decode(props[b'committer-mail'].lstrip(b'<').rstrip(b'>'))),
                committed_date=int(props[b'committer-time']),
                message=safe_decode(props[b'summary']))
            commits[hexsha] = c
        else:
            # Commit already known: skip ahead to the "filename" line that
            # terminates the entry. It is not necessarily the very next
            # line, as git may emit a "previous" line before it.
            for detail in stream:
                if detail.startswith(b'filename'):
                    break
            else:
                # Output ended before the entry was terminated.
                return

        yield commits[hexsha], range(lineno, lineno + num_lines)