def __call__(self, data_event): from mcdp_hdb.disk_map_disk_events_from_data_events import disk_events_from_data_event from mcdp_hdb.disk_events import apply_disk_event_to_filesystem s = yaml_dump(data_event) logger.debug('Event #%d:\n%s' % (len(self.data_events), indent(s, '> ')) ) self.data_events.append(data_event) disk_events = disk_events_from_data_event(disk_map=self.disk_map, schema=self.view._schema, data_rep=self.view._data, data_event=data_event) for disk_event in disk_events: logger.debug('Disk event:\n%s' % yaml_dump(disk_event)) wd = self.repo.working_dir apply_disk_event_to_filesystem(wd, disk_event, repo=self.repo) message = yaml_dump(data_event) who = data_event['who'] if who is not None: actor = who['actor'] host = who['host'] instance = who['instance'] else: actor = 'system' host = host_name() instance = 'unspecified' author = Actor(actor, '%s@%s' % (actor, instance)) committer = Actor(instance, '%s@%s' % (instance, host)) _commit = self.repo.index.commit(message, author=author, committer=committer)
def check_translation_diskrep_to_gitrep(disk_rep0, disk_events, disk_rep1, out): # @UnusedVariable if not disk_events: raise ValueError('no disk events') repo = gitrep_from_diskrep(disk_rep0) wd = repo.working_tree_dir readback = diskrep_from_gitrep(repo) assert_diskreps_same(disk_rep0, readback, 'original', 'written back') logger.debug(wd) logger.debug('\n'+indent(readback.tree(), 'read back |')) logger.debug('\n'+indent(yaml_dump(disk_events), 'disk_events|')) commits = [] for disk_event in disk_events: logger.debug(indent(yaml_dump(disk_event), 'disk_event | ')) apply_disk_event_to_filesystem(wd, disk_event, repo=repo) if repo.untracked_files: logger.debug('adding untracked file %r' % repo.untracked_files) repo.index.add(repo.untracked_files) message = yaml_dump(disk_event) who = disk_event['who'] logger.info('who: %s' % who) actor = who['actor'] instance = who.get('instance', None) host = who.get('host', None) author = Actor(actor, instance) committer = Actor(instance, host) commit = repo.index.commit(message, author=author, committer=committer) commits.append(commit) res = {} res['repo'] = repo return res
def make_commit(self, freezer=None): """ Makes a random commit in the current branch. """ freezer.start() if freezer else None from datetime import datetime fragment = uuid().hex[:8] filename = join(self.repodir, fragment) with open(filename, "w") as fh: fh.write(uuid().hex) self.repo.index.add([basename(filename)]) str_date = str(datetime.utcnow()) if "." in str_date: str_date, _ = str_date.split(".") str_date = str_date.replace(" ", "T") if not freezer: str_date, _ = str(datetime.utcnow().isoformat()).split(".") msg = "Adding {0}".format(basename(filename)) self.repo.index.commit( author=Actor("Peter", "*****@*****.**"), author_date=str_date, committer=Actor("Paul", "*****@*****.**"), commit_date=str_date, message=msg, ) freezer.stop() if freezer else None
def test_actor_get_uid_laziness_not_called(self, mock_get_uid): env = { "GIT_AUTHOR_NAME": "John Doe", "GIT_AUTHOR_EMAIL": "*****@*****.**", "GIT_COMMITTER_NAME": "Jane Doe", "GIT_COMMITTER_EMAIL": "*****@*****.**", } os.environ.update(env) for cr in (None, self.rorepo.config_reader()): Actor.committer(cr) Actor.author(cr) self.assertFalse(mock_get_uid.called)
def test_actor_get_uid_laziness_called(self, mock_get_uid): mock_get_uid.return_value = "user" for cr in (None, self.rorepo.config_reader()): committer = Actor.committer(cr) author = Actor.author(cr) if cr is None: # otherwise, use value from config_reader self.assertEqual(committer.name, 'user') self.assertTrue(committer.email.startswith('user@')) self.assertEqual(author.name, 'user') self.assertTrue(committer.email.startswith('user@')) self.assertTrue(mock_get_uid.called) self.assertEqual(mock_get_uid.call_count, 4)
def commit(self): # Calc tree hash treedata = b'' for entry in sorted(os.listdir(self.repo_path)): if entry == '.git': continue with open(os.path.join(self.repo_path, entry)) as f: data_hash = git_object_hash('blob', f.read().encode()).digest() treedata += b'100644 ' + entry.encode() + b'\x00' + data_hash tree_hash = git_object_hash('tree', treedata).hexdigest() # Serialize commit commit_data = b'tree ' + tree_hash.encode() + b'\n' commit_data += b'parent ' + self.repo.head.commit.hexsha.encode( ) + b'\n' # Random author author = Actor(get_full_name(), get_email_address()) # Calc timestamp unix_time = int(time()) is_dst = daylight and localtime().tm_isdst > 0 offset = altzone if is_dst else timezone datetime = str(unix_time) + ' ' + altz_to_utctz_str(offset) commit_data += b'author ' + author.name.encode( ) + b' <' + author.email.encode() + b'> ' + datetime.encode() + b'\n' commit_data += b'committer ' + author.name.encode( ) + b' <' + author.email.encode() + b'> ' + datetime.encode() + b'\n' commit_data += b'\n' # Randomize commit message to feature proof-of-work msg = calc_pow(commit_data) return self.repo.index.commit(msg, author=author, committer=author, author_date=datetime, commit_date=datetime)
def from_line(cls, line): """:return: New RefLogEntry instance from the given revlog line. :param line: line bytes without trailing newline :raise ValueError: If line could not be parsed""" line = line.decode(defenc) fields = line.split('\t', 1) if len(fields) == 1: info, msg = fields[0], None elif len(fields) == 2: info, msg = fields else: raise ValueError("Line must have up to two TAB-separated fields." " Got %s" % repr(line)) # END handle first split oldhexsha = info[:40] newhexsha = info[41:81] for hexsha in (oldhexsha, newhexsha): if not cls._re_hexsha_only.match(hexsha): raise ValueError("Invalid hexsha: %r" % (hexsha,)) # END if hexsha re doesn't match # END for each hexsha email_end = info.find('>', 82) if email_end == -1: raise ValueError("Missing token: >") # END handle missing end brace actor = Actor._from_string(info[82:email_end + 1]) time, tz_offset = parse_date(info[email_end + 2:]) return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
class GitStorage(WikiABC): AUTHOR = Actor('tw5_server', '') @cached_property def repo(self): return Repo(self.ROOT / self.name) @classmethod def _init_storage(cls, root): if not root.exists(): os.makedirs(root) try: repo = Repo.init(root) filepath = root / cls.FILENAME shutil.copy(CONFIG['TW5_TEMPLATE_FILE'], filepath) repo.index.add(cls.FILENAME) repo.index.commit('automatic create', author=cls.AUTHOR) except Exception as err: if root.exists(): shutil.rmtree(root) raise err def update(self, file_obj): with open(self.file, 'wb') as f: shutil.copyfileobj(file_obj, f) self.repo.index.add(self.FILENAME) self.repo.index.commit('automatic commit', author=self.AUTHOR)
def test_stats(self): commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781') stats = commit.stats def check_entries(d): assert isinstance(d, dict) for key in ("insertions", "deletions", "lines"): assert key in d # END assertion helper assert stats.files assert stats.total check_entries(stats.total) assert "files" in stats.total for filepath, d in stats.files.items(): check_entries(d) # END for each stated file # assure data is parsed properly michael = Actor._from_string("Michael Trier <*****@*****.**>") assert commit.author == michael assert commit.committer == michael assert commit.authored_date == 1210193388 assert commit.committed_date == 1210193388 assert commit.author_tz_offset == 14400, commit.author_tz_offset assert commit.committer_tz_offset == 14400, commit.committer_tz_offset assert commit.message == "initial project\n"
def test_unicode_actor(self): # assure we can parse unicode actors correctly name = "Üäöß ÄußÉ".decode("utf-8") assert len(name) == 9 special = Actor._from_string(u"%s <*****@*****.**>" % name) assert special.name == name assert isinstance(special.name, unicode)
def from_line(cls, line): """:return: New RefLogEntry instance from the given revlog line. :param line: line without trailing newline :raise ValueError: If line could not be parsed""" try: info, msg = line.split('\t', 2) except ValueError: raise ValueError("line is missing tab separator") # END handle first plit oldhexsha = info[:40] newhexsha = info[41:81] for hexsha in (oldhexsha, newhexsha): if not cls._re_hexsha_only.match(hexsha): raise ValueError("Invalid hexsha: %s" % hexsha) # END if hexsha re doesn't match # END for each hexsha email_end = info.find('>', 82) if email_end == -1: raise ValueError("Missing token: >") # END handle missing end brace actor = Actor._from_string(info[82:email_end + 1]) time, tz_offset = parse_date(info[email_end + 2:]) return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message): """Append a new log entry to the revlog at filepath. :param config_reader: configuration reader of the repository - used to obtain user information. May be None :param filepath: full path to the log file :param oldbinsha: binary sha of the previous commit :param newbinsha: binary sha of the current commit :param message: message describing the change to the reference :param write: If True, the changes will be written right away. Otherwise the change will not be written :return: RefLogEntry objects which was appended to the log :note: As we are append-only, concurrent access is not a problem as we do not interfere with readers.""" if len(oldbinsha) != 20 or len(newbinsha) != 20: raise ValueError("Shas need to be given in binary format") #END handle sha type assure_directory_exists(filepath, is_file=True) entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(config_reader), (int(time.time()), time.altzone), message)) lf = LockFile(filepath) lf._obtain_lock_or_raise() fd = open(filepath, 'a') try: fd.write(repr(entry)) finally: fd.close() lf._release_lock() #END handle write operation return entry
def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message): """Append a new log entry to the revlog at filepath. :param config_reader: configuration reader of the repository - used to obtain user information. May be None :param filepath: full path to the log file :param oldbinsha: binary sha of the previous commit :param newbinsha: binary sha of the current commit :param message: message describing the change to the reference :param write: If True, the changes will be written right away. Otherwise the change will not be written :return: RefLogEntry objects which was appended to the log :note: As we are append-only, concurrent access is not a problem as we do not interfere with readers.""" if len(oldbinsha) != 20 or len(newbinsha) != 20: raise ValueError("Shas need to be given in binary format") # END handle sha type assure_directory_exists(filepath, is_file=True) entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer( config_reader), (int(time.time()), time.altzone), message)) lf = LockFile(filepath) lf._obtain_lock_or_raise() fd = open(filepath, 'a') try: fd.write(repr(entry)) finally: fd.close() lf._release_lock() # END handle write operation return entry
def from_line(cls, line): """:return: New RefLogEntry instance from the given revlog line. :param line: line without trailing newline :raise ValueError: If line could not be parsed""" try: info, msg = line.split('\t', 2) except ValueError: raise ValueError("line is missing tab separator") #END handle first plit oldhexsha = info[:40] newhexsha = info[41:81] for hexsha in (oldhexsha, newhexsha): if not cls._re_hexsha_only.match(hexsha): raise ValueError("Invalid hexsha: %s" % hexsha) # END if hexsha re doesn't match #END for each hexsha email_end = info.find('>', 82) if email_end == -1: raise ValueError("Missing token: >") #END handle missing end brace actor = Actor._from_string(info[82:email_end + 1]) time, tz_offset = parse_date(info[email_end + 2:]) return RefLogEntry( (oldhexsha, newhexsha, actor, (time, tz_offset), msg))
def setUp(self): """ Sets up the Git repository for testing. The following will be available after :py:method`setUp()` runs. self.repodir The absolute filename of the Git repository self.repo A ``git.Repo`` object for self.repodir This will create the root commit in the test repository automaticall. """ freezer = freeze_time("2001-01-01T00:00:00") freezer.start() from datetime import datetime super(GitSweepTestCase, self).setUp() repodir = mkdtemp() self.repodir = repodir self.repo = Repo.init(repodir) rootcommit_filename = join(repodir, "rootcommit") with open(rootcommit_filename, "w") as fh: fh.write("") self.repo.index.add([basename(rootcommit_filename)]) str_date = str(datetime.utcnow()) self.repo.index.commit( author=Actor("Peter", "*****@*****.**"), author_date=str_date, committer=Actor("Paul", "*****@*****.**"), commit_date=str_date, message="Root commit", ) # Cache the remote per test self._remote = None # Keep track of cloned repositories that track self.repo self._clone_dirs = [] freezer.stop()
def parse_actor_and_date(line): """Parse out the actor (author or committer) info from a line like:: author Tom Preston-Werner <*****@*****.**> 1191999972 -0700 :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" m = _re_actor_epoch.search(line) actor, epoch, offset = m.groups() return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
def test_from_string_should_separate_name_and_email(self): a = Actor._from_string("Michael Trier <*****@*****.**>") assert "Michael Trier" == a.name assert "*****@*****.**" == a.email # base type capabilities assert a == a assert not (a != a) m = set() m.add(a) m.add(a) assert len(m) == 1
def test_actor_from_string(self): self.assertEqual(Actor._from_string("name"), Actor("name", None)) self.assertEqual(Actor._from_string("name <>"), Actor("name", "")) self.assertEqual( Actor._from_string( "name last another <*****@*****.**>"), Actor("name last another", "*****@*****.**"))
def repo_commit_all_changes(repo, message=None, author=None): if author is None: author = Actor("system", "system") if message is None: message = "" if repo.untracked_files: repo.index.add(repo.untracked_files) modified_files = repo.index.diff(None) for m in modified_files: repo.index.add([m.b_path]) commit = repo.index.commit(message, author=author) return commit
def parse_actor_and_date(line: str) -> Tuple[Actor, int, int]: """Parse out the actor (author or committer) info from a line like:: author Tom Preston-Werner <*****@*****.**> 1191999972 -0700 :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" actor, epoch, offset = '', '0', '0' m = _re_actor_epoch.search(line) if m: actor, epoch, offset = m.groups() else: m = _re_only_actor.search(line) actor = m.group(1) if m else line or '' return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
def append_entry(cls, config_reader: Union[Actor, 'GitConfigParser', 'SectionConstraint', None], filepath: PathLike, oldbinsha: bytes, newbinsha: bytes, message: str, write: bool = True) -> 'RefLogEntry': """Append a new log entry to the revlog at filepath. :param config_reader: configuration reader of the repository - used to obtain user information. May also be an Actor instance identifying the committer directly or None. :param filepath: full path to the log file :param oldbinsha: binary sha of the previous commit :param newbinsha: binary sha of the current commit :param message: message describing the change to the reference :param write: If True, the changes will be written right away. Otherwise the change will not be written :return: RefLogEntry objects which was appended to the log :note: As we are append-only, concurrent access is not a problem as we do not interfere with readers.""" if len(oldbinsha) != 20 or len(newbinsha) != 20: raise ValueError("Shas need to be given in binary format") # END handle sha type assure_directory_exists(filepath, is_file=True) first_line = message.split('\n')[0] if isinstance(config_reader, Actor): committer = config_reader # mypy thinks this is Actor | Gitconfigparser, but why? else: committer = Actor.committer(config_reader) entry = RefLogEntry((bin_to_hex(oldbinsha).decode('ascii'), bin_to_hex(newbinsha).decode('ascii'), committer, (int(_time.time()), _time.altzone), first_line)) if write: lf = LockFile(filepath) lf._obtain_lock_or_raise() fd = open(filepath, 'ab') try: fd.write(entry.format().encode(defenc)) finally: fd.close() lf._release_lock() # END handle write operation return entry
def test_reflogentry(self): nullhexsha = IndexObject.NULL_HEX_SHA hexsha = 'F' * 40 actor = Actor('name', 'email') msg = "message" self.assertRaises(ValueError, RefLogEntry.new, nullhexsha, hexsha, 'noactor', 0, 0, "") e = RefLogEntry.new(nullhexsha, hexsha, actor, 0, 1, msg) assert e.oldhexsha == nullhexsha assert e.newhexsha == hexsha assert e.actor == actor assert e.time[0] == 0 assert e.time[1] == 1 assert e.message == msg # check representation (roughly) assert repr(e).startswith(nullhexsha)
def test_str_should_alias_name(self): a = Actor._from_string("Michael Trier <*****@*****.**>") assert a.name == str(a)
def blame(repo, start_commit, end_commit, filename): data = repo.git.blame('%s^..%s' % (start_commit, end_commit), '--', filename, p=True) commits = dict() blames = list() info = None for line in data.splitlines(False): parts = repo.re_whitespace.split(line, 1) firstpart = parts[0] if repo.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 digits = parts[-1].split(" ") if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) elif info['id'] != firstpart: info = {'id': firstpart} blames.append([commits.get(firstpart), []]) # END blame data initialization else: m = repo.re_author_committer_start.search(firstpart) if m: # handles: # author Tom Preston-Werner # author-mail <*****@*****.**> # author-time 1192271832 # author-tz -0700 # committer Tom Preston-Werner # committer-mail <*****@*****.**> # committer-time 1192271832 # committer-tz -0700 - IGNORED BY US role = m.group(0) if firstpart.endswith('-mail'): info["%s_email" % role] = parts[-1] elif firstpart.endswith('-time'): info["%s_date" % role] = int(parts[-1]) elif role == firstpart: info[role] = parts[-1] # END distinguish mail,time,name else: # handle # filename lib/grit.rb # summary add Blob # <and rest> if firstpart.startswith('filename'): info['filename'] = parts[-1] elif firstpart.startswith('summary'): info['summary'] = parts[-1] elif firstpart.startswith('boundary'): info['boundary'] = True elif firstpart == '': if info: sha = info['id'] c = commits.get(sha) if c is None: if info.get('boundary'): commits[sha] = False else: c = repo.CommitCls( repo, hex_to_bin(sha), author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary'] ) commits[sha] = c if c is not False: # END if commit objects needs initial creation m = repo.re_tab_full_line.search(line) text, = m.groups() blames[-1][0] = c blames[-1][1].append(text) info = { 'id' : sha } # END if we collected commit info # END distinguish filename,summary,rest # END distinguish author|committer vs filename,summary,rest # END distinguish hexsha vs other information for commit, lines in blames: if commit is not None: yield commit, lines
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False, author=None, committer=None): """Commit the given tree, creating a commit object. :param repo: Repo object the commit should be part of :param tree: Tree object or hex or bin sha the tree of the new commit :param message: Commit message. It may be an empty string if no message is provided. It will be converted to a string in any case. :param parent_commits: Optional Commit objects to use as parents for the new commit. If empty list, the commit will have no parents at all and become a root commit. If None , the current head commit will be the parent of the new commit object :param head: If True, the HEAD will be advanced to the new commit automatically. Else the HEAD will remain pointing on the previous commit. This could lead to undesired results when diffing files. :param author: The name of the author, optional. If unset, the repository configuration is used to obtain this value. :param committer: The name of the committer, optional. If unset, the repository configuration is used to obtain this value. :return: Commit object representing the new commit :note: Additional information about the committer and Author are taken from the environment or from the git configuration, see git-commit-tree for more information""" if parent_commits is None: try: parent_commits = [repo.head.commit] except ValueError: # empty repositories have no head commit parent_commits = list() # END handle parent commits else: for p in parent_commits: if not isinstance(p, cls): raise ValueError("Parent commit '%r' must be of type %s" % (p, cls)) # end check parent commit types # END if parent commits are unset # retrieve all additional information, create a commit object, and # serialize it # Generally: # * Environment variables override configuration values # * Sensible defaults are set according to the git documentation # COMMITER AND AUTHOR INFO cr = repo.config_reader() env = os.environ committer = committer or Actor.committer(cr) author = author or Actor.author(cr) # PARSE THE DATES unix_time = int(time()) offset = altzone author_date_str = env.get(cls.env_author_date, '') if author_date_str: author_time, author_offset = parse_date(author_date_str) else: author_time, author_offset = unix_time, offset # END set author time committer_date_str = env.get(cls.env_committer_date, '') if committer_date_str: committer_time, committer_offset = parse_date(committer_date_str) else: committer_time, committer_offset = unix_time, offset # END set committer time # assume utf8 encoding enc_section, enc_option = cls.conf_encoding.split('.') conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) # if the tree is no object, make sure we create one - otherwise # the created commit object is invalid if isinstance(tree, str): tree = repo.tree(tree) # END tree conversion # CREATE NEW COMMIT new_commit = cls(repo, cls.NULL_BIN_SHA, tree, author, author_time, author_offset, committer, committer_time, committer_offset, message, parent_commits, conf_encoding) stream = BytesIO() new_commit._serialize(stream) streamlen = stream.tell() stream.seek(0) istream = repo.odb.store(IStream(cls.type, streamlen, stream)) new_commit.binsha = istream.binsha if head: # need late import here, importing git at the very beginning throws # as well ... import git.refs try: repo.head.set_commit(new_commit, logmsg=message) except ValueError: # head is not yet set to the ref our HEAD points to # Happens on first commit master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message) repo.head.set_reference(master, logmsg='commit: Switching to %s' % master) # END handle empty repositories # END advance head handling return new_commit
def test_actor(self): for cr in (None, self.rorepo.config_reader()): self.assertIsInstance(Actor.committer(cr), Actor) self.assertIsInstance(Actor.author(cr), Actor)
def test_index_mutation(self, rw_repo): index = rw_repo.index num_entries = len(index.entries) cur_head = rw_repo.head uname = u"Thomas Müller" umail = "*****@*****.**" with rw_repo.config_writer() as writer: writer.set_value("user", "name", uname) writer.set_value("user", "email", umail) self.assertEqual(writer.get_value("user", "name"), uname) # remove all of the files, provide a wild mix of paths, BaseIndexEntries, # IndexEntries def mixed_iterator(): count = 0 for entry in index.entries.values(): type_id = count % 4 if type_id == 0: # path yield entry.path elif type_id == 1: # blob yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) elif type_id == 2: # BaseIndexEntry yield BaseIndexEntry(entry[:4]) elif type_id == 3: # IndexEntry yield entry else: raise AssertionError("Invalid Type") count += 1 # END for each entry # END mixed iterator deleted_files = index.remove(mixed_iterator(), working_tree=False) assert deleted_files self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) self.assertEqual(len(index.entries), 0) # reset the index to undo our changes index.reset() self.assertEqual(len(index.entries), num_entries) # remove with working copy deleted_files = index.remove(mixed_iterator(), working_tree=True) assert deleted_files self.assertEqual(self._count_existing(rw_repo, deleted_files), 0) # reset everything index.reset(working_tree=True) self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) # invalid type self.failUnlessRaises(TypeError, index.remove, [1]) # absolute path deleted_files = index.remove( [osp.join(rw_repo.working_tree_dir, "lib")], r=True) assert len(deleted_files) > 1 self.failUnlessRaises(ValueError, index.remove, ["/doesnt/exists"]) # TEST COMMITTING # commit changed index cur_commit = cur_head.commit commit_message = u"commit default head by Frèderic Çaufl€" new_commit = index.commit(commit_message, head=False) assert cur_commit != new_commit self.assertEqual(new_commit.author.name, uname) self.assertEqual(new_commit.author.email, umail) self.assertEqual(new_commit.committer.name, uname) self.assertEqual(new_commit.committer.email, umail) self.assertEqual(new_commit.message, commit_message) self.assertEqual(new_commit.parents[0], cur_commit) self.assertEqual(len(new_commit.parents), 1) self.assertEqual(cur_head.commit, cur_commit) # commit with other actor cur_commit = cur_head.commit my_author = Actor(u"Frèderic Çaufl€", "*****@*****.**") my_committer = Actor(u"Committing Frèderic Çaufl€", "*****@*****.**") commit_actor = index.commit(commit_message, author=my_author, committer=my_committer) assert cur_commit != commit_actor self.assertEqual(commit_actor.author.name, u"Frèderic Çaufl€") self.assertEqual(commit_actor.author.email, "*****@*****.**") self.assertEqual(commit_actor.committer.name, u"Committing Frèderic Çaufl€") self.assertEqual(commit_actor.committer.email, "*****@*****.**") self.assertEqual(commit_actor.message, commit_message) self.assertEqual(commit_actor.parents[0], cur_commit) self.assertEqual(len(new_commit.parents), 1) self.assertEqual(cur_head.commit, commit_actor) self.assertEqual(cur_head.log()[-1].actor, my_committer) # commit with author_date and commit_date cur_commit = cur_head.commit commit_message = u"commit with dates by Avinash Sajjanshetty" new_commit = index.commit(commit_message, author_date="2006-04-07T22:13:13", commit_date="2005-04-07T22:13:13") assert cur_commit != new_commit print(new_commit.authored_date, new_commit.committed_date) self.assertEqual(new_commit.message, commit_message) self.assertEqual(new_commit.authored_date, 1144447993) self.assertEqual(new_commit.committed_date, 1112911993) # same index, no parents commit_message = "index without parents" commit_no_parents = index.commit(commit_message, parent_commits=[], head=True) self.assertEqual(commit_no_parents.message, commit_message) self.assertEqual(len(commit_no_parents.parents), 0) self.assertEqual(cur_head.commit, commit_no_parents) # same index, multiple parents commit_message = "Index with multiple parents\n commit with another line" commit_multi_parent = index.commit(commit_message, parent_commits=(commit_no_parents, new_commit)) self.assertEqual(commit_multi_parent.message, commit_message) self.assertEqual(len(commit_multi_parent.parents), 2) self.assertEqual(commit_multi_parent.parents[0], commit_no_parents) self.assertEqual(commit_multi_parent.parents[1], new_commit) self.assertEqual(cur_head.commit, commit_multi_parent) # re-add all files in lib # get the lib folder back on disk, but get an index without it index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) lib_file_path = osp.join("lib", "git", "__init__.py") assert (lib_file_path, 0) not in index.entries assert osp.isfile(osp.join(rw_repo.working_tree_dir, lib_file_path)) # directory entries = index.add(['lib'], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) assert len(entries) > 1 # glob entries = index.reset(new_commit).add([osp.join('lib', 'git', '*.py')], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) self.assertEqual(len(entries), 14) # same file entries = index.reset(new_commit).add( [osp.join(rw_repo.working_tree_dir, 'lib', 'git', 'head.py')] * 2, fprogress=self._fprogress_add) self._assert_entries(entries) self.assertEqual(entries[0].mode & 0o644, 0o644) # would fail, test is too primitive to handle this case # self._assert_fprogress(entries) self._reset_progress() self.assertEqual(len(entries), 2) # missing path self.failUnlessRaises(OSError, index.reset(new_commit).add, ['doesnt/exist/must/raise']) # blob from older revision overrides current index revision old_blob = new_commit.parents[0].tree.blobs[0] entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) self.assertEqual(index.entries[(old_blob.path, 0)].hexsha, old_blob.hexsha) self.assertEqual(len(entries), 1) # mode 0 not allowed null_hex_sha = Diff.NULL_HEX_SHA null_bin_sha = b"\0" * 20 self.failUnlessRaises( ValueError, index.reset(new_commit).add, [BaseIndexEntry((0, null_bin_sha, 0, "doesntmatter"))]) # add new file new_file_relapath = "my_new_file" self._make_file(new_file_relapath, "hello world", rw_repo) entries = index.reset(new_commit).add( [BaseIndexEntry((0o10644, null_bin_sha, 0, new_file_relapath))], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) self.assertEqual(len(entries), 1) self.assertNotEquals(entries[0].hexsha, null_hex_sha) # add symlink if not is_win: for target in ('/etc/nonexisting', '/etc/passwd', '/etc'): basename = "my_real_symlink" link_file = osp.join(rw_repo.working_tree_dir, basename) os.symlink(target, link_file) entries = index.reset(new_commit).add( [link_file], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) self.assertEqual(len(entries), 1) self.assertTrue(S_ISLNK(entries[0].mode)) self.assertTrue( S_ISLNK(index.entries[index.entry_key( "my_real_symlink", 0)].mode)) # we expect only the target to be written self.assertEqual( index.repo.odb.stream( entries[0].binsha).read().decode('ascii'), target) os.remove(link_file) # end for each target # END real symlink test # add fake symlink and assure it checks-our as symlink fake_symlink_relapath = "my_fake_symlink" link_target = "/etc/that" fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) fake_entry = BaseIndexEntry( (0o120000, null_bin_sha, 0, fake_symlink_relapath)) entries = index.reset(new_commit).add([fake_entry], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) assert entries[0].hexsha != null_hex_sha self.assertEqual(len(entries), 1) self.assertTrue(S_ISLNK(entries[0].mode)) # assure this also works with an alternate method full_index_entry = IndexEntry.from_base( BaseIndexEntry((0o120000, entries[0].binsha, 0, entries[0].path))) entry_key = index.entry_key(full_index_entry) index.reset(new_commit) assert entry_key not in index.entries index.entries[entry_key] = full_index_entry index.write() index.update() # force reread of entries new_entry = index.entries[entry_key] assert S_ISLNK(new_entry.mode) # a tree created from this should contain the symlink tree = index.write_tree() assert fake_symlink_relapath in tree index.write() # flush our changes for the checkout # checkout the fakelink, should be a link then assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) os.remove(fake_symlink_path) index.checkout(fake_symlink_path) # on windows we will never get symlinks if is_win: # simlinks should contain the link as text ( which is what a # symlink actually is ) with open(fake_symlink_path, 'rt') as fd: self.assertEqual(fd.read(), link_target) else: self.assertTrue(S_ISLNK(os.lstat(fake_symlink_path)[ST_MODE])) # TEST RENAMING def assert_mv_rval(rval): for source, dest in rval: assert not osp.exists(source) and osp.exists(dest) # END for each renamed item # END move assertion utility self.failUnlessRaises(ValueError, index.move, ['just_one_path']) # file onto existing file files = ['AUTHORS', 'LICENSE'] self.failUnlessRaises(GitCommandError, index.move, files) # again, with force assert_mv_rval(index.move(files, f=True)) # files into directory - dry run paths = ['LICENSE', 'VERSION', 'doc'] rval = index.move(paths, dry_run=True) self.assertEqual(len(rval), 2) assert osp.exists(paths[0]) # again, no dry run rval = index.move(paths) assert_mv_rval(rval) # dir into dir rval = index.move(['doc', 'test']) assert_mv_rval(rval) # TEST PATH REWRITING ###################### count = [0] def rewriter(entry): rval = str(count[0]) count[0] += 1 return rval # END rewriter def make_paths(): # two existing ones, one new one yield 'CHANGES' yield 'ez_setup.py' yield index.entries[index.entry_key('README', 0)] yield index.entries[index.entry_key('.gitignore', 0)] for fid in range(3): fname = 'newfile%i' % fid with open(fname, 'wb') as fd: fd.write(b"abcd") yield Blob(rw_repo, Blob.NULL_BIN_SHA, 0o100644, fname) # END for each new file # END path producer paths = list(make_paths()) self._assert_entries(index.add(paths, path_rewriter=rewriter)) for filenum in range(len(paths)): assert index.entry_key(str(filenum), 0) in index.entries # TEST RESET ON PATHS ###################### arela = "aa" brela = "bb" afile = self._make_file(arela, "adata", rw_repo) bfile = self._make_file(brela, "bdata", rw_repo) akey = index.entry_key(arela, 0) bkey = index.entry_key(brela, 0) keys = (akey, bkey) absfiles = (afile, bfile) files = (arela, brela) for fkey in keys: assert fkey not in index.entries index.add(files, write=True) nc = index.commit("2 files committed", head=False) for fkey in keys: assert fkey in index.entries # just the index index.reset(paths=(arela, afile)) assert akey not in index.entries assert bkey in index.entries # now with working tree - files on disk as well as entries must be recreated rw_repo.head.commit = nc for absfile in absfiles: os.remove(absfile) index.reset(working_tree=True, paths=files) for fkey in keys: assert fkey in index.entries for absfile in absfiles: assert osp.isfile(absfile)
def test_from_string_should_handle_just_name(self): a = Actor._from_string("Michael Trier") assert "Michael Trier" == a.name assert None == a.email
def test_actor(self): for cr in (None, self.rorepo.config_reader()): assert isinstance(Actor.committer(cr), Actor) assert isinstance(Actor.author(cr), Actor)
def blame(self, rev, file): """The blame information for the given file at the given revision. :parm rev: revision specifier, see git-rev-parse for viable options. :return: list: [git.Commit, list: [<line>]] A list of tuples associating a Commit object with a list of lines that changed within the given commit. The Commit objects will be given in order of appearance.""" data = self.git.blame(rev, '--', file, p=True) commits = dict() blames = list() info = None for line in data.splitlines(False): parts = self.re_whitespace.split(line, 1) firstpart = parts[0] if self.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 digits = parts[-1].split(" ") if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) # END blame data initialization else: m = self.re_author_committer_start.search(firstpart) if m: # handles: # author Tom Preston-Werner # author-mail <*****@*****.**> # author-time 1192271832 # author-tz -0700 # committer Tom Preston-Werner # committer-mail <*****@*****.**> # committer-time 1192271832 # committer-tz -0700 - IGNORED BY US role = m.group(0) if firstpart.endswith('-mail'): info["%s_email" % role] = parts[-1] elif firstpart.endswith('-time'): info["%s_date" % role] = int(parts[-1]) elif role == firstpart: info[role] = parts[-1] # END distinguish mail,time,name else: # handle # filename lib/grit.rb # summary add Blob # <and rest> if firstpart.startswith('filename'): info['filename'] = parts[-1] elif firstpart.startswith('summary'): info['summary'] = parts[-1] elif firstpart == '': if info: sha = info['id'] c = commits.get(sha) if c is None: c = Commit( self, hex_to_bin(sha), author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary']) commits[sha] = c # END if commit objects needs initial creation m = self.re_tab_full_line.search(line) text, = m.groups() blames[-1][0] = c blames[-1][1].append( text ) info = None # END if we collected commit info # END distinguish filename,summary,rest # END distinguish author|committer vs filename,summary,rest # END distinguish hexsha vs other information return blames
def test_should_display_representation(self): a = Actor._from_string("Michael Trier <*****@*****.**>") assert '<git.Actor "Michael Trier <*****@*****.**>">' == repr(a)
def blame(self, rev, file): """The blame information for the given file at the given revision. :parm rev: revision specifier, see git-rev-parse for viable options. :return: list: [git.Commit, list: [<line>]] A list of tuples associating a Commit object with a list of lines that changed within the given commit. The Commit objects will be given in order of appearance.""" data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False) commits = dict() blames = list() info = None keepends = True for line in data.splitlines(keepends): try: line = line.rstrip().decode(defenc) except UnicodeDecodeError: firstpart = '' is_binary = True else: # As we don't have an idea when the binary data ends, as it could contain multiple newlines # in the process. So we rely on being able to decode to tell us what is is. # This can absolutely fail even on text files, but even if it does, we should be fine treating it # as binary instead parts = self.re_whitespace.split(line, 1) firstpart = parts[0] is_binary = False # end handle decode of line if self.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - indicates # another line of blame with the same data digits = parts[-1].split(" ") if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) elif info['id'] != firstpart: info = {'id': firstpart} blames.append([commits.get(firstpart), []]) # END blame data initialization else: m = self.re_author_committer_start.search(firstpart) if m: # handles: # author Tom Preston-Werner # author-mail <*****@*****.**> # author-time 1192271832 # author-tz -0700 # committer Tom Preston-Werner # committer-mail <*****@*****.**> # committer-time 1192271832 # committer-tz -0700 - IGNORED BY US role = m.group(0) if firstpart.endswith('-mail'): info["%s_email" % role] = parts[-1] elif firstpart.endswith('-time'): info["%s_date" % role] = int(parts[-1]) elif role == firstpart: info[role] = parts[-1] # END distinguish mail,time,name else: # handle # filename lib/grit.rb # summary add Blob # <and rest> if firstpart.startswith('filename'): info['filename'] = parts[-1] elif firstpart.startswith('summary'): info['summary'] = parts[-1] elif firstpart == '': if info: sha = info['id'] c = commits.get(sha) if c is None: c = Commit( self, hex_to_bin(sha), author=Actor._from_string( info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string( info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary']) commits[sha] = c # END if commit objects needs initial creation if not is_binary: if line and line[0] == '\t': line = line[1:] else: # NOTE: We are actually parsing lines out of binary data, which can lead to the # binary being split up along the newline separator. We will append this to the blame # we are currently looking at, even though it should be concatenated with the last line # we have seen. pass # end handle line contents blames[-1][0] = c blames[-1][1].append(line) info = {'id': sha} # END if we collected commit info # END distinguish filename,summary,rest # END distinguish author|committer vs filename,summary,rest # END distinguish hexsha vs other information return blames
def blame_incremental(self, rev, file, **kwargs): """Iterator for blame information for the given file at the given revision. Unlike .blame(), this does not return the actual file's contents, only a stream of BlameEntry tuples. :parm rev: revision specifier, see git-rev-parse for viable options. :return: lazy iterator of BlameEntry tuples, where the commit indicates the commit to blame for the line, and range indicates a span of line numbers in the resulting file. If you combine all line number ranges outputted by this command, you should get a continuous range spanning all line numbers in the file. """ data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs) commits = dict() stream = (line for line in data.split(b'\n') if line) while True: line = next( stream ) # when exhausted, causes a StopIteration, terminating this function hexsha, orig_lineno, lineno, num_lines = line.split() lineno = int(lineno) num_lines = int(num_lines) orig_lineno = int(orig_lineno) if hexsha not in commits: # Now read the next few lines and build up a dict of properties # for this commit props = dict() while True: line = next(stream) if line == b'boundary': # "boundary" indicates a root commit and occurs # instead of the "previous" tag continue tag, value = line.split(b' ', 1) props[tag] = value if tag == b'filename': # "filename" formally terminates the entry for --incremental orig_filename = value break c = Commit( self, hex_to_bin(hexsha), author=Actor( safe_decode(props[b'author']), safe_decode( props[b'author-mail'].lstrip(b'<').rstrip(b'>'))), authored_date=int(props[b'author-time']), committer=Actor( safe_decode(props[b'committer']), safe_decode(props[b'committer-mail'].lstrip( b'<').rstrip(b'>'))), committed_date=int(props[b'committer-time'])) commits[hexsha] = c else: # Discard all lines until we find "filename" which is # guaranteed to be the last line while True: line = next( stream) # will fail if we reach the EOF unexpectedly tag, value = line.split(b' ', 1) if tag == b'filename': orig_filename = value break yield BlameEntry(commits[hexsha], range(lineno, lineno + num_lines), safe_decode(orig_filename), range(orig_lineno, orig_lineno + num_lines))
def create_from_tree(cls, repo, tree, message, parent_commits=None, head=False, author=None, committer=None, author_date=None, commit_date=None, isolated=False): """Commit the given tree, creating a commit object. :param repo: Repo object the commit should be part of :param tree: Tree object or hex or bin sha the tree of the new commit :param message: Commit message. It may be an empty string if no message is provided. It will be converted to a string in any case. :param parent_commits: Optional Commit objects to use as parents for the new commit. If empty list, the commit will have no parents at all and become a root commit. If None , the current head commit will be the parent of the new commit object :param head: If True, the HEAD will be advanced to the new commit automatically. Else the HEAD will remain pointing on the previous commit. This could lead to undesired results when diffing files. :param author: The name of the author, optional. If unset, the repository configuration is used to obtain this value. :param committer: The name of the committer, optional. If unset, the repository configuration is used to obtain this value. :param author_date: The timestamp for the author field :param commit_date: The timestamp for the committer field :param isolated: if true, the parent environment is not passed to the git command. :return: Commit object representing the new commit :note: Additional information about the committer and Author are taken from the environment or from the git configuration, see git-commit-tree for more information""" if parent_commits is None: try: parent_commits = [repo.head.commit] except ValueError: # empty repositories have no head commit parent_commits = [] # END handle parent commits else: for p in parent_commits: if not isinstance(p, cls): raise ValueError("Parent commit '%r' must be of type %s" % (p, cls)) # end check parent commit types # END if parent commits are unset # retrieve all additional information, create a commit object, and # serialize it # Generally: # * Environment variables override configuration values # * Sensible defaults are set according to the git documentation # COMMITER AND AUTHOR INFO cr = repo.config_reader() env = {} if isolated else os.environ committer = committer or Actor.committer(cr, isolated=isolated) author = author or Actor.author(cr, isolated=isolated) # PARSE THE DATES unix_time = int(time()) is_dst = daylight and localtime().tm_isdst > 0 offset = altzone if is_dst else timezone author_date_str = env.get(cls.env_author_date, '') if author_date: author_time, author_offset = parse_date(author_date) elif author_date_str: author_time, author_offset = parse_date(author_date_str) else: author_time, author_offset = unix_time, offset # END set author time committer_date_str = env.get(cls.env_committer_date, '') if commit_date: committer_time, committer_offset = parse_date(commit_date) elif committer_date_str: committer_time, committer_offset = parse_date(committer_date_str) else: committer_time, committer_offset = unix_time, offset # END set committer time # assume utf8 encoding enc_section, enc_option = cls.conf_encoding.split('.') conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) # if the tree is no object, make sure we create one - otherwise # the created commit object is invalid if isinstance(tree, str): tree = repo.tree(tree) # END tree conversion # CREATE NEW COMMIT new_commit = cls(repo, cls.NULL_BIN_SHA, tree, author, author_time, author_offset, committer, committer_time, committer_offset, message, parent_commits, conf_encoding) stream = BytesIO() new_commit._serialize(stream) streamlen = stream.tell() stream.seek(0) istream = repo.odb.store(IStream(cls.type, streamlen, stream)) new_commit.binsha = istream.binsha if head: # need late import here, importing git at the very beginning throws # as well ... import git.refs try: repo.head.set_commit(new_commit, logmsg=message) except ValueError: # head is not yet set to the ref our HEAD points to # Happens on first commit master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message) repo.head.set_reference(master, logmsg='commit: Switching to %s' % master) # END handle empty repositories # END advance head handling return new_commit
def blame(self, rev, file): """The blame information for the given file at the given revision. :parm rev: revision specifier, see git-rev-parse for viable options. :return: list: [git.Commit, list: [<line>]] A list of tuples associating a Commit object with a list of lines that changed within the given commit. The Commit objects will be given in order of appearance.""" data = self.git.blame(rev, '--', file, p=True, stdout_as_string=False) commits = dict() blames = list() info = None keepends = True for line in data.splitlines(keepends): try: line = line.rstrip().decode(defenc) except UnicodeDecodeError: firstpart = '' is_binary = True else: # As we don't have an idea when the binary data ends, as it could contain multiple newlines # in the process. So we rely on being able to decode to tell us what is is. # This can absolutely fail even on text files, but even if it does, we should be fine treating it # as binary instead parts = self.re_whitespace.split(line, 1) firstpart = parts[0] is_binary = False # end handle decode of line if self.re_hexsha_only.search(firstpart): # handles # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7 - indicates blame-data start # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2 - indicates # another line of blame with the same data digits = parts[-1].split(" ") if len(digits) == 3: info = {'id': firstpart} blames.append([None, []]) elif info['id'] != firstpart: info = {'id': firstpart} blames.append([commits.get(firstpart), []]) # END blame data initialization else: m = self.re_author_committer_start.search(firstpart) if m: # handles: # author Tom Preston-Werner # author-mail <*****@*****.**> # author-time 1192271832 # author-tz -0700 # committer Tom Preston-Werner # committer-mail <*****@*****.**> # committer-time 1192271832 # committer-tz -0700 - IGNORED BY US role = m.group(0) if firstpart.endswith('-mail'): info["%s_email" % role] = parts[-1] elif firstpart.endswith('-time'): info["%s_date" % role] = int(parts[-1]) elif role == firstpart: info[role] = parts[-1] # END distinguish mail,time,name else: # handle # filename lib/grit.rb # summary add Blob # <and rest> if firstpart.startswith('filename'): info['filename'] = parts[-1] elif firstpart.startswith('summary'): info['summary'] = parts[-1] elif firstpart == '': if info: sha = info['id'] c = commits.get(sha) if c is None: c = Commit(self, hex_to_bin(sha), author=Actor._from_string(info['author'] + ' ' + info['author_email']), authored_date=info['author_date'], committer=Actor._from_string( info['committer'] + ' ' + info['committer_email']), committed_date=info['committer_date'], message=info['summary']) commits[sha] = c # END if commit objects needs initial creation if not is_binary: if line and line[0] == '\t': line = line[1:] else: # NOTE: We are actually parsing lines out of binary data, which can lead to the # binary being split up along the newline separator. We will append this to the blame # we are currently looking at, even though it should be concatenated with the last line # we have seen. pass # end handle line contents blames[-1][0] = c blames[-1][1].append(line) info = {'id': sha} # END if we collected commit info # END distinguish filename,summary,rest # END distinguish author|committer vs filename,summary,rest # END distinguish hexsha vs other information return blames
def as_git_actor(self): hostname = 'hostname' # XXX email = '%s@%s' % (self.username, hostname) author = Actor(self.username, email) return author
def blame_incremental(self, rev, file, **kwargs): """Iterator for blame information for the given file at the given revision. Unlike .blame(), this does not return the actual file's contents, only a stream of (commit, range) tuples. :parm rev: revision specifier, see git-rev-parse for viable options. :return: lazy iterator of (git.Commit, range) tuples, where the commit indicates the commit to blame for the line, and range indicates a span of line numbers in the resulting file. If you combine all line number ranges outputted by this command, you should get a continuous range spanning all line numbers in the file. """ data = self.git.blame(rev, '--', file, p=True, incremental=True, stdout_as_string=False, **kwargs) commits = dict() stream = iter(data.splitlines()) while True: line = next( stream ) # when exhausted, casues a StopIteration, terminating this function hexsha, _, lineno, num_lines = line.split() lineno = int(lineno) num_lines = int(num_lines) if hexsha not in commits: # Now read the next few lines and build up a dict of properties # for this commit props = dict() while True: line = next(stream) if line == b'boundary': # "boundary" indicates a root commit and occurs # instead of the "previous" tag continue tag, value = line.split(b' ', 1) props[tag] = value if tag == b'filename': # "filename" formally terminates the entry for --incremental break c = Commit( self, hex_to_bin(hexsha), author=Actor( safe_decode(props[b'author']), safe_decode( props[b'author-mail'].lstrip(b'<').rstrip(b'>'))), authored_date=int(props[b'author-time']), committer=Actor( safe_decode(props[b'committer']), safe_decode(props[b'committer-mail'].lstrip( b'<').rstrip(b'>'))), committed_date=int(props[b'committer-time']), message=safe_decode(props[b'summary'])) commits[hexsha] = c else: # Discard the next line (it's a filename end tag) line = next(stream) assert line.startswith( b'filename'), 'Unexpected git blame output' yield commits[hexsha], range(lineno, lineno + num_lines)