def test_actor_from_string(self):
    """Check that ``Actor._from_string`` splits name and email as expected."""
    # (input string, expected Actor) pairs, covering: name only, empty
    # email brackets, and a multi-word name with a full email address.
    cases = (
        ("name", Actor("name", None)),
        ("name <>", Actor("name", "")),
        ("name last another <*****@*****.**>",
         Actor("name last another", "*****@*****.**")),
    )
    for text, expected in cases:
        self.assertEqual(Actor._from_string(text), expected)
def test_stats(self):
    """Verify the stats and the parsed header data of a known commit."""
    commit = self.rorepo.commit('33ebe7acec14b25c5f84f35a664803fcab2f7781')
    stats = commit.stats
    required_keys = ("insertions", "deletions", "lines")

    def check_entries(entry):
        # every stats record must be a dict carrying all three counters
        assert isinstance(entry, dict)
        for key in required_keys:
            assert key in entry

    assert stats.files
    assert stats.total

    totals = stats.total
    check_entries(totals)
    assert "files" in totals

    for _path, per_file in stats.files.items():
        check_entries(per_file)

    # assure data is parsed properly
    michael = Actor._from_string("Michael Trier <*****@*****.**>")
    assert commit.author == michael
    assert commit.committer == michael

    expected_date = 1210193388
    assert commit.authored_date == expected_date
    assert commit.committed_date == expected_date

    assert commit.author_tz_offset == 14400, commit.author_tz_offset
    assert commit.committer_tz_offset == 14400, commit.committer_tz_offset
    assert commit.message == "initial project\n"
def from_line(cls, line):
    """Build a RefLogEntry from a single raw reflog line.

    The part before the first TAB is read positionally: characters 0-39 are
    the old hexsha, 41-80 the new hexsha, and the actor starts at offset 82
    and runs up to and including the first ``>``; the date follows it.

    :param line: line bytes without trailing newline
    :return: New RefLogEntry instance from the given revlog line.
    :raise ValueError: If line could not be parsed"""
    text = line.decode(defenc)
    pieces = text.split('\t', 1)
    count = len(pieces)
    if count == 2:
        info, msg = pieces
    elif count == 1:
        # no TAB at all: the entry carries no message
        info, msg = pieces[0], None
    else:
        raise ValueError("Line must have up to two TAB-separated fields."
                         " Got %s" % repr(text))

    oldhexsha, newhexsha = info[:40], info[41:81]
    for hexsha in (oldhexsha, newhexsha):
        if not cls._re_hexsha_only.match(hexsha):
            raise ValueError("Invalid hexsha: %r" % (hexsha,))

    actor_start = 82
    email_end = info.find('>', actor_start)
    if email_end == -1:
        raise ValueError("Missing token: >")

    actor = Actor._from_string(info[actor_start:email_end + 1])
    time, tz_offset = parse_date(info[email_end + 2:])
    return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
def test_unicode_actor(self):
    """Assure actors with non-ASCII names are parsed correctly.

    The previous form called ``.decode("utf-8")`` on a text literal and
    checked against ``unicode``; neither exists on Python 3, so the test
    raised before reaching its assertions. A text literal is used directly
    instead.
    """
    name = u"Üäöß ÄußÉ"
    assert len(name) == 9
    special = Actor._from_string(u"%s <*****@*****.**>" % name)
    assert special.name == name
    # the parsed name must be a text string, not bytes
    assert isinstance(special.name, str)
def from_line(cls, line):
    """Build a RefLogEntry from a single reflog line.

    The line is split on the FIRST TAB only, so a message that itself
    contains TAB characters is kept intact. A line without any TAB is
    accepted and yields an entry whose message is ``None``.

    The part before the TAB is read positionally: characters 0-39 are the
    old hexsha, 41-80 the new hexsha, and the actor starts at offset 82 and
    runs up to and including the first ``>``; the date follows it.

    :param line: line without trailing newline
    :return: New RefLogEntry instance from the given revlog line.
    :raise ValueError: If line could not be parsed"""
    # maxsplit=1: everything after the first TAB belongs to the message
    fields = line.split('\t', 1)
    if len(fields) == 2:
        info, msg = fields
    else:
        info, msg = fields[0], None
    # END handle first split

    oldhexsha = info[:40]
    newhexsha = info[41:81]
    for hexsha in (oldhexsha, newhexsha):
        if not cls._re_hexsha_only.match(hexsha):
            raise ValueError("Invalid hexsha: %s" % hexsha)
        # END if hexsha re doesn't match
    # END for each hexsha

    email_end = info.find('>', 82)
    if email_end == -1:
        raise ValueError("Missing token: >")
    # END handle missing end brace

    actor = Actor._from_string(info[82:email_end + 1])
    time, tz_offset = parse_date(info[email_end + 2:])

    return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
def from_line(cls, line):
    """Parse one reflog line into a RefLogEntry.

    Only the first TAB separates the header from the message; further TABs
    are part of the message. When the line has no TAB, the whole line is
    treated as header and the message is ``None``.

    Header layout as read here: old hexsha at ``[0:40]``, new hexsha at
    ``[41:81]``, actor from offset 82 through the first ``>``, then the date.

    :param line: line without trailing newline
    :return: New RefLogEntry instance from the given revlog line.
    :raise ValueError: If line could not be parsed"""
    info, tab, message = line.partition('\t')
    # partition() reports an empty separator when no TAB was present
    msg = message if tab else None

    oldhexsha, newhexsha = info[:40], info[41:81]
    for hexsha in (oldhexsha, newhexsha):
        if not cls._re_hexsha_only.match(hexsha):
            raise ValueError("Invalid hexsha: %s" % hexsha)

    email_end = info.find('>', 82)
    if email_end == -1:
        raise ValueError("Missing token: >")

    actor = Actor._from_string(info[82:email_end + 1])
    time, tz_offset = parse_date(info[email_end + 2:])

    return RefLogEntry((oldhexsha, newhexsha, actor, (time, tz_offset), msg))
def parse_actor_and_date(line):
    """Parse out the actor (author or committer) info from a line like::

        author Tom Preston-Werner <*****@*****.**> 1191999972 -0700

    If the line does not match the full ``actor epoch offset`` pattern, the
    epoch and offset default to 0 instead of failing with an AttributeError
    on the missing match. In that case the actor text is whatever follows
    the first space (i.e. the leading keyword is dropped), or the whole line
    when it contains no space.

    :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
    m = _re_actor_epoch.search(line)
    if m:
        actor, epoch, offset = m.groups()
    else:
        # NOTE(review): assumes utctz_to_altz accepts '0' -- confirm
        _keyword, sep, remainder = line.partition(' ')
        actor = remainder if sep else line
        epoch, offset = '0', '0'
    return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
def test_from_string_should_separate_name_and_email(self):
    """Name and email are split apart; the Actor also compares and hashes sanely."""
    actor = Actor._from_string("Michael Trier <*****@*****.**>")
    assert actor.name == "Michael Trier"
    assert actor.email == "*****@*****.**"

    # base type capabilities: equality, inequality and hashing
    assert actor == actor
    assert not (actor != actor)

    unique = set()
    for _ in range(2):
        unique.add(actor)
    assert len(unique) == 1
def parse_actor_and_date(line: str) -> Tuple[Actor, int, int]:
    """Parse out the actor (author or committer) info from a line like::

        author Tom Preston-Werner <*****@*****.**> 1191999972 -0700

    When the line lacks the epoch/offset part, both default to 0 and only
    the actor text is extracted; if even that fails, the whole line (or an
    empty string) is used as the actor text.

    :return: [Actor, int_seconds_since_epoch, int_timezone_offset]"""
    full_match = _re_actor_epoch.search(line)
    if full_match:
        actor, epoch, offset = full_match.groups()
    else:
        epoch = offset = '0'
        actor_match = _re_only_actor.search(line)
        if actor_match:
            actor = actor_match.group(1)
        else:
            actor = line or ''
    return (Actor._from_string(actor), int(epoch), utctz_to_altz(offset))
def blame(self, rev, file):
    """Return the blame information for the given file at the given revision.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return:
        list: [git.Commit, list: [<line>]]
        A list of two-element lists associating a Commit object with a list of
        lines that changed within the given commit. The Commit objects will be
        given in order of appearance."""
    raw_output = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
    commits = {}
    blames = []
    info = None

    for line in raw_output.splitlines(True):
        try:
            line = line.rstrip().decode(defenc)
        except UnicodeDecodeError:
            # There is no way to know where binary data ends (it may contain newlines itself),
            # so a failed decode is the signal that this is binary content. This can misfire on
            # text files too, in which case treating the line as binary is still acceptable.
            firstpart = ''
            is_binary = True
        else:
            parts = self.re_whitespace.split(line, 1)
            firstpart = parts[0]
            is_binary = False

        if self.re_hexsha_only.search(firstpart):
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7  - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2    - another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info['id'] != firstpart:
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            continue

        role_match = self.re_author_committer_start.search(firstpart)
        if role_match:
            # author / author-mail / author-time / author-tz and the committer
            # equivalents; the *-tz lines match none of the branches and are ignored
            role = role_match.group(0)
            if firstpart.endswith('-mail'):
                info["%s_email" % role] = parts[-1]
            elif firstpart.endswith('-time'):
                info["%s_date" % role] = int(parts[-1])
            elif role == firstpart:
                info[role] = parts[-1]
            continue

        # remaining headers: "filename ...", "summary ...", or a content line
        if firstpart.startswith('filename'):
            info['filename'] = parts[-1]
        elif firstpart.startswith('summary'):
            info['summary'] = parts[-1]
        elif firstpart == '' and info:
            sha = info['id']
            c = commits.get(sha)
            if c is None:
                c = Commit(self, hex_to_bin(sha),
                           author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                           authored_date=info['author_date'],
                           committer=Actor._from_string(
                               info['committer'] + ' ' + info['committer_email']),
                           committed_date=info['committer_date'],
                           message=info['summary'])
                commits[sha] = c

            # Text content lines are prefixed with a TAB which is removed here. Binary chunks are
            # appended untouched, even though they may be fragments of one logical line that was
            # split along newline bytes.
            if not is_binary and line.startswith('\t'):
                line = line[1:]

            blames[-1][0] = c
            blames[-1][1].append(line)
            # keep only the sha so that follow-up lines of the same group are attributed to it
            info = {'id': sha}

    return blames
def blame(self, rev, file):
    """The blame information for the given file at the given revision.

    Every content line of the blame output is recorded. Previously the
    collected header info was discarded after the first content line of a
    group, so the remaining lines of that group (whose header carries only
    two numbers) were silently dropped.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return:
        list: [git.Commit, list: [<line>]]
        A list of two-element lists associating a Commit object with a list of
        lines that changed within the given commit. The Commit objects will be
        given in order of appearance."""
    data = self.git.blame(rev, '--', file, p=True)
    commits = dict()
    blames = list()
    info = None

    for line in data.splitlines(False):
        parts = self.re_whitespace.split(line, 1)
        firstpart = parts[0]
        if self.re_hexsha_only.search(firstpart):
            # handles
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7  - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2    - indicates
            # another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info is None or info['id'] != firstpart:
                # continuation-style header for a commit other than the current one
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            # END blame data initialization
        else:
            m = self.re_author_committer_start.search(firstpart)
            if m:
                # handles:
                # author Tom Preston-Werner
                # author-mail <*****@*****.**>
                # author-time 1192271832
                # author-tz -0700
                # committer Tom Preston-Werner
                # committer-mail <*****@*****.**>
                # committer-time 1192271832
                # committer-tz -0700  - IGNORED BY US
                role = m.group(0)
                if firstpart.endswith('-mail'):
                    info["%s_email" % role] = parts[-1]
                elif firstpart.endswith('-time'):
                    info["%s_date" % role] = int(parts[-1])
                elif role == firstpart:
                    info[role] = parts[-1]
                # END distinguish mail,time,name
            else:
                # handle
                # filename lib/grit.rb
                # summary add Blob
                # <and rest>
                if firstpart.startswith('filename'):
                    info['filename'] = parts[-1]
                elif firstpart.startswith('summary'):
                    info['summary'] = parts[-1]
                elif firstpart == '':
                    if info:
                        sha = info['id']
                        c = commits.get(sha)
                        if c is None:
                            c = Commit(
                                self, hex_to_bin(sha),
                                author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                                authored_date=info['author_date'],
                                committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                                committed_date=info['committer_date'],
                                message=info['summary'])
                            commits[sha] = c
                        # END if commit objects needs initial creation
                        m = self.re_tab_full_line.search(line)
                        text, = m.groups()
                        blames[-1][0] = c
                        blames[-1][1].append(text)
                        # Remember the sha: the next lines of this group have no
                        # full header and must still be attributed to this commit.
                        info = {'id': sha}
                    # END if we collected commit info
                # END distinguish filename,summary,rest
            # END distinguish author|committer vs filename,summary,rest
        # END distinguish hexsha vs other information
    return blames
def blame(repo, start_commit, end_commit, filename):
    """Yield ``(commit, lines)`` pairs from a blame of ``filename`` over a commit range.

    The blame is run for the range ``<start_commit>^..<end_commit>``. Commits
    flagged as ``boundary`` in the blame output are never instantiated, and
    groups that end up without a commit object are not yielded.

    :param repo: object providing ``git``, the ``re_*`` patterns and ``CommitCls``
    :param start_commit: first commit of the range (its parent is the exclusive start)
    :param end_commit: last commit of the range
    :param filename: path of the file to blame
    :return: generator of (commit, list of line texts) pairs in order of appearance"""
    revision_range = '%s^..%s' % (start_commit, end_commit)
    output = repo.git.blame(revision_range, '--', filename, p=True)
    commits = {}
    blames = []
    info = None

    for line in output.splitlines(False):
        parts = repo.re_whitespace.split(line, 1)
        firstpart = parts[0]

        if repo.re_hexsha_only.search(firstpart):
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 1 1 7  - indicates blame-data start
            # 634396b2f541a9f2d58b00be1a07f0c358b999b3 2 2
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info['id'] != firstpart:
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            continue

        role_match = repo.re_author_committer_start.search(firstpart)
        if role_match:
            # author / author-mail / author-time and the committer equivalents;
            # the *-tz lines match none of the branches and are ignored
            role = role_match.group(0)
            if firstpart.endswith('-mail'):
                info["%s_email" % role] = parts[-1]
            elif firstpart.endswith('-time'):
                info["%s_date" % role] = int(parts[-1])
            elif role == firstpart:
                info[role] = parts[-1]
            continue

        # remaining headers: filename, summary, boundary, or a content line
        if firstpart.startswith('filename'):
            info['filename'] = parts[-1]
        elif firstpart.startswith('summary'):
            info['summary'] = parts[-1]
        elif firstpart.startswith('boundary'):
            info['boundary'] = True
        elif firstpart == '' and info:
            sha = info['id']
            c = commits.get(sha)
            if c is None:
                if info.get('boundary'):
                    # False marks a boundary commit in the cache; c itself stays None here
                    commits[sha] = False
                else:
                    c = repo.CommitCls(
                        repo, hex_to_bin(sha),
                        author=Actor._from_string(info['author'] + ' ' + info['author_email']),
                        authored_date=info['author_date'],
                        committer=Actor._from_string(info['committer'] + ' ' + info['committer_email']),
                        committed_date=info['committer_date'],
                        message=info['summary']
                    )
                    commits[sha] = c
            if c is not False:
                tab_match = repo.re_tab_full_line.search(line)
                text, = tab_match.groups()
                blames[-1][0] = c
                blames[-1][1].append(text)
                # keep only the sha for the follow-up lines of this group
                info = {'id': sha}

    for commit, lines in blames:
        if commit is None:
            continue
        yield commit, lines
def test_str_should_alias_name(self):
    """``str(actor)`` must yield the actor's name."""
    actor = Actor._from_string("Michael Trier <*****@*****.**>")
    rendered = str(actor)
    assert actor.name == rendered
def test_should_display_representation(self):
    """``repr(actor)`` must include both the name and the email."""
    actor = Actor._from_string("Michael Trier <*****@*****.**>")
    expected = '<git.Actor "Michael Trier <*****@*****.**>">'
    assert expected == repr(actor)
def test_from_string_should_handle_just_name(self):
    """A string without an ``<email>`` part yields the name and no email."""
    a = Actor._from_string("Michael Trier")
    assert a.name == "Michael Trier"
    # identity check: the email must be exactly None, not merely equal to it
    assert a.email is None
def blame(self, rev, file):
    """Collect, per blame group, the commit and the lines attributed to it.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :param file: path of the file to blame
    :return:
        list: [git.Commit, list: [<line>]]
        A list of two-element lists associating a Commit object with a list of
        lines that changed within the given commit. The Commit objects will be
        given in order of appearance."""
    output = self.git.blame(rev, '--', file, p=True, stdout_as_string=False)
    commits = {}
    blames = []
    info = None

    for line in output.splitlines(True):
        is_binary = False
        try:
            line = line.rstrip().decode(defenc)
        except UnicodeDecodeError:
            # Decoding is the only available hint for binary content, whose end cannot be
            # determined since it may contain newlines. A false positive on a text file is
            # tolerated: the line is simply handled as binary.
            is_binary = True
            firstpart = ''
        else:
            parts = self.re_whitespace.split(line, 1)
            firstpart = parts[0]

        if self.re_hexsha_only.search(firstpart):
            # "<sha> 1 1 7" starts a new blame group,
            # "<sha> 2 2" is another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info['id'] != firstpart:
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
        else:
            found = self.re_author_committer_start.search(firstpart)
            if found:
                # author, author-mail, author-time and the committer counterparts are stored;
                # the -tz variants fall through all branches and are ignored
                role = found.group(0)
                value = parts[-1]
                if firstpart.endswith('-mail'):
                    info["%s_email" % role] = value
                elif firstpart.endswith('-time'):
                    info["%s_date" % role] = int(value)
                elif role == firstpart:
                    info[role] = value
            elif firstpart.startswith('filename'):
                info['filename'] = parts[-1]
            elif firstpart.startswith('summary'):
                info['summary'] = parts[-1]
            elif firstpart == '':
                if not info:
                    continue
                sha = info['id']
                c = commits.get(sha)
                if c is None:
                    c = Commit(
                        self, hex_to_bin(sha),
                        author=Actor._from_string(
                            info['author'] + ' ' + info['author_email']),
                        authored_date=info['author_date'],
                        committer=Actor._from_string(
                            info['committer'] + ' ' + info['committer_email']),
                        committed_date=info['committer_date'],
                        message=info['summary'])
                    commits[sha] = c

                if is_binary:
                    # NOTE: binary data is parsed line by line and may therefore be split along
                    # newline bytes; each chunk is appended to the current blame as-is.
                    pass
                elif line and line[0] == '\t':
                    line = line[1:]

                blames[-1][0] = c
                blames[-1][1].append(line)
                # only the sha is kept, for the remaining lines of the group
                info = {'id': sha}

    return blames
def blame(self, rev, file):
    """The blame information for the given file at the given revision.

    All content lines are kept. The header info used to be reset to ``None``
    after each content line, which dropped every line of a blame group
    except the first one, because follow-up lines only carry a short
    two-number header.

    :param rev: revision specifier, see git-rev-parse for viable options.
    :return:
        list: [git.Commit, list: [<line>]]
        A list of two-element lists associating a Commit object with a list of
        lines that changed within the given commit. The Commit objects will be
        given in order of appearance."""
    data = self.git.blame(rev, '--', file, p=True)
    commits = {}
    blames = []
    info = None

    for line in data.splitlines(False):
        parts = self.re_whitespace.split(line, 1)
        firstpart = parts[0]

        if self.re_hexsha_only.search(firstpart):
            # "<sha> 1 1 7" indicates blame-data start,
            # "<sha> 2 2" is another line of blame with the same data
            digits = parts[-1].split(" ")
            if len(digits) == 3:
                info = {'id': firstpart}
                blames.append([None, []])
            elif info is None or info['id'] != firstpart:
                # short header for a commit that is not the current one
                info = {'id': firstpart}
                blames.append([commits.get(firstpart), []])
            continue

        m = self.re_author_committer_start.search(firstpart)
        if m:
            # author / author-mail / author-time and the committer equivalents;
            # the *-tz lines match none of the branches and are ignored
            role = m.group(0)
            if firstpart.endswith('-mail'):
                info["%s_email" % role] = parts[-1]
            elif firstpart.endswith('-time'):
                info["%s_date" % role] = int(parts[-1])
            elif role == firstpart:
                info[role] = parts[-1]
            continue

        # remaining headers: "filename ...", "summary ...", or a content line
        if firstpart.startswith('filename'):
            info['filename'] = parts[-1]
        elif firstpart.startswith('summary'):
            info['summary'] = parts[-1]
        elif firstpart == '' and info:
            sha = info['id']
            c = commits.get(sha)
            if c is None:
                c = Commit(
                    self, hex_to_bin(sha),
                    author=Actor._from_string(
                        info['author'] + ' ' + info['author_email']),
                    authored_date=info['author_date'],
                    committer=Actor._from_string(
                        info['committer'] + ' ' + info['committer_email']),
                    committed_date=info['committer_date'],
                    message=info['summary'])
                commits[sha] = c

            m = self.re_tab_full_line.search(line)
            text, = m.groups()
            blames[-1][0] = c
            blames[-1][1].append(text)
            # Keep the sha so the next lines of this group, which lack a full
            # header, are still attributed to this commit.
            info = {'id': sha}

    return blames