def get_winners_info():
    """Count commits per author for each month and each year.

    Parses ``git rev-list`` output whose lines have the form
    ``<stamp> <date> <time> <timezone> <author> <<mail>>``.

    Returns:
        A tuple ``(aom, aoy)``. ``aom`` maps ``'YYYY-MM'`` to
        ``{author: commit count}``; ``aoy`` maps the year (int) to
        ``{author: commit count}``.
    """
    aom = {}
    aoy = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; it has no
        # author field, so parts[4] below would raise IndexError.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        author, _mail = parts[4].split('<', 1)
        author = author.rstrip()
        date = datetime.datetime.fromtimestamp(float(stamp))

        # author of the month
        month_counts = aom.setdefault(date.strftime('%Y-%m'), {})
        month_counts[author] = month_counts.get(author, 0) + 1

        # author of the year
        year_counts = aoy.setdefault(date.year, {})
        year_counts[author] = year_counts.get(author, 0) + 1
    return aom, aoy
def get_weekly_hourly_activity():
    """Count commits per (weekday, hour) slot.

    Timestamps are converted with ``datetime.fromtimestamp``; the weekday
    comes from ``date.weekday()`` and the hour from ``date.hour``.

    Returns:
        A tuple ``(activity_by_hour_of_week, busiest)``.
        ``activity_by_hour_of_week`` maps weekday -> ``{hour: commits}``;
        ``busiest`` is the largest single slot count (0 if there are no
        commits).
    """
    activity_by_hour_of_week = {}
    activity_by_hour_of_week_busiest = 0
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; without this
        # guard it would be counted as a commit at timestamp 0.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        date = datetime.datetime.fromtimestamp(float(stamp))
        hour = date.hour
        day = date.weekday()

        # hour of week
        hours = activity_by_hour_of_week.setdefault(day, {})
        hours[hour] = hours.get(hour, 0) + 1

        # most active hour?
        activity_by_hour_of_week_busiest = max(activity_by_hour_of_week_busiest, hours[hour])
    return activity_by_hour_of_week, activity_by_hour_of_week_busiest
def get_timezone_info():
    """Count commits per author timezone offset.

    The timezone is the fourth space-separated field of each
    ``<stamp> <date> <time> <timezone> <author> <<mail>>`` line.

    Returns:
        A dict mapping the timezone string to the number of commits.
    """
    timezones = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        parts = line.split(' ', 4)
        # Blank or truncated lines have no timezone field; indexing parts[3]
        # on them would raise IndexError.
        if len(parts) < 4:
            continue
        timezone = parts[3]
        timezones[timezone] = timezones.get(timezone, 0) + 1
    return timezones
def get_tags_info():
    """Collect per-tag metadata and per-author commit counts.

    Each tag listed by ``git show-ref --tags`` is resolved to the author
    timestamp of its commit. Tags are then walked oldest-first, and
    ``git shortlog -s`` is run for each one, excluding the previously
    processed tag, to attribute commits to that tag.

    Returns:
        A dict mapping tag name to a dict with keys ``'stamp'``,
        ``'hash'``, ``'date'`` (``'YYYY-MM-DD'``), ``'commits'`` and
        ``'authors'`` (``{author: commits}``).
    """
    tags = {}
    for line in get_pipe_output(['git show-ref --tags']).split('\n'):
        if not line:
            continue
        commit_hash, tag = line.split(' ')
        tag = tag.replace('refs/tags/', '')
        output = get_pipe_output(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % commit_hash])
        if not output:
            continue
        parts = output.split(' ')
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        tags[tag] = {'stamp': long(stamp), 'hash': commit_hash,
                     'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'),
                     'commits': 0, 'authors': {}}

    # Walk tags oldest-first. Ordering is by the day-granular 'date' string,
    # with the tag name as tie-breaker.
    tags_oldest_first = sorted(tags, key=lambda name: (tags[name]['date'], name))
    prev = None
    for tag in tags_oldest_first:
        # NOTE(review): the tag name is interpolated into a command string;
        # presumably get_pipe_output runs it through a shell - confirm the
        # quoting is sufficient for unusual tag names.
        cmd = 'git shortlog -s "%s"' % tag
        if prev is not None:
            cmd += ' "^%s"' % prev
        output = get_pipe_output([cmd])
        if not output:
            # Nothing attributed to this tag; keep excluding the older one.
            continue
        prev = tag
        for line in output.split('\n'):
            # After splitting, parts[1] is the count and parts[2] the author
            # (parts[0] is whatever precedes the first whitespace run).
            parts = re.split(r'\s+', line, maxsplit=2)
            commits = int(parts[1])
            author = parts[2]
            tags[tag]['commits'] += commits
            tags[tag]['authors'][author] = commits
    return tags
def test_rev_list(self):
    """Check that ``self.gs.get_revisions()`` yields the same
    (timestamp, tree id) pairs, in the same order, as ``git rev-list``."""
    raw_output = get_pipe_output([
        'git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'),
        'grep -v ^commit'])

    expected_data = []
    for entry in raw_output.strip().split('\n'):
        timestamp, tree_id = entry.split(' ')
        expected_data.append((long(timestamp), tree_id))

    actual_data = [(t, r) for t, r in self.gs.get_revisions()]

    self.assertListEqual(expected_data, actual_data)
def get_authors_history():
    """Build cumulative per-author commit and added-line counts over time.

    The ``git log --shortstat`` output is reversed before processing, so a
    commit's shortstat line (if any) is seen before its
    ``<stamp> <author>`` header. The pending insertion count is credited to
    the author when the header is reached.

    Returns:
        A dict mapping commit timestamp (int) to
        ``{author: {'lines_added': n, 'commits': n}}``, where both numbers
        are the author's running totals up to that commit.
    """
    log_cmd = 'git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))
    entries = get_pipe_output([log_cmd]).split('\n')[::-1]

    inserted = 0
    running_totals = {}
    changes_by_date_by_author = {}
    for entry in entries:
        if not entry:
            continue

        if re.search('files? changed', entry) is not None:
            # Shortstat line: remember insertions for the header that follows.
            numbers = get_stat_summary_counts(entry)
            if len(numbers) == 3:
                _files, inserted, _deleted = [int(el) for el in numbers]
            else:
                print('Warning: failed to handle line "%s"' % entry)
                inserted = 0
            continue

        # "<stamp> <author>" header line.
        pos = entry.find(' ')
        if pos == -1:
            print('Warning: unexpected line "%s"' % entry)
            continue
        try:
            stamp = int(entry[:pos])
        except ValueError:
            print('Warning: unexpected line "%s"' % entry)
            continue
        author = entry[pos + 1:]

        # FIXME: timestamps are used as-is. The earlier clock-skew clamp
        # (keeping the previous stamp when a commit appears older, to avoid
        # spikes from cherry-picks, rebases and amendments) is switched off
        # and is supposed to be replaced by committer time in future.

        totals = running_totals.setdefault(author, {'lines_added': 0, 'commits': 0})
        totals['commits'] = totals.get('commits', 0) + 1
        totals['lines_added'] = totals.get('lines_added', 0) + inserted

        snapshot = changes_by_date_by_author.setdefault(stamp, {}).setdefault(author, {})
        snapshot['lines_added'] = totals['lines_added']
        snapshot['commits'] = totals['commits']

        # The pending insertions have been consumed by this commit.
        inserted = 0
    return changes_by_date_by_author
def get_active_days_info():
    """Collect the set of days on which at least one commit was authored.

    Returns:
        A set of ``'YYYY-MM-DD'`` strings derived from each commit's
        author timestamp via ``datetime.fromtimestamp``.
    """
    active_days = set()
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; without this
        # guard the epoch day would be reported as active.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        date = datetime.datetime.fromtimestamp(float(stamp))
        # project: active days
        active_days.add(date.strftime('%Y-%m-%d'))
    return active_days
def get_monthly_activity_info():
    """Count commits per calendar month, aggregated over all years.

    Returns:
        A dict mapping month number (``date.month``) to the number of
        commits authored in that month.
    """
    activity_by_month_of_year = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; without this
        # guard it would be counted as a commit at timestamp 0.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        month = datetime.datetime.fromtimestamp(float(stamp)).month
        activity_by_month_of_year[month] = activity_by_month_of_year.get(month, 0) + 1
    return activity_by_month_of_year
def get_activity_by_year_week():
    """Count commits per year-week bucket and track the busiest bucket.

    Buckets are keyed by ``date.strftime('%Y-%W')``.

    Returns:
        A tuple ``(activity_by_year_week, peak)``: the per-bucket commit
        counts and the largest of those counts (0 if there are no commits).
    """
    activity_by_year_week = {}
    activity_by_year_week_peak = 0
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; without this
        # guard it would be counted as a commit at timestamp 0.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        date = datetime.datetime.fromtimestamp(float(stamp))
        # yearly/weekly activity
        yyw = date.strftime('%Y-%W')
        activity_by_year_week[yyw] = activity_by_year_week.get(yyw, 0) + 1
        activity_by_year_week_peak = max(activity_by_year_week_peak, activity_by_year_week[yyw])
    return activity_by_year_week, activity_by_year_week_peak
def get_commits_count_change_timeline():
    """Count commits per month and per year.

    Returns:
        A tuple ``(commits_by_month, commits_by_year)``.
        ``commits_by_month`` maps ``'YYYY-MM'`` to a commit count;
        ``commits_by_year`` maps the year (int) to a commit count.
    """
    commits_by_month = {}
    commits_by_year = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; without this
        # guard it would be counted as a commit at timestamp 0.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        date = datetime.datetime.fromtimestamp(float(stamp))
        yymm = date.strftime('%Y-%m')
        commits_by_month[yymm] = commits_by_month.get(yymm, 0) + 1
        yy = date.year
        commits_by_year[yy] = commits_by_year.get(yy, 0) + 1
    return commits_by_month, commits_by_year
def get_domain_info():
    """Count commits per author e-mail domain.

    The domain is the text after the last ``'@'`` of the author's mail
    address, or ``'?'`` when the address contains no ``'@'``.

    Returns:
        A dict mapping the domain (decoded from UTF-8) to
        ``{'commits': n}``.
    """
    domains = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; it has no
        # author/mail field, so parts[4] below would raise IndexError.
        if not line:
            continue
        parts = line.split(' ', 4)
        _author, mail = parts[4].split('<', 1)
        mail = mail.rstrip('>')
        domain = '?'
        if '@' in mail:
            domain = mail.rsplit('@', 1)[1]
        domain = domain.decode('utf-8')

        # domain stats
        stats = domains.setdefault(domain, {})
        stats['commits'] = stats.get('commits', 0) + 1
    return domains
def get_total_changes_timeline():
    """Build a per-commit timeline of line changes and overall totals.

    ``git log --shortstat`` prints, per commit, a ``<stamp> <author>``
    header followed by an ``N files changed, N insertions(+),
    N deletions(-)`` line. The output is reversed before processing, so
    each shortstat line is seen before the header it belongs to.

    When ``conf['linear_linestats']`` is set, ``--first-parent -m`` is
    passed to git because line counts by date are better computed on a
    linear history.

    Returns:
        A tuple ``(changes_by_date, total_lines_added, total_lines_removed)``.
        ``changes_by_date`` maps commit timestamp to a dict with keys
        ``u'files'``, ``u'ins'``, ``u'del'`` (that commit's shortstat
        numbers) and ``u'lines'`` (running insertions minus deletions).
    """
    changes_by_date = {}  # stamp -> { files, ins, del, lines }
    total_lines_added = 0
    total_lines_removed = 0

    extra = ''
    if conf['linear_linestats']:
        extra = '--first-parent -m'
    lines = get_pipe_output(
        ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
    lines.reverse()

    files, inserted, deleted = 0, 0, 0
    total_lines = 0
    for line in lines:
        if not line:
            continue

        if re.search('files? changed', line) is not None:
            # Shortstat line: hold the numbers for the header that follows.
            numbers = get_stat_summary_counts(line)
            if len(numbers) == 3:
                files, inserted, deleted = [int(el) for el in numbers]
                total_lines += inserted
                total_lines -= deleted
                total_lines_added += inserted
                total_lines_removed += deleted
            else:
                print('Warning: failed to handle line "%s"' % line)
                files, inserted, deleted = 0, 0, 0
            continue

        # "<stamp> <author>" header line.
        pos = line.find(' ')
        if pos == -1:
            print('Warning: unexpected line "%s"' % line)
            continue
        try:
            stamp = long(line[:pos])
        except ValueError:
            print('Warning: unexpected line "%s"' % line)
            continue
        changes_by_date[stamp] = {u'files': files, u'ins': inserted, u'del': deleted, u'lines': total_lines}
        # The held numbers have been consumed by this commit.
        files, inserted, deleted = 0, 0, 0
    return changes_by_date, total_lines_added, total_lines_removed
def get_num_files_in_revision(time_rev):
    """
    Count the files present in a revision.

    ``time_rev`` is a ``(time, rev)`` pair. Returns ``(int(time), rev, n)``
    where ``n`` is the first line of ``git ls-tree -r --name-only`` piped
    through ``wc -l``, converted to int.
    """
    stamp, rev = time_rev
    stamp = int(stamp)
    wc_output = get_pipe_output(['git ls-tree -r --name-only "%s"' % rev, 'wc -l'])
    file_count = int(wc_output.split('\n')[0])
    return stamp, rev, file_count
def get_authors_info():
    """Collect per-author commit statistics.

    Two passes are made over the history:

    1. ``git rev-list`` lines of the form
       ``<stamp> <date> <time> <timezone> <author> <<mail>>`` provide each
       author's first/last commit timestamp and set of active days.
    2. ``git log --shortstat`` (reversed, so each shortstat line is seen
       before its ``<stamp> <author>`` header) provides each author's
       commit count and lines added/removed.

    Returns:
        A dict mapping author name to a dict that may contain
        ``'first_commit_stamp'``, ``'last_commit_stamp'``,
        ``'last_active_day'``, ``'active_days'`` (set of ``'YYYY-MM-DD'``),
        ``'commits'``, ``'lines_added'`` and ``'lines_removed'``.
    """
    authors = {}

    # Collect revision statistics
    # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'),
         'grep -v ^commit']).split('\n')
    for line in lines:
        # split('\n') on empty output yields one empty string; it has no
        # author field, so parts[4] below would raise IndexError.
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # Non-numeric timestamp: fall back to the epoch.
            stamp = 0
        author, _mail = parts[4].split('<', 1)
        author = author.rstrip()
        date = datetime.datetime.fromtimestamp(float(stamp))

        # author stats
        info = authors.setdefault(author, {})

        # commits, note again that commits may be in any date order because
        # of cherry-picking and patches
        if 'last_commit_stamp' not in info or stamp > info['last_commit_stamp']:
            info['last_commit_stamp'] = stamp
        if 'first_commit_stamp' not in info or stamp < info['first_commit_stamp']:
            info['first_commit_stamp'] = stamp

        # authors: active days ('last_active_day' is the day of the most
        # recently processed line, not necessarily the latest date)
        yymmdd = date.strftime('%Y-%m-%d')
        if 'last_active_day' not in info:
            info['last_active_day'] = yymmdd
            info['active_days'] = set([yymmdd])
        elif yymmdd != info['last_active_day']:
            info['last_active_day'] = yymmdd
            info['active_days'].add(yymmdd)

    lines = get_pipe_output(
        ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
    lines.reverse()
    inserted = 0
    deleted = 0
    for line in lines:
        if not line:
            continue

        if re.search('files? changed', line) is not None:
            # Shortstat line: hold the numbers for the header that follows.
            numbers = get_stat_summary_counts(line)
            if len(numbers) == 3:
                _files, inserted, deleted = [int(el) for el in numbers]
            else:
                print('Warning: failed to handle line "%s"' % line)
                inserted, deleted = 0, 0
            continue

        # "<stamp> <author>" header line.
        pos = line.find(' ')
        if pos == -1:
            print('Warning: unexpected line "%s"' % line)
            continue
        try:
            # The timestamp is not stored in this pass; it is parsed only to
            # reject lines that do not start with a number.
            int(line[:pos])
        except ValueError:
            print('Warning: unexpected line "%s"' % line)
            continue
        author = line[pos + 1:]

        if author not in authors:
            authors[author] = {'lines_added': 0, 'lines_removed': 0, 'commits': 0}
        info = authors[author]
        # Entries created in the first pass lack these keys, hence .get().
        info['commits'] = info.get('commits', 0) + 1
        info['lines_added'] = info.get('lines_added', 0) + inserted
        info['lines_removed'] = info.get('lines_removed', 0) + deleted
        # The held numbers have been consumed by this commit.
        inserted, deleted = 0, 0
    return authors