Exemple #1
0
def get_winners_info():
    """
    Count commits per author for every month and every year of the history.

    Each line of the `git rev-list` output is expected to look like
    "<stamp> <date> <time> <timezone> <author> <<mail>>".

    Returns a tuple (aom, aoy):
      aom -- dict 'YYYY-MM' -> {author: number of commits}
      aoy -- dict year (int) -> {author: number of commits}
    Month and year are taken from the author timestamp converted to local time.
    """
    aom = {}
    aoy = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty command output still yields one (blank) line;
        # it carries no author field and must not be parsed
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0
        author, _mail = parts[4].split('<', 1)
        author = author.rstrip()
        date = datetime.datetime.fromtimestamp(stamp)

        # author of the month
        by_month = aom.setdefault(date.strftime('%Y-%m'), {})
        by_month[author] = by_month.get(author, 0) + 1

        # author of the year
        by_year = aoy.setdefault(date.year, {})
        by_year[author] = by_year.get(author, 0) + 1
    return aom, aoy
Exemple #2
0
def get_weekly_hourly_activity():
    """
    Count commits per weekday and hour of day.

    Returns a tuple (activity_by_hour_of_week, busiest):
      activity_by_hour_of_week -- dict weekday (0 = Monday) -> {hour: number of commits}
      busiest -- the largest single (weekday, hour) counter, 0 when there are no commits
    Weekday and hour are taken from the author timestamp converted to local time.
    """
    activity_by_hour_of_week = {}
    activity_by_hour_of_week_busiest = 0
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line; without this guard
        # it would be counted as a commit made at the epoch
        if not line:
            continue
        try:
            stamp = int(line.split(' ', 1)[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0
        date = datetime.datetime.fromtimestamp(stamp)
        hour = date.hour
        day = date.weekday()

        # hour of week
        hours_of_day = activity_by_hour_of_week.setdefault(day, {})
        hours_of_day[hour] = hours_of_day.get(hour, 0) + 1
        # most active hour?
        if hours_of_day[hour] > activity_by_hour_of_week_busiest:
            activity_by_hour_of_week_busiest = hours_of_day[hour]

    return activity_by_hour_of_week, activity_by_hour_of_week_busiest
Exemple #3
0
def get_timezone_info():
    """
    Count commits per author timezone.

    Each line of the `git rev-list` output is expected to look like
    "<stamp> <date> <time> <timezone> <author> <<mail>>"; the fourth field is used as the key.

    Returns a dict timezone string -> number of commits.
    """
    timezones = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line with no timezone field
        if not line:
            continue
        timezone = line.split(' ', 4)[3]
        timezones[timezone] = timezones.get(timezone, 0) + 1

    return timezones
Exemple #4
0
def get_tags_info():
    """
    Collect per-tag statistics.

    Returns a dict tag name -> info dict with the keys:
      'stamp'   -- author timestamp of the commit `git log -n 1` reports for the tag
      'hash'    -- object id listed for the tag by `git show-ref --tags`
      'date'    -- 'YYYY-MM-DD' (local time) derived from 'stamp'
      'commits' -- commits reachable from the tag but not from the previously processed
                   (older) tag, summed over the `git shortlog -s` output
      'authors' -- dict author -> number of those commits
    Tags for which `git log` prints nothing are left out.
    """
    tags = {}
    for line in get_pipe_output(['git show-ref --tags']).split('\n'):
        if not line:
            continue
        (tag_hash, tag) = line.split(' ')

        tag = tag.replace('refs/tags/', '')
        output = get_pipe_output(['git log "%s" --pretty=format:"%%at %%aN" -n 1' % tag_hash])
        if not output:
            continue
        try:
            stamp = int(output.split(' ')[0])
        except ValueError:
            stamp = 0
        tags[tag] = {'stamp': long(stamp), 'hash': tag_hash,
                     'date': datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'), 'commits': 0,
                     'authors': {}}

    # Walk the tags from oldest to newest so each one is only credited with the commits added
    # since the previous tag. Order by the full timestamp (name as tie-breaker): ordering by the
    # day string would fall back to alphabetical order for tags created on the same day.
    tags_oldest_first = sorted(tags, key=lambda name: (tags[name]['stamp'], name))
    prev = None
    for tag in tags_oldest_first:
        cmd = 'git shortlog -s "%s"' % tag
        if prev is not None:
            cmd += ' "^%s"' % prev
        output = get_pipe_output([cmd])
        if not output:
            continue
        prev = tag
        for line in output.split('\n'):
            # "<count><whitespace><author>"; the count is usually left-padded, but do not rely
            # on leading whitespace being present
            fields = line.split(None, 1)
            if not fields:
                continue
            commits = int(fields[0])
            author = fields[1] if len(fields) > 1 else ''
            tags[tag]['commits'] += commits
            tags[tag]['authors'][author] = commits
    return tags
Exemple #5
0
    def test_rev_list(self):
        """
        Check that self.gs.get_revisions() yields the same (timestamp, tree id) pairs,
        in the same order, as the ones parsed directly from `git rev-list`.
        """
        raw_output = get_pipe_output([
            'git rev-list --pretty=format:"%%at %%T" %s' % getlogrange('HEAD'),
            'grep -v ^commit'])
        rows = (row.split(' ') for row in raw_output.strip().split('\n'))
        # every row must consist of exactly "<stamp> <tree id>"
        expected_data = [(long(ts), tree_id) for ts, tree_id in rows]

        actual_data = [(t, r) for t, r in self.gs.get_revisions()]

        self.assertListEqual(expected_data, actual_data)
Exemple #6
0
def get_authors_history():
    """
    Build the running per-author totals of commits and added lines over time.

    Returns a dict author timestamp -> {author: {'lines_added': total, 'commits': total}},
    where the totals are the author's cumulative values up to and including that commit
    (in the processing order, i.e. the reversed `git log --date-order` output).
    """
    log_cmd = 'git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))
    entries = get_pipe_output([log_cmd]).split('\n')
    # after reversing, the shortstat line of a commit is seen before its "<stamp> <author>" line
    entries.reverse()

    pending_inserted = 0  # insertions of the shortstat line waiting for its commit header
    running_totals = {}
    changes_by_date_by_author = {}
    for entry in entries:
        if not entry:
            continue

        if re.search('files? changed', entry) is not None:
            # shortstat summary line
            counts = get_stat_summary_counts(entry)
            if len(counts) == 3:
                pending_inserted = [int(el) for el in counts][1]
            else:
                print('Warning: failed to handle line "%s"' % entry)
                pending_inserted = 0
            continue

        # <stamp> <author>
        sep = entry.find(' ')
        if sep == -1:
            print('Warning: unexpected line "%s"' % entry)
            continue
        try:
            stamp = int(entry[:sep])
        except ValueError:
            print('Warning: unexpected line "%s"' % entry)
            continue
        author = entry[sep + 1:]

        # FIXME: the original version kept the previous timestamp when a stamp went backwards
        # (clock skew) to avoid splashes on the graphs due to cherry-picking, rebases and
        # amendments. That correction is switched off and is supposed to be replaced by using
        # committer time in future.

        totals = running_totals.setdefault(author, {'lines_added': 0, 'commits': 0})
        totals['commits'] = totals.get('commits', 0) + 1
        totals['lines_added'] = totals.get('lines_added', 0) + pending_inserted

        snapshot = changes_by_date_by_author.setdefault(stamp, {}).setdefault(author, {})
        snapshot['lines_added'] = totals['lines_added']
        snapshot['commits'] = totals['commits']
        pending_inserted = 0

    return changes_by_date_by_author
Exemple #7
0
def get_active_days_info():
    """
    Collect the days on which at least one commit was authored.

    Returns a set of 'YYYY-MM-DD' strings derived from the author timestamps
    converted to local time.
    """
    active_days = set()
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line; without this guard
        # it would register the epoch day as an active day
        if not line:
            continue
        try:
            stamp = int(line.split(' ', 1)[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0
        # project: active days
        active_days.add(datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d'))

    return active_days
Exemple #8
0
def get_monthly_activity_info():
    """
    Count commits per month of the year, regardless of the year.

    Returns a dict month number (1-12) -> number of commits; the month is taken from
    the author timestamp converted to local time.
    """
    activity_by_month_of_year = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line; without this guard
        # it would be counted as a commit made at the epoch
        if not line:
            continue
        try:
            stamp = int(line.split(' ', 1)[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0

        # month of year
        month = datetime.datetime.fromtimestamp(stamp).month
        activity_by_month_of_year[month] = activity_by_month_of_year.get(month, 0) + 1

    return activity_by_month_of_year
Exemple #9
0
def get_activity_by_year_week():
    """
    Count commits per calendar week.

    Returns a tuple (activity_by_year_week, peak):
      activity_by_year_week -- dict '%Y-%W' formatted key (year and week number) -> number of commits
      peak -- the largest weekly counter, 0 when there are no commits
    The key is derived from the author timestamp converted to local time.
    """
    activity_by_year_week = {}
    activity_by_year_week_peak = 0
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split('\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line; without this guard
        # it would be counted as a commit made at the epoch
        if not line:
            continue
        try:
            stamp = int(line.split(' ', 1)[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0

        # yearly/weekly activity
        yyw = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%W')
        week_total = activity_by_year_week.get(yyw, 0) + 1
        activity_by_year_week[yyw] = week_total
        if activity_by_year_week_peak < week_total:
            activity_by_year_week_peak = week_total
    return activity_by_year_week, activity_by_year_week_peak
Exemple #10
0
def get_commits_count_change_timeline():
    """
    Count commits per month and per year.

    Returns a tuple (commits_by_month, commits_by_year):
      commits_by_month -- dict 'YYYY-MM' -> number of commits
      commits_by_year  -- dict year (int) -> number of commits
    Month and year are taken from the author timestamp converted to local time.
    """
    commits_by_month = {}
    commits_by_year = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line; without this guard
        # it would be counted as a commit made at the epoch
        if not line:
            continue
        try:
            stamp = int(line.split(' ', 1)[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0
        date = datetime.datetime.fromtimestamp(stamp)

        yymm = date.strftime('%Y-%m')
        commits_by_month[yymm] = commits_by_month.get(yymm, 0) + 1

        yy = date.year
        commits_by_year[yy] = commits_by_year.get(yy, 0) + 1

    return commits_by_month, commits_by_year
Exemple #11
0
def get_domain_info():
    """
    Count commits per e-mail domain of the author.

    Each line of the `git rev-list` output is expected to look like
    "<stamp> <date> <time> <timezone> <author> <<mail>>". The domain is the text after
    the last '@' of the mail address, or '?' when the address contains no '@'.

    Returns a dict domain (unicode) -> {'commits': number of commits}.
    """
    domains = {}
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line with no author field
        if not line:
            continue
        parts = line.split(' ', 4)
        _author, mail = parts[4].split('<', 1)
        mail = mail.rstrip('>')
        domain = mail.rsplit('@', 1)[1] if '@' in mail else '?'

        # Python 2: turn the byte string read from the pipe into unicode
        domain = domain.decode('utf-8')
        # domain stats: commits
        stats = domains.setdefault(domain, {})
        stats['commits'] = stats.get('commits', 0) + 1

    return domains
Exemple #12
0
def get_total_changes_timeline():
    """
    Build the timeline of line statistics from `git log --shortstat`.

    The log output consists of
      N files changed, N insertions (+), N deletions(-)
      <stamp> <author>
    lines; it is processed in reverse, so the shortstat line of a commit is seen before
    its "<stamp> <author>" line.

    Returns a tuple (changes_by_date, total_lines_added, total_lines_removed):
      changes_by_date -- dict stamp -> {u'files', u'ins', u'del', u'lines'}, where u'lines' is the
                         running insertions-minus-deletions total at that commit
      total_lines_added / total_lines_removed -- sums over all shortstat lines
    """
    changes_by_date = {}  # stamp -> { files, ins, del, lines }
    total_lines_added = 0
    total_lines_removed = 0
    # computation of lines of code by date is better done
    # on a linear history.
    extra = ''
    if conf['linear_linestats']:
        extra = '--first-parent -m'
    lines = get_pipe_output(
        ['git log --shortstat %s --pretty=format:"%%at %%aN" %s' % (extra, getlogrange('HEAD'))]).split('\n')
    lines.reverse()
    files = 0
    inserted = 0
    deleted = 0
    total_lines = 0
    for line in lines:
        if not line:
            continue

        if re.search('files? changed', line) is not None:
            # shortstat summary line: remember the counts for the commit header that follows
            numbers = get_stat_summary_counts(line)
            if len(numbers) == 3:
                (files, inserted, deleted) = [int(el) for el in numbers]
                total_lines += inserted
                total_lines -= deleted
                total_lines_added += inserted
                total_lines_removed += deleted
            else:
                print('Warning: failed to handle line "%s"' % line)
                (files, inserted, deleted) = (0, 0, 0)
            continue

        # <stamp> <author>
        pos = line.find(' ')
        if pos == -1:
            print('Warning: unexpected line "%s"' % line)
            continue
        try:
            stamp = long(line[:pos])
        except ValueError:
            print('Warning: unexpected line "%s"' % line)
            continue
        changes_by_date[stamp] = {u'files': files, u'ins': inserted, u'del': deleted, u'lines': total_lines}
        # the pending counts are consumed; a commit without a shortstat line must not inherit them
        files, inserted, deleted = 0, 0, 0
    return changes_by_date, total_lines_added, total_lines_removed
Exemple #13
0
def get_num_files_in_revision(time_rev):
    """
    Get number of files at a given revision

    time_rev is a (time, rev) pair. Returns (int(time), rev, count), where count is the
    first line printed by `git ls-tree -r --name-only "<rev>"` piped through `wc -l`.
    """
    raw_time, rev = time_rev
    stamp = int(raw_time)
    wc_output = get_pipe_output(['git ls-tree -r --name-only "%s"' % rev, 'wc -l'])
    file_count = int(wc_output.split('\n')[0])
    return stamp, rev, file_count
Exemple #14
0
def get_authors_info():
    """
    Collect per-author statistics.

    Returns a dict author -> info dict. The `git rev-list` pass fills
      'first_commit_stamp' / 'last_commit_stamp' -- smallest / largest author timestamp
      'active_days'     -- set of 'YYYY-MM-DD' days (local time) with at least one commit
      'last_active_day' -- day of the most recently processed commit of that author
    and the `git log --shortstat` pass fills
      'commits', 'lines_added', 'lines_removed'.
    """
    authors = {}

    # Collect revision statistics
    # Outputs "<stamp> <date> <time> <timezone> <author> '<' <mail> '>'"
    lines = get_pipe_output(
        ['git rev-list --pretty=format:"%%at %%ai %%aN <%%aE>" %s' % getlogrange('HEAD'), 'grep -v ^commit']).split(
        '\n')
    for line in lines:
        # ''.split('\n') is [''], so empty output yields one blank line with no author field
        if not line:
            continue
        parts = line.split(' ', 4)
        try:
            stamp = int(parts[0])
        except ValueError:
            # best effort: an unparsable stamp is accounted at the epoch
            stamp = 0
        author, _mail = parts[4].split('<', 1)
        author = author.rstrip()
        # author stats
        info = authors.setdefault(author, {})
        # commits, note again that commits may be in any date order because of cherry-picking and patches
        if 'last_commit_stamp' not in info or stamp > info['last_commit_stamp']:
            info['last_commit_stamp'] = stamp
        if 'first_commit_stamp' not in info or stamp < info['first_commit_stamp']:
            info['first_commit_stamp'] = stamp

        # authors: active days
        yymmdd = datetime.datetime.fromtimestamp(stamp).strftime('%Y-%m-%d')
        info['last_active_day'] = yymmdd
        info.setdefault('active_days', set()).add(yymmdd)

    lines = get_pipe_output(
        ['git log --shortstat --date-order --pretty=format:"%%at %%aN" %s' % (getlogrange('HEAD'))]).split('\n')
    # after reversing, the shortstat line of a commit is seen before its "<stamp> <author>" line
    lines.reverse()

    inserted = 0
    deleted = 0
    for line in lines:
        if not line:
            continue

        if re.search('files? changed', line) is not None:
            # shortstat summary line: remember the counts for the commit header that follows
            numbers = get_stat_summary_counts(line)
            if len(numbers) == 3:
                (_files, inserted, deleted) = [int(el) for el in numbers]
            else:
                print('Warning: failed to handle line "%s"' % line)
                (inserted, deleted) = (0, 0)
            continue

        # <stamp> <author>
        pos = line.find(' ')
        if pos == -1:
            print('Warning: unexpected line "%s"' % line)
            continue
        try:
            # the value itself is not needed here; the conversion only validates the line
            int(line[:pos])
        except ValueError:
            print('Warning: unexpected line "%s"' % line)
            continue
        author = line[pos + 1:]
        if author not in authors:
            authors[author] = {'lines_added': 0, 'lines_removed': 0, 'commits': 0}
        info = authors[author]
        info['commits'] = info.get('commits', 0) + 1
        info['lines_added'] = info.get('lines_added', 0) + inserted
        info['lines_removed'] = info.get('lines_removed', 0) + deleted
        # the pending counts are consumed; a commit without a shortstat line must not inherit them
        inserted, deleted = 0, 0
    return authors