Esempio n. 1
0
def save_dist(data, headers, filename):
    """Convert a value->count dict to a distribution and write it to csv.

    data     -- dict of per-key counts (flipped via data_utils.dict_to_dist;
                presumably count -> frequency, matching other call sites)
    headers  -- column headers passed through to the csv writer
    filename -- destination csv path
    """
    file_utils.dump_dict_csv(data_utils.dict_to_dist(data), headers, filename)
Esempio n. 2
0
        #collapse per-user collections to scalar counts for the csv
        #(presumably sets/lists of repos — confirm against the builder above)
        user_active_repos[key] = len(user_active_repos[key])
        user_adopt_repos[key] = len(user_adopt_repos[key])
        user_adopt_commits[key] = user_adopt_commits[
            key]  #touch in case key not hit

    #collapse per-repo collections to scalar counts in the same way
    for key in repo_commit_count.keys():
        repo_adopt_libs[key] = len(repo_adopt_libs[key])
        repo_active_users[key] = len(repo_active_users[key])
        repo_adopting_users[key] = len(repo_adopting_users[key])
        repo_adopt_commits[key] = repo_adopt_commits[key]  #touch in case key not hit (presumably a defaultdict — confirm)

    #save raw data to csv
    #header 0 labels the key column; header i+1 labels dict i
    file_utils.dump_dict_csv([
        user_commit_count, user_adopt_commits, user_adopt_libs,
        user_active_repos, user_adopt_repos
    ], [
        "user", "number of commits", "number of adoption commits",
        "unique libraries adopted", "unique committing repos",
        "unique adoption repos"
    ], "results/user_dist/raw_user_data.csv")
    file_utils.dump_dict_csv([
        repo_commit_count, repo_adopt_commits, repo_adopt_libs,
        repo_active_users, repo_adopting_users
    ], [
        "repo", "number of commits", "number of adoption commits",
        "unique libraries adopted", "unique committing users",
        "unique adopting users"
    ], "results/repo_dist/raw_repo_data.csv")

    #convert user/repo-specific counts to distributions and save as csv
    save_dist(user_commit_count, ["number of commits", "number of users"],
              "results/user_dist/commits.csv")
Esempio n. 3
0
#commit times
#average adoption time for every observed session length; lengths whose
#adoption-time list is empty map to None
avg_adopt_commit_time = {}
for length in session_freq:	#iterate all lengths so each one is represented
	times_for_length = adopt_commit_times[length]
	if times_for_length:
		avg_adopt_commit_time[length] = sum(times_for_length) / len(times_for_length)
	else:
		avg_adopt_commit_time[length] = None
#commit positions
#average adoption commit position per session length (in commits); None when
#the position list is empty, matching the None convention used for
#avg_adopt_commit_time above (previously an empty list — e.g. a defaultdict
#key touched but never appended to — raised ZeroDivisionError)
avg_adopt_commit_position = {}
for commit_count in adopt_commit_positions.keys():
	positions = adopt_commit_positions[commit_count]
	if len(positions) != 0:
		avg_adopt_commit_position[commit_count] = sum(positions) / len(positions)
	else:
		avg_adopt_commit_position[commit_count] = None

#DUMP DATA

#dump all session length data (counts and averages) to single csv
#NOTE(review): session_commits_when_adopt is written here under an
#"average ..." header but is also dumped below as raw per-session lists —
#confirm an averaged variant was not intended at this position
session_length_data = [
	session_freq, session_adopt_freq, session_avg_commits,
	session_avg_adopt_commits, session_commits_when_adopt,
	session_avg_commits_no_adopt, avg_adopt_commit_time
]
session_length_headers = [
	"session length (hours)", "number of sessions",
	"number of sessions containing adoption", "average commits per session",
	"average adoption commits per session",
	"average commits for sessions containing adoption",
	"average commits for sessions without adoption",
	"average adoption time (seconds from session start)"
]
file_utils.dump_dict_csv(session_length_data, session_length_headers,
	"results/session_analysis/session_length_data_%s.csv" % BIN_SIZE)

#average commit positions in separate file
file_utils.dump_dict_csv(
	avg_adopt_commit_position,
	["number of commits in session", "average adoption commit position (first commit = 1)"],
	"results/session_analysis/session_avg_adopt_commit_pos.csv")

#dump dictionaries of key-> list each to separate csv file
#(includes adoption position/time lists by session length)
for lists_dict, lists_headers, lists_file in [
	(session_commit_counts,
	 ["session length (hours)", "commit counts ->"],
	 "results/session_analysis/commit_counts_by_session_length_%s.csv" % BIN_SIZE),
	(session_adopt_commit_counts,
	 ["session length (hours)", "adopt commit counts ->"],
	 "results/session_analysis/adopt_commit_counts_by_session_length_%s.csv" % BIN_SIZE),
	(session_commits_when_adopt,
	 ["session length (hours)", "commit counts (only sessions containing adoption) ->"],
	 "results/session_analysis/commit_counts_when_adopt_by_session_length_%s.csv" % BIN_SIZE),
	(session_commits_no_adopt,
	 ["session length (hours)", "commit counts (only sessions without adoption) ->"],
	 "results/session_analysis/commit_counts_when_no_adopt_by_session_length_%s.csv" % BIN_SIZE),
	(adopt_commit_positions,
	 ["session length (number of commits)", "adoption position (commits numbered from 1) ->"],
	 "results/session_analysis/adopt_commit_pos_by_session_length_%s.csv" % BIN_SIZE),
	(adopt_commit_times,
	 ["session length (hours)", "adoption time (seconds from session start) ->"],
	 "results/session_analysis/adopt_time_by_session_length_%s.csv" % BIN_SIZE),
]:
	file_utils.dump_dict_of_lists(lists_dict, lists_headers, lists_file)

#save adoption time lists to same file
file_utils.dump_lists(
	[adopt_times_list, adopt_times_list_non_zero],
	["all adoption times (percentage of session)",
	 "adoption times excluding single-commit sessions (percentage of session)"],
	"results/session_analysis/adopt_times_lists.csv")
Esempio n. 4
0
        #collapse per-user repo collections to counts
        user_promote_commit_repos[user] = len(user_promote_commit_repos[user])
    for user in user_promote_dest_repos.keys():
        user_promote_dest_repos[user] = len(user_promote_dest_repos[user])
    #and flip to distribution
    #(dict_to_dist presumably maps per-user count -> number of users with that
    #count, matching the "number of users" headers below — confirm in data_utils)
    user_adopt_commit_repos_dist = data_utils.dict_to_dist(
        user_adopt_commit_repos)
    user_adopt_source_repos_dist = data_utils.dict_to_dist(
        user_adopt_source_repos)
    user_promote_commit_repos_dist = data_utils.dict_to_dist(
        user_promote_commit_repos)
    user_promote_dest_repos_dist = data_utils.dict_to_dist(
        user_promote_dest_repos)

    #dump data to files
    #adoption delay histogram, binned by BIN_SIZE hours
    file_utils.dump_dict_csv(
        adopt_delay_dist, ["adoption delay (hours)", "frequency"],
        "results/adopt_graph_analysis/adopt_delay_dist_%shr.csv" % BIN_SIZE)
    #outgoing adoption edges per user
    file_utils.dump_dict_csv(
        adopt_from_dist, [
            "number of times user adopted from (outgoing adoption edges)",
            "number of users"
        ], "results/adopt_graph_analysis/user_adopted_from_dist.csv")
    #incoming adoption edges per user
    file_utils.dump_dict_csv(
        adopter_dist, [
            "number of incoming adoption edges (lib-source adoption pairs)",
            "number of users"
        ], "results/adopt_graph_analysis/user_adopting_edge_dist.csv")

    file_utils.dump_dict_csv(
        user_adopt_commit_repos_dist, [
            "number of unique repos user made adoption commit in (adopter)",
Esempio n. 5
0
    #add an aggregate 'total' row alongside the per-month entries;
    #len(...) collapses collections (presumably sets of unique items, per the
    #"unique ..." headers below — confirm) to counts
    month_deletions_count['total'] = deletions_count
    month_additions['total'] = len(additions)
    month_deletions['total'] = len(deletions)
    month_adoption_commit_count['total'] = adoption_commit_count
    month_adoption_libs_count['total'] = adoption_libs_count
    month_adoptions['total'] = len(adoptions)
    month_users['total'] = len(all_users)
    month_repos['total'] = len(all_repos)
    month_user_adopt['total'] = len(user_adopt)
    month_repo_adopt['total'] = len(repo_adopt)
    month_libs['total'] = len(all_libs)

    #save data to csv
    #header 0 labels the key column (year-month); header i+1 labels dict i
    file_utils.dump_dict_csv([
        month_count, month_import_commit_count, month_libs,
        month_addition_commit_count, month_additions_count, month_additions,
        month_deletion_commit_count, month_deletions_count, month_deletions,
        month_adoption_commit_count, month_adoption_libs_count,
        month_adoptions, month_users, month_repos, month_user_adopt,
        month_repo_adopt
    ], [
        "year-month", "number of commits", "number of import commits",
        "unique libraries committed (add-del-or-adopt)",
        "number of addition commits", "number of libraries added",
        "unique libraries added", "number of deletion commits",
        "number of libraries deleted", "unique libraries deleted",
        "number of adoption commits", "number of libraries adopted",
        "unique libraries adopted", "unique active users",
        "unique active repos", "unique adopting users",
        "unique repos with adoption"
    ], "results/commit_analysis_by_month.csv")
Esempio n. 6
0
    list(adopt_activity_counts.keys()) +
    list(set(reg_activity_counts.keys()) - set(adopt_activity_counts.keys())))
times = sorted(
    list(times) + list(set(import_activity_counts.keys()) - set(times)))
#times now holds the sorted union of the key sets of all three count dicts

#divide commits activity totals by total number of commits (compute average)
#NOTE(review): indexing with every key in times assumes all three dicts are
#defaultdicts (or share all keys) and that no total is 0 — otherwise this
#raises KeyError / ZeroDivisionError; confirm upstream construction
for key in times:
    adopt_activity_counts[key] /= total_adopt_commits
    reg_activity_counts[key] /= total_reg_commits
    import_activity_counts[key] /= total_import_commits

#save all data to csv
file_utils.dump_dict_csv([
    adopt_activity_counts, reg_activity_counts, import_activity_counts
], [
    "time from commit (minutes)", "adoption commits",
    "non-adopt commits (all)", "non-adopt import commits (added lib)"
], "results/activity_analysis/commit_activity_data_%smin_%sK_max_commits.csv" %
                         (BIN_WIDTH, int(MAX_USER_COMMITS / 1000)))

#plot all three lines on the same plot (since this is just for verification)
plt.clf()
fig, ax = plt.subplots()
#adoption: sort by time bin so the line is drawn left-to-right
x, y = zip(*sorted(adopt_activity_counts.items()))
ax.plot(x, y, 'r', label='adoption commits')
#all non-adopt
x, y = zip(*sorted(reg_activity_counts.items()))
ax.plot(x, y, 'b', label='all non-adopt commits')
#import non-adopt
x, y = zip(*sorted(import_activity_counts.items()))