def issues_network(out, repo, github):
    """Builds the issues interaction network of a repo and saves it.

    The issue interactions of ``repo`` are fetched with
    ``datautil.get_issues_interaction``, turned into a graph by
    ``networkutil.create_interaction_network`` and written to ``out`` in
    adjacency-list format.

    param:
    out: Destination handed to ``nx.write_adjlist``.
    repo: The repo to process; also used as the ``repo_name`` of the network.
    github: Passed through unchanged to ``datautil.get_issues_interaction``
            (presumably the GitHub API client -- confirm against datautil).
    """
    nx.write_adjlist(
        networkutil.create_interaction_network(
            datautil.get_issues_interaction(repo, github),
            repo_name=repo,
        ),
        out,
    )
def commit_networks(in_pattern, out_dir, languages):
    """
    Builds commit networks.

    It retrieves commits from the top most-watched repos of each given
    language. Then, for each repo, it dumps the raw commits as a pickle and
    builds the interaction network from the commit history, which is written
    as an adjacency list.

    param:
    in_pattern: The path pattern of the file containing the top most-watched
                repos, one repo per line. ``{0}`` is replaced by the language.
                Blank lines are ignored.
    out_dir: The pattern of the output dir for both raw data and graph data.
             ``{0}`` is replaced by the language.
    languages: List of languages.

    Here is the example of each param:

    in_pattern = "../data/most_watched/{0}.txt"
    out_dir = "../data/network/commit/{0}"
    languages = ['python', 'java']

    It should be noted that in out_dir, it is expected that there will be
    sub-dir graph/ and raw/; they are not created here.
    """
    # {0} (coming from out_dir) is the language, {1} is the repo file name.
    graph_out = out_dir + "/graph/{1}.txt"
    raw_out = out_dir + "/raw/{1}.pickle"

    github = Github(requests_per_second=1)
    for lang in languages:
        with open(in_pattern.format(lang), "r") as f:
            for line in f:
                repo = line.strip()
                if not repo:
                    # A blank (e.g. trailing) line is not a repo name.
                    continue

                # "owner/name" cannot be used as a single file name.
                f_name = repo.replace('/', '_')
                raw_path = raw_out.format(lang, f_name)
                graph_path = graph_out.format(lang, f_name)

                # Single-argument print(...) behaves the same on Python 2
                # and Python 3.
                print("Starting {0} at {1}".format(repo, raw_path))
                commits = get_commits_from_repo(repo, github)
                # pickle needs a binary file: text mode breaks on Python 3
                # and may corrupt the data on Windows.
                with open(raw_path, "wb") as pickle_f:
                    pickle.dump(commits, pickle_f)

                print("Starting {0} at {1}".format(repo, graph_path))
                commit_interactions = commit_interactions_from_repo(commits)
                g = networkutil.create_interaction_network(
                    commit_interactions, repo_name=repo)
                nx.write_adjlist(g, graph_path)