import os
from datetime import datetime

from tqdm import tqdm

# Project-local modules used throughout the functions below (assumed layout):
import file_opt  # JSON/file I/O helpers
import init      # paths and repo lists
import queries   # GraphQL query templates
import vis       # visualization helpers


def extract_link_type(response_p, response_i, renew, filepath=None):
    if renew == 1:
        nodes = response_p['data']['repository']['pullRequests']['nodes'] \
                + response_i['data']['repository']['issues']['nodes']
        if os.path.isfile(filepath + "links_type.json"):
            # links_type.json already exists: resume from the checkpoint
            links = file_opt.read_json_from_file(filepath + "links_type.json")
        else:
            # no checkpoint: extract links from scratch
            links = []
        continue_nodes = []
        for node in nodes:  # locate the new starting point
            if links == []:
                continue_nodes = nodes
                break
            if str(node['number']) == str(links[-1]['source']['number']):
                continue_nodes = nodes[nodes.index(node) + 1:]
                break
        if continue_nodes != []:
            for node in tqdm(continue_nodes):  # start extracting links
                links = extract_link_in_title(nodes, node, links)
                # links = extract_link_in_body(nodes, node, links)
                # links = extract_link_in_comment(nodes, node, links)
                # links = extract_link_in_crossReference(nodes, node, links)
                links = extract_link_in_referencedEvent(nodes, node, links)
                if len(links) % 100 == 0:
                    # periodic checkpoint so an interrupted run can resume
                    file_opt.save_json_to_file(filepath + "links_type_sl.json", links)
            file_opt.save_json_to_file(filepath + "links_type_sl.json", links)
    elif renew == 0:
        links = file_opt.read_json_from_file(filepath + "links_type_sl.json")
    return links
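# The resume logic above is easy to get wrong, so here is the same idea in
# isolation: a minimal sketch (not part of the original module), assuming each
# saved link records the node it came from under link['source']['number'].
def find_resume_point(nodes, links):
    """Return the slice of `nodes` that still needs processing after a restart."""
    if not links:
        return nodes  # no checkpoint yet: process everything
    last_number = str(links[-1]['source']['number'])
    for i, node in enumerate(nodes):
        if str(node['number']) == last_number:
            return nodes[i + 1:]  # resume right after the last handled node
    return []  # checkpointed node not found: nothing left to do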
def extract_link_mode(linkset, renew, save_file_path):
    if renew == 1:
        link_1_1, link_1_N = parse_1_and_N(linkset)
        link_cluster = parse_link_cluster(link_1_1, link_1_N)
        # link_list = parse_link_list(linkset)
        # link_cluster = parse_list_2_cluster(link_list)
        link_self_bilateral, link_bilateral = parse_bilateral(linkset)
        file_opt.save_json_to_file(save_file_path + "link_1_1.json", link_1_1)
        file_opt.save_json_to_file(save_file_path + "link_1_N.json", link_1_N)
        file_opt.save_json_to_file(save_file_path + "link_bi.json", link_bilateral)
        file_opt.save_json_to_file(save_file_path + "link_self_bi.json", link_self_bilateral)
        file_opt.save_json_to_file(save_file_path + "link_cluster.json", link_cluster)
    elif renew == 0:
        link_1_1 = file_opt.read_json_from_file(save_file_path + "link_1_1.json")
        link_1_N = file_opt.read_json_from_file(save_file_path + "link_1_N.json")
        link_self_bilateral = file_opt.read_json_from_file(save_file_path + "link_self_bi.json")
        link_bilateral = file_opt.read_json_from_file(save_file_path + "link_bi.json")
        link_cluster = file_opt.read_json_from_file(save_file_path + "link_cluster.json")
    return link_1_1, link_1_N, link_self_bilateral, link_bilateral, link_cluster
def work_on_repos(fullname_repo):
    owner, repo = fullname_repo[0], fullname_repo[1]
    print("--------------------handle " + owner + "/" + repo + "---------------------------")
    repo_path = init.local_data_filepath + owner + "/" + repo + "/"
    response_pr = file_opt.read_json_from_file(repo_path + "response_pullRequests.json")
    response_iss = file_opt.read_json_from_file(repo_path + "response_issues.json")
    # main routine: build the Neo4j graph from the fetched PR/issue data
    create_noe4j(response_pr, response_iss, renew, owner, repo, repo_path)
    print("--------------------finish " + owner + "/" + repo + "---------------------------")
def visulize_link_self_bila():
    link_list = []
    for o_r in init.repos_to_get_info:
        owner, name = o_r[0], o_r[1]
        repo_path = init.local_data_filepath + owner + "/" + name + "/"
        link_self = file_opt.read_json_from_file(repo_path + "link_self_bi.json")
        link_bila = file_opt.read_json_from_file(repo_path + "link_bi.json")
        link_list.append({
            'repo': owner + "/" + name,
            'link_self': link_self,
            'link_bilateral': link_bila
        })
    return link_list
def create_initial_info():
    repos_info_file = init.local_data_filepath + "/candidate_repos_info.json"
    repo_info = file_opt.read_json_from_file(repos_info_file)
    repo_info_dict = []
    for item in repo_info['data']['search']['nodes']:
        languageKind = [lang['name'] for lang in item['languages']['nodes']]
        repo_info_dict.append({
            "owner": item['owner']['login'],
            "name": item['name'],
            "description": item['description'],
            "forks": item['forkCount'],
            "stars": item['stargazerCount'],
            "languagesCount": item['languages']['totalCount'],
            "languageKind": languageKind,
            "issues": item['issues']['totalCount'],
            "pullRequests": item['pullRequests']['totalCount']
        })
    return repo_info_dict
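# Example use of create_initial_info() (illustrative, not part of the original
# pipeline): the flat records above make candidate filtering a one-liner. The
# thresholds here are made up for the example, not the project's real criteria.
def demo_filter_candidates(min_stars=1000, min_items=500):
    repos = create_initial_info()
    return [r for r in repos
            if r["stars"] >= min_stars
            and r["issues"] + r["pullRequests"] >= min_items]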
def read_repos_data(file_name):
    repo_list = init.repo_list
    RQ_list = []
    for repo in repo_list:
        data = file_opt.read_json_from_file(init.local_data_filepath + repo.strip() + "/" + file_name)
        RQ_list += data
    return RQ_list
def work_on_repos(fullname_repo):
    owner, repo = fullname_repo[0], fullname_repo[1]
    print("--------------------handle " + owner + "/" + repo + "---------------------------")
    repo_path = init.local_data_filepath + owner + "/" + repo + "/"
    response_pr = file_opt.read_json_from_file(repo_path + "response_pullRequests.json")
    response_iss = file_opt.read_json_from_file(repo_path + "response_issues.json")
    print(repo,
          len(response_iss['data']['repository']['issues']['nodes']),
          len(response_pr['data']['repository']['pullRequests']['nodes']))
    # calculate_data_number(response_pr, response_iss)
    # main routine: extract typed links between issues and pull requests
    extract_link_type(response_pr, response_iss, renew, repo_path)
    print("--------------------finish " + owner + "/" + repo + "---------------------------")
def main():
    for o_r in init.repos_to_get_info:
        owner, name = o_r[0], o_r[1]
        print("--------------------handle " + owner + "/" + name + "---------------------------")
        clusters = file_opt.read_json_from_file(init.local_data_filepath + owner + "/" + name + "/" + "link_cluster.json")
        clusters_files = create_file_list(clusters)
        divide_module(clusters_files)
def work(fullrepo):
    owner, name = fullrepo[0], fullrepo[1]
    print("-------------------start " + owner + "/" + name + "---------------------------")
    link_type = file_opt.read_json_from_file(init.local_data_filepath + owner + "/" + name + "/links_type.json")
    link_1_1, link_1_N, link_self_bilateral, link_bilateral, link_cluster = \
        extract_link_mode(link_type, renew, init.local_data_filepath + owner + "/" + name + "/")
    print("-------------------finish " + owner + "/" + name + "---------------------------")
def work(fullname_repo):
    owner, repo = fullname_repo[0], fullname_repo[1]
    print("--------------------handle " + owner + "/" + repo + "---------------------------")
    type_list_sl = file_opt.read_json_from_file(init.local_data_filepath + owner + "/" + repo + "/links_type_sl.json")
    # main routine: drop self-loop links from the extracted link list
    delete_self_loop(type_list_sl, init.local_data_filepath + owner + "/" + repo + "/")
    print("--------------------finish " + owner + "/" + repo + "---------------------------")
    return
def visualization_multi_repos():
    # visualize links across multiple repositories
    repolist = init.repos_to_get_info
    link_list = []
    for r_o in repolist:
        owner = r_o[0]
        name = r_o[1]
        links = file_opt.read_json_from_file(init.local_data_filepath + owner + "/" + name + "/links_type.json")
        link_list.append({"repo": owner + "/" + name, "links": links})
    vis.visualization_multi_type(link_list)
    vis.visualization_multi_where(link_list)
    vis.visualization_multi_when(link_list)
    return None
def request_graphQL(fullname_repo):
    """Fetch the PR and issue data of the owner/repo repository via GraphQL."""
    owner = fullname_repo[0]
    repo = fullname_repo[1]
    types = ["pullRequests", "issues"]
    # types = ["issues", "pullRequests"]
    for type in types:
        count = 0
        output_response_file = init.local_data_filepath + owner + "/" + repo + "/response_" + type + ".json"
        if os.path.isfile(output_response_file):
            r = file_opt.read_json_from_file(output_response_file)
        else:
            r = query_request(queries.search_100_nodes, owner, repo, type)
        if not r['data']['repository'][type]['pageInfo']['hasNextPage']:
            continue
        print("-----------------start fetch " + owner + "/" + repo + "---------------")
        while True:
            count += 1
            print(owner + "/" + repo, count, datetime.now(),
                  r['data']['repository'][type]['totalCount'],
                  len(r['data']['repository'][type]['nodes']))
            if count % 1 == 0:  # checkpoint every iteration; raise the modulus to save less often
                file_opt.save_json_to_file(output_response_file, r)
            earliest_pr_cursor = r['data']['repository'][type]['edges'][-1]['cursor']
            # earliest_pr_cursor = "Y3Vyc29yOnYyOpHOHaMMaA=="
            # Used for PRs or issues that cannot be fetched via GraphQL: fill in
            # the cursor of the current PR, possibly caused by its timelineItems.
            r2 = query_request(queries.search_100_nodes, owner, repo, type, last_typenode=earliest_pr_cursor)
            r2 = request_morethan_100_nodes(r2, owner, repo, type)
            r['data']['repository'][type]['pageInfo'] = r2['data']['repository'][type]['pageInfo']
            r['data']['repository'][type]['totalCount'] = r2['data']['repository'][type]['totalCount']
            r['data']['repository'][type]['edges'] += r2['data']['repository'][type]['edges']
            r['data']['repository'][type]['nodes'] += r2['data']['repository'][type]['nodes']
            if not r['data']['repository'][type]['pageInfo']['hasNextPage']:
                file_opt.save_json_to_file(output_response_file, r)
                print("-----------------finish fetch " + owner + "/" + repo + "---------------")
                break
    file_opt.save_line_to_file(init.repos_list_finish_graphQL, owner + "/" + repo)
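# query_request and request_morethan_100_nodes are defined elsewhere in the
# project. For orientation only, a minimal sketch of what query_request might
# look like against GitHub's GraphQL endpoint; the template-formatting
# convention and token handling are assumptions, not the project's actual code.
import requests

GITHUB_GRAPHQL_URL = "https://api.github.com/graphql"

def query_request_sketch(query_template, owner, repo, type, last_typenode=None,
                         token="YOUR_GITHUB_TOKEN"):  # placeholder token
    # Assumed: the template takes owner, repo, node type, and an optional
    # `after:` cursor argument for cursor-based pagination.
    after = ', after: "%s"' % last_typenode if last_typenode else ""
    query = query_template % (owner, repo, type, after)
    resp = requests.post(GITHUB_GRAPHQL_URL,
                         json={"query": query},
                         headers={"Authorization": "bearer " + token})
    resp.raise_for_status()  # surface auth and rate-limit errors early
    return resp.json()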