Exemple #1
0
def extract_link_type(response_p, response_i, renew, filepath=None):
    """Extract links from pull-request and issue nodes and save them as JSON.

    Args:
        response_p: Pull-request response; nodes are read from
            ``['data']['repository']['pullRequests']['nodes']``.
        response_i: Issue response; nodes are read from
            ``['data']['repository']['issues']['nodes']``.
        renew: ``1`` runs the extraction, ``0`` only reloads
            ``links_type_sl.json``; any other value does nothing.
        filepath: Prefix prepended verbatim to the JSON file names.
            NOTE(review): the default ``None`` cannot be concatenated with a
            string, so a real prefix is required whenever ``renew`` is 0 or 1.

    Returns:
        None. The extracted (or reloaded) links are not returned.
    """
    if renew != 1:
        if renew == 0:
            # The loaded value is discarded, exactly as before.
            file_opt.read_json_from_file(filepath + "links_type_sl.json")
        return

    pull_request_nodes = response_p['data']['repository']['pullRequests'][
        'nodes']
    issue_nodes = response_i['data']['repository']['issues']['nodes']
    nodes = pull_request_nodes + issue_nodes

    # Resume from an existing links_type.json, otherwise start from scratch.
    # NOTE(review): progress is written to links_type_sl.json below, not to
    # the file inspected here -- confirm this is the intended resume source.
    if os.path.isfile(filepath + "links_type.json"):
        links = file_opt.read_json_from_file(filepath + "links_type.json")
    else:
        links = []

    # Work out which nodes still need processing: everything when there are
    # no links yet, otherwise the nodes after the one whose number matches
    # the source of the last stored link (nothing if no node matches).
    pending = []
    if links == []:
        pending = nodes
    else:
        for position, candidate in enumerate(nodes):
            if str(candidate['number']) == str(links[-1]['source']['number']):
                pending = nodes[position + 1:]
                break

    if pending == []:
        return

    # Only the title and referenced-event extractors are applied.
    for current in tqdm(pending):
        links = extract_link_in_title(nodes, current, links)
        links = extract_link_in_referencedEvent(nodes, current, links)
        # Intermediate save whenever the link count is a multiple of 100.
        if len(links) % 100 == 0:
            file_opt.save_json_to_file(filepath + "links_type_sl.json", links)
    file_opt.save_json_to_file(filepath + "links_type_sl.json", links)
    return
Exemple #2
0
def extract_link_mode(linkset, renew, save_file_path):
    """Compute, or reload from disk, the link-mode partitions of ``linkset``.

    Args:
        linkset: Links passed to ``parse_1_and_N`` and ``parse_bilateral``;
            only used when ``renew == 1``.
        renew: ``1`` recomputes every partition and writes it to disk,
            ``0`` reloads the previously saved partitions.
        save_file_path: Prefix prepended verbatim to each JSON file name.

    Returns:
        The tuple ``(link_1_1, link_1_N, link_self_bilateral,
        link_bilateral, link_cluster)``.

    Raises:
        ValueError: If ``renew`` is neither ``0`` nor ``1``. Previously this
            case fell through to the ``return`` and failed with an
            ``UnboundLocalError``.
    """
    if renew == 1:
        link_1_1, link_1_N = parse_1_and_N(linkset)
        link_cluster = parse_link_cluster(link_1_1, link_1_N)
        link_self_bilateral, link_bilateral = parse_bilateral(linkset)

        outputs = (
            ("link_1_1.json", link_1_1),
            ("link_1_N.json", link_1_N),
            ("link_bi.json", link_bilateral),
            ("link_self_bi.json", link_self_bilateral),
            ("link_cluster.json", link_cluster),
        )
        for file_name, payload in outputs:
            file_opt.save_json_to_file(save_file_path + file_name, payload)
    elif renew == 0:
        link_1_1, link_1_N, link_self_bilateral, link_bilateral, link_cluster = [
            file_opt.read_json_from_file(save_file_path + file_name)
            for file_name in (
                "link_1_1.json",
                "link_1_N.json",
                "link_self_bi.json",
                "link_bi.json",
                "link_cluster.json",
            )
        ]
    else:
        # Fail with a clear message instead of an unbound-local error.
        raise ValueError("renew must be 0 or 1, got %r" % (renew,))
    return link_1_1, link_1_N, link_self_bilateral, link_bilateral, link_cluster
Exemple #3
0
def work_on_repos(fullname_repo):
    """Load one repository's saved responses and pass them to ``create_noe4j``.

    Args:
        fullname_repo: Indexable whose first two items are the owner and the
            repository name.

    The ``renew`` flag is read from module scope.
    """
    owner = fullname_repo[0]
    repo = fullname_repo[1]
    label = owner + "/" + repo
    print("--------------------handle " + label + "---------------------------")

    # Per-repository data directory, including the trailing separator.
    repo_dir = init.local_data_filepath + label + "/"
    response_pr = file_opt.read_json_from_file(
        repo_dir + "response_pullRequests.json")
    response_iss = file_opt.read_json_from_file(
        repo_dir + "response_issues.json")

    # Main step.
    create_noe4j(response_pr, response_iss, renew, owner, repo, repo_dir)
    print("--------------------finish " + label + "---------------------------")
Exemple #4
0
def visulize_link_self_bila():
    """Collect self-bilateral and bilateral links for every configured repo.

    For each entry of ``init.repos_to_get_info`` the files
    ``link_self_bi.json`` and ``link_bi.json`` are read and gathered into one
    record per repository.

    NOTE(review): the collected list is neither returned nor used in the
    visible code -- the function may be truncated; confirm against the
    full source.
    """
    collected = []
    for entry in init.repos_to_get_info:
        owner = entry[0]
        name = entry[1]
        repo_dir = init.local_data_filepath + owner + "/" + name
        record = {'repo': owner + "/" + name}
        record['link_self'] = file_opt.read_json_from_file(
            repo_dir + "/link_self_bi.json")
        record['link_bilateral'] = file_opt.read_json_from_file(
            repo_dir + "/link_bi.json")
        collected.append(record)
def create_initial_info():
    """Summarise the candidate repositories stored in ``candidate_repos_info.json``.

    Returns:
        A list with one dict per node under ``['data']['search']['nodes']``,
        holding the keys ``owner``, ``name``, ``description``, ``forks``,
        ``stars``, ``languagesCount``, ``languageKind``, ``issues`` and
        ``pullRequests``.
    """

    def _summarise(item):
        # Language names are gathered first, then the flat record is built.
        language_names = [lang['name'] for lang in item['languages']['nodes']]
        return {
            "owner": item['owner']['login'],
            "name": item['name'],
            "description": item['description'],
            "forks": item['forkCount'],
            "stars": item['stargazerCount'],
            "languagesCount": item['languages']['totalCount'],
            "languageKind": language_names,
            "issues": item['issues']['totalCount'],
            "pullRequests": item['pullRequests']['totalCount'],
        }

    source_path = init.local_data_filepath + "/candidate_repos_info.json"
    payload = file_opt.read_json_from_file(source_path)
    return [_summarise(item) for item in payload['data']['search']['nodes']]
Exemple #6
0
def read_repos_data(file_name):
    """Concatenate the contents of ``file_name`` across all repositories.

    Args:
        file_name: JSON file name looked up inside each repository directory
            listed in ``init.repo_list``.

    Returns:
        One list containing the items of every loaded file, in repository
        order.
    """
    merged = []
    for repo in init.repo_list:
        # Entries are stripped of surrounding whitespace before use.
        path = init.local_data_filepath + repo.strip() + "/" + file_name
        merged.extend(file_opt.read_json_from_file(path))
    return merged
Exemple #7
0
def work_on_repos(fullname_repo):
    """Load one repository's saved responses and run ``extract_link_type``.

    Args:
        fullname_repo: Indexable whose first two items are the owner and the
            repository name.

    The ``renew`` flag is read from module scope.
    """
    owner = fullname_repo[0]
    repo = fullname_repo[1]
    label = owner + "/" + repo
    print("--------------------handle " + label + "---------------------------")

    # Per-repository data directory, including the trailing separator.
    repo_dir = init.local_data_filepath + label + "/"
    response_pr = file_opt.read_json_from_file(
        repo_dir + "response_pullRequests.json")
    response_iss = file_opt.read_json_from_file(
        repo_dir + "response_issues.json")

    # Report how many issue and pull-request nodes were loaded.
    issue_count = len(response_iss['data']['repository']['issues']['nodes'])
    pr_count = len(
        response_pr['data']['repository']['pullRequests']['nodes'])
    print(repo, issue_count, pr_count)

    # Main step.
    extract_link_type(response_pr, response_iss, renew, repo_dir)
    print("--------------------finish " + label + "---------------------------")
Exemple #8
0
def main():
    """Run ``divide_module`` on the link clusters of every configured repo.

    For each entry of ``init.repos_to_get_info`` the repository's
    ``link_cluster.json`` is loaded, turned into a file list by
    ``create_file_list`` and handed to ``divide_module``.
    """
    for entry in init.repos_to_get_info:
        owner = entry[0]
        name = entry[1]
        label = owner + "/" + name
        print("--------------------handle " + label +
              "---------------------------")
        cluster_path = (init.local_data_filepath + label + "/" +
                        "link_cluster.json")
        clusters = file_opt.read_json_from_file(cluster_path)
        divide_module(create_file_list(clusters))
Exemple #9
0
def work(fullrepo):
    """Load ``links_type.json`` for one repository and run ``extract_link_mode``.

    Args:
        fullrepo: Indexable whose first two items are the owner and the
            repository name.

    The ``renew`` flag is read from module scope. The five partitions
    returned by ``extract_link_mode`` are unpacked but not used further here.
    """
    owner = fullrepo[0]
    name = fullrepo[1]
    label = owner + "/" + name
    print("-------------------start " + label + "---------------------------")

    # Per-repository data directory, including the trailing separator.
    repo_dir = init.local_data_filepath + label + "/"
    link_type = file_opt.read_json_from_file(repo_dir + "links_type.json")
    modes = extract_link_mode(link_type, renew, repo_dir)
    link_1_1, link_1_N, link_self_bilateral, link_bilateral, link_cluster = modes
    print("-------------------finish " + label + "---------------------------")
Exemple #10
0
def work(fullname_repo):
    """Load ``links_type_sl.json`` for one repository and run ``delete_self_loop``.

    Args:
        fullname_repo: Indexable whose first two items are the owner and the
            repository name.

    Returns:
        None.
    """
    owner = fullname_repo[0]
    repo = fullname_repo[1]
    label = owner + "/" + repo
    print("--------------------handle " + label + "---------------------------")

    # Per-repository data directory, including the trailing separator.
    repo_dir = init.local_data_filepath + label + "/"
    type_list_sl = file_opt.read_json_from_file(repo_dir +
                                                "links_type_sl.json")

    # Main step.
    delete_self_loop(type_list_sl, repo_dir)
    print("--------------------finish " + label + "---------------------------")
    return
Exemple #11
0
def visualization_multi_repos():
    """Visualise the links of several repositories together.

    Loads ``links_type.json`` for each entry of ``init.repos_to_get_info``
    and passes the combined list to the three ``vis.visualization_multi_*``
    functions (type, where, when), in that order.

    Returns:
        None.
    """

    def _load(entry):
        # One record per repository: its "owner/name" label and its links.
        owner, name = entry[0], entry[1]
        path = init.local_data_filepath + owner + "/" + name + "/links_type.json"
        links = file_opt.read_json_from_file(path)
        return {"repo": owner + "/" + name, "links": links}

    link_list = [_load(entry) for entry in init.repos_to_get_info]
    for render in (vis.visualization_multi_type,
                   vis.visualization_multi_where,
                   vis.visualization_multi_when):
        render(link_list)
    return None
Exemple #12
0
def request_graphQL(fullname_repo):
    """Fetch the pull-request and issue data of an owner/repo via GraphQL.

    For each of ``pullRequests`` and ``issues`` the accumulated response is
    kept in ``response_<type>.json`` inside the repository's data directory.
    An existing file is loaded and paging continues from its last edge
    cursor; otherwise the first page is requested. Pages are appended until
    ``pageInfo.hasNextPage`` is false. The repository is then recorded in
    ``init.repos_list_finish_graphQL``.

    Args:
        fullname_repo: Indexable whose first two items are the owner and the
            repository name.
    """
    owner = fullname_repo[0]
    repo = fullname_repo[1]
    full_name = owner + "/" + repo

    for node_type in ["pullRequests", "issues"]:
        output_response_file = (init.local_data_filepath + owner + "/" +
                                repo + "/response_" + node_type + ".json")
        resumed = os.path.isfile(output_response_file)
        if resumed:
            r = file_opt.read_json_from_file(output_response_file)
        else:
            r = query_request(queries.search_100_nodes, owner, repo,
                              node_type)

        connection = r['data']['repository'][node_type]
        if not connection['pageInfo']['hasNextPage']:
            if not resumed:
                # A freshly fetched result with no further page used to be
                # dropped without being written, although the repository is
                # still marked as finished below. Persist it.
                file_opt.save_json_to_file(output_response_file, r)
            continue

        print("-----------------start fetch " + full_name + "---------------")
        count = 0
        while True:
            count += 1
            print(full_name, count, datetime.now(), connection['totalCount'],
                  len(connection['nodes']))
            # Checkpoint after every page so an interrupted run can resume.
            file_opt.save_json_to_file(output_response_file, r)

            # Continue from the cursor of the last edge collected so far.
            # The original note here suggests hard-coding a cursor to work
            # around a PR/issue that cannot be fetched (possibly related to
            # timelineItem).
            earliest_cursor = connection['edges'][-1]['cursor']
            r2 = query_request(queries.search_100_nodes, owner, repo,
                               node_type, last_typenode=earliest_cursor)
            r2 = request_morethan_100_nodes(r2, owner, repo, node_type)

            # Merge the new page into the accumulated response.
            page = r2['data']['repository'][node_type]
            connection['pageInfo'] = page['pageInfo']
            connection['totalCount'] = page['totalCount']
            connection['edges'] += page['edges']
            connection['nodes'] += page['nodes']

            if not connection['pageInfo']['hasNextPage']:
                file_opt.save_json_to_file(output_response_file, r)
                print("-----------------finish fetch " + full_name +
                      "---------------")
                break

    file_opt.save_line_to_file(init.repos_list_finish_graphQL, full_name)