Example no. 1
def delete_self_loop(type_list_sl, filepath=None):
    # Drop self-loop links whose source and target numbers are equal.
    type_list = []
    for item in type_list_sl:
        if int(item['source']['number']) != int(item['target']['number']):
            type_list.append(item)
    file_opt.save_json_to_file(filepath + "links_type.json", type_list)
    return
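A minimal usage sketch, assuming file_opt.save_json_to_file(path, obj) simply writes obj to path as JSON; the two links below are hypothetical:

links = [
    {"source": {"number": 7}, "target": {"number": 7}},   # self-loop, dropped
    {"source": {"number": 7}, "target": {"number": 12}},  # kept
]
delete_self_loop(links, filepath="./data/")  # writes ./data/links_type.json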
Example no. 2
import os

from tqdm import tqdm


def extract_link_type(response_p, response_i, renew, filepath=None):
    if renew == 1:
        nodes = response_p['data']['repository']['pullRequests'][
            'nodes'] + response_i['data']['repository']['issues']['nodes']
        if os.path.isfile(filepath + "links_type.json"):
            # links_type.json already exists: reload it and resume from that checkpoint
            links = file_opt.read_json_from_file(filepath + "links_type.json")
        else:  # no checkpoint yet, extract links from scratch
            links = []
        # Find the new starting point: the node right after the last saved link's source.
        if not links:
            continue_nodes = nodes
        else:
            continue_nodes = []
            for node in nodes:
                if str(node['number']) == str(links[-1]['source']['number']):
                    continue_nodes = nodes[nodes.index(node) + 1:]
                    break
        if continue_nodes:
            for node in tqdm(continue_nodes):  # start extracting links
                links = extract_link_in_title(nodes, node, links)
                # links = extract_link_in_body(nodes, node, links)
                # links = extract_link_in_comment(nodes, node, links)
                # links = extract_link_in_crossReference(nodes, node, links)
                links = extract_link_in_referencedEvent(nodes, node, links)
                if len(links) % 100 == 0:  # checkpoint every 100 links
                    file_opt.save_json_to_file(filepath + "links_type_sl.json",
                                               links)
            file_opt.save_json_to_file(filepath + "links_type_sl.json", links)
    elif renew == 0:
        links = file_opt.read_json_from_file(filepath + "links_type_sl.json")
    # Results are persisted to JSON rather than returned.
    return
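The resume step above only slices the node list after the last saved link's source; a standalone sketch of that pattern, with hypothetical node dicts:

nodes = [{"number": n} for n in (5, 4, 3, 2, 1)]
links = [{"source": {"number": 4}, "target": {"number": 9}}]
last = str(links[-1]['source']['number'])
idx = next((i for i, n in enumerate(nodes) if str(n['number']) == last), None)
continue_nodes = nodes if not links else (nodes[idx + 1:] if idx is not None else [])
print([n['number'] for n in continue_nodes])  # [3, 2, 1]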
Example no. 3
def extract_link_mode(linkset, renew, save_file_path):
    if renew == 1:
        # Recompute every link mode from the raw link set and cache the results.
        link_1_1, link_1_N = parse_1_and_N(linkset)
        link_cluster = parse_link_cluster(link_1_1, link_1_N)
        # link_list = parse_link_list(linkset)
        # link_cluster = parse_list_2_cluster(link_list)
        link_self_bilateral, link_bilateral = parse_bilateral(linkset)

        file_opt.save_json_to_file(save_file_path + "link_1_1.json", link_1_1)
        file_opt.save_json_to_file(save_file_path + "link_1_N.json", link_1_N)
        file_opt.save_json_to_file(save_file_path + "link_bi.json",
                                   link_bilateral)
        file_opt.save_json_to_file(save_file_path + "link_self_bi.json",
                                   link_self_bilateral)
        file_opt.save_json_to_file(save_file_path + "link_cluster.json",
                                   link_cluster)
    elif renew == 0:  # reload the cached link-mode files
        link_1_1 = file_opt.read_json_from_file(save_file_path +
                                                "link_1_1.json")
        link_1_N = file_opt.read_json_from_file(save_file_path +
                                                "link_1_N.json")
        link_self_bilateral = file_opt.read_json_from_file(save_file_path +
                                                           "link_self_bi.json")
        link_bilateral = file_opt.read_json_from_file(save_file_path +
                                                      "link_bi.json")
        link_cluster = file_opt.read_json_from_file(save_file_path +
                                                    "link_cluster.json")
    return link_1_1, link_1_N, link_self_bilateral, link_bilateral, link_cluster
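A hedged usage sketch: renew=1 recomputes every mode from linkset and caches the JSON files, renew=0 reloads those caches; the linkset and path here are hypothetical:

modes = extract_link_mode(linkset, renew=1, save_file_path="./data/")
link_1_1, link_1_N, link_self_bi, link_bi, link_cluster = modes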
Example no. 4
def select_repos():
    initial_info_list = create_initial_info()
    clear_language_list = remove_no_language(initial_info_list)
    common_language_list = involve_common_language(clear_language_list)
    iss_pr_number_list = select_iss_pr_number(common_language_list)
    # save the filtered list to file
    file_opt.save_json_to_file(
        init.local_data_filepath + "/after_select_respos.json",
        iss_pr_number_list)
    # create the repo list
    create_repo_list(iss_pr_number_list)
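The function is a straight four-stage filter pipeline; a sketch of the same pipeline as a loop, reusing the helper names from the source:

repos = create_initial_info()
for step in (remove_no_language, involve_common_language, select_iss_pr_number):
    repos = step(repos)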
Example no. 5
def search_repos():
    """
    Fetch the list of repos that satisfy the search criteria.
    """
    output_response_file = init.local_data_filepath + "/candidate_repos_info.json"
    r = query_request(queries.search_candidate_repos)
    # Page through the search results by descending star count until the
    # last returned repo falls to 10000 stars or below.
    while r['data']['search']['nodes'][-1]['stargazerCount'] > 10000:
        last_star = r['data']['search']['nodes'][-1]['stargazerCount']
        r2 = query_request(queries.search_candidate_repos, last_star=last_star)
        r['data']['search']['nodes'] += r2['data']['search']['nodes'][1:]
        print("has fetched", len(r['data']['search']['nodes']), "repos")
    file_opt.save_json_to_file(output_response_file, r)
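The [1:] slice above drops the one-repo overlap between consecutive result pages; a toy illustration of that stitching, with hypothetical pages:

page1 = [{"name": "a", "stargazerCount": 50000}, {"name": "b", "stargazerCount": 30000}]
page2 = [{"name": "b", "stargazerCount": 30000}, {"name": "c", "stargazerCount": 12000}]
merged = page1 + page2[1:]
print([p["name"] for p in merged])  # ['a', 'b', 'c']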
Example no. 6
import os
from datetime import datetime


def request_graphQL(fullname_repo):
    """
    Fetch the PR and issue data of the owner/repo repository via GraphQL.
    """
    owner = fullname_repo[0]
    repo = fullname_repo[1]
    types = ["pullRequests", "issues"]
    # types = ["issues", "pullRequests"]
    for node_type in types:
        count = 0
        output_response_file = init.local_data_filepath + owner + "/" + repo + "/response_" + node_type + ".json"
        if os.path.isfile(output_response_file):
            r = file_opt.read_json_from_file(output_response_file)
        else:
            r = query_request(queries.search_100_nodes, owner, repo, node_type)
        if not r['data']['repository'][node_type]['pageInfo']['hasNextPage']:
            continue
        print("-----------------start fetch " + owner + "/" + repo + "---------------")
        while True:
            count += 1
            print(owner + "/" + repo, count, datetime.now(),
                  r['data']['repository'][node_type]['totalCount'],
                  len(r['data']['repository'][node_type]['nodes']))
            # checkpoint the accumulated response on every page
            file_opt.save_json_to_file(output_response_file, r)
            earliest_pr_cursor = r['data']['repository'][node_type]['edges'][-1]['cursor']
            # earliest_pr_cursor = "Y3Vyc29yOnYyOpHOHaMMaA=="  # Handles a PR/issue that GraphQL cannot fetch
            # (possibly due to a timelineItem): fill in the cursor of the current PR by hand.
            r2 = query_request(queries.search_100_nodes, owner, repo, node_type, last_typenode=earliest_pr_cursor)
            r2 = request_morethan_100_nodes(r2, owner, repo, node_type)
            r['data']['repository'][node_type]['pageInfo'] = r2['data']['repository'][node_type]['pageInfo']
            r['data']['repository'][node_type]['totalCount'] = r2['data']['repository'][node_type]['totalCount']
            r['data']['repository'][node_type]['edges'] += r2['data']['repository'][node_type]['edges']
            r['data']['repository'][node_type]['nodes'] += r2['data']['repository'][node_type]['nodes']
            if not r['data']['repository'][node_type]['pageInfo']['hasNextPage']:
                file_opt.save_json_to_file(output_response_file, r)
                print("-----------------finish fetch " + owner + "/" + repo + "---------------")
                break
    file_opt.save_line_to_file(init.repos_list_finish_graphQL, owner + "/" + repo)
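The while loop above is standard GraphQL cursor pagination; a minimal standalone sketch of the pattern, with fetch_page as a hypothetical stand-in for query_request:

def fetch_all(fetch_page):
    # fetch_page(cursor) -> {"nodes": [...], "pageInfo": {"hasNextPage": bool, "endCursor": str}}
    nodes, cursor = [], None
    while True:
        page = fetch_page(cursor)
        nodes += page["nodes"]
        if not page["pageInfo"]["hasNextPage"]:
            return nodes
        cursor = page["pageInfo"]["endCursor"]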
Example no. 7
import random


def random_sample(all_link):
    # sample_size is a module-level setting in the source
    sample = random.sample(all_link, sample_size)
    file_opt.save_json_to_file(
        "../card_sorting/sample_" + str(sample_size) + "_supply2.json", sample)
    return
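If the card-sorting sample needs to be reproducible, the draw can be seeded first; a sketch where the seed, sample_size, and link list are all hypothetical (sample_size is assumed to live in the same module):

random.seed(42)  # fixed seed so the same sample can be re-drawn
sample_size = 384
all_link = [{"id": i} for i in range(10000)]
random_sample(all_link)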