Example #1
def get_repo_PRlist(repo, type, renew):
    api = GitHubAPI()
    save_path = LOCAL_DATA_PATH + '/pr_data/' + repo + '/%s_list.json' % type

    # TODO: could be extended to analyze forks in the future
    if type == 'fork':
        save_path = LOCAL_DATA_PATH + '/result/' + repo + '/forks_list.json'

    if (os.path.exists(save_path)) and (not renew):
        print("read from local files and return")
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; fall through and refetch

    print('file does not exist on local disk, fetching a new list for',
          repo, type)
    if (type == 'pull') or (type == 'issue'):
        ret = api.request('repos/%s/%ss' % (repo, type),
                          state='all',
                          paginate=True)
    else:
        if type == 'branch':
            type = 'branche'  # endpoint is 'branches'; '%ss' below appends the trailing 's'
        ret = api.request('repos/%s/%ss' % (repo, type), paginate=True)

    localfile.write_to_file(save_path, ret)
    return ret
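
A minimal usage sketch (assumes the module-level GitHubAPI, localfile and LOCAL_DATA_PATH dependencies are configured; 'owner/repo' is a hypothetical placeholder):

# Hypothetical usage: fetch the pull and issue lists for a repository,
# reading from the local cache when it exists.
pulls = get_repo_PRlist('owner/repo', 'pull', renew=False)
issues = get_repo_PRlist('owner/repo', 'issue', renew=True)  # force a refetch
print(len(pulls), 'pulls and', len(issues), 'issues')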
Example #2
def fetch_file_list(pull, renew=False):
    repo, num = pull["base"]["repo"]["full_name"], str(pull["number"])
    save_path = LOCAL_DATA_PATH + '/pr_data/' + repo + '/' + num + '/raw_diff.json'

    if os.path.exists(save_path) and (not renew):
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below

    # probe page 3 of the file list: with the API's default page size of 30,
    # a non-empty third page means the PR changes more than 60 files
    t = api.request('repos/%s/pulls/%s/files?page=3' % (repo, num))
    file_list = []
    if len(t) > 0:
        raise Exception('too big', pull['html_url'])
    else:
        li = api.request('repos/%s/pulls/%s/files' % (repo, num),
                         paginate=True)
        time.sleep(0.8)  # throttle to stay under the API rate limit
        for f in li:
            # skip oversized or patch-less entries (e.g. binary files)
            if f.get('changes', 0) <= 5000 and ('filename' in f) and ('patch' in f):
                file_list.append(parse_diff(f['filename'], f['patch']))

    localfile.write_to_file(save_path, file_list)
    return file_list
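
A hedged usage sketch: 'pull' is a full PR object as returned by the GitHub API (get_PR in Example #4 below returns one), and oversized PRs raise instead of downloading:

# Hypothetical usage: 'owner/repo' and 1347 are placeholders.
pull = get_PR('owner/repo', 1347)
try:
    diffs = fetch_file_list(pull)
except Exception as e:
    print('skipped oversized PR:', e)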
Example #3
def filterNonCodeFiles(file_list, outfile_prefix):
    newFileList = []
    count = 0
    for f in file_list:
        # bail out on very large PRs: record the fact and skip the pull entirely
        if count > 500:
            localfile.write_to_file(outfile_prefix + "/toobig.txt", '500file')
            return []
        # keep only code files; drop text/documentation files
        if not language_tool.is_text(f['name']):
            newFileList.append(f)
            count += 1
    return newFileList
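
A minimal sketch of calling the filter, assuming language_tool.is_text flags documentation-like paths as text; entries only need a 'name' key:

# Hypothetical usage: keep code files, drop text/documentation files.
files = [{'name': 'src/main.py'}, {'name': 'README.md'}]
code_files = filterNonCodeFiles(files, outfile_prefix='/tmp/pr_1347')
print([f['name'] for f in code_files])  # ['src/main.py'] if is_text flags README.md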
Example #4
def get_PR(repo, num, renew=False):
    save_path = LOCAL_DATA_PATH + '/pr_data/%s/%s/api.json' % (repo, num)
    if os.path.exists(save_path) and (not renew):
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below

    r = api.request('repos/%s/pulls/%s' % (repo, num))
    time.sleep(3.0)
    localfile.write_to_file(save_path, r)
    return r
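
Every example here repeats the same read-through-cache idiom: return the cached JSON unless renew is set or the file is unreadable. A sketch of factoring that out; read_cached and get_PR_v2 are hypothetical helpers, not part of the original module:

import os
import time

def read_cached(save_path, renew=False):
    # Hypothetical helper mirroring the idiom above: return the cached
    # value, or None on a miss (missing file, forced renew, or corrupt JSON).
    if os.path.exists(save_path) and not renew:
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # corrupt cache counts as a miss
    return None

def get_PR_v2(repo, num, renew=False):
    # get_PR rewritten against the helper; behavior is unchanged.
    save_path = LOCAL_DATA_PATH + '/pr_data/%s/%s/api.json' % (repo, num)
    cached = read_cached(save_path, renew)
    if cached is not None:
        return cached
    r = api.request('repos/%s/pulls/%s' % (repo, num))
    time.sleep(3.0)  # throttle, as in the original
    localfile.write_to_file(save_path, r)
    return r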
Example #5
def get_pull_commit(pull, renew=False):
    save_path = LOCAL_DATA_PATH + '/pr_data/%s/%s/commits.json' % (
        pull["base"]["repo"]["full_name"], pull["number"])
    if os.path.exists(save_path) and (not renew):
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below

    commits = api.request(pull['commits_url'].replace(
        'https://api.github.com/', ''),
                          paginate=True,
                          state='all')
    time.sleep(0.7)
    localfile.write_to_file(save_path, commits)
    return commits
Example #6
def get_pr_commit(repo, pr_id, renew=False):
    save_path = LOCAL_DATA_PATH + '/pr_data/%s/%s/commits.json' % (repo, pr_id)
    commit_url = 'repos/%s/pulls/%s/commits' % (repo, pr_id)
    if os.path.exists(save_path) and (not renew) and (
            os.stat(save_path).st_size > 2):  # a 2-byte cache is just '[]'; treat it as a miss
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below

    api = GitHubAPI()
    commits = api.request(commit_url, paginate=True, state='all')
    time.sleep(0.7)
    localfile.write_to_file(save_path, commits)
    return commits
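
The extra st_size check above treats a cached empty list ('[]' is exactly two bytes of JSON) as a miss, so commit lists that previously came back empty get refetched. Usage sketch with placeholder names:

# Hypothetical usage: 'owner/repo' and 1347 are placeholders.
commits = get_pr_commit('owner/repo', 1347)
print([c['sha'][:7] for c in commits])  # short SHAs of the PR's commits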
Example #7
def fetch_commit(url, renew=False):
    api = GitHubAPI()
    save_path = LOCAL_DATA_PATH + '/pr_data/%s.json' % url.replace(
        'https://api.github.com/repos/', '')
    if os.path.exists(save_path) and (not renew):
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below

    c = api.request(url)
    time.sleep(0.7)
    file_list = []
    for f in c['files']:
        if 'patch' in f:
            file_list.append(
                fetch_raw_diff.parse_diff(f['filename'], f['patch']))
    localfile.write_to_file(save_path, file_list)
    return file_list
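
fetch_commit expects a full commit API URL and derives its cache path from everything after 'repos/'. A hedged sketch with a placeholder repository and SHA:

# Hypothetical usage: fetch one commit's parsed per-file diffs.
url = 'https://api.github.com/repos/owner/repo/commits/0123abc'
for d in fetch_commit(url):
    print(d)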
Example #8
def fetch_file_list(repo, num, renew=False):
    api = GitHubAPI()
    outfile_prefix = init.local_pr_data_dir + repo + "/" + str(num)
    save_path = outfile_prefix + '/raw_diff.json'
    if os.path.exists(save_path) and (not renew):
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below
    file_list = []

    li = api.request('repos/%s/pulls/%s/files' % (repo, num), paginate=True)
    time.sleep(0.8)
    for f in li:
        # skip oversized or patch-less entries (e.g. binary files)
        if f.get('changes', 0) <= 5000 and ('filename' in f) and ('patch' in f):
            file_list.append(
                fetch_raw_diff.parse_diff(f['filename'], f['patch']))

    localfile.write_to_file(save_path, file_list)
    return file_list
Example #9
def get_another_pull(pull, renew=False):
    api = GitHubAPI()
    save_path = LOCAL_DATA_PATH + '/pr_data/%s/%s/another_pull.json' % (
        pull["base"]["repo"]["full_name"], pull["number"])
    if os.path.exists(save_path) and (not renew):
        try:
            return localfile.get_file(save_path)
        except Exception:
            pass  # cached file unreadable; refetch below

    # cites were found in the comments (checking the events API would be
    # an easier alternative)
    comments_href = pull["_links"]["comments"]["href"]
    comments = api.request(comments_href, paginate=True)
    time.sleep(0.7)
    candidates = []
    for comment in comments:
        candidates.extend(get_pr_and_issue_numbers(comment["body"]))
    candidates.extend(get_pr_and_issue_numbers(pull["body"] or ''))  # body can be null

    result = list(set(candidates))

    localfile.write_to_file(save_path, result)
    return result
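
A final sketch tying the examples together: scan a PR's description and comment thread for cross-referenced pull/issue numbers. As in the example, get_pr_and_issue_numbers is assumed to extract '#123'-style references from free text:

# Hypothetical usage: 'owner/repo' and 1347 are placeholders.
pull = get_PR('owner/repo', 1347)
cited = get_another_pull(pull)
print('cross-referenced numbers:', cited)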