コード例 #1
0
def main():
    """Build a flattened timeline of repo import diffs and write it to CSV."""
    # Load releases and index them for release-date lookup.
    release_rows = utils.read_csv_ignore_headers(RELEASES_FILE, RELEASE_FIELD_NAMES)
    releases = build_releases(release_rows)

    # Load the diffs, attach each one's release date, then order chronologically.
    diffs = utils.read_csv_ignore_headers(REPO_IMPORT_DIFFS_FILE, REPO_IMPORT_DIFFS_FIELD_NAMES)
    add_release_dates(diffs, releases)
    diffs.sort(key=get_release_date)

    # Aggregate into a timeline, flatten to CSV rows, and persist.
    flattened_rows = flatten(build_timeline(diffs))
    utils.create_csv_file(REPO_IMPORT_DIFFS_TIMELINE_FILE, REPO_IMPORT_DIFFS_TIMELINE_FIELD_NAMES)
    utils.write_lines_to_existing_csv(REPO_IMPORT_DIFFS_TIMELINE_FILE, REPO_IMPORT_DIFFS_TIMELINE_FIELD_NAMES, flattened_rows)
コード例 #2
0
def main():
    """Download the GitHub comments JSON for every issue in the input CSV.

    Skips issues whose output file already exists, so the script can be
    re-run and resume where it left off. Raises if required configuration
    (project path / GitHub token) is missing.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    issues = utils.read_csv_ignore_headers(
        ISSUES_TO_COLLECT_COMMENTS_FOR_CSV_FILE_PATH,
        ISSUES_CSV_FILE_FIELD_NAMES)
    print("Getting all comments for issues...")
    total = len(issues)
    count = 0
    print('Skipping thru read ones...')
    for issue in issues:
        count += 1
        try:
            issue_id = issue['id']
            repo_name = issue['repo_name']
            comments_url = issue['comments_url']
            # '/' in repo names is not filename-safe; mirror it as '@'.
            output_file_name = "{}/comments@issue@{}@{}.json".format(
                OUTPUT_FOLDER_PATH, issue_id, repo_name.replace('/', '@'))
            # Resume support: skip issues already collected.
            if utils.file_or_read_file_already_exists(output_file_name):
                continue
            print("\t{}/{} repo={}".format(count, total, repo_name))
            issue_comments = get_comments(comments_url)
            utils.write_to_json_file(output_file_name, issue_comments)
        except Exception as e:
            # Best-effort loop: report the failure (including the cause) and
            # continue. BUG FIX: the exception itself was previously dropped,
            # and issue['id'] could raise a secondary KeyError inside the
            # handler on a malformed row — .get() avoids that.
            print("ERROR: Failed getting comments for issue={} error={}".format(
                issue.get('id'), e))
    print("Done")
コード例 #3
0
def main():
    """Download the GitHub commit JSON for every commit in the input CSV.

    Skips commits whose output file already exists, so the script can be
    re-run and resume where it left off.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    commits_to_collect = utils.read_csv_ignore_headers(
        INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    # BUG FIX: progress/error messages said "comments for issues" (copy-paste
    # from another script) — this one collects commits.
    print("Getting all commits...")
    total = len(commits_to_collect)
    count = 0
    for c in commits_to_collect:
        count += 1
        try:
            repo_name = c['repo_name']
            sha = c['commit_id']
            print("\t{}/{} sha={}".format(count, total, sha))
            # '/' in repo names is not filename-safe; mirror it as '@'.
            output_file_name = "{}/commit@{}@{}.json".format(
                OUTPUT_FOLDER_PATH, sha, repo_name.replace('/', '@'))
            # Resume support: skip commits already collected.
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing")
                continue
            commit_url = f'https://api.github.com/repos/{repo_name}/commits/{sha}'
            commit_json = get_commit_json(commit_url)
            utils.write_to_json_file(output_file_name, commit_json)
        except Exception as e:
            # Best-effort loop: report the failure (with cause) and continue.
            print("ERROR: Failed getting commit for issue={} error={}".format(
                c.get('issue_id'), e))
    print("Done")
コード例 #4
0
def main():
    """Fetch the libraries.io dependents JSON for every repo in the input CSV.

    Resolves each short repo name to its full "owner/name" form first, and
    skips repos whose output file already exists so runs can resume.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    if LIBRARIES_IO_ACCESS_TOKEN is None:
        raise Exception("No LIBRARIES_IO_ACCESS_TOKEN")
    entity_to_collect_for = utils.read_csv_ignore_headers(
        INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    total = len(entity_to_collect_for)
    count = 0
    for x in entity_to_collect_for:
        count += 1
        try:
            repo_half_name = x['repo_name']
            print("\t{}/{} repo={}".format(count, total, repo_half_name))
            # Resolve the short name to the canonical "owner/name" form.
            repo_full_name = get_repo_full_name_from_libraries_io(
                repo_half_name)

            # '/' in repo names is not filename-safe; mirror it as '@'.
            output_file_name = "{}/{}.json".format(
                OUTPUT_FOLDER_PATH, repo_full_name.replace('/', '@'))
            # Resume support: skip repos already collected.
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing")
                continue
            dependents = get_dependents(repo_full_name)
            utils.write_to_json_file(output_file_name, dependents)
        except Exception as e:
            # BUG FIX: the exception itself was previously dropped, making
            # failures undiagnosable; .get() avoids a secondary KeyError on a
            # malformed row.
            print("ERROR: Failed for repo={} error={}".format(
                x.get('repo_name'), e))
    print("Done")
コード例 #5
0
def main():
    """Build a flattened timeline of repo call diffs and write it to CSV."""
    print("Reading releases...")
    releases_lines = utils.read_csv_ignore_headers(RELEASES_FILE,
                                                   RELEASE_FIELD_NAMES)
    print("Building releases...")
    releases = build_releases(releases_lines)
    # BUG FIX: progress message said "cass" instead of "call".
    print("Reading repo call diffs...")
    repo_call_diffs = utils.read_csv_ignore_headers(
        REPO_CALL_DIFFS_FILE, REPO_CALL_DIFFS_FIELD_NAMES)
    print("Adding release dates...")
    add_release_dates(repo_call_diffs, releases)
    print("Sorting repo call diffs")
    # Timeline construction assumes chronological order.
    repo_call_diffs.sort(key=get_release_date)
    print("Building timeline...")
    timeline = build_timeline(repo_call_diffs)
    print("Flattening timeline...")
    flattened_timeline = flatten(timeline)
    print("Creating output file...")
    utils.create_csv_file(REPO_CALL_DIFFS_TIMELINE_FILE,
                          REPO_CALL_DIFFS_TIMELINE_FIELD_NAMES)
    print("Writing timeline to output file...")
    utils.write_lines_to_existing_csv(REPO_CALL_DIFFS_TIMELINE_FILE,
                                      REPO_CALL_DIFFS_TIMELINE_FIELD_NAMES,
                                      flattened_timeline)
コード例 #6
0
def main():
    """Map each repo to the "name" field of its checked-out package.json.

    Writes one CSV row per repo. package_name is None when the repo folder
    was never cloned, has no package.json, or the file cannot be parsed /
    lacks a "name" key.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                          INPUT_CSV_FILE_FIELD_NAMES)
    total = len(repos)
    count = 0
    list_to_write = list()
    for repo in repos:
        repo_name = repo['repo_name']
        count += 1
        print("\t{}/{} repo={}".format(count, total, repo_name))
        # Default: no package name (repo missing / no package.json / error).
        package_name = None
        try:
            # Clones are stored with '/' mirrored as '#' in the folder name.
            project_git_folder = f"{REPOS_PATH}/repos/{repo_name.replace('/', '#')}"
            if os.path.isdir(project_git_folder):
                package_json_path = f"{project_git_folder}/package.json"
                if utils.file_or_read_file_already_exists(package_json_path):
                    # Raises (caught below) if the JSON is malformed or
                    # 'name' is missing, leaving package_name as None.
                    package_json_contents = utils.load_json_file(
                        package_json_path)
                    package_name = package_json_contents['name']
        except Exception as e:
            # BUG FIX: failures were previously swallowed silently; report
            # them so bad package.json files are diagnosable.
            print("ERROR: Failed reading package.json for repo={} error={}".format(
                repo_name, e))
        # Single append site replaces the three duplicated dict literals.
        list_to_write.append({
            'repo_name': repo_name,
            'package_name': package_name
        })
    utils.write_lines_to_new_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES,
                                 list_to_write)
    print("Done")
コード例 #7
0
def main():
    """Collect basic GitHub metadata (fork flag, size, stars, watchers,
    language) for every repo in the input CSV and write it to the output CSV.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                          INPUT_CSV_FILE_FIELD_NAMES)
    print("Starting...")
    total = len(repos)
    count = 0
    lines_to_write = list()
    for repo in repos:
        count += 1
        try:
            repo_name = repo['repo_name']
            print("\t{}/{} repo={}".format(count, total, repo_name))
            repos_info = get_repo_info(repo_name)
            lines_to_write.append({
                'repo_name': repo_name,
                'is_fork': repos_info['fork'],
                'size': repos_info['size'],
                'stargazers_count': repos_info['stargazers_count'],
                'watchers_count': repos_info['watchers_count'],
                'language': repos_info['language'],
            })
        except Exception as e:
            # BUG FIX: the message said "getting comments for issue"
            # (copy-paste from another script) — this fetches repo info.
            # Also surface the exception so failures are diagnosable.
            print("ERROR: Failed getting repo info for repo={} error={}".format(
                repo.get('repo_name'), e))
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH,
                                       OUTPUT_FILE_FIELD_NAMES)
    utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                      OUTPUT_FILE_FIELD_NAMES, lines_to_write)

    print("Done")
コード例 #8
0
def main():
    """Download libraries.io NPM metadata for each distinct dependency named
    in the Greenkeeper issues CSV, one JSON file per library.
    """
    gk_issues = utils.read_csv_ignore_headers(GREENKEEPER_ISSUES_FILE_PATH, GREENKEEPER_ISSUES_FIELD_NAMES)
    # Dedupe dependency names preserving first-seen order.
    # (dict.fromkeys is O(n) vs the previous O(n^2) list-membership loop.)
    library_names = list(dict.fromkeys(
        issue['issue_dependency_name'] for issue in gk_issues))
    total = len(library_names)
    count = 0
    for lib_name in library_names:
        count += 1
        try:
            # BUG FIX: the progress label said "pr_url=" (copy-paste from
            # another script) — this value is a library name.
            print("\t{}/{} lib={}".format(count, total, lib_name))
            if not lib_name:
                continue
            # Scoped package names contain '/', which must be escaped both in
            # the API URL and in the output file name.
            escaped_name = lib_name.replace('/', '%2F')
            url = 'https://libraries.io/api/NPM/{}?api_key={}'.format(escaped_name, LIBRARIES_IO_ACCESS_TOKEN)
            # sleep_time keeps us under the libraries.io rate limit.
            lib_info = utils.send_request(url, None, ignore_token=True, sleep_time=1.2)
            file_name = "{}/{}.json".format(OUTPUT_FOLDER_PATH, escaped_name)
            utils.write_to_json_file(file_name, lib_info)
        except Exception as e:
            # BUG FIX: include the exception so failures are diagnosable.
            print("Error on {} - {}".format(lib_name, e))
コード例 #9
0
def main():
    """Clone every repo in the input CSV into REPOS_PATH/repos/<owner#name>.

    Folders that already exist are skipped, so the script can resume.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    total = len(repos)
    count = 0
    for repo in repos:
        count += 1
        try:
            repo_name = repo['repo_name']
            print("\t{}/{} repo={}".format(count, total, repo_name))

            # '/' cannot appear in a folder name; mirror it as '#'.
            output_folder = f"{REPOS_PATH}/repos/{repo_name.replace('/', '#')}"
            # Resume support: an existing folder means this repo was done.
            if os.path.isdir(output_folder):
                continue
            os.mkdir(output_folder)
            # Clone over SSH; the list form avoids shell interpolation.
            subprocess.call(['git', 'clone', f'[email protected]:{repo_name}.git', output_folder])
        except Exception as e:
            # BUG FIX: the handler read repo['repo'], a key that does not
            # exist (rows use 'repo_name'), so any failure raised a KeyError
            # inside the except block and killed the whole run. Also fixed
            # the copy-pasted "getting issues" wording and surfaced the
            # exception itself.
            print("ERROR: Failed cloning repo={} error={}".format(
                repo.get('repo_name'), e))
    print("Done")
コード例 #10
0
def main():
    """Download the GitHub issues JSON for every dependent repo in the CSV.

    Skips repos whose output file already exists so runs can resume.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(REPOS_FILE_PATH, REPOs_FILED_NAMES)
    print("Getting all Issues for repos...")
    total = len(repos)
    count = 0
    for repo in repos:
        count += 1
        try:
            repo_name = repo['dependent']
            print("\t{}/{} repo={}".format(count, total, repo_name))
            # '/' in repo names is not filename-safe; mirror it as '@'.
            output_file_name = "{}/issues@{}.json".format(OUTPUT_FOLDER_PATH, repo_name.replace('/', '@'))
            # Resume support: skip repos already collected.
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing -- repo={}".format(repo_name))
                continue
            repo_issues = get_issues(repo_name)
            utils.write_to_json_file(output_file_name, repo_issues)
        except Exception as e:
            # BUG FIX: the handler read repo['repo'], a key that does not
            # exist (rows are keyed by 'dependent'), so any failure raised a
            # KeyError inside the except block and killed the whole run.
            # Also surface the exception itself.
            print("ERROR: Failed getting issues for repo={} error={}".format(
                repo.get('dependent'), e))
    print("Done")
コード例 #11
0
def _npm_row(repo_name, package_name, use_repo_name, response):
    """Build one output row from a libraries.io response.

    `response` is the parsed JSON dict; None or a dict containing 'error'
    means the package was not found (all stat fields become None).
    May raise KeyError on a found-but-incomplete response; the caller treats
    that like any other lookup failure.
    """
    if response is None or 'error' in response:
        return {
            'repo_name': repo_name,
            'package_name': package_name,
            'use_repo_name': use_repo_name,
            'on_libraries_io_npm': False,
            'npm_dependent_repos_count': None,
            'npm_dependents_count': None,
            'npm_forks': None,
            'npm_language': None,
            'npm_rank': None,
            'npm_stars': None,
        }
    return {
        'repo_name': repo_name,
        'package_name': package_name,
        'use_repo_name': use_repo_name,
        'on_libraries_io_npm': True,
        'npm_dependent_repos_count': response['dependent_repos_count'],
        'npm_dependents_count': response['dependents_count'],
        'npm_forks': response['forks'],
        'npm_language': response['language'],
        'npm_rank': response['rank'],
        'npm_stars': response['stars'],
    }


def main():
    """Collect libraries.io NPM stats for each repo/package and append to CSV.

    Rows are written one at a time so progress survives a crash. Packages
    missing from libraries.io (or whose lookup fails) are recorded with
    on_libraries_io_npm=False and null stats.
    """
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    if LIBRARIES_IO_ACCESS_TOKEN is None:
        raise Exception("No LIBRARIES_IO_ACCESS_TOKEN")
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    entries = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                            INPUT_CSV_FILE_FIELD_NAMES)
    total = len(entries)
    count = 0
    for x in entries:
        repo_name = x['repo_name']
        package_name = x['package_name']
        use_repo_name = False
        count += 1
        try:
            print("\t{}/{} repo={}".format(count, total, package_name))
            if not package_name:
                # Fall back to the repo's short name when no package name
                # is known for this row.
                use_repo_name = True
                package_name = repo_name.split('/')[1]
            libraries_io_response = get_libraries_io_response(package_name)
            row = _npm_row(repo_name, package_name, use_repo_name,
                           libraries_io_response)
        except Exception:
            # Any failure (lookup error, malformed row, incomplete response)
            # is recorded as "not on libraries.io", matching the original
            # three duplicated append-and-write sites this replaces.
            row = _npm_row(repo_name, package_name, use_repo_name, None)
        utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                          OUTPUT_FIELD_NAMES, [row])
    print("Done")