def main():
    releases_lines = utils.read_csv_ignore_headers(RELEASES_FILE, RELEASE_FIELD_NAMES)
    releases = build_releases(releases_lines)
    repo_import_diffs = utils.read_csv_ignore_headers(REPO_IMPORT_DIFFS_FILE,
                                                      REPO_IMPORT_DIFFS_FIELD_NAMES)
    add_release_dates(repo_import_diffs, releases)
    repo_import_diffs.sort(key=get_release_date)
    timeline = build_timeline(repo_import_diffs)
    flattened_timeline = flatten(timeline)
    utils.create_csv_file(REPO_IMPORT_DIFFS_TIMELINE_FILE, REPO_IMPORT_DIFFS_TIMELINE_FIELD_NAMES)
    utils.write_lines_to_existing_csv(REPO_IMPORT_DIFFS_TIMELINE_FILE,
                                      REPO_IMPORT_DIFFS_TIMELINE_FIELD_NAMES,
                                      flattened_timeline)
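
# get_release_date is used as the sort key above but is not defined in this
# file. A minimal sketch, assuming add_release_dates attaches an ISO-8601
# 'release_date' string to each diff row (both the field name and the
# timestamp format are assumptions); parsing to datetime keeps the sort
# chronological rather than lexicographic.
from datetime import datetime

def get_release_date(diff_row):
    # Hypothetical field name and timestamp format.
    return datetime.strptime(diff_row['release_date'], '%Y-%m-%dT%H:%M:%SZ')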
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    issues = utils.read_csv_ignore_headers(ISSUES_TO_COLLECT_COMMENTS_FOR_CSV_FILE_PATH,
                                           ISSUES_CSV_FILE_FIELD_NAMES)
    print("Getting all comments for issues...")
    total = len(issues)
    count = 0
    print("Skipping issues whose comments were already collected...")
    for issue in issues:
        try:
            count += 1
            issue_id = issue['id']
            repo_name = issue['repo_name']
            comments_url = issue['comments_url']
            output_file_name = "{}/comments@issue@{}@{}.json".format(
                OUTPUT_FOLDER_PATH, issue_id, repo_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                continue
            print("\t{}/{} repo={}".format(count, total, repo_name))
            issue_comments = get_comments(comments_url)
            utils.write_to_json_file(output_file_name, issue_comments)
        except Exception as e:
            print("ERROR: Failed getting comments for issue={}: {}".format(issue['id'], e))
    print("Done")
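
# get_comments is not defined in this file. A minimal sketch, assuming the
# comments_url column holds a GitHub REST API URL (as in the issues API's
# 'comments_url' field) and that GITHUB_ACCESS_TOKEN is a personal access
# token; the per_page/page pagination shown is GitHub's standard scheme.
import requests

def get_comments(comments_url):
    comments = []
    page = 1
    headers = {'Authorization': 'token {}'.format(GITHUB_ACCESS_TOKEN)}
    while True:
        response = requests.get(comments_url,
                                headers=headers,
                                params={'per_page': 100, 'page': page})
        response.raise_for_status()
        batch = response.json()
        if not batch:
            # An empty page means we've walked past the last comment.
            break
        comments.extend(batch)
        page += 1
    return comments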
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    commits_to_collect = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                                       INPUT_CSV_FILE_FIELD_NAMES)
    print("Getting all commits referenced by issues...")
    total = len(commits_to_collect)
    count = 0
    for c in commits_to_collect:
        try:
            issue_id = c['issue_id']
            repo_name = c['repo_name']
            sha = c['commit_id']
            count += 1
            print("\t{}/{} sha={}".format(count, total, sha))
            output_file_name = "{}/commit@{}@{}.json".format(
                OUTPUT_FOLDER_PATH, sha, repo_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing")
                continue
            commit_url = f'https://api.github.com/repos/{repo_name}/commits/{sha}'
            commit_json = get_commit_json(commit_url)
            utils.write_to_json_file(output_file_name, commit_json)
        except Exception as e:
            print("ERROR: Failed getting commit for issue={}: {}".format(c['issue_id'], e))
    print("Done")
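
# get_commit_json is not defined in this file. A minimal sketch: the URL
# built above is GitHub's real GET /repos/{owner}/{repo}/commits/{sha}
# endpoint, and the token-header authentication mirrors the other
# collection scripts (an assumption about how the helper authenticates).
import requests

def get_commit_json(commit_url):
    response = requests.get(
        commit_url,
        headers={'Authorization': 'token {}'.format(GITHUB_ACCESS_TOKEN)})
    response.raise_for_status()
    return response.json()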
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    if LIBRARIES_IO_ACCESS_TOKEN is None:
        raise Exception("No LIBRARIES_IO_ACCESS_TOKEN")
    entities_to_collect_for = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                                            INPUT_CSV_FILE_FIELD_NAMES)
    total = len(entities_to_collect_for)
    count = 0
    for x in entities_to_collect_for:
        try:
            repo_half_name = x['repo_name']
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_half_name))
            repo_full_name = get_repo_full_name_from_libraries_io(repo_half_name)
            output_file_name = "{}/{}.json".format(OUTPUT_FOLDER_PATH,
                                                   repo_full_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing")
                continue
            dependents = get_dependents(repo_full_name)
            utils.write_to_json_file(output_file_name, dependents)
        except Exception as e:
            print("ERROR: Failed for repo={}: {}".format(x['repo_name'], e))
    print("Done")
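
# get_repo_full_name_from_libraries_io is not defined in this file. A minimal
# sketch, assuming repo_half_name is an npm package name and using
# libraries.io's real project endpoint, whose response includes a
# repository_url such as https://github.com/owner/name; deriving the
# owner/name pair from that URL is an assumption about what "full name" means
# here.
import requests

def get_repo_full_name_from_libraries_io(repo_half_name):
    url = 'https://libraries.io/api/NPM/{}?api_key={}'.format(
        repo_half_name.replace('/', '%2F'), LIBRARIES_IO_ACCESS_TOKEN)
    response = requests.get(url)
    response.raise_for_status()
    repository_url = response.json()['repository_url']
    # e.g. 'https://github.com/owner/name' -> 'owner/name'
    return '/'.join(repository_url.rstrip('/').split('/')[-2:])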
def main(): print("Reading releases...") releases_lines = utils.read_csv_ignore_headers(RELEASES_FILE, RELEASE_FIELD_NAMES) print("Building releases...") releases = build_releases(releases_lines) print("Reading repo cass diffs...") repo_call_diffs = utils.read_csv_ignore_headers( REPO_CALL_DIFFS_FILE, REPO_CALL_DIFFS_FIELD_NAMES) print("Adding release dates...") add_release_dates(repo_call_diffs, releases) print("Sorting repo call diffs") repo_call_diffs.sort(key=get_release_date) print("Building timeline...") timeline = build_timeline(repo_call_diffs) print("Flattening timeline...") flattened_timeline = flatten(timeline) print("Creating output file...") utils.create_csv_file(REPO_CALL_DIFFS_TIMELINE_FILE, REPO_CALL_DIFFS_TIMELINE_FIELD_NAMES) print("Writing timeline to output file...") utils.write_lines_to_existing_csv(REPO_CALL_DIFFS_TIMELINE_FILE, REPO_CALL_DIFFS_TIMELINE_FIELD_NAMES, flattened_timeline)
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    total = len(repos)
    count = 0
    list_to_write = list()
    for repo in repos:
        repo_name = repo['repo_name']
        try:
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_name))
            project_git_folder = f"{REPOS_PATH}/repos/{repo_name.replace('/', '#')}"
            if os.path.isdir(project_git_folder):
                package_json_path = f"{project_git_folder}/package.json"
                if utils.file_or_read_file_already_exists(package_json_path):
                    package_json_contents = utils.load_json_file(package_json_path)
                    # Will throw if 'name' is not there
                    list_to_write.append({
                        'repo_name': repo_name,
                        'package_name': package_json_contents['name']
                    })
                else:
                    # package.json doesn't exist
                    list_to_write.append({'repo_name': repo_name, 'package_name': None})
            else:
                # Could not clone project
                list_to_write.append({'repo_name': repo_name, 'package_name': None})
        except Exception as e:
            # Covers malformed package.json or a missing 'name' key
            list_to_write.append({'repo_name': repo_name, 'package_name': None})
    utils.write_lines_to_new_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, list_to_write)
    print("Done")
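
# utils.load_json_file is assumed to be a thin wrapper along these lines
# (a sketch, not the project's actual helper): read a file and parse it as
# JSON, letting any json.JSONDecodeError propagate to the caller's except
# block above.
import json

def load_json_file(path):
    with open(path, encoding='utf-8') as f:
        return json.load(f)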
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    print("Starting...")
    total = len(repos)
    count = 0
    lines_to_write = list()
    for repo in repos:
        try:
            count += 1
            repo_name = repo['repo_name']
            print("\t{}/{} repo={}".format(count, total, repo_name))
            repos_info = get_repo_info(repo_name)
            lines_to_write.append({
                'repo_name': repo_name,
                'is_fork': repos_info['fork'],
                'size': repos_info['size'],
                'stargazers_count': repos_info['stargazers_count'],
                'watchers_count': repos_info['watchers_count'],
                'language': repos_info['language'],
            })
        except Exception as e:
            print("ERROR: Failed getting repo info for repo={}: {}".format(repo['repo_name'], e))
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FILE_FIELD_NAMES)
    utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FILE_FIELD_NAMES, lines_to_write)
    print("Done")
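
# get_repo_info is not defined in this file. A minimal sketch using GitHub's
# real GET /repos/{owner}/{repo} endpoint, whose response does carry the
# 'fork', 'size', 'stargazers_count', 'watchers_count', and 'language'
# fields read above.
import requests

def get_repo_info(repo_name):
    response = requests.get(
        'https://api.github.com/repos/{}'.format(repo_name),
        headers={'Authorization': 'token {}'.format(GITHUB_ACCESS_TOKEN)})
    response.raise_for_status()
    return response.json()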
def main():
    gk_issues = utils.read_csv_ignore_headers(GREENKEEPER_ISSUES_FILE_PATH,
                                              GREENKEEPER_ISSUES_FIELD_NAMES)
    # Collect the distinct dependency names mentioned in the Greenkeeper issues
    library_names = list()
    for issue in gk_issues:
        if issue['issue_dependency_name'] not in library_names:
            library_names.append(issue['issue_dependency_name'])
    total = len(library_names)
    count = 0
    for lib_name in library_names:
        count += 1
        try:
            print("\t{}/{} lib={}".format(count, total, lib_name))
            if not lib_name:
                continue
            # Scoped package names contain '/', which must be percent-encoded in the URL
            escaped_lib_name = lib_name.replace('/', '%2F')
            url = 'https://libraries.io/api/NPM/{}?api_key={}'.format(escaped_lib_name,
                                                                      LIBRARIES_IO_ACCESS_TOKEN)
            # sleep_time=1.2 keeps us under libraries.io's 60 requests/minute limit
            lib_info = utils.send_request(url, None, ignore_token=True, sleep_time=1.2)
            file_name = "{}/{}.json".format(OUTPUT_FOLDER_PATH, escaped_lib_name)
            utils.write_to_json_file(file_name, lib_info)
        except Exception as e:
            print("Error on {}: {}".format(lib_name, e))
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    total = len(repos)
    count = 0
    for repo in repos:
        try:
            repo_name = repo['repo_name']
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_name))
            output_folder = f"{REPOS_PATH}/repos/{repo_name.replace('/', '#')}"
            if os.path.isdir(output_folder):
                continue
            os.mkdir(output_folder)
            # git clone [email protected]:${repo}.git ${REPOS_DATA_DIR}${repo_dir}
            subprocess.call(['git', 'clone', f'[email protected]:{repo_name}.git', output_folder])
        except Exception as e:
            print("ERROR: Failed cloning repo={}: {}".format(repo['repo_name'], e))
    print("Done")
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(REPOS_FILE_PATH, REPOS_FIELD_NAMES)
    print("Getting all Issues for repos...")
    total = len(repos)
    count = 0
    for repo in repos:
        try:
            repo_name = repo['dependent']
            count += 1
            print("\t{}/{} repo={}".format(count, total, repo_name))
            output_file_name = "{}/issues@{}.json".format(OUTPUT_FOLDER_PATH,
                                                          repo_name.replace('/', '@'))
            if utils.file_or_read_file_already_exists(output_file_name):
                print("File Exists - Continuing -- repo={}".format(repo_name))
                continue
            repo_issues = get_issues(repo_name)
            utils.write_to_json_file(output_file_name, repo_issues)
        except Exception as e:
            print("ERROR: Failed getting issues for repo={}: {}".format(repo['dependent'], e))
    print("Done")
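
# get_issues is not defined in this file. A minimal sketch against GitHub's
# real GET /repos/{owner}/{repo}/issues endpoint; state='all' (so closed
# issues are included) is an assumption about what the study collects, and
# note this endpoint also returns pull requests unless they are filtered out.
import requests

def get_issues(repo_name):
    issues = []
    page = 1
    headers = {'Authorization': 'token {}'.format(GITHUB_ACCESS_TOKEN)}
    while True:
        response = requests.get(
            'https://api.github.com/repos/{}/issues'.format(repo_name),
            headers=headers,
            params={'state': 'all', 'per_page': 100, 'page': page})
        response.raise_for_status()
        batch = response.json()
        if not batch:
            break
        issues.extend(batch)
        page += 1
    return issues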
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    if LIBRARIES_IO_ACCESS_TOKEN is None:
        raise Exception("No LIBRARIES_IO_ACCESS_TOKEN")
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    entries = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES)
    total = len(entries)
    count = 0

    def not_on_npm_row(repo_name, package_name, use_repo_name):
        # Row shape used when the package is missing from libraries.io or the lookup failed
        return {
            'repo_name': repo_name,
            'package_name': package_name,
            'use_repo_name': use_repo_name,
            'on_libraries_io_npm': False,
            'npm_dependent_repos_count': None,
            'npm_dependents_count': None,
            'npm_forks': None,
            'npm_language': None,
            'npm_rank': None,
            'npm_stars': None,
        }

    for x in entries:
        lines_to_write = list()
        repo_name = x['repo_name']
        package_name = x['package_name']
        use_repo_name = False
        try:
            count += 1
            print("\t{}/{} repo={}".format(count, total, package_name))
            if not package_name:
                # Fall back to the repo's own name when no package name is recorded
                use_repo_name = True
                package_name = repo_name.split('/')[1]
            libraries_io_response = get_libraries_io_response(package_name)
            if 'error' in libraries_io_response:
                lines_to_write.append(not_on_npm_row(repo_name, package_name, use_repo_name))
            else:
                lines_to_write.append({
                    'repo_name': repo_name,
                    'package_name': package_name,
                    'use_repo_name': use_repo_name,
                    'on_libraries_io_npm': True,
                    'npm_dependent_repos_count': libraries_io_response['dependent_repos_count'],
                    'npm_dependents_count': libraries_io_response['dependents_count'],
                    'npm_forks': libraries_io_response['forks'],
                    'npm_language': libraries_io_response['language'],
                    'npm_rank': libraries_io_response['rank'],
                    'npm_stars': libraries_io_response['stars'],
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write)
        except Exception as e:
            lines_to_write.append(not_on_npm_row(repo_name, package_name, use_repo_name))
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write)
    print("Done")
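
# get_libraries_io_response is not defined in this file. A minimal sketch
# against libraries.io's real project endpoint; its JSON does carry the
# dependent_repos_count, dependents_count, forks, language, rank, and stars
# fields read above. The 'error' check in main() suggests the helper returns
# the parsed body even for failed lookups, so this sketch does not raise on
# non-200 responses (an assumption).
import requests

def get_libraries_io_response(package_name):
    url = 'https://libraries.io/api/NPM/{}?api_key={}'.format(
        package_name.replace('/', '%2F'), LIBRARIES_IO_ACCESS_TOKEN)
    return requests.get(url).json()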