def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, BREAKING_LIBRARY_VERSIONS_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            file_contents = utils.load_json_file(ftp)
            lines_to_write = list()
            package_name = file_contents['name']
            for v in file_contents['versions']:
                lines_to_write.append({
                    'package_name': package_name,
                    'version': v['number'],
                    'version_published_at': v['published_at'],
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, BREAKING_LIBRARY_VERSIONS_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] on file {}. Continuing from next file.".format(ftp))
    print("DONE")
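# The `utils` helpers used throughout these scripts are defined elsewhere in
# the project. The sketch below is a minimal, assumed implementation of the two
# CSV helpers (`create_csv_file_if_necessary`, `write_lines_to_existing_csv`)
# based only on how they are called here; the real module may differ.
import csv
import os


def create_csv_file_if_necessary(file_path, field_names):
    # Create the output CSV with a header row only if it does not exist yet.
    if not os.path.exists(file_path):
        with open(file_path, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=field_names)
            writer.writeheader()


def write_lines_to_existing_csv(file_path, field_names, lines):
    # Append the parsed rows to the existing CSV without rewriting the header.
    with open(file_path, 'a', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=field_names)
        for line in lines:
            writer.writerow(line)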
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, COMMENTS_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    total = len(files_to_parse)
    print("Found {} files".format(total))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}/{}: Parsing + writing {}".format(count, total, ftp))
            comments = utils.load_json_file(ftp)
            issue_id, repo_name = parse_issue_id_and_repo_name_from_file_name(ftp)
            lines_to_write = list()
            for c in comments:
                lines_to_write.append({
                    'id': c['id'],
                    'issue_id': issue_id,
                    'repo_name': repo_name,
                    'url': c['url'],
                    'issue_url': c['issue_url'],
                    'user_id': c['user']['id'],
                    'user_login': c['user']['login'],
                    'user_type': c['user']['type'],
                    'created_at': c['created_at'],
                    'updated_at': c['updated_at'],
                    'body': c['body'],
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, COMMENTS_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] on file {}. Continuing from next file.".format(ftp))
    print("DONE")
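# `parse_issue_id_and_repo_name_from_file_name` is defined elsewhere in the
# project. A minimal sketch is shown below; the assumed file-name layout
# ("<owner>__<repo>__<issue_id>__comments.json") is hypothetical and only
# illustrates the kind of parsing the call above relies on.
import os


def parse_issue_id_and_repo_name_from_file_name(file_path):
    base = os.path.basename(file_path).replace('.json', '')
    owner, repo, issue_id = base.split('__')[:3]
    return issue_id, '{}/{}'.format(owner, repo)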
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    total = len(files_to_parse)
    print("Found {} files".format(total))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}/{}: Parsing + writing {}".format(count, total, ftp))
            commit = utils.load_json_file(ftp)
            if 'message' in commit and 'No commit found for SHA' in commit['message']:
                print('No Commit found...continuing')
                continue
            commit_sha, issue_id, repo_name = parse_artifacts_from_file_name(ftp)
            message = commit['commit']['message']
            url = commit['url']
            html_url = commit['html_url']
            author_login = commit['author']['login'] if commit['author'] is not None else ''
            author_type = commit['author']['type'] if commit['author'] is not None else ''
            committer_login = commit['committer']['login'] if commit['committer'] is not None else ''
            committer_type = commit['committer']['type'] if commit['committer'] is not None else ''
            stats_total = commit['stats']['total']
            stats_additions = commit['stats']['additions']
            stats_deletions = commit['stats']['deletions']

            def make_new_commit_line(f):
                return {
                    'commit_sha': commit_sha,
                    'issue_id': issue_id,
                    'repo_name': repo_name,
                    'url': url,
                    'html_url': html_url,
                    'message': message,
                    'author_login': author_login,
                    'author_type': author_type,
                    'committer_login': committer_login,
                    'committer_type': committer_type,
                    'stats_total': stats_total,
                    'stats_additions': stats_additions,
                    'stats_deletions': stats_deletions,
                    'file_name': f['filename'],
                    'file_status': f['status'],
                    'file_additions': f['additions'],
                    'file_deletions': f['deletions'],
                    'file_changes': f['changes'],
                    'file_patch': f['patch'] if 'patch' in f else None,
                }

            lines_to_write = list()
            for file in commit['files']:
                lines_to_write.append(make_new_commit_line(file))
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] on file {}. Continuing from next file.".format(ftp))
    print("DONE")
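# `parse_artifacts_from_file_name` is also project-specific and not shown in
# this section. The sketch below assumes a hypothetical file-name layout
# ("<owner>__<repo>__<issue_id>__<commit_sha>.json"); the real naming scheme
# may differ.
import os


def parse_artifacts_from_file_name(file_path):
    base = os.path.basename(file_path).replace('.json', '')
    owner, repo, issue_id, commit_sha = base.split('__')[:4]
    return commit_sha, issue_id, '{}/{}'.format(owner, repo)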
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, ISSUE_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            issues = utils.load_json_file(ftp)
            repo_name = parse_repo_name_form_file_name(ftp)
            lines_to_write = list()
            for i in issues:
                lines_to_write.append({
                    'id': i['id'],
                    'repo_name': repo_name,
                    'url': i['url'],
                    'repository_url': i['repository_url'],
                    'comments_url': i['comments_url'],
                    'events_url': i['events_url'],
                    'html_url': i['html_url'],
                    'number': i['number'],
                    'title': i['title'],
                    'user_id': i['user']['id'],
                    'user_login': i['user']['login'],
                    'user_type': i['user']['type'],
                    'state': i['state'],
                    'locked': i['locked'],
                    'comments': i['comments'],
                    'created_at': i['created_at'],
                    'updated_at': i['updated_at'],
                    'closed_at': i['closed_at'],
                    'body': i['body'],
                    'is_pull_request': 'pull_request' in i,
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, ISSUE_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] on file {}. Continuing from next file.".format(ftp))
    print("DONE")
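# `parse_repo_name_form_file_name` (name kept as spelled in the project) is
# defined elsewhere and is also used by the dependents parser below. A minimal
# sketch, assuming a hypothetical "<owner>__<repo>__issues.json" naming scheme:
import os


def parse_repo_name_form_file_name(file_path):
    base = os.path.basename(file_path).replace('.json', '')
    owner, repo = base.split('__')[:2]
    return '{}/{}'.format(owner, repo)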
def main(): if PROJECT_PATH is None: raise Exception("No PROJECT_ROOT_PATH") if GITHUB_ACCESS_TOKEN is None: raise Exception("No GITHUB_ACCESS_TOKEN") repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES) print("Starting...") total = len(repos) count = 0 lines_to_write = list() for repo in repos: try: count += 1 repo_name = repo['repo_name'] print("\t{}/{} repo={}".format(count, total, repo_name)) repos_info = get_repo_info(repo_name) lines_to_write.append({ 'repo_name': repo_name, 'is_fork': repos_info['fork'], 'size': repos_info['size'], 'stargazers_count': repos_info['stargazers_count'], 'watchers_count': repos_info['watchers_count'], 'language': repos_info['language'], }) except Exception as e: print("ERROR: Failed getting comments for issue={}".format( repo['repo_name'])) utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FILE_FIELD_NAMES) utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FILE_FIELD_NAMES, lines_to_write) print("Done")
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            dependents = utils.load_json_file(ftp)
            repo_name = parse_repo_name_form_file_name(ftp)
            lines_to_write = list()
            for d in dependents:
                lines_to_write.append({'repo_name': repo_name, 'dependent': d})
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] on file {}. Continuing from next file.".format(ftp))
    print("DONE")
def main(): if PROJECT_PATH is None: raise Exception("No PROJECT_ROOT_PATH") if GITHUB_ACCESS_TOKEN is None: raise Exception("No GITHUB_ACCESS_TOKEN") if LIBRARIES_IO_ACCESS_TOKEN is None: raise Exception("No LIBRARIES_IO_ACCESS_TOKEN") utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES) entries = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH, INPUT_CSV_FILE_FIELD_NAMES) total = len(entries) count = 0 for x in entries: lines_to_write = list() repo_name = x['repo_name'] package_name = x['package_name'] use_repo_name = False try: count += 1 print("\t{}/{} repo={}".format(count, total, package_name)) if not package_name: use_repo_name = True package_name = repo_name.split('/')[1] libraries_io_response = get_libraries_io_response(package_name) if 'error' in libraries_io_response: lines_to_write.append({ 'repo_name': repo_name, 'package_name': package_name, 'use_repo_name': use_repo_name, 'on_libraries_io_npm': False, 'npm_dependent_repos_count': None, 'npm_dependents_count': None, 'npm_forks': None, 'npm_language': None, 'npm_rank': None, 'npm_stars': None, }) utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write) else: lines_to_write.append({ 'repo_name': repo_name, 'package_name': package_name, 'use_repo_name': use_repo_name, 'on_libraries_io_npm': True, 'npm_dependent_repos_count': libraries_io_response['dependent_repos_count'], 'npm_dependents_count': libraries_io_response['dependents_count'], 'npm_forks': libraries_io_response['forks'], 'npm_language': libraries_io_response['language'], 'npm_rank': libraries_io_response['rank'], 'npm_stars': libraries_io_response['stars'], }) utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write) except Exception as e: lines_to_write.append({ 'repo_name': repo_name, 'package_name': package_name, 'use_repo_name': use_repo_name, 'on_libraries_io_npm': False, 'npm_dependent_repos_count': None, 'npm_dependents_count': None, 'npm_forks': None, 'npm_language': None, 'npm_rank': None, 'npm_stars': None, }) utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write) print("Done")