def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, COMMENTS_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    total = len(files_to_parse)
    print("Found {} files".format(total))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}/{}: Parsing + writing {}".format(count, total, ftp))
            comments = utils.load_json_file(ftp)
            issue_id, repo_name = parse_issue_id_and_repo_name_from_file_name(ftp)
            lines_to_write = list()
            for c in comments:
                lines_to_write.append({
                    'id': c['id'],
                    'issue_id': issue_id,
                    'repo_name': repo_name,
                    'url': c['url'],
                    'issue_url': c['issue_url'],
                    'user_id': c['user']['id'],
                    'user_login': c['user']['login'],
                    'user_type': c['user']['type'],
                    'created_at': c['created_at'],
                    'updated_at': c['updated_at'],
                    'body': c['body'],
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, COMMENTS_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with the next file.".format(e, ftp))
    print("DONE")
Example #2
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH,
                                       BREAKING_LIBRARY_VERSIONS_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(
        FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH,
                                                    FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            file_contents = utils.load_json_file(ftp)
            lines_to_write = list()
            package_name = file_contents['name']
            for v in file_contents['versions']:
                lines_to_write.append({
                    'package_name': package_name,
                    "version": v['number'],
                    "version_published_at": v['published_at']
                })
            utils.write_lines_to_existing_csv(
                OUTPUT_FILE_PATH, BREAKING_LIBRARY_VERSIONS_FIELD_NAMES,
                lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with the next file.".format(e, ftp))
    print("DONE")
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH, FILE_NAME_SEARCH_STRING)
    total = len(files_to_parse)
    print("Found {} files".format(total))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}/{}: Parsing + writing {}".format(count, total, ftp))
            commit = utils.load_json_file(ftp)
            if 'message' in commit and 'No commit found for SHA' in commit['message']:
                print('No Commit found...continuing')
                continue
            commit_sha, issue_id, repo_name = parse_artifacts_from_file_name(ftp)
            message = commit['commit']['message']
            url = commit['url']
            html_url = commit['html_url']
            author_login = commit['author']['login'] if commit['author'] is not None else ''
            author_type = commit['author']['type'] if commit['author'] is not None else ''
            committer_login = commit['committer']['login'] if commit['committer'] is not None else ''
            committer_type = commit['committer']['type'] if commit['committer'] is not None else ''
            stats_total = commit['stats']['total']
            stats_additions = commit['stats']['additions']
            stats_deletions = commit['stats']['deletions']

            def make_new_commit_line(f):
                return {
                    'commit_sha': commit_sha,
                    'issue_id': issue_id,
                    'repo_name': repo_name,
                    'url': url,
                    'html_url': html_url,
                    'message': message,
                    'author_login': author_login,
                    'author_type': author_type,
                    'committer_login': committer_login,
                    'committer_type': committer_type,
                    'stats_total': stats_total,
                    'stats_additions': stats_additions,
                    'stats_deletions': stats_deletions,
                    'file_name': f['filename'],
                    'file_status': f['status'],
                    'file_additions': f['additions'],
                    'file_deletions': f['deletions'],
                    'file_changes': f['changes'],
                    'file_patch': f['patch'] if 'patch' in f else None,
                }

            lines_to_write = list()
            for file in commit['files']:
                new_line = make_new_commit_line(file)
                lines_to_write.append(new_line)
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES, lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with the next file.".format(e, ftp))
    print("DONE")
def main():
    releases_lines = utils.read_csv_ignore_headers(RELEASES_FILE, RELEASE_FIELD_NAMES)
    releases = build_releases(releases_lines)
    repo_import_diffs = utils.read_csv_ignore_headers(REPO_IMPORT_DIFFS_FILE, REPO_IMPORT_DIFFS_FIELD_NAMES)
    add_release_dates(repo_import_diffs, releases)
    repo_import_diffs.sort(key=get_release_date)
    timeline = build_timeline(repo_import_diffs)
    flattened_timeline = flatten(timeline)
    utils.create_csv_file(REPO_IMPORT_DIFFS_TIMELINE_FILE, REPO_IMPORT_DIFFS_TIMELINE_FIELD_NAMES)
    utils.write_lines_to_existing_csv(REPO_IMPORT_DIFFS_TIMELINE_FILE, REPO_IMPORT_DIFFS_TIMELINE_FIELD_NAMES, flattened_timeline)
Example #5
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, ISSUE_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(
        FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH,
                                                    FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            issues = utils.load_json_file(ftp)
            repo_name = parse_repo_name_form_file_name(ftp)
            lines_to_write = list()
            for i in issues:
                lines_to_write.append({
                    'id': i['id'],
                    'repo_name': repo_name,
                    'url': i['url'],
                    'repository_url': i['repository_url'],
                    'comments_url': i['comments_url'],
                    'events_url': i['events_url'],
                    'html_url': i['html_url'],
                    'number': i['number'],
                    'title': i['title'],
                    'user_id': i['user']['id'],
                    'user_login': i['user']['login'],
                    'user_type': i['user']['type'],
                    'state': i['state'],
                    'locked': i['locked'],
                    'comments': i['comments'],
                    'created_at': i['created_at'],
                    'updated_at': i['updated_at'],
                    'closed_at': i['closed_at'],
                    'body': i['body'],
                    'is_pull_request': 'pull_request' in i
                })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                              ISSUE_FIELD_NAMES,
                                              lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with the next file.".format(e, ftp))
    print("DONE")
def main():
    releases_list = read_csv(RELEASES_FILE, RELEASE_FIELD_NAMES)
    repos = build_repos_from_releases(releases_list)
    imports_list = read_csv(IMPORTS_FILE, IMPORT_FIELD_NAMES)
    add_imports_data_to_repos(imports_list, repos)
    for repo in repos.values():
        repo.parse_file_import_diffs()
        repo.parse_repo_imports()
        repo.parse_repo_import_diffs()
    diffs = list()
    repo_imports = list()
    repo_import_diffs = list()
    for repo in repos.values():
        diffs.extend(repo.diffs)
        repo_imports.extend(repo.repo_imports)
        repo_import_diffs.extend(repo.repo_import_diffs)
    # Import Diffs at file level
    diffs_for_csv = transform_diffs_to_csv_writable_objects(diffs)
    utils.create_csv_file(IMPORT_DIFFS_FILE, IMPORT_DIFFS_FIELD_NAMES)
    utils.write_lines_to_existing_csv(IMPORT_DIFFS_FILE,
                                      IMPORT_DIFFS_FIELD_NAMES, diffs_for_csv)
    # Imports at repo level
    utils.create_csv_file(REPO_IMPORTS_FILE, REPO_IMPORTS_FIELD_NAMES)
    utils.write_lines_to_existing_csv(REPO_IMPORTS_FILE,
                                      REPO_IMPORTS_FIELD_NAMES, repo_imports)
    # Import Diffs at repo level
    repo_import_diffs_for_csv = transform_repo_import_diffs_to_csv_writable_objects(
        repo_import_diffs)
    utils.create_csv_file(REPO_IMPORT_DIFFS_FILE,
                          REPO_IMPORT_DIFFS_FIELD_NAMES)
    utils.write_lines_to_existing_csv(REPO_IMPORT_DIFFS_FILE,
                                      REPO_IMPORT_DIFFS_FIELD_NAMES,
                                      repo_import_diffs_for_csv)
Example #7
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    repos = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                          INPUT_CSV_FILE_FIELD_NAMES)
    print("Starting...")
    total = len(repos)
    count = 0
    lines_to_write = list()
    for repo in repos:
        try:
            count += 1
            repo_name = repo['repo_name']
            print("\t{}/{} repo={}".format(count, total, repo_name))
            repos_info = get_repo_info(repo_name)
            lines_to_write.append({
                'repo_name': repo_name,
                'is_fork': repos_info['fork'],
                'size': repos_info['size'],
                'stargazers_count': repos_info['stargazers_count'],
                'watchers_count': repos_info['watchers_count'],
                'language': repos_info['language'],
            })
        except Exception as e:
            print("ERROR: {} while getting repo info for repo={}".format(
                e, repo['repo_name']))
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH,
                                       OUTPUT_FILE_FIELD_NAMES)
    utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                      OUTPUT_FILE_FIELD_NAMES, lines_to_write)

    print("Done")
Example #8
def main():
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    print("Finding files to parse that match {} in {}".format(
        FILE_NAME_SEARCH_STRING, SEARCH_DIR_PATH))
    files_to_parse = utils.get_list_of_unread_files(SEARCH_DIR_PATH,
                                                    FILE_NAME_SEARCH_STRING)
    print("Found {} files".format(len(files_to_parse)))
    count = 0
    for ftp in files_to_parse:
        try:
            count += 1
            print("{}: Parsing + writing {}".format(count, ftp))
            dependents = utils.load_json_file(ftp)
            repo_name = parse_repo_name_form_file_name(ftp)
            lines_to_write = list()
            for d in dependents:
                lines_to_write.append({'repo_name': repo_name, 'dependent': d})
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                              OUTPUT_FIELD_NAMES,
                                              lines_to_write)
            utils.mark_file_as_read(ftp)
        except Exception as e:
            print("[ERROR] {} on file {}. Continuing with the next file.".format(e, ftp))
    print("DONE")
Example #9
def main():
    print("Reading releases...")
    releases_lines = utils.read_csv_ignore_headers(RELEASES_FILE,
                                                   RELEASE_FIELD_NAMES)
    print("Building releases...")
    releases = build_releases(releases_lines)
    print("Reading repo cass diffs...")
    repo_call_diffs = utils.read_csv_ignore_headers(
        REPO_CALL_DIFFS_FILE, REPO_CALL_DIFFS_FIELD_NAMES)
    print("Adding release dates...")
    add_release_dates(repo_call_diffs, releases)
    print("Sorting repo call diffs")
    repo_call_diffs.sort(key=get_release_date)
    print("Building timeline...")
    timeline = build_timeline(repo_call_diffs)
    print("Flattening timeline...")
    flattened_timeline = flatten(timeline)
    print("Creating output file...")
    utils.create_csv_file(REPO_CALL_DIFFS_TIMELINE_FILE,
                          REPO_CALL_DIFFS_TIMELINE_FIELD_NAMES)
    print("Writing timeline to output file...")
    utils.write_lines_to_existing_csv(REPO_CALL_DIFFS_TIMELINE_FILE,
                                      REPO_CALL_DIFFS_TIMELINE_FIELD_NAMES,
                                      flattened_timeline)
Example #10
def main():
    if PROJECT_PATH is None:
        raise Exception("No PROJECT_ROOT_PATH")
    if GITHUB_ACCESS_TOKEN is None:
        raise Exception("No GITHUB_ACCESS_TOKEN")
    if LIBRARIES_IO_ACCESS_TOKEN is None:
        raise Exception("No LIBRARIES_IO_ACCESS_TOKEN")
    utils.create_csv_file_if_necessary(OUTPUT_FILE_PATH, OUTPUT_FIELD_NAMES)
    entries = utils.read_csv_ignore_headers(INPUT_CSV_FILE_PATH,
                                            INPUT_CSV_FILE_FIELD_NAMES)
    total = len(entries)
    count = 0
    for x in entries:
        lines_to_write = list()
        repo_name = x['repo_name']
        package_name = x['package_name']
        use_repo_name = False
        try:
            count += 1
            print("\t{}/{} repo={}".format(count, total, package_name))
            if not package_name:
                use_repo_name = True
                package_name = repo_name.split('/')[1]
            libraries_io_response = get_libraries_io_response(package_name)
            if 'error' in libraries_io_response:
                lines_to_write.append({
                    'repo_name': repo_name,
                    'package_name': package_name,
                    'use_repo_name': use_repo_name,
                    'on_libraries_io_npm': False,
                    'npm_dependent_repos_count': None,
                    'npm_dependents_count': None,
                    'npm_forks': None,
                    'npm_language': None,
                    'npm_rank': None,
                    'npm_stars': None,
                })
                utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                                  OUTPUT_FIELD_NAMES,
                                                  lines_to_write)
            else:
                lines_to_write.append({
                    'repo_name': repo_name,
                    'package_name': package_name,
                    'use_repo_name': use_repo_name,
                    'on_libraries_io_npm': True,
                    'npm_dependent_repos_count': libraries_io_response['dependent_repos_count'],
                    'npm_dependents_count': libraries_io_response['dependents_count'],
                    'npm_forks': libraries_io_response['forks'],
                    'npm_language': libraries_io_response['language'],
                    'npm_rank': libraries_io_response['rank'],
                    'npm_stars': libraries_io_response['stars'],
                })
                utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                                  OUTPUT_FIELD_NAMES,
                                                  lines_to_write)
        except Exception as e:
            # Any failure (request error, missing field) is recorded as
            # "not on Libraries.io" rather than aborting the run.
            print("ERROR: {} for repo={} package={}".format(e, repo_name, package_name))
            lines_to_write.append({
                'repo_name': repo_name,
                'package_name': package_name,
                'use_repo_name': use_repo_name,
                'on_libraries_io_npm': False,
                'npm_dependent_repos_count': None,
                'npm_dependents_count': None,
                'npm_forks': None,
                'npm_language': None,
                'npm_rank': None,
                'npm_stars': None,
            })
            utils.write_lines_to_existing_csv(OUTPUT_FILE_PATH,
                                              OUTPUT_FIELD_NAMES,
                                              lines_to_write)
    print("Done")