Exemple #1
0
def test_print_by_contributor_package(capsys):
    """Check print_by_contributor() output for the networkml PyPI package.

    Fetches the package's PyPI data, looks up the contributors of the GitHub
    repo recorded under its "github_owner_and_repo" key, prints them and
    compares the captured stdout with the expected listing.
    """
    package_name = "networkml"
    package_info = get_pypi_data(package_name)
    owner_and_repo = package_info["github_owner_and_repo"]
    print_by_contributor(
        package_name, get_contributors(owner_and_repo), pypi_data=package_info
    )
    printed = capsys.readouterr().out  # everything written to stdout so far
    # textwrap.dedent strips the leading whitespace common to every line, so
    # the literal can stay indented with the surrounding code.
    expected = textwrap.dedent(
        """        CONTRIBUTOR, LOCATION
        * indicates PyPI maintainer
        ---------------------
        cglewis * | USA | United States
        anarkiwi | Wellington, New Zealand | New Zealand
        CStephenson970 | None | None
        renovate-bot | None | None
        lilchurro | None | None
        rashley-iqt | None | None
        jspeed-meyers * | None | None
        pyup-bot | None | None
        alshaboti | Wellington, New Zealand | New Zealand
        jseparovic | Mountain View, CA | United States
        squeeve | None | None
        gregs5 | Washington DC | United States
        krb1997 | None | None
        toddstavish | None | None
        sneakyoctopus12 | None | None
        Hax7 | Palestine | Palestine
        paulgowdy | Menlo Park CA | United States\n"""
    )
    assert printed == expected
Exemple #2
0
def scan_single_repo(repo, summary, output_csv, num=100):
    """Report contributor locations for one GitHub repository.

    A short header naming the repo is printed first, followed by either a
    per-country summary or a per-contributor listing.

    Args:
        repo - URL of repo
        summary - if truthy, print results aggregated by country; otherwise
            print results contributor by contributor
        output_csv - forwarded to print_by_contributor(); only used when
            summary is falsy
        num - max number of contributors to analyze

    Returns:
        None
    """
    owner_and_repo = extract_github_owner_and_repo(repo)
    repo_contributors = get_contributors(owner_and_repo, num)

    divider = "-----------------"
    print(divider)
    print("GITHUB REPO: {}".format(owner_and_repo))
    print(divider)

    # Per-contributor listing is the default; the country view is opt-in.
    if not summary:
        print_by_contributor(owner_and_repo, repo_contributors, output_csv)
        return
    print_by_country(repo_contributors)
Exemple #3
0
def get_dataframe_from_repo(repo, num=100):
    """Build a pandas dataframe counting a repo's contributors per country.

    Args:
        repo - a full GitHub repo URL
        num - number of contributors to analyze per repo

    Returns:
        A 2-tuple of:
        df - dataframe with columns "country" and "contributor_count", rows
            in Counter.most_common() order
        num_contributors - number of contributors returned by
            get_contributors()
    """
    owner_and_repo = extract_github_owner_and_repo(repo)
    repo_contributors = get_contributors(owner_and_repo, num)

    # Resolve each contributor to a country and tally as we go.
    tally = Counter(
        get_country_from_location(get_contributor_location(person))
        for person in repo_contributors
    )

    frame = pd.DataFrame.from_records(
        tally.most_common(),
        columns=["country", "contributor_count"],
    )
    return frame, len(repo_contributors)
Exemple #4
0
def test_print_by_contributor_repo(capsys):
    """Check print_by_contributor() output for a GitHub owner/repo string."""
    repo_slug = "jspeed-meyers/pcap2map"
    print_by_contributor(repo_slug, get_contributors(repo_slug))
    printed = capsys.readouterr().out  # everything written to stdout so far
    # textwrap.dedent strips the leading whitespace common to every line, so
    # the literal can stay indented with the surrounding code.
    expected = textwrap.dedent(
        """        CONTRIBUTOR, LOCATION
        ---------------------
        jspeed-meyers | None | None\n"""
    )
    assert printed == expected
Exemple #5
0
def test_print_by_country(capsys):
    """Check print_by_country() output for the networkml GitHub repo."""
    repo_url = "https://www.github.com/iqtlabs/networkml"
    owner_and_repo = extract_github_owner_and_repo(repo_url)
    print_by_country(get_contributors(owner_and_repo))
    printed = capsys.readouterr().out  # everything written to stdout so far
    # textwrap.dedent strips the leading whitespace common to every line, so
    # the literal can stay indented with the surrounding code.
    expected = textwrap.dedent(
        """        COUNTRY | # OF CONTRIBUTORS
        ---------------------------
        None 10
        United States 4
        New Zealand 2
        Palestine 1\n"""
    )
    assert printed == expected
Exemple #6
0
def scan_multiple_repos(input_file="repos.txt", num=100):
    """Create csv of data for multiple repos.

    Scan through the repos listed in input_file and create a single csv that
    stores all contributor-related data for each contributor in each repo.

    Args:
        input_file - file containing repo list, one repo per line; blank or
            whitespace-only lines are ignored
        num - max number of contributors to analyze per repo

    Returns:
        None
    """
    # create csv to store multi-repo scan results
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    create_csv("multirepo", timestamp)

    # open file that contains repos to scan and append contributors for each
    # repo to csv.
    with open(input_file, "r") as input_repos:
        for line in input_repos:
            # Lines read from a file keep their trailing newline, so strip
            # before testing for emptiness; comparing the raw line to ""
            # would never match and blank lines would be scanned as repos.
            repo = line.strip()
            if not repo:
                continue
            repo_ending_string = extract_github_owner_and_repo(repo)
            contributors = get_contributors(repo_ending_string, num)
            for contributor in contributors:
                location = get_contributor_location(contributor)
                country = get_country_from_location(location)
                add_committer_to_csv(
                    "multirepo",
                    repo_ending_string,
                    timestamp,
                    contributor,
                    location,
                    country,
                )
Exemple #7
0
def scan_single_package(pkg, summary, num=100):
    """Report contributor locations for one PyPI package.

    A short header naming the package and its GitHub repo is printed first,
    followed by either a per-country summary or a per-contributor listing.

    Args:
        pkg - name of python package on PyPI
        summary - if truthy, print results aggregated by country; otherwise
            print results contributor by contributor
        num - max number of contributors to analyze

    Returns:
        None
    """
    package_info = get_pypi_data(pkg)
    owner_and_repo = package_info["github_owner_and_repo"]
    repo_contributors = get_contributors(owner_and_repo, num)

    divider = "-----------------"
    print(divider)
    print("PACKAGE: {}".format(pkg))
    print("GITHUB REPO: {}".format(owner_and_repo))
    print(divider)

    # Per-contributor listing is the default; the country view is opt-in.
    if not summary:
        print_by_contributor(pkg, repo_contributors, pypi_data=package_info)
        return
    print_by_country(repo_contributors)
Exemple #8
0
 def test_get_contributors(self):
     """Check get_contributors() on a repo with one expected contributor."""
     expected = ["jspeed-meyers"]
     assert get_contributors("jspeed-meyers/pcap2map") == expected