Example #1
0
def scan_single_repo(repo, summary, output_csv, num=100):
    """Report contributor locations for one GitHub repository.

    A banner naming the repo is printed first. The results are then
    printed either grouped by country or listed per contributor.

    Args:
        repo - URL of repo
        summary - if truthy, print results by country instead of by
            contributor
        output_csv - forwarded to print_by_contributor(); only used when
            summary is falsy
        num - max number of contributors to analyze

    Returns:
        None
    """
    owner_and_repo = extract_github_owner_and_repo(repo)
    repo_contributors = get_contributors(owner_and_repo, num)

    # banner identifying which repo the following output belongs to
    divider = "-----------------"
    print(divider)
    print("GITHUB REPO: {}".format(owner_and_repo))
    print(divider)

    # detailed, per-contributor listing unless a summary was requested
    if not summary:
        print_by_contributor(owner_and_repo, repo_contributors, output_csv)
        return

    print_by_country(repo_contributors)
Example #2
0
def get_dataframe_from_repo(repo, num=100):
    """Build a pandas dataframe counting contributors per country.

    Args:
        repo - a full GitHub repo URL
        num - number of contributors to analyze per repo

    Returns:
        df - a pandas dataframe with columns "country" and
            "contributor_count", ordered from most to least common
        num_contributors - number of contributors that were analyzed
    """
    # resolve the repo and fetch its contributors
    owner_and_repo = extract_github_owner_and_repo(repo)
    people = get_contributors(owner_and_repo, num)
    num_contributors = len(people)

    # map every contributor to a country (location lookup first, then
    # country lookup) and tally the results
    countries = [
        get_country_from_location(get_contributor_location(person))
        for person in people
    ]
    ranked_counts = Counter(countries).most_common()

    # each (country, count) pair becomes one dataframe row
    column_names = ["country", "contributor_count"]
    df = pd.DataFrame.from_records(ranked_counts, columns=column_names)

    return df, num_contributors
Example #3
0
def test_print_by_country(capsys):
    """Unit test for print_by_country() using the networkml repo.

    Prints the by-country summary for the repo's contributors and
    compares everything written to stdout against a fixed expected table.
    """
    repo_url = "https://www.github.com/iqtlabs/networkml"
    owner_and_repo = extract_github_owner_and_repo(repo_url)
    print_by_country(get_contributors(owner_and_repo))

    # dedent strips the common leading whitespace shared by all lines
    expected_out = textwrap.dedent(
        """        COUNTRY | # OF CONTRIBUTORS
        ---------------------------
        None 10
        United States 4
        New Zealand 2
        Palestine 1\n"""
    )

    # grab everything printed to stdout so far
    printed = capsys.readouterr()
    assert printed.out == expected_out
Example #4
0
def scan_multiple_repos(input_file="repos.txt", num=100):
    """Create csv of data for multiple repos.

    Scan through repos provided in the input file and create a single csv
    that stores all contributor-related data for each contributor in each
    repo.

    Args:
        input_file - file containing repo list, one repo per line; blank
            or whitespace-only lines are ignored
        num - max number of contributors to analyze per repo

    Returns:
        None
    """
    # create csv to store multi-repo scan results
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    create_csv("multirepo", timestamp)

    # open file that contains repos to scan and append contributors for each
    # repo to csv.
    with open(input_file, "r") as input_repos:
        for line in input_repos:
            # Lines read from a file keep their trailing newline, so a blank
            # line is "\n", not "". Strip first, then test for emptiness.
            repo = line.strip()
            if not repo:
                continue
            repo_ending_string = extract_github_owner_and_repo(repo)
            contributors = get_contributors(repo_ending_string, num)
            for contributor in contributors:
                location = get_contributor_location(contributor)
                country = get_country_from_location(location)
                add_committer_to_csv(
                    "multirepo",
                    repo_ending_string,
                    timestamp,
                    contributor,
                    location,
                    country,
                )
Example #5
0
 def test_extract_github_owner_and_repo(self):
     """Check that a github.com URL is reduced to its "owner/repo" part."""
     repo_url = "www.github.com/psf/requests"
     expected = "psf/requests"
     assert extract_github_owner_and_repo(repo_url) == expected