def test_print_by_contributor_package(capsys):
    """Unit test for print_by_contributor() for networkml python package."""
    pkg = "networkml"
    pypi_data = get_pypi_data(pkg)
    contributors = get_contributors(pypi_data["github_owner_and_repo"])
    print_by_contributor(pkg, contributors, pypi_data=pypi_data)
    captured = capsys.readouterr()  # capture output
    # dedent removes spacing, using the spacing width from the first line
    output_text = textwrap.dedent(
        """
        CONTRIBUTOR, LOCATION
        * indicates PyPI maintainer
        ---------------------
        cglewis * | USA | United States
        anarkiwi | Wellington, New Zealand | New Zealand
        CStephenson970 | None | None
        renovate-bot | None | None
        lilchurro | None | None
        rashley-iqt | None | None
        jspeed-meyers * | None | None
        pyup-bot | None | None
        alshaboti | Wellington, New Zealand | New Zealand
        jseparovic | Mountain View, CA | United States
        squeeve | None | None
        gregs5 | Washington DC | United States
        krb1997 | None | None
        toddstavish | None | None
        sneakyoctopus12 | None | None
        Hax7 | Palestine | Palestine
        paulgowdy | Menlo Park CA | United States\n"""
    )
    assert captured.out == output_text


def scan_single_repo(repo, summary, output_csv, num=100):
    """Print location results for a single GitHub repository.

    Printing can either be by contributor or by country. Output can
    optionally be stored as a csv.

    Args:
        repo - URL of repo
        summary - whether to print results by country, i.e. summary
        output_csv - whether to store output in csv (default: false)
        num - max number of contributors to analyze

    Returns:
        None
    """
    repo_ending_string = extract_github_owner_and_repo(repo)
    contributors = get_contributors(repo_ending_string, num)

    print("-----------------")
    print("GITHUB REPO: {}".format(repo_ending_string))
    print("-----------------")

    if summary:
        print_by_country(contributors)
    else:
        print_by_contributor(repo_ending_string, contributors, output_csv)


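# Usage sketch (not part of the module's API): print per-contributor
# locations for one repository without writing a csv. The URL below is
# illustrative; any public GitHub repo URL accepted by
# extract_github_owner_and_repo() should work.
#
#     scan_single_repo(
#         "https://github.com/jspeed-meyers/pcap2map",
#         summary=False,
#         output_csv=False,
#     )
#
# Passing summary=True prints aggregated counts by country instead.

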
def get_dataframe_from_repo(repo, num=100):
    """Create pandas dataframe of contributors by country.

    Args:
        repo - a full GitHub repo URL
        num - number of contributors to analyze per repo

    Returns:
        df - a pandas dataframe of contributors by country
        num_contributors - total number of contributors
    """
    # get contributors
    repo_ending_string = extract_github_owner_and_repo(repo)
    contributors = get_contributors(repo_ending_string, num)
    num_contributors = len(contributors)

    # get count of countries
    country_list = []
    for contributor in contributors:
        location = get_contributor_location(contributor)
        country = get_country_from_location(location)
        country_list.append(country)
    country_counter = Counter(country_list)

    # convert counter to pandas dataframe
    df = pd.DataFrame.from_records(
        country_counter.most_common(), columns=["country", "contributor_count"]
    )

    return df, num_contributors


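# Sketch built on get_dataframe_from_repo(); the helper name and URL below are
# illustrative and not part of the original module. Because Counter.most_common()
# returns counts in descending order, the first row of the dataframe holds the
# most common contributor country for the repo.
def example_top_country(repo_url, num=100):
    """Return the most frequent contributor country for a repo, or None."""
    df, num_contributors = get_dataframe_from_repo(repo_url, num)
    if num_contributors == 0 or df.empty:
        return None
    return df.iloc[0]["country"]

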
def test_print_by_contributor_repo(capsys):
    """Unit test for print_by_contributor() for GitHub repo."""
    repo = "jspeed-meyers/pcap2map"
    contributors = get_contributors(repo)
    print_by_contributor(repo, contributors)
    captured = capsys.readouterr()  # capture output printed
    # dedent removes spacing, using the spacing width from the first line
    output_text = textwrap.dedent(
        """
        CONTRIBUTOR, LOCATION
        ---------------------
        jspeed-meyers | None | None\n"""
    )
    assert captured.out == output_text


def test_print_by_country(capsys):
    """Unit test for print_by_country() for networkml python package."""
    repo = "https://www.github.com/iqtlabs/networkml"
    repo_ending_string = extract_github_owner_and_repo(repo)
    contributors = get_contributors(repo_ending_string)
    print_by_country(contributors)
    captured = capsys.readouterr()  # capture output printed to date
    # dedent removes spacing, using the spacing width from the first line
    output_text = textwrap.dedent(
        """
        COUNTRY | # OF CONTRIBUTORS
        ---------------------------
        None 10
        United States 4
        New Zealand 2
        Palestine 1\n"""
    )
    assert captured.out == output_text


def scan_multiple_repos(input_file="repos.txt", num=100):
    """Create csv of data for multiple repos.

    Scan through repos provided in repos.txt and create a single csv that
    stores all contributor-related data for each contributor in each repo.

    Args:
        input_file - file containing repo list, one repo URL per line
        num - max number of contributors to analyze per repo

    Returns:
        None
    """
    # create csv to store multi-repo scan results
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    create_csv("multirepo", timestamp)

    # open file that contains repos to scan and append contributors for each
    # repo to csv
    with open(input_file, "r") as input_repos:
        for repo in input_repos:
            # skip blank lines (a bare "\n" would otherwise be scanned)
            if repo.strip() == "":
                continue
            # strip whitespace before extracting owner and repo name
            repo_ending_string = extract_github_owner_and_repo(repo.strip())
            contributors = get_contributors(repo_ending_string, num)
            for contributor in contributors:
                location = get_contributor_location(contributor)
                country = get_country_from_location(location)
                add_committer_to_csv(
                    "multirepo",
                    repo_ending_string,
                    timestamp,
                    contributor,
                    location,
                    country,
                )


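# Usage sketch (illustrative only): with a repos.txt containing one GitHub
# URL per line, e.g.
#
#     https://github.com/iqtlabs/networkml
#     https://github.com/jspeed-meyers/pcap2map
#
# a multi-repo scan that writes a timestamped "multirepo" csv can be run as:
#
#     scan_multiple_repos("repos.txt", num=50)

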
def scan_single_package(pkg, summary, num=100):
    """Print location results for a single package.

    Printing can either be by contributor or by country.

    Args:
        pkg - name of python package on PyPI
        summary - whether to summarize results by country or not
        num - max number of contributors to analyze

    Returns:
        None
    """
    pypi_data = get_pypi_data(pkg)
    contributors = get_contributors(pypi_data["github_owner_and_repo"], num)

    print("-----------------")
    print("PACKAGE: {}".format(pkg))
    print("GITHUB REPO: {}".format(pypi_data["github_owner_and_repo"]))
    print("-----------------")

    if summary:
        print_by_country(contributors)
    else:
        print_by_contributor(pkg, contributors, pypi_data=pypi_data)


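# Usage sketch (not part of the module): analyze the PyPI package used in the
# tests above. GitHub API rate limits may apply when get_contributors()
# fetches contributor data, so num caps how many contributors are analyzed.
#
#     scan_single_package("networkml", summary=True, num=100)   # counts by country
#     scan_single_package("networkml", summary=False)           # per-contributor detail

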
def test_get_contributors():
    """Unit test for get_contributors()."""
    assert get_contributors("jspeed-meyers/pcap2map") == ["jspeed-meyers"]