class TestDBConnector(unittest.TestCase):
    """Round-trip tests for DBConnector against a live database.

    Every test is skipped when the TRAVIS environment variable is set
    (presumably because no database is reachable on that CI service --
    confirm against the CI configuration).
    """

    def setUp(self):
        # Report a skip instead of letting the tests pass as silent no-ops.
        if os.environ.get('TRAVIS') is not None:
            self.skipTest('TRAVIS is set; database tests are not run there')
        self.db = DBConnector()

    def test_add_and_delete_data(self):
        """Each model can be inserted and then deleted by its id."""
        github_data_import = GitHubData(
            date_updated=datetime.datetime.now(),
            language='repo_name',
            pull_requests=0,
            open_issues=0,
            number_of_commits=0,
            number_of_branches=0,
            number_of_releases=0,
            number_of_contributors=0,
            number_of_watchers=0,
            number_of_stargazers=0,
            number_of_forks=0
        )
        res = self.db.add_data(github_data_import)
        self.assertIsInstance(res, GitHubData)
        res = self.db.delete_data(res.id, 'github_data')
        self.assertTrue(res)

        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=0,
            nodejs_downloads=0,
            php_downloads=0,
            python_downloads=0,
            ruby_downloads=0
        )
        res = self.db.add_data(packagedata)
        self.assertIsInstance(res, PackageManagerData)
        res = self.db.delete_data(res.id, 'package_manager_data')
        self.assertTrue(res)

    def test_get_data(self):
        """get_data returns a list whose first element is a GitHubData."""
        github_data = self.db.get_data(GitHubData)
        self.assertIsInstance(github_data, list)
        # Fail with a readable message rather than an IndexError when the
        # table is empty; the check below needs at least one row.
        self.assertTrue(
            github_data,
            'expected at least one GitHubData row in the database')
        self.assertIsInstance(github_data[0], GitHubData)
class GitHub(object):
    """Gather a time stamped snapshot of a GitHub repository's statistics
    and persist it through DBConnector"""

    def __init__(self):
        # In production the token is mandatory, so a missing variable raises
        # KeyError; elsewhere a missing variable simply yields None.
        if os.environ.get('ENV') == 'prod':
            token = os.environ['GITHUB_TOKEN']
        else:
            token = os.environ.get('GITHUB_TOKEN')
        self.github = github3.login(token=token)
        self.db = DBConnector()

    def update_library_data(self, repo_user, repo_name):
        """Counts the items of a GitHub repo and stores the totals in the DB

        :param repo_user: the username of the repo's owner
        :param repo_name: the name of the GitHub repo; it is also stored
                          in the record's ``language`` field
        :type repo_user:  string
        :type repo_name:  string

        :returns: Whatever ``self.db.add_data`` returns for the new record
        :rtype:   Data object
        """
        repo = self.github.repository(repo_user, repo_name)

        def tally(items):
            # The iterators are lazy, so counting means walking them fully.
            return sum(1 for _ in items)

        # All counts are gathered before the record is built so that the
        # timestamp is taken after the API traffic has finished.
        pulls = tally(repo.iter_pulls())
        issues = tally(repo.iter_issues())
        commits = tally(repo.iter_commits())
        branches = tally(repo.iter_branches())
        releases = tally(repo.iter_releases())
        contributors = tally(repo.iter_contributors())
        watchers = tally(repo.iter_subscribers())
        stargazers = tally(repo.iter_stargazers())
        forks = repo.forks_count

        snapshot = GitHubData(
            date_updated=datetime.datetime.now(),
            language=repo_name,
            pull_requests=pulls,
            open_issues=issues,
            number_of_commits=commits,
            number_of_branches=branches,
            number_of_releases=releases,
            number_of_contributors=contributors,
            number_of_watchers=watchers,
            number_of_stargazers=stargazers,
            number_of_forks=forks
        )
        return self.db.add_data(snapshot)
class TestDBConnector(unittest.TestCase):
    """Round-trip tests for DBConnector against a live database.

    Every test is skipped when the TRAVIS environment variable is set
    (presumably because no database is reachable on that CI service --
    confirm against the CI configuration).
    """

    def setUp(self):
        # Report a skip instead of letting the tests pass as silent no-ops.
        if os.environ.get('TRAVIS') is not None:
            self.skipTest('TRAVIS is set; database tests are not run there')
        self.db = DBConnector()

    def test_add_and_delete_data(self):
        """Each model can be inserted and then deleted by its id."""
        github_data_import = GitHubData(
            date_updated=datetime.datetime.now(),
            language='repo_name',
            pull_requests=0,
            open_issues=0,
            number_of_commits=0,
            number_of_branches=0,
            number_of_releases=0,
            number_of_contributors=0,
            number_of_watchers=0,
            number_of_stargazers=0,
            number_of_forks=0)
        res = self.db.add_data(github_data_import)
        self.assertIsInstance(res, GitHubData)
        res = self.db.delete_data(res.id, 'github_data')
        self.assertTrue(res)

        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=0,
            nodejs_downloads=0,
            php_downloads=0,
            python_downloads=0,
            ruby_downloads=0)
        res = self.db.add_data(packagedata)
        self.assertIsInstance(res, PackageManagerData)
        res = self.db.delete_data(res.id, 'package_manager_data')
        self.assertTrue(res)

    def test_get_data(self):
        """get_data returns a list whose first element is a GitHubData."""
        github_data = self.db.get_data(GitHubData)
        self.assertIsInstance(github_data, list)
        # Fail with a readable message rather than an IndexError when the
        # table is empty; the check below needs at least one row.
        self.assertTrue(
            github_data,
            'expected at least one GitHubData row in the database')
        self.assertIsInstance(github_data[0], GitHubData)
class PackageManagers(object):
    """Collect time stamped package manager data from various package
    managers and store in a DB"""

    def __init__(self):
        self.db = DBConnector()

    def update_package_manager_data(self, package_manager_urls):
        """Gets data given package manager urls and adds it to the DB

        Each URL is tested against every known package pattern; URLs that
        match nothing are ignored and the corresponding counts stay None.

        :param package_manager_urls: URL(s) to the package you want to obtain
                                     download data from
        :type package_manager_urls:  Array of strings

        :returns: Returns the data object that was added to the DB
        :rtype:   Data object
        """
        num_total_csharp_downloads = None
        num_nodejs_monthly_downloads = None
        num_php_downloads = None
        num_python_downloads = None
        num_ruby_downloads = None
        num_python_http_client_downloads = None
        num_python_open_source_library_data_collector_downloads = None
        num_ruby_http_client_downloads = None
        num_csharp_http_client_downloads = None
        num_php_http_client_downloads = None
        num_node_http_client_downloads = None

        for url in package_manager_urls:
            # Strip the scheme so the "http" of "https://" cannot satisfy a
            # substring test that is meant for the package name.
            url_without_scheme = url.split('://', 1)[-1]

            if url == 'https://www.nuget.org/packages/SendGrid':
                num_total_csharp_downloads = self.csharp_downloads(url)
            if url == 'https://www.nuget.org/packages/SendGrid.CSharp.HTTP.Client':
                num_csharp_http_client_downloads = self.csharp_downloads(url)

            if 'https://www.npmjs.com/package/sendgrid' in url:
                # sendgrid-rest shares the prefix but is tracked separately.
                if url != 'https://www.npmjs.com/package/sendgrid-rest':
                    num_nodejs_monthly_downloads = self.nodejs_downloads(url)
            if 'https://www.npmjs.com/package/sendgrid-rest' in url:
                num_node_http_client_downloads = self.nodejs_downloads(url)

            if url == 'https://packagist.org/packages/sendgrid/sendgrid':
                num_php_downloads = self.php_downloads(url)
            if url == 'https://packagist.org/packages/sendgrid/php-http-client':
                num_php_http_client_downloads = self.php_downloads(url)

            if 'pypi' in url:
                if 'sendgrid' in url:
                    num_python_downloads = self.python_downloads(url)
                if 'python_http_client' in url:
                    num_python_http_client_downloads = \
                        self.python_downloads(url)
                if 'open_source_library_data_collector' in url:
                    num_python_open_source_library_data_collector_downloads = \
                        self.python_downloads(url)

            if 'rubygems' in url:
                if 'sendgrid' in url:
                    num_ruby_downloads = self.ruby_downloads(url)
                # Tested against the scheme-less URL: with the full URL this
                # was true for every rubygems link, so the sendgrid gem's
                # count leaked into the http client slot.
                if 'http' in url_without_scheme:
                    num_ruby_http_client_downloads = self.ruby_downloads(url)

        return self.update_db(
            num_total_csharp_downloads,
            num_nodejs_monthly_downloads,
            num_php_downloads,
            num_python_downloads,
            num_ruby_downloads,
            num_python_http_client_downloads,
            num_python_open_source_library_data_collector_downloads,
            num_ruby_http_client_downloads,
            num_csharp_http_client_downloads,
            num_php_http_client_downloads,
            num_node_http_client_downloads)

    def _first_match_text(self, url, tag, css_class):
        """Fetches a page and returns the text of its first matching element

        :param url:       the URL of the page to download
        :param tag:       the HTML tag name to search for
        :param css_class: the value of the ``class`` attribute to match
        :type url:        string
        :type tag:        string
        :type css_class:  string

        :returns: All text nodes of the first matching element, concatenated
        :rtype:   string
        :raises IndexError: if the page has no matching element
        """
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        matches = soup.findAll(tag, {"class": css_class})
        if not matches:
            # Same exception type that indexing an empty result list raised
            # before, now saying which page and selector came up empty.
            raise IndexError(
                'no <{0} class="{1}"> element found at {2}'.format(
                    tag, css_class, url))
        return ''.join(matches[0].findAll(text=True))

    def csharp_downloads(self, url):
        """Gets library download data from nuget.org

        :param url: the URL of the package
        :type url:  string

        :returns: The number of total library downloads, commas removed
        :rtype:   string
        """
        text = self._first_match_text(url, "p", "stat-number")
        return text.replace(',', '')

    def nodejs_downloads(self, url):
        """Gets library download data from npmjs.org

        :param url: the URL of the package
        :type url:  string

        :returns: The number of library downloads in the last month,
                  commas removed
        :rtype:   string
        """
        text = self._first_match_text(
            url, "strong", "pretty-number monthly-downloads")
        return text.replace(',', '')

    def php_downloads(self, url):
        """Gets library download data from packagist.org

        :param url: the URL of the package
        :type url:  string

        :returns: The number of total library downloads
        :rtype:   string
        """
        text = self._first_match_text(
            url, "div", "facts col-xs-12 col-sm-6 col-md-12")
        # The first 11 characters are skipped (presumably a leading label --
        # verify against the live page), thin spaces (U+2009) are removed,
        # and only the first remaining line is kept.
        lines = text[11:].replace(u('\u2009'), '').split('\n')
        return str(lines[0])

    def python_downloads(self, url):
        """Gets library download data from pypi.python.org

        :param url: the URL of the package
        :type url:  string

        :returns: The number of library downloads in the last month
        :rtype:   string
        """
        text = self._first_match_text(url, "ul", "nodot")
        # Keep what lies between the last "week" and the following
        # "downloads", drop its first two characters, then remove thin
        # spaces (U+2009).
        flattened = text.replace(u('\n'), '')
        after_week = flattened.rpartition('week')[-1]
        before_downloads = after_week.rpartition('downloads')[0]
        return before_downloads[2:].replace(u('\u2009'), '')

    def ruby_downloads(self, url):
        """Gets library download data from rubygems.org

        :param url: the URL of the package
        :type url:  string

        :returns: The number of total library downloads, commas removed
        :rtype:   string
        """
        text = self._first_match_text(url, "span", "gem__downloads")
        return text.replace(',', '')

    def update_db(
            self,
            num_total_csharp_downloads,
            num_nodejs_monthly_downloads,
            num_php_downloads,
            num_python_downloads,
            num_ruby_downloads,
            num_python_http_client_downloads,
            num_python_open_source_library_data_collector_downloads,
            num_ruby_http_client_downloads,
            num_csharp_http_client_downloads,
            num_php_http_client_downloads,
            num_node_http_client_downloads
            ):
        """Update the DB with the package manager data

        Every count is passed through unchanged; update_package_manager_data
        supplies None for a package it did not scrape.

        :param num_total_csharp_downloads:       # of total downloads
        :param num_nodejs_monthly_downloads:     # of downloads in the last month
        :param num_php_downloads:                # of total downloads
        :param num_python_downloads:             # of downloads in the last month
        :param num_ruby_downloads:               # of total downloads
        :param num_python_http_client_downloads: count for the Python HTTP
                                                 client package
        :param num_python_open_source_library_data_collector_downloads:
            count for the open_source_library_data_collector package
        :param num_ruby_http_client_downloads:   count for the Ruby HTTP
                                                 client package
        :param num_csharp_http_client_downloads: count for the C# HTTP
                                                 client package
        :param num_php_http_client_downloads:    count for the PHP HTTP
                                                 client package
        :param num_node_http_client_downloads:   count for the Node HTTP
                                                 client package

        :returns: Returns the data object that was added to the DB
        :rtype:   Data object
        """
        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=num_total_csharp_downloads,
            nodejs_downloads=num_nodejs_monthly_downloads,
            php_downloads=num_php_downloads,
            python_downloads=num_python_downloads,
            ruby_downloads=num_ruby_downloads,
            python_http_client_downloads=num_python_http_client_downloads,
            csharp_http_client_downloads=num_csharp_http_client_downloads,
            ruby_http_client_downloads=num_ruby_http_client_downloads,
            php_http_client_downloads=num_php_http_client_downloads,
            open_source_library_data_collector_downloads=num_python_open_source_library_data_collector_downloads,
            node_http_client_downloads=num_node_http_client_downloads
        )
        return self.db.add_data(packagedata)