class TestDBConnector(unittest.TestCase):
    """Round-trip tests for DBConnector using GitHubData and
    PackageManagerData rows.

    The tests are skipped whenever the TRAVIS environment variable is set
    (presumably because no database is reachable on Travis CI -- confirm).
    """

    def setUp(self):
        # Report the tests as skipped instead of letting them pass without
        # asserting anything when running on Travis.
        if os.environ.get('TRAVIS') is not None:
            self.skipTest('TRAVIS is set; DBConnector tests are not run')
        self.db = DBConnector()

    def test_add_and_delete_data(self):
        """A row of each type can be added and then deleted by its id."""
        github_data_import = GitHubData(
            date_updated=datetime.datetime.now(),
            language='repo_name',
            pull_requests=0,
            open_issues=0,
            number_of_commits=0,
            number_of_branches=0,
            number_of_releases=0,
            number_of_contributors=0,
            number_of_watchers=0,
            number_of_stargazers=0,
            number_of_forks=0
        )
        res = self.db.add_data(github_data_import)
        self.assertIsInstance(res, GitHubData)
        res = self.db.delete_data(res.id, 'github_data')
        self.assertTrue(res)

        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=0,
            nodejs_downloads=0,
            php_downloads=0,
            python_downloads=0,
            ruby_downloads=0
        )
        res = self.db.add_data(packagedata)
        self.assertIsInstance(res, PackageManagerData)
        res = self.db.delete_data(res.id, 'package_manager_data')
        self.assertTrue(res)

    def test_get_data(self):
        """get_data returns a list whose items are GitHubData objects."""
        github_data = self.db.get_data(GitHubData)
        self.assertIsInstance(github_data, list)
        # Fail with a readable message rather than an IndexError when the
        # query returns no rows.
        self.assertTrue(github_data, 'expected at least one GitHubData row')
        self.assertIsInstance(github_data[0], GitHubData)
Beispiel #2
0
class GitHub(object):
    """Gather point-in-time statistics for a GitHub repository and save
       them through the DB connector"""
    def __init__(self):
        running_in_prod = os.environ.get('ENV') == 'prod'
        if running_in_prod:
            # Indexing (rather than .get) raises KeyError when the token
            # is missing in production.
            github_token = os.environ['GITHUB_TOKEN']
        else:
            # Outside production (i.e. not in Heroku) the token may be absent
            github_token = os.environ.get('GITHUB_TOKEN')
        self.github = github3.login(token=github_token)
        self.db = DBConnector()

    def update_library_data(self, repo_user, repo_name):
        """Counts a repository's activity and stores one GitHubData row

        :param repo_user: the username of the repo's owner
        :param repo_name: the name of the GitHub repo; also stored in the
                          row's ``language`` field
        :type repo_user:  string
        :type repo_name:  string

        :returns: Whatever the DB connector's add_data returns for the
                  new row
        :rtype:   Data object
        """
        repo = self.github.repository(repo_user, repo_name)

        # (result key, name of the repository iterator whose items are
        # counted); the iterators are consumed in exactly this order.
        counters = (
            ('num_pull_requests', 'iter_pulls'),
            ('num_issues', 'iter_issues'),
            ('num_commits', 'iter_commits'),
            ('num_branches', 'iter_branches'),
            ('num_releases', 'iter_releases'),
            ('num_contributors', 'iter_contributors'),
            ('num_watchers', 'iter_subscribers'),
            ('num_stargazers', 'iter_stargazers'),
        )
        lib_data = {}
        for key, iterator_name in counters:
            total = 0
            for _ in getattr(repo, iterator_name)():
                total += 1
            lib_data[key] = total
        # Forks are read from an attribute rather than counted
        lib_data['num_forks'] = repo.forks_count

        # The timestamp is taken only after all counts have been collected
        record = GitHubData(
            date_updated=datetime.datetime.now(),
            language=repo_name,
            pull_requests=lib_data['num_pull_requests'],
            open_issues=lib_data['num_issues'],
            number_of_commits=lib_data['num_commits'],
            number_of_branches=lib_data['num_branches'],
            number_of_releases=lib_data['num_releases'],
            number_of_contributors=lib_data['num_contributors'],
            number_of_watchers=lib_data['num_watchers'],
            number_of_stargazers=lib_data['num_stargazers'],
            number_of_forks=lib_data['num_forks']
        )
        return self.db.add_data(record)
class TestDBConnector(unittest.TestCase):
    """Round-trip tests for DBConnector using GitHubData and
    PackageManagerData rows.

    The tests are skipped whenever the TRAVIS environment variable is set
    (presumably because no database is reachable on Travis CI -- confirm).
    """

    def setUp(self):
        # Report the tests as skipped instead of letting them pass without
        # asserting anything when running on Travis.
        if os.environ.get('TRAVIS') is not None:
            self.skipTest('TRAVIS is set; DBConnector tests are not run')
        self.db = DBConnector()

    def test_add_and_delete_data(self):
        """A row of each type can be added and then deleted by its id."""
        github_data_import = GitHubData(
            date_updated=datetime.datetime.now(),
            language='repo_name',
            pull_requests=0,
            open_issues=0,
            number_of_commits=0,
            number_of_branches=0,
            number_of_releases=0,
            number_of_contributors=0,
            number_of_watchers=0,
            number_of_stargazers=0,
            number_of_forks=0)
        res = self.db.add_data(github_data_import)
        self.assertIsInstance(res, GitHubData)
        res = self.db.delete_data(res.id, 'github_data')
        self.assertTrue(res)

        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=0,
            nodejs_downloads=0,
            php_downloads=0,
            python_downloads=0,
            ruby_downloads=0)
        res = self.db.add_data(packagedata)
        self.assertIsInstance(res, PackageManagerData)
        res = self.db.delete_data(res.id, 'package_manager_data')
        self.assertTrue(res)

    def test_get_data(self):
        """get_data returns a list whose items are GitHubData objects."""
        github_data = self.db.get_data(GitHubData)
        self.assertIsInstance(github_data, list)
        # Fail with a readable message rather than an IndexError when the
        # query returns no rows.
        self.assertTrue(github_data, 'expected at least one GitHubData row')
        self.assertIsInstance(github_data[0], GitHubData)
class PackageManagers(object):
    """Collect time stamped package manager data from various package managers
       and store in a DB"""
    def __init__(self):
        self.db = DBConnector()

    def update_package_manager_data(self, package_manager_urls):
        """Gets data given package manager urls and adds it to the DB

        Each URL is checked against every known package pattern, and the
        matching scraper is called for it. Counts for packages with no
        matching URL are stored as None.

        :param package_manager_urls: URL(s) to the package you want to obtain
                                     download data from
        :type package_manager_urls:  Iterable of strings

        :returns: Returns the data object that was added to the DB
        :rtype:   Data object
        """
        num_total_csharp_downloads = None
        num_nodejs_monthly_downloads = None
        num_php_downloads = None
        num_python_downloads = None
        num_ruby_downloads = None
        num_python_http_client_downloads = None
        num_python_open_source_library_data_collector_downloads = None
        num_ruby_http_client_downloads = None
        num_csharp_http_client_downloads = None
        num_php_http_client_downloads = None
        num_node_http_client_downloads = None
        for url in package_manager_urls:
            # Host and path only. Every "http://" or "https://" URL contains
            # "http" in its scheme, so testing the full URL would make the
            # Ruby HTTP client check below match every rubygems URL.
            url_without_scheme = url.split('://', 1)[-1]
            if 'https://www.nuget.org/packages/SendGrid' == url:
                num_total_csharp_downloads = self.csharp_downloads(url)
            if 'https://www.nuget.org/packages/SendGrid.CSharp.HTTP.Client' == url:
                num_csharp_http_client_downloads = self.csharp_downloads(url)
            if 'https://www.npmjs.com/package/sendgrid' in url:
                # The sendgrid-rest URL shares this prefix; it is handled
                # by the next check instead.
                if 'https://www.npmjs.com/package/sendgrid-rest' != url:
                    num_nodejs_monthly_downloads = self.nodejs_downloads(url)
            if 'https://www.npmjs.com/package/sendgrid-rest' in url:
                num_node_http_client_downloads = self.nodejs_downloads(url)
            if 'https://packagist.org/packages/sendgrid/sendgrid' == url:
                num_php_downloads = self.php_downloads(url)
            if 'https://packagist.org/packages/sendgrid/php-http-client' == url:
                num_php_http_client_downloads = self.php_downloads(url)
            if 'pypi' in url and 'sendgrid' in url:
                num_python_downloads = self.python_downloads(url)
            if 'pypi' in url and 'python_http_client' in url:
                num_python_http_client_downloads = self.python_downloads(url)
            if 'pypi' in url and 'open_source_library_data_collector' in url:
                num_python_open_source_library_data_collector_downloads = \
                    self.python_downloads(url)
            if 'rubygems' in url and 'sendgrid' in url:
                num_ruby_downloads = self.ruby_downloads(url)
            if 'rubygems' in url and 'http' in url_without_scheme:
                num_ruby_http_client_downloads = self.ruby_downloads(url)

        return self.update_db(num_total_csharp_downloads,
                              num_nodejs_monthly_downloads,
                              num_php_downloads,
                              num_python_downloads,
                              num_ruby_downloads,
                              num_python_http_client_downloads,
                              num_python_open_source_library_data_collector_downloads,
                              num_ruby_http_client_downloads,
                              num_csharp_http_client_downloads,
                              num_php_http_client_downloads,
                              num_node_http_client_downloads)

    def _first_match_text(self, url, tag, css_class):
        """Fetches a page and returns the text of its first matching element

        :param url:       the URL of the page to download
        :param tag:       the HTML tag name to search for
        :param css_class: the value of the element's class attribute
        :type url:        string
        :type tag:        string
        :type css_class:  string

        :returns: All text nodes of the first matching element, concatenated
        :rtype:   string
        :raises IndexError: if the page contains no matching element
        """
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")
        matches = soup.findAll(tag, {"class": css_class})
        if not matches:
            # Same exception type as indexing an empty list, but with a
            # message that says which page and selector failed.
            raise IndexError(
                'no <%s class="%s"> element found at %s'
                % (tag, css_class, url))
        return ''.join(matches[0].findAll(text=True))

    def csharp_downloads(self, url):
        """Gets library download data from nuget.org

        :param url: the URL of the package
        :type url:  string

        :returns: The text of the first "stat-number" paragraph with commas
                  removed (the number of total library downloads)
        :rtype:   string
        :raises IndexError: if the page has no such element
        """
        return self._first_match_text(url, "p", "stat-number") \
            .replace(',', '')

    def nodejs_downloads(self, url):
        """Gets library download data from npmjs.org

        :param url: the URL of the package
        :type url:  string

        :returns: The text of the first monthly-downloads element with
                  commas removed (downloads in the last month)
        :rtype:   string
        :raises IndexError: if the page has no such element
        """
        return self._first_match_text(
            url, "strong", "pretty-number monthly-downloads"
        ).replace(',', '')

    def php_downloads(self, url):
        """Gets library download data from packagist.org

        :param url: the URL of the package
        :type url:  string

        :returns: The first line of the "facts" block after dropping its
                  first 11 characters and all thin spaces (the number of
                  total library downloads)
        :rtype:   string
        :raises IndexError: if the page has no such element
        """
        facts = self._first_match_text(
            url, "div", "facts col-xs-12 col-sm-6 col-md-12")
        # NOTE(review): the 11 skipped characters are presumably the label
        # that precedes the install count -- confirm against the live page.
        lines = facts[11:].replace(u('\u2009'), '').split('\n')
        return str(lines[0])

    def python_downloads(self, url):
        """Gets library download data from pypi.python.org

        :param url: the URL of the package
        :type url:  string

        :returns: The text between the last "week" and the last following
                  "downloads" in the first "nodot" list, minus its first two
                  characters and any thin spaces (presumably the downloads
                  in the last month)
        :rtype:   string
        :raises IndexError: if the page has no such element
        """
        stats = self._first_match_text(url, "ul", "nodot")
        num_python_downloads = \
            stats.replace(u('\n'), '') \
            .rpartition('week')[-1] \
            .rpartition('downloads')[0][2:] \
            .replace(u('\u2009'), '')
        return num_python_downloads

    def ruby_downloads(self, url):
        """Gets library download data from rubygems.org

        :param url: the URL of the package
        :type url:  string

        :returns: The text of the first "gem__downloads" element with commas
                  removed (the number of total library downloads)
        :rtype:   string
        :raises IndexError: if the page has no such element
        """
        return self._first_match_text(url, "span", "gem__downloads") \
            .replace(',', '')

    def update_db(
            self,
            num_total_csharp_downloads,
            num_nodejs_monthly_downloads,
            num_php_downloads,
            num_python_downloads,
            num_ruby_downloads,
            num_python_http_client_downloads,
            num_python_open_source_library_data_collector_downloads,
            num_ruby_http_client_downloads,
            num_csharp_http_client_downloads,
            num_php_http_client_downloads,
            num_node_http_client_downloads
            ):
        """Update the DB with the package manager data

        Every value is passed to PackageManagerData unchanged, so it may be
        None when no count was collected for that package.

        :param num_total_csharp_downloads:   # of total downloads
        :param num_nodejs_monthly_downloads: # of downloads in the last month
        :param num_php_downloads:            # of total downloads
        :param num_python_downloads:         # of downloads in the last month
        :param num_ruby_downloads:           # of total downloads
        :param num_python_http_client_downloads:
            count for the Python HTTP client package
        :param num_python_open_source_library_data_collector_downloads:
            count for the open_source_library_data_collector package
        :param num_ruby_http_client_downloads:
            count for the Ruby HTTP client package
        :param num_csharp_http_client_downloads:
            count for the C# HTTP client package
        :param num_php_http_client_downloads:
            count for the PHP HTTP client package
        :param num_node_http_client_downloads:
            count for the Node.js HTTP client package

        :returns: Returns the data object that was added to the DB
        :rtype:   Data object
        """
        packagedata = PackageManagerData(
            date_updated=datetime.datetime.now(),
            csharp_downloads=num_total_csharp_downloads,
            nodejs_downloads=num_nodejs_monthly_downloads,
            php_downloads=num_php_downloads,
            python_downloads=num_python_downloads,
            ruby_downloads=num_ruby_downloads,
            python_http_client_downloads=num_python_http_client_downloads,
            csharp_http_client_downloads=num_csharp_http_client_downloads,
            ruby_http_client_downloads=num_ruby_http_client_downloads,
            php_http_client_downloads=num_php_http_client_downloads,
            open_source_library_data_collector_downloads=num_python_open_source_library_data_collector_downloads,
            node_http_client_downloads=num_node_http_client_downloads
            )
        return self.db.add_data(packagedata)