Esempio n. 1
0
    def setUpClass(cls) -> None:
        """Prepare the shared fixtures used by the ``Issues`` tests.

        Creates ``testDB.db`` with an empty ``Issues`` table, stores a sample
        payload in ``cls.testData``, stores the expected rows in
        ``cls.dataComparison`` and builds the ``Issues`` object under test in
        ``cls.issue``.
        """
        # Test database setup
        cls.dbConnector = DatabaseConnector(databaseFileName="testDB.db")
        cls.dbConnector.createDatabase()
        cls.dbConnector.openDatabaseConnection()
        cls.dbConnector.executeSQL(
            sql=
            'CREATE TABLE Issues (ID INTEGER, Count INTEGER, Title TEXT, Author TEXT, Assignees TEXT, Labels TEXT, Created_At TEXT, Updated_At TEXT, Closed_At TEXT, PRIMARY KEY(ID));'
        )

        # Test data
        # NOTE(review): 'assignees' and 'labels' hold a single mapping each.
        # A dict literal cannot carry the same key twice (the last value
        # wins), so only one 'login' and one 'name' can be expressed this
        # way. If the code under test expects several assignees/labels, these
        # two entries presumably need to become lists of mappings - confirm
        # against the Issues implementation before changing them.
        cls.testData = {
            0: {
                'id': 0,
                'number': 7357,
                'title': 'testTitle',
                'user': {
                    'login': '******'
                },
                'assignees': {
                    'login': '******'
                },
                'labels': {
                    'name': 'testLabel2'
                },
                'state': 'testState',
                'created_at': 'testCreate',
                'updated_at': 'testUpdate',
                'pushed_at': 'testPush'
            }
        }

        # NOTE(review): this row has 11 values while the Issues table created
        # above declares 9 columns - verify it is the intended expectation.
        cls.dataComparison = [
            (0, 'testName', 'testLogin', 'testPrivate', 'testFork',
             'testCreate', 'testUpdate', 'testPush', 7357, 7357, 7357)
        ]

        # Issues class setup
        cls.issue = Issues(dbConnection=cls.dbConnector,
                           oauthToken="token",
                           repository="repo",
                           username="******",
                           url="url")
Esempio n. 2
0
def transform_issues():
    """Collect the issues of every ``k8-proxy`` repository into one table.

    The issues of each repository returned by ``Repos('k8-proxy')`` are
    concatenated, the unused columns are dropped, and the nested ``user``,
    ``assignees`` and ``pull_request`` fields are flattened:

    * ``user_``        - login of the issue author
    * ``repo``         - third-from-last path segment of the issue ``url``
    * ``assignee``     - ``None``, a single login, or a list of logins
    * ``pull_request`` - the pull request URL, or ``None`` when missing

    Returns:
        pandas.DataFrame: a fresh frame with the columns listed in
        ``final_columns`` (in that order). It is empty, but has those
        columns, when no issue was found.
    """
    final_columns = [
        'title', 'state', 'url', 'repo', 'user_', 'author_association',
        'assignee', 'created_at', 'updated_at', 'closed_at', 'number',
        'comments', 'node_id', 'id', 'active_lock_reason', 'pull_request'
    ]

    # Gather all frames first and concatenate once; concatenating inside the
    # loop copies the accumulated data again on every iteration.
    # Assumes list_repos() yields the repository names - TODO confirm.
    frames = [
        Issues('k8-proxy', repo).get_issues()
        for repo in Repos('k8-proxy').list_repos()
    ]
    data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    if data.empty:
        return pd.DataFrame(columns=final_columns)

    # ``columns=`` instead of a positional axis: pandas 2.0 made every
    # argument of DataFrame.drop except ``labels`` keyword-only.
    data = data.drop(columns=[
        'labels_url', 'repository_url', 'locked', 'comments_url',
        'performed_via_github_app', 'events_url', 'html_url', 'labels',
        'assignee', 'milestone'
    ])
    data['user'] = pd.json_normalize(data['user'])['login']
    data['repo'] = data['url'].str.split('/').str[-3]

    def _assignee_value(assignees):
        # None for no assignee, a bare login for one, a list for several
        logins = [assignee['login'] for assignee in assignees]
        if not logins:
            return None
        if len(logins) == 1:
            return logins[0]
        return logins

    # Whole-column assignment replaces the former ``data[col][i] = ...``
    # chained assignment, which pandas does not guarantee to write through.
    data['assignee'] = pd.Series(
        [_assignee_value(assignees) for assignees in data['assignees']],
        index=data.index,
        dtype=object)
    data['pull_request'] = pd.Series(
        [None if pd.isna(pr) else pr['url'] for pr in data['pull_request']],
        index=data.index,
        dtype=object)

    # The author column must be called 'user_' to match ``final_columns``
    data = data.drop(columns=['assignees']).rename(columns={'user': 'user_'})
    return data[final_columns].copy()
Esempio n. 3
0
 def get_session_issues(self):
     """Download ``self.issues``, parse the body as XML and wrap it.

     Returns:
         Issues: built from the mapping produced by ``xmltodict.parse``.
     """
     # GET the URL stored on the instance and parse the response text
     parsed = xmltodict.parse(requests.get(self.issues).text)
     return Issues(parsed)
Esempio n. 4
0
    def startDataCollection(self) -> None:
        """Run every collector for the configured repository, in a fixed order.

        After the database file and tables are prepared, the stages run as:
        languages, repository information, branches, forks, issues, then the
        commits of every branch stored in the ``Branches`` table, and finally
        the files of every commit stored in the ``Commits`` table.
        """

        def _collectData(collector) -> int:
            # Fetch one batch, store its first element, and hand the second
            # element to iterateNext(), whose result is returned as-is.
            fetched = collector.getData()
            collector.insertData(dataset=fetched[0])
            return collector.iterateNext(fetched[1])

        def _scrapeData(collector) -> int:
            # Here insertData() is called without a dataset argument
            collector.insertData()
            return 0

        def _showProgression(collector, maxIterations: int) -> None:
            # Callers run _collectData() once before calling this helper,
            # which is presumably why one iteration is subtracted.
            remaining = range(abs(maxIterations) - 1)
            for _ in tqdm(remaining):
                _collectData(collector)

        self.createFileTablesColumns(dbConnection=self.checkForFile())

        # Keyword arguments shared by every GitHub API collector
        apiArguments = {
            "dbConnection": self.dbConnector,
            "oauthToken": self.token,
            "repository": self.repository,
            "username": self.username,
        }

        branchCollector = Branches(
            url=
            "https://api.github.com/repos/{}/{}/branches?per_page=100&page={}",
            **apiArguments)
        forksCollector = Forks(
            url="https://api.github.com/repos/{}/{}/forks?per_page=100&page={}",
            **apiArguments)
        issuesCollector = Issues(
            url=
            "https://api.github.com/repos/{}/{}/issues?state=all&per_page=100&page={}",
            **apiArguments)
        languageCollector = Languages(
            url=
            "https://api.github.com/repos/{}/{}/languages?per_page=100&page={}",
            **apiArguments)
        repositoryCollector = Repository(
            url="https://api.github.com/repos/{}/{}?per_page=100&page={}",
            **apiArguments)

        # (banner, collector) pairs in the order they are executed
        stages = (
            ("\nRepository Languages", languageCollector),  # One request only
            ("\nRepository Information",
             repositoryCollector),  # One request only
            ("\nRepository Branches",
             branchCollector),  # Estimated < 10 requests
            ("\nRepository Forks", forksCollector),  # Estimated < 10 requests
            ("\nRepository Issues",
             issuesCollector),  # Estimated < 20 requests
        )
        for banner, collector in stages:
            print(banner)
            pageCount = _collectData(collector)
            _showProgression(collector, pageCount)

        # Commits: one collector per stored branch; the running ID is carried
        # from one collector to the next through exportID().
        commitsID = 0
        for branchRow in self.dbConnector.selectColumn(table="Branches",
                                                       column="Name"):
            branchName = branchRow[0]
            print("\nRepository Commits from Branch {}".format(branchName))
            commitsCollector = Commits(
                id=commitsID,
                sha=branchName,
                url=
                "https://api.github.com/repos/{}/{}/commits?per_page=100&page={}&sha={}",
                **apiArguments)
            # Estimated to have the most requests
            commitPages = _collectData(commitsCollector)
            _showProgression(commitsCollector, commitPages)
            commitsID = commitsCollector.exportID()

        # TODO: Implement a loading bar for the Files module
        # TODO: Reduce complexity where possible in the Files module

        # Pair every commit with its corresponding branch
        # https://www.geeksforgeeks.org/python-merge-two-lists-into-list-of-tuples/
        branchRows = self.dbConnector.selectColumn(table="Commits",
                                                   column="Branch")
        shaRows = self.dbConnector.selectColumn(table="Commits",
                                                column="Commit_SHA")

        filesID = 0
        for branchRow, shaRow in tuple(zip(branchRows, shaRows)):
            branch, commit = branchRow[0], shaRow[0]
            print("\nRepository Files from Branch {} from Commit {}".format(
                branch, commit))
            filesCollector = Files(
                commitSHA=commit,
                branch=branch,
                dbConnection=self.dbConnector,
                id=filesID,
                repository=self.repository,
                username=self.username,
                url="https://github.com/{}/{}/commit/{}",
            )
            _scrapeData(filesCollector)
            filesID = filesCollector.exportID()
Esempio n. 5
0
 def test_cache(self):
     # For every repository from iter_repos(), fetching with
     # check_cache=True must leave the ``cached`` attribute truthy.
     for owner, project in self.iter_repos():
         tracker = Issues(owner=owner, project=project)
         tracker.fetch_issues(check_cache=True, cache_dir='./')
         # Check the internal variable
         self.assertTrue(tracker.cached)
Esempio n. 6
0
 def test_initialization(self):
     # Build Issues through its constructor arguments, fetch, then run the
     # shared _test_base_ checks for each repository.
     for owner, project in self.iter_repos():
         tracker = Issues(owner=owner, project=project)
         tracker.fetch_issues(check_cache=True, cache_dir='./')
         self._test_base_(tracker, owner, project)
Esempio n. 7
0
 def test_add_project(self):
     # Build an empty Issues object, register the project through
     # add_project(), fetch, then run the shared _test_base_ checks.
     for owner, project in self.iter_repos():
         tracker = Issues()
         tracker.add_project(owner, project)
         tracker.fetch_issues(check_cache=True, cache_dir='./')
         self._test_base_(tracker, owner, project)