def setUpClass(cls) -> None:
    """Create a scratch SQLite database with an Issues table plus the
    fixture data shared by every test in this class."""
    # Test database setup
    cls.dbConnector = DatabaseConnector(databaseFileName="testDB.db")
    cls.dbConnector.createDatabase()
    cls.dbConnector.openDatabaseConnection()
    cls.dbConnector.executeSQL(
        sql=
        'CREATE TABLE Issues (ID INTEGER, Count INTEGER, Title TEXT, Author TEXT, Assignees TEXT, Labels TEXT, Created_At TEXT, Updated_At TEXT, Closed_At TEXT, PRIMARY KEY(ID));'
    )

    # Test data mimicking one entry of the GitHub issues API payload.
    # FIX: 'assignees' and 'labels' were dicts with duplicate keys
    # ({'login': ..., 'login': ...}), which Python silently collapses to a
    # single entry; the API returns *lists* of objects, so lists are used
    # here to actually keep both values.
    cls.testData = {
        0: {
            'id': 0,
            'number': 7357,
            'title': 'testTitle',
            'user': {
                'login': '******'
            },
            'assignees': [{
                'login': '******'
            }, {
                'login': '******'
            }],
            'labels': [{
                'name': 'testLabel1'
            }, {
                'name': 'testLabel2'
            }],
            'state': 'testState',
            'created_at': 'testCreate',
            'updated_at': 'testUpdate',
            'pushed_at': 'testPush'
        }
    }
    # NOTE(review): these comparison fields (name/login/private/fork) look
    # copied from a Repositories test rather than matching the Issues
    # schema above -- confirm against the assertions that consume it.
    cls.dataComparison = [
        (0, 'testName', 'testLogin', 'testPrivate', 'testFork', 'testCreate',
         'testUpdate', 'testPush', 7357, 7357, 7357)
    ]

    # Issues collector under test, wired to the scratch database.
    cls.issue = Issues(dbConnection=cls.dbConnector,
                       oauthToken="token",
                       repository="repo",
                       username="******",
                       url="url")
def transform_issues():
    """Collect the issues of every k8-proxy repository and flatten them
    into a single DataFrame.

    Returns:
        pandas.DataFrame with one row per issue, restricted and ordered to
        the columns in the final projection below.
    """
    data = pd.DataFrame()
    for repo in Repos('k8-proxy').list_repos():
        data = pd.concat([data, Issues('k8-proxy', repo).get_issues()],
                         ignore_index=True)

    # Drop API bookkeeping columns. The keyword form replaces the
    # deprecated positional axis argument (drop(..., 1)).
    data = data.drop(columns=[
        'labels_url', 'repository_url', 'locked', 'comments_url',
        'performed_via_github_app', 'events_url', 'html_url', 'labels',
        'assignee', 'milestone'
    ])

    # Flatten the nested user object down to its login name.
    data.user = pd.json_normalize(data.user).login
    # The repo name is the third-from-last component of the issue URL.
    data['repo'] = data.url.str.split('/').str[-3]

    def _flatten_assignees(assignees):
        """None / single login string / list of logins, by count."""
        if len(assignees) == 0:
            return None
        if len(assignees) == 1:
            return assignees[0]['login']
        return list(pd.DataFrame(assignees).login.values)

    # Series.apply replaces the previous chained-assignment index loops
    # (data['assignee'][i] = ...), which trigger SettingWithCopyWarning
    # and silently stop working under pandas copy-on-write.
    data['assignee'] = data.assignees.apply(_flatten_assignees)
    data['pr'] = data.pull_request.apply(
        lambda pr: None if pd.isna(pr) else pr['url'])

    data = data.drop(columns=['assignees', 'pull_request'])
    # FIX: rename target must be 'user_' to match the projection below;
    # renaming to anything else makes the column selection raise KeyError.
    data = data.rename(columns={"user": "user_", "pr": "pull_request"})
    data = data[[
        'title', 'state', 'url', 'repo', 'user_', 'author_association',
        'assignee', 'created_at', 'updated_at', 'closed_at', 'number',
        'comments', 'node_id', 'id', 'active_lock_reason', 'pull_request'
    ]]
    issues_df = data.copy()
    return issues_df
def get_session_issues(self):
    """Download this session's issues feed and return it wrapped in an
    Issues object built from the parsed XML."""
    raw_feed = requests.get(self.issues)
    parsed_feed = xmltodict.parse(raw_feed.text)
    return Issues(parsed_feed)
def startDataCollection(self) -> None:
    """Run the full data-collection pipeline for the configured repository.

    Order: languages -> repository info -> branches -> forks -> issues ->
    commits (per branch) -> files (scraped per commit). Everything is
    persisted through self.dbConnector.
    """

    def _collectData(collector) -> int or bool:
        # Fetch one page from the API, persist it, and return
        # iterateNext()'s value (next page number, or a stop sentinel).
        # NOTE(review): the annotation "int or bool" evaluates to just
        # `int` at runtime; it documents intent only.
        data = collector.getData()
        collector.insertData(dataset=data[0])
        return collector.iterateNext(data[1])

    def _scrapeData(collector) -> int or bool:
        # Web-scraping collectors are single-shot: no pagination.
        collector.insertData()
        return 0

    def _showProgression(collector, maxIterations: int) -> None:
        # Collect the remaining pages behind a tqdm progress bar. "- 1"
        # because the first page was already fetched by the caller's
        # initial _collectData(); abs() tolerates negative sentinels.
        for iteration in tqdm(range(0, abs(maxIterations) - 1), ):
            _collectData(collector)

    # Make sure the database file, tables, and columns exist up front.
    databaseConnection = self.checkForFile()
    self.createFileTablesColumns(dbConnection=databaseConnection)

    branchCollector = Branches(
        dbConnection=self.dbConnector,
        oauthToken=self.token,
        repository=self.repository,
        username=self.username,
        url=
        "https://api.github.com/repos/{}/{}/branches?per_page=100&page={}",
    )
    forksCollector = Forks(
        dbConnection=self.dbConnector,
        oauthToken=self.token,
        repository=self.repository,
        username=self.username,
        url="https://api.github.com/repos/{}/{}/forks?per_page=100&page={}",
    )
    issuesCollector = Issues(
        dbConnection=self.dbConnector,
        oauthToken=self.token,
        repository=self.repository,
        username=self.username,
        url=
        "https://api.github.com/repos/{}/{}/issues?state=all&per_page=100&page={}",
    )
    languageCollector = Languages(
        dbConnection=self.dbConnector,
        oauthToken=self.token,
        repository=self.repository,
        username=self.username,
        url=
        "https://api.github.com/repos/{}/{}/languages?per_page=100&page={}",
    )
    repositoryCollector = Repository(
        dbConnection=self.dbConnector,
        oauthToken=self.token,
        repository=self.repository,
        username=self.username,
        url="https://api.github.com/repos/{}/{}?per_page=100&page={}",
    )

    print("\nRepository Languages")
    languagePages = _collectData(languageCollector)  # One request only
    _showProgression(languageCollector, languagePages)

    print("\nRepository Information")
    repositoryPages = _collectData(repositoryCollector)  # One request only
    _showProgression(repositoryCollector, repositoryPages)

    print("\nRepository Branches")
    branchPages = _collectData(branchCollector)  # Estimated < 10 requests
    _showProgression(branchCollector, branchPages)

    print("\nRepository Forks")
    forkPages = _collectData(forksCollector)  # Estimated < 10 requests
    _showProgression(forksCollector, forkPages)

    print("\nRepository Issues")
    issuePages = _collectData(issuesCollector)  # Estimated < 20 requests
    _showProgression(issuesCollector, issuePages)

    # Commits are collected per branch; commitsID carries the running
    # primary-key offset from one branch's collector to the next.
    commitsID = 0
    branchList = self.dbConnector.selectColumn(table="Branches",
                                               column="Name")
    for branch in branchList:
        print("\nRepository Commits from Branch {}".format(branch[0]))
        commitsCollector = Commits(
            dbConnection=self.dbConnector,
            id=commitsID,
            oauthToken=self.token,
            repository=self.repository,
            sha=branch[0],
            username=self.username,
            url=
            "https://api.github.com/repos/{}/{}/commits?per_page=100&page={}&sha={}",
        )
        commitPages = _collectData(
            commitsCollector)  # Estimated to have the most requests
        _showProgression(commitsCollector, commitPages)
        commitsID = commitsCollector.exportID()

    # TODO: Implement a loading bar for the Files module
    # TODO: Reduce complexity where possible in the Files module

    # Creates a combined list of every commit paired with its corresponding branch
    branchList = self.dbConnector.selectColumn(table="Commits",
                                               column="Branch")
    commitSHAList = self.dbConnector.selectColumn(table="Commits",
                                                  column="Commit_SHA")
    # https://www.geeksforgeeks.org/python-merge-two-lists-into-list-of-tuples/
    mergedList = tuple(zip(branchList, commitSHAList))

    filesID = 0
    for pair in mergedList:
        # selectColumn returns rows as 1-tuples; unpack the raw values.
        branch = pair[0][0]
        commit = pair[1][0]
        print("\nRepository Files from Branch {} from Commit {}".format(
            branch, commit))
        filesCollector = Files(
            commitSHA=commit,
            branch=branch,
            dbConnection=self.dbConnector,
            id=filesID,
            repository=self.repository,
            username=self.username,
            url="https://github.com/{}/{}/commit/{}",
        )
        _scrapeData(filesCollector)
        filesID = filesCollector.exportID()
def test_cache(self):
    """Fetching with check_cache enabled should mark the collector cached."""
    for owner, project in self.iter_repos():
        issues = Issues(owner=owner, project=project)
        issues.fetch_issues(check_cache=True, cache_dir='./')
        # Check the internal variable
        self.assertTrue(issues.cached)
def test_initialization(self):
    """Constructor-configured collectors should pass the shared base checks."""
    for owner, project in self.iter_repos():
        issues = Issues(owner=owner, project=project)
        issues.fetch_issues(check_cache=True, cache_dir='./')
        self._test_base_(issues, owner, project)
def test_add_project(self):
    """Adding the project after construction should behave the same as
    passing it to the constructor."""
    for owner, project in self.iter_repos():
        issues = Issues()
        issues.add_project(owner, project)
        issues.fetch_issues(check_cache=True, cache_dir='./')
        self._test_base_(issues, owner, project)