def setUpClass(cls) -> None:
    """Create the class-level fixtures used by the Languages tests.

    Builds a ``DatabaseConnector`` for ``testLanguageDB.db``, creates and
    opens the database, creates the ``Languages`` table, and stores the
    sample data, the comparison tuples and a ``Languages`` instance on the
    class.
    """
    # Database fixture: publish the connector on the class first, then
    # prepare it through a short local alias.
    connector = DatabaseConnector(databaseFileName="testLanguageDB.db")
    cls.dbConnector = connector
    connector.createDatabase()
    connector.openDatabaseConnection()

    createTableSql = "CREATE TABLE Languages (ID INTEGER, Language TEXT, Bytes_of_Code INTEGER, PRIMARY KEY(ID))"
    connector.executeSQL(sql=createTableSql)

    # Sample input and the tuples the tests compare against
    # (presumably the rows expected in the Languages table).
    cls.testData = {'testLang1': 7357, 'testLang2': 5}
    cls.dataComparison = [(0, 'testLang1', 7357), (1, 'testLang2', 5)]

    # Object under test, wired to the test database with placeholder
    # credentials.
    cls.lang = Languages(
        dbConnection=connector,
        oauthToken="token",
        repository="repo",
        username="******",
        url="url",
    )
def core_before_request() -> None:
    """Populate the request-global ``g`` object; meant to run before each request.

    Always stores the database handle (``g.db``) and the result of
    ``Languages().get_languages(0)`` (``g.languages``), then calls
    ``get_headers()``.

    When the session holds a ``user_id`` that matches a document in
    ``g.db.users``, it additionally stores that document (``g.my``), the
    user's language code (``g.lan``) and the matching entry of
    ``g.languages`` (``g.language``).
    """
    my = None
    g.db = db
    languages_object = Languages()
    g.languages = languages_object.get_languages(0)
    get_headers()

    # Only a session that carries a user id belongs to a logged-in user.
    if 'user_id' in session:
        # Read the user id stored in the session.
        user_id = session['user_id']
        # Load the user's document from the users collection.
        my = g.db.users.find_one({ '_id' : ObjectId(user_id) })
        # ``my`` is falsy when no user matched; in that case g.my, g.lan and
        # g.language are NOT assigned by this function.
        if my:
            g.my = my
            # The user's language code selects the entry of g.languages.
            g.lan = g.my['lan']
            g.language = g.languages[g.lan]
            # NOTE(review): the nesting of this call was reconstructed from a
            # whitespace-collapsed source; it is assumed to belong to the
            # logged-in branch -- confirm against the original file.
            get_hash_admin()
import re
from flask import Blueprint, request, session, g, render_template, url_for, redirect
from pymongo import ASCENDING, DESCENDING
# NOTE(review): ``pymongo.objectid`` is a legacy module path; newer PyMongo
# releases expose ObjectId from ``bson.objectid`` -- confirm the pinned version.
from pymongo.objectid import ObjectId
from pymongo.errors import InvalidId, PyMongoError

# Project-local (bombolone) imports
from decorators import check_authentication, check_admin, get_hash_pages
from languages import Languages
from validators import CheckValue

MODULE_DIR = 'modules/pages'
pages = Blueprint('pages', __name__)

# Module-level helper instances, created once at import time.
check = CheckValue()  # CheckValue instance
languages_object = Languages()


class Pages(object):
    """Page management helper.

    According to the original description, this class allows to:
        - get_page
        - reset
        - new
        - remove
    """

    # Class-level defaults. ``page`` and ``type_label`` are mutable dicts
    # defined on the class, so they are shared by every instance until an
    # instance rebinds them.
    page = {}
    type_label = {}
    len_of_label = 0  # presumably the number of labels -- TODO confirm
    message = None  # Error or success message
def startDataCollection(self) -> None:
    """Collect all repository data and store it through the database connector.

    Steps, in order:

    1. Prepare the database file and its tables via ``checkForFile`` and
       ``createFileTablesColumns``.
    2. Build the Branches, Forks, Issues, Languages and Repository
       collectors, then run them in the order languages, repository,
       branches, forks, issues. Each run performs one initial request and
       then the remaining ones behind a ``tqdm`` progress bar.
    3. For every branch name stored in the ``Branches`` table, collect that
       branch's commits.
    4. For every (branch, commit SHA) pair stored in the ``Commits`` table,
       scrape the files of that commit.

    Record ids are carried from one Commits/Files collector to the next
    through ``exportID()``.
    """
    # Local import: only the helper annotations below need it.
    from typing import Union

    def _collectData(collector) -> Union[int, bool]:
        """Fetch one dataset, insert it, and return ``iterateNext``'s result."""
        data = collector.getData()
        collector.insertData(dataset=data[0])
        return collector.iterateNext(data[1])

    def _scrapeData(collector) -> int:
        """Let ``collector`` insert its own data; always returns 0."""
        collector.insertData()
        return 0

    def _showProgression(collector, maxIterations: Union[int, bool]) -> None:
        """Run the remaining collections behind a progress bar.

        One collection has already happened before this is called, hence
        ``abs(maxIterations) - 1`` further iterations.
        """
        for _ in tqdm(range(abs(maxIterations) - 1)):
            _collectData(collector)

    def _buildCollector(collectorClass, url: str):
        """Instantiate an API collector with this object's shared settings."""
        return collectorClass(
            dbConnection=self.dbConnector,
            oauthToken=self.token,
            repository=self.repository,
            username=self.username,
            url=url,
        )

    databaseConnection = self.checkForFile()
    self.createFileTablesColumns(dbConnection=databaseConnection)

    # All five collectors are built up front, before any data is requested.
    branchCollector = _buildCollector(
        Branches,
        "https://api.github.com/repos/{}/{}/branches?per_page=100&page={}",
    )
    forksCollector = _buildCollector(
        Forks,
        "https://api.github.com/repos/{}/{}/forks?per_page=100&page={}",
    )
    issuesCollector = _buildCollector(
        Issues,
        "https://api.github.com/repos/{}/{}/issues?state=all&per_page=100&page={}",
    )
    languageCollector = _buildCollector(
        Languages,
        "https://api.github.com/repos/{}/{}/languages?per_page=100&page={}",
    )
    repositoryCollector = _buildCollector(
        Repository,
        "https://api.github.com/repos/{}/{}?per_page=100&page={}",
    )

    # (heading, collector) pairs in execution order.
    collectionStages = (
        ("\nRepository Languages", languageCollector),  # One request only
        ("\nRepository Information", repositoryCollector),  # One request only
        ("\nRepository Branches", branchCollector),  # Estimated < 10 requests
        ("\nRepository Forks", forksCollector),  # Estimated < 10 requests
        ("\nRepository Issues", issuesCollector),  # Estimated < 20 requests
    )
    for heading, collector in collectionStages:
        print(heading)
        # The first collection's return value drives the remaining ones.
        pageCount = _collectData(collector)
        _showProgression(collector, pageCount)

    commitsID = 0
    branchList = self.dbConnector.selectColumn(table="Branches", column="Name")
    for branch in branchList:
        print("\nRepository Commits from Branch {}".format(branch[0]))
        commitsCollector = Commits(
            dbConnection=self.dbConnector,
            id=commitsID,
            oauthToken=self.token,
            repository=self.repository,
            sha=branch[0],
            username=self.username,
            url="https://api.github.com/repos/{}/{}/commits?per_page=100&page={}&sha={}",
        )
        # Estimated to have the most requests
        commitPages = _collectData(commitsCollector)
        _showProgression(commitsCollector, commitPages)
        # Continue the id sequence in the next branch's collector.
        commitsID = commitsCollector.exportID()

    # TODO: Implement a loading bar for the Files module
    # TODO: Reduce complexity where possible in the Files module

    # Creates a combined list of every commit paired with its corresponding branch
    branchList = self.dbConnector.selectColumn(table="Commits", column="Branch")
    commitSHAList = self.dbConnector.selectColumn(table="Commits",
                                                  column="Commit_SHA")
    # https://www.geeksforgeeks.org/python-merge-two-lists-into-list-of-tuples/
    # Materialised before the loop so the pairs are fixed before scraping starts.
    mergedList = tuple(zip(branchList, commitSHAList))

    filesID = 0
    for branchRow, commitRow in mergedList:
        # The first element of each row holds the selected column's value.
        branch = branchRow[0]
        commit = commitRow[0]
        print("\nRepository Files from Branch {} from Commit {}".format(
            branch, commit))
        filesCollector = Files(
            commitSHA=commit,
            branch=branch,
            dbConnection=self.dbConnector,
            id=filesID,
            repository=self.repository,
            username=self.username,
            url="https://github.com/{}/{}/commit/{}",
        )
        _scrapeData(filesCollector)
        # Continue the id sequence in the next commit's collector.
        filesID = filesCollector.exportID()