def authenticate(self):
    """
    authenticate()
    Authenticates this application to GitHub using
    the cas-user git user credentials. This is hopefully temporary!
    """
    s = requests.Session()
    username = config["github"]["user"]
    password = config["github"]["pass"]
    s.auth = (username, password)
    payload = {"scopes": ["repo"]}

    r = s.get(self.request_auth, params=payload)

    if r.headers.get('x-ratelimit-remaining') == '0':
        logging.info("Github quota limit hit -- waiting")

        # Wait up to an hour until we can continue..
        while r.headers.get('x-ratelimit-remaining') == '0':
            time.sleep(600)  # Wait 10 minutes and try again
            r = s.get(self.request_auth, params=payload)

    if r.status_code >= 400:
        # On error the response body is a dict with a 'message' key
        msg = r.json().get('message')
        logging.error("Failed to authenticate issue tracker: \n" + msg)
        return  # Exit

    # On success the response body is a list of authorizations; use the first
    data = r.json()[0]
    self.auth_token = data.get("token")
    requests_left = r.headers.get('x-ratelimit-remaining')
    logging.info("Analyzer has " + requests_left +
                 " issue tracker calls left this hour")
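# A minimal, self-contained sketch of the rate-limit back-off pattern used
# above, factored into a reusable helper. Illustrative only: the function name
# `wait_for_rate_limit` and its parameters are assumptions, not part of the
# original codebase.
import time
import requests

def wait_for_rate_limit(session, url, params=None, interval=600):
    """Re-issue the request every `interval` seconds while the GitHub
    quota is exhausted, then return the last response."""
    r = session.get(url, params=params)
    while r.headers.get('x-ratelimit-remaining') == '0':
        time.sleep(interval)  # wait, then probe the API again
        r = session.get(url, params=params)
    return r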
def findIssueOpened(self, correctiveCommit):
    """
    findIssueOpened()
    If the corrective change/commit links to an issue in the issue tracker,
    returns the date of the oldest open issue found; otherwise returns None.
    """
    issue_opened = None

    if self.issueTracker is None or not hasattr(self.issueTracker, "getDateOpened"):
        return None

    idMatch = re.compile(r'#\d+')
    issue_ids = idMatch.findall(correctiveCommit.commit_message)
    issue_ids = [issue_id.strip('#') for issue_id in issue_ids]  # Remove the '#' from ids

    if len(issue_ids) > 0:
        issue_opened = self.issueTracker.getDateOpened(issue_ids[0])

    # Use the oldest open bug
    for issue_id in issue_ids:
        logging.info('Searching for issue id: ' + issue_id)
        curr_issue_opened = self.issueTracker.getDateOpened(issue_id)

        # Verify that an issue was found.
        if curr_issue_opened is not None:
            if issue_opened is None or int(curr_issue_opened) < int(issue_opened):
                issue_opened = curr_issue_opened

    return issue_opened
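# A quick standalone check of the issue-id extraction above; the sample
# commit message is made up for illustration.
import re

message = "Fix crash on empty input, closes #42 and #7"
ids = [m.strip('#') for m in re.findall(r'#\d+', message)]
assert ids == ['42', '7']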
def checkIngestion(self):
    """Check if any repo needs to be ingested"""
    session = Session()
    repo_update_freq = int(config['repoUpdates']['freqInDays'])
    refresh_date = datetime.utcnow() - timedelta(days=repo_update_freq)

    repos_to_get = (session.query(Repository)
                    .filter((Repository.status == "Waiting to be Ingested") |
                            ((Repository.ingestion_date < refresh_date) &
                             (Repository.status != "Error") &
                             (Repository.status != "Analyzing")))
                    .all())

    for repo in repos_to_get:
        logging.info("Adding repo " + repo.id + " to work queue for ingesting")
        repo.status = "In Queue to be Ingested"
        session.commit()  # update the status of repo
        self.workQueue.add_task(ingest, repo.id)

    session.close()
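# Note on operator precedence in the SQLAlchemy filter above: `&` binds more
# tightly than `|`, so the explicit parentheses around the `&`-chain keep the
# intent obvious. A tiny plain-Python illustration of the same precedence rule:
a, b, c = True, False, False
assert (a | b & c) == (a | (b & c))  # `&` groups first
assert ((a | b) & c) == False        # different result once parenthesized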
def getMedian(self, metric):
    """
    Helper function for the method calculateMedians. Takes in a metric and
    returns a string property of the results
    @private
    """
    median_props = ""
    try:
        # R functions to be used
        medianFn = robjects.r["median"]
        wilcoxFn = robjects.r["wilcox.test"]

        metric_buggy = getattr(self.metrics, metric + "_buggy")
        metric_nonbuggy = getattr(self.metrics, metric + "_nonbuggy")

        # First check p-values; if significant, then calculate the medians
        pvalue = wilcoxFn(robjects.FloatVector(metric_buggy),
                          robjects.FloatVector(metric_nonbuggy))[2][0]
        buggy_median = medianFn(robjects.FloatVector(metric_buggy))
        nonbuggy_median = medianFn(robjects.FloatVector(metric_nonbuggy))

        median_props += '"' + metric + 'buggy":"' + str(buggy_median[0]) + '", '
        median_props += '"' + metric + 'nonbuggy":"' + str(nonbuggy_median[0]) + '", '

        if pvalue <= self.psig:
            median_props += '"' + metric + '_sig":"1", '
        else:
            median_props += '"' + metric + '_sig":"0", '

    except Exception:
        # catch the case where we haven't made any observations for this metric
        logging.info("Metric " + metric +
                     " could not be used in the median model for repo " +
                     self.repo_id)

    return median_props
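# The `[2][0]` indexing above relies on `p.value` being the third component of
# R's htest result object. A sketch of a more explicit lookup using rpy2's
# named access (`rx2`), shown here with made-up sample vectors:
import rpy2.robjects as robjects

wilcox = robjects.r["wilcox.test"]
result = wilcox(robjects.FloatVector([1.0, 2.0, 3.0]),
                robjects.FloatVector([4.0, 5.0, 6.0]))
pvalue = result.rx2('p.value')[0]  # fetch the component by name, not position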
def notify(self):
    """
    Notify all subscribers that repo has been analyzed and is ready
    to be viewed
    """
    FROM = "*****@*****.**"
    TO = self.subscribers
    SUBJECT = "Your repository has been analyzed"
    TEXT = ("Your analyzed repository is now ready to be viewed at "
            "http://kiwi.se.rit.edu/repo/" + self.repo)

    # prepare actual message
    message = "From: %s\nTo: %s\nSubject: %s\n\n%s" % (
        FROM, ", ".join(TO), SUBJECT, TEXT)

    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        server.ehlo()
        server.starttls()
        server.login(self.gmail_user, self.gmail_pwd)
        server.sendmail(FROM, TO, message)
        server.quit()
        logging.info("Notification sent successfully")
    except Exception:
        logging.error("Failed to send notification")
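# An alternative sketch using the standard library's email.mime helpers to
# build the headers instead of hand-formatting the string. This is not the
# project's code, just an illustration of constructing the same message.
from email.mime.text import MIMEText

def build_message(sender, recipients, subject, body):
    """Build an RFC 2822 message string ready for smtplib's sendmail()."""
    msg = MIMEText(body)
    msg["From"] = sender
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = subject
    return msg.as_string()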
def analyzeRepo(repository_to_analyze, session):
    """
    Analyzes the given repository
    @param repository_to_analyze   The repository to analyze.
    @param session                 SQLAlchemy session
    @private
    """
    repo_name = repository_to_analyze.name
    repo_id = repository_to_analyze.id
    last_analysis_date = repository_to_analyze.analysis_date

    # Update status of repo to show it is analyzing
    repository_to_analyze.status = "Analyzing"
    session.commit()

    logging.info('Worker analyzing repository id ' + repo_id)

    # all commits in descending order
    all_commits = (session.query(Commit)
                   .filter(Commit.repository_id == repo_id)
                   .order_by(Commit.author_date_unix_timestamp.desc())
                   .all())

    # corrective commits in ascending order
    # if updating, only get the corrective commits that have not been linked yet.
    # No need to re-link corrective commits that have already been linked with
    # the bug-inducing commit.
    corrective_commits = (session.query(Commit)
                          .filter((Commit.fix == "True") &
                                  (Commit.repository_id == repo_id) &
                                  (Commit.linked == False))
                          .order_by(Commit.author_date_unix_timestamp.asc())
                          .all())

    logging.info("Linking " + str(len(corrective_commits)) +
                 " new corrective commits for repo " + repo_id)

    try:
        git_commit_linker = GitCommitLinker(repo_id)
        git_commit_linker.linkCorrectiveCommits(corrective_commits, all_commits)
    except Exception:
        logging.exception("Got an exception linking bug fixing changes to " +
                          "bug inducing changes for repo " + repo_id)
        repository_to_analyze.status = "Error"
        session.commit()  # update repo status
        raise

    # Signify to CAS Manager that this repo is ready to have its model built
    if repository_to_analyze.status != "Error":
        repository_to_analyze.status = "In Queue to Build Model"
        session.commit()  # update repo status

    # after updating the commit.contains_bug & commit.fix labels, parse the
    # diff information
    git = Git()
    git.diff(repo_id)
def checkModel(self):
    """Check if any repo needs metrics to be generated"""
    session = Session()
    repos_to_get = (session.query(Repository)
                    .filter(Repository.status == "In Queue to Build Model")
                    .all())

    for repo in repos_to_get:
        logging.info("Adding repo " + repo.id +
                     " to model queue to finish analyzing")
        repo.status = "Building Model"
        session.commit()  # update status of repo
        self.modelQueue.put(repo.id)

    session.close()
def ingest(repo_id):
    """
    Ingest a repository with the given id. Gets the repository information
    from the repository table and starts ingesting using the ingestRepo method.
    @param repo_id   The repository id to ingest.
    """
    session = Session()
    repo_to_analyze = (session.query(Repository)
                       .filter(Repository.id == repo_id)
                       .all())

    # Verify that repo exists
    if len(repo_to_analyze) == 1:
        ingestRepo(repo_to_analyze[0], session)
    else:
        logging.info('Repo with id ' + repo_id + ' not found!')

    session.close()
def analyze(repo_id):
    """
    Analyze the repository with the given id. Gets the repository from the
    repository table and starts analyzing using the analyzeRepo method.
    @param repo_id   The repository id to analyze
    """
    session = Session()
    repo_to_analyze = (session.query(Repository)
                       .filter(Repository.id == repo_id)
                       .all())

    # Verify that repo exists
    if len(repo_to_analyze) > 0:
        analyzeRepo(repo_to_analyze[0], session)
    else:
        logging.info('Repo with id ' + repo_id + ' not found!')

    session.close()
def getDateOpened(self, issueNumber):
    """
    getDateOpened()
    Gets the date the issue number was opened in unix time.
    If the issue cannot be found for whatever reason, returns None.
    """
    header = {'Authorization': 'token ' + self.auth_token}
    r = requests.get(self.request_repos + "/" + self.owner + "/" + self.repo +
                     "/issues/" + issueNumber, headers=header)
    data = r.json()

    # If forbidden, check whether we have exhausted the api quota
    if r.status_code == 403 and r.headers.get('x-ratelimit-remaining') == '0':
        logging.info("Github quota limit hit -- waiting")

        # Wait up to an hour until we can continue..
        while r.headers.get('x-ratelimit-remaining') == '0':
            time.sleep(600)  # Wait 10 minutes and try again
            r = requests.get(self.request_repos + "/" + self.owner + "/" +
                             self.repo + "/issues/" + issueNumber,
                             headers=header)
        data = r.json()

    # Check for error codes (including a 403 that was not quota-related)
    if r.status_code >= 400:
        msg = data.get('message')
        logging.error("ISSUE TRACKER FAILURE: \n" + msg)
        return None

    try:
        date = dateutil.parser.parse(data.get('created_at')).timestamp()
        return date
    except Exception:
        logging.error("ISSUE TRACKER FAILURE: Could not get created_at " +
                      "from github issues API")
        return None
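# A standalone check of the timestamp conversion used above, with a made-up
# GitHub-style ISO 8601 date:
import dateutil.parser

created_at = "2013-10-01T12:30:00Z"
unix_time = dateutil.parser.parse(created_at).timestamp()
print(unix_time)  # seconds since the epoch, as a float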
def checkAnalyzation(self):
    """Checks if any repo needs to be analyzed"""
    session = Session()
    repos_to_get = (session.query(Repository)
                    .filter(Repository.status == "Waiting to be Analyzed")
                    .all())

    for repo in repos_to_get:
        logging.info("Adding repo " + repo.id + " to work queue for analyzing.")
        repo.status = "In Queue to be Analyzed"
        session.commit()  # update the status of repo

        # Replace the line below by the commented-out line if you wish to allow
        # multiple threads. For SQLite, this won't work so well.
        # self.workQueue.add_task(analyze, repo.id)
        analyze(repo.id)

    session.close()
def notify(self, repo):
    """
    Send e-mail notifications, if applicable, to a repo's subscribers.
    Used by checkBuildModel.
    """
    logging.info("Notifying subscribed users for repository " + repo.id)

    # Create the Notifier
    gmail_user = config['gmail']['user']
    gmail_pass = config['gmail']['pass']
    notifier = Notifier(gmail_user, gmail_pass, repo.name)

    # Add subscribers if applicable
    if repo.email is not None:
        notifier.addSubscribers([repo.email, gmail_user])
    else:
        notifier.addSubscribers([gmail_user])

    notifier.notify()
def getMedian(self, metric): """ Helper function for the method calculateMedians. Takes in a metric and returns a string property of the results @private """ median_props = "" try: # R functions to be used medianFn = robjects.r['median'] wilcoxFn = robjects.r['wilcox.test'] metric_buggy = getattr(self.metrics, metric + "_buggy") metric_nonbuggy = getattr(self.metrics, metric + "_nonbuggy") # First check p-values, if signficant then calculate median pvalue = self.wilcoxFn(robjects.FloatVector(metric_buggy), robjects.FloatVector(metric_nonbuggy))[2][0] buggy_median = self.medianFn(robjects.FloatVector(metric_buggy)) nonbuggy_median = self.medianFn( robjects.FloatVector(metric_nonbuggy)) median_props += '"' + metric + 'buggy":"' + str( buggy_median[0]) + '", ' median_props += '"' + metric + 'nonbuggy":"' + str( nonbuggy_median[0]) + '", ' if pvalue <= self.psig: median_props += '"' + metric + '_sig":"1", ' else: median_props += '"' + metric + '_sig":"0", ' except: # catch the case where we haven't made any observations to do this metric logging.info("Metric " + metric + " could not be used in the median model for repo " + self.repo_id) return median_props
def ingestRepo(repository_to_ingest, session):
    """
    Ingests a given repository
    @param repository_to_ingest   The repository to inspect
    @param session                The SQLAlchemy session
    @private
    """
    logging.info('A worker is starting to scan repository: ' +
                 repository_to_ingest.id)

    # Update status of repo to show it is ingesting
    repository_to_ingest.status = "Ingesting"
    session.commit()

    local_repo = LocalRepository(repository_to_ingest)
    local_repo.sync()
    session.merge(repository_to_ingest)

    repository_to_ingest.status = "Waiting to be Analyzed"  # update status
    session.commit()

    logging.info('A worker finished ingesting repo ' +
                 repository_to_ingest.id)
    session.close()
def _linkCorrectiveCommit(self, commit):
    """
    Links the corrective change/commit to the change/commit which was the
    cause. This is the purpose of this object.
    @commit - the corrective change to link w/ the changes that introduced
    the problems/issues it fixes.
    """
    region_chunks = self.getModifiedRegions(commit)

    logging.info("Linkage for commit " + commit.commit_hash)
    for k, v in region_chunks.items():
        logging.info("-- file: " + k)
        logging.info("---- loc modified: " + str(v))

    bug_introducing_changes = self.gitAnnotate(region_chunks, commit)
    return bug_introducing_changes
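# For context, SZZ-style annotation maps each modified region of a fixing
# commit back to the commits that last touched those lines. A minimal sketch
# of one such lookup using `git blame -L`; the helper name and arguments are
# illustrative, not the project's actual gitAnnotate implementation.
import subprocess

def blame_region(repo_dir, commit_hash, file_path, start, end):
    """Return `git blame` output for lines start..end of file_path as they
    were just before commit_hash (i.e. in its parent)."""
    out = subprocess.check_output(
        ["git", "blame", "-L", "%d,%d" % (start, end),
         commit_hash + "^", "--", file_path],
        cwd=repo_dir)
    return out.decode("utf-8", errors="replace")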
from classifier.classifier import *
from caslogging import logging

logging.info('Test categorization... ')
classifier = Classifier()

# Test classification of corrective commits
# fix,bug,wrong,fail,problem
corrective_msg_1 = "fixed something"
corrective_msg_2 = "bam, there goes a bug!"
corrective_msg_3 = "x was wrong, but no more!"
corrective_msg_4 = "Houston, we *had* a problem"
corrective_msg_5 = "My watch is fun"
corrective_msg_6 = "This is definitively NOT a you-know what!"

assert(classifier.categorize(corrective_msg_1) == "Corrective")
assert(classifier.categorize(corrective_msg_2) == "Corrective")
assert(classifier.categorize(corrective_msg_3) == "Corrective")
assert(classifier.categorize(corrective_msg_4) == "Corrective")
assert(classifier.categorize(corrective_msg_5) != "Corrective")
assert(classifier.categorize(corrective_msg_6) != "Corrective")

# Test classification of feature additions
# new,add,requirement,initial,create
feature_msg_1 = "new awesome thing added to that brilliant code"
feature_msg_2 = "adding some color to this mundane gui!"
feature_msg_3 = "Adding requirement.."
feature_msg_4 = "This is an initial commit"
feature_msg_5 = "Creating a new class for x,y, AND z!"
""" file: readRepo.py author: Ben Grawi <*****@*****.**> date: October 2013 description: The base script to call """ from caslogging import logging import sys from datetime import datetime, timedelta from commit import Commit from repository import * from metrics import * from localrepository import * logging.info('Starting CASReader') # Read the first argument and pass it in as a string if len(sys.argv) > 1: arg = sys.argv[1] else: arg = '' if arg == "initDb": # Init the database logging.info('Initializing the Database...') Base.metadata.create_all(engine) logging.info('Done') elif arg == "testRepos": logging.info('Making Test Repos')
""" file: readRepo.py author: Ben Grawi <*****@*****.**> date: October 2013 description: The base script to call """ from caslogging import logging import sys from datetime import datetime, timedelta from commit import Commit from repository import * from metrics import * from localrepository import * logging.info("Starting CASReader") # Read the first argument and pass it in as a string if len(sys.argv) > 1: arg = sys.argv[1] else: arg = "" if arg == "initDb": # Init the database logging.info("Initializing the Database...") Base.metadata.create_all(engine) logging.info("Done") elif arg == "testRepos": logging.info("Making Test Repos")
def checkBuildModel(self):
    """ Checks if any repo is waiting to have its model built. We are using
    a queue because we can't concurrently access R """
    session = Session()

    if not self.modelQueue.empty():
        repo_id = self.modelQueue.get()
        repo = (session.query(Repository)
                .filter(Repository.id == repo_id)
                .first())

        # Use data only up to X months prior; beyond that we won't have
        # sufficient data to build models, as there may be bugs introduced
        # in those months that haven't been fixed yet, skewing our model.
        glm_model_time = int(config['glm_modeling']['months'])
        data_months_datetime = datetime.utcnow() - monthdelta(glm_model_time)
        data_months_unixtime = calendar.timegm(
            data_months_datetime.utctimetuple())

        # all commits for repo prior to current time - glm model time
        training_commits = (session.query(Commit)
                            .filter((Commit.repository_id == repo_id) &
                                    (Commit.author_date_unix_timestamp <
                                     str(data_months_unixtime)))
                            .order_by(Commit.author_date_unix_timestamp.desc())
                            .all())

        # all commits for repo after or on current time - glm model time
        testing_commits = (session.query(Commit)
                           .filter((Commit.repository_id == repo_id) &
                                   (Commit.author_date_unix_timestamp >=
                                    str(data_months_unixtime)))
                           .all())

        try:
            metrics_generator = MetricsGenerator(repo_id, training_commits,
                                                 testing_commits)
            metrics_generator.buildAllModels()

            # monthly data dump - or rather, every 30 days.
            dump_refresh_date = str(datetime.utcnow() - timedelta(days=30))
            if repo.last_data_dump is None or repo.last_data_dump < dump_refresh_date:
                logging.info("Generating a monthly data dump for repository: " +
                             repo_id)

                # Get all commits for the repository
                all_commits = (session.query(Commit)
                               .filter(Commit.repository_id == repo_id)
                               .order_by(Commit.author_date_unix_timestamp.desc())
                               .all())
                metrics_generator.dumpData(all_commits)
                repo.last_data_dump = str(datetime.now().replace(microsecond=0))

            # Notify user if repo has never been analyzed previously
            if repo.analysis_date is None:
                self.notify(repo)

            logging.info("Repo " + repo_id + " finished analyzing.")
            repo.analysis_date = str(datetime.now().replace(microsecond=0))
            repo.status = "Analyzed"
            session.commit()  # update status of repo
            session.close()

        # uh-oh
        except Exception:
            logging.exception("Got an exception building model for repository " +
                              repo_id)
            repo.status = "Error"
            session.commit()  # update repo status
            session.close()
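# The month arithmetic above relies on the third-party `monthdelta` package,
# which subtracts calendar months (unlike timedelta, which only counts days).
# A small illustration, assuming the package is installed:
from datetime import datetime
from monthdelta import monthdelta

now = datetime(2014, 3, 31)
print(now - monthdelta(1))  # clamps to the end of February: 2014-02-28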
repositories that need to be analyzed, analyzes them, and places the results
in the metrics table.
"""
import sys
from datetime import datetime, timedelta
from repository import *
from commit import *
from bugfinder import *
from metricsgenerator import *
from githubissuetracker import *
from caslogging import logging
from notifier import *
from config import config

logging.info('Starting CASAnalyzer')

# Latest time to analyze repo (1 Day)
refresh_date = str(datetime.utcnow() - timedelta(days=1))

session = Session()

reposToAnalyze = (session.query(Repository)
                  .filter((Repository.analysis_date == None) |
                          (Repository.analysis_date < refresh_date))
                  .all())

# Create the Notifier
gmail_user = config['gmail']['user']
gmail_pass = config['gmail']['pass']
notifier = Notifier(gmail_user, gmail_pass)

if len(reposToAnalyze) > 0: