def authenticate(self):
    """Authenticate this application to GitHub using the cas-user git
    user credentials. This is hopefully temporary!

    On success the OAuth token is stored in ``self.auth_token``; on an
    HTTP error the failure is logged and the method returns without
    setting a token.
    """
    session = requests.Session()
    session.auth = (config["github"]["user"], config["github"]["pass"])
    payload = {"scopes": ["repo"]}

    r = session.get(self.request_auth, params=payload)

    # GitHub quota exhausted: poll every 10 minutes until the hourly
    # rate-limit window resets, then carry on with the fresh response.
    if r.headers.get('x-ratelimit-remaining') == '0':
        logging.info("Github quota limit hit -- waiting")
        while r.headers.get('x-ratelimit-remaining') == '0':
            time.sleep(600)  # Wait 10 minutes and try again
            r = session.get(self.request_auth, params=payload)

    # Parse the body exactly once. Error responses are a JSON object
    # ({"message": ...}) while success responses are a JSON list of
    # authorizations, so [0] is only valid on the success path. The
    # original did ``data = r.json()[0]`` unconditionally, which blew up
    # on error responses.
    data = r.json()
    if r.status_code >= 400:
        msg = data.get('message') if isinstance(data, dict) else str(data)
        logging.error("Failed to authenticate issue tracker: \n" + str(msg))
        return  # Exit
    self.auth_token = data[0].get("token")

    # Lazy %s formatting also tolerates a missing header (None), which
    # the original "+"-concatenation did not.
    requests_left = r.headers.get('x-ratelimit-remaining')
    logging.info("Analyzer has %s issue tracker calls left this hour",
                 requests_left)
def notify(self):
    """Notify all subscribers that repo has been analyzed and is ready
    to be viewed.

    Best-effort: any failure while talking to the SMTP server is logged
    (with traceback) and swallowed so a notification problem never
    aborts an analysis run.
    """
    FROM = "*****@*****.**"
    TO = self.subscribers
    SUBJECT = "Your repository has been analyzed"
    TEXT = ("Your analyzed repository is now ready to be viewed at "
            "http://kiwi.se.rit.edu/repo/" + self.repo)

    # Build the RFC-2822 style message. The original template started
    # with "\From:" -- "\F" is not a recognized escape, so the literal
    # backslash corrupted the From header. Fixed here.
    message = "From: %s\nTo: %s\nSubject: %s\n\n%s" % (
        FROM, ", ".join(TO), SUBJECT, TEXT)

    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        server.ehlo()
        server.starttls()  # Gmail requires TLS on port 587
        server.login(self.gmail_user, self.gmail_pwd)
        server.sendmail(FROM, TO, message)
        server.quit()
        logging.info("Notification sent successfully")
    except Exception:
        # Narrowed from a bare ``except:`` (which also trapped
        # KeyboardInterrupt/SystemExit); logging.exception records the
        # traceback instead of failing silently.
        logging.exception("Failed to send notification")
def notify(self):
    """Notify all subscribers that repo has been analyzed and is ready
    to be viewed.

    Sends one e-mail via Gmail's SMTP server; errors are logged and
    suppressed (best-effort delivery).
    """
    sender = "*****@*****.**"
    recipients = self.subscribers
    subject = "Your repository has been analyzed"
    body = ("Your analyzed repository is now ready to be viewed at "
            "http://kiwi.se.rit.edu/repo/" + self.repo)

    # NOTE: the original string began with "\From:"; the stray backslash
    # is not an escape sequence, so it was emitted literally and broke
    # the From header. Build the headers cleanly instead.
    message = "From: %s\nTo: %s\nSubject: %s\n\n%s" % (
        sender, ", ".join(recipients), subject, body)

    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        server.ehlo()
        server.starttls()
        server.login(self.gmail_user, self.gmail_pwd)
        server.sendmail(sender, recipients, message)
        server.quit()
        logging.info("Notification sent successfully")
    except Exception:
        # Replaces a bare ``except:``; keep best-effort semantics but
        # record what actually went wrong.
        logging.exception("Failed to send notification")
def getDateOpened(self, issueNumber):
    """Return the UNIX timestamp at which issue ``issueNumber`` was
    opened, or ``None`` if the issue cannot be found or parsed.

    If the GitHub API quota is exhausted (403 + zero remaining), waits
    in 10-minute steps for the hourly window to reset, then retries.
    """
    logging.info("searching for issue: " + str(issueNumber))
    url = (self.request_repos + "/" + self.owner + "/" + self.repo +
           "/issues/" + issueNumber)
    logging.info(url)
    header = {'Authorization': 'token ' + self.auth_token}

    r = requests.get(url, headers=header)

    # Rate limited: wait it out and retry with the same request.
    if (r.status_code == 403 and
            r.headers.get('x-ratelimit-remaining') == '0'):
        logging.info("Github quota limit hit -- waiting")
        while r.headers.get('x-ratelimit-remaining') == '0':
            time.sleep(600)  # Wait 10 minutes and try again
            r = requests.get(url, headers=header)

    data = r.json()
    if r.status_code >= 400:
        # Covers 404 and any remaining failure, including a 403 that was
        # not caused by the quota.
        logging.info("issue not found")
        return None

    # BUG FIX: the original fell out of the 403 branch after a
    # successful quota-wait retry and returned None without ever parsing
    # the date. Parsing now runs for every successful response.
    try:
        return dateutil.parser.parse(data.get('created_at')).timestamp()
    except (TypeError, ValueError):
        # created_at missing or unparsable; narrowed from a bare except.
        logging.error(
            "ISSUE TRACKER FAILURE: Could not get created_at from "
            "github issues API")
        return None
def getDateOpened(self, issueNumber):
    """Look up GitHub issue ``issueNumber`` and return its creation time
    in unix time; return ``None`` when the issue cannot be retrieved.

    Handles API-quota exhaustion (403 with zero remaining requests) by
    sleeping 10 minutes at a time until the quota resets.
    """
    logging.info("searching for issue: " + str(issueNumber))
    issue_url = (self.request_repos + "/" + self.owner + "/" + self.repo +
                 "/issues/" + issueNumber)
    logging.info(issue_url)
    auth_header = {'Authorization': 'token ' + self.auth_token}

    response = requests.get(issue_url, headers=auth_header)

    # Only a quota-induced 403 is retried; any other error falls through
    # to the status check below.
    if (response.status_code == 403 and
            response.headers.get('x-ratelimit-remaining') == '0'):
        logging.info("Github quota limit hit -- waiting")
        while response.headers.get('x-ratelimit-remaining') == '0':
            time.sleep(600)  # Wait 10 minutes and try again
            response = requests.get(issue_url, headers=auth_header)

    payload = response.json()
    if response.status_code >= 400:
        logging.info("issue not found")
        return None

    # Fixed: the original returned None even after a successful
    # rate-limit retry because the date was only parsed in the original
    # request's else-branch.
    try:
        return dateutil.parser.parse(payload.get('created_at')).timestamp()
    except (TypeError, ValueError):
        logging.error(
            "ISSUE TRACKER FAILURE: Could not get created_at from "
            "github issues API")
        return None
def calculateCommitRiskyness(self, commits, coefficient_names):
    """Calculate the probability of each commit being buggy using the
    logistic regression model:

        estimated probability = 1 / (1 + exp(-a - BX))

    When no significant coefficients are available no model can be
    built, and ``glm_probability`` is set to -1 on every commit to
    indicate that no GLM prediction was possible.
    """
    if not coefficient_names:
        # No model: flag every commit and skip the intercept lookup the
        # original performed even in this case (its result was unused).
        for commit in commits:
            commit.glm_probability = -1
        return

    coefficient_dict = self._getCoefficients(coefficient_names)
    # The intercept's p-value is not needed for scoring.
    intercept_value, _ = self._getInterceptValue(coefficient_names)

    for commit in commits:
        coefs_sum = sum(coef_value * getattr(commit, coef_name)
                        for coef_name, coef_value in coefficient_dict.items())
        try:
            riskyness = 1 / (1 + math.exp(-intercept_value - coefs_sum))
        except OverflowError:
            # exp() overflowed; clamp to a small sentinel probability.
            # str() guards against a non-string repo_id (the original
            # "+"-concatenation raised TypeError for ints).
            logging.error("Overflow error for repo " + str(self.repo_id))
            logging.error("Calculating riskyness for " + commit.commit_hash)
            logging.error("Sum of coefficients: " + str(coefs_sum))
            logging.error("Coeffiecents: " + str(coefficient_dict))
            riskyness = 0.01
        commit.glm_probability = riskyness
def calculateCommitRiskyness(self, commits, coefficient_names):
    """Assign ``glm_probability`` to every commit using the logistic
    regression model  p = 1 / (1 + exp(-a - BX)).

    If no significant coefficients exist, a model cannot possibly be
    built; every commit then gets a probability of -1 to mark that no
    GLM prediction was possible.
    """
    if len(coefficient_names) == 0:
        # Model unavailable -- no point fetching an intercept.
        for commit in commits:
            commit.glm_probability = -1
        return

    coefficients = self._getCoefficients(coefficient_names)
    intercept, _pvalue = self._getInterceptValue(coefficient_names)

    for commit in commits:
        weighted_sum = 0.0
        for name, value in coefficients.items():
            weighted_sum += value * getattr(commit, name)
        try:
            commit.glm_probability = 1 / (1 + math.exp(-intercept -
                                                       weighted_sum))
        except OverflowError:
            # Clamp on overflow and log the inputs that caused it;
            # str(self.repo_id) avoids a TypeError for non-str ids.
            logging.error("Overflow error for repo " + str(self.repo_id))
            logging.error("Calculating riskyness for " + commit.commit_hash)
            logging.error("Sum of coefficients: " + str(weighted_sum))
            logging.error("Coeffiecents: " + str(coefficients))
            commit.glm_probability = 0.01
for repo in testRepos: session.merge(Repository(repo)) session.commit() logging.info("Done.") elif arg == "": # No args, just do scan logging.info("Starting Scan...") repoSession = Session() # Latest time to get new repo data (1 day ago) refresh_date = str(datetime.utcnow() - timedelta(days=1)) # Get un-injested repos or repos not been updated since the refresh_date reposToGet = ( repoSession.query(Repository) .filter((Repository.ingestion_date == None) | (Repository.ingestion_date < refresh_date)) .all() ) # TODO: This: (downloading and parsing commit logs if len(reposToGet) > 0: for repo in reposToGet: localRepo = LocalRepository(repo) localRepo.sync() repoSession.merge(repo) repoSession.commit() logging.info("Done, finished everything.") else: logging.info("Nothing to do. Done.") else: logging.error("Invalid Command")
] session = Session() for repo in testRepos: session.merge(Repository(repo)) session.commit() logging.info('Done.') elif arg == '': # No args, just do scan logging.info('Starting Scan...') repoSession = Session() # Latest time to get new repo data (1 day ago) refresh_date = str(datetime.utcnow() - timedelta(days=1)) # Get un-injested repos or repos not been updated since the refresh_date reposToGet = (repoSession.query( Repository).filter((Repository.ingestion_date == None) | (Repository.ingestion_date < refresh_date)).all()) #TODO: This: (downloading and parsing commit logs if len(reposToGet) > 0: for repo in reposToGet: localRepo = LocalRepository(repo) localRepo.sync() repoSession.merge(repo) repoSession.commit() logging.info('Done, finished everything.') else: logging.info('Nothing to do. Done.') else: logging.error('Invalid Command')