def authenticate(self):
    """
    Authenticates this application to GitHub using the cas-user git user
    credentials. This is hopefully temporary!
    """
    s = requests.Session()
    username = config["github"]["user"]
    password = config["github"]["pass"]
    s.auth = (username, password)
    payload = {"scopes": ["repo"]}
    r = s.get(self.request_auth, params=payload)

    if r.headers.get('x-ratelimit-remaining') == '0':
        logging.info("Github quota limit hit -- waiting")
        # Wait up to an hour until we can continue..
        while r.headers.get('x-ratelimit-remaining') == '0':
            time.sleep(600)  # Wait 10 minutes and try again
            r = s.get(self.request_auth, params=payload)

    if r.status_code >= 400:
        # Error responses carry a dict with a "message" field
        msg = r.json().get('message')
        logging.error("Failed to authenticate issue tracker: \n" + str(msg))
        return  # Exit
    else:
        # The authorizations endpoint returns a list; take the first entry
        data = r.json()[0]
        self.auth_token = data.get("token")
        requests_left = r.headers.get('x-ratelimit-remaining')
        logging.info("Analyzer has " + str(requests_left) +
                     " issue tracker calls left this hour")
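# The quota-wait pattern above also appears in getDateOpened() below. A minimal
# sketch of factoring it into a shared helper; the name _waitForRateLimit is
# hypothetical, not part of the original class:
def _waitForRateLimit(self, session, url, **kwargs):
    """GET a URL, sleeping 10 minutes at a time while the GitHub quota is exhausted."""
    r = session.get(url, **kwargs)
    while r.headers.get('x-ratelimit-remaining') == '0':
        logging.info("Github quota limit hit -- waiting")
        time.sleep(600)  # Wait 10 minutes and try again
        r = session.get(url, **kwargs)
    return r
# authenticate() could then call:
#   r = self._waitForRateLimit(s, self.request_auth, params=payload)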
def notify(self):
    """
    Notify all subscribers that the repo has been analyzed and is ready
    to be viewed.
    """
    FROM = "*****@*****.**"
    TO = self.subscribers
    SUBJECT = "Your repository has been analyzed"
    TEXT = ("Your analyzed repository is now ready to be viewed at "
            "http://kiwi.se.rit.edu/repo/" + self.repo)

    # Prepare the actual message. Note: no backslash before "From" --
    # "\F" would embed a literal backslash and corrupt the headers.
    message = "From: %s\nTo: %s\nSubject: %s\n\n%s" % (
        FROM, ", ".join(TO), SUBJECT, TEXT)

    try:
        server = smtplib.SMTP("smtp.gmail.com", 587)
        server.ehlo()
        server.starttls()
        server.login(self.gmail_user, self.gmail_pwd)
        server.sendmail(FROM, TO, message)
        server.quit()
        logging.info("Notification sent successfully")
    except Exception:
        logging.error("Failed to send notification")
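# The hand-assembled message string in notify() is easy to get wrong. A sketch
# of building the same notification with the standard library's EmailMessage
# instead; buildNotification is a hypothetical helper, not part of the module:
from email.message import EmailMessage

def buildNotification(sender, recipients, repo):
    """Build the analysis-ready notification as a proper MIME message."""
    msg = EmailMessage()
    msg["From"] = sender
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = "Your repository has been analyzed"
    msg.set_content("Your analyzed repository is now ready to be viewed at "
                    "http://kiwi.se.rit.edu/repo/" + repo)
    return msg
# With this, notify() could replace server.sendmail(...) with
# server.send_message(msg), which derives the envelope from the headers.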
def checkIngestion(self):
    """Check if any repo needs to be ingested."""
    for i in range(self.repos.shape[0]):
        repo = self.repos.iloc[i]
        logging.info("Adding repo " + repo['name'] +
                     " to work queue for ingesting")
        self.workQueue.add_task(ingest, repo)
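# The WorkQueue class itself is not shown in this file. A minimal sketch of the
# contract checkIngestion() and checkAnalyzation() assume -- add_task(func, *args)
# runs func(*args) on a background worker -- as an illustrative stand-in only:
import queue
import threading

class WorkQueue:
    """Toy thread-backed work queue exposing the add_task interface used above."""

    def __init__(self, num_workers=4):
        self.tasks = queue.Queue()
        for _ in range(num_workers):
            threading.Thread(target=self._worker, daemon=True).start()

    def add_task(self, func, *args):
        self.tasks.put((func, args))

    def _worker(self):
        while True:
            func, args = self.tasks.get()
            try:
                func(*args)
            finally:
                self.tasks.task_done()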
def ingest(repo_id):
    """
    Ingest the repository with the given id. Gets the repository information
    from the repository table and starts ingesting using the ingestRepo method.
    @param repo_id The repository row to ingest.
    """
    repo_name = repo_id['name']
    # Verify that the repo exists
    if repo_name is not None:
        ingestRepo(repo_id)
    else:
        logging.info('Repo ' + str(repo_id) + ' not found!')
def checkAnalyzation(self):
    """Check if any repo needs to be analyzed."""
    for i in range(self.repos.shape[0]):
        repo = self.repos.iloc[i]
        # Repos analyzed within the last 30 days do not need a refresh
        refresh_date = datetime.datetime.now() - datetime.timedelta(days=30)
        if repo['last_analyzed'] is not None and repo['last_analyzed'] > refresh_date:
            continue
        logging.info("Adding repo " + repo['name'] +
                     " to work queue for analyzing.")
        self.workQueue.add_task(analyze, repo)
        self.repos.loc[i, 'last_analyzed'] = datetime.datetime.now()
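# A toy illustration of the 30-day refresh window checkAnalyzation() applies:
# a repo never analyzed, or analyzed more than 30 days ago, is re-queued.
# needsRefresh is a hypothetical helper, not part of the original class:
def needsRefresh(last_analyzed, days=30):
    """True when the repo was never analyzed or its last analysis is stale."""
    if last_analyzed is None:
        return True
    refresh_date = datetime.datetime.now() - datetime.timedelta(days=days)
    return last_analyzed < refresh_date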
def analyze(repo_id):
    """
    Analyze the repository with the given id. Gets the repository from the
    repository table and starts analyzing using the analyzeRepo method.
    @param repo_id The repository row to analyze.
    """
    repo_name = repo_id['name']
    # Verify that the repo exists
    if repo_name is not None:
        analyzeRepo(repo_id)
    else:
        logging.info('Repo ' + str(repo_id) + ' not found!')
def ingestRepo(repository_to_ingest):
    """
    Ingests the given repository.
    @param repository_to_ingest The repository to ingest
    @private
    """
    logging.info('A worker is starting to scan repository: ' +
                 repository_to_ingest['name'])
    local_repo = LocalRepository(repository_to_ingest)
    local_repo.sync()
    logging.info('A worker finished ingesting repo ' +
                 repository_to_ingest['name'])
def _linkCorrectiveCommit(self, commit):
    """
    Links the corrective change/commit to the change/commit which was the
    cause. This is the purpose of this object.
    @commit - the corrective change to link with the changes that introduced
    the problems/issues it fixes.
    """
    region_chunks = self.getModifiedRegions(commit)
    logging.info("Linkage for commit " + commit['commit_hash'])
    # for k, v in region_chunks.items():
    #     logging.info("-- file: " + k)
    #     logging.info("---- loc modified: " + str(v))
    bug_introducing_changes = self.gitAnnotate(region_chunks, commit)
    return bug_introducing_changes
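# Reconstructed from the commented-out debug loop above: region_chunks maps each
# file touched by the corrective commit to the line numbers it modified. The
# paths and line numbers below are illustrative only:
example_region_chunks = {
    "src/scheduler.py": [10, 11, 42],  # lines the fix touched in this file
    "src/worker.py": [7],
}
# gitAnnotate() then blames each of these lines to find the commits that last
# changed them; those commits are returned as the bug-introducing changes.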
def getMedian(self, metric):
    """
    Helper function for the method calculateMedians. Takes in a metric and
    returns a string property of the results.
    @private
    """
    median_props = ""
    try:
        # R functions to be used
        medianFn = robjects.r['median']
        wilcoxFn = robjects.r['wilcox.test']
        metric_buggy = getattr(self.metrics, metric + "_buggy")
        metric_nonbuggy = getattr(self.metrics, metric + "_nonbuggy")

        # First check the p-value; if significant, record the medians
        pvalue = wilcoxFn(robjects.FloatVector(metric_buggy),
                          robjects.FloatVector(metric_nonbuggy))[2][0]
        buggy_median = medianFn(robjects.FloatVector(metric_buggy))
        nonbuggy_median = medianFn(robjects.FloatVector(metric_nonbuggy))

        median_props += '"' + metric + 'buggy":"' + str(buggy_median[0]) + '", '
        median_props += '"' + metric + 'nonbuggy":"' + str(nonbuggy_median[0]) + '", '

        if pvalue <= self.psig:
            median_props += '"' + metric + '_sig":"1", '
        else:
            median_props += '"' + metric + '_sig":"0", '
    except Exception:
        # Catch the case where we have no observations for this metric
        logging.info("Metric " + metric +
                     " could not be used in the median model for repo " +
                     str(self.repo_id))
    return median_props
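# getMedian() assembles a JSON fragment by string concatenation, which is easy
# to mis-quote. A sketch of producing the same key/value pairs with a dict and
# json.dumps; medianPropsDict is a hypothetical helper, not the original API:
import json

def medianPropsDict(metric, buggy_median, nonbuggy_median, pvalue, psig):
    """Return the per-metric median properties as a serializable dict."""
    return {
        metric + "buggy": str(buggy_median),
        metric + "nonbuggy": str(nonbuggy_median),
        metric + "_sig": "1" if pvalue <= psig else "0",
    }
# json.dumps(medianPropsDict("ndev", 2.0, 1.0, 0.01, 0.05)) yields the
# equivalent JSON object without hand-placed quotes and commas.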
def getDateOpened(self, issueNumber):
    """
    Gets the date the issue number was opened in Unix time.
    If the issue cannot be found for whatever reason, returns None.
    """
    logging.info("searching for issue: " + str(issueNumber))
    issue_url = (self.request_repos + "/" + self.owner + "/" + self.repo +
                 "/issues/" + str(issueNumber))
    logging.info(issue_url)
    header = {'Authorization': 'token ' + self.auth_token}

    r = requests.get(issue_url, headers=header)
    data = r.json()

    # If forbidden, check whether the API quota has been exhausted
    if r.status_code == 403:
        if r.headers.get('x-ratelimit-remaining') == '0':
            logging.info("Github quota limit hit -- waiting")
            # Wait up to an hour until we can continue..
            while r.headers.get('x-ratelimit-remaining') == '0':
                time.sleep(600)  # Wait 10 minutes and try again
                r = requests.get(issue_url, headers=header)
            data = r.json()
    # Check for other error codes
    elif r.status_code >= 400:
        logging.info("issue not found: " + str(data.get('message')))
        return None

    try:
        date = dateutil.parser.parse(data.get('created_at')).timestamp()
        return date
    except Exception:
        logging.error("ISSUE TRACKER FAILURE: Could not get created_at "
                      "from github issues API")
        return None
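# A quick check of the timestamp conversion getDateOpened() relies on:
# dateutil parses GitHub's ISO-8601 created_at strings, including the
# trailing "Z", directly into an aware datetime.
import dateutil.parser

ts = dateutil.parser.parse("2015-06-01T12:00:00Z").timestamp()
# ts is a Unix epoch float, comparable against commit timestamps.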
def analyzeRepo(repository_to_analyze):
    """
    Analyzes the given repository.
    @param repository_to_analyze The repository to analyze
    @private
    """
    repo_name = repository_to_analyze['name']
    logging.info('Worker analyzing repository ' + repo_name)

    all_commits = pd.read_csv(up(os.path.dirname(__file__)) +
                              '/Data/Commit/' + repo_name + '.csv')
    corrective_commits = all_commits[all_commits['fix'] == True]
    logging.info("Linking " + str(len(corrective_commits)) +
                 " new corrective commits for repo " + repo_name)

    try:
        git_commit_linker = GitCommitLinker(repository_to_analyze)
        final_commits = git_commit_linker.linkCorrectiveCommits(
            corrective_commits, all_commits)
        final_commits.to_csv(
            '/home/smajumd3/Data_Miner/github/data/commit_guru/' +
            repo_name + '.csv', index=False)
    except Exception:
        logging.exception("Got an exception linking bug fixing changes to "
                          "bug inducing changes for repo " + repo_name)
        raise
    logging.info("Linking done for repo " + repo_name)
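# The output CSV path in analyzeRepo() is hardcoded to one user's home
# directory. A sketch of resolving it from the environment with that path as
# the fallback; the COMMIT_GURU_OUT variable name is an assumption, not
# project configuration:
def outputPath(repo_name):
    """Resolve the commit_guru output CSV path, defaulting to the hardcoded location."""
    base = os.environ.get("COMMIT_GURU_OUT",
                          "/home/smajumd3/Data_Miner/github/data/commit_guru")
    return os.path.join(base, repo_name + ".csv")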