def getDistinctLabels(): db = initializeConnections.getConnectionMySql() cursor = db.cursor() distinct_labelSQL = "select distinct(Labels) from IssueLabelMapping" cursor.execute(distinct_labelSQL) result = cursor.fetchall() db.close() labels = [] for row in result: labels.append(row[0]) return labels
def mineCommits(): labels = commitModifiedFilesUtil.getDistinctLabels() print(str(labels)) repos = initializeConnections.getConnectionGitHub() issue_minedlabel = {} print("mine open issues") for repo_issue in repos.issues(state='closed'): print(repo_issue.number) if (repo_issue.number < 1203): mined_labels = [] for issue_comment in repo_issue.comments(): print("getting tokens") tokens = getTokens(issue_comment.body_text) print("got tokens") for label in labels: if ((label in tokens) and (label not in mined_labels)): mined_labels.append(label) if (len(mined_labels) > 0): issue_minedlabel[repo_issue.number] = mined_labels db = initializeConnections.getConnectionMySql() cursor = db.cursor() # print("clearing data in commentminedlabels table") # clearDataSQL = "DELETE FROM commentminedlabels" # cursor.execute(clearDataSQL) # db.commit() placeholders = ', '.join(['%s'] * 2) insertCommentminedlabelsSQLQuery = "INSERT INTO commentminedlabels VALUES ({})".format( placeholders) print("populating commentminedlabels issue table") for (issue_number, mined_labelinfo) in issue_minedlabel.items(): for label in mined_labelinfo: print(str(issue_number)) print(str(label)) if issue_number != '': cursor.execute(insertCommentminedlabelsSQLQuery, [issue_number, label]) db.commit()
def getNumberofFilesTouched(label): from tabulate import tabulate db = initializeConnections.getConnectionMySql() cursor = db.cursor() file_issuefixSQL = "select FILENAME,Count( distinct ILM.Issue) as TOTAL,GROUP_CONCAT(distinct ILM.Issue SEPARATOR ',') as Issues from IssueLabelMapping as ILM inner join CommitIssueMapping on ILM.Issue = CommitIssueMapping.ISSUE inner join CommitModifiedFilesMapping on CommitIssueMapping.COMMIT = CommitModifiedFilesMapping.COMMIT where Labels = '" + label + "' group by FILENAME order by TOTAL DESC;" cursor.execute(file_issuefixSQL) result = cursor.fetchall() db.close() formatted_data = [] for row in result: formatted_data.append([row[0], row[1], row[2]]) print( tabulate(formatted_data, ["FILEPATH", "TOTAL", label.upper()], tablefmt="github")) return formatted_data
def getNumberofIssuewrtLabel(): from tabulate import tabulate import excelexport db = initializeConnections.getConnectionMySql() cursor = db.cursor() labelIssues = "select Labels , Count(*) as TotalIssues from IssueLabelMapping group by Labels order by TotalIssues desc;" cursor.execute(labelIssues) result = cursor.fetchall() db.close() formatteddata = [] for row in result: formatteddata.append([row[0], row[1]]) displayheaders = ["Labels", "TotalIssues"] print(tabulate(formatteddata, displayheaders, tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + formatteddata excelexport.saveExcel(exceldata, "LabelsTotalIssues") return formatteddata
def getIssueFrequencyForLabelsClosed(label): from tabulate import tabulate import excelexport db = initializeConnections.getConnectionMySql() cursor = db.cursor() labelFreq = "SELECT YEAR(CLOSEDAT), MONTH(CLOSEDAT) MONTH, COUNT(*) TOTALISSUES FROM Issues inner join IssueLabelMapping ON Issues.NUMBER = IssueLabelMapping.Issue where IssueLabelMapping.Labels = '"+label+"' GROUP BY YEAR(CLOSEDAT), MONTH(CLOSEDAT) ORDER BY TOTALISSUES DESC" cursor.execute(labelFreq) result = cursor.fetchall() db.close() formatteddata = [] for row in result : formatteddata.append([row[0],row[1],row[2]]) displayheaders= ["YEAR","MONTH","NOOF" + label] print(tabulate(formatteddata,displayheaders , tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + formatteddata excelexport.saveExcel(exceldata,"frequencyofClosed"+label) return formatteddata
def getCommitFrequency(): from tabulate import tabulate import excelexport db = initializeConnections.getConnectionMySql() cursor = db.cursor() commitFreq = "SELECT YEAR(COMMITEDDATE), MONTH(COMMITEDDATE) MONTH, COUNT(*) TOTALCOMMIT FROM Commits GROUP BY YEAR(COMMITEDDATE), MONTH(COMMITEDDATE) ORDER BY TOTALCOMMIT DESC" cursor.execute(commitFreq) result = cursor.fetchall() formatteddata = [] for row in result : formatteddata.append([row[0],row[1],row[2]]) displayheaders= ["YEAR","MONTH","NOOFCOMMITS"] print(tabulate(formatteddata,displayheaders , tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + formatteddata excelexport.saveExcel(exceldata,"frequencyofcommits") return formatteddata
def getNoOfCommitsByUsers(): from tabulate import tabulate import excelexport db = initializeConnections.getConnectionMySql() cursor = db.cursor() commitsSQL = "select EMAIL, count(*) as total from Commits group by EMAIL order by total desc" cursor.execute(commitsSQL) result = cursor.fetchall() formatteddata = [] for row in result: formatteddata.append([row[0], row[1]]) displayheaders = ["EMAIL", "TOTALCOMMITS"] print(tabulate(formatteddata, displayheaders, tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + formatteddata excelexport.saveExcel(exceldata, "UserCommits") return formatteddata
def getNoOfIssueByLabel(label): from tabulate import tabulate import excelexport db = initializeConnections.getConnectionMySql() cursor = db.cursor() labelFreq = "SELECT USERLOGIN, COUNT(*) as TOTAL FROM Issues inner join IssueLabelMapping ON Issues.NUMBER = IssueLabelMapping.Issue where IssueLabelMapping.Labels = '" + label + "' group by USERLOGIN order by TOTAL desc" cursor.execute(labelFreq) result = cursor.fetchall() formatteddata = [] for row in result: formatteddata.append([row[0], row[1]]) displayheaders = ["USERNAME", "TOTAL" + label] print(tabulate(formatteddata, displayheaders, tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + formatteddata excelexport.saveExcel(exceldata, "usercreatedissues" + label) return formatteddata
def getSubscribersData(): from tabulate import tabulate import excelexport db = initializeConnections.getConnectionMySql() cursor = db.cursor() subFreq = "SELECT YEAR(SUBSCRIBEDDATE), MONTH(SUBSCRIBEDDATE) MONTH, COUNT(*) TOTALSUBSCRIBERS FROM subscribers GROUP BY YEAR(SUBSCRIBEDDATE), MONTH(SUBSCRIBEDDATE) ORDER BY TOTALSUBSCRIBERS DESC" cursor.execute(subFreq) result = cursor.fetchall() db.close() formatteddata = [] for row in result: formatteddata.append([row[0], row[1], row[2]]) displayheaders = ["YEAR", "MONTH", "NOOFSUBSCRIBERS"] print(tabulate(formatteddata, displayheaders, tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + formatteddata excelexport.saveExcel(exceldata, "frequencyofSubscribers") return formatteddata
def getIssuesNotMatchingWithLabel(label): from tabulate import tabulate import excelexport import initializeConnections import sentimentanalysistest label = "bug" db = initializeConnections.getConnectionMySql() cursor = db.cursor() issuesSQL = "SELECT Issue FROM minegithub.commentminedlabels WHERE Label = '" + label + "' AND Issue NOT IN (SELECT Issue FROM minegithub.IssueLabelMapping WHERE Labels ='" + label + "')" cursor.execute(issuesSQL) result = cursor.fetchall() db.close() issueList = [] repos = initializeConnections.getConnectionGitHub() for row in result: lastSentence = "" author = "" commentId = "" repos_issue_list = [repos.issue(int(row[0]))] for repos_issue in repos_issue_list: print("mining issue number :" + str(row[0])) for comment in repos_issue.comments(): print("sentence tokenized") sentTokenList = getSentenceTokens(comment.body_text) for sentence in sentTokenList: print("word tookenized") tokens = getTokens(sentence) if label in tokens: print("label is present") print("adding sentences") print(sentence) lastSentence = sentence commentId = comment.id author = comment.user.login print("changed last sentence") if lastSentence != "": print("rnnning sentiment analysis") sentimentAnalysisResult = sentimentanalysistest.getTextBasedClassification( lastSentence) print(lastSentence) issueList.append([ row[0], commentId, author, lastSentence, sentimentAnalysisResult ]) print("issuelist appended") displayheaders = ["Issue", "Id", "author", "CommentText", "Result"] print(tabulate(issueList, displayheaders, tablefmt="github")) exceldata = [displayheaders] exceldata = exceldata + issueList excelexport.saveExcel(exceldata, "issuecommentsentimentanalysis_" + label) return issueList labels = commitModifiedFilesUtil.getDistinctLabels() for label in labels: print(label[0]) getIssuesNotMatchingWithLabel("bug")
print("Frequency of bugs created") frequency.getIssueFrequency() print("Frequency of issues created with respect to labels") frequency.getIssueFrequencyForLabelsCreation("api") import excelexport print("save data list to an excel file") excelexport.saveExcel([], "samplefilename") import initializeConnections print("get sql connection object") initializeConnections.getConnectionMySql() print("get github connection object") initializeConnections.getConnectionGitHub() import issuelabelUtil print("get number of issues with respect to label") issuelabelUtil.getNumberofIssuewrtLabel() import subscribers print("gets subscribers data for the repository") subscribers.getSubscribersData() import userinfo