def getDistinctLabels():
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    distinct_labelSQL = "select distinct(Labels) from IssueLabelMapping"
    cursor.execute(distinct_labelSQL)

    result = cursor.fetchall()
    db.close()

    labels = []
    for row in result:
        labels.append(row[0])

    return labels
Ejemplo n.º 2
0
def mineCommits():
    labels = commitModifiedFilesUtil.getDistinctLabels()
    print(str(labels))
    repos = initializeConnections.getConnectionGitHub()
    issue_minedlabel = {}
    print("mine open issues")
    for repo_issue in repos.issues(state='closed'):
        print(repo_issue.number)
        if (repo_issue.number < 1203):
            mined_labels = []
            for issue_comment in repo_issue.comments():
                print("getting tokens")
                tokens = getTokens(issue_comment.body_text)
                print("got tokens")
                for label in labels:
                    if ((label in tokens) and (label not in mined_labels)):
                        mined_labels.append(label)
            if (len(mined_labels) > 0):
                issue_minedlabel[repo_issue.number] = mined_labels

    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    #    print("clearing data in commentminedlabels table")
    #    clearDataSQL = "DELETE FROM commentminedlabels"
    #    cursor.execute(clearDataSQL)
    #    db.commit()

    placeholders = ', '.join(['%s'] * 2)
    insertCommentminedlabelsSQLQuery = "INSERT INTO commentminedlabels VALUES ({})".format(
        placeholders)

    print("populating commentminedlabels issue table")
    for (issue_number, mined_labelinfo) in issue_minedlabel.items():
        for label in mined_labelinfo:
            print(str(issue_number))
            print(str(label))
            if issue_number != '':
                cursor.execute(insertCommentminedlabelsSQLQuery,
                               [issue_number, label])

    db.commit()
def getNumberofFilesTouched(label):
    from tabulate import tabulate
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    file_issuefixSQL = "select FILENAME,Count( distinct ILM.Issue) as TOTAL,GROUP_CONCAT(distinct ILM.Issue SEPARATOR ',') as Issues  from IssueLabelMapping as ILM inner join CommitIssueMapping on ILM.Issue = CommitIssueMapping.ISSUE inner join CommitModifiedFilesMapping on CommitIssueMapping.COMMIT = CommitModifiedFilesMapping.COMMIT where Labels = '" + label + "'  group by FILENAME order by TOTAL DESC;"
    cursor.execute(file_issuefixSQL)

    result = cursor.fetchall()
    db.close()

    formatted_data = []
    for row in result:
        formatted_data.append([row[0], row[1], row[2]])

    print(
        tabulate(formatted_data,
                 ["FILEPATH", "TOTAL", label.upper()],
                 tablefmt="github"))

    return formatted_data
Ejemplo n.º 4
0
def getNumberofIssuewrtLabel():
    from tabulate import tabulate
    import excelexport
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    labelIssues = "select Labels , Count(*) as TotalIssues from IssueLabelMapping group by Labels order by TotalIssues desc;"

    cursor.execute(labelIssues)
    result = cursor.fetchall()
    db.close()
    formatteddata = []

    for row in result:
        formatteddata.append([row[0], row[1]])

    displayheaders = ["Labels", "TotalIssues"]
    print(tabulate(formatteddata, displayheaders, tablefmt="github"))

    exceldata = [displayheaders]
    exceldata = exceldata + formatteddata
    excelexport.saveExcel(exceldata, "LabelsTotalIssues")
    return formatteddata
Ejemplo n.º 5
0
def getIssueFrequencyForLabelsClosed(label):
    from tabulate import tabulate
    import excelexport
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()
    
    labelFreq = "SELECT YEAR(CLOSEDAT), MONTH(CLOSEDAT) MONTH, COUNT(*) TOTALISSUES FROM Issues inner join IssueLabelMapping ON Issues.NUMBER = IssueLabelMapping.Issue where IssueLabelMapping.Labels = '"+label+"' GROUP BY YEAR(CLOSEDAT), MONTH(CLOSEDAT) ORDER BY TOTALISSUES DESC"
    
    cursor.execute(labelFreq)
    result = cursor.fetchall()
    db.close()
    formatteddata = []
    
    for row in result :
        formatteddata.append([row[0],row[1],row[2]])
        
    displayheaders= ["YEAR","MONTH","NOOF" + label]    
    print(tabulate(formatteddata,displayheaders , tablefmt="github"))
    
    exceldata = [displayheaders]
    exceldata = exceldata + formatteddata
    excelexport.saveExcel(exceldata,"frequencyofClosed"+label)
    return formatteddata
Ejemplo n.º 6
0
def getCommitFrequency():
    from tabulate import tabulate
    import excelexport
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()
    
    commitFreq = "SELECT YEAR(COMMITEDDATE), MONTH(COMMITEDDATE) MONTH, COUNT(*) TOTALCOMMIT FROM Commits GROUP BY YEAR(COMMITEDDATE), MONTH(COMMITEDDATE) ORDER BY TOTALCOMMIT DESC"
    
    cursor.execute(commitFreq)
    result = cursor.fetchall()
    
    formatteddata = []
    
    for row in result :
        formatteddata.append([row[0],row[1],row[2]])
        
    displayheaders= ["YEAR","MONTH","NOOFCOMMITS"]    
    print(tabulate(formatteddata,displayheaders , tablefmt="github"))
    
    exceldata = [displayheaders]
    exceldata = exceldata + formatteddata
    excelexport.saveExcel(exceldata,"frequencyofcommits")
    return formatteddata
Ejemplo n.º 7
0
def getNoOfCommitsByUsers():
    from tabulate import tabulate
    import excelexport
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    commitsSQL = "select EMAIL, count(*) as total  from Commits group by EMAIL order by total desc"

    cursor.execute(commitsSQL)
    result = cursor.fetchall()

    formatteddata = []

    for row in result:
        formatteddata.append([row[0], row[1]])

    displayheaders = ["EMAIL", "TOTALCOMMITS"]
    print(tabulate(formatteddata, displayheaders, tablefmt="github"))

    exceldata = [displayheaders]
    exceldata = exceldata + formatteddata
    excelexport.saveExcel(exceldata, "UserCommits")
    return formatteddata
Ejemplo n.º 8
0
def getNoOfIssueByLabel(label):
    from tabulate import tabulate
    import excelexport
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    labelFreq = "SELECT USERLOGIN, COUNT(*) as TOTAL  FROM Issues inner join IssueLabelMapping ON Issues.NUMBER = IssueLabelMapping.Issue where IssueLabelMapping.Labels = '" + label + "' group by USERLOGIN order by TOTAL desc"

    cursor.execute(labelFreq)
    result = cursor.fetchall()

    formatteddata = []

    for row in result:
        formatteddata.append([row[0], row[1]])

    displayheaders = ["USERNAME", "TOTAL" + label]
    print(tabulate(formatteddata, displayheaders, tablefmt="github"))

    exceldata = [displayheaders]
    exceldata = exceldata + formatteddata
    excelexport.saveExcel(exceldata, "usercreatedissues" + label)
    return formatteddata
Ejemplo n.º 9
0
def getSubscribersData():
    from tabulate import tabulate
    import excelexport
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    subFreq = "SELECT YEAR(SUBSCRIBEDDATE), MONTH(SUBSCRIBEDDATE) MONTH, COUNT(*) TOTALSUBSCRIBERS FROM subscribers  GROUP BY YEAR(SUBSCRIBEDDATE), MONTH(SUBSCRIBEDDATE) ORDER BY TOTALSUBSCRIBERS DESC"

    cursor.execute(subFreq)
    result = cursor.fetchall()
    db.close()

    formatteddata = []

    for row in result:
        formatteddata.append([row[0], row[1], row[2]])

    displayheaders = ["YEAR", "MONTH", "NOOFSUBSCRIBERS"]
    print(tabulate(formatteddata, displayheaders, tablefmt="github"))

    exceldata = [displayheaders]
    exceldata = exceldata + formatteddata
    excelexport.saveExcel(exceldata, "frequencyofSubscribers")
    return formatteddata
Ejemplo n.º 10
0
def getIssuesNotMatchingWithLabel(label):
    from tabulate import tabulate
    import excelexport
    import initializeConnections
    import sentimentanalysistest
    label = "bug"
    db = initializeConnections.getConnectionMySql()
    cursor = db.cursor()

    issuesSQL = "SELECT Issue FROM minegithub.commentminedlabels WHERE Label = '" + label + "' AND Issue NOT IN (SELECT Issue FROM minegithub.IssueLabelMapping WHERE Labels ='" + label + "')"

    cursor.execute(issuesSQL)
    result = cursor.fetchall()

    db.close()
    issueList = []

    repos = initializeConnections.getConnectionGitHub()
    for row in result:
        lastSentence = ""
        author = ""
        commentId = ""
        repos_issue_list = [repos.issue(int(row[0]))]
        for repos_issue in repos_issue_list:
            print("mining issue number :" + str(row[0]))
            for comment in repos_issue.comments():
                print("sentence tokenized")
                sentTokenList = getSentenceTokens(comment.body_text)
                for sentence in sentTokenList:
                    print("word tookenized")
                    tokens = getTokens(sentence)
                    if label in tokens:
                        print("label is present")
                        print("adding sentences")
                        print(sentence)
                        lastSentence = sentence
                        commentId = comment.id
                        author = comment.user.login
                        print("changed last sentence")

            if lastSentence != "":
                print("rnnning sentiment analysis")
                sentimentAnalysisResult = sentimentanalysistest.getTextBasedClassification(
                    lastSentence)
                print(lastSentence)
                issueList.append([
                    row[0], commentId, author, lastSentence,
                    sentimentAnalysisResult
                ])
                print("issuelist appended")

    displayheaders = ["Issue", "Id", "author", "CommentText", "Result"]
    print(tabulate(issueList, displayheaders, tablefmt="github"))

    exceldata = [displayheaders]
    exceldata = exceldata + issueList
    excelexport.saveExcel(exceldata, "issuecommentsentimentanalysis_" + label)
    return issueList

    labels = commitModifiedFilesUtil.getDistinctLabels()
    for label in labels:
        print(label[0])
        getIssuesNotMatchingWithLabel("bug")
Ejemplo n.º 11
0
print("Frequency of bugs created")
frequency.getIssueFrequency()

print("Frequency of issues created with respect to labels")
frequency.getIssueFrequencyForLabelsCreation("api")

import excelexport

print("save data list to an excel file")
excelexport.saveExcel([], "samplefilename")

import initializeConnections

print("get sql connection object")
initializeConnections.getConnectionMySql()

print("get github connection object")
initializeConnections.getConnectionGitHub()

import issuelabelUtil

print("get number of issues with respect to label")
issuelabelUtil.getNumberofIssuewrtLabel()

import subscribers

print("gets subscribers data for the repository")
subscribers.getSubscribersData()

import userinfo