Ejemplo n.º 1
0
 def __init__(self):
     """Create the helper objects this instance works with.

     Everything is configured from the project-level ``config`` module:
     the repository directory and the CodeChecker database file.
     """
     # Version-control access, built from the configured repository dir.
     self.vcs = GitProvider(config.getRepoDir())
     # CodeChecker results database, built from the configured DB file.
     self.ccdb = CCDatabase(config.getCcDbFile())
     # Analyzer wrapper, built from the same repository dir as the VCS.
     self.codeChecker = CodeChecker(config.getRepoDir())
     self.checkers = Checkers()
     # NOTE(review): loadCommitList is not part of this fragment; it
     # presumably populates the commit list from self.vcs - confirm.
     self.loadCommitList()
Ejemplo n.º 2
0
class Predictor:
    """Interactively propose model-generated fixes for CodeChecker reports.

    The class wires together the VCS provider, the CodeChecker database,
    the analyzer wrapper and the checker-specific token extractor, all
    configured through the project-level ``config`` module.
    """

    def __init__(self):
        """Build the collaborators and load the commit list."""
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()
        self.loadCommitList()

    def loadCommitList(self):
        """Store all versions of the configured branch and select index 0."""
        branch = config.getBranch()
        self.commits = self.vcs.getAllVersions(branch)
        self.currentCommitIndex = 0

    def convertFilePathToRepoRelativePath(self, path):
        """Return ``path`` expressed relative to the configured repo dir."""
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every report the analyzer diff lists
        as resolved."""
        resolved = self.codeChecker.diffResolved(
            config.getCcRunName(), config.getTmpDir(), self.ccdb)
        return [bug['reportId'] for bug in resolved]

    def predict(self, id, checker):
        """Predict (and optionally verify and apply) fixes for reports.

        Args:
            id: Report id to process. The value ``-1`` selects every bug
                the database returns for ``checker``; in that mode each
                bug is only checked against the model input size
                ("Good to go") and no prediction is made.
            checker: Checker name; selects the model file and dictionary.

        For a single report the suggested fix is printed and the user is
        asked on stdin whether to apply it. When
        ``config.cfVerifyPrediction`` is set, the fix is written to the
        file and re-analyzed first, and a rejected fix is reverted by
        checking out the current commit again.
        """
        print("Loading bug data...")
        if id == -1:
            # First column of each returned row is used as the report id.
            ids = [row[0] for row in self.ccdb.getAllBugsForChecker(checker)]
        else:
            ids = [id]

        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        # Indexed by verification status: 0, 1, 2.
        verdicts = ('NOT OK', 'OK', 'Skipped')

        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        print("Starting predictions...")
        for bugId in ids:
            allData = self.ccdb.getBugData(bugId)
            supported = (allData.getChecker() in globals.availableCheckers
                         and allData.getChecker() == checker)
            if not supported:
                print("Bug #{0} - checker not supported".format(bugId))
                continue

            # Extra tokens derived from the checker message.
            checkerInfo = self.checkers.extractTokensForChecker(
                allData.getChecker(), allData.getMessage())

            # Pull the buggy fragment out of the file at the current commit.
            relativePath = self.convertFilePathToRepoRelativePath(
                allData.getFile())
            sourceText = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex])
            extractor = CodeExtractor(allData)
            extractor.loadCodeFromText(sourceText)
            extractor.extractBugCode()
            bugCodeFragment = extractor.getBugCodeFragment()

            encoded, unkList = self.coder.encode(
                bugCodeFragment, checkerData=checkerInfo)

            # The model's first layer fixes the maximum sequence length.
            maxLen = model.get_layer(index=0).input_shape[1]
            if len(encoded) > maxLen:
                print("Bug #{0} - Code too big for model, ignored".format(
                    bugId))
                continue
            if id == -1:
                print("Bug #{0} - Good to go".format(bugId))
                continue

            padCount = maxLen - len(encoded)
            if padCount > 0:
                encoded = self.coder.applyPadding(encoded, padCount)
            oneHotShape = (maxLen, self.totalDictionaryLength)
            X = np.zeros((1,) + oneHotShape)
            X[0] = self.coder.convertToOneHot(encoded, np.zeros(oneHotShape))

            # Predict, then map the one-hot output back to source text.
            Y = self.coder.convertFromOneHot(model.predict(X)[0])
            print(Y)
            Y = self.coder.removePadding(Y)
            fixCodeFragment = self.coder.decode(Y, unkList)

            # 2 = verification skipped, 1 = report resolved, 0 = not resolved.
            vStatus = 2
            if config.cfVerifyPrediction:
                # Write the fix to disk and re-run the analyzer on it.
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(allData.getFile())
                self.codeChecker.check(True)
                vStatus = 1 if bugId in self.getDiffResolvedIds() else 0

            print("Bug #{0} - summary".format(bugId))
            print("== Code fragment with bug ==")
            print(bugCodeFragment)
            print("== Suggested fix ==")
            print(fixCodeFragment)
            print("Verification: {0}".format(verdicts[vStatus]))

            answer = input("Apply fix? (y/n): ")
            while answer not in ('y', 'n'):
                answer = input("Apply fix? (y/n): ")

            if answer == 'y':
                if not config.cfVerifyPrediction:
                    # Verification did not already write the fix to disk.
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(allData.getFile())
            elif config.cfVerifyPrediction:
                # Rejected: undo the change verification wrote to disk.
                self.vcs.checkout(self.commits[self.currentCommitIndex])
            print('Done')
        print("All done, exiting...")
Ejemplo n.º 3
0
 def __init__(self):
     """Create the helper objects this instance works with.

     Both the VCS provider and the analyzer wrapper are built from the
     configured repository directory; the CodeChecker database is built
     from the configured database file.
     """
     # Version-control access for the configured repository dir.
     self.vcs = GitProvider(config.getRepoDir())
     # CodeChecker results database.
     self.ccdb = CCDatabase(config.getCcDbFile())
     # Analyzer wrapper for the same repository dir.
     self.codeChecker = CodeChecker(config.getRepoDir())
Ejemplo n.º 4
0
class TestDbBuilder:
    """Walk the VCS history and store bug/fix code pairs in the fix database.

    For each visited version the analyzer is run, the reports it lists as
    resolved are looked up, and the code before and after the fix is
    extracted and stored.
    """

    def __init__(self):
        """Build the VCS provider, CodeChecker database and analyzer."""
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())

    def loadCommitList(self, clean=False):
        """Load the branch's versions and set the starting index.

        With ``clean`` the index starts one past the end of the list.
        Otherwise it is positioned on the last commit recorded in
        ``self.db`` (so ``prepareDb`` must have run first), and the list
        may be truncated to end at that commit.
        """
        self.commits = self.vcs.getAllVersions(config.getBranch())
        if clean:
            self.currentCommitIndex = len(self.commits)
            return

        lastCommit = self.db.getLastCommit()
        lastIndex = self.commits.index(lastCommit) + 1
        if lastIndex < len(self.commits) - 1:
            self.commits = self.commits[0:lastIndex]
        self.currentCommitIndex = lastIndex - 1

    def prepareDb(self, clean=False):
        """Open the fix database, wiping it first when ``clean`` is set."""
        self.db = CFDatabase(config.getCfDbFile())
        if clean:
            self.db.clean()

    def checkoutToNextVersion(self):
        """Step the commit index down by one and check that commit out.

        Returns:
            False once the index drops below zero (nothing is checked
            out in that case), True otherwise.
        """
        self.currentCommitIndex -= 1
        if self.currentCommitIndex < 0:
            return False
        self.vcs.checkout(self.commits[self.currentCommitIndex])
        return True

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every report the analyzer diff lists
        as resolved."""
        resolved = self.codeChecker.diffResolved(
            config.getCcRunName(), config.getTmpDir(), self.ccdb)
        return [bug['reportId'] for bug in resolved]

    def convertFilePathToRepoRelativePath(self, path):
        """Return ``path`` expressed relative to the configured repo dir."""
        return os.path.relpath(path, config.getRepoDir())

    def extractCode(self, id):
        """Build the fix data for report ``id``, or return None.

        The file is read at ``commits[currentCommitIndex + 1]`` (with the
        bug) and at ``commits[currentCommitIndex]`` (without it). None is
        returned when the report is unknown, a file lookup raises
        ``KeyError``, extraction raises ``ValueError``, or the fix did not
        use exactly one diff.
        """
        bugData = self.ccdb.getNotResolvedBugData(id)
        # TODO: Possible improvement for bugData
        if bugData is None:
            # TODO: Implement custom errors
            return None

        relativePath = self.convertFilePathToRepoRelativePath(
            bugData.getFile())
        try:
            codeWithBug = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex + 1])
            codeWithoutBug = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex])
        except KeyError:
            return None

        diff = POSIXDiffer().diff(codeWithBug, codeWithoutBug)

        extractor = CodeExtractor(bugData)
        try:
            extractor.extractAll(codeWithBug, diff)
        except ValueError:
            return None

        bugCodeFragment = extractor.getBugCodeFragment()
        fixCodeFragment = extractor.getFixCodeFragment()

        # Easy version - ignore bug if none or more than one diff used to fix
        # TODO: Possible improvement here
        if len(extractor.getUsedDiffs()) != 1:
            return None

        # The stored line is the bug line relative to the fragment start.
        return entities.FixData(bugCodeFragment, fixCodeFragment,
                                bugData.getChecker(), bugData.getMessage(),
                                bugData.getLine() - bugData.getStartLine())

    def _storeResultsAndCleanUp(self, storeMessage):
        """Store analyzer results and version info, then clean up.

        ``storeMessage`` is the progress text printed before the analyzer
        results for the current commit are stored.
        """
        print(storeMessage, end='')
        self.codeChecker.store(self.commits[self.currentCommitIndex])
        print('done')
        print('Storing version information... ', end='')
        self.db.storeLastCommit(self.commits[self.currentCommitIndex])
        print('done')
        print('Cleaning up tmp directory... ', end='')
        shutil.rmtree(config.getTmpDir())
        print('done')
        print('Cleaning up working directory... ', end='')
        self.codeChecker.clean()
        print('done')

    def prepareEnv(self, clean=False):
        """Open the database and commit list; on ``clean`` also analyze and
        store the first checked-out version as the baseline."""
        print('Preparing train db... ', end='')
        self.prepareDb(clean)
        print('done')
        print('Loading commit list... ', end='')
        self.loadCommitList(clean)
        print('done')
        if not clean:
            return

        print('Checking out to root... ', end='')
        self.checkoutToNextVersion()
        print('done')
        print('Initial analysis... ', end='')
        self.codeChecker.check(True)
        print('done')
        self._storeResultsAndCleanUp('Storing initial results... ')

    def findAndStoreFixDataForVersion(self):
        """Analyze the current version and store fix data for each report
        the analyzer diff lists as resolved."""
        version = self.commits[self.currentCommitIndex]
        print('Analyzing version', version, '... ', end='')
        self.codeChecker.check(True)
        print('done')
        print('Getting list of resolved bugs for version',
              version,
              '... ',
              end='')
        ids = self.getDiffResolvedIds()
        print('done')

        total = len(ids)
        uncommitted = False
        for position, reportId in enumerate(ids, start=1):
            print('Parsing data for bug ({0}/{1}, #{2})...'.format(
                position, total, reportId),
                  sep='',
                  end='')
            fixData = self.extractCode(reportId)
            print('done')
            if fixData is not None:
                print('Storing fixData... ', end='')
                self.db.store(fixData.getBugCode(), fixData.getFixCode(),
                              fixData.getChecker(), fixData.getMessage(),
                              fixData.getLine())
                uncommitted = True
                print('done')
            # Commit in batches; the counter checked is one past the
            # position of the bug just processed.
            if (position + 1) % 100 == 0 and uncommitted:
                self.db.commit()
                uncommitted = False
        if uncommitted:
            self.db.commit()

        self._storeResultsAndCleanUp(
            'Storing CodeChecker results for this version... ')

    def iterateThroughVcsHistory(self):
        """Process every remaining version until the index runs out."""
        while self.checkoutToNextVersion():
            self.findAndStoreFixDataForVersion()

    def checkoutToTop(self):
        """Check out the configured branch again."""
        self.vcs.checkout(config.getBranch())

    def build(self, clean=False):
        """Run the whole pipeline: prepare, walk history, restore branch."""
        self.prepareEnv(clean)
        self.iterateThroughVcsHistory()
        self.checkoutToTop()
Ejemplo n.º 5
0
class Verifier():
    """Check a working tree for newly introduced CodeChecker reports and
    offer model-generated fixes for them.

    ``main`` ends the process with exit status 1 when the user aborts.
    """

    def __init__(self):
        """Build the VCS provider, CodeChecker database, analyzer wrapper
        and checker token extractor from the project configuration."""
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()

    def convertFilePathToRepoRelativePath(self, path):
        """Return ``path`` expressed relative to the configured repo dir."""
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every report the analyzer diff lists
        as resolved."""
        resolved = self.codeChecker.diffResolved(config.getCcRunName(), config.getTmpDir(), self.ccdb)
        return [bug['reportId'] for bug in resolved]

    def getBugDataFromDiff(self, obj):
        """Convert one analyzer diff entry (a nested mapping) to ``BugData``.

        The reported line number is passed in three positional slots and
        the status is fixed to ``'New'``.
        NOTE(review): the meaning of each BugData positional argument is
        not visible here - confirm against the BugData definition.
        """
        location = obj['location']
        line = int(location['line'])
        return BugData(line, line, location['file_name'], obj['check_name'], 'New', obj['description'], line, None)

    def getDiffNew(self):
        """Return a ``BugData`` for every report the analyzer diff lists
        as new."""
        new = self.codeChecker.diffNew(config.getCcRunName(), config.getTmpDir(), self.ccdb)
        return [self.getBugDataFromDiff(bug) for bug in new]

    def isBugDataEqual(self, b1, b2):
        """Return True when both bugs agree on line, checker, message,
        file, status and review status."""
        return (b1.getLine() == b2.getLine()
                and b1.getChecker() == b2.getChecker()
                and b1.getMessage() == b2.getMessage()
                and b1.getFile() == b2.getFile()
                and b1.getStatus() == b2.getStatus()
                and b1.getReviewStatus() == b2.getReviewStatus())

    def displaySuggestions(self, suggestions):
        """Print every suggestion; code is shown only for suggestions
        whose verification was positive (1) or skipped (2)."""
        statuses = ['Negative', 'Positive', 'Skipped']
        for s in suggestions:
            print("File: {2}, L{0}, Type: {1}".format(s.bug.getLine(), s.bug.getChecker(), s.file))
            print("Verification status: {0}".format(statuses[s.verificationStatus]))
            if s.verificationStatus in [1, 2]:
                print("Code fragment with bug: \n{0}".format(s.bugCode))
                print("Suggested code fragment for replacement: \n{0}".format(s.fixCode))

    def applyValidFixes(self, suggestions, files):
        """Apply all positive/skipped suggestions, file by file.

        Within a file, fixes are applied from the highest line number
        down, so an earlier edit does not shift the lines of a later one.
        After each file its change is handed to the VCS provider.
        """
        for f in files:
            bugs = [s for s in suggestions
                    if s.file == f and s.verificationStatus in [1, 2]]
            bugs.sort(key=lambda x: x.bug.getLine(), reverse=True)
            for b in bugs:
                # Report the file of the suggestion being applied; the
                # variable of the filtering loop above must not be used.
                print("File: {2}, L{0}, Type: {1}... ".format(b.bug.getLine(), b.bug.getChecker(), b.file), end='')
                self.applyFix(b)
                print("Applied")
            self.vcs.applyChangeForFile(f)

    def applyFix(self, s):
        """Re-extract the bug fragment from disk, replace it with the
        suggested fix and save the file."""
        extractor = CodeExtractor(s.bug)
        extractor.loadCodeFromFile()
        extractor.extractBugCode()
        extractor.applyFix(s.fixCode)
        extractor.saveToFile(s.bug.getFile())

    def _readChoice(self, allowed):
        """Read single characters from stdin until one is in ``allowed``.

        Returns 'q' on end of input: ``read(1)`` then returns an empty
        string forever, which would otherwise spin the prompt loop
        without end when stdin is closed or not interactive.
        """
        while True:
            c = sys.stdin.read(1)
            if c == '':
                return 'q'
            if c in allowed:
                return c

    def main(self):
        """Analyze the tree, suggest fixes for new bugs and ask what to do.

        Returns normally when the commit may proceed and calls
        ``sys.exit(1)`` when the user (or end of input) aborts.
        """
        # Do analysis
        shutil.rmtree(config.getTmpDir())
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()

        if not newBugs:
            print('No new bugs introduced, commit is accepted!')
            return

        print("New bugs found! Count: {0}. Attempting repairs...".format(len(newBugs)))

        # Load one model per supported checker
        models = {checker: load_model(config.cfModelFilenameFormat.format(checker))
                  for checker in globals.availableCheckers}

        # Load all content from files having new bugs
        files = {self.convertFilePathToRepoRelativePath(x.getFile()) for x in newBugs}
        fileContents = {}
        for f in files:
            # Join instead of concatenating, so a repo dir without a
            # trailing separator still gives a valid path.
            with open(os.path.join(config.getRepoDir(), f), 'r') as fh:
                fileContents[f] = fh.read()

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = [x for x in newBugs if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
            bugs.sort(key=lambda x: x.getLine(), reverse=True)
            print("=== File: {0} ===".format(f))
            # For each bug get a suggestion and test it
            for b in bugs:
                print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
                if b.getChecker() not in models:
                    # No model was loaded for this checker; skip the bug
                    # instead of failing on the model lookup below.
                    print("Ignored: checker not supported")
                    continue
                model = models[b.getChecker()]
                # Prepare useful data
                dictionary = Dictionary(b.getChecker())
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()
                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(b.getChecker(), b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()
                # Encode it
                encodedBugData, initialUnkList = coder.encode(bugCodeFragment, checkerData=checkerInfo)
                # The model's first layer fixes the maximum sequence length
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]

                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue

                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData, noZerosToPad)
                # Convert to one-hot
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(encodedBugData, np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))
                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(model.predict(X)[0])
                Y = coder.removePadding(Y)
                # Decode; the last element of the decoded result is dropped
                # NOTE(review): presumably a trailing separator - confirm.
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]

                # 2 = verification skipped, 1 = positive, 0 = negative
                vStatus = 2
                if config.cfVerifyPrediction:
                    # Apply fix in source code file
                    extractor.applyFix(fixCodeFragment)
                    extractor.saveToFile(b.getFile())
                    # Run CodeChecker and analyze code
                    shutil.rmtree(config.getTmpDir())
                    compilationLog = self.codeChecker.check(True)
                    newBugsAfterFix = self.getDiffNew()
                    # Fixed = build still succeeds and the bug is gone
                    isFixed = 'Build failed' not in compilationLog
                    for nb in newBugsAfterFix:
                        if self.isBugDataEqual(b, nb):
                            isFixed = False
                    vStatus = 1 if isFixed else 0
                    # Revert file to the content read before the loop
                    extractor.loadCodeFromText(fileContents[f])
                    extractor.saveToFile(b.getFile())
                if vStatus == 0:
                    print("Verification: Negative, cannot be applied")
                elif vStatus == 1:
                    print("Verification: Positive, can be applied")
                    validSuggestions += 1
                elif vStatus == 2:
                    print("Verification: Skipped")
                    validSuggestions += 1
                suggestions.append(SuggestionData(f, b, bugCodeFragment, fixCodeFragment, vStatus))
        print("Valid suggestions prepared for {0} / {1} bugs.".format(validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            while True:
                c = self._readChoice(('a', 'i', 'd', 'q'))
                if c != 'd':
                    break
                # Showing the suggestions does not settle the question.
                self.displaySuggestions(suggestions)
                print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
            if c == 'q':
                print("Aborting commit...")
                sys.exit(1)
            if c == 'i':
                print("Fixes ignored...")
            else:
                print("Applying fixes...")
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")
        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            if self._readChoice(('c', 'q')) == 'q':
                print("Aborting commit...")
                sys.exit(1)
            print("Continuing...")
        else:
            print("Bugs corrected, commit is good to go!")
Ejemplo n.º 6
0
    if bugData.getChecker() == 'clang-diagnostic-unused-parameter':
        return fixUnusedParam(code, bugData)
    if bugData.getChecker() == 'clang-diagnostic-constant-conversion':
        return fixConstConv(code, bugData)
    return None


# --- Script setup: open data sources and initialize working state ---

# CodeChecker results database, built from the configured DB file.
ccdb = CCDatabase(config.getCcDbFile())
# Local SQLite results database; the path is relative to the working dir.
db = sqlite3.connect('../Results/db.sqlite')
cursor = db.cursor()
# Read every row of the `bugs` table up front.
cursor.execute('SELECT * FROM bugs')
dataFromDb = cursor.fetchall()
bugs = []
# Lists of bug ids, keyed by column 2 of each `bugs` row.
# NOTE(review): column 2 is presumably the file name - confirm the schema.
bugsPerFile = {}
# Value of column 3 in a `bugs` row that marks a bug as not yet processed.
BUG_NOT_PROCESSED = 0
vcs = GitProvider(config.getRepoDir())
checkers = Checkers()
# First entry of the branch's version list.
# NOTE(review): presumably the newest commit - confirm getAllVersions order.
currentCommit = vcs.getAllVersions(config.getBranch())[0]
# Bug data objects keyed by column 0 of each `bugs` row.
bugDataList = {}
fileContents = {}
codechecker = CodeChecker(config.getRepoDir())

if len(dataFromDb) > 0:
    print("Skipping steps 1-2, DB already filled with data")
    for bug in dataFromDb:
        if bug[2] not in bugsPerFile:
            bugsPerFile[bug[2]] = []
        if bug[3] == BUG_NOT_PROCESSED:
            bugDataList[bug[0]] = ccdb.getNotResolvedBugData(bug[0])
            bugsPerFile[bug[2]].append(bug[0])
else: