def check_and_xform(self, ast):
    """Semantically check a whole program and return the transformed AST.

    Top-level code is wrapped into a main class, every class header is
    registered in ``checker.types``, and the registered classes are then
    checked in order of inheritance depth: first the top level of each
    class, then the function bodies of each class.

    ``ast`` must be the node tagged ``'PROGRAM'``.
    """
    assert ast.tag == 'PROGRAM'
    self.coord = ast.coord
    splits = Splits(ast)
    self.class_names = []

    # Wrap top-level code into the $Main class.
    main_class = MainClass(splits)
    checker.types.put(main_class.name, main_class)
    self.class_names.append(main_class.name)

    # Check class headers; classes flagged by is_multi_def are not registered.
    for class_ast in splits.classes:
        assert class_ast.tag == 'CLASS'
        if is_multi_def(checker.types, class_ast.type_name, class_ast.coord):
            continue
        class_name = class_ast.type_name
        self.class_names.append(class_name)
        class_info = self.make_class_info(class_ast)
        checker.types.put(class_name, class_info)
    self.supers_check()

    # Order class names by depth in the inheritance hierarchy.
    def depth_of(class_name):
        return checker.types.get(class_name).depth

    self.class_names = sorted(self.class_names, key=depth_of)

    # Semantic check of the top level of each class.
    for class_name in self.class_names:
        class_info = checker.types.get(class_name)
        assert class_info.kind == 'class'
        class_info.check_and_xform()

    # Semantic check of the function bodies within each class.
    body_checker = CodeChecker()
    for class_name in self.class_names:
        class_info = checker.types.get(class_name)
        assert class_info.kind == 'class'
        body_checker.check_and_xform(class_info)

    return self.xformed_ast()
def __init__(self):
    """Create the helper objects from ``config`` and load the commit list."""
    # Version-control access rooted at the configured repository directory.
    repoDir = config.getRepoDir()
    self.vcs = GitProvider(repoDir)

    # Database opened from the configured CodeChecker DB file.
    ccDbFile = config.getCcDbFile()
    self.ccdb = CCDatabase(ccDbFile)

    # Analyzer wrapper for the same repository directory.
    analyzerDir = config.getRepoDir()
    self.codeChecker = CodeChecker(analyzerDir)

    self.checkers = Checkers()
    self.loadCommitList()
class Predictor():
    """Interactively suggest model-generated fixes for bugs read from ``CCDatabase``."""

    def __init__(self):
        """Create the helper objects from ``config`` and load the commit list."""
        repoDir = config.getRepoDir()
        self.vcs = GitProvider(repoDir)
        ccDbFile = config.getCcDbFile()
        self.ccdb = CCDatabase(ccDbFile)
        analyzerDir = config.getRepoDir()
        self.codeChecker = CodeChecker(analyzerDir)
        self.checkers = Checkers()
        self.loadCommitList()

    def loadCommitList(self):
        """Fetch all versions of the configured branch and point at index 0."""
        branch = config.getBranch()
        self.commits = self.vcs.getAllVersions(branch)
        self.currentCommitIndex = 0

    def convertFilePathToRepoRelativePath(self, path):
        """Return *path* relative to the configured repository directory."""
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every entry yielded by ``diffResolved``."""
        resolvedBugs = self.codeChecker.diffResolved(
            config.getCcRunName(), config.getTmpDir(), self.ccdb)
        return [entry['reportId'] for entry in resolvedBugs]

    def predict(self, id, checker):
        """Predict a fix for one bug, or survey all bugs of *checker*.

        Args:
            id: Report id to process. When it is ``-1`` every bug returned
                by ``getAllBugsForChecker(checker)`` is visited instead; in
                that mode bugs that fit the model are only reported as
                "Good to go" and no prediction is made.
            checker: Checker name; selects the model file and dictionary.
        """
        # Load all bugs
        print("Loading bug data...")
        if id == -1:
            bugIds = [row[0] for row in self.ccdb.getAllBugsForChecker(checker)]
        else:
            bugIds = [id]

        # Loading model
        print("Loading model...")
        model = load_model(config.cfModelFilenameFormat.format(checker))
        model.summary()
        # Indexed by the verification status computed below (0, 1 or 2).
        statusLabels = ['NOT OK', 'OK', 'Skipped']

        # Initialize coder
        print("Initializing coder...")
        self.dictionary = Dictionary(checker)
        self.coder = Coder(self.dictionary)
        self.totalDictionaryLength = self.dictionary.length()

        # Predicting
        print("Starting predictions...")
        for bugId in bugIds:
            bugData = self.ccdb.getBugData(bugId)
            isSupported = (bugData.getChecker() in globals.availableCheckers
                           and bugData.getChecker() == checker)
            if not isSupported:
                print("Bug #{0} - checker not supported".format(bugId))
                continue

            # Load extra tokens from checker message
            checkerTokens = self.checkers.extractTokensForChecker(
                bugData.getChecker(), bugData.getMessage())

            # Retrieve code fragment with bug
            relativePath = self.convertFilePathToRepoRelativePath(
                bugData.getFile())
            sourceWithBug = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex])
            extractor = CodeExtractor(bugData)
            extractor.loadCodeFromText(sourceWithBug)
            extractor.extractBugCode()
            bugCodeFragment = extractor.getBugCodeFragment()

            # Encode it
            encoded, initialUnkList = self.coder.encode(
                bugCodeFragment, checkerData=checkerTokens)

            # Convert to one-hot
            maxInputLen = model.get_layer(index=0).input_shape[1]
            if len(encoded) > maxInputLen:
                print("Bug #{0} - Code too big for model, ignored".format(bugId))
                continue
            if id == -1:
                print("Bug #{0} - Good to go".format(bugId))
                continue
            missing = maxInputLen - len(encoded)
            if missing > 0:
                encoded = self.coder.applyPadding(encoded, missing)
            modelInput = np.zeros((1, maxInputLen, self.totalDictionaryLength))
            modelInput[0] = self.coder.convertToOneHot(
                encoded, np.zeros((maxInputLen, self.totalDictionaryLength)))

            # Predict and convert from one-hot
            predicted = self.coder.convertFromOneHot(model.predict(modelInput)[0])
            print(predicted)

            # Decode
            predicted = self.coder.removePadding(predicted)
            fixCodeFragment = self.coder.decode(predicted, initialUnkList)

            # Verify?
            verificationStatus = 2
            if config.cfVerifyPrediction:
                # Apply fix in source code file
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(bugData.getFile())
                # Run CodeChecker and analyze code
                self.codeChecker.check(True)
                resolvedIds = self.getDiffResolvedIds()
                # The fix counts as verified when this bug is now resolved
                verificationStatus = 1 if bugId in resolvedIds else 0

            # Print
            print("Bug #{0} - summary".format(bugId))
            print("== Code fragment with bug ==")
            print(bugCodeFragment)
            print("== Suggested fix ==")
            print(fixCodeFragment)
            print("Verification: {0}".format(statusLabels[verificationStatus]))

            answer = ' '
            while answer not in ('y', 'n'):
                answer = input("Apply fix? (y/n): ")
            if answer == 'n' and config.cfVerifyPrediction:
                # Verification already wrote the fix to disk: revert file contents
                self.vcs.checkout(self.commits[self.currentCommitIndex])
            elif answer == 'y' and not config.cfVerifyPrediction:
                # Nothing was written yet: apply fix in source code file
                extractor.applyFix(fixCodeFragment)
                extractor.saveToFile(bugData.getFile())
            print('Done')
        print("All done, exiting...")
def __init__(self):
    """Create the VCS, database and analyzer helpers from ``config``."""
    repoDir = config.getRepoDir()
    self.vcs = GitProvider(repoDir)

    ccDbFile = config.getCcDbFile()
    self.ccdb = CCDatabase(ccDbFile)

    analyzerDir = config.getRepoDir()
    self.codeChecker = CodeChecker(analyzerDir)
class TestDbBuilder():
    """Walk the VCS history and store bug/fix code pairs for resolved bugs."""

    def __init__(self):
        """Create the VCS, database and analyzer helpers from ``config``."""
        repoDir = config.getRepoDir()
        self.vcs = GitProvider(repoDir)
        ccDbFile = config.getCcDbFile()
        self.ccdb = CCDatabase(ccDbFile)
        analyzerDir = config.getRepoDir()
        self.codeChecker = CodeChecker(analyzerDir)

    def loadCommitList(self, clean=False):
        """Load the branch's versions and position the commit cursor.

        With ``clean`` the cursor starts one past the end of the list.
        Otherwise it is placed on the last commit recorded in ``self.db``
        (so ``prepareDb`` must have run first), and the list may be cut
        off right after that commit.
        """
        self.commits = self.vcs.getAllVersions(config.getBranch())
        if clean:
            self.currentCommitIndex = len(self.commits)
            return
        lastCommit = self.db.getLastCommit()
        cutOff = self.commits.index(lastCommit) + 1
        if cutOff < len(self.commits) - 1:
            self.commits = self.commits[0:cutOff]
        self.currentCommitIndex = cutOff - 1

    def prepareDb(self, clean=False):
        """Open the fix database and, when ``clean`` is set, clean it."""
        self.db = CFDatabase(config.getCfDbFile())
        if clean:
            self.db.clean()

    def checkoutToNextVersion(self):
        """Step the cursor one index down and check that commit out.

        Returns ``False`` (without checking anything out) once the cursor
        drops below zero, ``True`` otherwise.
        """
        self.currentCommitIndex -= 1
        if self.currentCommitIndex >= 0:
            self.vcs.checkout(self.commits[self.currentCommitIndex])
            return True
        return False

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every entry yielded by ``diffResolved``."""
        resolvedBugs = self.codeChecker.diffResolved(
            config.getCcRunName(), config.getTmpDir(), self.ccdb)
        return [entry['reportId'] for entry in resolvedBugs]

    def convertFilePathToRepoRelativePath(self, path):
        """Return *path* relative to the configured repository directory."""
        return os.path.relpath(path, config.getRepoDir())

    def extractCode(self, id):
        """Build the ``FixData`` for bug *id*, or return ``None``.

        ``None`` is returned when the bug data is missing, when reading
        the file contents raises ``KeyError``, when extraction raises
        ``ValueError``, or when the number of diffs used is not exactly 1.
        """
        bugData = self.ccdb.getNotResolvedBugData(id)
        # TODO: Possible improvement for bugData
        if bugData is None:
            # TODO: Implement custom errors
            return None

        relativePath = self.convertFilePathToRepoRelativePath(bugData.getFile())
        try:
            # The commit at index + 1 supplies the code with the bug,
            # the current one the code without it.
            sourceWithBug = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex + 1])
            sourceWithoutBug = self.vcs.getFileContents(
                relativePath, self.commits[self.currentCommitIndex])
        except KeyError:
            return None

        diff = POSIXDiffer().diff(sourceWithBug, sourceWithoutBug)
        extractor = CodeExtractor(bugData)
        try:
            extractor.extractAll(sourceWithBug, diff)
        except ValueError:
            return None

        bugCodeFragment = extractor.getBugCodeFragment()
        fixCodeFragment = extractor.getFixCodeFragment()
        usedDiffs = extractor.getUsedDiffs()
        # Easy version - ignore bug if none or more than one diff used to fix
        # TODO: Possible improvement here
        if len(usedDiffs) == 1:
            return entities.FixData(bugCodeFragment,
                                    fixCodeFragment,
                                    bugData.getChecker(),
                                    bugData.getMessage(),
                                    bugData.getLine() - bugData.getStartLine())
        return None

    def _storeVersionAndCleanUp(self, storeMessage):
        """Store analysis results and the current commit, then clean up."""
        currentCommit = self.commits[self.currentCommitIndex]
        print(storeMessage, end='')
        self.codeChecker.store(currentCommit)
        print('done')
        print('Storing version information... ', end='')
        self.db.storeLastCommit(currentCommit)
        print('done')
        print('Cleaning up tmp directory... ', end='')
        shutil.rmtree(config.getTmpDir())
        print('done')
        print('Cleaning up working directory... ', end='')
        self.codeChecker.clean()
        print('done')

    def prepareEnv(self, clean=False):
        """Open the database, load the commits and, if ``clean``, run the initial analysis."""
        print('Preparing train db... ', end='')
        self.prepareDb(clean)
        print('done')
        print('Loading commit list... ', end='')
        self.loadCommitList(clean)
        print('done')
        if not clean:
            return
        print('Checking out to root... ', end='')
        self.checkoutToNextVersion()
        print('done')
        print('Initial analysis... ', end='')
        self.codeChecker.check(True)
        print('done')
        self._storeVersionAndCleanUp('Storing initial results... ')

    def findAndStoreFixDataForVersion(self):
        """Analyze the current commit and store fix data for each resolved bug."""
        print('Analyzing version',
              self.commits[self.currentCommitIndex],
              '... ',
              end='')
        self.codeChecker.check(True)
        print('done')
        print('Getting list of resolved bugs for version',
              self.commits[self.currentCommitIndex],
              '... ',
              end='')
        resolvedIds = self.getDiffResolvedIds()
        print('done')

        total = len(resolvedIds)
        pendingCommit = False
        for position, bugId in enumerate(resolvedIds, start=1):
            print('Parsing data for bug ({0}/{1}, #{2})...'.format(
                position, total, bugId), sep='', end='')
            fixData = self.extractCode(bugId)
            print('done')
            if fixData is not None:
                print('Storing fixData... ', end='')
                self.db.store(fixData.getBugCode(),
                              fixData.getFixCode(),
                              fixData.getChecker(),
                              fixData.getMessage(),
                              fixData.getLine())
                pendingCommit = True
                print('done')
            # Commit periodically: whenever the number of the next bug is a
            # multiple of 100 and something has been stored since the last commit.
            if (position + 1) % 100 == 0 and pendingCommit:
                self.db.commit()
                pendingCommit = False
        if pendingCommit:
            self.db.commit()

        self._storeVersionAndCleanUp(
            'Storing CodeChecker results for this version... ')

    def iterateThroughVcsHistory(self):
        """Process every remaining version until the cursor runs out."""
        while self.checkoutToNextVersion():
            self.findAndStoreFixDataForVersion()

    def checkoutToTop(self):
        """Check out the configured branch."""
        self.vcs.checkout(config.getBranch())

    def build(self, clean=False):
        """Prepare the environment, walk the history, then return to the branch."""
        self.prepareEnv(clean)
        self.iterateThroughVcsHistory()
        self.checkoutToTop()
class Verifier():
    """Detect newly introduced bugs and offer model-generated fixes for them."""

    def __init__(self):
        """Create the VCS, database, analyzer and checker helpers from ``config``."""
        self.vcs = GitProvider(config.getRepoDir())
        self.ccdb = CCDatabase(config.getCcDbFile())
        self.codeChecker = CodeChecker(config.getRepoDir())
        self.checkers = Checkers()

    def convertFilePathToRepoRelativePath(self, path):
        """Return *path* relative to the configured repository directory."""
        return os.path.relpath(path, config.getRepoDir())

    def getDiffResolvedIds(self):
        """Return the ``reportId`` of every entry yielded by ``diffResolved``."""
        resolved = self.codeChecker.diffResolved(config.getCcRunName(),
                                                 config.getTmpDir(),
                                                 self.ccdb)
        return [bug['reportId'] for bug in resolved]

    def getBugDataFromDiff(self, obj):
        """Build a ``BugData`` from one diff entry.

        The entry's single reported line is passed for all three line
        arguments of ``BugData``; the status is always ``'New'``.
        """
        line = int(obj['location']['line'])
        return BugData(line, line, obj['location']['file_name'],
                       obj['check_name'], 'New', obj['description'],
                       line, None)

    def getDiffNew(self):
        """Return a ``BugData`` for every entry yielded by ``diffNew``."""
        new = self.codeChecker.diffNew(config.getCcRunName(),
                                       config.getTmpDir(),
                                       self.ccdb)
        return [self.getBugDataFromDiff(bug) for bug in new]

    def isBugDataEqual(self, b1, b2):
        """Return ``True`` when both bugs agree on line, checker, message,
        file, status and review status."""
        return (b1.getLine() == b2.getLine()
                and b1.getChecker() == b2.getChecker()
                and b1.getMessage() == b2.getMessage()
                and b1.getFile() == b2.getFile()
                and b1.getStatus() == b2.getStatus()
                and b1.getReviewStatus() == b2.getReviewStatus())

    def displaySuggestions(self, suggestions):
        """Print every suggestion; code fragments are shown only for
        verification status 1 (positive) or 2 (skipped)."""
        statuses = ['Negative', 'Positive', 'Skipped']
        for s in suggestions:
            print("File: {2}, L{0}, Type: {1}".format(s.bug.getLine(),
                                                      s.bug.getChecker(),
                                                      s.file))
            print("Verification status: {0}".format(
                statuses[s.verificationStatus]))
            if s.verificationStatus in [1, 2]:
                print("Code fragment with bug: \n{0}".format(s.bugCode))
                print("Suggested code fragment for replacement: \n{0}".format(
                    s.fixCode))

    def applyValidFixes(self, suggestions, files):
        """Apply every suggestion with status 1 or 2, file by file.

        Args:
            suggestions: Objects exposing ``file``, ``bug``, ``fixCode`` and
                ``verificationStatus``.
            files: Iterable of file names to process; after the fixes of a
                file are applied ``vcs.applyChangeForFile`` is called for it.
        """
        for f in files:
            bugs = [s for s in suggestions
                    if s.file == f and s.verificationStatus in [1, 2]]
            # Highest line first (presumably so that an applied fix does not
            # shift the line numbers of the fixes still to be applied).
            bugs.sort(key=lambda x: x.bug.getLine(), reverse=True)
            for b in bugs:
                # Report the file of the suggestion being applied (b), not
                # of whatever suggestion the filtering above visited last.
                print("File: {2}, L{0}, Type: {1}... ".format(
                    b.bug.getLine(), b.bug.getChecker(), b.file), end='')
                self.applyFix(b)
                print("Applied")
            self.vcs.applyChangeForFile(f)

    def applyFix(self, s):
        """Load the bug's file, replace the bug fragment with ``s.fixCode`` and save."""
        extractor = CodeExtractor(s.bug)
        extractor.loadCodeFromFile()
        extractor.extractBugCode()
        extractor.applyFix(s.fixCode)
        extractor.saveToFile(s.bug.getFile())

    def _readChoice(self, allowed):
        """Read stdin one character at a time until one of *allowed* appears.

        Characters outside *allowed* (e.g. the newline after a choice) are
        skipped. On end of input no decision can be obtained, so the commit
        is aborted with exit status 1 instead of looping forever.
        """
        while True:
            c = sys.stdin.read(1)
            if c == '':
                print("No input available, aborting commit...")
                sys.exit(1)
            if c in allowed:
                return c

    def main(self):
        """Analyze the working tree, suggest fixes for new bugs and ask what to do.

        Returns normally when the commit may proceed; calls ``sys.exit(1)``
        when the user (or end of input) aborts it.
        """
        # Do analysis
        shutil.rmtree(config.getTmpDir())
        self.codeChecker.check(True)

        # Diff new
        newBugs = self.getDiffNew()
        if len(newBugs) < 1:
            print('No new bugs introduced, commit is accepted!')
            return
        print("New bugs found! Count: {0}. Attempting repairs...".format(
            len(newBugs)))

        # Load models
        models = {checker: load_model(config.cfModelFilenameFormat.format(checker))
                  for checker in globals.availableCheckers}

        # Load all content from files having new bugs
        files = {self.convertFilePathToRepoRelativePath(x.getFile())
                 for x in newBugs}
        fileContents = {}
        for f in files:
            # f is repo-relative; join so that the result does not depend on
            # the repository directory ending with a path separator.
            with open(os.path.join(config.getRepoDir(), f), 'r') as fh:
                fileContents[f] = fh.read()

        # For each file sort by bug line desc
        suggestions = []
        validSuggestions = 0
        for f in files:
            bugs = [x for x in newBugs
                    if self.convertFilePathToRepoRelativePath(x.getFile()) == f]
            bugs.sort(key=lambda x: x.getLine(), reverse=True)
            print("=== File: {0} ===".format(f))

            # For each bug get a suggestion and test it
            for b in bugs:
                print("L{0}, Type: {1}".format(b.getLine(), b.getChecker()))
                if b.getChecker() not in models:
                    # No model was loaded for this checker; without this
                    # guard the model lookup below raises KeyError.
                    print("Ignored: checker not supported")
                    continue
                model = models[b.getChecker()]

                # Prepare useful data
                dictionary = Dictionary(b.getChecker())
                coder = Coder(dictionary)
                totalDictionaryLength = dictionary.length()

                # Prepare and extract bug fragment
                checkerInfo = self.checkers.extractTokensForChecker(
                    b.getChecker(), b.getMessage())
                extractor = CodeExtractor(b)
                extractor.loadCodeFromText(fileContents[f])
                extractor.extractBugCode()
                bugCodeFragment = extractor.getBugCodeFragment()

                # Encode it
                encodedBugData, initialUnkList = coder.encode(
                    bugCodeFragment, checkerData=checkerInfo)

                # Convert to one-hot
                MODEL_X_MAX_LEN = model.get_layer(index=0).input_shape[1]
                if len(encodedBugData) > MODEL_X_MAX_LEN:
                    print("Ignored: Code too big for model")
                    continue
                noZerosToPad = MODEL_X_MAX_LEN - len(encodedBugData)
                if noZerosToPad > 0:
                    encodedBugData = coder.applyPadding(encodedBugData,
                                                        noZerosToPad)
                X = np.zeros((1, MODEL_X_MAX_LEN, totalDictionaryLength))
                X[0] = coder.convertToOneHot(
                    encodedBugData,
                    np.zeros((MODEL_X_MAX_LEN, totalDictionaryLength)))

                # Predict and convert from one-hot
                Y = coder.convertFromOneHot(model.predict(X)[0])
                Y = coder.removePadding(Y)

                # Decode (the last element of the decoded result is dropped)
                fixCodeFragment = coder.decode(Y, initialUnkList)[:-1]

                # Verify?
                vStatus = 2
                if config.cfVerifyPrediction:
                    try:
                        # Apply fix in source code file
                        extractor.applyFix(fixCodeFragment)
                        extractor.saveToFile(b.getFile())
                        # Run CodeChecker and analyze code
                        shutil.rmtree(config.getTmpDir())
                        compilationLog = self.codeChecker.check(True)
                        newBugsAfterFix = self.getDiffNew()
                    finally:
                        # Revert file, even when the analysis itself fails,
                        # so a tentative fix never stays in the working tree.
                        extractor.loadCodeFromText(fileContents[f])
                        extractor.saveToFile(b.getFile())
                    # Fixed means: the build did not fail and the same bug
                    # is no longer reported as new.
                    isFixed = ('Build failed' not in compilationLog
                               and not any(self.isBugDataEqual(b, nb)
                                           for nb in newBugsAfterFix))
                    vStatus = 1 if isFixed else 0

                if vStatus == 0:
                    print("Verification: Negative, cannot be applied")
                elif vStatus == 1:
                    print("Verification: Positive, can be applied")
                    validSuggestions += 1
                elif vStatus == 2:
                    print("Verification: Skipped")
                    validSuggestions += 1
                suggestions.append(SuggestionData(f, b, bugCodeFragment,
                                                  fixCodeFragment, vStatus))

        print("Valid suggestions prepared for {0} / {1} bugs.".format(
            validSuggestions, len(newBugs)))

        if validSuggestions > 0:
            print("Apply valid suggestions (a), display them (d), ignore them (i) or abort commit (q)?")
            applyFixes = False
            while True:
                c = self._readChoice(('a', 'i', 'd', 'q'))
                if c == 'a':
                    applyFixes = True
                    print("Applying fixes...")
                    break
                if c == 'i':
                    print("Fixes ignored...")
                    break
                if c == 'd':
                    self.displaySuggestions(suggestions)
                    print("Apply valid suggestions (a), ignore them (i) or abort commit (q)?")
                    continue
                # c == 'q'
                print("Aborting commit...")
                sys.exit(1)
            if applyFixes:
                self.applyValidFixes(suggestions, files)
                print("Fixes applied!")

        if validSuggestions != len(newBugs):
            print("Unable to fix all bugs, continue with commit (c) or abort (q)?")
            if self._readChoice(('c', 'q')) == 'q':
                print("Aborting commit...")
                sys.exit(1)
            print("Continuing...")
        else:
            print("Bugs corrected, commit is good to go!")
ccdb = CCDatabase(config.getCcDbFile()) db = sqlite3.connect('../Results/db.sqlite') cursor = db.cursor() cursor.execute('SELECT * FROM bugs') dataFromDb = cursor.fetchall() bugs = [] bugsPerFile = {} BUG_NOT_PROCESSED = 0 vcs = GitProvider(config.getRepoDir()) checkers = Checkers() currentCommit = vcs.getAllVersions(config.getBranch())[0] bugDataList = {} fileContents = {} codechecker = CodeChecker(config.getRepoDir()) if len(dataFromDb) > 0: print("Skipping steps 1-2, DB already filled with data") for bug in dataFromDb: if bug[2] not in bugsPerFile: bugsPerFile[bug[2]] = [] if bug[3] == BUG_NOT_PROCESSED: bugDataList[bug[0]] = ccdb.getNotResolvedBugData(bug[0]) bugsPerFile[bug[2]].append(bug[0]) else: # 1. print("Step 1") lists = [ '../Results/ID_checker1.txt', '../Results/ID_checker2.txt', '../Results/ID_checker3.txt', '../Results/ID_checker4.txt'
def __init__(self):
    """Create the database, analyzer and checker helpers and an empty code list."""
    ccDbFile = config.getCcDbFile()
    self.ccdb = CCDatabase(ccDbFile)

    repoDir = config.getRepoDir()
    self.codeChecker = CodeChecker(repoDir)

    self.code = list()
    self.checkers = Checkers()