def run(df, predAtK):
    '''Run line-level repair prediction on every row of df and tabulate the outcome.

    Each row supplies the columns 'id', 'sourceText', 'targetText',
    'targetLineText', 'targetLineAbs', 'lineNums_Text', 'sourceLineText' and
    'sourceLineAbs'. Source and target texts are parsed into Code objects and
    abstracted, line numbers are obtained from errLoc, and runPerLine produces
    the predicted repair. A result tuple is recorded only for rows whose
    source Code object reports more than zero errors.

    Args:
        df: pandas DataFrame holding the columns listed above.
        predAtK: forwarded unchanged to runPerLine.

    Returns:
        A pandas DataFrame with one row per recorded program, using the
        column names in `columns` below.
    '''
    startTime = timer()
    columns = ['id', 'sourceText', 'targetText', 'predText', 'actLineNums', 'predLineNums',
               'actSourceLine', 'localSourceLine', 'targetLine', 'predLine',
               'actSourceAbsLine', 'localSourceAbsLine', 'targetAbsLine', 'predAbsLine',
               'errSet', 'isLocated', 'isRelevant', 'isConcretized', 'isExactMatch', 'isCompiled']
    results = []
    numRows = len(df)

    # Progress is tracked by row position, not by the index label that
    # iterrows() yields: a label may be non-sequential or non-integer (e.g.
    # after filtering), which would make the progress report wrong or make
    # the modulo below raise.
    for rowPos, (_, row) in enumerate(df.iterrows()):
        srcID, trgtID = str(row['id']) + '_source', str(row['id']) + '_target'
        srcText, trgtText = str(row['sourceText']), str(row['targetText'])
        trgtErrLines = str(row['targetLineText']).strip()
        trgtErrAbsLines = str(row['targetLineAbs']).strip()
        actLinesStr = str(row['lineNums_Text'])

        # Parse the source/erroneous code and the target code
        srcCodeObj, trgtCodeObj = Code(srcText, codeID=srcID), Code(trgtText, codeID=trgtID)
        srcLines, trgtLines = srcText.splitlines(), trgtText.splitlines()
        errSet = ClusterError.getErrSetStr(AllErrs, srcCodeObj)

        # Fetch the abstraction of both programs
        srcAbsLines = AbstractWrapper.getProgAbstraction(srcCodeObj)
        trgtAbsLines = AbstractWrapper.getProgAbstraction(trgtCodeObj)

        # Fetch the line numbers to repair
        lineNums = errLoc(activeLocalization, srcCodeObj, actLinesStr, useTracers_errLoc)

        if srcCodeObj.getNumErrors() > 0:  # If there are errors
            # Run prediction on all erroneous lines
            predText, srcErrLines, predErrLines, srcErrAbsLines, predErrAbsLines, isConcretized, isExactMatch = \
                runPerLine(srcCodeObj, srcLines, trgtLines, srcAbsLines, trgtAbsLines, errSet, lineNums, predAtK)

            # Calculate accuracy and log it
            isLocated, isRelevant, isCompiled = calcAccuracy(
                actLinesStr, lineNums, trgtText, trgtErrAbsLines, predErrAbsLines, predErrLines, predText)

            results.append((
                row['id'], srcText, trgtText, predText, actLinesStr, H.joinList(lineNums),
                row['sourceLineText'], H.joinList(srcErrLines), trgtErrLines, H.joinLL(predErrLines),
                row['sourceLineAbs'], H.joinLL(srcErrAbsLines), trgtErrAbsLines, H.joinLL(predErrAbsLines),
                errSet,
                H.toInt(isLocated), H.toInt(isRelevant), H.toInt(isConcretized),
                H.toInt(isExactMatch), H.toInt(isCompiled)))

        # Report progress every 100 rows (the very first row is skipped)
        if rowPos != 0 and rowPos % 100 == 0:
            print('\t...', rowPos, '/', numRows, 'Completed')

    endTime = timer()
    print('\n#Programs=', numRows, 'Time Taken=', round(endTime - startTime, 2), '(s)')

    return pd.DataFrame(results, columns=columns)
def repairErrLine(srcCodeObj, repairLines, repairAbsLines, srcAbsLine, trgtLine, trgtAbsLine, errSetLine, lineNum, predErrAbsLines, predErrLines, predAtK):
    '''Try every Pred@K candidate for one line and keep the one with the fewest errors.

    The baseline error count comes from a Code object built out of the
    current repairLines. Each candidate yielded by Predict.predictAbs is
    written into copies of repairLines/repairAbsLines at position lineNum - 1,
    concretized through ConcreteWrapper.attemptConcretization, and kept only
    if the resulting Code object reports strictly fewer errors than the best
    seen so far.

    Returns:
        (bestPredAbsLine, bestPredLine, bestPredAbsLines, bestPredLines,
        isConcretized, isExactMatch). The first two are None and the line
        lists are the unmodified inputs when no candidate improved on the
        baseline. isConcretized folds the per-candidate flags with H.NoneAnd,
        isExactMatch folds the checkRelevant2 results with H.NoneOr.
    '''
    concretizedSoFar, exactSoFar = None, None
    winnerAbsLine, winnerLine = None, None
    winnerAbsLines, winnerLines = repairAbsLines, repairLines

    # Error count before any candidate is applied
    baselineCode = Code(H.joinList(repairLines))
    fewestErrs = baselineCode.getNumErrors()

    candidates = Predict.predictAbs(srcAbsLine, errSetLine, trgtAbsLine, predAtK)
    for candAbsLine in candidates:
        # Work on private copies so a rejected candidate leaves no trace
        candLines = copy.deepcopy(repairLines)
        candAbsLines = copy.deepcopy(repairAbsLines)
        candAbsLines[lineNum - 1] = H.joinList(candAbsLine, joinStr=' ')

        # Turn the abstract prediction into a concrete line
        candLine, candConcretized = ConcreteWrapper.attemptConcretization(
            srcCodeObj, lineNum, candAbsLine)
        candLines[lineNum - 1] = H.joinList(candLine, joinStr=' ')

        # Fold this candidate's flags into the running summaries
        concretizedSoFar = H.NoneAnd(concretizedSoFar, candConcretized)
        exactSoFar = H.NoneOr(exactSoFar, checkRelevant2(candAbsLine, trgtAbsLine))

        # Skip candidates that do not strictly reduce the error count
        candCode = Code(H.joinList(candLines))
        if not (fewestErrs is None or candCode.getNumErrors() < fewestErrs):
            continue

        fewestErrs = candCode.getNumErrors()
        winnerAbsLines, winnerLines = candAbsLines, candLines
        winnerAbsLine, winnerLine = candAbsLine, candLine

    return winnerAbsLine, winnerLine, winnerAbsLines, winnerLines, concretizedSoFar, exactSoFar
def runPerLine(srcCodeObj, srcLines, trgtLines, srcAbsLines, trgtAbsLines, errSet, lineNums, predAtK):
    '''For each flagged line number, call repairErrLine and accumulate the repairs.

    Repairs are applied cumulatively: the line lists returned by one
    repairErrLine call are the input of the next.

    Args:
        srcCodeObj: Code object of the source program, forwarded to helpers.
        srcLines, srcAbsLines: concrete and abstract lines of the source.
        trgtLines, trgtAbsLines: concrete and abstract lines of the target.
        errSet: program-level error-set string, used unless the module flag
            flagErrSet_Line requests a per-line error set.
        lineNums: iterable of 1-based line numbers; each is passed to int().
        predAtK: forwarded unchanged to repairErrLine.

    Returns:
        (predText, srcErrLines, predErrLines, srcErrAbsLines,
        predErrAbsLines, isConcretized, isExactMatch). predText is
        H.joinList of the final repaired lines; the two flags fold the
        per-line results with H.NoneAnd and stay None if no line was valid.
    '''
    srcErrLines, srcErrAbsLines = [], []
    predErrLines, predErrAbsLines = [], []
    repairLines, repairAbsLines = copy.deepcopy(srcLines), copy.deepcopy(srcAbsLines)
    isConcretized, isExactMatch = None, None

    # Highest line number present in both the concrete and abstract views
    maxSrcLine = min(len(srcLines), len(srcAbsLines))
    maxTrgtLine = min(len(trgtLines), len(trgtAbsLines))

    # For each flagged line number
    for lineNum in lineNums:
        lineNum = int(lineNum)

        # Line numbers are 1-based. Zero or a negative number would wrap
        # around via Python's negative indexing and repair a line counted
        # from the end of the file, so reject it like any other invalid one.
        if not 1 <= lineNum <= maxSrcLine:
            continue

        srcLine, srcAbsLine = srcLines[lineNum - 1], srcAbsLines[lineNum - 1]

        # The target may be shorter than the source; then there is no
        # target line at this position.
        trgtLine, trgtAbsLine = None, None
        if lineNum <= maxTrgtLine:
            trgtLine, trgtAbsLine = trgtLines[lineNum - 1], trgtAbsLines[lineNum - 1]

        srcErrLines.append(srcLine)
        srcErrAbsLines.append(srcAbsLine)

        # Use ErrSet at line=lineNum? Or at program-level
        errSetLine = errSet
        if flagErrSet_Line:
            errSetLine = ClusterError.getErrSetStr(AllErrs, srcCodeObj, lineNum=lineNum)

        # Predict@K the concrete repair line
        predAbsLine, predLine, repairAbsLines, repairLines, tempIsConcretized, tempIsExactMatch = repairErrLine(
            srcCodeObj, repairLines, repairAbsLines, srcAbsLine, trgtLine, trgtAbsLine, errSetLine, lineNum,
            predErrAbsLines, predErrLines, predAtK)

        # Fold this line's flags into the overall result
        isConcretized = H.NoneAnd(isConcretized, tempIsConcretized)
        isExactMatch = H.NoneAnd(isExactMatch, tempIsExactMatch)

        # Record the predicted abstract and concrete line, if one was chosen
        if predAbsLine is not None:
            predErrAbsLines.append(predAbsLine)
            predErrLines.append(predLine)

    predText = H.joinList(repairLines)
    return predText, srcErrLines, predErrLines, srcErrAbsLines, predErrAbsLines, isConcretized, isExactMatch
def createClass(fnameDataset):
    '''Given a dataset (CSV) file, replace old error-IDs (obtained using regex)
    with new ones (obtained using Clang LLVM).

    For every row, a Code object is built from 'sourceText' and its error-set
    string is computed with getErrSetStr. The new class is that string,
    a newline, and H.joinList of every line of the old 'errSet_diffs' value
    except its first. The results are stored in the columns 'class' and
    'newErrSet', and the CSV at fnameDataset is overwritten in place.

    Args:
        fnameDataset: path of the CSV file to read and overwrite.
    '''
    df = pd.read_csv(fnameDataset, encoding="ISO-8859-1")
    allErrs = getAllErrs(CF.fname_newErrIDs)
    classes, newErrSets = [], []
    mult = 10  # next progress threshold, in percent
    numRows = len(df)

    # read_csv is called without index_col, so the label i is the row position
    for i, row in df.iterrows():
        oldClass = row['errSet_diffs']
        codeObj = Code(row['sourceText'])

        newErrsetStr = getErrSetStr(allErrs, codeObj)
        # Drop the first line of the old class and put the new error set there
        newClass = newErrsetStr + '\n' + H.joinList(oldClass.splitlines()[1:])

        newErrSets.append(newErrsetStr)
        classes.append(newClass)

        # Print a progress marker each time another threshold is crossed
        if i >= numRows * mult / 100:
            print(str(mult) + '%', end=' ', flush=True)
            mult += 10

    df['class'] = classes
    df['newErrSet'] = newErrSets
    # NOTE(review): the file is read as ISO-8859-1 but written back without an
    # explicit encoding, i.e. with pandas' default - confirm that readers of
    # this file expect that.
    df.to_csv(fnameDataset, index=False)
def getErrSetStr(allErrs, codeObj, lineNum=None):
    '''Return the error set of codeObj as a ';'-separated string ending in ';'.

    allErrs, codeObj and lineNum are passed straight to getErrSet; its result
    is joined by H.joinList with ';' and one more ';' is appended.
    '''
    joined = H.joinList(getErrSet(allErrs, codeObj, lineNum), ';')
    return joined + ';'
def __str__(self):
    '''Return self.abstractTokens joined by H.joinList with a single space.'''
    tokens = self.abstractTokens
    return H.joinList(tokens, ' ')
def printProgAbstraction():
    '''Print the abstraction of the program stored in CF.inputPath + 'temp.c'.

    The file is read in full, wrapped in a Code object and passed to
    getProgAbstraction; each returned line is printed after being joined by
    H.joinList with a single space.
    '''
    # The context manager closes the file handle as soon as the text is read,
    # instead of leaving it to the garbage collector.
    with open(CF.inputPath + 'temp.c') as srcFile:
        codeText = srcFile.read()

    codeObj = Code(codeText)
    for line in getProgAbstraction(codeObj):
        print(H.joinList(line, ' '))