Exemple #1
0
def writeAbstractions():
    """Write the abstractions of the first 10 source/target pairs to a new CSV.

    Reads ``subset-srcTrgtPairs.csv`` from the dataset folder, extends the
    header with ``NEW_SrcAbs`` and ``NEW_TrgtAbs``, appends the joined program
    abstraction of ``sourceText`` and ``targetText`` to each processed row and
    writes those rows to ``subset-srcTrgtPairs_newAbs.csv``.
    """
    folder = './final_all_noindent_singleL/'
    inputBase = folder + 'subset-srcTrgtPairs'
    outputBase = inputBase + '_newAbs'

    headers, rows = H.readCSV(inputBase + '.csv')
    headers += ['NEW_SrcAbs', 'NEW_TrgtAbs']

    outRows = []
    # Only the first 10 rows are processed.
    for done, row in enumerate(rows[:10], start=1):
        programTexts = [row[headers.index('sourceText')],
                        row[headers.index('targetText')]]

        # The new cells are appended to the row itself: source first, then target.
        for programText in programTexts:
            abstraction = getProgAbstraction(Code(programText))
            row.append(H.joinLL(abstraction))

        print(done, row[headers.index('sourceID')])
        outRows.append(row)

    H.writeCSV(outputBase + '.csv', headers, outRows)
Exemple #2
0
    def getAbstractLiteral(self):
        """Add the abstract token(s) for a literal token.

        A literal is treated as quoted text when its cursor type is
        ``TypeKind.CONSTANTARRAY`` or when its spelling (length >= 2) starts
        and ends with a quote character. Quoted text yields: the first
        character, then either the extracted format specifiers or a single
        ``<kind>_CHAR`` / ``<kind>_STRING`` placeholder (nothing at all when the
        inside is empty), then the last character. Otherwise the literal becomes
        ``<kind>_INT``, ``<kind>_DOUBLE`` or, failing both,
        ``<kind>_<cursorType>`` (which is error-logged unless the cursor type is
        INVALID).
        """
        quoteChars = ['\'', '"']

        looksQuoted = (
            self.cursorType == TypeKind.CONSTANTARRAY
            or (len(self.spell) >= 2
                and self.spell[0] in quoteChars
                and self.spell[-1] in quoteChars)
        )

        if looksQuoted:
            # Leading character of the spelling (the opening quote)
            self.addAbstract(self.spell[0], self.spell[0])

            inner = self.spell[1:-1]
            # Nothing is emitted for an empty body, which keeps "" / '' distinguishable
            if len(inner) > 0:
                specs = self.extractFormatSpec(inner)
                if len(specs) > 0:
                    # Format specifiers replace the char/string placeholder entirely
                    for spec in specs:
                        self.addAbstract(spec, spec)
                else:
                    suffix = '_CHAR' if len(inner) == 1 else '_STRING'
                    self.addAbstract(str(self.kind) + suffix, inner)

            # Trailing character of the spelling (the closing quote)
            self.addAbstract(self.spell[-1], self.spell[-1])
            return

        if isInt(self.spell):
            self.addAbstract(str(self.kind) + '_INT', self.spell)
            return

        if isFloat(self.spell):
            self.addAbstract(str(self.kind) + "_DOUBLE", self.spell)
            return

        # Neither quoted text nor a number: fall back to the cursor type
        fallbackName = str(self.kind) + '_' + str(self.cursorType)
        self.addAbstract(fallbackName, self.spell)
        if self.cursorType != TypeKind.INVALID:
            # Record literal types other than INVALID that end up here
            H.errorLog([
                ['CodeID', self.codeID],
                ['AbstractToken new literal-type', fallbackName],
                ['lineNum', self.lineNum],
                ['spell', self.spell],
            ])
Exemple #3
0
def writeTypeKind():
    """Dump every distinct (spell, kind, cursorType) token combination to CSV.

    Tokenizes the ``sourceText`` of each row of ``subset-srcTrgtPairs.csv`` and
    writes one ``spell, kind, cursorTypeKind`` row per distinct combination to
    ``TokenKind.csv`` in the same folder.
    """
    separator = '!@#$%'
    folder = './final_all_noindent_singleL/'
    inputName = folder + 'subset-srcTrgtPairs.csv'
    outputName = folder + 'TokenKind.csv'

    headers, rows = H.readCSV(inputName)
    outHeaders = ['spell', 'kind', 'cursorTypeKind']

    # spell -> {"<kind><separator><cursorType>": 0}; the inner dict only
    # serves to de-duplicate combinations while keeping first-seen order.
    seenBySpell = collections.defaultdict(dict)

    for processed, row in enumerate(rows, start=1):
        program = Code(row[headers.index('sourceText')])
        for rawToken in program.getTokens():
            wrapped = CToken(rawToken, program)
            combined = str(wrapped.kind) + separator + str(wrapped.cursorType)
            seenBySpell[wrapped.spell][combined] = 0

        print(processed, row[headers.index('sourceID')])

    outRows = []
    for spell, combos in seenBySpell.items():
        for combined in combos:
            parts = combined.split(separator)
            outRows.append([spell, parts[0], parts[1]])

    H.writeCSV(outputName, outHeaders, outRows)
Exemple #4
0
def run(df, predAtK):
    """Run localization, repair prediction and scoring over every row of ``df``.

    For each row the source and target programs are parsed and abstracted and
    the candidate line numbers are obtained from ``errLoc``. Only when the
    source program reports at least one error is it repaired via ``runPerLine``,
    scored via ``calcAccuracy`` and recorded.

    Args:
        df: DataFrame providing the columns ``id``, ``sourceText``,
            ``targetText``, ``targetLineText``, ``targetLineAbs``,
            ``lineNums_Text``, ``sourceLineText`` and ``sourceLineAbs``.
        predAtK: forwarded unchanged to ``runPerLine``.

    Returns:
        A DataFrame with one row per source program that had errors.
    """
    began = timer()
    outColumns = [
        'id', 'sourceText', 'targetText', 'predText', 'actLineNums', 'predLineNums',
        'actSourceLine', 'localSourceLine', 'targetLine', 'predLine',
        'actSourceAbsLine', 'localSourceAbsLine', 'targetAbsLine', 'predAbsLine',
        'errSet', 'isLocated', 'isRelevant', 'isConcretized', 'isExactMatch', 'isCompiled',
    ]
    records = []

    for i, row in df.iterrows():
        # Identifiers and raw texts of this source/target pair
        srcID = str(row['id']) + '_source'
        trgtID = str(row['id']) + '_target'
        srcText = str(row['sourceText'])
        trgtText = str(row['targetText'])
        trgtErrLines = str(row['targetLineText']).strip()
        trgtErrAbsLines = str(row['targetLineAbs']).strip()
        actLinesStr = str(row['lineNums_Text'])

        # Parse both programs
        srcCodeObj = Code(srcText, codeID=srcID)
        trgtCodeObj = Code(trgtText, codeID=trgtID)
        srcLines = srcText.splitlines()
        trgtLines = trgtText.splitlines()
        errSet = ClusterError.getErrSetStr(AllErrs, srcCodeObj)

        # Abstract both programs
        srcAbsLines = AbstractWrapper.getProgAbstraction(srcCodeObj)
        trgtAbsLines = AbstractWrapper.getProgAbstraction(trgtCodeObj)

        # Line numbers to attempt a repair on
        lineNums = errLoc(activeLocalization, srcCodeObj, actLinesStr,
                          useTracers_errLoc)

        if srcCodeObj.getNumErrors() > 0:
            # Predict a repair for every localized line
            (predText, srcErrLines, predErrLines, srcErrAbsLines,
             predErrAbsLines, isConcretized, isExactMatch) = runPerLine(
                 srcCodeObj, srcLines, trgtLines, srcAbsLines, trgtAbsLines,
                 errSet, lineNums, predAtK)

            # Score the prediction
            isLocated, isRelevant, isCompiled = calcAccuracy(
                actLinesStr, lineNums, trgtText, trgtErrAbsLines,
                predErrAbsLines, predErrLines, predText)

            # Field order must match outColumns
            records.append((
                row['id'], srcText, trgtText, predText, actLinesStr, H.joinList(lineNums),
                row['sourceLineText'], H.joinList(srcErrLines), trgtErrLines, H.joinLL(predErrLines),
                row['sourceLineAbs'], H.joinLL(srcErrAbsLines), trgtErrAbsLines, H.joinLL(predErrAbsLines),
                errSet,
                H.toInt(isLocated), H.toInt(isRelevant), H.toInt(isConcretized),
                H.toInt(isExactMatch), H.toInt(isCompiled),
            ))

        # Progress report whenever the row label is a non-zero multiple of 100
        if i != 0 and i % 100 == 0:
            print('\t...', i, '/', len(df), 'Completed')

    finished = timer()
    print('\n#Programs=', len(df), 'Time Taken=',
          round(finished - began, 2), '(s)')
    return pd.DataFrame(records, columns=outColumns)
Exemple #5
0
def runPerLine(srcCodeObj, srcLines, trgtLines, srcAbsLines, trgtAbsLines,
               errSet, lineNums, predAtK):
    '''For each flagged line number, call repairErrLine and accumulate the repair.

    Args:
        srcCodeObj: parsed source program, forwarded to the helpers.
        srcLines, srcAbsLines: concrete and abstract lines of the source program.
        trgtLines, trgtAbsLines: concrete and abstract lines of the target program.
        errSet: program-level error-set string, used unless the module flag
            ``flagErrSet_Line`` asks for a per-line error set.
        lineNums: 1-based line numbers to repair; each is converted with int().
        predAtK: forwarded unchanged to repairErrLine.

    Returns:
        (predText, srcErrLines, predErrLines, srcErrAbsLines, predErrAbsLines,
        isConcretized, isExactMatch). The two flags stay None when no line
        number was valid.
    '''
    srcErrLines, srcErrAbsLines = [], []
    predErrLines, predErrAbsLines = [], []
    # Work on copies so the caller's line lists are never modified
    repairLines = copy.deepcopy(srcLines)
    repairAbsLines = copy.deepcopy(srcAbsLines)
    isConcretized, isExactMatch = None, None

    # Neither pair of lists changes inside the loop, so the bounds are fixed
    maxSrcLine = min(len(srcLines), len(srcAbsLines))
    maxTrgtLine = min(len(trgtLines), len(trgtAbsLines))

    for lineNum in lineNums:
        lineNum = int(lineNum)

        # Skip line numbers outside the source program. The lower bound matters:
        # 0 or a negative number would otherwise index from the END of the list
        # (lineNum - 1 <= -1) and repair an unrelated line.
        if not 1 <= lineNum <= maxSrcLine:
            continue

        # lineNum - 1 because line numbers are 1-based
        srcLine, srcAbsLine = srcLines[lineNum - 1], srcAbsLines[lineNum - 1]
        trgtLine, trgtAbsLine = None, None
        if lineNum <= maxTrgtLine:
            trgtLine, trgtAbsLine = trgtLines[lineNum - 1], trgtAbsLines[lineNum - 1]
        srcErrLines.append(srcLine)
        srcErrAbsLines.append(srcAbsLine)

        # Use the error set at this line, or the program-level one
        errSetLine = errSet
        if flagErrSet_Line:
            errSetLine = ClusterError.getErrSetStr(AllErrs,
                                                   srcCodeObj,
                                                   lineNum=lineNum)

        # Predict@K the repair; repairLines/repairAbsLines carry the
        # accumulated repairs forward to the next line number
        (predAbsLine, predLine, repairAbsLines, repairLines,
         tempIsConcretized, tempIsExactMatch) = repairErrLine(
             srcCodeObj, repairLines, repairAbsLines, srcAbsLine, trgtLine,
             trgtAbsLine, errSetLine, lineNum, predErrAbsLines, predErrLines,
             predAtK)

        # Combine the per-line outcomes into the program-level flags
        isConcretized = H.NoneAnd(isConcretized, tempIsConcretized)
        isExactMatch = H.NoneAnd(isExactMatch, tempIsExactMatch)

        # Record the predicted abstract and concrete line, if any
        if predAbsLine is not None:
            predErrAbsLines.append(predAbsLine)
            predErrLines.append(predLine)

    predText = H.joinList(repairLines)
    return predText, srcErrLines, predErrLines, srcErrAbsLines, predErrAbsLines, isConcretized, isExactMatch
Exemple #6
0
def getAbstractAtLine(codeObj, lineNum):
    """Return the concrete spellings, abstraction and symbol table at ``lineNum``.

    Returns:
        (srcLine, srcAbs, symbTable) where ``srcLine`` lists the ``spell`` of
        every abstract token of every CToken, ``srcAbs`` is
        ``H.stringifyL`` applied to the abstract objects, and ``symbTable`` is
        passed through from ``getProgAbstractTokenSymbTab``.
    """
    absObjs, cTokens, symbTable = getProgAbstractTokenSymbTab(codeObj, lineNum)

    abstraction = H.stringifyL(absObjs)

    # Flatten: one spelling per abstract token, in token order
    spellings = []
    for cTok in cTokens:
        for absTok in cTok.abstractTokens:
            spellings.append(absTok.spell)

    return spellings, abstraction, symbTable
Exemple #7
0
    def getAbstractIdentifier(self, symbTable):
        '''Add the abstract token(s) for an identifier.

        Directive tokens and spellings listed in CF.IncludeIdentifiers are kept
        verbatim. Any other identifier is inserted into / looked up in
        symbTable: every type found becomes an abstract token (concretized by
        the spelling); when nothing is found the cursor type is used instead.
        '''
        # Kept verbatim: directive tokens and the identifiers listed in CF.IncludeIdentifiers
        if self.flagIsDirective or self.spell in CF.IncludeIdentifiers:
            self.addAbstract(self.spell, self.spell)
            return

        # Register the token with the symbol table, then ask for its type(s)
        symbTable.insertToken(self.spell, self.cursor)
        symbTypes = symbTable.lookup(self.spell)

        if len(symbTypes) == 0:
            # Lookup gave no type: fall back to the cursor type
            self.addAbstract(self.cursorType, self.spell)
            return

        # Every looked-up type is added, each concretized by the same spelling
        spelling = self.spell
        for symbType in symbTypes:
            self.addAbstract(symbType, spelling)

        # Log when the single symbol-table type disagrees with the cursor type,
        # unless the cursor type is INVALID or FUNCTIONPROTO
        if len(symbTypes) == 1 and self.cursorType != TypeKind.INVALID and self.cursorType != TypeKind.FUNCTIONPROTO:
            if symbTypes[0] != self.cursorType:
                H.errorLog([
                    ['CodeID', self.codeID],
                    ['AbstractToken SymbTab & Clang mismatch type', str(symbTypes[0]) + ' and ' + str(self.cursorType)],
                    ['lineNum', self.lineNum],
                    ['spell', self.spell],
                ])
Exemple #8
0
def createClass(fnameDataset):
    '''Recompute the error set of every row of a dataset CSV and rewrite the file.

    For each row the error set of ``sourceText`` is recomputed and stored in
    ``newErrSet``; ``class`` receives that error set followed by every line of
    the old ``errSet_diffs`` value except its first. The CSV at
    ``fnameDataset`` is overwritten in place.
    '''
    df = pd.read_csv(fnameDataset, encoding="ISO-8859-1")
    allErrs = getAllErrs(CF.fname_newErrIDs)

    newClasses = []
    newErrSets = []
    nextPercent = 10  # next progress milestone to announce, in percent

    for i, row in df.iterrows():
        previousClass = row['errSet_diffs']
        program = Code(row['sourceText'])

        errSetStr = getErrSetStr(allErrs, program)
        # Keep everything but the first line of the old class
        keptLines = previousClass.splitlines()[1:]

        newErrSets.append(errSetStr)
        newClasses.append(errSetStr + '\n' + H.joinList(keptLines))

        # At most one milestone is announced per row
        if i >= len(df) * nextPercent / 100:
            print(str(nextPercent) + '%', end=' ', flush=True)
            nextPercent += 10

    df['class'] = newClasses
    df['newErrSet'] = newErrSets
    df.to_csv(fnameDataset, index=False)
Exemple #9
0
def repairErrLine(srcCodeObj, repairLines, repairAbsLines, srcAbsLine,
                  trgtLine, trgtAbsLine, errSetLine, lineNum, predErrAbsLines,
                  predErrLines, predAtK):
    '''Pred@K and concretize the best line (with least errors)

    Every abstract prediction returned by Predict.predictAbs is substituted at
    lineNum (1-based) into a copy of the current repair, concretized, and the
    resulting program's error count is compared with the best seen so far.

    Args:
        srcCodeObj: parsed source program, passed to the concretizer.
        repairLines, repairAbsLines: concrete / abstract lines of the repair
            built so far; they are copied, never modified here.
        srcAbsLine, errSetLine, trgtAbsLine, predAtK: forwarded to
            Predict.predictAbs.
        trgtLine, predErrAbsLines, predErrLines: accepted but not used in
            this function body.
        lineNum: 1-based number of the line being repaired.

    Returns:
        (bestPredAbsLine, bestPredLine, bestPredAbsLines, bestPredLines,
        isConcretized, isExactMatch). When no prediction strictly lowers the
        error count, the first two are None and the next two are the
        repairAbsLines / repairLines objects that were passed in.
    '''
    isConcretized, isExactMatch = None, None
    bestPredAbsLine, bestPredLine = None, None
    # Default "best" is the unchanged input repair
    bestPredAbsLines, bestPredLines = repairAbsLines, repairLines

    # Baseline: error count of the program before applying any prediction
    prePredCodeObj = Code(H.joinList(repairLines))
    minNumErrs = prePredCodeObj.getNumErrors()

    for predAbsLine in Predict.predictAbs(srcAbsLine, errSetLine, trgtAbsLine,
                                          predAtK):
        # Create copy of previous obtained repairLines, and replace with predictedLines
        predLines, predAbsLines = copy.deepcopy(repairLines), copy.deepcopy(
            repairAbsLines)
        predAbsLines[lineNum - 1] = H.joinList(predAbsLine, joinStr=' ')

        # Concretize the predicted abstract fix
        predLine, tempIsConcretized = ConcreteWrapper.attemptConcretization(
            srcCodeObj, lineNum, predAbsLine)
        predLines[lineNum - 1] = H.joinList(predLine, joinStr=' ')

        # Concretization success?
        # NOTE(review): the flags are folded over ALL predictions, not only the
        # best one - isConcretized via H.NoneAnd, isExactMatch via H.NoneOr
        # (presumably None-tolerant and/or; confirm in H).
        isConcretized = H.NoneAnd(isConcretized, tempIsConcretized)
        tempIsExactMatch = checkRelevant2(predAbsLine, trgtAbsLine)
        isExactMatch = H.NoneOr(isExactMatch, tempIsExactMatch)

        # Find best prediction
        # Strict '<': a prediction must lower the error count to replace the
        # current best, so ties keep the earlier candidate (or the baseline).
        predCodeObj = Code(H.joinList(predLines))
        if minNumErrs is None or predCodeObj.getNumErrors() < minNumErrs:
            minNumErrs = predCodeObj.getNumErrors()
            bestPredAbsLines, bestPredLines = predAbsLines, predLines
            bestPredAbsLine, bestPredLine = predAbsLine, predLine

    return bestPredAbsLine, bestPredLine, bestPredAbsLines, bestPredLines, isConcretized, isExactMatch
Exemple #10
0
def getErrSetStr(allErrs, codeObj, lineNum=None):
    """Return the error set of ``codeObj`` as a ';'-joined string ending in ';'.

    ``lineNum`` is passed through to ``getErrSet`` unchanged (default None).
    """
    separator = ';'
    joined = H.joinList(getErrSet(allErrs, codeObj, lineNum), separator)
    return joined + separator
Exemple #11
0
 def __str__(self):
     """Return this object's abstract tokens joined via H.joinList with ' '."""
     separator = ' '
     return H.joinList(self.abstractTokens, separator)
Exemple #12
0
def printProgAbstraction():
    """Print the abstraction of ``temp.c`` (under ``CF.inputPath``), one line per row.

    Each abstract line is printed as its elements joined by a single space.
    """
    # Context manager so the file handle is closed even if reading fails
    with open(CF.inputPath + 'temp.c') as srcFile:
        codeText = srcFile.read()

    codeObj = Code(codeText)
    for absLine in getProgAbstraction(codeObj):
        print(H.joinList(absLine, ' '))