예제 #1
0
def source_from_cvepatch(
        ctr,
        diffFileName):  # diffFileName holds the filename of each DIFF patch
    # diffFileName looks like: CVE-2012-2372_7a9bc620049fed37a798f478c5699a11726b3d33.diff
    global repoName
    global debugMode
    global total
    global multimodeFlag
    global dummyFunction
    global diffDir
    global originalDir

    chunksCnt = 0  # number of DIFF patches
    currentCounter = 0

    with ctr.diffFileCntLock:
        currentCounter = ctr.diffFileCnt.value
        print str(ctr.diffFileCnt.value + 1) + '/' + str(total),
        ctr.diffFileCnt.value += 1

    if os.path.getsize(os.path.join(diffDir, repoName,
                                    diffFileName)) > 1000000:
        # don't do anything with big DIFFs (merges, upgrades, ...).
        print "[-]", diffFileName, "\t(file too large)"
    else:
        diffFileNameSplitted = diffFileName.split('_')
        cveId = diffFileNameSplitted[0]  # use only one CVEid
        commitHashValue = diffFileNameSplitted[-1].split('.')[0]

        print "[+]", diffFileName, "\t(proceed)"
        with open(os.path.join(diffDir, repoName, diffFileName), 'r') as fp:
            patchLines = ''.join(fp.readlines())
            patchLinesSplitted = re.split(pat_src, patchLines)
            commitLog = patchLinesSplitted[0]
            affectedFilesList = patchLinesSplitted[1:]

        repoPath = ''
        if multimodeFlag:  # multimode DIFFs have repoPath at the beginning.
            repoPath = commitLog.split('\n')[0].rstrip()

        numAffectedFiles = len(affectedFilesList)
        for aidx, affectedFile in enumerate(affectedFilesList):
            if debugMode:
                print "\tFile # " + str(aidx +
                                        1) + '/' + str(numAffectedFiles),
            firstLine = affectedFile.split('\n')[
                0]  # git --diff a/path/filename.ext b/path/filename.ext
            affectedFileName = firstLine.split("--git ")[1].split(
                " ")[0].split("/")[-1]
            codePath = firstLine.split(' b')[1].strip()  # path/filename.ext

            if not codePath.endswith(".c") and not codePath.endswith(
                    ".cpp") and not codePath.endswith(
                        ".cc") and not codePath.endswith(
                            ".c++") and not codePath.endswith(".cxx"):
                if debugMode:
                    print "\t[-]", codePath, "(wrong extension)"
            else:
                secondLine = affectedFile.split('\n')[1]

                if secondLine.startswith(
                        "index"
                ) == 0:  # or secondLine.endswith("100644") == 0:
                    if debugMode:
                        print "\t[-]", codePath, "(invalid metadata)"  # we are looking for "index" only.
                else:
                    if debugMode:
                        print "\t[+]", codePath
                    indexHashOld = secondLine.split(' ')[1].split('..')[0]
                    indexHashNew = secondLine.split(' ')[1].split('..')[1]

                    chunksList = re.split(
                        pat_chunk,
                        affectedFile)[1:]  # diff file per chunk (in list)
                    chunksCnt += len(chunksList)

                    if multimodeFlag:
                        os.chdir(
                            os.path.join(config.gitStoragePath, repoName,
                                         repoPath))
                    else:
                        os.chdir(os.path.join(config.gitStoragePath, repoName))

                    tmpOldFileName = os.path.join(
                        originalDir, "tmp",
                        "{0}_{1}_old".format(repoName, currentCounter))
                    command_show = "\"{0}\" show {1} > {2}".format(
                        config.gitBinary, indexHashOld, tmpOldFileName)
                    os.system(command_show)

                    tmpNewFileName = os.path.join(
                        originalDir, "tmp",
                        "{0}_{1}_new".format(repoName, currentCounter))
                    command_show = "\"{0}\" show {1} > {2}".format(
                        config.gitBinary, indexHashNew, tmpNewFileName)
                    os.system(command_show)

                    os.chdir(originalDir)
                    oldFunctionInstanceList = parseutility.parseFile_shallow(
                        tmpOldFileName, "")
                    newFunctionInstanceList = parseutility.parseFile_shallow(
                        tmpNewFileName, "")

                    finalOldFunctionList = []

                    numChunks = len(chunksList)
                    for ci, chunk in enumerate(chunksList):
                        if debugMode:
                            print "\t\tChunk # " + str(ci + 1) + "/" + str(
                                numChunks),

                        chunkSplitted = chunk.split('\n')
                        chunkFirstLine = chunkSplitted[0]
                        chunkLines = chunkSplitted[1:]

                        if debugMode:
                            print chunkFirstLine
                        lineNums = pat_linenum.search(chunkFirstLine)
                        oldLines = lineNums.group(1).split(',')
                        newLines = lineNums.group(2).split(',')

                        offset = int(oldLines[0])
                        pmList = []
                        lnList = []
                        for chunkLine in chunkSplitted[1:]:
                            if len(chunkLine) != 0:
                                pmList.append(chunkLine[0])

                        for i, pm in enumerate(pmList):
                            if pm == ' ' or pm == '-':
                                lnList.append(offset + i)
                            elif pm == '+':
                                lnList.append(offset + i - 1)
                                offset -= 1
                        """ HERE, ADD CHECK FOR NEW FUNCTIONS """
                        hitOldFunctionList = []
                        for f in oldFunctionInstanceList:
                            # print f.lines[0], f.lines[1]

                            for num in range(f.lines[0], f.lines[1] + 1):
                                if num in lnList:
                                    # print "Hit at", num

                                    hitOldFunctionList.append(f)
                                    break  # found the function to be patched

                                    # if f.lines[0] <= offset <= f.lines[1]:
                                    #     print "\t\t\tOffset HIT!!", f.name
                                    # elif f.lines[0] <= bound <= f.lines[1]:
                                    #     print "\t\t\tBound  HIT!!", f.name

                        for f in hitOldFunctionList:
                            # print "Verify hitFunction", f.name
                            # print "ln",
                            for num in range(f.lines[0], f.lines[1] + 1):
                                # print num,
                                try:
                                    listIndex = lnList.index(num)
                                except ValueError:
                                    pass
                                else:
                                    if lnList.count(num) > 1:
                                        listIndex += 1
                                    # print "\nmatch:", num
                                    # print "value\t", chunkSplitted[1:][lnList.index(num)]
                                    # print "pm   \t", pmList[lnList.index(num)]
                                    if pmList[listIndex] == '+' or pmList[
                                            listIndex] == '-':
                                        # print "Maybe meaningful",
                                        flag = 0
                                        for commentKeyword in [
                                                "/*", "*/", "//", "*"
                                        ]:
                                            if chunkLines[listIndex][
                                                    1:].lstrip().startswith(
                                                        commentKeyword):
                                                flag = 1
                                                break
                                        if flag:
                                            pass
                                            # print "but not."
                                        else:
                                            # print "MEANINGFUL!!"
                                            finalOldFunctionList.append(f)
                                            break
                                    else:
                                        pass
                                        # print "Not meaningful"
                                        # print "============\n"

                    finalOldFunctionList = list(
                        set(finalOldFunctionList))  # sometimes list has dups

                    finalNewFunctionList = []
                    for fold in finalOldFunctionList:
                        flag = 0
                        for fnew in newFunctionInstanceList:
                            if fold.name == fnew.name:
                                finalNewFunctionList.append(fnew)
                                flag = 1
                                break
                        if not flag:
                            finalNewFunctionList.append(dummyFunction)

                    if debugMode:
                        print "\t\t\t", len(
                            finalNewFunctionList), "functions found."
                    vulFileNameBase = diffFileName.split(
                        '.diff')[0] + '_' + affectedFileName

                    # os.chdir(os.path.join(originalDir, "vul", repoName))

                    for index, f in enumerate(finalOldFunctionList):
                        os.chdir(originalDir)
                        oldFuncInstance = finalOldFunctionList[index]

                        fp = open(oldFuncInstance.parentFile, 'r')
                        srcFileRaw = fp.readlines()
                        fp.close()
                        finalOldFunction = ''.join(
                            srcFileRaw[oldFuncInstance.lines[0] -
                                       1:oldFuncInstance.lines[1]])

                        # oldFuncArgs = ''
                        # for ai, funcArg in enumerate(oldFuncInstance.parameterList):
                        #     oldFuncArgs += "DTYPE " + funcArg
                        #     if ai + 1 != len(oldFuncInstance.parameterList):
                        #         oldFuncArgs += ', '
                        # finalOldFunction = "DTYPE {0} ({1})\n{{ {2}\n}}"\
                        #     .format(oldFuncInstance.name, oldFuncArgs, oldFuncInstance.funcBody)

                        finalOldFuncId = str(oldFuncInstance.funcId)

                        newFuncInstance = finalNewFunctionList[index]

                        if newFuncInstance.name is None:
                            finalNewFunction = ""
                        else:
                            fp = open(newFuncInstance.parentFile, 'r')
                            srcFileRaw = fp.readlines()
                            fp.close()
                            finalNewFunction = ''.join(
                                srcFileRaw[newFuncInstance.lines[0] -
                                           1:newFuncInstance.lines[1]])

                            # finalNewFunction = finalNewFunctionList[index].funcBody

                        finalOldBody = finalOldFunction[
                            finalOldFunction.find('{') +
                            1:finalOldFunction.rfind('}')]
                        finalNewBody = finalNewFunction[
                            finalNewFunction.find('{') +
                            1:finalNewFunction.rfind('}')]
                        tmpold = parseutility.normalize(
                            parseutility.removeComment(finalOldBody))
                        tmpnew = parseutility.normalize(
                            parseutility.removeComment(finalNewBody))

                        if tmpold != tmpnew and len(tmpnew) > 0:
                            # if two are same, it means nothing but comment is patched.
                            with ctr.functionCntLock:
                                ctr.functionCnt.value += 1
                            os.chdir(os.path.join(originalDir, "vul",
                                                  repoName))
                            vulOldFileName = vulFileNameBase + '_' + finalOldFuncId + "_OLD.vul"
                            vulNewFileName = vulFileNameBase + '_' + finalOldFuncId + "_NEW.vul"
                            with open(vulOldFileName, 'w') as fp:
                                fp.write(finalOldFunction)
                            with open(vulNewFileName, 'w') as fp:
                                if finalNewFunctionList[
                                        index].name is not None:
                                    fp.write(finalNewFunction)
                                else:
                                    fp.write("")
                            diffCommand = "\"{0}\" -u {1} {2} >> {3}_{4}.patch".format(
                                config.diffBinary, vulOldFileName,
                                vulNewFileName, vulFileNameBase,
                                finalOldFuncId)
                            os.system(diffCommand)
예제 #2
0

originalDir = os.path.dirname(os.path.dirname(
    os.path.abspath(__file__)))  # vuddy root directory
vulsDir = os.path.join(originalDir, "vul")
dirs = os.listdir(vulsDir)
rmcntDict = {}
for dir in dirs:
    # print dir
    if dir != "chromium":
        continue
    for vul in os.listdir(os.path.join(vulsDir, dir)):
        if vul.endswith("OLD.vul"):
            with open(os.path.join(vulsDir, dir, vul), "r") as fp:
                raw = ''.join(fp.readlines())
                body = getBody(pu.removeComment(raw))

            if body.count(";") == 1:
                kill = 1  # this function must be single-line
            else:
                kill = 0

            cnt = 0
            for line in body.split('\n'):
                if len(line.strip()) > 0:
                    cnt += 1  # cnt will be 1 for single lined functions

            with open(os.path.join(vulsDir, dir, vul[:-8] + "_NEW.vul"),
                      'r') as fp:
                newraw = ''.join(fp.readlines())
                newbody = getBody(pu.removeComment(newraw))