Ejemplo n.º 1
0
    def extractDirectSyscalls(self, folder):
        """Collect the direct system calls found in the ELF files of a folder.

        Every entry of ``folder`` accepted by ``util.isElf`` is considered.
        Names containing ".so" are reduced to a version-less form first; an
        entry is analysed only if that reduced name is not on the exclusion
        list and has not been queued before.

        :param folder: directory whose immediate entries are scanned.
        :return: union of the syscall sets reported by
            ``BinaryAnalysis.extractDirectSyscalls`` for the queued files.
        """
        # Reduced names that are never analysed.
        skippedNames = [
            "ld.so", "libc.so", "libdl.so", "libcrypt.so", "libnss_compat.so",
            "libnsl.so", "libnss_files.so", "libnss_nis.so", "libpthread.so",
            "libm.so", "libresolv.so", "librt.so", "libutil.so",
            "libnss_dns.so", "gosu"
        ]
        sharedObjectTag = ".so"

        queuedPaths = list()
        seenNames = set()
        for entry in os.listdir(folder):
            entryPath = folder + "/" + entry
            if (not util.isElf(entryPath)):
                continue

            reducedName = entry
            if (sharedObjectTag in entry):
                # Collapse everything from the first "-" up to the last "so"
                # into ".so", then cut whatever follows the first ".so".
                collapsed = re.sub("-.*so", ".so", entry)
                reducedName = collapsed[:collapsed.index(".so")] + ".so"

            if (reducedName in skippedNames or reducedName in seenNames):
                continue
            queuedPaths.append(entryPath)
            seenNames.add(reducedName)

        collectedSyscalls = set()
        for elfPath in set(queuedPaths):
            self.logger.debug("extraction direct syscall for %s", elfPath)
            analyzer = binaryAnalysis.BinaryAnalysis(elfPath, self.logger)
            found, okCount, badCount = analyzer.extractDirectSyscalls()
            self.logger.debug(
                "Successfull direct syscalls: %d list: %s, Failed direct syscalls: %d",
                okCount, str(found), badCount)
            collectedSyscalls.update(found)
        return collectedSyscalls
Ejemplo n.º 2
0
            rootLogger.debug("iterating over exported function: %s",
                             exportedFunc)
            leaves = myGraph.getLeavesFromStartNode(exportedFunc, syscallList,
                                                    list())
            for leaf in leaves:
                if (leaf.startswith("syscall")):
                    leaf = leaf.replace("syscall", "")
                    leaf = leaf.replace("(", "")
                    leaf = leaf.replace(")", "")
                    syscallNum = int(leaf.strip())
                    reachableSyscalls.add(syscallNum)

        rootLogger.debug("len(callgraphSyscalls): %d callgraphSyscalls: %s",
                         len(reachableSyscalls), str(reachableSyscalls))

        myBinary = binaryAnalysis.BinaryAnalysis(options.binpath, rootLogger)
        directSyscallSet, successCount, failedCount = myBinary.extractDirectSyscalls(
        )

        missedSyscallSet = set(directSyscallSet - reachableSyscalls)

        rootLogger.debug("directSyscalls-callgraphSyscalls = %s",
                         str(missedSyscallSet))
        if (not options.output):
            rootLogger.info("directSyscalls-callgraphSyscalls = %s",
                            str(missedSyscallSet))
        else:
            outputFile = open(options.output, 'w')
            for exportedFunc in exportedFuncs:
                for missedSyscall in missedSyscallSet:
                    if (missedSyscall < 1000):
Ejemplo n.º 3
0
                        modList.add(options.input + "/" + fileName)
                    else:
                        binList.add(options.input + "/" + fileName)
                    fileList.add(options.input + "/" + fileName)
                    filesAdded.add(tmpFileName)

        syscallDict = dict()  #Key: bin/lib/mod Value: syscall set
        ALLLIBS = "libs"
        MAINAPP = options.mainapp
        syscallDict[ALLLIBS] = set()
        syscallDict[MAINAPP] = set()
        for filePath in fileList:
            directSyscallSet = set()
            indirectSyscallSet = set()
            fileName = filePath[filePath.rindex("/") + 1:]
            myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)

            if (filePath in indirectList):
                rootLogger.info("Extracting direct syscalls for: %s", fileName)
                directSyscallSet, successCount, failCount = myBinary.extractDirectSyscalls(
                )
                rootLogger.info("Successfull direct syscalls: %d",
                                successCount)
                rootLogger.warning("Failed syscalls: %d", failCount)

            rootLogger.info("Extracting indirect syscalls for: %s", fileName)
            indirectSyscallSet = myBinary.extractIndirectSyscalls(libcCfg)

            rootLogger.info(
                "ELF: %s, directSyscallCount: %d, indirectSyscallCount: %d",
                fileName, len(directSyscallSet), len(indirectSyscallSet))
def processSyscalls(binprofiler, bininput, apptolibmap, appname, libccfginput,
                    debug, rootLogger):
    """Collect the system calls of an application and its mapped libraries.

    When ``binprofiler`` is set, the app-to-library JSON map is read, and
    every ELF file in ``bininput`` whose cleaned name is the application
    itself or one of its listed libraries is analysed for direct and
    indirect system calls.

    :param binprofiler: enables the binary analysis; when falsy the result
        is an empty set.
    :param bininput: directory holding the binaries/libraries to analyse.
    :param apptolibmap: path to a JSON file with an "apps" list whose items
        map an application name to a dict with "libs" and "otherlibs" lists.
    :param appname: application name looked up in the map (compared after
        stripping whitespace) and matched against file names in ``bininput``.
    :param libccfginput: libc call graph input, parsed with ":" as separator.
    :param debug: unused by this function.
    :param rootLogger: logger used for all messages.
    :return: set of system calls found; ``False`` if option validation
        fails and ``parser.error`` returns.

    Exits the process with status -1 if the JSON map cannot be loaded.
    Relies on a module-level ``parser`` (presumably the option parser) for
    reporting invalid option combinations.
    """
    if binprofiler and (not apptolibmap or not appname or not bininput):
        parser.error(
            "Options --appname, --apptolibmap and --bininput should be provided when enabling the --binprofiler feature."
        )
        return False

    libcGraph = graph.Graph(rootLogger)
    libcGraph.createGraphFromInput(libccfginput, ":")

    # Libraries which should be added to both the import table and the svf
    # analysis output.
    libSet = set()
    # Libraries which should only be added to the import table
    # (e.g. apr, apr-util).
    otherLibSet = set()
    if (binprofiler):
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(apptolibmap, 'r') as appToLibFile:
                appToLibMap = json.load(appToLibFile)
        except Exception as e:
            rootLogger.warning(
                "Trying to load app to lib map json from: %s, but doesn't exist: %s",
                apptolibmap, str(e))
            sys.exit(-1)
        rootLogger.debug("Finished loading json")

        targetApp = appname.strip()
        for app in appToLibMap["apps"]:
            for key, value in app.items():
                if (key.strip() == targetApp):
                    for lib in value["libs"]:
                        libSet.add(cleanLib(lib))
                    for lib in value["otherlibs"]:
                        otherLibSet.add(cleanLib(lib))

    importTableSyscalls = set()
    if (binprofiler):
        lib = ".so"
        filesAdded = set()

        for fileName in os.listdir(bininput):
            filePath = bininput + "/" + fileName
            if (not util.isElf(filePath)):
                continue
            if (lib in fileName):
                tmpFileName = cleanLib(fileName)
            else:
                tmpFileName = fileName
            if (tmpFileName in filesAdded):
                continue
            if (tmpFileName in otherLibSet or tmpFileName in libSet
                    or tmpFileName == appname):
                # Record the cleaned name so several versions/links of the
                # same library are analysed only once.
                filesAdded.add(tmpFileName)
                myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)
                directSyscallSet, successCount, failCount = myBinary.extractDirectSyscalls(
                )
                indirectSyscallSet = myBinary.extractIndirectSyscalls(
                    libcGraph)
                importTableSyscalls.update(directSyscallSet)
                importTableSyscalls.update(indirectSyscallSet)

    rootLogger.info("len(importTableSyscalls): %d", len(importTableSyscalls))
    return importTableSyscalls
Ejemplo n.º 5
0
    def createCompleteGraph(self, exceptList=None):
        """Build the application call graph extended with library information.

        Steps performed:
            1. Obtain the libraries required by ``self.binaryPath`` through
               ``util.readLibrariesWithLdd``.
            2. For each library that is neither libc-related nor in
               ``exceptList``, look for ``<cleaned name>.callgraph.out``
               under ``self.cfgPath``.
            3. If that call graph exists, add one edge per
               (start node, reachable leaf) pair to the complete graph.
            4. Otherwise, if the library file itself exists, extract its
               direct and indirect system calls with ``BinaryAnalysis``.

        :param exceptList: library names to skip; defaults to no exclusions.
        :return: tuple ``(completeGraph, librarySyscalls, libraryCfgGraphs,
            libcGraph)`` where ``completeGraph`` is the application graph plus
            the start->leaf edges of libraries with a call graph,
            ``librarySyscalls`` holds the syscalls of libraries *without* a
            call graph, ``libraryCfgGraphs`` maps library name to its full
            call graph, and ``libcGraph`` is the libc call graph.

        Exits the process with status -1 if the application graph cannot be
        created.
        """
        if exceptList is None:
            # Avoid a shared mutable default argument.
            exceptList = []

        # NOTE(review): names are compared verbatim with the keys returned by
        # util.readLibrariesWithLdd -- confirm those keys carry no ".so"
        # suffix or version, otherwise this filter never matches.
        libcRelatedList = [
            "ld", "libc", "libdl", "libcrypt", "libnss_compat", "libnsl",
            "libnss_files", "libnss_nis", "libpthread", "libm", "libresolv",
            "librt", "libutil", "libnss_dns"
        ]
        # Full call graph of every library for which a CFG file was found.
        libraryCfgGraphs = dict()
        # Only for libraries which we DO NOT have the CFG.
        librarySyscalls = set()
        libraryToPathDict = util.readLibrariesWithLdd(self.binaryPath)

        libcGraph = graph.Graph(self.logger)
        libcGraph.createGraphFromInput(self.libcCfgPath, self.libcSeparator)

        completeGraph = graph.Graph(self.logger)
        result = completeGraph.createGraphFromInput(self.binaryCfgPath)
        if (result == -1):
            self.logger.error("Failed to create graph for input: %s",
                              self.binaryCfgPath)
            sys.exit(-1)

        for libraryName, libPath in libraryToPathDict.items():
            libraryCfgFileName = self.cleanLib(libraryName) + ".callgraph.out"
            libraryCfgFilePath = self.cfgPath + "/" + libraryCfgFileName
            if (libraryName in libcRelatedList or libraryName in exceptList):
                continue

            if (os.path.isfile(libraryCfgFilePath)):
                # We have the CFG for this library.
                libraryGraph = graph.Graph(self.logger)
                libraryGraph.createGraphFromInput(libraryCfgFilePath)
                libraryStartNodes = libraryGraph.extractStartingNodes()

                # Keep the full library call graph for later stats creation.
                libraryCfgGraphs[libraryName] = libraryGraph

                # Only start->leaf edges go into the complete graph, not the
                # library's internal nodes.
                for startNode in libraryStartNodes:
                    leaves = libraryGraph.getLeavesFromStartNode(
                        startNode, list(), list())
                    for leaf in leaves:
                        completeGraph.addEdge(startNode, leaf)
            elif (os.path.isfile(libPath)):
                # No CFG for this library: fall back to the system calls
                # found by analysing the library binary itself.
                libraryProfiler = binaryAnalysis.BinaryAnalysis(
                    libPath, self.logger)
                directSyscallSet, successCount, failedCount = libraryProfiler.extractDirectSyscalls(
                )
                indirectSyscallSet = libraryProfiler.extractIndirectSyscalls(
                    libcGraph)

                librarySyscalls.update(directSyscallSet)
                librarySyscalls.update(indirectSyscallSet)

        return completeGraph, librarySyscalls, libraryCfgGraphs, libcGraph
Ejemplo n.º 6
0
if __name__ == '__main__':
    # Command-line entry point: extract the direct system calls of a single
    # binary and log the result.
    #
    # The usage text lists the options this parser actually defines (-i, -d).
    usage = "Usage: %prog -i <Path to binary> [-d]"

    parser = optparse.OptionParser(usage=usage, version="1")

    parser.add_option("-i",
                      "--input",
                      dest="input",
                      default=None,
                      nargs=1,
                      help="Path to binary to analyze")

    parser.add_option("-d",
                      "--debug",
                      dest="debug",
                      action="store_true",
                      default=False,
                      help="Debug enabled/disabled")

    (options, args) = parser.parse_args()
    if isValidOpts(options):
        rootLogger = setLogPath("graph.log")
        myBinary = binaryAnalysis.BinaryAnalysis(options.input, rootLogger)
        # extractDirectSyscalls returns (syscall set, success count, failure
        # count); the log labels a/b/c are kept as they were.
        directSyscallSet, successCount, failCount = myBinary.extractDirectSyscalls(
        )
        rootLogger.info("a: %s, b: %d, c: %d", directSyscallSet, successCount,
                        failCount)
def processSyscalls(targetcfg,
                    cfginput,
                    mastermain,
                    workermain,
                    edgefilterlist,
                    libsyscalls,
                    libsyscallpath,
                    binprofiler,
                    bininput,
                    apptolibmap,
                    appname,
                    output,
                    debug,
                    rootLogger,
                    cfginputseparator=":"):
    """Derive the system calls reachable from master and worker entry points.

    The application call graph is walked from ``mastermain`` and from each
    worker entry point down to its leaf functions; each leaf is resolved to
    system calls through the libc call graph. Library system calls are then
    added either from a pre-computed text file (``libsyscalls``) or by
    analysing the binaries in ``bininput`` (``binprofiler``). The resulting
    names are printed, and with ``binprofiler`` also written to ``output``.

    :param targetcfg: application call graph input, parsed with "->".
    :param cfginput: libc call graph input, parsed with
        ``cfginputseparator``.
    :param mastermain: start node of the master process.
    :param workermain: comma-separated start node(s) of the worker process.
    :param edgefilterlist: comma-separated filter passed to
        ``createGraphFromInputWithFilter``; when empty the worker start
        nodes are used instead.
    :param libsyscalls: enables reading library syscalls from
        ``libsyscallpath`` (only when ``binprofiler`` is off).
    :param libsyscallpath: text file; on each line the second
        whitespace-separated field is a library name and the remaining
        fields are syscall numbers, optionally decorated with "{", "}", ",".
    :param binprofiler: enables analysing the ELF files in ``bininput``
        (only when ``libsyscalls`` is off) and writing ``output``.
    :param bininput: directory holding the binaries/libraries to analyse.
    :param apptolibmap: path to a JSON file with an "apps" list whose items
        map an application name to a dict with "libs" and "otherlibs" lists.
    :param appname: application name looked up in the map.
    :param output: path handed to ``util.writeDictToFile``.
    :param debug: unused by this function.
    :param rootLogger: logger used for all messages.
    :param cfginputseparator: separator of the libc call graph input.
    :return: ``None``; ``False`` if option validation fails and
        ``parser.error`` returns.

    Exits the process with status -1 if the JSON map cannot be loaded.
    Relies on a module-level ``parser`` (presumably the option parser) for
    reporting invalid option combinations. When both ``libsyscalls`` and
    ``binprofiler`` are set, neither library source is consulted.
    """
    if not cfginput or not targetcfg or not mastermain or not workermain:
        parser.error("All options -c, -f, -m and -w should be provided.")
        return False

    if libsyscalls and (not apptolibmap or not libsyscallpath or not appname):
        parser.error(
            "Options --appname, --apptolibmap and --libsyscallpath  should be provided when enabling the --libsyscalls feature."
        )
        return False

    if binprofiler and (not apptolibmap or not appname or not bininput
                        or not output):
        parser.error(
            "Options -o, --appname, --apptolibmap and --bininput should be provided when enabling the --binprofiler feature."
        )
        return False

    # Highest syscall number (exclusive) considered when building the
    # complement ("bl...") name sets; hard-coded.
    syscallLimit = 400

    # str.split already yields a one-element list when there is no comma.
    workerMainFuncList = workermain.split(",")
    if (edgefilterlist):
        edgeFilterList = edgefilterlist.split(",")
    else:
        edgeFilterList = workerMainFuncList

    applicationGraph = graph.Graph(rootLogger)
    applicationGraph.createGraphFromInputWithFilter(targetcfg, "->",
                                                    edgeFilterList)
    libcGraph = graph.Graph(rootLogger)
    libcGraph.createGraphFromInput(cfginput, cfginputseparator)

    rootLogger.info("-------------Extraction Master leave functions---------")
    masterFunctions = applicationGraph.getLeavesFromStartNode(
        mastermain, list(), list())
    rootLogger.info("-------------Extraction Worker leave functions---------")
    workerFunctions = set()
    for workerFunc in workerMainFuncList:
        workerFunctions.update(
            applicationGraph.getLeavesFromStartNode(workerFunc, list(),
                                                    list()))

    masterSyscalls = set()
    for masterFunc in masterFunctions:
        rootLogger.debug("masterfunc: %s", masterFunc)
        masterSyscalls.update(libcGraph.getSyscallFromStartNode(masterFunc))

    workerSyscalls = set()
    for workerFunc in workerFunctions:
        rootLogger.debug("workerfunc: %s", workerFunc)
        workerSyscalls.update(libcGraph.getSyscallFromStartNode(workerFunc))

    rootLogger.info(
        "len(masterSyscalls): %d len(workerSyscalls): %d before adding library syscalls",
        len(masterSyscalls), len(workerSyscalls))

    # Libraries which should be added to both the import table and the svf
    # analysis output.
    libSet = set()
    # Libraries which should only be added to the import table
    # (e.g. apr, apr-util).
    otherLibSet = set()
    if (libsyscalls or binprofiler):
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(apptolibmap, 'r') as appToLibFile:
                appToLibMap = json.load(appToLibFile)
        except Exception as e:
            rootLogger.warning(
                "Trying to load app to lib map json from: %s, but doesn't exist: %s",
                apptolibmap, str(e))
            sys.exit(-1)
        rootLogger.debug("Finished loading json")

        targetApp = appname.strip()
        for app in appToLibMap["apps"]:
            for key, value in app.items():
                if (key.strip() == targetApp):
                    for lib in value["libs"]:
                        libSet.add(cleanLib(lib))
                    for lib in value["otherlibs"]:
                        otherLibSet.add(cleanLib(lib))

    if (libsyscalls and not binprofiler):
        with open(libsyscallpath, 'r') as libSyscallFile:
            for libSyscallLine in libSyscallFile:
                splittedLine = libSyscallLine.split()
                if (len(splittedLine) > 2
                        and cleanLib(splittedLine[1]) in libSet):
                    for rawNum in splittedLine[2:]:
                        # Tokens come from a printed set, e.g. "{1," "2}".
                        syscallNum = int(
                            rawNum.replace("{", "").replace(",", "").replace(
                                "}", ""))
                        masterSyscalls.add(syscallNum)
                        workerSyscalls.add(syscallNum)

    importTableSyscalls = set()
    if (not libsyscalls and binprofiler):
        lib = ".so"
        filesAdded = set()

        for fileName in os.listdir(bininput):
            filePath = bininput + "/" + fileName
            if (not util.isElf(filePath)):
                continue
            if (lib in fileName):
                tmpFileName = cleanLib(fileName)
            else:
                tmpFileName = fileName
            if (tmpFileName in filesAdded):
                continue
            if (tmpFileName in otherLibSet or tmpFileName in libSet
                    or tmpFileName == appname):
                # Record the cleaned name so several versions/links of the
                # same library are analysed only once.
                filesAdded.add(tmpFileName)
                myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)
                directSyscallSet, successCount, failCount = myBinary.extractDirectSyscalls(
                )
                indirectSyscallSet = myBinary.extractIndirectSyscalls(
                    libcGraph)
                importTableSyscalls.update(directSyscallSet)
                importTableSyscalls.update(indirectSyscallSet)
                if (tmpFileName in libSet):
                    masterSyscalls.update(directSyscallSet)
                    masterSyscalls.update(indirectSyscallSet)
                    workerSyscalls.update(directSyscallSet)
                    workerSyscalls.update(indirectSyscallSet)

    rootLogger.info(
        "len(importTableSyscalls): %d len(masterSyscalls): %d len(workerSyscalls): %d after adding library syscalls",
        len(importTableSyscalls), len(masterSyscalls), len(workerSyscalls))

    translator = sycall.Syscall(rootLogger)
    syscallmap = translator.createMap()
    mminusw = sorted(masterSyscalls.difference(workerSyscalls))
    wminusm = sorted(workerSyscalls.difference(masterSyscalls))

    def unusedSyscallNames(usedSyscalls):
        # Names of mapped syscalls below syscallLimit that are NOT used.
        return {
            syscallmap[num]
            for num in range(syscallLimit)
            if num not in usedSyscalls and syscallmap.get(num, None)
        }

    importTableSyscallNames = set()
    masterSyscallNames = set()
    workerSyscallNames = set()
    blImportTableSyscallNames = set()

    if (binprofiler):
        blImportTableSyscallNames = unusedSyscallNames(importTableSyscalls)
        for syscall in importTableSyscalls:
            if (syscallmap.get(syscall, None)):
                importTableSyscallNames.add(syscallmap[syscall])

    print("------- main -------")
    blMasterSyscallNames = unusedSyscallNames(masterSyscalls)
    for syscall in masterSyscalls:
        if (syscallmap.get(syscall, None)):
            masterSyscallNames.add(syscallmap[syscall])
        print(syscallmap.get(syscall, ""))

    print("------- child -------")
    blWorkerSyscallNames = unusedSyscallNames(workerSyscalls)
    for syscall in workerSyscalls:
        if (syscallmap.get(syscall, None)):
            workerSyscallNames.add(syscallmap[syscall])
        print(syscallmap.get(syscall, ""))

    if (binprofiler):
        outputDict = dict()
        outputDict['importTable'] = importTableSyscallNames
        outputDict['master'] = masterSyscallNames
        outputDict['worker'] = workerSyscallNames
        outputDict['blImportTable'] = blImportTableSyscallNames
        outputDict['blMaster'] = blMasterSyscallNames
        outputDict['blWorker'] = blWorkerSyscallNames

        util.writeDictToFile(outputDict, output)

    # Use .get here as well: a syscall number missing from the map must not
    # abort the report with a KeyError.
    print("------- main minus worker -------")
    for syscall in mminusw:
        print(syscallmap.get(syscall, ""))
    print("------- worker minus main -------")
    for syscall in wminusm:
        print(syscallmap.get(syscall, ""))