def extractDirectSyscalls(self, folder):
    """Collect the direct system calls made by the ELF files in *folder*.

    Every directory entry accepted by ``util.isElf`` is a candidate.  Names
    containing ``.so`` are reduced to an unversioned form
    (``libfoo-2.27.so`` and ``libfoo.so.1`` both become ``libfoo.so``) so
    that each library is analysed only once.  The libc-related libraries
    listed below and ``gosu`` are never analysed.

    Args:
        folder: Path of the directory holding the binaries and libraries.

    Returns:
        set: Union of the syscall sets reported by
        ``BinaryAnalysis.extractDirectSyscalls`` for every analysed file.
    """
    skippedNames = frozenset((
        "ld.so", "libc.so", "libdl.so", "libcrypt.so", "libnss_compat.so",
        "libnsl.so", "libnss_files.so", "libnss_nis.so", "libpthread.so",
        "libm.so", "libresolv.so", "librt.so", "libutil.so",
        "libnss_dns.so", "gosu",
    ))
    soMarker = ".so"

    selectedPaths = []
    seenNames = set()
    # os.listdir() yields entries in arbitrary order.  Sorting makes the
    # choice between several versions of one library (first one wins) and
    # the order of the log output reproducible from run to run.
    for fileName in sorted(os.listdir(folder)):
        filePath = folder + "/" + fileName
        if not util.isElf(filePath):
            continue
        if soMarker in fileName:
            # Drop a "-<version>" infix, then everything after the first
            # ".so" (e.g. the ".1.2.3" of "libfoo.so.1.2.3").
            baseName = re.sub("-.*so", soMarker, fileName)
            baseName = baseName[:baseName.index(soMarker)] + soMarker
        else:
            baseName = fileName
        if baseName in skippedNames or baseName in seenNames:
            continue
        seenNames.add(baseName)
        selectedPaths.append(filePath)

    finalSyscallSet = set()
    for filePath in selectedPaths:
        self.logger.debug("extraction direct syscall for %s", filePath)
        binAnalysis = binaryAnalysis.BinaryAnalysis(filePath, self.logger)
        syscallSet, successCount, failCount = binAnalysis.extractDirectSyscalls()
        self.logger.debug(
            "Successfull direct syscalls: %d list: %s, Failed direct syscalls: %d",
            successCount, str(syscallSet), failCount)
        finalSyscallSet.update(syscallSet)
    return finalSyscallSet
rootLogger.debug("iterating over exported function: %s", exportedFunc) leaves = myGraph.getLeavesFromStartNode(exportedFunc, syscallList, list()) for leaf in leaves: if (leaf.startswith("syscall")): leaf = leaf.replace("syscall", "") leaf = leaf.replace("(", "") leaf = leaf.replace(")", "") syscallNum = int(leaf.strip()) reachableSyscalls.add(syscallNum) rootLogger.debug("len(callgraphSyscalls): %d callgraphSyscalls: %s", len(reachableSyscalls), str(reachableSyscalls)) myBinary = binaryAnalysis.BinaryAnalysis(options.binpath, rootLogger) directSyscallSet, successCount, failedCount = myBinary.extractDirectSyscalls( ) missedSyscallSet = set(directSyscallSet - reachableSyscalls) rootLogger.debug("directSyscalls-callgraphSyscalls = %s", str(missedSyscallSet)) if (not options.output): rootLogger.info("directSyscalls-callgraphSyscalls = %s", str(missedSyscallSet)) else: outputFile = open(options.output, 'w') for exportedFunc in exportedFuncs: for missedSyscall in missedSyscallSet: if (missedSyscall < 1000):
modList.add(options.input + "/" + fileName) else: binList.add(options.input + "/" + fileName) fileList.add(options.input + "/" + fileName) filesAdded.add(tmpFileName) syscallDict = dict() #Key: bin/lib/mod Value: syscall set ALLLIBS = "libs" MAINAPP = options.mainapp syscallDict[ALLLIBS] = set() syscallDict[MAINAPP] = set() for filePath in fileList: directSyscallSet = set() indirectSyscallSet = set() fileName = filePath[filePath.rindex("/") + 1:] myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger) if (filePath in indirectList): rootLogger.info("Extracting direct syscalls for: %s", fileName) directSyscallSet, successCount, failCount = myBinary.extractDirectSyscalls( ) rootLogger.info("Successfull direct syscalls: %d", successCount) rootLogger.warning("Failed syscalls: %d", failCount) rootLogger.info("Extracting indirect syscalls for: %s", fileName) indirectSyscallSet = myBinary.extractIndirectSyscalls(libcCfg) rootLogger.info( "ELF: %s, directSyscallCount: %d, indirectSyscallCount: %d", fileName, len(directSyscallSet), len(indirectSyscallSet))
def processSyscalls(binprofiler, bininput, apptolibmap, appname, libccfginput,
                    debug, rootLogger):
    """Collect the syscalls used by an application's binary and libraries.

    When *binprofiler* is enabled, the app-to-library JSON map is read, the
    entries of ``"apps"`` whose key equals *appname* (ignoring surrounding
    whitespace) provide the ``"libs"`` and ``"otherlibs"`` lists, and every
    ELF file in *bininput* whose cleaned name is in one of those lists, or
    equals *appname*, is analysed for direct and indirect syscalls.

    Args:
        binprofiler: Truthy to enable the binary analysis.
        bininput: Directory containing the binaries and libraries.
        apptolibmap: Path to the JSON file mapping apps to libraries.
        appname: Name of the application to look up in the map.
        libccfginput: Path to the libc call graph (``:`` separated).
        debug: Not used by this function.
        rootLogger: Logger used for all messages.

    Returns:
        The set of syscalls found (empty when *binprofiler* is falsy), or
        ``False`` if option validation fails and ``parser.error`` returns.

    Exits the process with status -1 if the JSON map cannot be loaded.
    """
    if binprofiler and (not apptolibmap or not appname or not bininput):
        # `parser` is not defined in this function; it is expected to be
        # the module-level option parser.
        parser.error(
            "Options --appname, --apptolibmap and --bininput should be provided when enabling the --binprofiler feature."
        )
        return False

    libcGraph = graph.Graph(rootLogger)
    libcGraph.createGraphFromInput(libccfginput, ":")

    importTableSyscalls = set()
    if binprofiler:
        # Libraries which should be added to both the import table and the
        # svf analysis output.
        libSet = set()
        # Libraries which should only be added to the import table
        # (e.g. apr, apr-util).
        otherLibSet = set()

        try:
            # The context manager closes the file even if parsing fails.
            with open(apptolibmap, 'r') as appToLibFile:
                appToLibMap = json.load(appToLibFile)
        except (OSError, ValueError) as e:
            # OSError: unreadable file; ValueError: malformed JSON.
            rootLogger.warning(
                "Trying to load app to lib map json from: %s, but doesn't exist: %s",
                apptolibmap, str(e))
            rootLogger.debug("Finished loading json")
            sys.exit(-1)

        wantedApp = appname.strip()
        for app in appToLibMap["apps"]:
            for key, value in app.items():
                if key.strip() == wantedApp:
                    libSet.update(cleanLib(lib) for lib in value["libs"])
                    otherLibSet.update(
                        cleanLib(lib) for lib in value["otherlibs"])

        # NOTE(review): the original kept a `filesAdded` set that was
        # checked but never populated, so several versions of the same
        # library are all analysed and their results merged.  That
        # behaviour is preserved here; confirm whether de-duplication was
        # intended.
        for fileName in os.listdir(bininput):
            filePath = bininput + "/" + fileName
            if not util.isElf(filePath):
                continue
            tmpFileName = cleanLib(fileName) if ".so" in fileName else fileName
            if not (tmpFileName in otherLibSet or tmpFileName in libSet
                    or tmpFileName == appname):
                continue
            myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)
            directSyscallSet, _, _ = myBinary.extractDirectSyscalls()
            indirectSyscallSet = myBinary.extractIndirectSyscalls(libcGraph)
            importTableSyscalls.update(directSyscallSet)
            importTableSyscalls.update(indirectSyscallSet)

    rootLogger.info("len(importTableSyscalls): %d", len(importTableSyscalls))
    return importTableSyscalls
def createCompleteGraph(self, exceptList=None):
    """Build the application call graph extended with its libraries.

    Steps:
      1. Obtain the required libraries of ``self.binaryPath`` through
         ``util.readLibrariesWithLdd``.
      2. Look for each library's call graph file
         (``<cleaned name>.callgraph.out``) under ``self.cfgPath``.
      3. For libraries with a call graph, add one ``start -> leaf`` edge to
         the complete graph for every leaf reachable from every start node.
      4. For libraries without a call graph whose file exists on disk,
         collect their direct and indirect syscalls instead.

    Libraries named in the libc-related list or in *exceptList* are skipped.

    Args:
        exceptList: Optional collection of library names to skip.
            Defaults to no extra exclusions.

    Returns:
        tuple: ``(completeGraph, librarySyscalls, libraryCfgGraphs,
        libcGraph)`` where ``librarySyscalls`` only covers libraries for
        which no call graph was found and ``libraryCfgGraphs`` maps library
        name to its full call graph.

    Exits the process with status -1 if the application graph cannot be
    created from ``self.binaryCfgPath``.
    """
    # None instead of a shared mutable default list.
    if exceptList is None:
        exceptList = []

    # NOTE(review): these names carry no ".so" suffix but are compared with
    # the raw keys returned by util.readLibrariesWithLdd - confirm the key
    # format matches.
    libcRelatedNames = frozenset((
        "ld", "libc", "libdl", "libcrypt", "libnss_compat", "libnsl",
        "libnss_files", "libnss_nis", "libpthread", "libm", "libresolv",
        "librt", "libutil", "libnss_dns",
    ))

    libraryCfgGraphs = dict()
    # Only for libraries for which we DO NOT have the CFG.
    librarySyscalls = set()
    libraryToPathDict = util.readLibrariesWithLdd(self.binaryPath)

    libcGraph = graph.Graph(self.logger)
    libcGraph.createGraphFromInput(self.libcCfgPath, self.libcSeparator)

    completeGraph = graph.Graph(self.logger)
    if completeGraph.createGraphFromInput(self.binaryCfgPath) == -1:
        self.logger.error("Failed to create graph for input: %s",
                          self.binaryCfgPath)
        sys.exit(-1)

    for libraryName, libPath in libraryToPathDict.items():
        if libraryName in libcRelatedNames or libraryName in exceptList:
            continue

        libraryCfgFilePath = (self.cfgPath + "/" +
                              self.cleanLib(libraryName) + ".callgraph.out")
        if os.path.isfile(libraryCfgFilePath):
            # We have the CFG for this library.
            libraryGraph = graph.Graph(self.logger)
            libraryGraph.createGraphFromInput(libraryCfgFilePath)
            # Keep the full library call graph for later stats creation.
            libraryCfgGraphs[libraryName] = libraryGraph
            # Step 3: only start-node -> leaf edges enter the complete
            # graph, not the library's internal structure.
            for startNode in libraryGraph.extractStartingNodes():
                leaves = libraryGraph.getLeavesFromStartNode(
                    startNode, list(), list())
                for leaf in leaves:
                    completeGraph.addEdge(startNode, leaf)
        elif os.path.isfile(libPath):
            # No CFG for this library: fall back to the syscalls found by
            # analysing the library binary itself.
            libraryProfiler = binaryAnalysis.BinaryAnalysis(
                libPath, self.logger)
            directSyscallSet, _, _ = libraryProfiler.extractDirectSyscalls()
            indirectSyscallSet = libraryProfiler.extractIndirectSyscalls(
                libcGraph)
            librarySyscalls.update(directSyscallSet)
            librarySyscalls.update(indirectSyscallSet)

    return completeGraph, librarySyscalls, libraryCfgGraphs, libcGraph
if __name__ == '__main__':
    # Extract the direct system calls of a single binary and log them.

    # The usage text lists the options this parser actually defines.
    usage = "Usage: %prog -i <Path to binary to analyze> [-d]"

    parser = optparse.OptionParser(usage=usage, version="1")

    parser.add_option("-i", "--input", dest="input", default=None, nargs=1,
                      help="Path to binary to analyze")

    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      default=False, help="Debug enabled/disabled")

    (options, args) = parser.parse_args()
    if isValidOpts(options):
        rootLogger = setLogPath("graph.log")
        tmpSet = set()
        myBinary = binaryAnalysis.BinaryAnalysis(options.input, rootLogger)
        # a: syscall set, b: success count, c: failure count (the order in
        # which other callers unpack extractDirectSyscalls()).
        a, b, c = myBinary.extractDirectSyscalls()
        tmpSet.update(a)
        rootLogger.info("a: %s, b: %d, c: %d", a, b, c)
def processSyscalls(targetcfg, cfginput, mastermain, workermain, edgefilterlist,
                    libsyscalls, libsyscallpath, binprofiler, bininput,
                    apptolibmap, appname, output, debug, rootLogger,
                    cfginputseparator=":"):
    """Compute and print the syscalls reachable from master and worker code.

    The application call graph (*targetcfg*, ``->`` separated, filtered by
    the edge filter list) gives the leaf functions reachable from
    *mastermain* and from each worker entry point; the libc call graph
    (*cfginput*) maps those leaves to syscall numbers.  Library syscalls
    are then added either from a precomputed file (*libsyscalls* without
    *binprofiler*) or by analysing the binaries in *bininput*
    (*binprofiler* without *libsyscalls*).  Results are printed to stdout
    and, when *binprofiler* is set, written to *output*.

    Args:
        targetcfg: Path to the application call graph.
        cfginput: Path to the libc call graph.
        mastermain: Start function of the master phase.
        workermain: Comma-separated start function(s) of the worker phase.
        edgefilterlist: Optional comma-separated edge filter; defaults to
            the worker start functions.
        libsyscalls: Truthy to add syscalls listed in *libsyscallpath*.
        libsyscallpath: Path to the per-library syscall listing.
        binprofiler: Truthy to analyse the binaries in *bininput*.
        bininput: Directory containing the binaries and libraries.
        apptolibmap: Path to the JSON file mapping apps to libraries.
        appname: Name of the application to look up in the map.
        output: Path of the result file written when *binprofiler* is set.
        debug: Not used by this function.
        rootLogger: Logger used for all messages.
        cfginputseparator: Separator used in the libc call graph file.

    Returns:
        ``None`` after a normal run, or ``False`` if option validation
        fails and ``parser.error`` returns.

    Exits the process with status -1 if the JSON map cannot be loaded.
    """
    # `parser` is not defined in this function; it is expected to be the
    # module-level option parser.
    if not cfginput or not targetcfg or not mastermain or not workermain:
        parser.error("All options -c, -f, -m and -w should be provided.")
        return False

    if libsyscalls and (not apptolibmap or not libsyscallpath or not appname):
        parser.error(
            "Options --appname, --apptolibmap and --libsyscallpath should be provided when enabling the --libsyscalls feature."
        )
        return False

    if binprofiler and (not apptolibmap or not appname or not bininput
                        or not output):
        parser.error(
            "Options -o, --appname, --apptolibmap and --bininput should be provided when enabling the --binprofiler feature."
        )
        return False

    # str.split() already yields a one-element list when no comma is present.
    workerMainFuncList = workermain.split(",")
    if edgefilterlist:
        edgeFilterList = edgefilterlist.split(",")
    else:
        edgeFilterList = workerMainFuncList

    applicationGraph = graph.Graph(rootLogger)
    applicationGraph.createGraphFromInputWithFilter(targetcfg, "->",
                                                    edgeFilterList)

    libcGraph = graph.Graph(rootLogger)
    libcGraph.createGraphFromInput(cfginput, cfginputseparator)

    rootLogger.info("-------------Extraction Master leave functions---------")
    masterFunctions = applicationGraph.getLeavesFromStartNode(
        mastermain, list(), list())

    rootLogger.info("-------------Extraction Worker leave functions---------")
    workerFunctions = set()
    for workerFunc in workerMainFuncList:
        workerFunctions.update(
            applicationGraph.getLeavesFromStartNode(workerFunc, list(),
                                                    list()))

    masterSyscalls = set()
    for masterFunc in masterFunctions:
        rootLogger.debug("masterfunc: %s", masterFunc)
        masterSyscalls.update(libcGraph.getSyscallFromStartNode(masterFunc))

    workerSyscalls = set()
    for workerFunc in workerFunctions:
        rootLogger.debug("workerfunc: %s", workerFunc)
        workerSyscalls.update(libcGraph.getSyscallFromStartNode(workerFunc))

    rootLogger.info(
        "len(masterSyscalls): %d len(workerSyscalls): %d before adding library syscalls",
        len(masterSyscalls), len(workerSyscalls))

    # libSet: libraries added to both the import table and the svf analysis
    # output.  otherLibSet: libraries only added to the import table
    # (e.g. apr, apr-util).
    libSet = set()
    otherLibSet = set()
    if libsyscalls or binprofiler:
        libSet, otherLibSet = _loadAppLibSets(apptolibmap, appname,
                                              rootLogger)

    if libsyscalls and not binprofiler:
        listedSyscalls = _readLibSyscalls(libsyscallpath, libSet)
        masterSyscalls.update(listedSyscalls)
        workerSyscalls.update(listedSyscalls)

    importTableSyscalls = set()
    if not libsyscalls and binprofiler:
        # NOTE(review): the original kept a `filesAdded` set that was
        # checked but never populated, so several versions of the same
        # library are all analysed and their results merged.  That
        # behaviour is preserved here; confirm whether de-duplication was
        # intended.
        for fileName in os.listdir(bininput):
            filePath = bininput + "/" + fileName
            if not util.isElf(filePath):
                continue
            tmpFileName = cleanLib(fileName) if ".so" in fileName else fileName
            if not (tmpFileName in otherLibSet or tmpFileName in libSet
                    or tmpFileName == appname):
                continue
            myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)
            directSyscallSet, _, _ = myBinary.extractDirectSyscalls()
            indirectSyscallSet = myBinary.extractIndirectSyscalls(libcGraph)
            importTableSyscalls.update(directSyscallSet)
            importTableSyscalls.update(indirectSyscallSet)
            if tmpFileName in libSet:
                masterSyscalls.update(directSyscallSet)
                masterSyscalls.update(indirectSyscallSet)
                workerSyscalls.update(directSyscallSet)
                workerSyscalls.update(indirectSyscallSet)

    rootLogger.info(
        "len(importTableSyscalls): %d len(masterSyscalls): %d len(workerSyscalls): %d after adding library syscalls",
        len(importTableSyscalls), len(masterSyscalls), len(workerSyscalls))

    translator = sycall.Syscall(rootLogger)
    syscallmap = translator.createMap()

    importTableSyscallNames = set()
    blImportTableSyscallNames = set()
    if binprofiler:
        importTableSyscallNames, blImportTableSyscallNames = (
            _collectSyscallNames(syscallmap, importTableSyscalls))

    print("------- main -------")
    masterSyscallNames, blMasterSyscallNames = _collectSyscallNames(
        syscallmap, masterSyscalls, echo=True)

    print("------- child -------")
    workerSyscallNames, blWorkerSyscallNames = _collectSyscallNames(
        syscallmap, workerSyscalls, echo=True)

    if binprofiler:
        outputDict = {
            'importTable': importTableSyscallNames,
            'master': masterSyscallNames,
            'worker': workerSyscallNames,
            'blImportTable': blImportTableSyscallNames,
            'blMaster': blMasterSyscallNames,
            'blWorker': blWorkerSyscallNames,
        }
        util.writeDictToFile(outputDict, output)

    # .get() instead of indexing: a syscall number missing from the map
    # must not abort the report with a KeyError (same fallback as the
    # per-phase listings above).
    print("------- main minus worker -------")
    for syscall in sorted(masterSyscalls - workerSyscalls):
        print(syscallmap.get(syscall, ""))

    print("------- worker minus main -------")
    for syscall in sorted(workerSyscalls - masterSyscalls):
        print(syscallmap.get(syscall, ""))


def _loadAppLibSets(apptolibmap, appname, rootLogger):
    """Return ``(libSet, otherLibSet)`` for *appname* from the JSON map.

    Exits the process with status -1 if the file cannot be read or parsed.
    """
    try:
        # The context manager closes the file even if parsing fails.
        with open(apptolibmap, 'r') as appToLibFile:
            appToLibMap = json.load(appToLibFile)
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: malformed JSON.
        rootLogger.warning(
            "Trying to load app to lib map json from: %s, but doesn't exist: %s",
            apptolibmap, str(e))
        rootLogger.debug("Finished loading json")
        sys.exit(-1)

    libSet = set()
    otherLibSet = set()
    wantedApp = appname.strip()
    for app in appToLibMap["apps"]:
        for key, value in app.items():
            if key.strip() == wantedApp:
                libSet.update(cleanLib(lib) for lib in value["libs"])
                otherLibSet.update(cleanLib(lib) for lib in value["otherlibs"])
    return libSet, otherLibSet


def _readLibSyscalls(libsyscallpath, libSet):
    """Return the syscall numbers listed for the libraries in *libSet*.

    A line is used when it has more than two whitespace-separated fields
    and its second field, once cleaned, is in *libSet*; the remaining
    fields hold the numbers, optionally decorated with ``{``, ``}``, ``,``.
    """
    syscalls = set()
    with open(libsyscallpath, 'r') as libSyscallFile:
        for line in libSyscallFile:
            fields = line.split()
            if len(fields) <= 2 or cleanLib(fields[1]) not in libSet:
                continue
            for token in fields[2:]:
                stripped = token.replace("{", "").replace("}", "")
                for number in stripped.split(","):
                    # A stand-alone "{", "}" or "," leaves an empty string,
                    # which int() would reject.
                    if number:
                        syscalls.add(int(number))
    return syscalls


def _collectSyscallNames(syscallmap, allowedSyscalls, echo=False, limit=400):
    """Return ``(allowedNames, blockedNames)`` for a set of syscall numbers.

    ``blockedNames`` holds the names of the numbers below *limit* that are
    not in *allowedSyscalls*.  With *echo* set, the map entry of every
    allowed number is printed (an empty line when the map has none).
    """
    blockedNames = set()
    for number in range(limit):
        if number not in allowedSyscalls and syscallmap.get(number, None):
            blockedNames.add(syscallmap[number])

    allowedNames = set()
    for number in allowedSyscalls:
        if syscallmap.get(number, None):
            allowedNames.add(syscallmap[number])
        if echo:
            print(syscallmap.get(number, ""))
    return allowedNames, blockedNames