Example #1
0
    def extractDirectSyscalls(self, folder):
        """Gather the direct system calls issued by the ELF files in *folder*.

        Each directory entry that ``util.isElf`` accepts is reduced to a
        normalized name; entries whose normalized name is in the exclusion
        list, or was already seen, are ignored. The remaining files are
        analysed with ``binaryAnalysis.BinaryAnalysis`` and the syscall sets
        it reports are merged.

        Returns the union of the syscall sets of all analysed files.
        """
        skippedNames = [
            "ld.so", "libc.so", "libdl.so", "libcrypt.so", "libnss_compat.so",
            "libnsl.so", "libnss_files.so", "libnss_nis.so", "libpthread.so",
            "libm.so", "libresolv.so", "librt.so", "libutil.so",
            "libnss_dns.so", "gosu"
        ]
        sharedObjectMarker = ".so"

        seenNames = set()
        pathsToAnalyse = set()
        for entry in os.listdir(folder):
            entryPath = folder + "/" + entry
            if not util.isElf(entryPath):
                continue

            if sharedObjectMarker in entry:
                # Drop a "-<version>" infix and everything after ".so", so
                # e.g. "libfoo-1.2.so.3" is compared as "libfoo.so".
                normalized = re.sub("-.*so", ".so", entry)
                normalized = normalized[:normalized.index(".so")] + ".so"
            else:
                normalized = entry

            if normalized in skippedNames or normalized in seenNames:
                continue
            # Only the first file seen for a given normalized name is kept.
            pathsToAnalyse.add(entryPath)
            seenNames.add(normalized)

        mergedSyscalls = set()
        for binaryPath in pathsToAnalyse:
            self.logger.debug("extraction direct syscall for %s", binaryPath)
            analyser = binaryAnalysis.BinaryAnalysis(binaryPath, self.logger)
            found, okCount, badCount = analyser.extractDirectSyscalls()
            self.logger.debug(
                "Successfull direct syscalls: %d list: %s, Failed direct syscalls: %d",
                okCount, str(found), badCount)
            mergedSyscalls.update(found)
        return mergedSyscalls
    (options, args) = parser.parse_args()
    if isValidOpts(options):
        rootLogger = setLogPath("directsyscallextractor.log")

        #        exceptList = ["lib", "grep", "sed", "bash", "sh", "touch", "mkdir", "systemctl-systemd", "chmod", "sv", "ssh-keygen", "sysctl", "env", "svlogd", "sleep", "mount", "blkid", "stat", "cat", "systemd-path", "dpkg-query", "df", "readlink", "rm", "cp", "systemd-sysctl", "hostnamectl", "loginctl", "uname", "id", "which", "dumb-init", "find", "basename", "awk", "hostname", "modprobe", "chown", "sort", "ip", "ldconfig", "egrep", "ldconfig.real", "mv", "dirname", "whoami", "date"]
        exceptList = []
        lib = ".so"

        for folderName in os.listdir(options.outputpath):
            fileList = list()
            removeList = list()
            #            rootLogger.info("////////////Checking image: %s//////////////////", folderName)
            folderName = os.path.join(options.outputpath, folderName)
            if (os.path.isdir(folderName)):
                for fileName in os.listdir(folderName):
                    if (util.isElf(folderName + "/" + fileName)):
                        fileList.append(folderName + "/" + fileName)
                        for exceptItem in exceptList:
                            #if ( lib in fileName or fileName.startswith(exceptItem) or util.isGo(folderName + "/" + fileName, rootLogger) ):
                            if (fileName.startswith(exceptItem) or util.isGo(
                                    folderName + "/" + fileName, rootLogger)):
                                removeList.append(folderName + "/" + fileName)
                                break
            finalSet = set(fileList) - set(removeList)
            #            rootLogger.info("List of binaries for %s: %s", folderName, str(finalSet))
            for filePath in finalSet:
                #rootLogger.debug("extraction direct syscall for %s", filePath)
                temp1 = util.extractDirectSyscalls(filePath, rootLogger)
                temp2 = util.extractLibcSyscalls(filePath, rootLogger)
                if (temp1 != 0 or temp2 != 0):
                    rootLogger.info(
def processSyscalls(binprofiler, bininput, apptolibmap, appname, libccfginput,
                    debug, rootLogger):
    """Collect the system calls needed by an application's binaries.

    The libc call graph is always built from ``libccfginput`` (entries
    separated by ":"). When ``binprofiler`` is set, every ELF file in
    ``bininput`` that is the application itself or one of the libraries
    listed for ``appname`` in the app-to-library JSON map is analysed, and
    its direct and indirect syscalls are merged into the result.

    Args:
        binprofiler: enables the binary analysis step.
        bininput: directory holding the binaries and libraries to analyse.
        apptolibmap: path of the JSON file mapping applications to their
            "libs" and "otherlibs".
        appname: application name looked up in the JSON map; also matched
            against file names in ``bininput``.
        libccfginput: libc call-graph input handed to
            ``Graph.createGraphFromInput``.
        debug: not used by this function.
        rootLogger: logger used for all messages.

    Returns:
        The set of syscalls found (empty when ``binprofiler`` is off), or
        ``False`` when required options are missing and ``parser.error``
        returns (presumably it exits instead; confirm against ``parser``).

    The process exits with status -1 if the JSON map cannot be loaded.
    """
    if binprofiler and (not apptolibmap or not appname or not bininput):
        parser.error(
            "Options --appname, --apptolibmap and --bininput should be provided when enabling the --binprofiler feature."
        )
        return False

    libcGraph = graph.Graph(rootLogger)
    libcGraph.createGraphFromInput(libccfginput, ":")

    importTableSyscalls = set()
    if binprofiler:
        # Libraries which should be added to both the import table and the
        # svf analysis output.
        libSet = set()
        # Libraries which should only be added to the import table
        # (e.g. apr, apr-util).
        otherLibSet = set()

        try:
            # The context manager closes the file even if parsing fails.
            with open(apptolibmap, 'r') as appToLibFile:
                appToLibMap = json.load(appToLibFile)
        except Exception as e:
            rootLogger.warning(
                "Trying to load app to lib map json from: %s, but doesn't exist: %s",
                apptolibmap, str(e))
            rootLogger.debug("Finished loading json")
            sys.exit(-1)

        wantedApp = appname.strip()
        for app in appToLibMap["apps"]:
            for key, value in app.items():
                if key.strip() == wantedApp:
                    libSet.update(cleanLib(lib) for lib in value["libs"])
                    otherLibSet.update(
                        cleanLib(lib) for lib in value["otherlibs"])

        # One lookup set instead of three separate membership tests per file.
        wantedNames = libSet | otherLibSet | {appname}

        for fileName in os.listdir(bininput):
            filePath = os.path.join(bininput, fileName)
            if not util.isElf(filePath):
                continue
            # Shared objects are compared by their cleaned library name.
            tmpFileName = cleanLib(fileName) if ".so" in fileName else fileName
            # Every matching file is analysed; several files that clean to
            # the same library name all contribute to the result.
            if tmpFileName not in wantedNames:
                continue
            myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)
            directSyscallSet, successCount, failCount = (
                myBinary.extractDirectSyscalls())
            indirectSyscallSet = myBinary.extractIndirectSyscalls(libcGraph)
            importTableSyscalls.update(directSyscallSet)
            importTableSyscalls.update(indirectSyscallSet)

    rootLogger.info("len(importTableSyscalls): %d", len(importTableSyscalls))
    return importTableSyscalls
Example #4
0
        binList = set()
        libList = set()
        modList = set()
        indirectList = set()
        fileList = set()
        filesAdded = set()

        libcList = [
            "ld.so", "libc.so", "libdl.so", "libcrypt.so", "libnss_compat.so",
            "libnsl.so", "libnss_files.so", "libnss_nis.so", "libpthread.so",
            "libm.so", "libresolv.so", "librt.so", "libutil.so",
            "libnss_dns.so"
        ]

        for fileName in os.listdir(options.input):
            if (util.isElf(options.input + "/" + fileName)):
                if (lib in fileName):
                    tmpFileName = re.sub("-.*so", ".so", fileName)
                    tmpFileName = tmpFileName[:tmpFileName.index(".so")]
                    tmpFileName = tmpFileName + ".so"
                else:
                    tmpFileName = fileName
                if (tmpFileName not in filesAdded):
                    if (tmpFileName not in libcList):
                        indirectList.add(options.input + "/" + fileName)
                    if (fileName.startswith("lib") and lib in fileName):
                        libList.add(options.input + "/" + fileName)
                    elif (fileName.startswith("mod") and lib in fileName):
                        modList.add(options.input + "/" + fileName)
                    else:
                        binList.add(options.input + "/" + fileName)
def _loadAppLibSets(apptolibmap, appname, rootLogger):
    """Return (libSet, otherLibSet) listed for *appname* in the JSON map file."""
    # Libraries which should be added to both the import table and the svf
    # analysis output.
    libSet = set()
    # Libraries which should only be added to the import table
    # (e.g. apr, apr-util).
    otherLibSet = set()
    try:
        # The context manager closes the file even if parsing fails.
        with open(apptolibmap, 'r') as appToLibFile:
            appToLibMap = json.load(appToLibFile)
    except Exception as e:
        rootLogger.warning(
            "Trying to load app to lib map json from: %s, but doesn't exist: %s",
            apptolibmap, str(e))
        rootLogger.debug("Finished loading json")
        sys.exit(-1)

    wantedApp = appname.strip()
    for app in appToLibMap["apps"]:
        for key, value in app.items():
            if key.strip() == wantedApp:
                libSet.update(cleanLib(lib) for lib in value["libs"])
                otherLibSet.update(cleanLib(lib) for lib in value["otherlibs"])
    return libSet, otherLibSet


def _readLibSyscalls(libsyscallpath, libSet):
    """Return the syscall numbers listed in the file for libraries in *libSet*."""
    numbers = set()
    with open(libsyscallpath, 'r') as libSyscallFile:
        for libSyscallLine in libSyscallFile:
            fields = libSyscallLine.split()
            if len(fields) > 2 and cleanLib(fields[1]) in libSet:
                for token in fields[2:]:
                    token = token.replace("{", "").replace(",", "").replace(
                        "}", "")
                    # A lone "{" or "}" leaves nothing to convert.
                    if token:
                        numbers.add(int(token))
    return numbers


def _syscallNames(syscalls, syscallmap, limit=400):
    """Return (names of *syscalls*, names of numbers below *limit* not in it)."""
    allowed = {syscallmap[num] for num in syscalls if syscallmap.get(num, None)}
    blocked = {
        syscallmap[num]
        for num in range(limit)
        if num not in syscalls and syscallmap.get(num, None)
    }
    return allowed, blocked


def processSyscalls(targetcfg,
                    cfginput,
                    mastermain,
                    workermain,
                    edgefilterlist,
                    libsyscalls,
                    libsyscallpath,
                    binprofiler,
                    bininput,
                    apptolibmap,
                    appname,
                    output,
                    debug,
                    rootLogger,
                    cfginputseparator=":"):
    """Derive and print the syscalls reachable from master and worker entry points.

    The application call graph (``targetcfg``, "->" separated) is walked from
    ``mastermain`` and from each worker entry point to find leaf functions;
    the libc graph (``cfginput``) maps those leaves to syscalls. Library
    syscalls are optionally added either from a precomputed file
    (``libsyscalls``) or by analysing binaries (``binprofiler``). The
    resulting names are printed, and with ``binprofiler`` the allowed and
    blocked name sets are written via ``util.writeDictToFile``.

    Args:
        targetcfg: application call-graph input.
        cfginput: libc call-graph input.
        mastermain: start function of the master.
        workermain: comma separated start functions of the workers.
        edgefilterlist: comma separated edge filter; when empty the worker
            start functions are used as the filter.
        libsyscalls: add syscalls of mapped libraries from ``libsyscallpath``
            (only when ``binprofiler`` is off).
        libsyscallpath: file with per-library syscall numbers.
        binprofiler: analyse binaries in ``bininput`` (only when
            ``libsyscalls`` is off) and write the result to ``output``.
        bininput: directory with binaries and libraries to analyse.
        apptolibmap: JSON file mapping applications to their libraries.
        appname: application name used in the map and matched against files.
        output: destination passed to ``util.writeDictToFile``.
        debug: not used by this function.
        rootLogger: logger used for all messages.
        cfginputseparator: separator used in ``cfginput``.

    Returns:
        ``False`` when required options are missing and ``parser.error``
        returns (presumably it exits instead; confirm against ``parser``);
        otherwise ``None``.

    The process exits with status -1 if the JSON map cannot be loaded.
    """
    if not cfginput or not targetcfg or not mastermain or not workermain:
        parser.error("All options -c, -f, -m and -w should be provided.")
        return False

    if libsyscalls and (not apptolibmap or not libsyscallpath or not appname):
        parser.error(
            "Options --appname, --apptolibmap and --libsyscallpath  should be provided when enabling the --libsyscalls feature."
        )
        return False

    if binprofiler and (not apptolibmap or not appname or not bininput
                        or not output):
        parser.error(
            "Options -o, --appname, --apptolibmap and --bininput should be provided when enabling the --binprofiler feature."
        )
        return False

    # str.split returns a one-element list when there is no comma.
    workerMainFuncList = workermain.split(",")
    if edgefilterlist:
        edgeFilterList = edgefilterlist.split(",")
    else:
        edgeFilterList = workerMainFuncList

    applicationGraph = graph.Graph(rootLogger)
    applicationGraph.createGraphFromInputWithFilter(targetcfg, "->",
                                                    edgeFilterList)
    libcGraph = graph.Graph(rootLogger)
    libcGraph.createGraphFromInput(cfginput, cfginputseparator)

    rootLogger.info("-------------Extraction Master leave functions---------")
    masterFunctions = applicationGraph.getLeavesFromStartNode(
        mastermain, list(), list())
    rootLogger.info("-------------Extraction Worker leave functions---------")
    workerFunctions = set()
    for workerFunc in workerMainFuncList:
        workerFunctions.update(
            applicationGraph.getLeavesFromStartNode(workerFunc, list(),
                                                    list()))

    masterSyscalls = set()
    for masterFunc in masterFunctions:
        rootLogger.debug("masterfunc: %s", masterFunc)
        masterSyscalls.update(libcGraph.getSyscallFromStartNode(masterFunc))

    workerSyscalls = set()
    for workerFunc in workerFunctions:
        rootLogger.debug("workerfunc: %s", workerFunc)
        workerSyscalls.update(libcGraph.getSyscallFromStartNode(workerFunc))

    rootLogger.info(
        "len(masterSyscalls): %d len(workerSyscalls): %d before adding library syscalls",
        len(masterSyscalls), len(workerSyscalls))

    libSet = set()
    otherLibSet = set()
    if libsyscalls or binprofiler:
        libSet, otherLibSet = _loadAppLibSets(apptolibmap, appname,
                                              rootLogger)

    if libsyscalls and not binprofiler:
        libraryNumbers = _readLibSyscalls(libsyscallpath, libSet)
        masterSyscalls.update(libraryNumbers)
        workerSyscalls.update(libraryNumbers)

    importTableSyscalls = set()
    if not libsyscalls and binprofiler:
        wantedNames = libSet | otherLibSet | {appname}
        for fileName in os.listdir(bininput):
            filePath = os.path.join(bininput, fileName)
            if not util.isElf(filePath):
                continue
            # Shared objects are compared by their cleaned library name.
            tmpFileName = cleanLib(fileName) if ".so" in fileName else fileName
            # Every matching file is analysed; several files that clean to
            # the same library name all contribute to the result.
            if tmpFileName not in wantedNames:
                continue
            myBinary = binaryAnalysis.BinaryAnalysis(filePath, rootLogger)
            directSyscallSet, successCount, failCount = (
                myBinary.extractDirectSyscalls())
            indirectSyscallSet = myBinary.extractIndirectSyscalls(libcGraph)
            importTableSyscalls.update(directSyscallSet)
            importTableSyscalls.update(indirectSyscallSet)
            # Only libraries in libSet also count towards master and worker.
            if tmpFileName in libSet:
                masterSyscalls.update(directSyscallSet)
                masterSyscalls.update(indirectSyscallSet)
                workerSyscalls.update(directSyscallSet)
                workerSyscalls.update(indirectSyscallSet)

    rootLogger.info(
        "len(importTableSyscalls): %d len(masterSyscalls): %d len(workerSyscalls): %d after adding library syscalls",
        len(importTableSyscalls), len(masterSyscalls), len(workerSyscalls))

    translator = sycall.Syscall(rootLogger)
    syscallmap = translator.createMap()
    mminusw = sorted(masterSyscalls.difference(workerSyscalls))
    wminusm = sorted(workerSyscalls.difference(masterSyscalls))

    print("------- main -------")
    for syscall in masterSyscalls:
        print(syscallmap.get(syscall, ""))
    print("------- child -------")
    for syscall in workerSyscalls:
        print(syscallmap.get(syscall, ""))

    if binprofiler:
        importTableSyscallNames, blImportTableSyscallNames = _syscallNames(
            importTableSyscalls, syscallmap)
        masterSyscallNames, blMasterSyscallNames = _syscallNames(
            masterSyscalls, syscallmap)
        workerSyscallNames, blWorkerSyscallNames = _syscallNames(
            workerSyscalls, syscallmap)

        outputDict = dict()
        outputDict['importTable'] = importTableSyscallNames
        outputDict['master'] = masterSyscallNames
        outputDict['worker'] = workerSyscallNames
        outputDict['blImportTable'] = blImportTableSyscallNames
        outputDict['blMaster'] = blMasterSyscallNames
        outputDict['blWorker'] = blWorkerSyscallNames

        util.writeDictToFile(outputDict, output)

    # Numbers missing from the map print as an empty line, like the listings
    # above, instead of raising KeyError after all the work is done.
    print("------- main minus worker -------")
    for syscall in mminusw:
        print(syscallmap.get(syscall, ""))
    print("------- worker minus main -------")
    for syscall in wminusm:
        print(syscallmap.get(syscall, ""))