Python ksplitGraphObject Examples, split2k.ksplitGraphObject Python Examples

Example #1

0

Show file

File: vulnsFinder.py Project: chubbymaggie/TRACY

def CompareWithKSplit():
    """Grade every entry returned by the simple DB against the reference function.

    The reference graph is read from ``sourcesDir/sourceFunctionName`` and
    split with ``split2k.ksplitGraphObject(myK, refGraph, True)``.  Each item
    of ``DBSimpleClient().get_all()`` is then paired with that split list and
    handed to ``doOneFunctionFileRW``; every field list it returns is written
    to a ``CounterXlsReport``.

    Relies on names defined outside this function (``sourcesDir``,
    ``sourceFunctionName``, ``csvName``, ``myK``, ``identity``,
    ``doOneFunctionFileRW``).
    """
    refGraph = igraph.read(os.path.join(sourcesDir, sourceFunctionName))
    refGraph['name'] = sourceFunctionName

    # Everywhere except Windows ("nt") the work is spread over a process
    # pool; on Windows it is mapped lazily inside this process.
    pool = None
    if os.name != "nt":
        pool = multiprocessing.Pool()
        mapper = pool.imap_unordered
    else:
        mapper = itertools.imap

    completed = False
    try:
        reportFile = CounterXlsReport(csvName + "-K=" + str(myK) + "-" + sourceFunctionName)

        print("Prepping db - (func files) - target=" + os.path.join(sourcesDir, sourceFunctionName))
        sourcesList3k = list(mapper(identity, split2k.ksplitGraphObject(myK, refGraph, True)))
        print("For k=" + str(myK) + " we have - " + str(len(sourcesList3k)))
        print("end db prep")

        from simple_db import DBSimpleClient

        db = DBSimpleClient()

        # Every DB entry travels together with the same reference split list.
        workItems = itertools.izip(db.get_all(), itertools.cycle([sourcesList3k]))

        for allFields in mapper(doOneFunctionFileRW, workItems):
            for fields in allFields:
                reportFile.writeLine(fields)
        completed = True
    finally:
        if pool is not None:
            # A clean run lets the workers drain; after an error they are
            # killed so join() cannot block on work nobody will consume.
            if completed:
                pool.close()
            else:
                pool.terminate()
            pool.join()

Example #2

0

Show file

File: findClonesCONS-NDP.py Project: yuedeji/TRACY

def CompareWithKSplit():
    """Grade every target function graph on disk against the reference function.

    The reference graph is read from ``sourcesDir/sourceFunctionName`` and
    split with ``split2k.ksplitGraphObject(myK, refGraph, True)``.  For each
    entry of ``targetsDir``, every file ending in ``myExt`` inside its
    ``functionsGraphsDirectoryName`` sub-directory is loaded and handed to
    ``doOneFunctionFileRW``; every field list it returns is written to a
    ``CounterXlsReport``.

    Relies on names defined outside this function (``sourcesDir``,
    ``sourceFunctionName``, ``targetsDir``, ``functionsGraphsDirectoryName``,
    ``myExt``, ``csvName``, ``myK``, ``identity``, ``doOneFunctionFileRW``).
    """
    refGraph = igraph.read(os.path.join(sourcesDir, sourceFunctionName))
    refGraph['name'] = sourceFunctionName

    # Everywhere except Windows ("nt") the work is spread over a process
    # pool; on Windows it is mapped lazily inside this process.
    pool = None
    if os.name != "nt":
        pool = multiprocessing.Pool()
        mapper = pool.imap_unordered
    else:
        mapper = itertools.imap

    completed = False
    try:
        reportFile = CounterXlsReport(csvName + "-K=" + str(myK) + "-" +
                                      sourceFunctionName)

        print("Prepping db - (func files) - target=" + os.path.join(
            sourcesDir, sourceFunctionName))
        sourcesList3k = list(
            mapper(identity, split2k.ksplitGraphObject(myK, refGraph, True)))
        print("For k=" + str(myK) + " we have - " + str(len(sourcesList3k)))
        print("end db prep")

        def superFunctionsGenerator():
            """Yield one worker-parameter dict per target function file."""
            for exeName in os.listdir(targetsDir):
                # Trailing comma: Python 2 idiom that suppresses the newline.
                print("loading - " + exeName + " ... "),
                currentExeDir = os.path.join(targetsDir, exeName,
                                             functionsGraphsDirectoryName)

                for funcFileName in os.listdir(currentExeDir):
                    if not funcFileName.endswith(myExt):
                        continue

                    tarGraph = igraph.read(
                        os.path.join(currentExeDir, funcFileName))
                    tarGraph['name'] = funcFileName

                    # funcFileName and exeName are only for the timeout print
                    yield {
                        'tarGraph': tarGraph,
                        'refGraph': refGraph,
                        'sourcesList3k': sourcesList3k,
                        'funcFileName': funcFileName,
                        'exeName': exeName
                    }

                print("finished loading " + exeName)

        for allFields in mapper(doOneFunctionFileRW, superFunctionsGenerator()):
            for fields in allFields:
                reportFile.writeLine(fields)
        completed = True
    finally:
        if pool is not None:
            # A clean run lets the workers drain; after an error they are
            # killed so join() cannot block on work nobody will consume.
            if completed:
                pool.close()
            else:
                pool.terminate()
            pool.join()

Example #3

0

Show file

File: findClonesCONS_SUBSET.py Project: chubbymaggie/TRACY

def CompareWithKSplit():
    """Grade the whitelisted target function graphs against the reference function.

    The reference graph is read from ``sourcesDir/sourceFunctionName`` and
    split with ``split2k.ksplitGraphObject(myK, refGraph, True)``.  Only
    executables and function files accepted by ``inWhiteList`` are loaded and
    handed to ``doOneFunctionFileRW``; every field list it returns is written
    to a ``CounterXlsReport``.

    Relies on names defined outside this function (``sourcesDir``,
    ``sourceFunctionName``, ``sourceName``, ``whiteList``, ``targetsDir``,
    ``functionsGraphsDirectoryName``, ``myExt``, ``csvName``, ``myK``,
    ``identity``, ``doOneFunctionFileRW``).
    """
    refGraph = igraph.read(os.path.join(sourcesDir, sourceFunctionName))
    refGraph['name'] = sourceFunctionName

    # Everywhere except Windows ("nt") the work is spread over a process
    # pool; on Windows it is mapped lazily inside this process.
    pool = None
    if os.name != "nt":
        pool = multiprocessing.Pool()
        mapper = pool.imap_unordered
    else:
        mapper = itertools.imap

    completed = False
    try:
        reportFile = CounterXlsReport(csvName + "-K=" + str(myK) + "-" + sourceFunctionName)

        print("Prepping db - (func files) - target=" + os.path.join(sourcesDir, sourceFunctionName))
        sourcesList3k = list(mapper(identity, split2k.ksplitGraphObject(myK, refGraph, True)))
        print("For k=" + str(myK) + " we have - " + str(len(sourcesList3k)))
        print("end db prep")

        def inWhiteList(value, atrbName):
            """Return True when a whitelist entry for ``sourceName`` has ``value`` under ``atrbName``."""
            for whiteListEntry in whiteList:
                if whiteListEntry['source'] != sourceName:
                    continue
                if whiteListEntry[atrbName] == value:
                    return True
            return False

        def superFunctionsGenerator():
            """Yield one worker-parameter dict per whitelisted target function file."""
            for exeName in os.listdir(targetsDir):
                if not inWhiteList(exeName, 'exe'):
                    continue

                print("loading - " + exeName + " ... ")
                currentExeDir = os.path.join(targetsDir, exeName, functionsGraphsDirectoryName)

                for funcFileName in os.listdir(currentExeDir):
                    # The extension is checked first; the whitelist then
                    # matches on the file name without its extension.
                    if not funcFileName.endswith(myExt):
                        continue
                    if not inWhiteList(os.path.splitext(funcFileName)[0], 'functionName'):
                        continue

                    print("FUNC begin - " + funcFileName + " ... ")

                    tarGraph = igraph.read(os.path.join(currentExeDir, funcFileName))
                    tarGraph['name'] = funcFileName

                    # funcFileName and exeName are only for the timeout print
                    yield {
                        'tarGraph': tarGraph,
                        'refGraph': refGraph,
                        'sourcesList3k': sourcesList3k,
                        'funcFileName': funcFileName,
                        'exeName': exeName,
                    }

                    print("FUNC finished " + funcFileName)

                print("finished loading " + exeName)

        for allFields in mapper(doOneFunctionFileRW, superFunctionsGenerator()):
            for fields in allFields:
                reportFile.writeLine(fields)
        completed = True
    finally:
        if pool is not None:
            # A clean run lets the workers drain; after an error they are
            # killed so join() cannot block on work nobody will consume.
            if completed:
                pool.close()
            else:
                pool.terminate()
            pool.join()

Example #4

0

Show file

File: findClonesCONS.py Project: zyc1314/malware-polymorphism-detection

def CompareWithKSplit(sourcesDirectory, targetsDir):
    """Load one reference function graph and prepare its split list.

    ``sourcesDirectory`` is passed directly to ``igraph.read``; its last two
    backslash-separated components are used as the function name and the
    source executable name.  The last two backslash-separated components of
    ``targetsDir`` are used as the target executable name and the family
    name.  All of them go into the ``CounterXlsReport`` name.

    NOTE(review): this listing appears to stop right after the nested
    generator is defined; nothing visible here consumes ``reportFile``,
    ``mapper`` or ``superFunctionsGenerator`` afterwards.
    """
    print(targetsDir)
    print("target " + targetsDir)
    refGraph = igraph.read(sourcesDirectory)

    # NOTE(review): splitting on a backslash only yields path components for
    # Windows-style paths -- confirm the inputs this is expected to receive.
    sourceParts = sourcesDirectory.split('\\')
    targetParts = targetsDir.split('\\')

    sourceFunctionName = sourceParts[-1]
    refGraph['name'] = sourceFunctionName
    if (os.name != "nt"):  # "nt" is Windows; everything else gets a process pool
        p = multiprocessing.Pool()
        mapper = p.imap_unordered
    else:
        mapper = itertools.imap
    sourcesExeName = sourceParts[-2]
    targetsExeName = targetParts[-1]
    familyName = targetParts[-2]
    # the name of the result file
    reportFile = CounterXlsReport(sourcesExeName + "-" + familyName + "@" + targetsExeName + "-K=" + str(myK) + "-" + sourceFunctionName)

    print("Prepping db - (func files) - target=" + sourcesDirectory)
    sourcesList3k = list(mapper(identity, split2k.ksplitGraphObject(myK, refGraph, True)))

    def superFunctionsGenerator(currentExeDir):
        """Yield one worker-parameter dict per ``myExt`` file in ``currentExeDir``."""
        for funcFileName in filter(lambda x: x.endswith(myExt), os.listdir(currentExeDir)):
            # This line was tab-indented in a space-indented body, which made
            # the following lines dedent to a level that matched nothing.
            tarGraph = igraph.read(os.path.join(currentExeDir, funcFileName))
            tarGraph['name'] = funcFileName

            # funcFileName and exeName are only for the timeout print
            yield {'tarGraph': tarGraph, 'refGraph': refGraph, 'sourcesList3k': sourcesList3k, 'funcFileName': funcFileName, 'exeName': targetsExeName}

Example #5

0

Show file

def CompareWithKSplit():
    """Grade every entry returned by the simple DB against the reference function.

    The reference graph is read from ``sourcesDir/sourceFunctionName`` and
    split with ``split2k.ksplitGraphObject(myK, refGraph, True)``.  Each item
    of ``DBSimpleClient().get_all()`` is then paired with that split list and
    handed to ``doOneFunctionFileRW``; every field list it returns is written
    to a ``CounterXlsReport``.

    Relies on names defined outside this function (``sourcesDir``,
    ``sourceFunctionName``, ``csvName``, ``myK``, ``identity``,
    ``doOneFunctionFileRW``).
    """
    refGraph = igraph.read(os.path.join(sourcesDir, sourceFunctionName))
    refGraph['name'] = sourceFunctionName

    # Everywhere except Windows ("nt") the work is spread over a process
    # pool; on Windows it is mapped lazily inside this process.
    pool = None
    if os.name != "nt":
        pool = multiprocessing.Pool()
        mapper = pool.imap_unordered
    else:
        mapper = itertools.imap

    completed = False
    try:
        reportFile = CounterXlsReport(csvName + "-K=" + str(myK) + "-" +
                                      sourceFunctionName)

        print("Prepping db - (func files) - target=" + os.path.join(
            sourcesDir, sourceFunctionName))
        sourcesList3k = list(
            mapper(identity, split2k.ksplitGraphObject(myK, refGraph, True)))
        print("For k=" + str(myK) + " we have - " + str(len(sourcesList3k)))
        print("end db prep")

        from simple_db import DBSimpleClient

        db = DBSimpleClient()

        # Every DB entry travels together with the same reference split list.
        workItems = itertools.izip(db.get_all(),
                                   itertools.cycle([sourcesList3k]))

        for allFields in mapper(doOneFunctionFileRW, workItems):
            for fields in allFields:
                reportFile.writeLine(fields)
        completed = True
    finally:
        if pool is not None:
            # A clean run lets the workers drain; after an error they are
            # killed so join() cannot block on work nobody will consume.
            if completed:
                pool.close()
            else:
                pool.terminate()
            pool.join()

Example #6

0

Show file

File: findClonesCONS.py Project: chubbymaggie/TRACY

def doOneFunctionFileRW(workerParams):
    """Grade one target function graph against the reference graphlets.

    ``workerParams`` is a dict with the keys ``'sourcesList3k'``,
    ``'tarGraph'``, ``'refGraph'``, ``'exeName'`` and ``'funcFileName'``.

    Returns a list holding one field list per entry of ``gradeSystemNames``:
    ``[exeName, funcFileName, #objects, K, "TRUE", methodSystemName,
    "normal", gradeSystemName, quoted duration string, elapsed seconds]``
    followed by one ``"%.4f"`` ratio per entry of ``borders``.

    An empty list is returned when the reference graph has more than 3000
    vertices, or when the ``timeOutInSecs`` timer fires while the target
    graphlets are still being processed.
    """
    sourcesList3k = workerParams['sourcesList3k']
    tarGraph = workerParams['tarGraph']
    refGraph = workerParams['refGraph']
    exeName = workerParams['exeName']
    funcfilename = workerParams['funcFileName']

    # Checked before the watchdog timer exists, so the skip path cannot
    # leave a timer thread running behind it.
    if len(refGraph.vs) > 3000:
        print("skipping cuz > 3000 - func=" + funcfilename + ",exe-" + exeName)
        return []

    # [fired?, start time]; a list so the nested callback can flip the flag.
    timeout = [False, time.time()]

    def toggleTimeout():
        timeout[0] = True

    # Fires once after timeOutInSecs seconds; only polled between graphlets.
    timeOutTimer = threading.Timer(timeOutInSecs, toggleTimeout)
    timeOutTimer.start()

    try:
        GradeSystemsInfoInst = GradeSystemsInfo(sourcesList3k, gradeSystemNames)

        startForXlsRecord = time.time()

        # gradesCache[i][j] is the getMatchedCmdsWithGrade result for the i-th
        # reference vertex and the j-th target vertex.
        gradesCache = []
        for refv in refGraph.vs:
            gradesCache.append([getMatchedCmdsWithGrade(refv['code'], tarv['code'])
                                for tarv in tarGraph.vs])

        objectsCount = 0

        for g in split2k.ksplitGraphObject(myK, tarGraph, True):

            if timeout[0]:
                print("Breaking out cuz of timeout, time=" + str(int(time.time() - timeout[1])) + " from - func=" + funcfilename + ",exe-" + exeName)
                return []

            objectsCount += 1
            for refGraphletContainer in GradeSystemsInfoInst:

                refGraphlet = refGraphletContainer.getObject()

                # NOTE(review): the positional True/False presumably request
                # only the ref->target vertex mapping -- confirm against the
                # installed python-igraph isomorphic_vf2 signature.
                (isIso, Map12, Map21) = refGraphlet.isomorphic_vf2(g, None, None, None, None, True, False, None, None)
                if not isIso:
                    continue

                # Pair reference vertex i with target vertex Map12[i]; the
                # vertices' 'id' attributes index into gradesCache.
                refVertices = [refGraphlet.vs[pos] for pos in range(len(refGraphlet.vs))]
                tarVertices = [g.vs[Map12[pos]] for pos in range(len(g.vs))]
                nodeGradesInfos = [
                    gradesCache[int(refVertex['id'])][int(tarVertex['id'])]
                    for refVertex, tarVertex in zip(refVertices, tarVertices)
                ]

                gradeSystemsFinals = OrderedDict()

                for methodType in ['contain', 'ratio']:
                    gradeSystemsFinals[methodType] = numpy.mean(
                        [info['gradesDict'][methodType] for info in nodeGradesInfos])

                # The original guarded this with `if "Solver" == "Solver":`,
                # which is always true; its unreachable else-branch is gone.
                if gradeSystemsFinals['ratio'] > 50:
                    conDict = GraphletsConstraints(nodeGradesInfos)
                    newTarNodes = list(conDict.getRW())
                    refNodes = [info['refCode'] for info in nodeGradesInfos]
                    # Kept as map(): under Python 2 this is a list, and a
                    # shorter argument list is padded with None.
                    grades = map(compareDisAsmCode, refNodes, newTarNodes)

                    for methodType in ['contain', 'ratio']:
                        gradeSystemsFinals['Solver_' + methodType] = numpy.mean(
                            [grade[methodType] for grade in grades])
                else:
                    # At or below the threshold the plain grades are reused.
                    for methodType in ['contain', 'ratio']:
                        gradeSystemsFinals['Solver_' + methodType] = gradeSystemsFinals[methodType]

                # Keep the best grade seen so far per reference graphlet.
                for gradeSystemName in gradeSystemNames:
                    refGraphletContainer[gradeSystemName] = max(gradeSystemsFinals[gradeSystemName], refGraphletContainer[gradeSystemName])

        endForXlsRecord = time.time()
        elapsedSecs = endForXlsRecord - startForXlsRecord

        allFields = []
        for gradeSystemName in gradeSystemNames:
            counters = GradeSystemsInfoInst.tallyForGradeSystem(gradeSystemName, borders)

            # ["exe","funcname","#objects","K","Trace","System","Generator","GradeSystem","TimeStr","TimeNumber"]
            fields = [exeName, funcfilename, str(objectsCount), str(myK), "TRUE", methodSystemName, "normal", gradeSystemName,
                      "\"" + getDurationStr(int(elapsedSecs)) + "\"", str(elapsedSecs)]

            for i in range(len(borders)):
                fields.append("%.4f" % (float(counters[i]) / float(len(GradeSystemsInfoInst))))

            allFields.append(fields)

        return allFields
    finally:
        # Runs on every exit path, including the timeout return and errors.
        timeOutTimer.cancel()

Example #7

0

Show file

File: findClonesCONS-NDP.py Project: yuedeji/TRACY

def doOneFunctionFileRW(workerParams):
    """Grade one target function graph against the reference graphlets.

    ``workerParams`` is a dict with the keys ``'sourcesList3k'``,
    ``'tarGraph'``, ``'refGraph'``, ``'exeName'`` and ``'funcFileName'``.

    Returns a list holding one field list per entry of ``gradeSystemNames``:
    ``[exeName, funcFileName, #objects, K, "TRUE", methodSystemName,
    "normal", gradeSystemName, quoted duration string, elapsed seconds]``
    followed by one ``"%.4f"`` ratio per entry of ``borders``.

    An empty list is returned when the reference graph has more than 3000
    vertices, or when the ``timeOutInSecs`` timer fires while the target
    graphlets are still being processed.
    """
    sourcesList3k = workerParams['sourcesList3k']
    tarGraph = workerParams['tarGraph']
    refGraph = workerParams['refGraph']
    exeName = workerParams['exeName']
    funcfilename = workerParams['funcFileName']

    # Checked before the watchdog timer exists, so the skip path cannot
    # leave a timer thread running behind it.
    if len(refGraph.vs) > 3000:
        print("skipping cuz > 3000 - func=" + funcfilename + ",exe-" + exeName)
        return []

    # [fired?, start time]; a list so the nested callback can flip the flag.
    timeout = [False, time.time()]

    def toggleTimeout():
        timeout[0] = True

    # Fires once after timeOutInSecs seconds; only polled between graphlets.
    timeOutTimer = threading.Timer(timeOutInSecs, toggleTimeout)
    timeOutTimer.start()

    try:
        GradeSystemsInfoInst = GradeSystemsInfo(sourcesList3k, gradeSystemNames)

        startForXlsRecord = time.time()

        # gradesCache[i][j] is the getMatchedCmdsWithGrade result for the i-th
        # reference vertex and the j-th target vertex.
        gradesCache = []
        for refv in refGraph.vs:
            gradesCache.append([
                getMatchedCmdsWithGrade(refv['code'], tarv['code'])
                for tarv in tarGraph.vs
            ])

        objectsCount = 0

        for g in split2k.ksplitGraphObject(myK, tarGraph, True):

            if timeout[0]:
                print("Breaking out cuz of timeout, time=" + str(
                    int(time.time() - timeout[1])
                ) + " from - func=" + funcfilename + ",exe-" + exeName)
                return []

            objectsCount += 1
            for refGraphletContainer in GradeSystemsInfoInst:

                refGraphlet = refGraphletContainer.getObject()

                # NOTE(review): the positional True/False presumably request
                # only the ref->target vertex mapping -- confirm against the
                # installed python-igraph isomorphic_vf2 signature.
                (isIso, Map12,
                 Map21) = refGraphlet.isomorphic_vf2(g, None, None, None, None,
                                                     True, False, None, None)
                if not isIso:
                    continue

                # Pair reference vertex i with target vertex Map12[i]; the
                # vertices' 'id' attributes index into gradesCache.
                refVertices = [
                    refGraphlet.vs[pos] for pos in range(len(refGraphlet.vs))
                ]
                tarVertices = [g.vs[Map12[pos]] for pos in range(len(g.vs))]
                nodeGradesInfos = [
                    gradesCache[int(refVertex['id'])][int(tarVertex['id'])]
                    for refVertex, tarVertex in zip(refVertices, tarVertices)
                ]

                gradeSystemsFinals = OrderedDict()

                for methodType in ['contain', 'ratio']:
                    gradeSystemsFinals[methodType] = numpy.mean([
                        info['gradesDict'][methodType]
                        for info in nodeGradesInfos
                    ])

                # The original guarded this with `if "Solver" == "Solver":`,
                # which is always true; its unreachable else-branch is gone.
                if gradeSystemsFinals['ratio'] > 50:
                    conDict = GraphletsConstraints(nodeGradesInfos)
                    newTarNodes = list(conDict.getRW())
                    refNodes = [info['refCode'] for info in nodeGradesInfos]
                    # Kept as map(): under Python 2 this is a list, and a
                    # shorter argument list is padded with None.
                    grades = map(compareDisAsmCode, refNodes, newTarNodes)

                    for methodType in ['contain', 'ratio']:
                        gradeSystemsFinals['Solver_' + methodType] = numpy.mean(
                            [grade[methodType] for grade in grades])
                else:
                    # At or below the threshold the plain grades are reused.
                    for methodType in ['contain', 'ratio']:
                        gradeSystemsFinals[
                            'Solver_' +
                            methodType] = gradeSystemsFinals[methodType]

                # Keep the best grade seen so far per reference graphlet.
                for gradeSystemName in gradeSystemNames:
                    refGraphletContainer[gradeSystemName] = max(
                        gradeSystemsFinals[gradeSystemName],
                        refGraphletContainer[gradeSystemName])

        endForXlsRecord = time.time()
        elapsedSecs = endForXlsRecord - startForXlsRecord

        allFields = []
        for gradeSystemName in gradeSystemNames:
            counters = GradeSystemsInfoInst.tallyForGradeSystem(
                gradeSystemName, borders)

            # ["exe","funcname","#objects","K","Trace","System","Generator","GradeSystem","TimeStr","TimeNumber"]
            fields = [
                exeName, funcfilename,
                str(objectsCount),
                str(myK), "TRUE", methodSystemName, "normal", gradeSystemName,
                "\"" + getDurationStr(int(elapsedSecs)) + "\"",
                str(elapsedSecs)
            ]

            for i in range(len(borders)):
                fields.append(
                    "%.4f" %
                    (float(counters[i]) / float(len(GradeSystemsInfoInst))))

            allFields.append(fields)

        return allFields
    finally:
        # Runs on every exit path, including the timeout return and errors.
        timeOutTimer.cancel()

Example #8

0

Show file

File: GetStats.py Project: chubbymaggie/TRACY

    if len(collection) == 0:
        return "EMPTY (len==0)" 
    else:
        return " Sum:[" + printInt(sum(collection)) + "],Min:[" + printInt(min(collection)) + "],Max:[" + printInt(max(collection)) + "],Avg:[" + printInt(np.mean(collection)) + "],STD:[" + printInt(np.std(collection)) + "]"

# Tracelet counts: for every source graph, how many objects each K produces.

print("Sources (paper targets)\n\n")

for sourceName in sourceNames:
    refGraph = igraph.read(os.path.join(sourcesDir, sourceName) + myExt)
    for k in Koptions:
        # enumerate(..., 1) leaves the number of generated objects in
        # `counter`; it stays 0 when the split yields nothing.
        counter = 0
        for counter, tracelet in enumerate(split2k.ksplitGraphObject(k, refGraph, True), 1):
            pass

        print(sourceName + ": K=" + str(k) + " , count = " + str(counter))

print("\n\nTargets (paper references)\n\n")

# dictK[k][collectionName] starts out as an empty list for every K option.
dictK = {}
for k in Koptions:
    perKCollections = dictK[k] = {}
    for collectionName in ("traceletPerFunctionInfo",
                           "opcodePerTraceletInfo",
                           "opcodePerBasicBlock",
                           "callsPerBB",
                           "callsPerTrace"):
        perKCollections[collectionName] = []

# Empty accumulators, keyed exactly as spelled here.
Degrees = dict(VInDegree=[], VOutDgree=[])
BBInGraphs = list()

Example #9

0

Show file

        return " Sum:[" + printInt(sum(collection)) + "],Min:[" + printInt(
            min(collection)) + "],Max:[" + printInt(
                max(collection)) + "],Avg:[" + printInt(
                    np.mean(collection)) + "],STD:[" + printInt(
                        np.std(collection)) + "]"


# Tracelet counts: for every source graph, how many objects each K produces.

print("Sources (paper targets)\n\n")

for sourceName in sourceNames:
    refGraph = igraph.read(os.path.join(sourcesDir, sourceName) + myExt)
    for k in Koptions:
        # enumerate(..., 1) leaves the number of generated objects in
        # `counter`; it stays 0 when the split yields nothing.
        counter = 0
        for counter, tracelet in enumerate(
                split2k.ksplitGraphObject(k, refGraph, True), 1):
            pass

        print(sourceName + ": K=" + str(k) + " , count = " + str(counter))

print("\n\nTargets (paper references)\n\n")

# dictK[k][collectionName] starts out as an empty list for every K option.
dictK = {}
for k in Koptions:
    perKCollections = dictK[k] = {}
    for collectionName in (
            "traceletPerFunctionInfo", "opcodePerTraceletInfo",
            "opcodePerBasicBlock", "callsPerBB", "callsPerTrace"):
        perKCollections[collectionName] = []