Ejemplo n.º 1
0
    def buildCFG(self):

        # ignore the first line in the samples file
        self.ifb.getLine()

        moreBatches = not self.ifb.eof

        commonBinIns = dict()
        totalIns = dict()

        lowstdev = 0
        highstdev = 0

        stddevs = []

        while (moreBatches):

            ib = InstructionBatch(self.batchSize, self.ifb)
            moreBatches = ib.fromFile()
            ib.calcStatistics(self.windowSize, 1)
            self.stat.registerLowStDevStatistics(ib)

            stddevs.append(ib.meanWindowStdev)

            if ib.batchId % 100 == 0:
                logger.debug("batch %d", ib.batchId)

            if ib.meanWindowStdev <= self.stdDevThreshold:
                instrGen = ib.genInstruction()
                self.buildCFGR(instrGen, 0, ib)
                lowstdev+=1
            else:
                highstdev+=1

                printedIns = False
                instrGen = ib.genInstruction()

                for i in instrGen:

                    if i.isBranchOrCall():

                        b = self.stat.getBinFromAddr(i.pc)

                        if b is None:
                            continue

                        bb = self.bbr.getBB(i.pc)

                        if bb and b.count > self.recurrentThreshold:
                            iafter = ib.getInstructionAfter(i)
                            if iafter is not None:
                                otherBB = self.bbr.getBB(iafter.pc)
                                if not otherBB:
                                    logger.debug("trying to create a new BB for %x", iafter.pc)
                                    self.buildCFGR(instrGen, 1, ib)
                                    self.numHighStdevTries+=1
                                    otherBB = self.bbr.getBB(iafter.pc)
                                    if otherBB:
                                        logger.debug("got it!")
                                        self.numHighStdevOK+=1

                                if otherBB:
                                    bb.addTarget(otherBB)
                                    otherBB.addSource(bb)
                                    self.cfg.addOrIncrementEdge(bb, otherBB)
                                    self.highStdevEdges+=1

        self.cfg.toDot("test_builder1.dot", True, False)
        self.cfg.printCFG()

        print len(self.bbr.blocks), " basic blocks were recognized"
        print len(self.stat.bins), " address bins were created"
        print lowstdev, " low standard deviation batches"
        print highstdev, " high standard deviation batches"

        totalBBIns = []
        for i in self.bbr.blocks:
            totalBBIns.append(len(self.bbr.blocks[i].instructions))

        print "each block has an average of ",  np.mean(totalBBIns), "+-" , np.std(totalBBIns), " instructions"
        print "number of basic block merges: ", self.numMerge
        print "number of high standard deviation recurrent edges marked: ", self.highStdevEdges
        print "number of high standard deviation basic block build tries: ", self.numHighStdevTries
        print "number of high standard deviation basic block actually built: ", self.numHighStdevOK

        #implementar metricas: quantidade de instrucoes e blocos basicos por funcao

        stdev = file(self.samplesFile + ".stdev", 'w')
        for i in stddevs:
            stdev.write(str(i) + "\n")
        stdev.close()
Ejemplo n.º 2
0
    def buildCFG(self):

        # ignore the first line in the samples file
        self.ifb.getLine()

        moreBatches = not self.ifb.eof

        commonBinIns = dict()
        totalIns = dict()

        lowstdev = 0
        highstdev = 0

        lowstdevValues = []
        histdevValues = []

        stddevs = []

        while (moreBatches):

            ib = InstructionBatch(self.batchSize, self.ifb)
            moreBatches = ib.fromFile()
            ib.calcStatistics(self.windowSize, 1)
            self.stat.registerLowStDevStatistics(ib)

            stddevs.append(ib.meanWindowStdev)

            if ib.batchId % 100 == 0:
                logger.debug("batch %d", ib.batchId)

            if ib.meanWindowStdev <= self.stdDevThreshold:
                instrGen = ib.genInstruction()
                self.buildCFGR(instrGen, 0, ib)
                lowstdev+=1
                lowstdevValues.append(ib.meanWindowStdev)
            else:
                highstdev+=1
                histdevValues.append(ib.meanWindowStdev)

                printedIns = False
                instrGen = ib.genInstruction()

                for i in instrGen:

                    if i.isBranchOrCall():
                        if self.targets.has_key(i.target):
                            self.targets[i.target] += 1
                        else:
                            self.targets[i.target] = 1

                        b = self.stat.getBinFromAddr(i.pc)

                        if b is None:
                            continue

                        bb = self.bbr.getBB(i.pc)

                        if bb and b.count > self.recurrentThreshold:
                            iafter = ib.getInstructionAfter(i)
                            if iafter is not None:
                                otherBB = self.bbr.getBB(iafter.pc)
                                if not otherBB:
                                    logger.debug("trying to create a new BB for %x", iafter.pc)
                                    self.buildCFGR(instrGen, 1, ib)
                                    self.numHighStdevTries+=1
                                    otherBB = self.bbr.getBB(iafter.pc)
                                    if otherBB:
                                        logger.debug("got it!")
                                        self.numHighStdevOK+=1

                                if otherBB:
                                    bb.addTarget(otherBB)
                                    otherBB.addSource(bb)
                                    self.cfg.addOrIncrementEdge(bb, otherBB)
                                    self.highStdevEdges+=1

            # if i.pc not in totalIns:
            #     totalIns[i.pc] = 1
            # else:
            #     totalIns[i.pc]+=1
            #
            # b = self.stat.getBinFromAddr(i.pc)
            # if b.count > self.recurrentThreshold:
            #     #logger.debug("instruction at %X is in a common bin", i.pc)
            #     if i.pc not in commonBinIns:
            #         commonBinIns[i.pc] = 1
            #     else:
            #         commonBinIns[i.pc]+=1
            #
            # if i.isBranchOrCall():
            #     if(self.targets.has_key(i.target)):
            #         self.targets[i.target]+=1
            #     else:
            #         self.targets[i.target] = 1

        #
        # commonBinList = commonBinIns.items()
        #
        # commonBinList.sort(key=lambda x: x[0])
        #
        # logger.debug("common bins:")
        # for i in commonBinList:
        #     logger.debug("0x%x: %d", i[0], i[1])
        #
        # logger.debug("control flow targets: (%d items)", len(self.targets))
        # targetList = self.targets.items()
        # targetList.sort(key=lambda x: x[0])
        #
        # for i in targetList:
        #     logger.debug("0x%x: %d", i[0], i[1])
        #
        # logger.debug("there were %d distinct instructions in common bins", len(commonBinIns))
        #
        # logger.debug("a total of %d distinct instructions were sampled.", len(totalIns))

        self.cfg.toDot("test_builder0.dot", False, True)
        self.cfg.printCFG()

        # bb = self.bbr.getBB(0x400811)
        #
        # for i in bb.getInstructions():
        #     print hex(i.pc)

        print len(self.bbr.blocks), " basic blocks were recognized"
        print len(self.stat.bins), " address bins were created"
        print lowstdev, " low standard deviation batches"
        print highstdev, " high standard deviation batches"

        totalBBIns = []
        for i in self.bbr.blocks:
            totalBBIns.append(len(self.bbr.blocks[i].instructions))

        print "each block has an average of ",  np.mean(totalBBIns), "+-" , np.std(totalBBIns), " instructions"
        print "number of basic block merges: ", self.numMerge
        print "number of high standard deviation recurrent edges marked: ", self.highStdevEdges
        print "number of high standard deviation basic block build tries: ", self.numHighStdevTries
        print "number of high standard deviation basic block actually built: ", self.numHighStdevOK

        histdevValues.sort()
        lowstdevValues.sort()

        print "high stdev values", histdevValues
        print "hsv mean = ", np.mean(histdevValues), "+-" ,np.std(histdevValues)
        print "low stdev values", lowstdevValues
        print "lo mean = ", np.mean(lowstdevValues), "+-" ,np.std(lowstdevValues)

        #implementar metricas: quantidade de instrucoes e blocos basicos por funcao

        stdev = file(self.samplesFile + ".stdev", 'w')
        for i in stddevs:
            stdev.write(str(i) + "\n")
        stdev.close()