def outputTrace(infile, outfile):
    '''
    Convert a tab-separated trace file into a list of decimal values.

    The first line of ``infile`` is read and discarded. For every following
    line, the second tab-separated field is parsed as a hexadecimal integer
    and written to ``outfile`` in decimal, one value per line.

    :type infile: string
    :type outfile: string
    :raises IndexError: if a line has fewer than two tab-separated fields
    :raises ValueError: if the second field is not valid hexadecimal
    '''
    ifb = InputFileBuffer(1000, infile)
    # Consume the first line; it is never written to the output.
    ifb.getLine()
    # The context manager guarantees the output is flushed and closed even
    # when a malformed line raises part-way through the conversion.
    with open(outfile, 'w') as out:
        while not ifb.eof:
            fields = ifb.getLine().split('\t')
            out.write(str(int(fields[1], 16)) + "\n")
class CFGBuilder1(CFGBuilder):
    '''
    CFG builder that consumes a samples file in instruction batches.

    Each batch is classified by comparing its mean window standard deviation
    with ``stdDevThreshold``. Batches at or below the threshold are used to
    build basic blocks; batches above it are only scanned for branches whose
    address bin is recurrent (see :meth:`buildCFG`).

    :type samplesFile: string
    :type batchSize: int
    :type binSize: int
    :type stdDevThreshold: float
    :type windowSize: int
    :type recurrentThreshold: int
    :type targets: dict[int, int]
    :type bbr: BBRepository
    :type cfg: CFG
    '''

    def __init__(self, samplesFile, batchSize, binSize, stdDevThreshold,
                 windowSize, recurrentThreshold):
        '''
        Store the configuration and create the empty repository and graph.

        :param samplesFile: path of the samples file to read
        :param batchSize: size passed to every ``InstructionBatch``
        :param binSize: bin size passed to ``Statistics``
        :param stdDevThreshold: upper bound on a batch's mean window standard
            deviation for it to be treated as "low standard deviation"
        :param windowSize: window size passed to ``calcStatistics``
        :param recurrentThreshold: a bin is recurrent when its ``count`` is
            strictly greater than this value
        '''
        CFGBuilder.__init__(self)
        self.stat = Statistics(binSize, stdDevThreshold)
        self.ifb = InputFileBuffer(50000, samplesFile)
        self.batchSize = batchSize
        self.binSize = binSize
        self.stdDevThreshold = stdDevThreshold
        self.samplesFile = samplesFile
        self.windowSize = windowSize
        self.recurrentThreshold = recurrentThreshold
        self.targets = dict()
        self.bbr = BBRepository()
        self.cfg = CFG()
        # Counters reported at the end of buildCFG().
        self.numMerge = 0
        self.highStdevEdges = 0
        self.numHighStdevTries = 0
        self.numHighStdevOK = 0

    def _linkBlocks(self, source, target):
        '''Record a control-flow edge from block ``source`` to ``target``.'''
        source.addTarget(target)
        target.addSource(source)
        self.cfg.addOrIncrementEdge(source, target)

    def buildCFGR(self, instrGen, justBuild, ib):
        '''
        Recursively build basic blocks from an instruction generator.

        The generator is shared with the recursive calls, so instructions
        consumed by a nested call are not seen again by the caller.

        NOTE(review): this method was reconstructed from a source whose
        indentation had been lost. The nesting below is the most plausible
        reading; in particular, confirm the level of the branch check that
        follows the ``while`` loop against the original file.

        :param instrGen: generator of instructions, as returned by
            ``ib.genInstruction()``
        :param justBuild: when 1, a block is built at the current instruction
            even if its bin is missing or not recurrent; when 0, the method
            returns as soon as an instruction has no bin
        :param ib: the batch the instructions come from; used to look up the
            instruction following a branch
        '''
        for i in instrGen:
            b = self.stat.getBinFromAddr(i.pc)
            if b is None and (justBuild == 0):
                return
            if b is not None:
                recurrent = b.count > self.recurrentThreshold
            else:
                recurrent = False

            if recurrent or justBuild == 1:
                bb = self.bbr.getBB(i.pc)
                if not bb:
                    bb = BB(i.pc)
                    self.bbr.addBB(bb)

                while not bb.done:
                    x = self.bbr.getBB(i.pc)
                    if x:
                        if x.entryAddress != bb.entryAddress:
                            # Ran into a different, already known block:
                            # close this one and chain it to the other.
                            bb.done = 1
                            self.numMerge += 1
                            logger.debug("merging blocks %x and %x",
                                         bb.entryAddress, x.entryAddress)
                            self._linkBlocks(bb, x)
                            break

                    if not bb.hasInstruction(i.pc):
                        bb.addInstruction(i)

                    if i.isBranchOrCall():
                        # A branch or call terminates the current block.
                        bb.done = 1
                        iafter = ib.getInstructionAfter(i)
                        if iafter is None:
                            break
                        # Both outcomes recurse with justBuild=0; they differ
                        # only in which address the successor is looked up by.
                        if iafter.pc == i.target:
                            # branch taken
                            self.buildCFGR(instrGen, 0, ib)
                            targetBB = self.bbr.getBB(i.target)
                        else:
                            # branch not taken (fallthrough)
                            self.buildCFGR(instrGen, 0, ib)
                            targetBB = self.bbr.getBB(iafter.pc)
                        if targetBB:
                            self._linkBlocks(bb, targetBB)

                    try:
                        # next() works on Python 2.6+ and 3, unlike .next().
                        i = next(instrGen)
                    except StopIteration:
                        break

                # ``i`` is now the last instruction pulled inside the loop
                # above; the enclosing ``for`` will not revisit it, so link
                # it here if it is a branch between two known blocks.
                if i.isBranchOrCall():
                    iafter = ib.getInstructionAfter(i)
                    if iafter is not None:
                        targetBB = self.bbr.getBB(iafter.pc)
                        thisBB = self.bbr.getBB(i.pc)
                        if targetBB and thisBB:
                            self._linkBlocks(thisBB, targetBB)

    def _linkRecurrentBranches(self, ib):
        '''
        Process one high standard deviation batch.

        For each branch/call that already belongs to a known block and whose
        bin is recurrent, link that block to the block of the following
        instruction, first trying to build the latter when it does not exist.
        '''
        instrGen = ib.genInstruction()
        for i in instrGen:
            if not i.isBranchOrCall():
                continue
            b = self.stat.getBinFromAddr(i.pc)
            if b is None:
                continue
            bb = self.bbr.getBB(i.pc)
            if not (bb and b.count > self.recurrentThreshold):
                continue
            iafter = ib.getInstructionAfter(i)
            if iafter is None:
                continue

            otherBB = self.bbr.getBB(iafter.pc)
            if not otherBB:
                logger.debug("trying to create a new BB for %x", iafter.pc)
                # justBuild=1 forces construction regardless of recurrence.
                self.buildCFGR(instrGen, 1, ib)
                self.numHighStdevTries += 1
                otherBB = self.bbr.getBB(iafter.pc)
                if otherBB:
                    logger.debug("got it!")
                    self.numHighStdevOK += 1
            if otherBB:
                self._linkBlocks(bb, otherBB)
                self.highStdevEdges += 1

    def buildCFG(self):
        '''
        Read the whole samples file and build the CFG.

        Side effects: writes ``test_builder1.dot`` through ``cfg.toDot``,
        prints the CFG and summary statistics to standard output, and writes
        ``<samplesFile>.stdev`` with one mean window standard deviation per
        batch, in batch order.
        '''
        # ignore the first line in the samples file
        self.ifb.getLine()
        moreBatches = not self.ifb.eof
        lowstdev = 0
        highstdev = 0
        stddevs = []

        while moreBatches:
            ib = InstructionBatch(self.batchSize, self.ifb)
            moreBatches = ib.fromFile()
            ib.calcStatistics(self.windowSize, 1)
            self.stat.registerLowStDevStatistics(ib)
            stddevs.append(ib.meanWindowStdev)
            if ib.batchId % 100 == 0:
                logger.debug("batch %d", ib.batchId)

            if ib.meanWindowStdev <= self.stdDevThreshold:
                self.buildCFGR(ib.genInstruction(), 0, ib)
                lowstdev += 1
            else:
                highstdev += 1
                self._linkRecurrentBranches(ib)

        self.cfg.toDot("test_builder1.dot", True, False)
        self.cfg.printCFG()

        # Single-argument print(...) is valid on both Python 2 and 3. The
        # doubled spaces reproduce the separator the former comma-separated
        # print statements inserted before each leading-space literal.
        print("%s  basic blocks were recognized" % len(self.bbr.blocks))
        print("%s  address bins were created" % len(self.stat.bins))
        print("%s  low standard deviation batches" % lowstdev)
        print("%s  high standard deviation batches" % highstdev)

        totalBBIns = [len(self.bbr.blocks[key].instructions)
                      for key in self.bbr.blocks]
        print("each block has an average of  %s +- %s  instructions"
              % (np.mean(totalBBIns), np.std(totalBBIns)))
        print("number of basic block merges:  %s" % self.numMerge)
        print("number of high standard deviation recurrent edges marked:  %s"
              % self.highStdevEdges)
        print("number of high standard deviation basic block build tries:  %s"
              % self.numHighStdevTries)
        print("number of high standard deviation basic block actually built:  %s"
              % self.numHighStdevOK)

        # TODO: implement metrics: number of instructions and basic blocks
        # per function.

        # open() + with replaces the Python-2-only file() builtin and closes
        # the handle even if a write fails.
        with open(self.samplesFile + ".stdev", 'w') as stdev:
            for value in stddevs:
                stdev.write(str(value) + "\n")