def test_memory():
    """Check that repeatedly lifting random bytes does not grow peak memory.

    A warm-up pass of 10000 lift attempts is run first, then the peak
    resident set size is sampled, 20000 more attempts are made, and the
    peak is sampled again.  The test fails if the two samples differ by
    2000 or more ``ru_maxrss`` units.

    NOTE(review): the threshold treats ``ru_maxrss`` as kilobytes; confirm
    the unit on the platforms this test runs on.
    """
    # we're not including VexArchMIPS32 cause it segfaults sometimes
    arches = ['VexArchX86', 'VexArchPPC32', 'VexArchAMD64', 'VexArchARM']

    # Bind the name up front: if every single lift raised PyVEXError the
    # ``del p`` below would otherwise fail with a NameError instead of
    # letting the memory assertion run.
    p = None

    # Warm-up pass, so one-time allocations are not counted as growth.
    for _ in xrange(10000):
        try:
            s = hex(random.randint(2**100, 2**100 * 16))[2:]
            a = random.choice(arches)
            p = pyvex.IRSB(bytes=s, arch=a)
        except pyvex.PyVEXError:
            # Random bytes frequently fail to lift; that is expected here.
            pass

    kb_start = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    # Measured pass.
    for _ in xrange(20000):
        try:
            s = hex(random.randint(2**100, 2**100 * 16))[2:]
            a = random.choice(arches)
            p = pyvex.IRSB(bytes=s, arch=a)
        except pyvex.PyVEXError:
            pass

    # Drop the last block and collect before taking the second sample.
    del p
    gc.collect()

    kb_end = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

    # allow a 2mb leeway
    nose.tools.assert_less(kb_end - kb_start, 2000)
def test_two_irsb():
    """Two blocks lifted from different byte strings must differ in length.

    Both blocks are kept alive at the same time; the assertion compares
    the number of statements each one reports.
    """
    short_block = pyvex.IRSB(bytes='\x5d\xc3')
    long_block = pyvex.IRSB(bytes='\x5d\x5d\x5d\x5d')

    short_stmts = short_block.statements
    long_stmts = long_block.statements

    short_count = len(short_stmts)
    long_count = len(long_stmts)
    nose.tools.assert_not_equal(short_count, long_count)
def test_irsb_tyenv():
    """Smoke-test type-environment access by printing it in several states.

    Nothing is asserted; the test passes as long as reading, deep-copying
    and reassigning ``tyenv`` does not raise.
    """
    lifted = pyvex.IRSB(bytes='\x5d\xc3')
    print(lifted.tyenv)

    print("Orig")
    print(lifted.tyenv)

    print("Copy")
    print(lifted.tyenv.deepCopy())

    print("Empty")
    blank = pyvex.IRSB()
    print(blank.tyenv)

    # Replace the empty block's environment with a copy of the lifted one.
    print("Unwrapped")
    blank.tyenv = lifted.tyenv.deepCopy()
    print(blank.tyenv)
def test_irsb_deepCopy():
    """A deep copy of a block must report as many statements as the original."""
    original = pyvex.IRSB(bytes='\x5d\xc3')
    original_stmts = original.statements

    clone = original.deepCopy()
    clone_stmts = clone.statements

    nose.tools.assert_equal(len(original_stmts), len(clone_stmts))
def test_irstmt_noop():
    """Compare a lifted no-op statement with constructed and copied ones.

    The first statement of the lifted block must be tagged ``Ist_NoOp``
    and have the same Python type as a freshly built ``NoOp`` and as a
    deep copy of that fresh one.
    """
    block = pyvex.IRSB(bytes='\x90\x5d\xc3')
    lifted_nop = block.statements[0]
    built_nop = pyvex.IRStmt.NoOp()
    copied_nop = built_nop.deepCopy()

    same = nose.tools.assert_equal
    same(lifted_nop.tag, "Ist_NoOp")
    same(type(lifted_nop), type(built_nop))
    same(type(lifted_nop), type(copied_nop))
def test_irsb_addStmt():
    """Rebuild a block statement by statement on a statement-less copy.

    The copy must start empty, and before each ``addStatement`` call its
    statement count must equal the number of statements added so far.
    """
    source_block = pyvex.IRSB(bytes='\x5d\xc3')
    source_stmts = source_block.statements

    rebuilt = source_block.deepCopyExceptStmts()
    nose.tools.assert_equal(len(rebuilt.statements), 0)

    added = 0
    for stmt in source_stmts:
        nose.tools.assert_equal(len(rebuilt.statements), added)
        rebuilt.addStatement(stmt.deepCopy())
        added += 1

    # Pretty-print the rebuilt block once everything has been added.
    rebuilt.pp()
def test_irexpr_rdtmp():
    """Exercise ``RdTmp`` construction, copying and ``tmp`` mutation.

    Also checks that constructing ``RdTmp`` from an empty tuple raises,
    and that the ``next`` expression of a lifted block keeps its ``tmp``
    across a deep copy.
    """
    same = nose.tools.assert_equal

    rd = pyvex.IRExpr.RdTmp(123)
    same(rd.tag, "Iex_RdTmp")
    same(rd.tmp, rd.deepCopy().tmp)
    same(rd.tmp, 123)

    # The temporary index is writable.
    rd.tmp = 1337
    same(rd.tmp, 1337)

    nose.tools.assert_raises(Exception, pyvex.IRExpr.RdTmp, ())
    same(type(rd), type(rd.deepCopy()))

    print("F**K")
    block = pyvex.IRSB(bytes='\x90\x5d\xc3')
    print("TMP: %s" % (block.next.tmp,))
    same(block.next.tmp, block.next.deepCopy().tmp)
def test_irsb_popret():
    """Check basic properties of the block lifted from bytes ``5d c3``.

    Asserts that the block has statements, ends with jumpkind ``Ijk_Ret``,
    has ``offsIP`` 184 and a non-empty type environment, and that the data
    of statement 16 has type ``Ity_I64``.
    """
    block = pyvex.IRSB(bytes='\x5d\xc3')
    block_stmts = block.statements
    block.pp()

    nose.tools.assert_greater(len(block_stmts), 0)
    nose.tools.assert_equal(block.jumpkind, "Ijk_Ret")
    nose.tools.assert_equal(block.offsIP, 184)

    type_count = len(block.tyenv.types)
    nose.tools.assert_greater(type_count, 0)

    # Statement 16 is looked up afresh each time, as in the checks below.
    print(block.statements[16].data)
    print(block.statements[16].data.tmp)
    print(block.tyenv.types[block.statements[16].data])
    nose.tools.assert_equal(
        block.tyenv.typeOf(block.statements[16].data), 'Ity_I64')
def test_arm_postprocess():
    """Check the jumpkind reported for hand-assembled ARM and Thumb blocks.

    Every case in the table below is lifted at opt_level 0, 1 and 2 and
    its ``jumpkind`` is compared with the expected value.  Thumb cases use
    an odd ``mem_addr`` and pass ``bytes_offset=1``; ARM cases do not pass
    ``bytes_offset`` at all.

    Each table row is ``(data, mem_addr, num_inst, is_thumb, expected)``.
    """
    cases = [
        # ---- Thumb ----
        # push {r7}
        # add r7, sp, #0
        # mov.w r1, #6
        # mov r0, pc
        # add.w lr, r0, r1
        # b.w 10408
        (('\x80\xb4'
          '\x00\xaf'
          '\x4f\xf0\x06\x01'
          '\x78\x46'
          '\x00\xeb\x01\x0e'
          '\xff\xf7\xec\xbf'),
         0x1041f, 6, True, 'Ijk_Call'),
        # mov lr, pc
        # b.w 10408
        (('\xfe\x46'
          '\xe9\xe7'),
         0x10431, 2, True, 'Ijk_Call'),
        # add r2, pc, #0
        # add.w lr, r2, #4
        # ldr.w pc, [pc, #52]
        (('\x00\xa2'
          '\x02\xf1\x06\x0e'
          '\xdf\xf8\x34\xf0'),
         0x10435, 3, True, 'Ijk_Call'),
        # ldr r0, [pc, #48]
        # mov r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov pc, r0
        (('\x0c\x48'
          '\x79\x46'
          '\x01\xf1\x04\x02'
          '\x02\xf1\x04\x03'
          '\x03\xf1\x04\x04'
          '\x04\xf1\x04\x0e'
          '\x87\x46'),
         0x1043f, 7, True, 'Ijk_Call'),
        # eor.w r0, r0, r0
        # mov lr, pc
        # b.n 10460
        (('\x80\xea\x00\x00'
          '\x86\x46'
          '\x01\xe0'),
         0x10455, 3, True, 'Ijk_Boring'),

        # ---- Thumb compiled with optimizations (gcc -O2) ----
        # mov.w r1, #6
        # mov r0, pc
        # add.w lr, r0, r1
        # b.w 104bc
        (('\x4f\xf0\x06\x01'
          '\x78\x46'
          '\x00\xeb\x01\x0e'
          '\x00\xf0\xc5\xb8'),
         0x10325, 4, True, 'Ijk_Call'),
        # ldr r0, [pc, #56]
        # mov r1, pc
        # add.w r2, r1, #4
        # add.w r3, r2, #4
        # add.w r4, r3, #4
        # add.w lr, r4, #4
        # mov pc, r0
        (('\x0e\x48'
          '\x79\x46'
          '\x01\xf1\x04\x02'
          '\x02\xf1\x04\x03'
          '\x03\xf1\x04\x04'
          '\x04\xf1\x04\x0e'
          '\x87\x46'),
         0x10333, 7, True, 'Ijk_Call'),
        # add r2, pc, #0
        # add.w lr, r2, #6
        # ldr.w pc, [pc, #28]
        (('\x00\xa2'
          '\x02\xf1\x06\x0e'
          '\xdf\xf8\x1c\xf0'),
         0x10349, 3, True, 'Ijk_Call'),
        # mov lr, pc
        # b.w 104bc
        (('\xfe\x46'
          '\xb2\xe0'),
         0x10353, 2, True, 'Ijk_Call'),
        # eor.w r0, r0, r0
        # mov lr, pc
        # b.n 10362
        (('\x80\xea\x00\x00'
          '\x86\x46'
          '\x01\xe0'),
         0x10357, 3, True, 'Ijk_Boring'),

        # ---- ARM compiled with optimizations (gcc -O2) ----
        # mov r1, #4
        # mov r0, pc
        # add lr, r0, r1
        # ldr pc, [pc, #56]
        (('\x04\x10\xa0\xe3'
          '\x0f\x00\xa0\xe1'
          '\x01\xe0\x80\xe0'
          '\x38\xf0\x9f\xe5'),
         0x10298, 4, False, 'Ijk_Call'),
        # add r1, pc, #0
        # add r2, r1, #4
        # add r3, r2, #4
        # add r4, r3, #4
        # add lr, r4, #4
        # b 10414
        (('\x00\x10\x8f\xe2'
          '\x04\x20\x81\xe2'
          '\x04\x30\x82\xe2'
          '\x04\x40\x83\xe2'
          '\x04\xe0\x84\xe2'
          '\x54\x00\x00\xea'),
         0x102a8, 6, False, 'Ijk_Call'),
        # mov lr, pc
        # b 10414
        (('\x0f\xe0\xa0\xe1'
          '\x52\x00\x00\xea'),
         0x102c0, 2, False, 'Ijk_Call'),
        # eor r0, r0, r0
        # mov lr, r0
        # b 102d8
        (('\x00\x00\x20\xe0'
          '\x00\xe0\xa0\xe1'
          '\x00\x00\x00\xea'),
         0x102c8, 3, False, 'Ijk_Boring'),

        # ---- ARM ----
        # push {fp}
        # add fp, sp, #0
        # mov r1, #4
        # mov r0, pc
        # add lr, r0, r1
        # ldr pc, [pc, #68]
        (('\x04\xb0\x2d\xe5'
          '\x00\xb0\x8d\xe2'
          '\x04\x10\xa0\xe3'
          '\x0f\x00\xa0\xe1'
          '\x01\xe0\x80\xe0'
          '\x44\xf0\x9f\xe5'),
         0x103e8, 6, False, 'Ijk_Call'),
        # add r1, pc, #0
        # add r2, r1, #4
        # add r3, r2, #4
        # add r4, r3, #4
        # add lr, r4, #4
        # b 103c4
        (('\x00\x10\x8f\xe2'
          '\x04\x20\x81\xe2'
          '\x04\x30\x82\xe2'
          '\x04\x40\x83\xe2'
          '\x04\xe0\x84\xe2'
          '\x54\xff\xff\xea'),
         0x10400, 6, False, 'Ijk_Call'),
        # mov lr, pc
        # b 103c4
        (('\x0f\xe0\xa0\xe1'
          '\xe8\xff\xff\xea'),
         0x10418, 2, False, 'Ijk_Call'),
        # eor r0, r0, r0
        # mov lr, r0
        # b 10430
        (('\x00\x00\x20\xe0'
          '\x00\xe0\xa0\xe1'
          '\x00\x00\x00\xea'),
         0x10420, 3, False, 'Ijk_Boring'),
    ]

    for opt_level in range(3):
        for data, mem_addr, num_inst, is_thumb, expected in cases:
            lift_args = {
                'data': data,
                'mem_addr': mem_addr,
                # A fresh arch object per lift, as the test always did.
                'arch': archinfo.ArchARMEL(),
                'num_inst': num_inst,
                'opt_level': opt_level,
            }
            # Only Thumb cases pass bytes_offset; ARM cases rely on the
            # lifter's own default rather than an explicit value.
            if is_thumb:
                lift_args['bytes_offset'] = 1

            irsb = pyvex.IRSB(**lift_args)
            nose.tools.assert_equal(
                irsb.jumpkind, expected,
                "jumpkind %r != %r for block at %#x (opt_level=%d)"
                % (irsb.jumpkind, expected, mem_addr, opt_level))
def handleInsns(self):
    """Lift each instruction to VEX, normalise the IR and hash it.

    Reads:
      * ``self.insns_list`` -- indexable sequence whose items are indexed
        with ``[0]`` (passed to ``pyvex.IRSB`` as the address) and ``[1]``
        (the raw instruction bytes, a Python 2 ``str``).
      * ``self.arch`` -- provides ``name``, ``ip_offset``, ``sp_offset``
        and ``bp_offset``.
      * ``self.targets`` -- mapping looked up by constant value; hits are
        replaced by the mapped name.

    Writes:
      * ``self.vexhash`` -- ``str()`` of the serialized ``LeanMinHash``
        buffer.

    The method works in two passes.  The first lifts every instruction
    and rewrites statements in place: known targets become ``StrConst``
    names, the fall-through address becomes ``"_NEXT_"``, other constants
    and register offsets are replaced by small indices in order of first
    appearance, and unresolved jump destinations are collected in ``ips``.
    The second pass replaces those destinations by their rank among the
    sorted ``ips``, stringifies each statement, counts bigrams of
    consecutive statement strings and feeds them to a MinHash.

    Raises:
      pyvex.errors.PyVEXError: re-raised after printing the offending
        instruction in hex when lifting fails.
    """
    consts = {}
    ips = []  # jump destinations that are neither known targets nor fall-through

    # set default value for PC, SP, BP
    pc_offset = self.arch.ip_offset
    # register offset -> abstract index; PC, SP and BP are pinned to 0, 1, 2
    regs = {pc_offset: 0, self.arch.sp_offset: 1, self.arch.bp_offset: 2}
    consts = {}  # constant value -> abstract index, in order of first appearance
    irsbs = []  # lifted blocks, interleaved with "HALT"/"LOCK" marker strings

    for instr_c in range(len(self.insns_list)):
        off = self.insns_list[instr_c][0]
        instr = self.insns_list[instr_c][1]

        # manage instruction not recognized by libVEX
        if self.arch.name == "X86" or self.arch.name == "AMD64":
            if instr == "\xf4": #hlt x86 instruction
                irsbs.append("HALT")
                continue
            elif instr.startswith("\xf0"): #lock x86 prefix
                irsbs.append("LOCK")
                # a bare prefix byte has nothing left to lift
                if len(instr) == 1:
                    continue
                # lift the instruction without its lock prefix
                instr = instr[1:]

        try:
            irsb = pyvex.IRSB(instr, off, self.arch, opt_level=0)
        except pyvex.errors.PyVEXError as err:
            print(
                "[Please report to the developer] Error with instruction "
                + instr.encode("hex"))
            raise err

        irsbs.append(irsb)
        stmts = irsb.statements
        # end address of the most recent IMark, i.e. the fall-through address
        n_addr = 0

        for i in range(len(stmts)):
            #TODO PutI GetI
            if isinstance(stmts[i], pyvex.stmt.IMark):
                n_addr = stmts[i].addr + stmts[i].len
            elif isinstance(stmts[i], pyvex.stmt.Put):
                # a write of a single constant to the program counter
                if stmts[i].offset == pc_offset and len(
                        stmts[i].constants) == 1:
                    c = stmts[i].constants[0]
                    if c.value in self.targets:
                        # known target: substitute its name
                        stmts[i].data = StrConst(self.targets[c.value])
                        stmts[i].offset = 0
                        continue
                    elif c.value == n_addr:
                        # plain fall-through to the next instruction
                        stmts[i].data = StrConst("_NEXT_")
                        stmts[i].offset = 0
                        continue
                    else:
                        # unknown destination: remember it and tag the
                        # statement so the second pass can rewrite it
                        ips.append(c.value)
                        stmts[i].reg_name = 0xABADCAFE
                        stmts[i].offset = 0
                else:
                    # constants replace
                    for j in range(len(stmts[i].constants)):
                        c = stmts[i].constants[j]
                        if c.value in self.targets:
                            stmts[i].constants[j] = StrConst(
                                self.targets[c.value])
                        elif c.value == n_addr:
                            stmts[i].constants[j] = StrConst("_NEXT_")
                        else:
                            # constants abstraction
                            consts[c.value] = consts.get(
                                c.value, len(consts))
                            c.value = consts[c.value]
                    # registers abstraction
                    # NOTE(review): placed at the level of this else branch
                    # (once per statement, after the constants loop) --
                    # confirm against the original layout.
                    regs[stmts[i].offset] = regs.get(
                        stmts[i].offset, len(regs))
                    stmts[i].offset = regs[stmts[i].offset]
            elif isinstance(stmts[i], pyvex.stmt.Exit):
                c = stmts[i].dst
                if c.value in self.targets:
                    # replace the whole statement by its textual form with
                    # the target name substituted
                    stmts[i] = "if (%s) { PUT(offset=0) = %s; %s }" % (
                        stmts[i].guard, self.targets[c.value],
                        stmts[i].jumpkind)
                    continue
                else:
                    # unknown destination: remember it and tag the statement
                    ips.append(c.value)
                    stmts[i].reg_name = 0xDEADBEEF
            else:
                # constants replace
                for j in range(len(stmts[i].constants)):
                    c = stmts[i].constants[j]
                    if c.value in self.targets:
                        stmts[i].constants[j] = StrConst(
                            self.targets[c.value])
                    elif c.value == n_addr:
                        stmts[i].constants[j] = StrConst("_NEXT_")
                    else:
                        # constants abstraction
                        consts[c.value] = consts.get(c.value, len(consts))
                        c.value = consts[c.value]
            # NOTE(review): this loop is placed after the whole if/elif
            # chain, so it runs for every statement that did not hit a
            # ``continue`` -- confirm against the original layout.
            for expr in stmts[i].expressions:
                if isinstance(expr, pyvex.expr.Get):
                    # registers abstraction
                    regs[expr.offset] = regs.get(expr.offset, len(regs))
                    expr.offset = regs[expr.offset]

    # order addresses
    # map each collected destination to its position in sorted order
    # (a repeated address ends up with the index of its last occurrence)
    addrs = {}
    ips.sort()
    for i in range(len(ips)):
        addrs[ips[i]] = i

    #self.vex_code = ""
    #self.shingled_code = ""

    vexhash = datasketch.MinHash(num_perm=64)
    shingled = {}  # bigram text -> number of occurrences
    last = ""  # previous statement string, the first half of each bigram

    for c in range(len(irsbs)):
        irsb = irsbs[c]

        # "HALT"/"LOCK" markers take part in the bigrams as they are
        if type(irsb) == type(""):
            ngram = last + irsb
            #self.vex_code += "+++ Instr #%d +++\n%s\n" % (c, irsb)
            shingled[ngram] = shingled.get(ngram, 0) + 1
            last = irsb
            continue

        stmts = irsb.statements
        ins = ""

        for i in range(len(stmts)):
            if isinstance(stmts[i], pyvex.stmt.IMark) or isinstance(
                    stmts[i], pyvex.stmt.AbiHint):
                continue

            # statements tagged in the first pass get their destination
            # replaced by its rank among the sorted destinations
            if hasattr(stmts[i], "reg_name"):
                if stmts[i].reg_name == 0xABADCAFE:
                    stmts[i].constants[0].value = addrs[
                        stmts[i].constants[0].value]
                elif stmts[i].reg_name == 0xDEADBEEF:
                    stmts[i].dst.value = addrs[stmts[i].dst.value]

            v = str(stmts[i]) + "\n"
            ins += v
            ngram = last + v
            shingled[ngram] = shingled.get(ngram, 0) + 1
            last = v

        #self.vex_code += "+++ Instr #%d +++\n%s\n" % (c, ins)

    # feed each bigram once per occurrence, prefixed with its occurrence
    # number so repeated bigrams give distinct hash inputs
    for ngram in shingled:
        for c in range(shingled[ngram]):
            vexhash.update("[%d]\n%s" % (c, ngram))
            #self.shingled_code += "[%d]\n%s" % (c, ngram)

    lean_vexhash = datasketch.LeanMinHash(vexhash)
    vexhash_buf = bytearray(lean_vexhash.bytesize())
    lean_vexhash.serialize(vexhash_buf)

    self.vexhash = str(vexhash_buf)
def test_irsb_arm():
    """A four-byte block lifted as ``VexArchARM`` has exactly one IMark."""
    block = pyvex.IRSB(bytes='\x33\xff\x2f\xe1', arch="VexArchARM")

    imark_count = 0
    for stmt in block.statements:
        # Exact type match on purpose, not isinstance.
        if type(stmt) == pyvex.IRStmt.IMark:
            imark_count += 1

    nose.tools.assert_equal(imark_count, 1)
def test_irstmt_pp():
    """Smoke-test statement stringification by printing every statement.

    Nothing is asserted; the test passes if converting each statement to
    text does not raise.
    """
    block = pyvex.IRSB(bytes='\x5d\xc3')
    for stmt in block.statements:
        # Two spaces after the colon: the label's own trailing space plus
        # the separator a comma-terminated print inserts.
        print("STMT:  %s" % (stmt,))