def __init__(self, vw, logwrite=False, logread=False):
    '''
    Set up emulator state for the workspace ``vw``.

    vw       - workspace object; its psize, getMemoryMaps() and
               getByteDef() are used here.
    logwrite - stored as self.logwrite.
    logread  - stored as self.logread.

    The constructor maps a "[stack]" region plus every workspace memory
    map, seeds each register listed in self.taintregs with an
    'uninitreg' taint, writes 20 'funcstack' taints at the stack
    pointer and collects every attribute carrying ``__imphook__`` into
    self.hooks.
    '''
    self.vw = vw
    self.funcva = None  # Set if using runFunction

    self.hooks = {}
    self.taints = {}
    self.taintva = itertools.count(0x41560000, 8192)

    self.uninit_use = {}
    self.logwrite = logwrite
    self.logread = logread
    self.path = self.newCodePathNode()
    self.curpath = self.path
    self.opcache = {}
    self.emumon = None
    self.psize = self.getPointerSize()

    self.stack_map_mask = e_bits.sign_extend(0xfff00000, 4, vw.psize)
    self.stack_map_base = e_bits.sign_extend(0xbfb00000, 4, vw.psize)
    self.stack_pointer = self.stack_map_base + 4096

    # Possibly need an "options" API?
    self._safe_mem = True   # Should we be forgiving about memory accesses?
    self._func_only = True  # is this emulator meant to stay in one function scope?

    # Map in a memory map for the stack
    # NOTE(review): perms value 6 is presumably read|write -- confirm.
    self.addMemoryMap(self.stack_map_base, 6, "[stack]", init_stack_map)

    # Map in all the memory associated with the workspace
    for va, _size, perms, fname in vw.getMemoryMaps():
        # The local used to be called "bytes", shadowing the builtin.
        _offset, mapbytes = vw.getByteDef(va)
        self.addMemoryMap(va, perms, fname, mapbytes)

    for regidx in self.taintregs:
        # NOTE(review): rname is never used below; the lookup is kept
        # only because its side effects cannot be ruled out from here.
        rname = self.getRegisterName(regidx)
        regval = self.setVivTaint('uninitreg', regidx)
        self.setRegister(regidx, regval)

    self.setStackCounter(self.stack_pointer)

    # Create some pre-made taints for positive stack indexes
    # NOTE: This is *ugly* for speed....
    taints = [self.setVivTaint('funcstack', i * self.psize) for i in xrange(20)]
    taintbytes = ''.join([e_bits.buildbytes(taint, self.psize) for taint in taints])
    self.writeMemory(self.stack_pointer, taintbytes)

    # Collect import hooks.  Identity tests are used on purpose: the
    # attributes are arbitrary objects, and "== None" would invoke
    # whatever __eq__ they happen to define.
    for name in dir(self):
        val = getattr(self, name, None)
        if val is None:
            continue

        impname = getattr(val, '__imphook__', None)
        if impname is None:
            continue

        self.hooks[impname] = val
def runStrings(vw, ea, uselocalagg=True):
    '''
    Emulate the function at ``ea`` and gather the strings it builds.

    Returns a list of (write log entry, decoded strings) where the
    write log is the tuple (pc, va, bytes) for the instruction that
    wrote the first byte of the string.

    With uselocalagg true the results are appended to a fresh list by
    the visitor; otherwise a StringAccumulator is used and the values
    of its stringDict are returned.
    '''
    emu = vw.getEmulator(True, True)

    # Re-seat the emulator stack: the default (smaller mask & frame
    # size) wasn't working for funcs with large locals frame size.
    psize = vw.psize
    emu.stack_map_mask = e_bits.sign_extend(0xfff00000, 4, psize)
    emu.stack_map_base = e_bits.sign_extend(0xbfb00000, 4, psize)
    emu.stack_pointer = emu.stack_map_base + 16*4096

    emu.runFunction(ea, maxhit=1, maxloop=1)
    logger = jayutils.getLogger('stack_graph')

    if not uselocalagg:
        # Global aggregation
        accumulator = StringAccumulator()
        jayutils.path_bfs(emu.path, stack_track_visitor, vw=vw, emu=emu,
                          logger=logger, agg=accumulator)
        return accumulator.stringDict.values()

    # Local aggregation
    found = []
    jayutils.path_bfs(emu.path, stack_track_visitor, vw=vw, emu=emu,
                      logger=logger, res=found)
    return found
def __init__(self, vw, logwrite=False, logread=False):
    '''
    Set up emulator state for the workspace ``vw``.

    vw       - workspace object; its psize, getMemoryMaps() and
               getByteDef() are used here.
    logwrite - stored as self.logwrite.
    logread  - stored as self.logread.

    Besides plain attribute initialisation (including emustop=False),
    this maps a "[stack]" region and every workspace memory map, seeds
    the registers in self.taintregs with 'uninitreg' taints, writes 20
    'funcstack' taints at the stack pointer, and registers every
    attribute that carries ``__imphook__`` in self.hooks.
    '''
    self.vw = vw
    self.funcva = None  # Set if using runFunction
    self.emustop = False

    self.hooks = {}
    self.taints = {}
    self.taintva = itertools.count(0x41560000, 8192)

    self.uninit_use = {}
    self.logwrite = logwrite
    self.logread = logread
    self.path = self.newCodePathNode()
    self.curpath = self.path
    self.opcache = {}
    self.emumon = None
    self.psize = self.getPointerSize()

    self.stack_map_mask = e_bits.sign_extend(0xfff00000, 4, vw.psize)
    self.stack_map_base = e_bits.sign_extend(0xbfb00000, 4, vw.psize)
    self.stack_pointer = self.stack_map_base + 4096

    # Possibly need an "options" API?
    self._safe_mem = True   # Should we be forgiving about memory accesses?
    self._func_only = True  # is this emulator meant to stay in one function scope?

    # Map in a memory map for the stack
    # NOTE(review): perms value 6 is presumably read|write -- confirm.
    self.addMemoryMap(self.stack_map_base, 6, "[stack]", init_stack_map)

    # Map in all the memory associated with the workspace
    for va, _size, perms, fname in vw.getMemoryMaps():
        # Renamed from "bytes" so the builtin is no longer shadowed.
        _offset, mapbytes = vw.getByteDef(va)
        self.addMemoryMap(va, perms, fname, mapbytes)

    for regidx in self.taintregs:
        # NOTE(review): rname is never used below; the lookup is kept
        # only because its side effects cannot be ruled out from here.
        rname = self.getRegisterName(regidx)
        regval = self.setVivTaint('uninitreg', regidx)
        self.setRegister(regidx, regval)

    self.setStackCounter(self.stack_pointer)

    # Create some pre-made taints for positive stack indexes
    # NOTE: This is *ugly* for speed....
    taints = [self.setVivTaint('funcstack', i * self.psize) for i in xrange(20)]
    taintbytes = ''.join([e_bits.buildbytes(taint, self.psize) for taint in taints])
    self.writeMemory(self.stack_pointer, taintbytes)

    # Collect import hooks.  "is None" rather than "== None": the
    # attributes are arbitrary objects whose __eq__ must not be run.
    for name in dir(self):
        val = getattr(self, name, None)
        if val is None:
            continue

        impname = getattr(val, '__imphook__', None)
        if impname is None:
            continue

        self.hooks[impname] = val
def i_subs(self, op):
    '''
    Subtract using operands 0 (src) and 1 (dst) and store the result
    in operand 1.  No flags are written on the normal path.

    If either operand value is None the flags are marked undefined via
    undefFlags() and None is returned without writing the operand.
    '''
    ssize = op.opers[0].tsize
    dsize = op.opers[1].tsize

    src = self.getOperValue(op, 0)
    dst = self.getOperValue(op, 1)

    # Test the raw values: previously the guard only ran after both
    # values had already been handed to e_bits.sign_extend().
    if src is None or dst is None:
        self.undefFlags()
        return None

    # Each operand is extended from its *own* size.  dst used to be
    # extended with ssize, which left dsize unused.
    src = e_bits.sign_extend(src, ssize, self.ptrsz)
    dst = e_bits.sign_extend(dst, dsize, self.ptrsz)

    # NOTE(review): this computes src - dst although the result lands
    # in operand 1 (dst); confirm the intended direction against the
    # architecture manual.  Left as-is here.
    res = src - dst

    self.setOperValue(op, 1, res)
def i_subx(self, op):
    '''
    Subtract-with-carry: feeds dst and (src + carry flag) to
    intSubBase() and stores the unsigned result in operand 1.

    If either operand value is None the flags are marked undefined via
    undefFlags() and None is returned without writing the operand.
    '''
    ssize = op.opers[0].tsize
    dsize = op.opers[1].tsize

    src = self.getOperValue(op, 0)
    dst = self.getOperValue(op, 1)

    # Test the raw values: previously the guard only ran after both
    # values had already been handed to e_bits.sign_extend().
    if src is None or dst is None:
        self.undefFlags()
        return None

    src = e_bits.sign_extend(src, ssize, self.ptrsz)
    dst = e_bits.sign_extend(dst, dsize, self.ptrsz)
    C = self.getFlag(h8_regs.CCR_C)

    # intSubBase() returns a 6-tuple; only the unsigned result is used.
    (ssize, dsize, sres, ures, sdst, udst) = self.intSubBase(dst, src + C, dsize, ssize)
    self.setOperValue(op, 1, ures)
def initStackMemory(self, stacksize=init_stack_size):
    '''
    Setup and initialize stack memory.
    You may call this prior to emulating instructions.

    First call (self.stack_map_base is None): maps ``stacksize`` bytes
    named "[stack]", then reserves room just below the top of the map
    for 20 'funcstack' taints and points the stack counter at them.

    Later calls: grow the stack downward so that the total size becomes
    ``stacksize``.

    Raises RuntimeError('cannot shrink stack') if ``stacksize`` is
    smaller than the stack that is already mapped.
    '''
    if self.stack_map_base is None:
        self.stack_map_mask = e_bits.sign_extend(0xfff00000, 4, self.vw.psize)
        self.stack_map_base = e_bits.sign_extend(0xbfb00000, 4, self.vw.psize)
        self.stack_map_top = self.stack_map_base + stacksize
        self.stack_pointer = self.stack_map_top

        stack_map = init_stack_map
        if stacksize != init_stack_size:
            stack_map = b'\xfe' * stacksize

        # Map in a memory map for the stack
        self.addMemoryMap(self.stack_map_base, 6, "[stack]", stack_map)
        self.setStackCounter(self.stack_pointer)

        # Create some pre-made taints for positive stack indexes
        # NOTE: This is *ugly* for speed....
        taints = [self.setVivTaint('funcstack', i * self.psize) for i in range(20)]
        taintbytes = b''.join([e_bits.buildbytes(taint, self.psize) for taint in taints])

        # Make room below the top of the map so the write lands inside it.
        self.stack_pointer -= len(taintbytes)
        self.setStackCounter(self.stack_pointer)
        self.writeMemory(self.stack_pointer, taintbytes)
        return

    existing_map_size = self.stack_map_top - self.stack_map_base
    new_map_size = stacksize - existing_map_size
    if new_map_size < 0:
        raise RuntimeError('cannot shrink stack')

    new_map_top = self.stack_map_base
    new_map_base = new_map_top - new_map_size

    # Fill the extension with self-describing 32-bit words.  The fill
    # must be exactly new_map_size bytes: emitting one word per *byte*
    # (as before) produced a map four times too large that ran into
    # the existing stack.  Values are masked to 32 bits because '<I'
    # cannot pack a wider address.
    words, tail = divmod(new_map_size, 4)
    stack_map = b''.join([
        struct.pack('<I', (new_map_base + (i * 4)) & 0xffffffff)
        for i in range(words)
    ])
    stack_map += b'\xfe' * tail

    self.addMemoryMap(new_map_base, 6, "[stack]", stack_map)
    self.stack_map_base = new_map_base
def logicalAnd(self, op):
    '''
    AND operand 0 with operand 1, update the flags and return the
    result.  The destination operand is not written here.
    '''
    lhs = self.getOperValue(op, 0)
    rhs = self.getOperValue(op, 1)

    width = op.opers[0].tsize
    rhs_width = op.opers[1].tsize

    # sign-extend an immediate if needed
    if width != rhs_width:
        rhs = e_bits.sign_extend(rhs, rhs_width, width)
        rhs_width = width

    # Make sure everybody's on the same bit page.
    result = e_bits.unsigned(rhs, rhs_width) & e_bits.unsigned(lhs, width)

    flag_states = (
        (EFLAGS_AF, 0),  # AF is undefined, but it seems like it is zeroed
        (EFLAGS_OF, 0),
        (EFLAGS_CF, 0),
        (EFLAGS_SF, e_bits.is_signed(result, width)),
        (EFLAGS_ZF, not result),
        (EFLAGS_PF, e_bits.is_parity_byte(result)),
    )
    for flag, state in flag_states:
        self.setFlag(flag, state)

    return result
def i_adc(self, op):
    '''
    Add operand 1 plus the carry flag to operand 0, update the flags
    and store the sum in operand 0.
    '''
    target = self.getOperValue(op, 0)
    addend = self.getOperValue(op, 1)

    carry_in = 1 if self.getFlag(EFLAGS_CF) else 0

    width = op.opers[0].tsize
    addend_width = op.opers[1].tsize

    # Signed views are taken before any widening of the addend.
    signed_target = e_bits.signed(target, width)
    signed_addend = e_bits.signed(addend, addend_width)

    narrow_imm = isinstance(op.opers[1], i386ImmOper) and addend_width < width
    if narrow_imm:
        addend = e_bits.sign_extend(addend, addend_width, width)

    #FIXME perhaps unify the add/adc flags/arith code
    total = target + addend + carry_in
    signed_total = signed_target + signed_addend + carry_in

    flag_states = (
        (EFLAGS_CF, e_bits.is_unsigned_carry(total, width)),
        (EFLAGS_PF, e_bits.is_parity_byte(total)),
        (EFLAGS_AF, e_bits.is_aux_carry(addend, target)),
        (EFLAGS_ZF, not total),
        (EFLAGS_SF, e_bits.is_signed(total, width)),
        (EFLAGS_OF, e_bits.is_signed_overflow(signed_total, width)),
    )
    for flag, state in flag_states:
        self.setFlag(flag, state)

    self.setOperValue(op, 0, total)
def i_add(self, op):
    '''
    Add operand 1 to operand 0, update the flags and store the
    unsigned sum in operand 0.
    '''
    target = self.getOperValue(op, 0)
    addend = self.getOperValue(op, 1)

    width = op.opers[0].tsize
    addend_width = op.opers[1].tsize

    # A narrower source is sign extended up to the destination size.
    if width > addend_width:
        addend = e_bits.sign_extend(addend, addend_width, width)
        addend_width = width

    unsigned_sum = (e_bits.unsigned(target, width) +
                    e_bits.unsigned(addend, addend_width))
    signed_sum = (e_bits.signed(target, width) +
                  e_bits.signed(addend, addend_width))

    flag_states = (
        (EFLAGS_CF, e_bits.is_unsigned_carry(unsigned_sum, width)),
        (EFLAGS_PF, e_bits.is_parity_byte(unsigned_sum)),
        (EFLAGS_AF, e_bits.is_aux_carry(addend, target)),
        (EFLAGS_ZF, not unsigned_sum),
        (EFLAGS_SF, e_bits.is_signed(unsigned_sum, width)),
        (EFLAGS_OF, e_bits.is_signed_overflow(signed_sum, width)),
    )
    for flag, state in flag_states:
        self.setFlag(flag, state)

    self.setOperValue(op, 0, unsigned_sum)
def logicalAnd(self, op):
    '''
    AND operand 0 with operand 1, update the flags and return the
    result.

    Partially defined emulation: when either operand value is None the
    flags are marked undefined, operand 0 is set to None and the method
    returns None.  On the normal path operand 0 is *not* written here;
    the caller receives the result instead.
    '''
    dst = self.getOperValue(op, 0)
    src = self.getOperValue(op, 1)

    # PDE -- identity test is the correct way to detect "no value"
    if dst is None or src is None:
        self.undefFlags()
        self.setOperValue(op, 0, None)
        return

    dsize = op.opers[0].tsize
    ssize = op.opers[1].tsize

    # sign-extend an immediate if needed
    if dsize != ssize:
        src = e_bits.sign_extend(src, ssize, dsize)
        ssize = dsize

    # Make sure everybody's on the same bit page.
    dst = e_bits.unsigned(dst, dsize)
    src = e_bits.unsigned(src, ssize)

    res = src & dst

    self.setFlag(EFLAGS_AF, 0)  # AF is undefined, but it seems like it is zeroed
    self.setFlag(EFLAGS_OF, 0)
    self.setFlag(EFLAGS_CF, 0)
    self.setFlag(EFLAGS_SF, e_bits.is_signed(res, dsize))
    self.setFlag(EFLAGS_ZF, not res)
    self.setFlag(EFLAGS_PF, e_bits.is_parity_byte(res))
    return res
def integerSubtraction(self, op):
    """
    Do the core of integer subtraction but only *return* the
    resulting value rather than assigning it.
    (allows cmp and sub to use the same code)

    Returns whatever intSubBase() returns, or None (after marking the
    flags undefined) when either operand value is None.
    """
    # Src op gets sign extended to dst
    #FIXME account for same operand with zero result for PDE
    dst = self.getOperValue(op, 0)
    src = self.getOperValue(op, 1)

    # Identity test is the correct way to detect "no value"
    if src is None or dst is None:
        self.undefFlags()
        return None

    # So we can either do a BUNCH of crazyness with xor and shifting to
    # get the necessary flags here, *or* we can just do both a signed and
    # unsigned sub and use the results.
    dsize = op.opers[0].tsize
    ssize = op.opers[1].tsize

    # Sign extend immediates where the sizes don't match
    if dsize != ssize:
        src = e_bits.sign_extend(src, ssize, dsize)
        ssize = dsize

    return self.intSubBase(src, dst, ssize, dsize)
def i_adc(self, op):
    '''
    Add operand 1 plus the carry flag to operand 0, update the flags
    and store the sum in operand 0.

    Partially defined emulation: when either operand value is None the
    flags are marked undefined and operand 0 is set to None.
    '''
    dst = self.getOperValue(op, 0)
    src = self.getOperValue(op, 1)

    # PDE
    if dst is None or src is None:
        self.undefFlags()
        self.setOperValue(op, 0, None)
        return

    cf = 0
    if self.getFlag(EFLAGS_CF):
        cf = 1

    dstsize = op.opers[0].tsize
    srcsize = op.opers[1].tsize

    # Signed views of both inputs, taken at their own sizes, so that
    # the overflow flag can be derived from a *signed* sum.
    sdst = e_bits.signed(dst, dstsize)
    ssrc = e_bits.signed(src, srcsize)

    if (isinstance(op.opers[1], i386ImmOper) and
            srcsize < dstsize):
        src = e_bits.sign_extend(src, srcsize, dstsize)
        srcsize = dstsize

    #FIXME perhaps unify the add/adc flags/arith code
    res = dst + src + cf
    sres = sdst + ssrc + cf

    tsize = op.opers[0].tsize

    self.setFlag(EFLAGS_CF, e_bits.is_unsigned_carry(res, tsize))
    self.setFlag(EFLAGS_PF, e_bits.is_parity_byte(res))
    self.setFlag(EFLAGS_AF, e_bits.is_aux_carry(src, dst))
    self.setFlag(EFLAGS_ZF, not res)
    self.setFlag(EFLAGS_SF, e_bits.is_signed(res, tsize))
    # OF was previously computed from the unsigned sum, which reports
    # overflow for e.g. -1 + 1; use the signed sum instead.
    self.setFlag(EFLAGS_OF, e_bits.is_signed_overflow(sres, tsize))

    self.setOperValue(op, 0, res)
def i_add(self, op):
    '''
    Add operand 1 to operand 0, update the flags and store the
    unsigned sum in operand 0.

    Partially defined emulation: when either operand value is None the
    flags are marked undefined and operand 0 is set to None.
    '''
    dst = self.getOperValue(op, 0)
    src = self.getOperValue(op, 1)

    dsize = op.opers[0].tsize
    ssize = op.opers[1].tsize

    #FIXME PDE and flags
    # Identity test is the correct way to detect "no value"
    if dst is None or src is None:
        self.undefFlags()
        self.setOperValue(op, 0, None)
        return

    # A narrower source is sign extended up to the destination size.
    if dsize > ssize:
        src = e_bits.sign_extend(src, ssize, dsize)
        ssize = dsize

    udst = e_bits.unsigned(dst, dsize)
    usrc = e_bits.unsigned(src, ssize)
    sdst = e_bits.signed(dst, dsize)
    ssrc = e_bits.signed(src, ssize)

    ures = udst + usrc
    sres = sdst + ssrc

    self.setFlag(EFLAGS_CF, e_bits.is_unsigned_carry(ures, dsize))
    self.setFlag(EFLAGS_PF, e_bits.is_parity_byte(ures))
    self.setFlag(EFLAGS_AF, e_bits.is_aux_carry(src, dst))
    self.setFlag(EFLAGS_ZF, not ures)
    self.setFlag(EFLAGS_SF, e_bits.is_signed(ures, dsize))
    self.setFlag(EFLAGS_OF, e_bits.is_signed_overflow(sres, dsize))

    self.setOperValue(op, 0, ures)
def i_imul(self, op):
    '''
    Multiply for the two- and three-operand forms; the product is
    stored in operand 0 and CF/OF are both set from
    e_bits.is_unsigned_carry().

    Two operands:   operand0 = operand0 * operand1
    Three operands: operand0 = operand1 * operand2

    Raises envi.UnsupportedInstruction for any other operand count.
    '''
    #FIXME eflags
    # FIXME imul bugs
    opers = op.opers
    count = len(opers)
    if count not in (2, 3):
        raise envi.UnsupportedInstruction(self, op)

    if count == 2:
        left = self.getOperValue(op, 0)
        right = self.getOperValue(op, 1)
        right_index = 1
    else:
        # The destination value is read but not used; the read is kept
        # so the sequence of operand accesses is unchanged.
        self.getOperValue(op, 0)
        left = self.getOperValue(op, 1)
        right = self.getOperValue(op, 2)
        right_index = 2

    width = opers[0].tsize
    right_width = opers[right_index].tsize

    # FIXME all these are taken care of in disasm now...
    # Only the last operand may be a shorter immediate.
    if width > right_width:
        right = e_bits.sign_extend(right, right_width, width)

    product = left * right

    overflowed = e_bits.is_unsigned_carry(product, width)
    for flag in (EFLAGS_CF, EFLAGS_OF):
        self.setFlag(flag, overflowed)

    self.setOperValue(op, 0, product)
def repr(self, op):
    '''
    Render this immediate as text: values above 4096 are shown as
    zero-padded hex ("0x%.8x"), everything else as decimal.

    A one-byte immediate belonging to an opcode listed in ``sextend``
    is first sign extended to the size of the instruction's first
    operand when that size differs.
    '''
    shown = self.imm

    # The cheap size test runs first so the common case stays fast.
    if self.tsize == 1 and op.opcode in sextend:
        first = op.opers[0]
        if self.tsize != first.tsize:
            shown = e_bits.sign_extend(shown, self.tsize, first.tsize)

    return "0x%.8x" % shown if shown > 4096 else str(shown)
def initStackMemory(self, stacksize=init_stack_size):
    '''
    Setup and initialize stack memory.
    You may call this prior to emulating instructions.

    First call (self.stack_map_base is None): maps ``stacksize`` bytes
    named "[stack]", then reserves room just below the top of the map
    for 20 'funcstack' taints and points the stack counter at them.

    Later calls: grow the stack downward so that the total size becomes
    ``stacksize``.

    Raises RuntimeError('cannot shrink stack') if ``stacksize`` is
    smaller than the stack that is already mapped.
    '''
    if self.stack_map_base is None:
        self.stack_map_mask = e_bits.sign_extend(0xfff00000, 4, self.vw.psize)
        self.stack_map_base = e_bits.sign_extend(0xbfb00000, 4, self.vw.psize)
        self.stack_map_top = self.stack_map_base + stacksize
        self.stack_pointer = self.stack_map_top

        stack_map = init_stack_map
        if stacksize != init_stack_size:
            stack_map = b'\xfe' * stacksize

        # Map in a memory map for the stack
        self.addMemoryMap(self.stack_map_base, 6, "[stack]", stack_map)
        self.setStackCounter(self.stack_pointer)

        # Create some pre-made taints for positive stack indexes
        # NOTE: This is *ugly* for speed....
        taints = [self.setVivTaint('funcstack', i * self.psize) for i in xrange(20)]
        taintbytes = ''.join([e_bits.buildbytes(taint, self.psize) for taint in taints])

        # The stack pointer starts at stack_map_top, the first address
        # *past* the mapped region, so writing there missed the map.
        # Reserve room below the top before writing the taints.
        self.stack_pointer -= len(taintbytes)
        self.setStackCounter(self.stack_pointer)
        self.writeMemory(self.stack_pointer, taintbytes)
        return

    existing_map_size = self.stack_map_top - self.stack_map_base
    new_map_size = stacksize - existing_map_size
    if new_map_size < 0:
        raise RuntimeError('cannot shrink stack')

    new_map_top = self.stack_map_base
    new_map_base = new_map_top - new_map_size

    # Fill the extension with self-describing 32-bit words.  The fill
    # must be exactly new_map_size bytes: emitting one word per *byte*
    # (as before) produced a map four times too large that ran into
    # the existing stack.  Values are masked to 32 bits because '<I'
    # cannot pack a wider address.
    words, tail = divmod(new_map_size, 4)
    stack_map = ''.join([struct.pack('<I', (new_map_base + (i * 4)) & 0xffffffff)
                         for i in xrange(words)])
    stack_map += b'\xfe' * tail

    self.addMemoryMap(new_map_base, 6, "[stack]", stack_map)
    self.stack_map_base = new_map_base
def i_or(self, op):
    '''
    OR operand 1 into operand 0, store the result in operand 0 and
    update OF, CF, SF, ZF and PF.
    '''
    target = self.getOperValue(op, 0)
    width = op.opers[0].tsize

    mask = self.getOperValue(op, 1)
    mask_width = op.opers[1].tsize

    # Bring a differently sized source up to the destination size.
    if width != mask_width:
        mask = e_bits.sign_extend(mask, mask_width, width)

    combined = target | mask
    self.setOperValue(op, 0, combined)

    flag_states = (
        (EFLAGS_OF, 0),
        (EFLAGS_CF, 0),
        (EFLAGS_SF, e_bits.is_signed(combined, width)),
        (EFLAGS_ZF, not combined),
        (EFLAGS_PF, e_bits.is_parity_byte(combined)),
    )
    for flag, state in flag_states:
        self.setFlag(flag, state)
def i_sbb(self, op):
    '''
    Subtract with borrow: the carry flag is added to the source before
    intSubBase() is called, and its result is stored in operand 0.

    When either operand value is None the flags are marked undefined
    and None is returned without writing the operand.
    '''
    dst = self.getOperValue(op, 0)
    src = self.getOperValue(op, 1)

    # Much like "integer subtraction" but we need
    # to add in the carry flag
    # Identity test is the correct way to detect "no value"
    if src is None or dst is None:
        self.undefFlags()
        return None

    dsize = op.opers[0].tsize
    ssize = op.opers[1].tsize

    # Sign extend immediates where the sizes don't match
    if dsize != ssize:
        src = e_bits.sign_extend(src, ssize, dsize)
        ssize = dsize

    src += self.getFlag(EFLAGS_CF)
    res = self.intSubBase(src, dst, ssize, dsize)
    self.setOperValue(op, 0, res)
def i_xor(self, op):
    '''
    XOR operand 1 into operand 0, store the result in operand 0 and
    update CF, OF, SF, ZF, PF and AF.

    When both operands compare equal the result is taken to be 0
    without reading either value.
    '''
    width = op.opers[0].tsize
    other_width = op.opers[1].tsize

    # NOTE: This is pre-emptive for partially defined emulation
    if op.opers[0] != op.opers[1]:
        target = self.getOperValue(op, 0)
        other = self.getOperValue(op, 1)
        if width != other_width:
            other = e_bits.sign_extend(other, other_width, width)
        outcome = other ^ target
    else:
        outcome = 0

    self.setOperValue(op, 0, outcome)

    flag_states = (
        (EFLAGS_CF, 0),
        (EFLAGS_OF, 0),
        (EFLAGS_SF, e_bits.is_signed(outcome, width)),
        (EFLAGS_ZF, not outcome),
        (EFLAGS_PF, e_bits.is_parity_byte(outcome)),
        (EFLAGS_AF, False),  # Undefined but actually cleared on amd64 X2
    )
    for flag, state in flag_states:
        self.setFlag(flag, state)
def integerSubtraction(self, op):
    """
    Do the core of integer subtraction but only *return* the
    resulting value rather than assigning it.
    (allows cmp and sub to use the same code)

    Operand 0 is the source and operand 1 the destination; both are
    treated as BYTE sized when op.iflags has IF_BYTE set, otherwise as
    WORD sized.  Returns whatever intSubBase() returns.
    """
    src = self.getOperValue(op, 0)
    dst = self.getOperValue(op, 1)

    # So we can either do a BUNCH of crazyness with xor and shifting to
    # get the necessary flags here, *or* we can just do both a signed and
    # unsigned sub and use the results.

    # Source and destination always share one size here, so the old
    # "sizes differ -> sign extend" branch could never run and has
    # been removed.
    size = BYTE if op.iflags & IF_BYTE else WORD

    return self.intSubBase(src, dst, size, size)
def integerSubtraction(self, op):
    """
    Do the core of integer subtraction but only *return* the
    resulting value rather than assigning it.
    (allows cmp and sub to use the same code)

    Operand 0 is the source and operand 1 the destination; both are
    treated as BYTE sized when op.iflags has IF_BYTE set, otherwise as
    WORD sized.  Returns whatever intSubBase() returns.
    """
    src = self.getOperValue(op, 0)
    dst = self.getOperValue(op, 1)

    # So we can either do a BUNCH of crazyness with xor and shifting to
    # get the necessary flags here, *or* we can just do both a signed and
    # unsigned sub and use the results.

    # Both operands are given the same size, which made the former
    # "dsize != ssize" sign-extension branch unreachable; it is gone.
    if op.iflags & IF_BYTE:
        size = BYTE
    else:
        size = WORD

    return self.intSubBase(src, dst, size, size)
def _solve(self, emu=None):
    '''
    Solve the first kid and the current/target size nodes (in that
    order), then return the value sign extended from the current size
    to the target size.
    '''
    pending = (self.kids[0], self._cursz, self._tgtsz)
    value, from_size, to_size = [node.solve(emu=emu) for node in pending]
    return e_bits.sign_extend(value, from_size, to_size)
def i_movsxd(self, op):
    '''
    Read operand 1, sign extend it from 4 to 8 bytes and store the
    result in operand 0.
    '''
    widened = e_bits.sign_extend(self.getOperValue(op, 1), 4, 8)
    self.setOperValue(op, 0, widened)
def disasm(self, bytez, offset, va):
    '''
    Table-driven decode of one instruction from ``bytez`` at ``offset``.

    Visible flow:
      1. Prefix loop - consume bytes that have an entry in
         self._dis_prefixes; bytes in mandatory_prefixes (and VEX
         payloads) also hop through the opcode tables.
      2. Opcode loop - follow sub-table links in all_tables until an
         entry with no next table is reached.
      3. Operand loop - decode each operand with the handler selected
         by its addressing method.

    bytez  - indexable buffer; each element is passed to ord().
    offset - index of the first byte to decode.
    va     - passed to envi.InvalidInstruction when optype is 0.

    Raises envi.InvalidInstruction when the table entry has optype 0 or
    when an operand handler raises struct.error, and Exception when no
    handler exists for an addressing method.

    NOTE(review): no opcode object is built or returned in this
    excerpt; the definition looks truncated.
    NOTE(review): ``operands_index`` is not defined in this function;
    presumably a module-level constant -- confirm.
    '''
    # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.
    # Stuff for opcode parsing
    tabdesc = all_tables[ opcode86.TBL_Main ]

    # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0
    pho_prefixes = 0 # faux prefixes... don't immediately apply them, they may not be the prefixes we're looking for

    # ---- 1. prefix loop --------------------------------------------
    while True:

        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p == None:
            # Not a prefix byte: leave it for the opcode loop.
            break

        #print "OBYTE",hex(obyte)
        if obyte in mandatory_prefixes:
            # Remember the prefix flag, but only merge it into
            # ``prefixes`` after the loop (see the 0x0f test below).
            pho_prefixes |= p

            # ratchet through the tables
            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            #print "TABIDX: %d" % tabidx
            opdesc = tabdesc[0][tabidx]
            #print 'OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0]))
            tabdesc = all_tables[opdesc[0]]

        else:
            prefixes |= p
            if p & PREFIX_VEX:
                # NOTE(review): combined_mand_prefixes is only bound in
                # the VEX2/VEX3 branches; any other value with the
                # PREFIX_VEX bit set would hit an unbound name below.
                if p == PREFIX_VEX2:
                    # One payload byte follows the prefix byte.
                    offset += 1
                    imm1 = ord(bytez[offset])
                    if imm1 & 0xc0 != 0xc0: # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                        break

                    inv1 = imm1 ^ 0xff
                    vex_l = (0, PREFIX_VEX_L)[(imm1&4)>>2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)

                    combined_mand_prefixes = vex_pp_table[ pp ]

                elif p == PREFIX_VEX3:
                    # Two payload bytes; the first is peeked before
                    # offset is advanced so an early break leaves
                    # offset on the prefix byte.
                    imm1 = ord(bytez[offset+1])
                    if imm1 & 0xc0 != 0xc0: # shouldn't in 64-bit mode, but in 32-bit, this keeps LDS from colliding
                        break

                    offset += 2
                    imm2 = ord(bytez[offset])

                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2&4)>>2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f
                    #print "imms: %x %x \tl: %d\tvvvv: 0x%x\tpp: %d\tm_mmmm: 0x%x" % (imm1, imm2, vex_l, vvvv, pp, m_mmmm)

                    prefixes |= ((inv1 << 11) & PREFIX_REX_RXB) # RXB are inverted
                    prefixes |= ((imm2 << 12) & PREFIX_REX_W) # W is not inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT) # vvvv

                    combined_mand_prefixes = vex_pp_table[ pp ] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                # VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx == None:
                        continue

                    #print "TABIDX: %d" % tabidx
                    opdesc = tabdesc[0][tabidx]
                    #print 'OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0]))
                    tabdesc = all_tables[opdesc[0]]

        offset += 1
        continue

    # Deferred mandatory-prefix flags are applied unless the byte that
    # ended the prefix loop is 0x0f.
    if obyte != 0x0f:
        prefixes |= pho_prefixes

    # ---- 2. opcode table walk --------------------------------------
    while True:

        obyte = ord(bytez[offset])

        #print "OBYTE",hex(obyte)
        if (obyte > tabdesc[5]):
            #print "Jumping To Overflow Table:", tabdesc[5]
            tabdesc = all_tables[tabdesc[6]]

        tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
        #print "TABIDX: %s" % tabidx
        opdesc = tabdesc[0][tabidx]
        #print 'OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0]))

        # Hunt down multi-byte opcodes
        nexttable = opdesc[0]
        #print "NEXT",nexttable,hex(obyte), opcode86.tables_lookup.get(nexttable)

        if nexttable != 0: # If we have a sub-table specified, use it.
            #print "Multi-Byte Next Hop For",hex(obyte),opdesc[0]
            tabdesc = all_tables[nexttable]

            # Account for the table jump we made
            offset += 1

            continue

        # We are now on the final table...
        #print repr(opdesc)
        tbl_opercnt = tabdesc[1]
        mnem = opdesc[3 + tbl_opercnt]
        optype = opdesc[1]
        if tabdesc[3] == 0xff:
            offset += 1 # For our final opcode byte
        break

    if optype == 0:
        #print tabidx
        #print opdesc
        #print "OPTTYPE 0"
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff+16], va=va)

    # ---- 3. operand decoding ---------------------------------------
    # operoffset accumulates the sizes reported by operand handlers.
    operoffset = 0
    # Begin parsing operands based off address method
    for i in range(operands_index, operands_index + tbl_opercnt):

        oper = None # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        #print "ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype)

        # handles tsize calculations including new REX prefixes
        tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

        #print hex(opertype),hex(addrmeth),hex(tsize)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[2+tbl_opercnt+i], tsize, prefixes)

        else:
            #print "ADDRTYPE",hex(addrmeth)
            ameth = self._dis_amethods[addrmeth >> 16]
            #print "AMETH",ameth
            if ameth == None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    # These two methods are handed the position *after*
                    # the operand bytes consumed so far.
                    osize, oper = ameth(bytez, offset+operoffset, tsize, prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if len(operands) and tsize != operands[-1].tsize:
                        # Check if we are an explicitly signed operand *or* REX.W
                        if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize

                else:
                    osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

            except struct.error, e:
                # Catch struct unpack errors due to insufficient data length
                # NOTE(review): unlike the raise above, va is not passed here.
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff+16])

        if oper != None:
            # This is a filty hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize
def disasm(self, bytez, offset, va):
    '''
    Table-driven decode of one instruction from ``bytez`` at ``offset``;
    returns an Amd64Opcode.

    Visible flow:
      1. Prefix loop - consume bytes that have an entry in
         self._dis_prefixes; bytes in mandatory_prefixes (and VEX
         payloads) also hop through the opcode tables.
      2. Opcode loop - follow sub-table links in all_tables until an
         entry with no next table is reached.
      3. Operand loop - decode each operand with the handler selected
         by its addressing method.
      4. Combine instruction flags and build the Amd64Opcode, whose
         size argument is (offset - startoff) + operoffset.

    bytez  - indexable buffer; each element is passed to ord().
    offset - index of the first byte to decode.
    va     - stored in the returned opcode and passed to
             envi.InvalidInstruction when optype is 0.

    Raises envi.InvalidInstruction when the table entry has optype 0 or
    when an operand handler raises struct.error, and Exception when no
    handler exists for an addressing method.

    NOTE(review): ``operands_index`` is not defined in this function;
    presumably a module-level constant -- confirm.
    '''
    # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.
    # Stuff for opcode parsing
    tabdesc = all_tables[opcode86.TBL_Main]

    # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0
    pho_prefixes = 0  # faux prefixes...don't immediately apply them, they may not be the prefixes we're looking for

    # ---- 1. prefix loop --------------------------------------------
    while True:

        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            # Not a prefix byte: leave it for the opcode loop.
            break

        # print("OBYTE",hex(obyte))
        if obyte in mandatory_prefixes:
            # Remember the prefix flag, but only merge it into
            # ``prefixes`` after the loop (see the 0x0f test below).
            pho_prefixes |= p

            # ratchet through the tables
            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            # print("TABIDX: %d" % tabidx)
            opdesc = tabdesc[0][tabidx]
            # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))
            tabdesc = all_tables[opdesc[0]]

        else:
            prefixes |= p
            if p & PREFIX_VEX:
                # NOTE(review): combined_mand_prefixes is only bound in
                # the VEX2/VEX3 branches; any other value with the
                # PREFIX_VEX bit set would hit an unbound name below.
                if p == PREFIX_VEX2:
                    # One payload byte follows the prefix byte.
                    offset += 1
                    imm1 = ord(bytez[offset])
                    if imm1 & 0xc0 != 0xc0:  # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                        break

                    inv1 = imm1 ^ 0xff
                    vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)

                    combined_mand_prefixes = vex_pp_table[pp]

                elif p == PREFIX_VEX3:
                    # Two payload bytes; the first is peeked before
                    # offset is advanced so an early break leaves
                    # offset on the prefix byte.
                    imm1 = ord(bytez[offset + 1])
                    if imm1 & 0xc0 != 0xc0:  # shouldn't in 64-bit mode, but in 32-bit, this keeps LDS from colliding
                        break

                    offset += 2
                    imm2 = ord(bytez[offset])

                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f
                    # print("imms: %x %x \tl: %d\tvvvv: 0x%x\tpp: %d\tm_mmmm: 0x%x" % (imm1, imm2, vex_l, vvvv, pp, m_mmmm))

                    prefixes |= ((inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                    prefixes |= ((imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                    combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                # VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx is None:
                        continue

                    # print("TABIDX: %d" % tabidx)
                    opdesc = tabdesc[0][tabidx]
                    # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))
                    tabdesc = all_tables[opdesc[0]]

        offset += 1
        continue

    # Deferred mandatory-prefix flags are applied unless the byte that
    # ended the prefix loop is 0x0f.
    if obyte != 0x0f:
        prefixes |= pho_prefixes

    # ---- 2. opcode table walk --------------------------------------
    while True:

        obyte = ord(bytez[offset])

        # print("OP-OBYTE", hex(obyte))
        if (obyte > tabdesc[5]):
            # print("Jumping To Overflow Table: %s" % hex(tabdesc[5]))
            tabdesc = all_tables[tabdesc[6]]

        tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
        # print("TABIDX: %s" % tabidx)
        opdesc = tabdesc[0][tabidx]
        # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))

        # Hunt down multi-byte opcodes
        nexttable = opdesc[0]
        # print("NEXT", nexttable, hex(obyte), opcode86.tables_lookup.get(nexttable))

        if nexttable != 0:  # If we have a sub-table specified, use it.
            # print("Multi-Byte Next Hop For (%s, %s)" % (hex(obyte), opdesc[0]))
            tabdesc = all_tables[nexttable]

            # Account for the table jump we made
            offset += 1

            continue

        # We are now on the final table...
        # print(repr(opdesc))
        tbl_opercnt = tabdesc[1]
        mnem = opdesc[3 + tbl_opercnt]
        optype = opdesc[1]
        if tabdesc[3] == 0xff:
            offset += 1  # For our final opcode byte
        break

    if optype == 0:
        # print(tabidx)
        # print(opdesc)
        # print("OPTTYPE 0")
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    # ---- 3. operand decoding ---------------------------------------
    # operoffset accumulates the sizes reported by operand handlers.
    operoffset = 0
    # Begin parsing operands based off address method
    for i in range(operands_index, operands_index + tbl_opercnt):

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        # print("ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype))

        # handles tsize calculations including new REX prefixes
        tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

        # print(hex(opertype), hex(addrmeth), hex(tsize))

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i], tsize, prefixes)

        else:
            # print("ADDRTYPE", hex(addrmeth))
            ameth = self._dis_amethods[addrmeth >> 16]
            # print("AMETH", ameth)
            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    # These two methods are handed the position *after*
                    # the operand bytes consumed so far.
                    osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if len(operands) and tsize != operands[-1].tsize:
                        # Check if we are an explicitly signed operand *or* REX.W
                        if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize

                else:
                    osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                # NOTE(review): unlike the raise above, va is not passed here.
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16])

        if oper is not None:
            # This is a filty hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize

    # ---- 4. flags and opcode object --------------------------------
    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(optype, 0) | self._dis_oparch

    if prefixes & ed_i386.PREFIX_REP_MASK:
        iflags |= envi.IF_REPEAT

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = Amd64Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)

    return ret
def disasm(self, bytez, offset, va):
    '''
    Decode the prefixes, opcode table entry and operands of the
    instruction starting at bytez[offset].

    bytez  - byte string holding the instruction (each byte read via ord())
    offset - index into bytez of the first byte of the instruction
    va     - address handed to envi.InvalidInstruction on decode failure

    Raises envi.InvalidInstruction when the final table entry has an
    instruction type of 0, or when an operand decoder raises struct.error.
    Raises Exception when the addressing method slot for an operand is None.

    NOTE(review): as visible here the function ends once the operands are
    parsed; no opcode object is built or returned in this chunk.
    '''
    # Stuff for opcode parsing
    tabdesc = all_tables[0]  # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0

    # Consume prefix bytes until we hit something that is not a prefix
    while True:
        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break

        # 66 0f is left alone here; the table walk below treats it as an opcode
        if obyte == 0x66 and ord(bytez[offset + 1]) == 0x0f:
            break

        prefixes |= p
        offset += 1

    # Walk the opcode tables until we land on a final entry
    while True:
        obyte = ord(bytez[offset])

        # Bytes above the table's max live in its overflow table
        if obyte > tabdesc[4]:
            tabdesc = all_tables[tabdesc[5]]

        tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
        opdesc = tabdesc[0][tabidx]

        # Hunt down multi-byte opcodes
        nexttable = opdesc[0]
        if nexttable != 0:  # If we have a sub-table specified, use it.
            tabdesc = all_tables[nexttable]

            # In the case of 66 0f, the next table is *already* assuming we ate
            # the 66 *and* the 0f...  oblige them.
            if obyte == 0x66 and ord(bytez[offset + 1]) == 0x0f:
                offset += 1

            # Account for the table jump we made
            offset += 1
            continue

        # We are now on the final table...
        mnem = opdesc[6]
        optype = opdesc[1]
        if tabdesc[2] == 0xff:
            offset += 1  # For our final opcode byte
        break

    if optype == 0:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    operoffset = 0
    # Begin parsing operands based off address method
    for i in operand_range:

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[5 + i], tsize, prefixes)

        else:
            ameth = self._dis_amethods[addrmeth >> 16]
            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                        otsize = operands[-1].tsize
                        oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                        oper.tsize = otsize

                else:
                    osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize
def disasm(self, bytez, offset, va):
    '''
    The main amd64 decoder function.

    The initial steps it takes are determining what potential prefixes are
    attached to the instruction. By "potential", we mean that at this stage
    we don't know if things like 66, F2, F3 are being used as normal prefixes
    (representing things like a rep prefix) or if they're being used as
    mandatory prefixes that completely change which instruction we're
    decoding. All potential prefixes are stored in the pho_prefixes variable.

    To that end, there's some tap dancing we need to do to deal with what the
    intel manual refers to as "mandatory prefixes". If we hit a main opcode
    byte of 0F and we know we have a potentially mandatory prefix (and we're
    not in VEX land), we treat the byte right before the 0F as the only
    potential mandatory prefix (as laid out in the intel manual).

    Then we basically brute force the decoding since we really only have two
    paths to try. One where the mandatory prefix is merely a normal prefix
    (and doesn't affect which set of tables we traverse down) and one where
    the mandatory prefix does affect what tables we ratchet through (and thus
    directly changes which instruction we're looking at). For each case, we
    append all the relevant output to a list (should the decoding produce a
    meaningful output).

    If we end up producing no instruction definitions from our brute force
    loop, we've hit an invalid sequence of instruction bytes and we throw an
    exception. If only one path produces output, then that's our result and
    we proceed on to use the instruction definition to determine what
    addressing methods and size types to use when determining operands. If
    both paths produce a valid instruction definition, then the path that
    uses the mandatory prefix to directly change the instruction takes
    precedence over the path where it's just a normal prefix.

    In both the one and two results case, outside of our instruction decoding
    loop, we've kept a list of the possible decodings we could have hit, and
    just merely pop off the end of the list (so order matters when building
    the ppref variable).

    bytez  - byte string holding the instruction (each byte read via ord())
    offset - index into bytez of the first byte of the instruction
    va     - address of the instruction; stored on the returned opcode and
             handed to envi.InvalidInstruction on failure

    Returns an Amd64Opcode.

    Raises envi.InvalidInstruction when no decoding path yields a non-zero
    instruction type or when an operand decoder raises struct.error, and
    Exception when the addressing method slot for an operand is None.
    '''
    # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.
    # FIXME: And also REX.W

    # Stuff for opcode parsing
    # A tuple, used below as:
    # (optable, operand count, shiftbits, mask byte, sub, max, overflow table index)
    tabdesc = all_tables[opcode86.TBL_Main]
    startoff = offset  # Use startoff as a size knob if needed
    isvex = False
    vexw = None
    last_pref = 0
    ppref = [(None, None)]

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0
    # faux prefixes...don't immediately apply them, they may not be the prefixes we're looking for
    pho_prefixes = 0

    while True:
        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break

        if MANDATORY_PREFIXES[obyte]:
            pho_prefixes |= p
            last_pref = obyte
        else:
            prefixes |= p

        if p & PREFIX_VEX:
            isvex = True
            if p == PREFIX_VEX2:
                offset += 1
                imm1 = ord(bytez[offset])
                # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                # TODO: So we're always in 64 bit here. This will need to be here once we unify 32/64 decoding
                #if imm1 & 0xc0 != 0xc0:
                    #break

                inv1 = imm1 ^ 0xff
                vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                vvvv = ((inv1 >> 3) & 0xf)
                pp = imm1 & 3

                prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                prefixes |= vex_l
                prefixes |= (vvvv << VEX_V_SHIFT)

                combined_mand_prefixes = vex_pp_table[pp]

            elif p == PREFIX_VEX3:
                imm1 = ord(bytez[offset + 1])
                offset += 2
                # TODO: So we're always in 64 bit here. This will need to be here once we unify 32/64 decoding
                #if imm1 & 0xc0 != 0xc0:
                    #break

                imm2 = ord(bytez[offset])

                inv1 = imm1 ^ 0xff
                inv2 = imm2 ^ 0xff
                vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                vvvv = ((inv2 >> 3) & 0xf)
                pp = imm2 & 3
                m_mmmm = imm1 & 0x1f

                prefixes |= ((inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                vexw = ((imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                prefixes |= vexw
                prefixes |= vex_l
                prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

            # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
            # VEX3 prefixes may also specify depths into 38 and 3A tables
            for tabidx in combined_mand_prefixes:
                if tabidx is None:
                    continue
                opdesc = tabdesc[0][tabidx]
                tabdesc = all_tables[opdesc[0]]

            # So VEX and mandatory prefixes don't really intermingle
            offset += 1
            break

        offset += 1
        continue

    if obyte != 0x0f:
        prefixes |= pho_prefixes

    # intel manual says VEX and legacy prefixes don't intermingle
    if obyte == 0x0f and MANDATORY_PREFIXES[last_pref] and not isvex:
        obyte = last_pref
        ppref.append((last_pref, amd64_prefixes[last_pref]))

    decodings = []
    mainbyte = offset
    all_prefixes = prefixes
    ogtabdesc = tabdesc
    # onehot in this case refers to the prefixes that are defined in i386/disasm.py where only
    # one bit of the entire integer is set. We use that to quickly pop things in and out of the
    # prefixes list
    for pref, onehot in ppref:
        tabdesc = ogtabdesc
        offset = mainbyte
        if pref is not None:
            # our mandatory prefix is not none, which means that we have to jump through the tables
            # using the mandatory prefix byte as our "main byte"
            # As a bit of a hack, the 66/F2/F3 entries in the main table
            # directly point to the 660F/F20F/F30F tables since we're carefully tap dancing around
            # what our opcode byte really is
            obyte = pref
            # since we're treating this prefix as mandatory and not as REPNZ/REPZ/etc, we need to rip
            # it out of the pho_prefixes before we combine pho_prefixes with the main prefixes container
            all_prefixes = prefixes | (pho_prefixes & (~onehot))
        else:
            # treat nothing as a mandatory prefix (or we defaulted into here if we got no mandatory
            # prefixes). For most instructions this will be the normal case.
            obyte = ord(bytez[offset])
            all_prefixes = prefixes | pho_prefixes

        while True:
            # Bytes above the table's max live in its overflow table
            if (obyte > tabdesc[5]):
                tabdesc = all_tables[tabdesc[6]]

            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            opdesc = tabdesc[0][tabidx]

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            if nexttable != 0:  # If we have a sub-table specified, use it.
                tabdesc = all_tables[nexttable]

                # Account for the table jump we made
                offset += 1
                obyte = ord(bytez[offset])
                continue

            # We are now on the final table...
            tbl_opercnt = tabdesc[1]
            mnem = opdesc[3 + tbl_opercnt]
            optype = opdesc[1]
            if tabdesc[3] == 0xff:
                offset += 1  # For our final opcode byte
            break

        # A VEX-only definition is not a valid decoding without a VEX prefix
        if optype & INS_VEXREQ and not isvex:
            continue

        if optype != 0:
            decodings.append((tabdesc, opdesc, offset, all_prefixes))

    if not decodings:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    # The last path tried (the mandatory-prefix one, when present) wins
    tabdesc, opdesc, offset, prefixes = decodings.pop()
    optype = opdesc[1]
    tbl_opercnt = tabdesc[1]
    mnem = opdesc[3 + tbl_opercnt]
    if optype == 0:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    operoffset = 0
    # Begin parsing operands based off address method
    for i in range(operands_index, operands_index + tbl_opercnt):

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        # handles tsize calculations including new REX prefixes
        tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i], tsize, prefixes)

        else:
            # So the 0x7f is here to help us deal with an issue between VEX and non-VEX
            # A super common pattern in vex is to add an operand somewhere in the middle of the
            # existing operands. So if we have like cmpps xmm2, 17 in non-VEX, the vex version
            # will look like vsprlw xmm3, xmm4, 17.
            # The fun bit of this is that the vex only portions aren't exclusive to the VEX-only
            # addressing methods, so we can have ADDRMETH_V be skipped outside of VEX mode too, and not
            # just things like ADDRMETH_H. Hence, we need a new flag that I stash in the upper bits of
            # instruction operand definition so we can know when to skip operands
            ameth = self._dis_amethods[(addrmeth >> 16) & 0x7F]
            vex_skip = addrmeth & opcode86.ADDRMETH_VEXSKIP
            if not isvex and vex_skip:
                continue

            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth in IMM_REQOFFS:
                    osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED:
                        if len(operands) and tsize != operands[-1].tsize:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize
                        elif not len(operands):
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, self._dis_default_size)
                            oper.tsize = self._dis_default_size

                else:
                    osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)
                    # A dereferencing operand may carry a fixed memory size in
                    # its operand flags; when it does, that size replaces tsize.
                    if getattr(oper, "_is_deref", False):
                        memsz = OP_EXTRA_MEMSIZES[(operflags & OP_MEMMASK) >> 4]
                        if memsz is not None:
                            oper.tsize = memsz

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize

    typemask = optype & 0xFFFF

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(typemask, 0) | self._dis_oparch

    if prefixes & PREFIX_REP_MASK:
        iflags |= envi.IF_REPEAT

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if typemask == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = Amd64Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret
def _solve(self, emu=None, vals=None):
    '''
    Solve the first kid, then sign extend that value from the first kid's
    width to the size obtained by solving the second kid.
    '''
    source = self.kids[0]
    value = source.solve(emu=emu, vals=vals)
    width = source.getWidth()
    newsize = self.kids[1].solve(emu=emu, vals=vals)
    return e_bits.sign_extend(value, width, newsize)
def _solve(self, emu=None, vals=None):
    '''
    Return kids[0]'s solved value sign extended from kids[0]'s width to
    the size given by solving kids[1].
    '''
    solveargs = {'emu': emu, 'vals': vals}
    operand = self.kids[0]
    solved = operand.solve(**solveargs)
    fromsize = operand.getWidth()
    tosize = self.kids[1].solve(**solveargs)
    return e_bits.sign_extend(solved, fromsize, tosize)
def i_movsx(self, op):
    '''
    Read operand 1, sign extend it from operand 1's tsize to operand 0's
    tsize, and store the result in operand 0.
    '''
    src_size = op.opers[1].tsize
    dst_size = op.opers[0].tsize
    extended = e_bits.sign_extend(self.getOperValue(op, 1), src_size, dst_size)
    self.setOperValue(op, 0, extended)
def i_cwde(self, op):
    '''
    Sign extend the 2 byte value in REG_AX to 4 bytes and store it in REG_EAX.
    '''
    # FIXME "cbw" 16 bit mode
    word = self.getRegister(REG_AX)
    self.setRegister(REG_EAX, e_bits.sign_extend(word, 2, 4))
def i_cwde(self, op):
    '''
    Load REG_AX, widen it from 2 to 4 bytes with sign extension, and write
    the widened value to REG_EAX.
    '''
    # FIXME "cbw" 16 bit mode
    widened = e_bits.sign_extend(self.getRegister(REG_AX), 2, 4)
    self.setRegister(REG_EAX, widened)
def disasm(self, bytez, offset, va):
    '''
    Decode one instruction starting at bytez[offset] and return it as an
    i386Opcode.

    All prefix bytes are collected first. If the byte after the prefixes is
    0F and the last prefix is listed in MANDATORY_PREFIXES, the table walk is
    tried twice: once treating that prefix as a normal prefix and once using
    it as the table selector (with its bit stripped from the prefix flags).
    The last successful decoding wins, so the mandatory-prefix
    interpretation takes precedence when both decode.

    bytez  - byte string holding the instruction (each byte read via ord())
    offset - index into bytez of the first byte of the instruction
    va     - address of the instruction; stored on the returned opcode and
             handed to envi.InvalidInstruction on failure

    Raises envi.InvalidInstruction when no path yields a non-zero
    instruction type or when an operand decoder raises struct.error, and
    Exception when the addressing method slot for an operand is None.
    '''
    # Stuff for opcode parsing
    # A tuple, used below as:
    # (optable, shiftbits, mask byte, sub, max, overflow table index)
    tabdesc = all_tables[0]
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    all_prefixes = 0
    last_pref = 0

    while True:
        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break

        all_prefixes |= p
        last_pref = obyte
        offset += 1

    # At this point we should have all the possible prefixes, but some may be mandatory ones that we
    # need to not use as display prefixes and use as jumps in the table instead.
    # So we're going to lie to the rest of the code in order to use them as we want
    obyte = ord(bytez[offset])
    ppref = [(None, None)]
    if obyte == 0x0f and MANDATORY_PREFIXES[last_pref]:
        obyte = last_pref
        ppref.append((last_pref, i386_prefixes[last_pref]))

    decodings = []
    mainbyte = offset
    prefixes = all_prefixes
    # as noted above, since we can have prefixes that may or may not be mandatory,
    # we roll through those and pop off the last one, since there's two cases we have
    # to deal with: a normal prefix that just modifies the opers, and a mandatory prefix
    # that modifies the instruction semantics entirely. Either way, the mandatory prefix
    # takes precedence and whichever one wins will be at the end of the list <decodings>
    for pref, onehot in ppref:
        offset = mainbyte
        if pref is not None:
            obyte = pref
            prefixes = all_prefixes & (~onehot)
        else:
            obyte = ord(bytez[offset])

        tabdesc = all_tables[0]
        # Nothing decoded on this path yet. Without this reset, walking off
        # the end of a table would record the previous path's optype/opdesc
        # as if it were a decoding for this path.
        optype = 0

        while True:
            # Bytes above the table's max live in its overflow table
            if (obyte > tabdesc[4]):
                tabdesc = all_tables[tabdesc[5]]

            tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
            # Valid indexes are 0..len-1, so len itself is already off the end
            if tabidx >= len(tabdesc[0]):
                break

            opdesc = tabdesc[0][tabidx]

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            if nexttable != 0:  # If we have a sub-table specified, use it.
                tabdesc = all_tables[nexttable]
                offset += 1
                obyte = ord(bytez[offset])
                continue

            # We are now on the final table...
            mnem = opdesc[6]
            optype = opdesc[1]
            if tabdesc[2] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype != 0:
            decodings.append((opdesc, offset, prefixes))

    if not decodings:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    opdesc, offset, all_prefixes = decodings.pop()
    optype = opdesc[1]
    mnem = opdesc[6]
    if optype == 0:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    operoffset = 0
    # Begin parsing operands based off address method
    for i in operand_range:

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        tsize = self._dis_calc_tsize(opertype, all_prefixes, operflags)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[5 + i], tsize, all_prefixes)

        else:
            ameth = self._dis_amethods[addrmeth >> 16]
            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    osize, oper = ameth(bytez, offset + operoffset, tsize, all_prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                        otsize = operands[-1].tsize
                        oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                        oper.tsize = otsize

                else:
                    osize, oper = ameth(bytez, offset, tsize, all_prefixes, operflags)
                    # so in the opcode maps intel directly mentions that some opcodes are
                    # ADDRMETH_W but if the operand is a memory ref, it's always of a specific
                    # size, with no rhyme or reason as to which it is. So we directly embed
                    # that knowledge into the opcodes mappings we maintain and pluck it out
                    # here.
                    if getattr(oper, "_is_deref", False):
                        memsz = OP_EXTRA_MEMSIZES[(operflags & OP_MEMMASK) >> 4]
                        if memsz is not None:
                            oper.tsize = memsz

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(optype & 0xFFFF, 0) | self._dis_oparch

    if all_prefixes & PREFIX_REP_MASK:
        iflags |= envi.IF_REPEAT

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = i386Opcode(va, optype, mnem, all_prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret
def i_push(self, op):
    '''
    Push operand 0. When that operand is an i386ImmOper its value is first
    sign extended from the operand's tsize to 4 bytes.
    '''
    value = self.getOperValue(op, 0)
    operand = op.opers[0]
    if isinstance(operand, i386ImmOper):
        # FIXME 64bit
        value = e_bits.sign_extend(value, operand.tsize, 4)
    self.doPush(value)
def disasm(self, bytes, offset, va):
    '''
    Decode one instruction starting at bytes[offset] and return it as an
    i386Opcode.

    bytes  - byte string holding the instruction (each byte read via ord());
             the name shadows the builtin but is part of the signature
    offset - index into bytes of the first byte of the instruction
    va     - address of the instruction, stored on the returned opcode

    Raises envi.InvalidInstruction when the final table entry has an
    instruction type of 0 or when an operand decoder raises struct.error,
    and Exception when the addressing method slot for an operand is None.
    '''
    # Stuff for opcode parsing
    tabdesc = all_tables[0]  # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0

    # Consume prefix bytes until we hit something that is not a prefix
    while True:
        obyte = ord(bytes[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break

        # 66 0f is left alone here; the table walk below treats it as an opcode
        if obyte == 0x66 and ord(bytes[offset + 1]) == 0x0f:
            break

        prefixes |= p
        offset += 1

    # Walk the opcode tables until we land on a final entry
    while True:
        obyte = ord(bytes[offset])

        # Bytes above the table's max live in its overflow table
        if (obyte > tabdesc[4]):
            tabdesc = all_tables[tabdesc[5]]

        tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
        opdesc = tabdesc[0][tabidx]

        # Hunt down multi-byte opcodes
        nexttable = opdesc[0]
        if nexttable != 0:  # If we have a sub-table specified, use it.
            tabdesc = all_tables[nexttable]

            # In the case of 66 0f, the next table is *already* assuming we ate
            # the 66 *and* the 0f...  oblige them.
            if obyte == 0x66 and ord(bytes[offset + 1]) == 0x0f:
                offset += 1

            # Account for the table jump we made
            offset += 1
            continue

        # We are now on the final table...
        mnem = opdesc[6]
        optype = opdesc[1]
        if tabdesc[2] == 0xff:
            offset += 1  # For our final opcode byte
        break

    if optype == 0:
        raise envi.InvalidInstruction(bytes=bytes[startoff:startoff + 16])

    operoffset = 0
    # Begin parsing operands based off address method
    for i in operand_range:

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        tsize = self._dis_calc_tsize(opertype, prefixes)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[5 + i], tsize, prefixes)

        else:
            ameth = self._dis_amethods[addrmeth >> 16]
            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    osize, oper = ameth(bytes, offset + operoffset, tsize, prefixes)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                        otsize = operands[-1].tsize
                        oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                        oper.tsize = otsize

                else:
                    osize, oper = ameth(bytes, offset, tsize, prefixes)

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytes=bytes[startoff:startoff + 16])

        # Identity test: do not route the None check through operand comparison methods
        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)
        operoffset += osize

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(optype, 0)

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = i386Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret