def p_6A_6B(va, val, buf, off, tsize):
    op = val >> 4
    diff = op & 0xf
    osz = 1 + ((val >> 8) & 1)

    if op & 0x8:
        # Rs, @aa:16/24
        if diff == 0xa:
            op, mnem, opers, iflags, isz = p_Rs_aAA24(va, val, buf, off, tsize)
            iflags |= e_const.OSZ_FLAGS[osz]
            return op, mnem, opers, iflags, isz

        elif diff == 0x8:
            op, mnem, opers, iflags, isz = p_Rs_aAA16(va, val, buf, off, tsize)
            iflags |= e_const.OSZ_FLAGS[osz]
            return op, mnem, opers, iflags, isz

        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    else:
        # @aa:16/24, Rd
        if diff == 0x2:
            op, mnem, opers, iflags, isz = p_aAA24_Rd(va, val, buf, off, tsize)
            iflags |= e_const.OSZ_FLAGS[osz]
            return op, mnem, opers, iflags, isz

        elif diff == 0x0:
            op, mnem, opers, iflags, isz = p_aAA16_Rd(va, val, buf, off, tsize)
            iflags |= e_const.OSZ_FLAGS[osz]
            return op, mnem, opers, iflags, isz

        elif val in (0x6a10, 0x6a18, 0x6a30, 0x6a38):
            # non-MOV instructions
            isz, aasize, fmt = (None, (6, 2, '>HH'), None, (8, 4, '>IH'))[(val >> 4) & 3]
            aa, val2 = struct.unpack(fmt, buf[off + 2:off + isz])
            op, mnem, niflags = getBitDbl_OpMnem(val2)

            if val2 & 0x1c00:
                i3 = (val2 >> 4) & 7
                opers = (
                    h8_operands.H8ImmOper(i3, tsize),
                    h8_operands.H8AbsAddrOper(aa, tsize, aasize),
                )
            else:
                rn = (val2 >> 4) & 0xf
                opers = (
                    h8_operands.H8RegDirOper(rn, tsize, va),
                    h8_operands.H8AbsAddrOper(aa, tsize, aasize),
                )
            return op, mnem, opers, 0, isz

        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)
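# Illustrative sketch (not part of the original module): the p_* helpers above all carve
# fields out of the 16-bit instruction word with the same shift-and-mask pattern. This
# standalone function mirrors the slicing done in p_6A_6B so the field layout is easy to
# eyeball; the names are informal labels, not terms from the H8 manual.
def _demo_6a_6b_fields(val):
    """Split a 16-bit 0x6A##/0x6B## opcode word the way p_6A_6B does."""
    op = val >> 4                  # upper 12 bits: the encoding group
    diff = op & 0xf                # bits 7:4 of the word: which variant within the group
    osz = 1 + ((val >> 8) & 1)     # 0x6a.. -> 1, 0x6b.. -> 2, judging from the osz computation above
    return op, diff, osz

# e.g. 0x6b08 lands in the "@aa:16, Rd" branch with a 2-byte operand size:
assert _demo_6a_6b_fields(0x6b08) == (0x6b0, 0x0, 2)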
def makeOpcode(self, bytes, offset):
    opval = struct.unpack("<L", bytes[offset:offset + 4])[0]

    cond = opval >> 28

    if cond == COND_EXTENDED:
        return "FIXME - make extended opcode parser"

    # Begin the table lookup sequence with the first 3 non-cond bits
    encfam = (opval >> 25) & 0x7

    enc, nexttab = inittable[encfam]
    if nexttab != None:
        # we have to sub-parse...
        for mask, val, penc in nexttab:
            if (opval & mask) == val:
                enc = penc
                break

    # If we don't know the encoding by here, we never will ;)
    if enc == None:
        raise envi.InvalidInstruction("omg")

    #print "ENCFAM",encfam
    #print "COND",cond
    #print "ENCODING",enc

    op = ienc_parsers[enc](opval)
    op.cond = cond
    return op
def disasm(self, bytez, offset, va):
    """
    Parse a sequence of bytes out into an envi.Opcode instance.
    """
    opval, = struct.unpack_from('>H', bytez, offset)
    prim = opval >> 8

    opdata = main_table[prim]
    if opdata == None:
        raise envi.InvalidInstruction(bytez=bytez[offset:offset + 16], va=va)

    subtable, mnem, decoder, tsize, iflags = opdata

    if subtable:
        raise Exception("WHAT ARE WE DOING HERE. NEED subtable at 0x%x: %s"
                        % (va, bytez[offset:offset + 16].encode('hex')))

    elif decoder != None:
        opcode, nmnem, olist, flags, isize = decoder(va, opval, bytez, offset, tsize)
        #print opcode, nmnem, olist, flags, isize, decoder
        if nmnem != None:
            mnem = nmnem
        iflags |= flags

    else:
        opcode = opval
        isize = 2
        olist = tuple()
        #raise envi.InvalidInstruction(mesg='Failed to find subtable or decoder', bytez=bytez[offset:offset+16], va=va)

    if olist == None:
        raise envi.InvalidInstruction(
            mesg='Operand list cannot be None for instruction "%s"' % mnem,
            bytez=bytez[offset:offset + 16], va=va)

    op = H8Opcode(va, opcode, mnem, None, isize, olist, iflags)

    if op.opers != None:
        # following the nasty little hack from other modules. "everybody's doing it!"
        for oper in op.opers:
            oper._dis_regctx = self._dis_regctx

    return op
def disasm(self, bytes, offset, va):
    row = sigtree.getSignature(bytes, offset)
    if row == None:
        raise envi.InvalidInstruction(bytes=bytes[offset:offset+8])

    sigmask, mnem, o1type, o1info, o2type, o2info, oplen, immoff, iflags = row
    #ret = i386Opcode(va, optype, mnem, prefixes, (offset-startoff)+operoffset, operands, iflags)

    opers = []
    if o1type != None:
        opers.append(self._buildOper(bytes, offset, immoff, o1type, o1info))
    if o2type != None:
        opers.append(self._buildOper(bytes, offset, immoff, o2type, o2info))

    return z80Opcode(va, 0, mnem, 0, oplen, opers, iflags)
def p_eepmov(va, val, buf, off, tsize):
    val2, = struct.unpack('>H', buf[off + 2:off + 4])
    op = (val << 8) | val2
    # tsize = (1, 2)[(val >> 7) & 1]
    diff = val & 0xff

    if diff == 0x5c:
        iflags = e_const.IF_B
    elif diff == 0xd4:
        iflags = e_const.IF_W
    else:
        raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    return op, None, (), iflags, 4
def p_0f_1f(va, val, buf, off, tsize):
    aors = val >> 12
    diff = val & 0xf0

    if diff == 0:
        op = val >> 4
        mnem = ('daa', 'das')[aors]
        iflags = 0
        rd = val & 0xf
        opers = (h8_operands.H8RegDirOper(rd, 1, va=va, oflags=0),)

    elif diff >= 0x80:
        mnem = ('mov', 'cmp')[aors]
        op, nmnem, opers, iflags, isz = p_ERs_ERd(va, val, buf, off, tsize=4)

    else:
        raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    return (op, mnem, opers, iflags, 2)
def p_0a_1a(va, val, buf, off, tsize):
    diff = (val >> 12)

    if val & 0xf0 == 0:
        mnem = ('inc', 'dec')[diff]
        op, nmnem, opers, iflags, isz = p_Rd(va, val, buf, off, tsize=1)
        iflags = e_const.IF_B

    elif val & 0xf0 >= 0x80:
        mnem = ('add', 'sub')[diff]
        op, nmnem, opers, iflags, isz = p_ERs_ERd(va, val, buf, off, tsize=4)

    else:
        raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    return (op, mnem, opers, iflags, isz)
def p_7e(va, val, buf, off, tsize):
    # btst, bor, bior, bxor, bixor, band, biand, bld, bild (erd)
    val2, = struct.unpack('>H', buf[off + 2:off + 4])
    op, mnem, iflags = getBitDbl_OpMnem(val2)
    op |= ((val & 0xff80) << 9)

    aa = val & 0xff
    telltale = (val2 >> 8)

    # FIXME: is any of this redundant with previous encodings?
    if telltale == 0x63:
        # btst (0x####63##)
        mnem = 'btst'
        rn = (val2 >> 4) & 0xf
        opers = (
            h8_operands.H8RegDirOper(rn, tsize, va, 0),
            h8_operands.H8AbsAddrOper(aa, tsize=tsize, aasize=1),
        )

    elif telltale == 0x73:
        # btst (0x####73##)
        mnem = 'btst'
        i3 = (val2 >> 4) & 0x7
        opers = (
            h8_operands.H8ImmOper(i3, tsize),
            h8_operands.H8AbsAddrOper(aa, tsize=tsize, aasize=1),
        )

    elif 0x78 > telltale > 0x73:
        # other bit-halves:
        tsize = 1
        i3 = (val2 >> 4) & 0x7
        opers = (
            h8_operands.H8ImmOper(i3, tsize),
            h8_operands.H8AbsAddrOper(aa, tsize=tsize, aasize=1),
        )

    else:
        raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    return op, mnem, opers, iflags, 4
def disasm(self, bytes, offset, va):
    # Stuff for opcode parsing
    tabdesc = all_tables[0]  # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0

    while True:
        obyte = ord(bytes[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p == None:
            break
        if obyte == 0x66 and ord(bytes[offset + 1]) == 0x0f:
            break
        prefixes |= p
        offset += 1
        continue

    #pdone = False
    while True:

        obyte = ord(bytes[offset])

        #print "OBYTE",hex(obyte)
        if (obyte > tabdesc[4]):
            #print "Jumping To Overflow Table:", tabdesc[5]
            tabdesc = all_tables[tabdesc[5]]

        tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
        #print "TABIDX: %d" % tabidx
        opdesc = tabdesc[0][tabidx]
        #print 'OPDESC: %s' % repr(opdesc)

        # Hunt down multi-byte opcodes
        nexttable = opdesc[0]
        #print "NEXT",nexttable,hex(obyte)
        if nexttable != 0:
            # If we have a sub-table specified, use it.
            #print "Multi-Byte Next Hop For",hex(obyte),opdesc[0]
            tabdesc = all_tables[nexttable]

            # In the case of 66 0f, the next table is *already* assuming we ate
            # the 66 *and* the 0f... oblige them.
            if obyte == 0x66 and ord(bytes[offset + 1]) == 0x0f:
                offset += 1

            # Account for the table jump we made
            offset += 1

            continue

        # We are now on the final table...
        #print repr(opdesc)
        mnem = opdesc[6]
        optype = opdesc[1]
        if tabdesc[2] == 0xff:
            offset += 1  # For our final opcode byte
        break

    if optype == 0:
        #print tabidx
        #print opdesc
        #print "OPTYPE 0"
        raise envi.InvalidInstruction(bytes=bytes[startoff:startoff + 16])

    operoffset = 0
    # Begin parsing operands based off address method
    for i in operand_range:

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        #print "ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype)

        tsize = self._dis_calc_tsize(opertype, prefixes)

        #print hex(opertype),hex(addrmeth)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[5 + i], tsize, prefixes)

        else:
            #print "ADDRTYPE",hex(addrmeth)
            ameth = self._dis_amethods[addrmeth >> 16]
            #print "AMETH",ameth
            if ameth == None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    osize, oper = ameth(bytes, offset + operoffset, tsize, prefixes)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                        otsize = operands[-1].tsize
                        oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                        oper.tsize = otsize

                else:
                    osize, oper = ameth(bytes, offset, tsize, prefixes)

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytes=bytes[startoff:startoff + 16])

        if oper != None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)

        operoffset += osize

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(optype, 0)

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = i386Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret
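# Usage sketch (assumes a stock vivisect/envi install; this is not code from the module
# above): callers normally reach a disasm() method like this one through the architecture
# module's archParseOpcode() helper rather than invoking the disassembler class directly.
# getArchModule() and archParseOpcode() are standard envi entry points; the byte string
# and addresses below are made-up example values.
def _demo_decode_i386_push():
    import envi
    arch = envi.getArchModule('i386')
    # 0x55 encodes "push ebp"; the va argument only matters for rendering relative operands.
    # (Python 2 era envi expects a str here; newer releases take bytes.)
    op = arch.archParseOpcode(b'\x55', 0, 0x8048000)
    print(op.mnem, len(op), op)   # -> push 1 push ebp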
def p_01(va, val, buf, off, tsize):
    mnem = None
    iflags = 0
    opers = None

    diff = (val >> 4) & 0xf
    if diff == 8:
        # sleep
        op = 0x0180
        mnem = 'sleep'
        opers = tuple()
        return op, mnem, opers, iflags, 2

    val2, = struct.unpack('>H', buf[off + 2:off + 4])
    isz = 4
    op = (val << 9) | (val2 >> 7)

    if diff == 0:
        mnem = 'mov'
        # all 0100#### opcodes share these:
        tsize = 4
        iflags |= e_const.IF_L

        d2 = val2 >> 8

        # mov 0100##... where ## is basically another mov encoding with different register sizes
        if d2 == 0x69:
            erd = (val2 >> 4) & 7
            ers = val2 & 7
            if val2 & 0x80:
                opers = (
                    h8_operands.H8RegDirOper(ers, tsize, va),
                    h8_operands.H8RegIndirOper(erd, tsize, va),
                )
            else:
                opers = (
                    h8_operands.H8RegIndirOper(erd, tsize, va),
                    h8_operands.H8RegDirOper(ers, tsize, va),
                )

        elif d2 == 0x6b:
            if val2 & 0x20:
                isz = 8
                val3, = struct.unpack('>I', buf[off + 4:off + 8])
                if val2 & 0x80:
                    # a
                    erd = val2 & 7
                    aa = val3 & 0xffffffff
                    opers = (
                        h8_operands.H8RegDirOper(erd, tsize, va),
                        h8_operands.H8AbsAddrOper(aa, tsize, aasize=4),
                    )
                else:
                    # 2
                    ers = val2 & 7
                    aa = val3 & 0xffffffff
                    opers = (
                        h8_operands.H8AbsAddrOper(aa, tsize, aasize=4),
                        h8_operands.H8RegDirOper(ers, tsize, va),
                    )
            else:
                val3, = struct.unpack('>H', buf[off + 4:off + 6])
                isz = 6
                if val2 & 0x80:
                    # 8
                    erd = val2 & 7
                    aa = val3 & 0xffff
                    opers = (
                        h8_operands.H8RegDirOper(erd, tsize, va),
                        h8_operands.H8AbsAddrOper(aa, tsize, aasize=2),
                    )
                else:
                    # 0
                    ers = val2 & 7
                    aa = val3 & 0xffff
                    opers = (
                        h8_operands.H8AbsAddrOper(aa, tsize, aasize=2),
                        h8_operands.H8RegDirOper(ers, tsize, va),
                    )

        elif d2 == 0x6d:
            # TODO: test me!!
            newop, mnem, opers, iflags, nisz = p_6c_6d_0100(va, val2, buf, off + 2, 4)
            isz = nisz + 2
            op = newop | (0x01000000)

        elif d2 == 0x6f:
            disp, = struct.unpack('>H', buf[off + 4:off + 6])
            isz = 6
            er0 = val2 & 7
            er1 = (val2 >> 4) & 7
            if val2 & 0x80:
                # mov.l ERs, @(d:16,ERd)
                opers = (
                    h8_operands.H8RegDirOper(er0, tsize, va),
                    h8_operands.H8RegIndirOper(er1, tsize, va, disp, dispsz=2),
                )
            else:
                # mov.l @(d:16,ERs), ERd
                opers = (
                    h8_operands.H8RegIndirOper(er1, tsize, va, disp, dispsz=2),
                    h8_operands.H8RegDirOper(er0, tsize, va),
                )

        elif d2 == 0x78:
            isz = 10
            val3, disp = struct.unpack('>HI', buf[off + 4:off + 10])
            if val3 & 0xff20 != 0x6b20:
                raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

            er0 = val3 & 7
            er1 = (val2 >> 4) & 7
            if (val3 & 0x80):
                # mov.l ERs, @(d:24,ERd)
                opers = (
                    h8_operands.H8RegDirOper(er0, tsize, va),
                    h8_operands.H8RegIndirOper(er1, tsize, va, disp, dispsz=4),
                )
            else:
                # mov.l @(d:24,ERs), ERd
                opers = (
                    h8_operands.H8RegIndirOper(er1, tsize, va, disp, dispsz=4),
                    h8_operands.H8RegDirOper(er0, tsize, va),
                )

    elif diff in (1, 2, 3):
        # ldm/stm (ERn-ERn+diff), @-SP
        iflags = e_const.IF_L
        tsize = 4
        optest = val2 & 0xfff8
        rn = val2 & 0x7
        rcount = diff + 1

        if optest == 0x6df0:
            mnem = 'stm'
            opers = (
                h8_operands.H8RegMultiOper(rn, rcount),
                h8_operands.H8RegIndirOper(e_const.REG_SP, tsize, va, 0, oflags=e_const.OF_PREDEC),
            )
        elif optest == 0x6d70:
            mnem = 'ldm'
            opers = (
                h8_operands.H8RegIndirOper(e_const.REG_SP, tsize, va, 0, oflags=e_const.OF_POSTINC),
                h8_operands.H8RegMultiOper(rn - diff, rcount),
            )
        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    elif diff == 4:
        # ldc/stc - anything that touches ccr or exr
        # we'll build it for ldc, and reverse it if it's stc
        d2 = val2 >> 8
        isStc = (val2 >> 7) & 1
        oflags = 0
        tsize = 2
        exr = val & 0x1

        if d2 == 6:
            op, nmnem, opers, iflags, nisz = p_i8_CCR(va, val2, buf, off, tsize, exr)
            return op, 'andc', opers, iflags, isz

        elif d2 == 5:
            op, nmnem, opers, iflags, nisz = p_i8_CCR(va, val2, buf, off, tsize, exr)
            return op, 'xorc', opers, iflags, isz

        else:
            iflags = e_const.IF_W
            tsize = 2

            if d2 == 0x04:
                # xx:8, EXR
                op, nmnem, opers, iflags, nisz = p_i8_CCR(va, val2, buf, off, tsize, exr)
                return op, 'orc', opers, iflags, isz

            elif d2 == 0x07:
                # xx:8, EXR
                op, nmnem, opers, niflags, nisz = p_i8_CCR(va, val2, buf, off, tsize, exr)
                iflags = e_const.IF_B
                return op, 'ldc', opers, iflags, isz

            elif d2 in (0x69, 0x6d):
                # @ERs,CCR / @ERs+,CCR
                if d2 == 0x6d:
                    oflags = e_const.OF_POSTINC
                ers = (val2 >> 4) & 0x7
                opers = (h8_operands.H8RegIndirOper(ers, tsize, va, oflags=oflags),
                         h8_operands.H8RegDirOper(e_regs.REG_CCR + exr, 4, va))

            elif d2 in (0x6f, 0x78):
                # @(d:16,ERs),CCR / @(d:24,ERs)
                if d2 == 0x78:
                    val3, disp = struct.unpack('>HI', buf[off + 4:off + 10])
                    isStc = (val3 >> 7) & 1
                    isz = 10
                    dispsz = 4
                else:
                    disp, = struct.unpack('>H', buf[off + 4:off + 6])
                    isz = 6
                    dispsz = 2
                ers = (val2 >> 4) & 0x7
                opers = (h8_operands.H8RegIndirOper(ers, tsize, va, disp, dispsz),
                         h8_operands.H8RegDirOper(e_regs.REG_CCR + exr, 4, va))

            elif d2 == 0x6b:
                # @aa:16,CCR / @aa:24,CCR
                if val2 & 0x20:
                    aa, = struct.unpack('>I', buf[off + 4:off + 8])
                    isz = 8
                    aasize = 4
                else:
                    aa, = struct.unpack('>H', buf[off + 4:off + 6])
                    isz = 6
                    aasize = 2
                isStc = (val2 >> 7) & 1
                opers = (h8_operands.H8AbsAddrOper(aa, tsize, aasize),
                         h8_operands.H8RegDirOper(e_regs.REG_CCR + exr, 4, va))

            # after all the decisions...
            mnem = ('ldc', 'stc')[isStc]
            if isStc:
                opers = opers[::-1]

    elif diff == 0xc:
        if val2 & 0xfd00 == 0x5000:
            # mulxs
            mnem = 'mulxs'
            op, nmnem, opers, iflags, nisz = p_Rs_Rd_4b(va, val, buf, off, tsize=1)
        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    elif diff == 0xd:
        if val2 & 0xfd00 == 0x5100:
            # divxs
            mnem = 'divxs'
            op, nmnem, opers, iflags, nisz = p_Rs_Rd_4b(va, val, buf, off, tsize)
        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    elif diff == 0xe:
        if val2 & 0xff00 == 0x7b00:
            mnem = 'tas'
            # FIXME: check out what this decodes to
            tsize = 1
            erd = (val2 >> 4) & 7
            opers = (h8_operands.H8RegIndirOper(erd, tsize, va, oflags=0),)
        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    elif diff == 0xf:
        if val2 & 0xfc00 == 0x6400:
            # or/xor/and
            nop, nmnem, opers, iflags, nisz = p_ERs_ERd(va, val2, buf, off, tsize=4)
            op = (val << 8) | (val2 >> 8)
            mnembits = (val2 >> 8) & 3
            mnem = ('or', 'xor', 'and')[mnembits]
        else:
            raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    else:
        raise envi.InvalidInstruction(bytez=buf[off:off + 16], va=va)

    return (op, mnem, opers, iflags, isz)
def disasm(self, bytez, offset, va):
    # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.

    # Stuff for opcode parsing
    tabdesc = all_tables[opcode86.TBL_Main]  # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0
    pho_prefixes = 0  # faux prefixes...don't immediately apply them, they may not be the prefixes we're looking for

    while True:

        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break

        # print("OBYTE",hex(obyte))
        if obyte in mandatory_prefixes:
            pho_prefixes |= p
            # ratchet through the tables
            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            # print("TABIDX: %d" % tabidx)
            opdesc = tabdesc[0][tabidx]
            # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))
            tabdesc = all_tables[opdesc[0]]
        else:
            prefixes |= p

            if p & PREFIX_VEX:
                if p == PREFIX_VEX2:
                    offset += 1
                    imm1 = ord(bytez[offset])
                    if imm1 & 0xc0 != 0xc0:
                        # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                        break

                    inv1 = imm1 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)

                    combined_mand_prefixes = vex_pp_table[pp]

                elif p == PREFIX_VEX3:
                    imm1 = ord(bytez[offset + 1])
                    if imm1 & 0xc0 != 0xc0:
                        # shouldn't in 64-bit mode, but in 32-bit, this keeps LDS from colliding
                        break

                    offset += 2
                    imm2 = ord(bytez[offset])
                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f
                    # print("imms: %x %x \tl: %d\tvvvv: 0x%x\tpp: %d\tm_mmmm: 0x%x" % (imm1, imm2, vex_l, vvvv, pp, m_mmmm))

                    prefixes |= ((inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                    prefixes |= ((imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                    combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                # VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx is None:
                        continue
                    # print("TABIDX: %d" % tabidx)
                    opdesc = tabdesc[0][tabidx]
                    # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))
                    tabdesc = all_tables[opdesc[0]]

        offset += 1
        continue

    if obyte != 0x0f:
        prefixes |= pho_prefixes

    while True:

        obyte = ord(bytez[offset])
        # print("OP-OBYTE", hex(obyte))

        if (obyte > tabdesc[5]):
            # print("Jumping To Overflow Table: %s" % hex(tabdesc[5]))
            tabdesc = all_tables[tabdesc[6]]

        tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
        # print("TABIDX: %s" % tabidx)
        opdesc = tabdesc[0][tabidx]
        # print('OPDESC: %s -> %s' % (repr(opdesc), opcode86.tables_lookup.get(opdesc[0])))

        # Hunt down multi-byte opcodes
        nexttable = opdesc[0]
        # print("NEXT", nexttable, hex(obyte), opcode86.tables_lookup.get(nexttable))
        if nexttable != 0:
            # If we have a sub-table specified, use it.
            # print("Multi-Byte Next Hop For (%s, %s)" % (hex(obyte), opdesc[0]))
            tabdesc = all_tables[nexttable]

            # Account for the table jump we made
            offset += 1

            continue

        # We are now on the final table...
        # print(repr(opdesc))
        tbl_opercnt = tabdesc[1]
        mnem = opdesc[3 + tbl_opercnt]
        optype = opdesc[1]
        if tabdesc[3] == 0xff:
            offset += 1  # For our final opcode byte
        break

    if optype == 0:
        # print(tabidx)
        # print(opdesc)
        # print("OPTYPE 0")
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    operoffset = 0
    # Begin parsing operands based off address method
    for i in range(operands_index, operands_index + tbl_opercnt):

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        # print("ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype))

        # handles tsize calculations including new REX prefixes
        tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

        # print(hex(opertype), hex(addrmeth), hex(tsize))

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i], tsize, prefixes)

        else:
            # print("ADDRTYPE", hex(addrmeth))
            ameth = self._dis_amethods[addrmeth >> 16]
            # print("AMETH", ameth)
            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if len(operands) and tsize != operands[-1].tsize:
                        # Check if we are an explicitly signed operand *or* REX.W
                        if operflags & opcode86.OP_SIGNED or prefixes & PREFIX_REX_W:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize

                else:
                    osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16])

        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)

        operoffset += osize

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(optype, 0) | self._dis_oparch

    if prefixes & ed_i386.PREFIX_REP_MASK:
        iflags |= envi.IF_REPEAT

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = Amd64Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret
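# Didactic sketch (not part of the module above): the three-byte VEX parse packs its
# fields straight into the existing prefix bitmask, which makes the inversion rules easy
# to miss. This standalone helper just returns the raw fields of the two payload bytes
# that follow a 0xC4 escape; the field names are illustrative, not the module's constants.
def _demo_vex3_fields(byte1, byte2):
    """Decode the payload bytes of a three-byte VEX (0xC4) prefix."""
    inv1 = byte1 ^ 0xff
    return {
        'm_mmmm': byte1 & 0x1f,                  # selects the 0F / 0F38 / 0F3A opcode maps
        'rxb':    (inv1 >> 5) & 0x7,             # REX.R/X/B, stored inverted in the raw byte
        'w':      (byte2 >> 7) & 1,              # VEX.W, stored non-inverted
        'vvvv':   ((byte2 ^ 0xff) >> 3) & 0xf,   # extra source register, stored inverted
        'l':      (byte2 >> 2) & 1,              # 0 = 128-bit, 1 = 256-bit vector length
        'pp':     byte2 & 3,                     # implied 66/F3/F2 mandatory prefix
    }

# Example: the C4 E2 79 prefix bytes of "vbroadcastss xmm, m32" decode to
# m_mmmm=2 (0F38 map), pp=1 (66), w=0, l=0, vvvv=0.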
def disasm(self, bytez, offset, va):
    # Stuff for opcode parsing
    tabdesc = all_tables[0]  # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    all_prefixes = 0
    prefix_len = 0
    last_pref = 0

    while True:
        obyte = ord(bytez[offset])
        # print("PREFIXBYTE: 0x%x" % obyte)

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break
        all_prefixes |= p
        last_pref = obyte
        offset += 1
        continue

    # At this point we should have all the possible prefixes, but some may be mandatory ones that we
    # need to not use as display prefixes and use as jumps in the table instead.
    # So we're going to lie to the rest of the code in order to use them as we want
    obyte = ord(bytez[offset])
    ppref = [(None, None)]
    # print("PREFIXES: 0x%x" % all_prefixes)
    if obyte == 0x0f and MANDATORY_PREFIXES[last_pref]:
        obyte = last_pref
        ppref.append((last_pref, i386_prefixes[last_pref]))
    # print("POSTFIXES: 0x%x" % all_prefixes)

    #pdone = False
    decodings = []
    mainbyte = offset
    prefixes = all_prefixes

    # as noted above, since we can have prefixes that may or may not be mandatory,
    # we roll through those and pop off the last one, since there's two cases we have
    # to deal with: a normal prefix that just modifies the opers, and a mandatory prefix
    # that modifies the instruction semantics entirely. Either way, the mandatory prefix
    # takes precedence and whichever one wins will be at the end of the list <decodings>
    for pref, onehot in ppref:
        if pref is not None:
            obyte = pref
            offset = mainbyte
            prefixes = all_prefixes & (~onehot)
        else:
            offset = mainbyte
            obyte = ord(bytez[offset])

        tabdesc = all_tables[0]
        while True:
            # print("OBYTE", hex(obyte))
            if (obyte > tabdesc[4]):
                # print("Jumping To Overflow Table: %s" % repr(tabdesc[5]))
                tabdesc = all_tables[tabdesc[5]]

            tabidx = ((obyte - tabdesc[3]) >> tabdesc[1]) & tabdesc[2]
            # print("TABIDX: %d" % tabidx)
            if tabidx > len(tabdesc[0]):
                # print("Jumped off end of table. Continuing on")
                break

            opdesc = tabdesc[0][tabidx]
            # print('OPDESC: %s' % repr(opdesc))

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            # print("NEXT", nexttable, hex(obyte))
            if nexttable != 0:
                # If we have a sub-table specified, use it.
                # print("Multi-Byte Next Hop For %s: %s" % (hex(obyte), repr(opdesc[0])))
                # print("Jumping to table %d" % nexttable)
                tabdesc = all_tables[nexttable]
                offset += 1
                obyte = ord(bytez[offset])
                continue

            # We are now on the final table...
            # print(repr(opdesc))
            mnem = opdesc[6]
            optype = opdesc[1]
            if tabdesc[2] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype != 0:
            decodings.append((opdesc, offset, prefixes))

    if not len(decodings):
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    opdesc, offset, all_prefixes = decodings.pop()
    optype = opdesc[1]
    mnem = opdesc[6]

    if optype == 0:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    operoffset = 0
    # Begin parsing operands based off address method
    for i in operand_range:

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        # print("ADDRTYPE: %.8x OPERTYPE: %.8x" % (addrmeth, opertype))
        # print("ALLPREFIXES 0x%x" % (all_prefixes))

        tsize = self._dis_calc_tsize(opertype, all_prefixes, operflags)

        # print(hex(opertype), hex(addrmeth), hex(tsize))

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[5 + i], tsize, all_prefixes)

        else:
            # print("ADDRTYPE", hex(addrmeth))
            ameth = self._dis_amethods[addrmeth >> 16]
            # print("AMETH", ameth)
            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth == opcode86.ADDRMETH_I or addrmeth == opcode86.ADDRMETH_J:
                    osize, oper = ameth(bytez, offset + operoffset, tsize, all_prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED and len(operands) and tsize != operands[-1].tsize:
                        otsize = operands[-1].tsize
                        oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                        oper.tsize = otsize

                else:
                    osize, oper = ameth(bytez, offset, tsize, all_prefixes, operflags)

                    # so in the opcode maps intel directly mentions that some opcodes are
                    # ADDRMETH_W but if the operand is a memory ref, it's always of a specific
                    # size, with no rhyme or reason as to which it is. So we directly embed
                    # that knowledge into the opcodes mappings we maintain and pluck it out
                    # here.
                    if getattr(oper, "_is_deref", False):
                        memsz = OP_EXTRA_MEMSIZES[(operflags & OP_MEMMASK) >> 4]
                        if memsz is not None:
                            oper.tsize = memsz

            except struct.error as e:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16])

        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)

        operoffset += osize

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(optype & 0xFFFF, 0) | self._dis_oparch

    if all_prefixes & PREFIX_REP_MASK:
        iflags |= envi.IF_REPEAT

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if optype == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = i386Opcode(va, optype, mnem, all_prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret
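# A small sketch of the sign-extension step above (this assumes e_bits.sign_extend()
# takes a value plus current and target operand sizes in bytes; the helper below is a
# local stand-in for illustration, not the envi.bits implementation).
def _demo_sign_extend(value, cursize, newsize):
    """Sign-extend an unsigned immediate from cursize bytes to newsize bytes."""
    signbit = 1 << (cursize * 8 - 1)
    if value & signbit:
        # fill the new high bytes with ones when the top bit of the narrow value is set
        value |= ((1 << (newsize * 8)) - 1) ^ ((1 << (cursize * 8)) - 1)
    return value

# e.g. the imm8 of "add eax, -1" (83 C0 FF) widens to match the 4-byte register operand:
assert _demo_sign_extend(0xff, 1, 4) == 0xffffffff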
def disasm(self, bytez, offset, va):
    '''
    The main amd64 decoder function.

    The initial steps it takes are determining what potential prefixes are attached to the
    instruction. By "potential", we mean that at this stage we don't know if things like 66, F2, F3
    are being used as normal prefixes (representing things like a rep prefix) or if they're being
    used as mandatory prefixes that completely change which instruction we're decoding. All
    potential prefixes are stored in the pho_prefixes variable.

    To that end, there's some tap dancing we need to do to deal with what the intel manual refers
    to as "mandatory prefixes". If we hit a main opcode byte of 0F and we know we have a
    potentially mandatory prefix (and we're not in VEX land), we treat the byte right before the 0F
    as the only potential mandatory prefix (as laid out in the intel manual). Then we basically
    brute force the decoding since we really only have two paths to try. One where the mandatory
    prefix is merely a normal prefix (and doesn't affect which set of tables we traverse down) and
    one where the mandatory prefix does affect what tables we ratchet through (and thus directly
    changes which instruction we're looking at).

    For each case, we append all the relevant output to a list (should the decoding produce a
    meaningful output). If we end up producing no instruction definitions from our brute force
    loop, we've hit an invalid sequence of instruction bytes and we throw an exception. If only one
    path produces output, then that's our result and we proceed on to use the instruction
    definition to determine what addressing methods and size types to use when determining
    operands. If both paths produce a valid instruction definition, then the path that uses the
    mandatory prefix to directly change the instruction takes precedence over the path where it's
    just a normal prefix.

    In both the one- and two-result cases, outside of our instruction decoding loop, we've kept a
    list of the possible decodings we could have hit, and just merely pop off the end of the list
    (so order matters when building the ppref variable).
    '''
    # FIXME: for newer instructions, the VEX.W bit needs to be able to change the opcode. ugh.
    # FIXME: And also REX.W

    # Stuff for opcode parsing
    tabdesc = all_tables[opcode86.TBL_Main]  # A tuple (optable, shiftbits, mask byte, sub, max)
    startoff = offset  # Use startoff as a size knob if needed
    isvex = False
    vexw = None
    last_pref = 0
    ppref = [(None, None)]

    # Stuff we'll be putting in the opcode object
    optype = None  # This gets set if we successfully decode below
    mnem = None
    operands = []

    prefixes = 0
    pho_prefixes = 0  # faux prefixes...don't immediately apply them, they may not be the prefixes we're looking for

    while True:

        obyte = ord(bytez[offset])

        # This line changes in 64 bit mode
        p = self._dis_prefixes[obyte]
        if p is None:
            break

        if MANDATORY_PREFIXES[obyte]:
            pho_prefixes |= p
            last_pref = obyte
        else:
            prefixes |= p

            if p & PREFIX_VEX:
                isvex = True
                if p == PREFIX_VEX2:
                    offset += 1
                    imm1 = ord(bytez[offset])
                    # shouldn't in 64-bit mode, but in 32-bit, this keeps LES from colliding
                    # TODO: So we're always in 64 bit here. This will need to be here once we unify 32/64 decoding
                    #if imm1 & 0xc0 != 0xc0:
                    #    break

                    inv1 = imm1 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm1 & 4) >> 2]
                    vvvv = ((inv1 >> 3) & 0xf)
                    pp = imm1 & 3

                    prefixes |= (inv1 << 11) & PREFIX_REX_R  # R is inverted
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)

                    combined_mand_prefixes = vex_pp_table[pp]

                elif p == PREFIX_VEX3:
                    imm1 = ord(bytez[offset + 1])
                    offset += 2
                    # TODO: So we're always in 64 bit here. This will need to be here once we unify 32/64 decoding
                    #if imm1 & 0xc0 != 0xc0:
                    #    break

                    imm2 = ord(bytez[offset])
                    inv1 = imm1 ^ 0xff
                    inv2 = imm2 ^ 0xff

                    vex_l = (0, PREFIX_VEX_L)[(imm2 & 4) >> 2]
                    vvvv = ((inv2 >> 3) & 0xf)
                    pp = imm2 & 3
                    m_mmmm = imm1 & 0x1f

                    prefixes |= ((inv1 << 11) & PREFIX_REX_RXB)  # RXB are inverted
                    vexw = ((imm2 << 12) & PREFIX_REX_W)  # W is not inverted
                    prefixes |= vexw
                    prefixes |= vex_l
                    prefixes |= (vvvv << VEX_V_SHIFT)  # vvvv

                    combined_mand_prefixes = vex_pp_table[pp] + vex3_mmmm_table[m_mmmm]

                # VEX prefixes default to 0F table, possibly F20F, F30F or 660F
                # VEX3 prefixes may also specify depths into 38 and 3A tables
                for tabidx in combined_mand_prefixes:
                    if tabidx is None:
                        continue
                    opdesc = tabdesc[0][tabidx]
                    tabdesc = all_tables[opdesc[0]]

                # So VEX and mandatory prefixes don't really intermingle
                offset += 1
                break

        offset += 1
        continue

    if obyte != 0x0f:
        prefixes |= pho_prefixes

    # intel manual says VEX and legacy prefixes don't intermingle
    if obyte == 0x0f and MANDATORY_PREFIXES[last_pref] and not isvex:
        obyte = last_pref
        ppref.append((last_pref, amd64_prefixes[last_pref]))

    decodings = []
    mainbyte = offset
    all_prefixes = prefixes
    ogtabdesc = tabdesc

    # onehot in this case refers to the prefixes that are defined in i386/disasm.py where only
    # one bit of the entire integer is set. We use that to quickly pop things in and out of the
    # prefixes list
    for pref, onehot in ppref:
        tabdesc = ogtabdesc
        offset = mainbyte
        if pref is not None:
            # our mandatory prefix is not none, which means that we have to jump through the tables
            # using the mandatory prefix byte as our "main byte"
            # As a bit of a hack, the 66/F2/F3 entries in the main table
            # directly point to the 660F/F20F/F30F tables since we're carefully tap dancing around
            # what our opcode byte really is
            obyte = pref
            # since we're treating this prefix as mandatory and not as REPNZ/REPZ/etc, we need to rip
            # it out of the pho_prefixes before we combine pho_prefixes with the main prefixes container
            all_prefixes = prefixes | (pho_prefixes & (~onehot))
        else:
            # treat nothing as a mandatory prefix (or we defaulted into here if we got no mandatory
            # prefixes). For most instructions this will be the normal case.
            obyte = ord(bytez[offset])
            all_prefixes = prefixes | pho_prefixes

        while True:

            if (obyte > tabdesc[5]):
                tabdesc = all_tables[tabdesc[6]]

            tabidx = ((obyte - tabdesc[4]) >> tabdesc[2]) & tabdesc[3]
            opdesc = tabdesc[0][tabidx]

            # Hunt down multi-byte opcodes
            nexttable = opdesc[0]
            if nexttable != 0:
                # If we have a sub-table specified, use it.
                tabdesc = all_tables[nexttable]

                # Account for the table jump we made
                offset += 1
                obyte = ord(bytez[offset])

                continue

            # We are now on the final table...
            tbl_opercnt = tabdesc[1]
            mnem = opdesc[3 + tbl_opercnt]
            optype = opdesc[1]
            if tabdesc[3] == 0xff:
                offset += 1  # For our final opcode byte
            break

        if optype & INS_VEXREQ and not isvex:
            continue

        if optype != 0:
            decodings.append((tabdesc, opdesc, offset, all_prefixes))

    if not len(decodings):
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    tabdesc, opdesc, offset, prefixes = decodings.pop()
    optype = opdesc[1]
    tbl_opercnt = tabdesc[1]
    mnem = opdesc[3 + tbl_opercnt]

    if optype == 0:
        raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16], va=va)

    operoffset = 0
    # Begin parsing operands based off address method
    for i in range(operands_index, operands_index + tbl_opercnt):

        oper = None  # Set this if we end up with an operand
        osize = 0

        # Pull out the operand description from the table
        operflags = opdesc[i]
        opertype = operflags & opcode86.OPTYPE_MASK
        addrmeth = operflags & opcode86.ADDRMETH_MASK

        # If there are no more operands, break out of the loop!
        if operflags == 0:
            break

        # handles tsize calculations including new REX prefixes
        tsize = self._dis_calc_tsize(opertype, prefixes, operflags)

        # If addrmeth is zero, we have operands embedded in the opcode
        if addrmeth == 0:
            osize = 0
            oper = self.ameth_0(operflags, opdesc[2 + tbl_opercnt + i], tsize, prefixes)

        else:
            # So the 0x7f is here to help us deal with an issue between VEX and non-VEX
            # A super common pattern in vex is to add an operand somewhere in the middle of the
            # existing operands. So if we have like cmpps xmm2, 17 in non-VEX, the vex version
            # will look like vpsrlw xmm3, xmm4, 17.
            # The fun bit of this is that the vex only portions aren't exclusive to the VEX-only
            # addressing methods, so we can have ADDRMETH_V be skipped outside of VEX mode too, and not
            # just things like ADDRMETH_H. Hence, we need a new flag that I stash in the upper bits of
            # instruction operand definition so we can know when to skip operands
            ameth = self._dis_amethods[(addrmeth >> 16) & 0x7F]
            vex_skip = addrmeth & opcode86.ADDRMETH_VEXSKIP
            if not isvex and vex_skip:
                continue

            if ameth is None:
                raise Exception("Implement Addressing Method 0x%.8x" % addrmeth)

            # NOTE: Depending on your addrmethod you may get beginning of operands, or offset
            try:
                if addrmeth in IMM_REQOFFS:
                    osize, oper = ameth(bytez, offset + operoffset, tsize, prefixes, operflags)

                    # If we are a sign extended immediate and not the same as the other operand,
                    # do the sign extension during disassembly so nothing else has to worry about it..
                    if operflags & opcode86.OP_SIGNED:
                        if len(operands) and tsize != operands[-1].tsize:
                            otsize = operands[-1].tsize
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, otsize)
                            oper.tsize = otsize
                        elif not len(operands):
                            oper.imm = e_bits.sign_extend(oper.imm, oper.tsize, self._dis_default_size)
                            oper.tsize = self._dis_default_size

                else:
                    # see same code section in i386 for this rationale
                    osize, oper = ameth(bytez, offset, tsize, prefixes, operflags)

                    if getattr(oper, "_is_deref", False):
                        memsz = OP_EXTRA_MEMSIZES[(operflags & OP_MEMMASK) >> 4]
                        if memsz is not None:
                            oper.tsize = memsz

            except struct.error:
                # Catch struct unpack errors due to insufficient data length
                raise envi.InvalidInstruction(bytez=bytez[startoff:startoff + 16])

        if oper is not None:
            # This is a filthy hack for now...
            oper._dis_regctx = self._dis_regctx
            operands.append(oper)

        operoffset += osize

    typemask = optype & 0xFFFF

    # Pull in the envi generic instruction flags
    iflags = iflag_lookup.get(typemask, 0) | self._dis_oparch

    if prefixes & PREFIX_REP_MASK:
        iflags |= envi.IF_REPEAT

    if priv_lookup.get(mnem, False):
        iflags |= envi.IF_PRIV

    # Lea will have a reg-mem/sib operand with _is_deref True, but should be false
    if typemask == opcode86.INS_LEA:
        operands[1]._is_deref = False

    ret = Amd64Opcode(va, optype, mnem, prefixes, (offset - startoff) + operoffset, operands, iflags)
    return ret
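# Toy illustration of the mandatory-prefix handling described in the docstring above
# (the helper name and the string arguments are invented for the example; the real code
# walks all_tables and appends (tabdesc, opdesc, offset, prefixes) tuples). Both candidate
# decodings are collected in order, and because the mandatory-prefix path is appended
# last, decodings.pop() lets it take precedence when both paths produce a definition.
def _demo_mandatory_prefix_choice(found_plain, found_mandatory):
    decodings = []
    if found_plain is not None:        # path 1: F3 treated as a plain rep prefix
        decodings.append(found_plain)
    if found_mandatory is not None:    # path 2: F3 treated as part of the opcode (e.g. F3 0F 58 -> addss)
        decodings.append(found_mandatory)
    if not decodings:
        raise ValueError('invalid instruction bytes')
    return decodings.pop()             # the mandatory-prefix decoding wins when both exist

# _demo_mandatory_prefix_choice('rep-prefixed form', 'addss form') -> 'addss form'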