def get_op(ea, op, stkvars=None):
    '''ea_t -> int -> opt:{int : tinfo_t} -> op_ret

    Resolve operand number `op` of the instruction at `ea` into an op_ret.

    ea      -- address of the instruction to decode.
    op      -- operand index; it is first passed through mips_op_hack(), and
               the adjusted index is the one used from then on (including in
               error messages).
    stkvars -- optional mapping from the offset returned by
               idaapi.calc_stkvar_struc_offset() to a (name, tinfo) pair, as
               unpacked below.  None is treated as an empty mapping.

    Returns the op_ret built for the operand.

    Raises OperandUnresolvableError when the operand type is not handled, or
    when the operand is flagged as a stack variable but no entry for its
    offset is available in `stkvars`.
    '''
    cmd = idautils.DecodeInstruction(ea)
    cmd.Operands = get_operands(cmd)  # for mips_op_hack
    op = mips_op_hack(cmd, op)
    opd = cmd[op]

    def unresolvable():
        # Built lazily so idc.atoa() is only called on the failure paths.
        return OperandUnresolvableError(
            'unable to get operand %u at %s' % (op, idc.atoa(ea)))

    if opd.type == idaapi.o_reg:
        # gpr, XXX sorta MIPS-specific
        return op_ret(op_ty.reg, regs.gpr(opd.reg), 0)

    if opd.type == idaapi.o_idpspec1:
        # fpr, XXX sorta MIPS-specific
        return op_ret(op_ty.reg, regs.fpr(opd.reg), 0)

    if opd.type in (idaapi.o_near, idaapi.o_mem):
        return op_ret(op_ty.name, idc.Name(opd.addr), 0)

    if idc.isStkvar1(idc.GetFlags(ea)):
        # IDA seems to set this flag even for operands beyond the second,
        # i.e. both of these are true for isStkvar1:
        # .text:10003A84 sd $a1, 0x2E0+var_58($sp)
        # .text:10003A68 addiu $a1, $sp, 0x2E0+var_2D8
        #
        # `stkvars` is optional; without this substitution a missing mapping
        # would surface as a TypeError from subscripting None rather than as
        # the OperandUnresolvableError used for every other failure.
        known_vars = {} if stkvars is None else stkvars
        try:
            func = idaapi.get_func(ea)
            off = idaapi.calc_stkvar_struc_offset(func, ea, op)
            (name, ti) = known_vars[off]
            return op_ret_for_ti(ti, name, off, off)
        except KeyError:
            raise unresolvable()

    if opd.type in (idaapi.o_imm, idaapi.o_displ):
        return cpu_ida.ida_current_cpu().data.get_op_addrmode(ea, op, cmd)

    raise unresolvable()
def single_operand_parser(self, address, op, idx):
    """Parse a PPC operand.

    Builds the nested-list AST describing one operand of the instruction
    located at `address`.

    Args:
        address: address of the instruction owning the operand. Used to
            read the item flags, to look up stack variable names and, for
            OPERAND_TYPE_IDPSPEC2, to re-decode the instruction.
        op: the IDA operand object. Its `type`, `dtyp`, `reg`, `addr`,
            `value`, `specflag1` and `specflag2` fields are read.
        idx: index of the operand within the instruction. Stack variable
            names are only looked up for indexes 0 and 1.

    Returns:
        None if the operand type is OPERAND_TYPE_NO_OPERAND. Otherwise a
        list whose first element is the operand-width node, followed by
        the expression for the operand (nothing follows the width for
        operand types this parser does not handle). For
        OPERAND_TYPE_IDPSPEC2 the result is instead a list of one to
        three such width-prefixed lists.
    """
    if op.type == OPERAND_TYPE_NO_OPERAND:
        return None

    # NOTE: the segment of `address` is deliberately not looked up here.
    # The PPC parser never used the segment bitness, and fetching it made
    # the parser fail with AttributeError whenever idaapi.getseg()
    # returned None.

    def constraint_value(value):
        # Displacements above 2**16 are re-read as negative 32-bit values.
        if value > 2**16:
            return -(2**32 - value)
        return value

    def size_mask():
        # Keep the value's size: dtyp 0 -> 8 bits, 1 -> 16 bits,
        # anything else -> 32 bits.
        dtyp = self.as_byte_value(op.dtyp)
        if dtyp == 0:
            return 0xff
        if dtyp == 1:
            return 0xffff
        return 0xffffffff

    def named_or_raw(target, name):
        # Wrap the address in a named value when a name is known.
        if name:
            return arch.ExpressionNamedValue(long(target), name)
        return target

    # Start creating the AST, the root entry is always the width of the
    # operand
    operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]

    # Compose the rest of the AST
    #
    if op.type == OPERAND_TYPE_DISPLACEMENT:
        # A displacement operand might refer to a stack variable; a name
        # is only looked up when IDA flags this operand index as one.
        var_name = None
        flags = idc.GetFlags(address)
        if (idx == 0 and idc.isStkvar0(flags)) or (
                idx == 1 and idc.isStkvar1(flags)):
            var_name = self.get_operand_stack_variable_name(
                address, op, idx)

        phrase = [
            self.NODE_TYPE_OPERATOR_PLUS,
            [self.NODE_TYPE_REGISTER,
             self.REGISTERS[self.as_byte_value(op.reg)], 0]]

        if var_name:
            value = arch.ExpressionNamedValue(long(op.addr), var_name)
        else:
            value = constraint_value(op.addr)

        operand.append([
            self.NODE_TYPE_DEREFERENCE,
            phrase + [[self.NODE_TYPE_VALUE, value, 1]]])

    elif op.type == OPERAND_TYPE_REGISTER:
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.reg)], 1])

    elif op.type == OPERAND_TYPE_MEMORY:
        value = named_or_raw(op.addr, self.get_address_name(op.addr))
        operand.append([
            self.NODE_TYPE_DEREFERENCE,
            [self.NODE_TYPE_VALUE, value, 0]])

    elif op.type == OPERAND_TYPE_IMMEDIATE:
        mask = size_mask()
        operand.append([self.NODE_TYPE_VALUE, op.value & mask, 0])

    elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):
        value = named_or_raw(op.addr, self.get_address_name(op.addr))
        operand.append([self.NODE_TYPE_VALUE, value, 0])

    elif op.type == OPERAND_TYPE_PHRASE:
        # Not supported: emit a placeholder symbol describing the operand.
        # The parenthesised print emits the same text under the Python 2
        # print statement and under the print() function.
        print('***Dunno how to parse PHRASE')
        operand.append([
            self.NODE_TYPE_SYMBOL,
            'UNK_PHRASE(val:%d, reg:%d, type:%d)' % (
                op.value, self.as_byte_value(op.reg), op.type),
            0])

    elif op.type == OPERAND_TYPE_IDPSPEC0:
        # Handle Special Purpose Registers
        register = self.SPR_REGISTERS.get(
            op.value, 'UNKNOWN_REGISTER(val:%x)' % op.value)
        operand.append([self.NODE_TYPE_REGISTER, register, 0])

    elif op.type == OPERAND_TYPE_IDPSPEC1:
        # Two registers: one from op.reg, one from op.specflag1.
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.reg)], 1])
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.specflag1)], 2])

    elif op.type == OPERAND_TYPE_IDPSPEC2:
        # IDPSPEC2 is the operand type for all rlwinm and rlwnm
        # instructions, which are in general "op reg, reg, byte, byte,
        # byte" or equivalent. Simplified mnemonics sometimes take fewer
        # than five arguments, so which pieces exist is read from the
        # instruction's auxpref bits.
        mask = size_mask()

        # Get the object representing the instruction's data.
        # It varies between IDA pre-5.7 and 5.7 onwards, the following
        # check takes care of it (for more detail look into the similar
        # construct in arch.py)
        if hasattr(idaapi, 'cmd'):
            idaapi.decode_insn(address)
            ida_instruction = idaapi.cmd
        else:
            idaapi.ua_code(address)
            ida_instruction = idaapi.cvar.cmd

        auxpref = ida_instruction.auxpref
        width = self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]

        # Bit 0x0020 set: op.reg holds an immediate rather than a
        # register number (labelled "SH" in the old debug output --
        # presumably the shift amount; confirm against the PPC module).
        if auxpref & 0x0020:
            first = [width, [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(op.reg) & mask, 0]]
        else:
            first = [width, [
                self.NODE_TYPE_REGISTER,
                self.REGISTERS[self.as_byte_value(op.reg)], 0]]

        # The result replaces the single-operand AST with a list of
        # width-prefixed operands.
        operand = [first]

        # Bit 0x0040: value carried in specflag1 ("MB" in the old debug
        # output).
        if auxpref & 0x0040:
            operand.append([width, [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(op.specflag1) & mask, 0]])

        # Bit 0x0080: value carried in specflag2 ("ME" in the old debug
        # output).
        if auxpref & 0x0080:
            operand.append([width, [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(op.specflag2) & mask, 0]])

    elif op.type == OPERAND_TYPE_IDPSPEC3:
        # CR registers
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.CR_REGISTERS[self.as_byte_value(op.reg)], 0])

    elif op.type == OPERAND_TYPE_IDPSPEC4:
        # The bit in the CR to check for; the raw number is used as the
        # register name.
        operand.append([
            self.NODE_TYPE_REGISTER, self.as_byte_value(op.reg), 0])

    elif op.type == OPERAND_TYPE_IDPSPEC5:
        # Device Control Register, implementation specific
        operand.append([
            self.NODE_TYPE_REGISTER, 'DCR(%x)' % op.value, 0])

    return operand
def single_operand_parser(self, address, op, idx):
    """Parse a PPC operand.

    Builds the nested-list AST describing one operand of the instruction
    located at `address`.

    Args:
        address: address of the instruction owning the operand. Used to
            read the item flags, to look up stack variable names and, for
            OPERAND_TYPE_IDPSPEC2, to re-decode the instruction.
        op: the IDA operand object. Its `type`, `dtyp`, `reg`, `addr`,
            `value`, `specflag1` and `specflag2` fields are read.
        idx: index of the operand within the instruction. Stack variable
            names are only looked up for indexes 0 and 1.

    Returns:
        None if the operand type is OPERAND_TYPE_NO_OPERAND. Otherwise a
        list whose first element is the operand-width node, followed by
        the expression for the operand (nothing follows the width for
        operand types this parser does not handle). For
        OPERAND_TYPE_IDPSPEC2 the result is instead a list of one to
        three such width-prefixed lists.
    """
    if op.type == OPERAND_TYPE_NO_OPERAND:
        return None

    # NOTE: the segment of `address` is deliberately not looked up here.
    # The PPC parser never used the segment bitness, and fetching it made
    # the parser fail with AttributeError whenever idaapi.getseg()
    # returned None.

    def constraint_value(value):
        # Displacements above 2**16 are re-read as negative 32-bit values.
        if value > 2**16:
            return -(2**32 - value)
        return value

    def size_mask():
        # Keep the value's size: dtyp 0 -> 8 bits, 1 -> 16 bits,
        # anything else -> 32 bits.
        dtyp = self.as_byte_value(op.dtyp)
        if dtyp == 0:
            return 0xff
        if dtyp == 1:
            return 0xffff
        return 0xffffffff

    def named_or_raw(target, name):
        # Wrap the address in a named value when a name is known.
        if name:
            return arch.ExpressionNamedValue(long(target), name)
        return target

    # Start creating the AST, the root entry is always the width of the
    # operand
    operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]

    # Compose the rest of the AST
    #
    if op.type == OPERAND_TYPE_DISPLACEMENT:
        # A displacement operand might refer to a stack variable; a name
        # is only looked up when IDA flags this operand index as one.
        var_name = None
        flags = idc.GetFlags(address)
        if (idx == 0 and idc.isStkvar0(flags)) or (
                idx == 1 and idc.isStkvar1(flags)):
            var_name = self.get_operand_stack_variable_name(
                address, op, idx)

        phrase = [
            self.NODE_TYPE_OPERATOR_PLUS,
            [self.NODE_TYPE_REGISTER,
             self.REGISTERS[self.as_byte_value(op.reg)], 0]]

        if var_name:
            value = arch.ExpressionNamedValue(long(op.addr), var_name)
        else:
            value = constraint_value(op.addr)

        operand.append([
            self.NODE_TYPE_DEREFERENCE,
            phrase + [[self.NODE_TYPE_VALUE, value, 1]]])

    elif op.type == OPERAND_TYPE_REGISTER:
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.reg)], 1])

    elif op.type == OPERAND_TYPE_MEMORY:
        value = named_or_raw(op.addr, self.get_address_name(op.addr))
        operand.append([
            self.NODE_TYPE_DEREFERENCE,
            [self.NODE_TYPE_VALUE, value, 0]])

    elif op.type == OPERAND_TYPE_IMMEDIATE:
        mask = size_mask()
        operand.append([self.NODE_TYPE_VALUE, op.value & mask, 0])

    elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):
        value = named_or_raw(op.addr, self.get_address_name(op.addr))
        operand.append([self.NODE_TYPE_VALUE, value, 0])

    elif op.type == OPERAND_TYPE_PHRASE:
        # Not supported: emit a placeholder symbol describing the operand.
        # The parenthesised print emits the same text under the Python 2
        # print statement and under the print() function.
        print('***Dunno how to parse PHRASE')
        operand.append([
            self.NODE_TYPE_SYMBOL,
            'UNK_PHRASE(val:%d, reg:%d, type:%d)' % (
                op.value, self.as_byte_value(op.reg), op.type),
            0])

    elif op.type == OPERAND_TYPE_IDPSPEC0:
        # Handle Special Purpose Registers
        register = self.SPR_REGISTERS.get(
            op.value, 'UNKNOWN_REGISTER(val:%x)' % op.value)
        operand.append([self.NODE_TYPE_REGISTER, register, 0])

    elif op.type == OPERAND_TYPE_IDPSPEC1:
        # Two registers: one from op.reg, one from op.specflag1.
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.reg)], 1])
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.specflag1)], 2])

    elif op.type == OPERAND_TYPE_IDPSPEC2:
        # IDPSPEC2 is the operand type for all rlwinm and rlwnm
        # instructions, which are in general "op reg, reg, byte, byte,
        # byte" or equivalent. Simplified mnemonics sometimes take fewer
        # than five arguments, so which pieces exist is read from the
        # instruction's auxpref bits.
        mask = size_mask()

        # Get the object representing the instruction's data.
        # It varies between IDA pre-5.7 and 5.7 onwards, the following
        # check takes care of it (for more detail look into the similar
        # construct in arch.py)
        if hasattr(idaapi, 'cmd'):
            idaapi.decode_insn(address)
            ida_instruction = idaapi.cmd
        else:
            idaapi.ua_code(address)
            ida_instruction = idaapi.cvar.cmd

        auxpref = ida_instruction.auxpref
        width = self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]

        # Bit 0x0020 set: op.reg holds an immediate rather than a
        # register number (labelled "SH" in the old debug output --
        # presumably the shift amount; confirm against the PPC module).
        if auxpref & 0x0020:
            first = [width, [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(op.reg) & mask, 0]]
        else:
            first = [width, [
                self.NODE_TYPE_REGISTER,
                self.REGISTERS[self.as_byte_value(op.reg)], 0]]

        # The result replaces the single-operand AST with a list of
        # width-prefixed operands.
        operand = [first]

        # Bit 0x0040: value carried in specflag1 ("MB" in the old debug
        # output).
        if auxpref & 0x0040:
            operand.append([width, [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(op.specflag1) & mask, 0]])

        # Bit 0x0080: value carried in specflag2 ("ME" in the old debug
        # output).
        if auxpref & 0x0080:
            operand.append([width, [
                self.NODE_TYPE_VALUE,
                self.as_byte_value(op.specflag2) & mask, 0]])

    elif op.type == OPERAND_TYPE_IDPSPEC3:
        # CR registers
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.CR_REGISTERS[self.as_byte_value(op.reg)], 0])

    elif op.type == OPERAND_TYPE_IDPSPEC4:
        # The bit in the CR to check for; the raw number is used as the
        # register name.
        operand.append([
            self.NODE_TYPE_REGISTER, self.as_byte_value(op.reg), 0])

    elif op.type == OPERAND_TYPE_IDPSPEC5:
        # Device Control Register, implementation specific
        operand.append([
            self.NODE_TYPE_REGISTER, 'DCR(%x)' % op.value, 0])

    return operand
def single_operand_parser(self, address, op, idx):
    """Parse a metapc operand.

    Builds the nested-list AST describing one operand of the instruction
    located at `address`.

    Args:
        address: address of the instruction owning the operand; used to
            fetch the segment bitness, the item flags and stack variable
            names.
        op: the IDA operand object. Its `type`, `dtyp`, `reg`, `phrase`,
            `addr`, `value`, `specval`, `specflag1` and `specflag2`
            fields are read.
        idx: index of the operand within the instruction.

    Returns:
        None if the operand type is OPERAND_TYPE_NO_OPERAND, otherwise a
        list starting with the operand-width node followed by whatever
        expression was built for the operand (possibly nothing).

    Raises:
        Exception: from the SIB phrase parser when the segment bitness
            is not 1.
    """

    # Convenience functions
    #
    def sib_byte(o):
        # The SIB byte is read from op.specflag2:
        #   SIB = (scale_2 << 6) | (index_3 << 3) | base
        return self.as_byte_value(o.specflag2)

    def has_sib_byte(o):
        # Does the instruction use the SIB byte?
        return self.as_byte_value(o.specflag1) == 1

    def get_sib_scale(o):
        # Top two bits select the scale; None means "not scaled".
        return (None, 2, 4, 8)[sib_byte(o) >> 6]

    def get_sib_scaled_index_reg(o):
        return self.SIB_INDEX_REGISTERS[(sib_byte(o) >> 3) & 0x7]

    def get_sib_base_reg(o):
        # instruction = op + modrm + sib + disp + imm
        #
        # If MOD is zero there's no base register, otherwise it's EBP,
        # but IDA does not expose the MOD/RM byte. Following a discussion
        # in IDA's forums:
        # http://www.hex-rays.com/forum/viewtopic.php?f=8&t=1424&p=8479&hilit=mod+rm#p8479
        # the base is taken from the low three bits of the SIB byte, and
        # callers of parse_phrase() say whether a displacement is present.
        return self.SIB_BASE_REGISTERS[sib_byte(o) & 0x7]

    def get_segment_prefix(o):
        # Returns None when the high half of specval is zero, a string
        # ("<register>:") when it indexes self.REGISTERS[0], and
        # otherwise the low 16 bits of specval as an integer.
        high_half = o.specval >> 16
        if high_half == 0:
            return None
        bank0 = self.REGISTERS[0]
        if high_half < len(bank0):
            return bank0[high_half] + ':'
        return o.specval & 0xffff

    def parse_phrase(o, has_displacement=False):
        """Parse the expression used for indexed memory access.

        Returns its AST as a nested list of lists: an empty list when
        there is nothing to add up, otherwise a PLUS node whose summands
        carry their preferred display position as last element.
        """
        # Check the addressing mode used in this segment
        if idaapi.getseg(address).bitness != 1:
            raise Exception(
                'Not yet handling addressing modes other than 32bit!')

        base_reg = get_sib_base_reg(o)
        index_reg = get_sib_scaled_index_reg(o)
        scale = get_sib_scale(o)

        # Summands are collected without their display position; the
        # position is appended once the final order is known.
        summands = []

        if base_reg != '':
            summands.append([self.NODE_TYPE_REGISTER, base_reg])
        elif has_displacement:
            # No base register but the operand has a displacement
            # (mod == 01 or mod == 10): the base is EBP.
            summands.append([self.NODE_TYPE_REGISTER, 'ebp'])

        if scale:
            # index * scale. With a base register present the product is
            # dropped when there is no index register; without a base
            # register it is always emitted.
            if base_reg == '' or index_reg != '':
                summands.append([
                    self.NODE_TYPE_OPERATOR_TIMES,
                    [self.NODE_TYPE_REGISTER, index_reg, 0],
                    [self.NODE_TYPE_VALUE, scale, 1]])
        elif index_reg != '':
            summands.append([self.NODE_TYPE_REGISTER, index_reg])

        if not summands:
            return []

        return [self.NODE_TYPE_OPERATOR_PLUS] + [
            term + [position] for position, term in enumerate(summands)]

    def named_or_raw(target, name):
        # Wrap the address in a named value when a name is known.
        if name:
            return arch.ExpressionNamedValue(long(target), name)
        return target

    # Operand parsing
    #
    if op.type == OPERAND_TYPE_NO_OPERAND:
        return None

    addressing_mode = idaapi.getseg(address).bitness

    # Start creating the AST, the root entry is always the width of the
    # operand
    operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]

    # Compose the rest of the AST
    #
    if op.type == OPERAND_TYPE_DISPLACEMENT:
        # A displacement operand might refer to a stack variable: try to
        # get any stack name that might have been assigned to it.
        flags = idc.GetFlags(address)
        flagged_as_stack_var = (
            (idx == 0 and idc.isStkvar0(flags)) or
            (idx == 1 and idc.isStkvar1(flags)))
        var_name = None
        if flagged_as_stack_var:
            var_name = self.get_operand_stack_variable_name(
                address, op, idx)

        if has_sib_byte(op) is True:
            # when SIB byte set, process the SIB indexing
            phrase = parse_phrase(op, has_displacement=True)
        else:
            phrase = [
                self.NODE_TYPE_OPERATOR_PLUS,
                [self.NODE_TYPE_REGISTER,
                 self.REGISTERS[addressing_mode + 1][op.reg], 0]]

        value = named_or_raw(op.addr, var_name)

        # The display position of the value depends on how many
        # components the phrase already has.
        displacement = [self.NODE_TYPE_VALUE, value, len(phrase) - 1]

        operand.append([
            get_segment_prefix(op),
            [self.NODE_TYPE_DEREFERENCE, phrase + [displacement]]])

    elif op.type == OPERAND_TYPE_REGISTER:
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.dtyp)][op.reg], 0])

    elif op.type == OPERAND_TYPE_MEMORY:
        value = named_or_raw(op.addr, self.get_address_name(op.addr))

        if has_sib_byte(op) is True:
            # when SIB byte set, process the SIB indexing
            phrase = parse_phrase(op)
            target = phrase + [
                [self.NODE_TYPE_VALUE, value, len(phrase) - 1]]
        else:
            target = [self.NODE_TYPE_VALUE, value, 0]

        operand.append([
            get_segment_prefix(op),
            [self.NODE_TYPE_DEREFERENCE, target]])

    elif op.type == OPERAND_TYPE_IMMEDIATE:
        # Truncate the immediate to the operand width where that width
        # is 1, 2 or 4 bytes; other widths keep the value untouched.
        width = self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]
        if width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_1:
            value = op.value & 0xff
        elif width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_2:
            value = op.value & 0xffff
        elif width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_4:
            value = op.value & 0xffffffff
        else:
            value = op.value

        operand.append([self.NODE_TYPE_VALUE, value, 0])

    elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):
        value = named_or_raw(op.addr, self.get_address_name(op.addr))

        # A string prefix is used as the node head; an integer prefix
        # produces a segment:offset node. With no prefix nothing is
        # added after the width.
        seg_prefix = get_segment_prefix(op)
        if isinstance(seg_prefix, str):
            operand.append([
                seg_prefix,
                [self.NODE_TYPE_VALUE, value, 0]])
        elif isinstance(seg_prefix, (int, long)):
            operand.append([
                self.NODE_TYPE_OPERATOR_SEGMENT_GEN,
                [self.NODE_TYPE_VALUE, seg_prefix, 0],
                [self.NODE_TYPE_VALUE, value, 1]])

    elif op.type == OPERAND_TYPE_PHRASE:
        if has_sib_byte(op) is True:
            phrase = parse_phrase(op)

            # Cases have been observed (in GCC compiled sshd) where the
            # instruction encoding is parsed into a phrase that adds up
            # a single register, e.g. [esp] giving "[" -> "+" -> "esp".
            # If a name (such as a stack variable) is associated with
            # the implicit zero, a named zero is added to the sum so
            # there is an expression to attach the substitution to.
            # Without a name the redundant addition is removed and only
            # the register node is kept.
            if phrase[0] == self.NODE_TYPE_OPERATOR_PLUS and len(
                    phrase) == 2:
                var_name = self.get_operand_stack_variable_name(
                    address, op, idx)
                if var_name:
                    phrase.append([
                        self.NODE_TYPE_VALUE,
                        arch.ExpressionNamedValue(0, var_name), 1])
                else:
                    phrase = phrase[1]
        else:
            phrase = [
                self.NODE_TYPE_REGISTER,
                self.REGISTERS[addressing_mode + 1][op.phrase], 0]

        operand.append([
            get_segment_prefix(op),
            [self.NODE_TYPE_DEREFERENCE, phrase]])

    else:
        # Processor specific operand types are all rendered as a
        # register whose name is built from op.reg. First match wins.
        special_register_formats = (
            (OPERAND_TYPE_IDPSPEC0, 'tr%d'),     # TR* registers
            (OPERAND_TYPE_IDPSPEC1, 'dr%d'),     # DR* registers
            (OPERAND_TYPE_IDPSPEC2, 'cr%d'),     # CR* registers
            (OPERAND_TYPE_IDPSPEC3, 'st(%d)'),   # FPU register stack
            (OPERAND_TYPE_IDPSPEC4, 'mm%d'),     # MMX registers
            (OPERAND_TYPE_IDPSPEC5, 'xmm%d'),    # XMM registers
        )
        for special_type, name_format in special_register_formats:
            if op.type == special_type:
                operand.append([
                    self.NODE_TYPE_REGISTER, name_format % op.reg, 0])
                break

    # NOTE: IDA has been seen to pretend there is a first operand that
    # consists of nothing but a width, e.g.
    #
    #   fld ['b2'], ['b4', ['ds', ['[', ['+', ['$', 'edx'], [...]]]]]
    #
    # Such width-only operands are returned as they are; a hack that
    # turned them into None is disabled.
    return operand
def single_operand_parser(self, address, op, idx):
    """Parse a metapc operand.

    Builds the nested-list AST describing one operand of the instruction
    located at `address`.

    Args:
        address: address of the instruction owning the operand; used to
            fetch the segment bitness, the item flags and stack variable
            names.
        op: the IDA operand object. Its `type`, `dtyp`, `reg`, `phrase`,
            `addr`, `value`, `specval`, `specflag1` and `specflag2`
            fields are read.
        idx: index of the operand within the instruction.

    Returns:
        None if the operand type is OPERAND_TYPE_NO_OPERAND, otherwise a
        list starting with the operand-width node followed by whatever
        expression was built for the operand (possibly nothing).

    Raises:
        Exception: from the SIB phrase parser when the segment bitness
            is not 1.
    """

    # Convenience functions
    #
    def sib_byte(o):
        # The SIB byte is read from op.specflag2:
        #   SIB = (scale_2 << 6) | (index_3 << 3) | base
        return self.as_byte_value(o.specflag2)

    def has_sib_byte(o):
        # Does the instruction use the SIB byte?
        return self.as_byte_value(o.specflag1) == 1

    def get_sib_scale(o):
        # Top two bits select the scale; None means "not scaled".
        return (None, 2, 4, 8)[sib_byte(o) >> 6]

    def get_sib_scaled_index_reg(o):
        return self.SIB_INDEX_REGISTERS[(sib_byte(o) >> 3) & 0x7]

    def get_sib_base_reg(o):
        # instruction = op + modrm + sib + disp + imm
        #
        # If MOD is zero there's no base register, otherwise it's EBP,
        # but IDA does not expose the MOD/RM byte. Following a discussion
        # in IDA's forums:
        # http://www.hex-rays.com/forum/viewtopic.php?f=8&t=1424&p=8479&hilit=mod+rm#p8479
        # the base is taken from the low three bits of the SIB byte, and
        # callers of parse_phrase() say whether a displacement is present.
        return self.SIB_BASE_REGISTERS[sib_byte(o) & 0x7]

    def get_segment_prefix(o):
        # Returns None when the high half of specval is zero, a string
        # ("<register>:") when it indexes self.REGISTERS[0], and
        # otherwise the low 16 bits of specval as an integer.
        high_half = o.specval >> 16
        if high_half == 0:
            return None
        bank0 = self.REGISTERS[0]
        if high_half < len(bank0):
            return bank0[high_half] + ':'
        return o.specval & 0xffff

    def parse_phrase(o, has_displacement=False):
        """Parse the expression used for indexed memory access.

        Returns its AST as a nested list of lists: an empty list when
        there is nothing to add up, otherwise a PLUS node whose summands
        carry their preferred display position as last element.
        """
        # Check the addressing mode used in this segment
        if idaapi.getseg(address).bitness != 1:
            raise Exception(
                'Not yet handling addressing modes other than 32bit!')

        base_reg = get_sib_base_reg(o)
        index_reg = get_sib_scaled_index_reg(o)
        scale = get_sib_scale(o)

        # Summands are collected without their display position; the
        # position is appended once the final order is known.
        summands = []

        if base_reg != '':
            summands.append([self.NODE_TYPE_REGISTER, base_reg])
        elif has_displacement:
            # No base register but the operand has a displacement
            # (mod == 01 or mod == 10): the base is EBP.
            summands.append([self.NODE_TYPE_REGISTER, 'ebp'])

        if scale:
            # index * scale. With a base register present the product is
            # dropped when there is no index register; without a base
            # register it is always emitted.
            if base_reg == '' or index_reg != '':
                summands.append([
                    self.NODE_TYPE_OPERATOR_TIMES,
                    [self.NODE_TYPE_REGISTER, index_reg, 0],
                    [self.NODE_TYPE_VALUE, scale, 1]])
        elif index_reg != '':
            summands.append([self.NODE_TYPE_REGISTER, index_reg])

        if not summands:
            return []

        return [self.NODE_TYPE_OPERATOR_PLUS] + [
            term + [position] for position, term in enumerate(summands)]

    def named_or_raw(target, name):
        # Wrap the address in a named value when a name is known.
        if name:
            return arch.ExpressionNamedValue(long(target), name)
        return target

    # Operand parsing
    #
    if op.type == OPERAND_TYPE_NO_OPERAND:
        return None

    addressing_mode = idaapi.getseg(address).bitness

    # Start creating the AST, the root entry is always the width of the
    # operand
    operand = [self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]]

    # Compose the rest of the AST
    #
    if op.type == OPERAND_TYPE_DISPLACEMENT:
        # A displacement operand might refer to a stack variable: try to
        # get any stack name that might have been assigned to it.
        flags = idc.GetFlags(address)
        flagged_as_stack_var = (
            (idx == 0 and idc.isStkvar0(flags)) or
            (idx == 1 and idc.isStkvar1(flags)))
        var_name = None
        if flagged_as_stack_var:
            var_name = self.get_operand_stack_variable_name(
                address, op, idx)

        if has_sib_byte(op) is True:
            # when SIB byte set, process the SIB indexing
            phrase = parse_phrase(op, has_displacement=True)
        else:
            phrase = [
                self.NODE_TYPE_OPERATOR_PLUS,
                [self.NODE_TYPE_REGISTER,
                 self.REGISTERS[addressing_mode + 1][op.reg], 0]]

        value = named_or_raw(op.addr, var_name)

        # The display position of the value depends on how many
        # components the phrase already has.
        displacement = [self.NODE_TYPE_VALUE, value, len(phrase) - 1]

        operand.append([
            get_segment_prefix(op),
            [self.NODE_TYPE_DEREFERENCE, phrase + [displacement]]])

    elif op.type == OPERAND_TYPE_REGISTER:
        operand.append([
            self.NODE_TYPE_REGISTER,
            self.REGISTERS[self.as_byte_value(op.dtyp)][op.reg], 0])

    elif op.type == OPERAND_TYPE_MEMORY:
        value = named_or_raw(op.addr, self.get_address_name(op.addr))

        if has_sib_byte(op) is True:
            # when SIB byte set, process the SIB indexing
            phrase = parse_phrase(op)
            target = phrase + [
                [self.NODE_TYPE_VALUE, value, len(phrase) - 1]]
        else:
            target = [self.NODE_TYPE_VALUE, value, 0]

        operand.append([
            get_segment_prefix(op),
            [self.NODE_TYPE_DEREFERENCE, target]])

    elif op.type == OPERAND_TYPE_IMMEDIATE:
        # Truncate the immediate to the operand width where that width
        # is 1, 2 or 4 bytes; other widths keep the value untouched.
        width = self.OPERAND_WIDTH[self.as_byte_value(op.dtyp)]
        if width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_1:
            value = op.value & 0xff
        elif width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_2:
            value = op.value & 0xffff
        elif width == arch.Arch.NODE_TYPE_OPERATOR_WIDTH_BYTE_4:
            value = op.value & 0xffffffff
        else:
            value = op.value

        operand.append([self.NODE_TYPE_VALUE, value, 0])

    elif op.type in (OPERAND_TYPE_NEAR, OPERAND_TYPE_FAR):
        value = named_or_raw(op.addr, self.get_address_name(op.addr))

        # A string prefix is used as the node head; an integer prefix
        # produces a segment:offset node. With no prefix nothing is
        # added after the width.
        seg_prefix = get_segment_prefix(op)
        if isinstance(seg_prefix, str):
            operand.append([
                seg_prefix,
                [self.NODE_TYPE_VALUE, value, 0]])
        elif isinstance(seg_prefix, (int, long)):
            operand.append([
                self.NODE_TYPE_OPERATOR_SEGMENT_GEN,
                [self.NODE_TYPE_VALUE, seg_prefix, 0],
                [self.NODE_TYPE_VALUE, value, 1]])

    elif op.type == OPERAND_TYPE_PHRASE:
        if has_sib_byte(op) is True:
            phrase = parse_phrase(op)

            # Cases have been observed (in GCC compiled sshd) where the
            # instruction encoding is parsed into a phrase that adds up
            # a single register, e.g. [esp] giving "[" -> "+" -> "esp".
            # If a name (such as a stack variable) is associated with
            # the implicit zero, a named zero is added to the sum so
            # there is an expression to attach the substitution to.
            # Without a name the redundant addition is removed and only
            # the register node is kept.
            if phrase[0] == self.NODE_TYPE_OPERATOR_PLUS and len(
                    phrase) == 2:
                var_name = self.get_operand_stack_variable_name(
                    address, op, idx)
                if var_name:
                    phrase.append([
                        self.NODE_TYPE_VALUE,
                        arch.ExpressionNamedValue(0, var_name), 1])
                else:
                    phrase = phrase[1]
        else:
            phrase = [
                self.NODE_TYPE_REGISTER,
                self.REGISTERS[addressing_mode + 1][op.phrase], 0]

        operand.append([
            get_segment_prefix(op),
            [self.NODE_TYPE_DEREFERENCE, phrase]])

    else:
        # Processor specific operand types are all rendered as a
        # register whose name is built from op.reg. First match wins.
        special_register_formats = (
            (OPERAND_TYPE_IDPSPEC0, 'tr%d'),     # TR* registers
            (OPERAND_TYPE_IDPSPEC1, 'dr%d'),     # DR* registers
            (OPERAND_TYPE_IDPSPEC2, 'cr%d'),     # CR* registers
            (OPERAND_TYPE_IDPSPEC3, 'st(%d)'),   # FPU register stack
            (OPERAND_TYPE_IDPSPEC4, 'mm%d'),     # MMX registers
            (OPERAND_TYPE_IDPSPEC5, 'xmm%d'),    # XMM registers
        )
        for special_type, name_format in special_register_formats:
            if op.type == special_type:
                operand.append([
                    self.NODE_TYPE_REGISTER, name_format % op.reg, 0])
                break

    # NOTE: IDA has been seen to pretend there is a first operand that
    # consists of nothing but a width, e.g.
    #
    #   fld ['b2'], ['b4', ['ds', ['[', ['+', ['$', 'edx'], [...]]]]]
    #
    # Such width-only operands are returned as they are; a hack that
    # turned them into None is disabled.
    return operand