def value(self):
    """
    The unpacked data the variable is pointing to.

    Function pointers yield their own address.  A stack variable whose member
    type is listed in ``SIZE_MAP`` is unpacked into a single integer when the
    data size equals the element size, otherwise into a list of integers (one
    per element-sized chunk).  In every other case the raw data is returned.
    """
    if self.is_func_ptr:
        return self.addr

    # TODO: Determine how to unpack based on type for global variables.
    if not self.is_stack:
        return self.data

    member_flag = idc.get_member_flag(self.frame_id, self.stack_offset)
    type_code = member_flag & idc.DT_TYPE

    # Unpack only if an integer type; anything else is handed back as-is.
    if type_code not in self.SIZE_MAP:
        return self.data

    element_size = self.SIZE_MAP[type_code]
    if self.size == element_size:
        return utils.struct_unpack(self.data)

    # If data size is greater than type size, then we have an array.
    raw = self.data
    elements = []
    for start in range(0, len(raw), element_size):
        elements.append(utils.struct_unpack(raw[start:start + element_size]))
    return elements
def value(self, value):
    """
    Set the operand to the specified value within the cpu_context.

    Negative integers are first converted to their unsigned form for the
    operand width.  The write then targets the memory at the immediate's
    address (only when that address is loaded), the named register, or the
    referenced memory, depending on the kind of operand.

    :param value: Value to store into the operand.
    :raises FunctionTracingError: If the operand is not an immediate,
        register or memory reference.
    """
    is_negative_int = isinstance(value, int) and value < 0
    if is_negative_int:
        # Value may be signed.
        value = utils.unsigned(value, bit_width=self.width * 8)

    # If we are writing to an immediate, I believe they want to write to the memory at the immediate.
    # TODO: Should we fail instead?
    if self.is_immediate:
        target = self.value
        if idaapi.is_loaded(target):
            self._cpu_context.mem_write(target, value)
        return

    if self.is_register:
        # Convert the value from string to integer before storing it.
        register_value = utils.struct_unpack(value) if isinstance(value, str) else value
        self._cpu_context.registers[self.text] = register_value
        return

    if self.is_memory_reference:
        # FIXME: Usage of numpy is most likely symptomatic of a bug in an opcode
        #   implementation passing in bad data.
        #   Update this to just use "isinstance" and then fix the buggy opcode.
        # For data written to the frame or memory, this data MUST be a byte string.
        is_integer = numpy.issubdtype(type(value), numpy.integer)
        payload = utils.struct_pack(value, width=self.width) if is_integer else value
        self._cpu_context.mem_write(self.addr, payload)
        return

    raise FunctionTracingError(f"Invalid operand type: {self.type}", ip=self.ip)
def read_data(self, addr, size=None, data_type=None):
    """
    Read memory at the given address and convert it into the requested type.

    :param int addr: address to read data from
    :param int size: size of data to read (only consulted for byte strings)
    :param data_type: type of data to be extracted
        (defaults to byte string if size is provided or C string if not)
    :return: bytes for the string types, otherwise the result of
        ``utils.struct_unpack`` on the 1, 2, 4 or 8 bytes read
    :raises ValueError: if data_type is not one of the recognized types
    """
    if not data_type:
        data_type = BYTE_STRING if size is not None else STRING
    if size is None:
        size = 0

    if data_type == STRING:
        terminator = self.memory.find(b'\0', start=addr)
        # It should always eventually find a null since unmapped pages
        # are all null. If we get -1 we have a bug.
        assert terminator != -1, "Unable to find a null character!"
        return self.memory.read(addr, terminator - addr)

    if data_type == WIDE_STRING:
        # Step by 2 bytes to find 2 nulls on an even alignment.
        # (This helps prevent the need to take endianness into account.)
        end = addr
        while self.memory.read(end, 2) != b'\0\0':
            end += 2
        return self.memory.read(addr, end - addr)

    if data_type == BYTE_STRING:
        return self.memory.read(addr, size)

    # Fixed-width integer types, compared in the same order as before.
    for int_type, width in ((BYTE, 1), (WORD, 2), (DWORD, 4), (QWORD, 8)):
        if data_type == int_type:
            return utils.struct_unpack(self.mem_read(addr, width))

    raise ValueError('Invalid data_type: {!r}'.format(data_type))
def value(self):
    """
    Retrieve the value of the operand as it is currently in the cpu_context.

    NOTE: We can't cache this value because the value may change based on the cpu context.

    :return: ``None`` for a hidden operand; the operand value for an
        immediate; the register contents for a register; for a memory
        reference either the address itself (function pointer), ``b""``
        (zero width) or the unpacked memory contents.
    :raises FunctionTracingError: If the operand is none of the handled kinds.
    """
    if self.is_hidden:
        return None

    if self.is_immediate:
        immediate = idc.get_operand_value(self.ip, self.idx)
        # Create variable/reference if global.
        if idc.is_loaded(immediate):
            self._cpu_context.variables.add(immediate, reference=self.ip)
        return immediate

    if self.is_register:
        reg_value = self._cpu_context.registers[self.text]
        # Record reference if register is a variable address.
        if reg_value in self._cpu_context.variables:
            self._cpu_context.variables[reg_value].add_reference(self.ip)
        return reg_value

    # TODO: Determine if this is still necessary.
    # FS, GS (at least) registers are identified as memory addresses.  We need to identify them as registers
    # and handle them as such
    if self.type == idc.o_mem:
        if "fs" in self.text:
            return self._cpu_context.registers.fs
        if "gs" in self.text:
            return self._cpu_context.registers.gs

    if not self.is_memory_reference:
        raise FunctionTracingError("Invalid operand type: {}".format(self.type), ip=self.ip)

    # A memory reference: return what is read from memory.
    ea = self.addr

    # Record reference if address is a variable address.
    if ea in self._cpu_context.variables:
        self._cpu_context.variables[ea].add_reference(self.ip)

    # If a function pointer, we want to return the address.
    # This is because a function may be seen as a memory reference, but we don't
    # want to dereference it in case it is in a non-call instruction.
    # (e.g.  "mov esi, ds:LoadLibraryA")
    # NOTE: Must use internal function to avoid recursive loop.
    if utils.is_func_ptr(ea):
        return ea

    # Nothing to read when the operand has no width.
    if not self.width:
        logger.debug("Width is zero for {}, returning empty string.".format(self.text))
        return b""

    # Otherwise, dereference the address.
    raw = self._cpu_context.mem_read(ea, self.width)
    return utils.struct_unpack(raw)
def _data_array(self) -> List[int]: """Returns data as an array of unpacked integers based on data_type size.""" data = self.data data_type_size = self.data_type_size return [ utils.struct_unpack(data[i:i + data_type_size]) for i in range(0, len(data), data_type_size) ]
def value(self, value):
    """
    Set the operand to the specified value within the cpu_context.

    The write targets the memory at the immediate's address (only when that
    address is loaded), the named register, the ``fs``/``gs`` register for
    ``o_mem`` operands whose text mentions them, or the referenced memory.

    :param value: Value to store into the operand.
    :raises FunctionTracingError: If the operand is none of the handled kinds.
    """
    # If we are writing to an immediate, I believe they want to write to the memory at the immediate.
    # TODO: Should we fail instead?
    if self.is_immediate:
        target = self.value
        if idaapi.is_loaded(target):
            self._cpu_context.mem_write(target, value)
        return

    if self.is_register:
        # Convert the value from string to integer before storing it.
        if isinstance(value, str):
            value = utils.struct_unpack(value)

        # On 64-bit, the destination register must be set to 0 first (per documentation)
        # TODO: Check if this happens regardless of the source size
        clear_wide_register = idc.__EA64__ and self.width == 4  # Only do this for 32-bit setting
        if clear_wide_register:
            wide_name = utils.convert_reg(self.text, 8)
            self._cpu_context.registers[wide_name] = 0

        self._cpu_context.registers[self.text] = value
        return

    # TODO: Determine if this is still necessary.
    # FS, GS (at least) registers are identified as memory addresses.  We need to identify them as registers
    # and handle them as such
    if self.type == idc.o_mem:
        if "fs" in self.text:
            self._cpu_context.registers.fs = value
            return
        if "gs" in self.text:
            self._cpu_context.registers.gs = value
            return

    if self.is_memory_reference:
        # For data written to the frame or memory, this data MUST be a byte string.
        is_integer = numpy.issubdtype(type(value), numpy.integer)
        payload = utils.struct_pack(value, width=self.width) if is_integer else value
        self._cpu_context.mem_write(self.addr, payload)
        return

    raise FunctionTracingError('Invalid operand type: {}'.format(self.type), ip=self.ip)
def get_function_args(self, func_ea=None):
    """
    Returns the function argument values for this context based on the given function.

    >>> cpu_context = ProcessorContext()
    >>> args = cpu_context.get_function_args(0x180011772)

    :param int func_ea: Ea of the function to pull a signature from.
        When omitted, the first operand of the current instruction is used.

    :returns: list of function arguments
    :raises NotImplementedError: for ALOC_NONE, ALOC_DIST and ALOC_CUSTOM argument locations
    """
    # If func_ea is not given, assume we are using the first operand from a call instruction.
    if not func_ea:
        call_target = self.operands[0]
        # function pointer can be a memory reference or immediate.
        func_ea = call_target.addr or call_target.value

    # First get a func_type_data_t structure for the function
    funcdata = utils.get_function_data(func_ea)

    # Now use the data contained in funcdata to obtain the values for the arguments.
    args = []
    for index in range(funcdata.size()):
        func_arg = funcdata[index]
        argloc = func_arg.argloc
        loc_type = argloc.atype()  # Where was this parameter passed?

        if loc_type == 0:
            # ALOC_NONE, not sure what this means...
            raise NotImplementedError("Argument {} location of type ALOC_NONE".format(index))

        elif loc_type == 1:
            # ALOC_STACK
            # read the argument from the stack using the calculated stack offset from the disassembler
            arg_addr = self.sp + argloc.stkoff()
            raw = self.mem_read(arg_addr, self.byteness)
            args.append(utils.struct_unpack(raw))

        elif loc_type == 2:
            # ALOC_DIST, arguments described by multiple locations
            # TODO: Uses the scattered_aloc_t class, which is a qvector or argpart_t objects
            # funcdata[i].argloc.scattered()
            raise NotImplementedError("Argument {} location of type ALOC_DIST".format(index))

        elif loc_type == 3:
            # ALOC_REG1, single register; the value is masked to the argument type's size
            reg_value = self.reg_read(utils.REG_MAP.get(argloc.reg1()))
            width = func_arg.type.get_size()
            args.append(reg_value & utils.get_mask(width))

        elif loc_type == 4:
            # ALOC_REG2, register pair (eg: edx:eax [reg2:reg1])
            # TODO: CURRENTLY UNTESTED
            logger.info(
                "Argument {} of untested type ALOC_REG2. Verify results and report issues".format(index))
            low = self.reg_read(utils.REG_MAP.get(argloc.reg1()))
            high = self.reg_read(utils.REG_MAP.get(argloc.reg2()))
            # TODO: Probably need to determine how to check the width of these register values in order to shift
            # the data accordingly.  Will likely need examples for testing/verification of functionality.
            args.append((high << 32) | low)

        elif loc_type == 5:
            # ALOC_RREL, register relative (displacement from address pointed by register)
            # TODO: CURRENTLY UNTESTED
            logger.info(
                "Argument {} of untested type ALOC_RREL. Verify results and report issues.".format(index))
            # Obtain the register-relative argument location
            rrel = argloc.get_rrel()
            # Pointer value held in the register, plus the displacement
            base = self.reg_read(utils.REG_MAP.get(rrel.reg))
            displacement = rrel.off
            args.append(base + displacement)

        elif loc_type == 6:
            # ALOC_STATIC, global address
            # TODO: CURRENTLY UNTESTED
            logger.info(
                "Argument {} of untested type ALOC_STATIC. Verify results and report issues.".format(index))
            args.append(argloc.get_ea())

        elif loc_type >= 7:
            # ALOC_CUSTOM, custom argloc
            # TODO: Will need to figure out the functionality and usage for the custloc_desc_t structure
            # funcdata[i].argloc.get_custom()
            raise NotImplementedError("Argument {} location of type ALOC_CUSTOM".format(index))

    return args