def _memory_access(self, state):
    """
    Attribute the memory access currently being performed by ``state`` to the
    structure it touches.

    The access is looked up in ``self.accesses`` by (block address, statement
    index). Depending on the type recovered for the accessed pointer, the
    access is recorded against an abstract structure, against the global
    pseudo-structure, or a ``('SOURCE', ...)`` result is queued for this pass.
    The (possibly re-annotated) data is written back to ``state.inspect``.

    :param state:   The state performing the access. ``state.scratch`` and
                    ``state.inspect.mem_{read,write}_*`` are consulted, so this
                    is presumably installed as a memory read/write inspection
                    callback -- TODO confirm against the registration site.
    """
    # Accesses made from inside a SimProcedure are not tracked.
    if state.scratch.sim_procedure is not None:
        return

    key = (state.scratch.bbl_addr, state.scratch.stmt_idx)
    try:
        cur_access = self.accesses[key]
    except KeyError:
        l.error("There's an access I know nothing about!!!!!!! (%#x, %d)", *key)
        import ipdb
        ipdb.set_trace()
        return

    if cur_access.is_write:
        pointer = state.inspect.mem_write_address
        data = state.inspect.mem_write_expr
    else:
        pointer = state.inspect.mem_read_address
        data = state.inspect.mem_read_expr

    ptr_ty = self.ty_backend.convert(pointer).ty
    if type(ptr_ty) is sim_type.SimTypePointer:
        offset = ptr_ty.offset
        subty = ptr_ty.pts_to
        if type(subty) is SimStructAbstract:
            subty.offsets[offset.cache_key].add(cur_access)
            cur_access.source = ValueSource(subty, offset)
            is_fresh_read = (
                data.op == 'BVS'
                and data.args[0].startswith('mem_')
                and not data.annotations
            )
            if is_fresh_read:
                # A fresh, unannotated symbolic read: tag it with where it
                # came from and store the tagged value back so later loads
                # of the same location see the annotation.
                newty = sim_type.SimTypeTop(label=[ValueSource(subty, offset)])
                data = data.annotate(TypeAnnotation(newty))
                state.memory.store(
                    pointer,
                    data,
                    inspect=False,
                    endness=state.arch.memory_endness)
        else:
            l.warning('...pointer is to %s?', repr(subty))
    elif not pointer.symbolic:
        # A concrete address with no pointer type is treated as global data.
        self.global_struct.offsets[pointer.cache_key].add(cur_access)
        cur_access.source = ValueSource(self.global_struct,
                                        state.solver.eval(pointer))
    elif ptr_ty.label:
        # Symbolic, not (yet) known to be a pointer, but its origin is known:
        # report the origin so it can be handled as a 'SOURCE' result.
        if len(ptr_ty.label) > 1:
            import ipdb
            ipdb.set_trace()
            print('not sure how this case can arise but it needs special handling if it does')
        self.pass_results.append(('SOURCE', ptr_ty.label[0]))

    # Hand the (possibly annotated) data back to the engine.
    if cur_access.is_write:
        state.inspect.mem_write_expr = data
    else:
        state.inspect.mem_read_expr = data
def _init_analysis(self, func):
    """
    Set up the per-function bookkeeping for ``func`` and then analyze it.

    Three things happen, in order:

    - every VEX load/store statement (including the x87 80-bit dirty helpers)
      found in the function's block nodes is recorded in ``self.accesses``,
      keyed by (block address, statement index);
    - a fresh abstract structure labelled ``stack_<addr>`` is created, a
      pointer to it becomes the function's initial ``sp``, and a placeholder
      word is stored at that pointer in ``self.initial_state``;
    - every other default symbolic register gets a fresh symbolic value
      annotated with a ``ValueSource('register', <name>)`` label.

    Finally ``self._analyze`` is called with the function's address.

    :param func:    The function to prepare; its ``nodes`` and ``addr``
                    attributes are used.
    """
    arch = self.project.arch

    # Dirty helper name -> whether the helper performs a write.
    f80_helpers = {
        'x86g_dirtyhelper_loadF80le': False,
        'x86g_dirtyhelper_storeF80le': True,
    }

    # Address of the most recent instruction marker; deliberately carried
    # across blocks exactly as the statements are encountered.
    last_imark = None
    for node in func.nodes:
        if type(node) is not angr.knowledge.codenode.BlockNode:
            continue

        block = self.project.factory.block(node.addr, size=node.size)
        for stmt_idx, stmt in enumerate(block.vex.statements):
            tag = stmt.tag
            if tag == 'Ist_IMark':
                last_imark = stmt.addr
                continue

            if tag == 'Ist_WrTmp' and stmt.data.tag == 'Iex_Load':
                is_write = False
                size = stmt.data.result_size >> 3  # bits -> bytes
            elif tag == 'Ist_Store':
                is_write = True
                size = stmt.data.result_size >> 3  # bits -> bytes
            elif tag == 'Ist_Dirty' and stmt.cee.name in f80_helpers:
                is_write = f80_helpers[stmt.cee.name]
                size = 10
            else:
                continue

            self.accesses[(block.addr, stmt_idx)] = Access(
                block.addr, stmt_idx, is_write, size, last_imark)

    # The stack frame is modelled as its own abstract structure.
    frame_struct = SimStructAbstract(label='stack_%x' % func.addr)
    self._register_addr(frame_struct)
    sp_bv = self.pointer_to_abstruct(frame_struct)
    self.function_initial_regs[func.addr] = {'sp': sp_bv}
    self.initial_state.memory.store(
        sp_bv,
        claripy.BVV(0x1234, arch.bits),
        endness=arch.memory_endness,
        inspect=False)

    for reg in arch.default_symbolic_registers:
        # Skip any register whose first table entry matches sp's (presumably
        # the register-file offset, i.e. an alias of sp -- TODO confirm).
        if arch.registers[reg][0] == arch.registers['sp'][0]:
            continue

        fresh = claripy.BVS('reg_%s' % reg, arch.bits)
        source_ty = sim_type.SimTypeTop(label=[ValueSource('register', reg)])
        self.function_initial_regs[func.addr][reg] = fresh.annotate(
            TypeAnnotation(source_ty))

    self._analyze(func.addr)
def _runtime_unify(self, state, one, two, stack_frame=False, overwrite=True):
    """
    Decide whether ``one`` and ``two`` need to be unified and, if so, queue a
    ``'UNIFY'`` (or ``'SOURCE'``) entry in ``self.pass_results``.

    :param state:       The analysis state that holds intermediate results
    :param one:         The first value to unify
    :param two:         The second value to unify
    :param stack_frame: If we're only allowed to look at offsets in front of
                        the pointer
    :param overwrite:   Whether to use the semantics that ``one`` is
                        "overwriting" ``two``
    """
    one_ty = self.ty_backend.convert(one).ty
    two_ty = self.ty_backend.convert(two).ty
    one_is_ptr = type(one_ty) is sim_type.SimTypePointer
    two_is_ptr = type(two_ty) is sim_type.SimTypePointer

    if one_is_ptr and two_is_ptr:
        # Both values are pointers: this is the tricky case.
        one_subty = one_ty.pts_to
        two_subty = two_ty.pts_to
        one_offset = one_ty.offset
        two_offset = two_ty.offset

        if one_offset.symbolic or two_offset.symbolic:
            import ipdb
            ipdb.set_trace()
            print('yikes! (jesus christ)')

        if one_subty is two_subty:
            # Same structure on both sides. The two diagnostics distinguish
            # "different offsets into it" from "the very same offset"; the
            # offsets (not an offset and a structure) must be compared here.
            if one_offset is not two_offset:
                import ipdb
                ipdb.set_trace()
                print('yikes? (arrays maybe. recursion probably)')
            else:
                import ipdb
                ipdb.set_trace()
                print('yikes. (no object identity but yes equality)')

        # From here on these are two different structures that we might have
        # to unify. Possible cases:
        # - the two structures are actually the same structure
        # - two stack frames (the flag tells us this); only deal with the
        #   argument parts
        # - a structure is present in another structure
        # TODO: do some type checking on the two structs to make sure we're
        # not making a huge mistake
        elif claripy.is_true(one_offset == two_offset) and \
                claripy.is_true(one_offset == 0):
            self.pass_results.append(('UNIFY', (one, two)))
        elif stack_frame:
            for ckey in two_subty.offsets:
                offset = ckey.ast
                # Only offsets provably greater than zero are considered.
                if not claripy.is_true(claripy.SGT(offset, 0)):
                    continue

                two_value = state.memory.load(
                    two + offset,
                    size=state.arch.bytes,
                    inspect=False,
                    endness=state.arch.memory_endness)
                one_value = state.memory.load(
                    one + offset,
                    size=state.arch.bytes,
                    inspect=False,
                    endness=state.arch.memory_endness)

                # Because inspection was disabled for the load, one_value may
                # not carry a source. We know where it came from, so give it
                # one manually. A missing (None) label counts as "no source".
                one_value_ty = self.ty_backend.convert(one_value).ty
                if type(one_value_ty) is not sim_type.SimTypePointer and \
                        not one_value_ty.label:
                    one_value = one_value.annotate(
                        TypeAnnotation(
                            sim_type.SimTypeTop(
                                label=[ValueSource(two_subty, offset)])))

                self._runtime_unify(state, one_value, two_value)
        else:
            import ipdb
            ipdb.set_trace()
            # Open question: look through the structs and check offset by
            # offset? In the initial state or the current one? Probably the
            # current one, since everything from the initial state should
            # still be there.
            print('okay??')

    # Only one of them is a pointer. If a source is available for the other
    # value, hand it to 'SOURCE'; if not, wait -- one will eventually turn up.
    elif two_is_ptr:
        if one_ty.label:
            if len(one_ty.label) > 1:
                import ipdb
                ipdb.set_trace()
                print('????????????')
            self.pass_results.append(('SOURCE', one_ty.label[0]))

    # This is where overwrite semantics come into play: if one overwrites
    # two, two cannot be marked as a pointer just because one is a pointer.
    # Otherwise this mirrors the previous case.
    elif one_is_ptr:
        import ipdb
        ipdb.set_trace()
        if not overwrite and two_ty.label:
            if len(two_ty.label) > 1:
                import ipdb
                ipdb.set_trace()
                print('????????????')
            self.pass_results.append(('SOURCE', two_ty.label[0]))

    # Neither value is a pointer: nothing to do. This is not a general type
    # inference.
def _exit_taken(self, state):
    """
    Rewrite the exit that ``state`` is about to take when it is a call or a
    syscall.

    For such exits this:

    - redirects a call to the return address loaded from the top of the
      stack (and pops it), or resolves a syscall to its mapped address;
    - unifies the current register values with the initial register values
      recorded for every known target;
    - sets the exit jumpkind to ``Ijk_Ret`` when every target is a known
      non-returning function, ``Ijk_FakeRet`` otherwise;
    - installs a return value in ``eax``, reusing or propagating the values
      in ``self.function_return_vals``.

    Exits with any other jumpkind are left untouched by the code visible here.

    :param state:   The state taking the exit; ``state.inspect.exit_jumpkind``
                    and ``state.inspect.exit_target`` are read and may be
                    rewritten.
    """
    jk = state.inspect.exit_jumpkind
    target = state.inspect.exit_target

    if target.symbolic:
        # Ask for one solution more than the 256 we tolerate so that an
        # oversized target set can be detected.
        all_targets = tuple(state.solver.eval_upto(target, 257))
        if len(all_targets) > 256:
            import ipdb
            ipdb.set_trace()
            print('shit!! lmao')
    else:
        all_targets = (state.solver.eval(target), )

    if jk == 'Ijk_Call' or jk.startswith('Ijk_Sys'):
        if jk == 'Ijk_Call':
            # Pop the return address and continue there.
            ret_addr = state.memory.load(
                state.regs.sp,
                size=state.arch.bytes,
                endness=state.arch.memory_endness,
                inspect=False)
            state.regs.sp += state.arch.bytes
            state.inspect.exit_target = ret_addr
        else:
            sys_num = state.regs.eax
            if sys_num.symbolic:
                import ipdb
                ipdb.set_trace()
                l.error("SHIT. F**K. SHIT F**K.")

            sys_id = state.solver.eval(sys_num)
            try:
                all_targets = (self.syscall_mapping[sys_id][0], )
            except KeyError:
                # Unknown syscall number: fall back to a placeholder address.
                all_targets = (0x1234678d, )

        for callee in all_targets:
            if callee in self.function_initial_regs:
                for reg, stored in self.function_initial_regs[callee].items():
                    self._runtime_unify(
                        state,
                        state.registers.load(reg),
                        stored,
                        stack_frame=reg == 'sp')

        all_noreturn = all(
            callee in self.cfg.functions
            and not self.cfg.functions[callee].returning
            for callee in all_targets)
        if all_noreturn:
            state.inspect.exit_jumpkind = 'Ijk_Ret'
        else:
            state.inspect.exit_jumpkind = 'Ijk_FakeRet'

        # NOTE(review): the original indentation was lost; this section is
        # assumed to run for both jumpkinds above rather than only for
        # 'Ijk_FakeRet' -- confirm against the upstream file.
        #
        # Go through all the targets. If any of them have return values
        # available, take one of them and drop it into the state, then unify
        # the rest with it. Targets without a return value inherit the first
        # one. If none are available, make a fresh value, give it a source
        # label, and let the 'SOURCE' handling deal with it.
        are_any = False
        for callee in all_targets:
            if callee not in self.function_return_vals:
                continue
            if not are_any:
                are_any = True
                state.regs.eax = self.function_return_vals[callee]
            else:
                import ipdb
                ipdb.set_trace()
                # This is not expected to happen; the breakpoint checks the
                # assumption. Reason: there are only multiple call targets if
                # it's a call table, entries in a call table are typically
                # not reused, and function_return_vals entries are only set
                # from analyses of the caller.
                self._runtime_unify(
                    state,
                    state.regs.eax,
                    self.function_return_vals[callee],
                    overwrite=False)

        if are_any:
            for callee in all_targets:
                if callee not in self.function_return_vals:
                    self.function_return_vals[callee] = state.regs.eax
        else:
            state.regs.eax = claripy.BVS('retval', 32).annotate(
                TypeAnnotation(
                    sim_type.SimTypeTop(
                        label=[ValueSource('return', all_targets)])))