def _track(self, state, live_defs):
    """
    Given all live definitions prior to this program point, track the changes, and return the new live
    definitions. We scan through the action list of the new state to track the changes.

    Edges that originate from register reads and temporary reads/exits are buffered in local dicts and only
    handed to `self._add_edge` when the same register/temporary is read again, or at the end of this method.
    This lets a later memory action add `subtype` labels to an already-recorded edge. Edges created by memory
    reads are added to the graph immediately.

    :param state:       The input state at that program point. Its `log.actions` are iterated.
    :param live_defs:   All live definitions prior to reaching this program point. It is copied via `.copy()`
                        and never modified in place by this method.
    :returns:           The updated copy of the live definitions.
    """

    # Make a copy of live_defs so the caller's object is left untouched
    live_defs = live_defs.copy()

    action_list = list(state.log.actions)

    # Since all temporary variables are local, we simply track them in a local dict
    # (tmp -> code location of the last write to that tmp)
    temps = {}

    # All dependence edges are added to the graph either at the end of this method, or when they are going to be
    # overwritten by a new edge. This is because we sometimes have to modify a previous edge (e.g. add new labels
    # to the edge)
    temps_to_edges = defaultdict(list)
    regs_to_edges = defaultdict(list)

    def _annotate_edges_in_dict(dict_, key, **new_labels):
        """
        Add labels to every buffered edge stored under `key`. Each label value is accumulated in a tuple.

        :param dict_:       The dict, can be either `temps_to_edges` or `regs_to_edges`
        :param key:         The key used in finding elements in the dict
        :param new_labels:  New labels to be added to those edges
        """

        for edge_tuple in dict_[key]:
            # unpack it; only the (mutable) label dict is of interest here
            _, _, labels = edge_tuple
            # .items() instead of the Python-2-only .iteritems() so this runs on both Python 2 and 3
            for k, v in new_labels.items():
                if k in labels:
                    labels[k] = labels[k] + (v, )
                else:
                    # Construct a tuple
                    labels[k] = (v, )

    def _dump_edge_from_dict(dict_, key, del_key=True):
        """
        Add every buffered edge stored under `key` to our graph, and optionally remove the key from the dict.

        :param dict_:   The dict, can be either `temps_to_edges` or `regs_to_edges`.
        :param key:     The key used in finding elements in the dict.
        :param del_key: Whether to delete `key` from `dict_` afterwards. Must be False when the caller is
                        iterating over `dict_`.
        """

        for edge_tuple in dict_[key]:
            # unpack it
            prev_code_loc, current_code_loc, labels = edge_tuple
            # Add the new edge
            self._add_edge(prev_code_loc, current_code_loc, **labels)

        # Clear it
        if del_key:
            del dict_[key]

    for a in action_list:

        if a.bbl_addr is None:
            # No basic block address: locate the action by its SimProcedure instead
            current_code_loc = CodeLocation(None, None, sim_procedure=a.sim_procedure)
        else:
            current_code_loc = CodeLocation(a.bbl_addr, a.stmt_idx, ins_addr=a.ins_addr)

        if a.type == "mem":
            if a.actual_addrs is None:
                # For now, mem reads don't necessarily have actual_addrs set properly
                addr_list = set(state.memory.normalize_address(a.addr.ast, convert_to_valueset=True))
            else:
                addr_list = set(a.actual_addrs)

            for addr in addr_list:
                variable = SimMemoryVariable(addr, a.data.ast.size())  # TODO: Properly unpack the SAO

                if a.action == "read":
                    # Create an edge between def site and use site
                    prevdefs = self._def_lookup(live_defs, variable)

                    for prev_code_loc, labels in prevdefs.items():
                        self._read_edge = True
                        self._add_edge(prev_code_loc, current_code_loc, **labels)

                else:  # if a.action == "write":
                    # Kill the existing live def
                    self._kill(live_defs, variable, current_code_loc)

                # For each of its register dependency and data dependency, we revise the corresponding edge
                for reg_off in a.addr.reg_deps:
                    _annotate_edges_in_dict(regs_to_edges, reg_off, subtype='mem_addr')
                for tmp in a.addr.tmp_deps:
                    _annotate_edges_in_dict(temps_to_edges, tmp, subtype='mem_addr')

                for reg_off in a.data.reg_deps:
                    _annotate_edges_in_dict(regs_to_edges, reg_off, subtype='mem_data')
                for tmp in a.data.tmp_deps:
                    _annotate_edges_in_dict(temps_to_edges, tmp, subtype='mem_data')

        elif a.type == 'reg':
            # For now, we assume a.offset is not symbolic
            # TODO: Support symbolic register offsets
            variable = SimRegisterVariable(a.offset, self.project.arch.bits)

            if a.action == 'read':
                prevdefs = self._def_lookup(live_defs, variable)

                # Flush the edges buffered by the previous read of this register before replacing them
                if a.offset in regs_to_edges:
                    _dump_edge_from_dict(regs_to_edges, a.offset)

                for prev_code_loc, labels in prevdefs.items():
                    edge_tuple = (prev_code_loc, current_code_loc, labels)
                    regs_to_edges[a.offset].append(edge_tuple)

            else:
                # write
                self._kill(live_defs, variable, current_code_loc)

        elif a.type == 'tmp':
            # tmp is definitely not symbolic
            if a.action == 'read':
                # NOTE: raises KeyError if the tmp was not written earlier in this action list
                prev_code_loc = temps[a.tmp]
                edge_tuple = (prev_code_loc, current_code_loc, {'type': 'tmp', 'data': a.tmp})

                # Flush the edges buffered by the previous read of this tmp before replacing them
                if a.tmp in temps_to_edges:
                    _dump_edge_from_dict(temps_to_edges, a.tmp)

                temps_to_edges[a.tmp].append(edge_tuple)

            else:
                # write
                temps[a.tmp] = current_code_loc

        elif a.type == 'exit':
            # exits should only depend on tmps
            for tmp in a.tmp_deps:
                prev_code_loc = temps[tmp]
                edge_tuple = (prev_code_loc, current_code_loc, {'type': 'exit', 'data': tmp})

                if tmp in temps_to_edges:
                    _dump_edge_from_dict(temps_to_edges, tmp)

                temps_to_edges[tmp].append(edge_tuple)

    # In the end, dump all other edges in those two dicts. Keys are kept (del_key=False) because the dicts are
    # being iterated over.
    for reg_offset in regs_to_edges:
        _dump_edge_from_dict(regs_to_edges, reg_offset, del_key=False)
    for tmp in temps_to_edges:
        _dump_edge_from_dict(temps_to_edges, tmp, del_key=False)

    return live_defs
def _track(self, state, live_defs, statements):
    """
    Given all live definitions prior to this program point, track the changes, and return the new live
    definitions. We scan through the action list of the new state to track the changes.

    While scanning, edges are added to the statement dependence graph (`self._stmt_graph_add_edge`) and to the
    data dependence graph (`self._data_graph_add_edge` / `self._data_graph_add_node`).

    :param state:           The input state at that program point. Its `log.actions` are iterated.
    :param live_defs:       All live definitions prior to reaching this program point. It is copied via
                            `.copy()` and never modified in place by this method.
    :param list statements: A list of VEX statements, indexed by the `stmt_idx` of each action, or None. It is
                            only used to recover constant values.
    :returns:               The updated copy of the live definitions.
    """

    # Make a copy of live_defs so the caller's object is left untouched
    live_defs = live_defs.copy()

    action_list = list(state.log.actions)

    # Since all temporary variables are local, we simply track them in local dicts
    temp_defs = {}  # tmp -> code location of the last write to that tmp
    temp_variables = {}  # tmp -> ProgramVariable created by the last write to that tmp
    temp_register_symbols = {}  # tmp -> (base, offset) where base is 'sp' or 'bp'

    # Edges are added to the statement graph right away; they are additionally recorded here, keyed by the
    # tmp/register they came from, so that a later memory action can add labels to them (e.g. subtype='mem_addr')
    temps_to_edges = defaultdict(list)
    regs_to_edges = defaultdict(list)

    last_statement_id = None
    # program variables read out in the same statement. we keep a copy of those variables here so we can link
    # them to the tmp_write action right afterwards
    pv_read = None
    # (base, offset) pair produced by the current statement when it derives a value from sp/bp, else None
    data_generated = None

    for a in action_list:

        if last_statement_id is None or last_statement_id != a.stmt_idx:
            # A new statement starts: reset the per-statement bookkeeping
            pv_read = []
            data_generated = None
            last_statement_id = a.stmt_idx

        if a.sim_procedure is None:
            current_code_location = CodeLocation(a.bbl_addr, a.stmt_idx, ins_addr=a.ins_addr)
        else:
            current_code_location = CodeLocation(None, None, sim_procedure=a.sim_procedure)

        if a.type == "mem":
            if a.actual_addrs is None:
                # For now, mem reads don't necessarily have actual_addrs set properly
                try:
                    addr_list = {state.se.any_int(a.addr.ast)}
                except (SimSolverModeError, SimUnsatError, ZeroDivisionError):
                    # FIXME: ZeroDivisionError should have been caught by claripy and simuvex.
                    # FIXME: see claripy issue #75. this is just a temporary workaround.
                    # it's symbolic... just continue
                    addr_list = {0x60000000}  # TODO: this is a random address that I pick. Fix it.
            else:
                addr_list = set(a.actual_addrs)

            for addr in addr_list:

                # Floor division keeps the size an integer under Python 3 / `from __future__ import division`
                size_in_bytes = a.data.ast.size() // 8

                variable = None
                if len(addr_list) == 1 and len(a.addr.tmp_deps) == 1:
                    addr_tmp = list(a.addr.tmp_deps)[0]
                    if addr_tmp in temp_register_symbols:
                        # it must be a stack variable
                        sort, offset = temp_register_symbols[addr_tmp]
                        variable = SimStackVariable(offset, size_in_bytes, base=sort, base_addr=addr - offset)

                if variable is None:
                    variable = SimMemoryVariable(addr, size_in_bytes)  # TODO: Properly unpack the SAO

                pvs = []

                if a.action == "read":
                    # Create an edge between def site and use site
                    prevdefs = self._def_lookup(live_defs, variable)

                    # TODO: prevdefs should only contain location, not labels
                    for prev_code_loc, labels in prevdefs.items():
                        self._stmt_graph_add_edge(prev_code_loc, current_code_location, **labels)
                        pvs.append(ProgramVariable(variable, prev_code_loc))

                    if not pvs:
                        # No previous definition is known: treat this read as the initial definition
                        pvs.append(ProgramVariable(variable, current_code_location, initial=True))
                        # make sure to put it into the killing set
                        self._kill(live_defs, variable, current_code_location)

                    pv_read.extend(pvs)

                if a.action == "write":
                    # Kill the existing live def
                    self._kill(live_defs, variable, current_code_location)
                    pvs.append(ProgramVariable(variable, current_code_location))

                for pv in pvs:
                    # For each of its register dependency and data dependency, we annotate the corresponding edge
                    for reg_offset in a.addr.reg_deps:
                        self._stmt_graph_annotate_edges(regs_to_edges[reg_offset], subtype='mem_addr')
                        reg_variable = SimRegisterVariable(reg_offset, self._get_register_size(reg_offset))
                        prev_defs = self._def_lookup(live_defs, reg_variable)
                        for loc in prev_defs:
                            v = ProgramVariable(reg_variable, loc)
                            self._data_graph_add_edge(v, pv, type='mem_addr')

                    for tmp in a.addr.tmp_deps:
                        self._stmt_graph_annotate_edges(temps_to_edges[tmp], subtype='mem_addr')
                        if tmp in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp], pv, type='mem_addr')

                    for reg_offset in a.data.reg_deps:
                        self._stmt_graph_annotate_edges(regs_to_edges[reg_offset], subtype='mem_data')
                        reg_variable = SimRegisterVariable(reg_offset, self._get_register_size(reg_offset))
                        prev_defs = self._def_lookup(live_defs, reg_variable)
                        for loc in prev_defs:
                            v = ProgramVariable(reg_variable, loc)
                            self._data_graph_add_edge(v, pv, type='mem_data')

                    for tmp in a.data.tmp_deps:
                        self._stmt_graph_annotate_edges(temps_to_edges[tmp], subtype='mem_data')
                        if tmp in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp], pv, type='mem_data')

        elif a.type == 'reg':
            # TODO: Support symbolic register offsets
            reg_offset = a.offset
            variable = SimRegisterVariable(reg_offset, a.data.ast.size() // 8)

            if a.action == 'read':
                prevdefs = self._def_lookup(live_defs, variable)

                # add edges to the statement dependence graph
                for prev_code_loc, labels in prevdefs.items():
                    self._stmt_graph_add_edge(prev_code_loc, current_code_location, **labels)
                    # record the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    regs_to_edges[reg_offset].append(edge_tuple)

                    pv_read.append(ProgramVariable(variable, prev_code_loc))

                if not prevdefs:
                    # the register was never defined before - it must be passed in as an argument
                    pv_read.append(ProgramVariable(variable, current_code_location, initial=True))
                    # make sure to put it into the killing set
                    self._kill(live_defs, variable, current_code_location)

                # Remember that this statement produced the stack/base pointer itself (offset 0)
                if reg_offset == self.project.arch.sp_offset:
                    data_generated = ('sp', 0)
                elif reg_offset == self.project.arch.bp_offset:
                    data_generated = ('bp', 0)

            else:
                # write
                self._kill(live_defs, variable, current_code_location)

                if reg_offset in regs_to_edges:
                    # clear the existing edges definition
                    del regs_to_edges[reg_offset]

                # add a node on the data dependence graph
                pv = ProgramVariable(variable, current_code_location)
                self._data_graph_add_node(pv)

                if not a.reg_deps and not a.tmp_deps:
                    # moving a constant into the register
                    # try to parse out the constant from statement
                    const_variable = SimConstantVariable()
                    if statements is not None:
                        stmt = statements[a.stmt_idx]
                        # Not every statement type exposes `.data`; fall back to an unknown constant then
                        stmt_data = getattr(stmt, 'data', None)
                        if isinstance(stmt_data, pyvex.IRExpr.Const):
                            const_variable = SimConstantVariable(value=stmt_data.con.value)
                    const_pv = ProgramVariable(const_variable, current_code_location)
                    self._data_graph_add_edge(const_pv, pv)

                for tmp in a.tmp_deps:
                    if tmp in temp_variables:
                        self._data_graph_add_edge(temp_variables[tmp], pv)

        elif a.type == 'tmp':
            # tmp is definitely not symbolic
            tmp = a.tmp
            pv = ProgramVariable(SimTemporaryVariable(tmp), current_code_location)

            if a.action == 'read':
                # NOTE: raises KeyError if the tmp was not written earlier in this action list
                prev_code_loc = temp_defs[tmp]

                self._stmt_graph_add_edge(prev_code_loc, current_code_location, type='tmp', data=a.tmp)
                # record the edge
                edge_tuple = (prev_code_loc, current_code_location)
                temps_to_edges[a.tmp].append(edge_tuple)

                if tmp in temp_register_symbols:
                    data_generated = temp_register_symbols[tmp]

            else:
                # write
                temp_defs[tmp] = current_code_location
                temp_variables[tmp] = pv

                # clear existing edges
                if tmp in temps_to_edges:
                    del temps_to_edges[tmp]

                for tmp_dep in a.tmp_deps:
                    if tmp_dep in temp_variables:
                        self._data_graph_add_edge(temp_variables[tmp_dep], pv)

                if data_generated:
                    temp_register_symbols[tmp] = data_generated

                # Link everything read earlier in this statement to the tmp being written
                for data in pv_read:
                    self._data_graph_add_edge(data, pv)

                if not a.tmp_deps and not pv_read:
                    # read in a constant
                    # try to parse out the constant from statement
                    const_variable = SimConstantVariable()
                    if statements is not None:
                        stmt = statements[a.stmt_idx]
                        if isinstance(stmt, pyvex.IRStmt.Dirty):
                            l.warning('Dirty statements are not supported in DDG for now.')
                        else:
                            # Not every statement type exposes `.data`; fall back to an unknown constant then
                            stmt_data = getattr(stmt, 'data', None)
                            if isinstance(stmt_data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(value=stmt_data.con.value)

                    const_pv = ProgramVariable(const_variable, current_code_location)
                    self._data_graph_add_edge(const_pv, pv)

        elif a.type == 'exit':
            # exits should only depend on tmps
            for tmp in a.tmp_deps:
                prev_code_loc = temp_defs[tmp]

                # add the edge to the graph
                self._stmt_graph_add_edge(prev_code_loc, current_code_location, type='exit', data='tmp')

                # log the edge
                edge_tuple = (prev_code_loc, current_code_location)
                temps_to_edges[tmp].append(edge_tuple)

        elif a.type == 'operation':
            # FIXME: we should support a more complete range of operations

            if a.op.endswith('Sub32') or a.op.endswith('Sub64'):
                # subtract
                expr_0, expr_1 = a.exprs

                if expr_0.tmp_deps and (not expr_1.tmp_deps and not expr_1.reg_deps):
                    # tmp - const
                    tmp = list(expr_0.tmp_deps)[0]
                    if tmp in temp_register_symbols:
                        sort, offset = temp_register_symbols[tmp]
                        offset -= expr_1.ast.args[0]
                        data_generated = (sort, offset)

            elif a.op.endswith('Add32') or a.op.endswith('Add64'):
                # add
                expr_0, expr_1 = a.exprs

                if expr_0.tmp_deps and (not expr_1.tmp_deps and not expr_1.reg_deps):
                    # tmp + const
                    tmp = list(expr_0.tmp_deps)[0]
                    if tmp in temp_register_symbols:
                        sort, offset = temp_register_symbols[tmp]
                        offset += expr_1.ast.args[0]
                        data_generated = (sort, offset)

    return live_defs