Python SimRegisterVariable Examples

Programming Language: Python

Namespace/Package Name: simuvex

Examples at hotexamples.com: 5

Python SimRegisterVariable - 5 examples found. These are the top-rated real-world Python examples of simuvex.SimRegisterVariable extracted from open-source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

SimRegisterVariable(5)

Frequently Used Methods

SimRegisterVariable (5)

Example #1

Show file

    def _analyze(self):
        """
        Determine which values may end up in the return-value register.

        The return-value register of the project's architecture is looked up in `RETURN_VALUE_REGISTERS`, all of its
        definitions are fetched from `self._data_dep`, and every final definition is traced back to its sources.
        The result is stored in `self.return_values`. If no final definition exists, a warning is logged and
        `self.return_values` is left untouched.

        :return: None
        :raises StaticPoliceKeyNotFoundError: If no return register is registered for the current architecture.
        """

        arch = self.project.arch

        # which register carries the return value on this architecture?
        reg_name = self.RETURN_VALUE_REGISTERS.get(arch.name, None)
        if reg_name is None:
            raise StaticPoliceKeyNotFoundError(
                'Return register is not specified for architecture %s.' %
                arch.name)

        reg_offset, reg_size = arch.registers[reg_name]
        ret_variable = SimRegisterVariable(reg_offset, reg_size * 8)

        data_dep = self._data_dep

        # keep the final definitions only, i.e. the ones that are neither consumed nor killed by anything
        final_defs = [
            definition  # type: ProgramVariable
            for definition in data_dep.find_definitions(ret_variable)
            if not (data_dep.find_consumers(definition)
                    or data_dep.find_killers(definition))
        ]

        if not final_defs:
            l.warning('Cannot find any definition for return value.')
            return

        collected = []

        # walk backwards from every final definition to whatever feeds it
        for definition in final_defs:
            sources = data_dep.find_sources(definition)

            if not sources:
                # a definition without any source - nothing to report for it
                continue

            for source in sources:
                if isinstance(source.variable, SimConstantVariable):
                    value = ConstantReturnValue(source.variable.value)
                else:
                    value = UnknownReturnValue()
                collected.append(value)

        self.return_values = collected

Example #2

Show file

File: vsa_ddg.py Project: zyq8709/angr

    def _track(self, state, live_defs):
        """
        Replay the actions logged on `state` on top of the incoming live definitions and return the updated live
        definitions. Dependence edges discovered along the way are added to the graph through `self._add_edge`.

        :param state:       The input state at that program point.
        :param live_defs:   All live definitions prior to reaching this program point. A copy is made before any
                            update is applied.
        :returns:           The new live definitions.
        """

        # Work on a copy of the incoming definitions
        live_defs = live_defs.copy()

        actions = list(state.log.actions)

        # Temporaries are local, so a plain dict (tmp -> code location of its latest write) is all we need
        tmp_write_locs = { }

        # Register and tmp edges are not committed to the graph immediately, since a later action may still want to
        # attach more labels to them. They are flushed either right before a newer edge for the same key is recorded,
        # or at the very end of this method.
        temps_to_edges = defaultdict(list)
        regs_to_edges = defaultdict(list)

        def _label_pending_edges(pending, key, **extra_labels):
            """
            Append extra label values to every pending edge stored under `key`.

            :param pending:         Either `temps_to_edges` or `regs_to_edges`.
            :param key:             The key whose pending edges are to be labelled.
            :param extra_labels:    Label name -> value; each value is appended to a tuple kept under that name.
            """

            for _, _, edge_labels in pending[key]:
                for name, value in extra_labels.iteritems():
                    edge_labels[name] = edge_labels[name] + (value, ) if name in edge_labels else (value, )

        def _flush_pending_edges(pending, key, del_key=True):
            """
            Add every pending edge stored under `key` to the graph, then optionally drop the key.

            :param pending:     Either `temps_to_edges` or `regs_to_edges`.
            :param key:         The key whose pending edges are to be committed.
            :param del_key:     Whether to remove `key` from `pending` afterwards.
            """

            for src_loc, dst_loc, edge_labels in pending[key]:
                self._add_edge(src_loc, dst_loc, **edge_labels)

            if del_key:
                del pending[key]

        for action in actions:

            if action.bbl_addr is not None:
                here = CodeLocation(action.bbl_addr, action.stmt_idx, ins_addr=action.ins_addr)
            else:
                here = CodeLocation(None, None, sim_procedure=action.sim_procedure)

            if action.type == "mem":
                if action.actual_addrs is not None:
                    addresses = set(action.actual_addrs)
                else:
                    # For now, mem reads don't necessarily have actual_addrs set properly
                    addresses = set(state.memory.normalize_address(action.addr.ast, convert_to_valueset=True))

                for address in addresses:
                    mem_var = SimMemoryVariable(address, action.data.ast.size()) # TODO: Properly unpack the SAO

                    if action.action == "read":
                        # Link every reaching definition to this use site
                        reaching = self._def_lookup(live_defs, mem_var)

                        for def_loc, def_labels in reaching.iteritems():
                            self._read_edge = True
                            self._add_edge(def_loc, here, **def_labels)

                    else: #if action.action == "write":
                        # The write kills whatever definition was live for this variable
                        self._kill(live_defs, mem_var, here)

                    # Revise the pending edges of everything the address and the data depend on
                    for operand, subtype in ((action.addr, 'mem_addr'), (action.data, 'mem_data')):
                        for reg_off in operand.reg_deps:
                            _label_pending_edges(regs_to_edges, reg_off, subtype=subtype)
                        for tmp in operand.tmp_deps:
                            _label_pending_edges(temps_to_edges, tmp, subtype=subtype)

            elif action.type == 'reg':
                # For now, we assume the register offset is not symbolic
                # TODO: Support symbolic register offsets

                # The variable always spans the full architecture width, not the width of the accessed data
                reg_var = SimRegisterVariable(action.offset, self.project.arch.bits)

                if action.action == 'read':
                    reaching = self._def_lookup(live_defs, reg_var)

                    # Commit the edges of the previous read before recording the new ones
                    if action.offset in regs_to_edges:
                        _flush_pending_edges(regs_to_edges, action.offset)

                    for def_loc, def_labels in reaching.iteritems():
                        regs_to_edges[action.offset].append((def_loc, here, def_labels))

                else:
                    # write
                    self._kill(live_defs, reg_var, here)

            elif action.type == 'tmp':
                # tmp is definitely not symbolic
                if action.action == 'read':
                    def_loc = tmp_write_locs[action.tmp]
                    pending_edge = (def_loc, here, {'type':'tmp', 'data':action.tmp})

                    if action.tmp in temps_to_edges:
                        _flush_pending_edges(temps_to_edges, action.tmp)

                    temps_to_edges[action.tmp].append(pending_edge)

                else:
                    # write
                    tmp_write_locs[action.tmp] = here

            elif action.type == 'exit':
                # exits should only depend on tmps

                for tmp in action.tmp_deps:
                    def_loc = tmp_write_locs[tmp]
                    pending_edge = (def_loc, here, {'type': 'exit', 'data': tmp})

                    if tmp in temps_to_edges:
                        _flush_pending_edges(temps_to_edges, tmp)

                    temps_to_edges[tmp].append(pending_edge)

        # Finally, commit whatever is still pending. Keys are kept so the dicts are not resized while iterating.
        for reg_off in regs_to_edges:
            _flush_pending_edges(regs_to_edges, reg_off, del_key=False)
        for tmp in temps_to_edges:
            _flush_pending_edges(temps_to_edges, tmp, del_key=False)

        return live_defs

Example #3

Show file

File: return_value_checks.py Project: zhang-li/acsac-course

    def function_check(self, function):
        """
        Check whether the return value of `function` is consumed at every calling site.

        The check passes trivially (with a warning) when the function does not return, when it cannot be found in
        the fast CFG, or when it has no predecessor in that CFG. Otherwise a data dependency graph is built for the
        function of each predecessor, and the check fails as soon as one return site has no consumer of the
        return-value register.

        :param angr.knowledge.Function function: The function to be checked against.
        :return: True if the policy is respected, False otherwise.
        :rtype: bool
        """

        if function.returning is False:
            l.warning('Function %#x does not return.', function.addr)
            return True

        cfg = self._fast_cfg

        # locate the function itself in the control flow graph
        callee_node = cfg.get_any_node(function.addr)
        if not callee_node:
            l.warning('Function %#x is not found in the control flow graph.',
                      function.addr)
            return True

        # its predecessors are the callers
        caller_nodes = cfg.get_all_predecessors(callee_node)
        if not caller_nodes:
            # either nobody calls the function, or the caller could not be resolved
            l.warning(
                'Function %#x is not called by any node throughout the control flow graph.',
                function.addr)
            return True

        project = self.project

        for caller_node in caller_nodes:  # type: angr.analyses.cfg_node.CFGNode
            caller_addr = caller_node.function_address
            if caller_addr is None:
                continue

            caller_func = cfg.functions.get(caller_addr, None)  # type: angr.knowledge.Function
            if caller_func is None:
                continue

            # build a data dependency graph restricted to the calling function
            scratch_kb = KnowledgeBase(project, project.loader.main_bin)
            caller_cfg = project.analyses.CFGAccurate(
                call_depth=0,
                base_graph=caller_func.graph,
                keep_state=True,
            )
            dep_graph = project.analyses.DataDependencyAnalysis(
                caller_cfg,
                kb=scratch_kb,
            )

            # the variable that holds the return value on this architecture
            reg_name = ReturnValueAnalysis.RETURN_VALUE_REGISTERS[project.arch.name]
            reg_offset, reg_size = project.arch.registers[reg_name]
            ret_var = SimRegisterVariable(reg_offset, reg_size * 8)

            # the return site is located right behind the calling block
            return_site = CodeLocation(caller_node.addr + caller_node.size, -1)
            ret_var_def = ProgramVariable(ret_var, return_site)
            # TODO: add return value nodes in DataDependencyAnalysis

            if not dep_graph.find_consumers(ret_var_def):
                l.warning(
                    'Return value of function %#x is not checked at calling site %#x.',
                    function.addr, caller_node.addr)
                return False

        return True

Example #4

Show file

File: ddg.py Project: yuede/angr

    def _track(self, state, live_defs, statements):
        """
        Given all live definitions prior to this program point, track the changes, and return a new list of live
        definitions. We scan through the action list of the new state to track the changes.

        While scanning, edges are added to the statement dependence graph (through `_stmt_graph_add_edge`) and to the
        data dependence graph (through `_data_graph_add_edge` / `_data_graph_add_node`). The incoming `live_defs` is
        copied first; only the copy is passed to `_kill` and returned.

        :param state:       The input state at that program point.
        :param live_defs:   A list of all live definitions prior to reaching this program point.
        :param list statements: A list of VEX statements, indexed by statement ID. May be None, in which case no
                                constant value is recovered from the statements.
        :returns:           A list of new live definitions.
        """

        # Make a copy of live_defs
        live_defs = live_defs.copy()

        action_list = list(state.log.actions)

        # Since all temporary variables are local, we simply track them in a local dict
        # temp_defs:             tmp -> code location of its latest write
        # temp_variables:        tmp -> ProgramVariable created at its latest write
        # temp_register_symbols: tmp -> (sort, offset), where sort is 'sp' or 'bp'; set when the tmp was derived from
        #                        the stack pointer or the base pointer (optionally plus/minus a constant)
        temp_defs = {}
        temp_variables = {}
        temp_register_symbols = {}

        # All dependence edges are added to the graph either at the end of this method, or when they are going to be
        # overwritten by a new edge. This is because we sometimes have to modify a previous edge (e.g. add new labels
        # to the edge)
        # NOTE(review): in this version edges are added to the statement graph immediately; these dicts only record
        # (source location, destination location) pairs so they can be annotated later.
        temps_to_edges = defaultdict(list)
        regs_to_edges = defaultdict(list)

        last_statement_id = None
        pv_read = None  # program variables read out in the same statement. we keep a copy of those variables here so
        # we can link it to the tmp_write action right afterwards
        data_generated = None  # (sort, offset) produced by the current statement, if any; see temp_register_symbols

        # tracks stack pointer and base pointer
        #sp = state.se.any_int(state.regs.sp) if not state.regs.sp.symbolic else None
        #bp = state.se.any_int(state.regs.bp) if not state.regs.bp.symbolic else None

        for a in action_list:

            # Entering a new statement: reset the per-statement bookkeeping
            if last_statement_id is None or last_statement_id != a.stmt_idx:
                pv_read = []
                data_generated = None
                last_statement_id = a.stmt_idx

            # Actions coming from a SimProcedure have no block address / statement ID to refer to
            if a.sim_procedure is None:
                current_code_location = CodeLocation(a.bbl_addr,
                                                     a.stmt_idx,
                                                     ins_addr=a.ins_addr)
            else:
                current_code_location = CodeLocation(
                    None, None, sim_procedure=a.sim_procedure)

            if a.type == "mem":
                if a.actual_addrs is None:
                    # For now, mem reads don't necessarily have actual_addrs set properly
                    try:
                        addr_list = {state.se.any_int(a.addr.ast)}
                    except (SimSolverModeError, SimUnsatError,
                            ZeroDivisionError):
                        # FIXME: ZeroDivisionError should have been caught by claripy and simuvex.
                        # FIXME: see claripy issue #75. this is just a temporary workaround.
                        # it's symbolic... just continue
                        addr_list = {
                            0x60000000
                        }  # TODO: this is a random address that I pick. Fix it.
                else:
                    addr_list = set(a.actual_addrs)

                for addr in addr_list:

                    # Prefer a stack variable when the access goes to exactly one address, and that address comes
                    # from a single tmp known to be sp-/bp-relative
                    variable = None
                    if len(addr_list) == 1 and len(a.addr.tmp_deps) == 1:
                        addr_tmp = list(a.addr.tmp_deps)[0]
                        if addr_tmp in temp_register_symbols:
                            # it must be a stack variable
                            # NOTE(review): the size is presumably converted from bits to bytes here; `/` is integer
                            # division only under Python 2 - confirm before porting
                            sort, offset = temp_register_symbols[addr_tmp]
                            variable = SimStackVariable(offset,
                                                        a.data.ast.size() / 8,
                                                        base=sort,
                                                        base_addr=addr -
                                                        offset)

                    # Otherwise fall back to a plain memory variable
                    if variable is None:
                        variable = SimMemoryVariable(
                            addr,
                            a.data.ast.size() /
                            8)  # TODO: Properly unpack the SAO

                    # program variables touched by this action at this address
                    pvs = []

                    if a.action == "read":
                        # Create an edge between def site and use site

                        prevdefs = self._def_lookup(live_defs, variable)

                        # TODO: prevdefs should only contain location, not labels
                        for prev_code_loc, labels in prevdefs.iteritems():
                            self._stmt_graph_add_edge(prev_code_loc,
                                                      current_code_location,
                                                      **labels)

                            pvs.append(ProgramVariable(variable,
                                                       prev_code_loc))

                        # No reaching definition: treat this read as the initial definition of the variable
                        if not pvs:
                            pvs.append(
                                ProgramVariable(variable,
                                                current_code_location,
                                                initial=True))
                            # make sure to put it into the killing set
                            self._kill(live_defs, variable,
                                       current_code_location)

                        # remember what was read so the following tmp write in this statement can be linked to it
                        for pv in pvs:
                            pv_read.append(pv)

                    if a.action == "write":
                        # Kill the existing live def
                        self._kill(live_defs, variable, current_code_location)

                        pvs.append(
                            ProgramVariable(variable, current_code_location))

                    for pv in pvs:
                        # For each of its register dependency and data dependency, we annotate the corresponding edge
                        # Registers the address depends on
                        for reg_offset in a.addr.reg_deps:
                            self._stmt_graph_annotate_edges(
                                regs_to_edges[reg_offset], subtype='mem_addr')
                            reg_variable = SimRegisterVariable(
                                reg_offset,
                                self._get_register_size(reg_offset))
                            prev_defs = self._def_lookup(
                                live_defs, reg_variable)
                            for loc, _ in prev_defs.iteritems():
                                v = ProgramVariable(reg_variable, loc)
                                self._data_graph_add_edge(v,
                                                          pv,
                                                          type='mem_addr')

                        # Temporaries the address depends on
                        for tmp in a.addr.tmp_deps:
                            self._stmt_graph_annotate_edges(
                                temps_to_edges[tmp], subtype='mem_addr')
                            if tmp in temp_variables:
                                self._data_graph_add_edge(temp_variables[tmp],
                                                          pv,
                                                          type='mem_addr')

                        # Registers the data depends on
                        for reg_offset in a.data.reg_deps:
                            self._stmt_graph_annotate_edges(
                                regs_to_edges[reg_offset], subtype='mem_data')
                            reg_variable = SimRegisterVariable(
                                reg_offset,
                                self._get_register_size(reg_offset))
                            prev_defs = self._def_lookup(
                                live_defs, reg_variable)
                            for loc, _ in prev_defs.iteritems():
                                v = ProgramVariable(reg_variable, loc)
                                self._data_graph_add_edge(v,
                                                          pv,
                                                          type='mem_data')

                        # Temporaries the data depends on
                        for tmp in a.data.tmp_deps:
                            self._stmt_graph_annotate_edges(
                                temps_to_edges[tmp], subtype='mem_data')
                            if tmp in temp_variables:
                                self._data_graph_add_edge(temp_variables[tmp],
                                                          pv,
                                                          type='mem_data')

            elif a.type == 'reg':
                # TODO: Support symbolic register offsets

                # NOTE(review): same Python 2 integer division as for memory variables above
                reg_offset = a.offset
                variable = SimRegisterVariable(reg_offset,
                                               a.data.ast.size() / 8)

                if a.action == 'read':
                    # What do we want to do?
                    prevdefs = self._def_lookup(live_defs, variable)

                    # add edges to the statement dependence graph
                    for prev_code_loc, labels in prevdefs.iteritems():
                        self._stmt_graph_add_edge(prev_code_loc,
                                                  current_code_location,
                                                  **labels)
                        # record the edge
                        edge_tuple = (prev_code_loc, current_code_location)
                        regs_to_edges[reg_offset].append(edge_tuple)

                        pv_read.append(ProgramVariable(variable,
                                                       prev_code_loc))

                    if not prevdefs:
                        # the register was never defined before - it must be passed in as an argument
                        pv_read.append(
                            ProgramVariable(variable,
                                            current_code_location,
                                            initial=True))
                        # make sure to put it into the killing set
                        self._kill(live_defs, variable, current_code_location)

                    # Reading sp or bp starts a new (sort, offset) symbol with offset 0
                    if reg_offset == self.project.arch.sp_offset:
                        data_generated = ('sp', 0)
                    elif reg_offset == self.project.arch.bp_offset:
                        data_generated = ('bp', 0)

                else:
                    # write
                    self._kill(live_defs, variable, current_code_location)

                    if reg_offset in regs_to_edges:
                        # clear the existing edges definition
                        del regs_to_edges[reg_offset]

                    # add a node on the data dependence graph
                    pv = ProgramVariable(variable, current_code_location)
                    self._data_graph_add_node(pv)

                    if not a.reg_deps and not a.tmp_deps:
                        # moving a constant into the register
                        # try to parse out the constant from statement
                        # (a SimConstantVariable without a value is used when the constant cannot be recovered)
                        const_variable = SimConstantVariable()
                        if statements is not None:
                            stmt = statements[a.stmt_idx]
                            if isinstance(stmt.data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(
                                    value=stmt.data.con.value)
                        const_pv = ProgramVariable(const_variable,
                                                   current_code_location)
                        self._data_graph_add_edge(const_pv, pv)

                    # the register now depends on every tmp that was used to compute the written value
                    for tmp in a.tmp_deps:
                        if tmp in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp], pv)

            elif a.type == 'tmp':
                # tmp is definitely not symbolic
                tmp = a.tmp
                pv = ProgramVariable(SimTemporaryVariable(tmp),
                                     current_code_location)

                if a.action == 'read':
                    # raises KeyError if this tmp is read before any write to it has been seen
                    prev_code_loc = temp_defs[tmp]

                    self._stmt_graph_add_edge(prev_code_loc,
                                              current_code_location,
                                              type='tmp',
                                              data=a.tmp)
                    # record the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    temps_to_edges[a.tmp].append(edge_tuple)

                    # carry the sp/bp symbol of the tmp over to whatever this statement produces
                    if tmp in temp_register_symbols:
                        data_generated = temp_register_symbols[tmp]

                else:
                    # write
                    temp_defs[tmp] = current_code_location
                    temp_variables[tmp] = pv

                    # clear existing edges
                    if tmp in temps_to_edges:
                        del temps_to_edges[tmp]

                    for tmp_dep in a.tmp_deps:
                        if tmp_dep in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp_dep],
                                                      pv)

                    # the statement produced an sp/bp-relative value: remember it for this tmp
                    if data_generated:
                        temp_register_symbols[tmp] = data_generated

                    # link everything read earlier in this statement to the tmp being written
                    for data in pv_read:
                        self._data_graph_add_edge(data, pv)

                    if not a.tmp_deps and not pv_read:
                        # read in a constant
                        # try to parse out the constant from statement
                        const_variable = SimConstantVariable()
                        if statements is not None:
                            stmt = statements[a.stmt_idx]
                            if isinstance(stmt, pyvex.IRStmt.Dirty):
                                l.warning(
                                    'Dirty statements are not supported in DDG for now.'
                                )
                            elif isinstance(stmt.data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(
                                    value=stmt.data.con.value)
                        const_pv = ProgramVariable(const_variable,
                                                   current_code_location)
                        self._data_graph_add_edge(const_pv, pv)

            elif a.type == 'exit':
                # exits should only depend on tmps
                for tmp in a.tmp_deps:
                    # raises KeyError if the tmp has not been written in this action list
                    prev_code_loc = temp_defs[tmp]

                    # add the edge to the graph
                    # NOTE(review): `data` is the literal string 'tmp', not the tmp index - confirm this is intended
                    self._stmt_graph_add_edge(prev_code_loc,
                                              current_code_location,
                                              type='exit',
                                              data='tmp')

                    # log the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    temps_to_edges[tmp].append(edge_tuple)

            elif a.type == 'operation':
                # FIXME: we should support a more complete range of operations
                # Only 32-/64-bit Add and Sub of the form "tmp op const" are followed, in order to keep track of
                # sp-/bp-relative offsets

                if a.op.endswith('Sub32') or a.op.endswith('Sub64'):
                    # subtract
                    expr_0, expr_1 = a.exprs

                    if expr_0.tmp_deps and (not expr_1.tmp_deps
                                            and not expr_1.reg_deps):
                        # tmp - const
                        # only the first tmp dependency of the left operand is considered
                        tmp = list(expr_0.tmp_deps)[0]
                        if tmp in temp_register_symbols:
                            sort, offset = temp_register_symbols[tmp]
                            offset -= expr_1.ast.args[0]
                            data_generated = (sort, offset)

                elif a.op.endswith('Add32') or a.op.endswith('Add64'):
                    # add
                    expr_0, expr_1 = a.exprs

                    if expr_0.tmp_deps and (not expr_1.tmp_deps
                                            and not expr_1.reg_deps):
                        # tmp + const
                        # only the first tmp dependency of the left operand is considered
                        tmp = list(expr_0.tmp_deps)[0]
                        if tmp in temp_register_symbols:
                            sort, offset = temp_register_symbols[tmp]
                            offset += expr_1.ast.args[0]
                            data_generated = (sort, offset)

        #import pprint
        #pprint.pprint(self._data_graph.edges())
        #pprint.pprint(self.simplified_data_graph.edges())
        # import ipdb; ipdb.set_trace()

        return live_defs

Example #5

Show file

    def _register_reallocation(self, function, data_graph):
        """
        Find unused registers throughout the function, and use those registers to replace stack variables.

        Only functions that satisfy the following criteria can be optimized in this way:
        - The function does not call any other function.
        - The function does not use esp to index any stack variable.
        - Prologue and epilogue of the function is identifiable.
        - At least one register is not used in the entire function.

        :param angr.knowledge.Function function:
        :param networkx.MultiDiGraph data_graph:
        :return: None
        """

        # make sure this function does not call other functions
        if function.callout_sites:
            return

        if len(function.endpoints) != 1:
            return

        # identify function prologue and epilogue
        startpoint_block = self.project.factory.block(
            function.startpoint.addr).capstone
        startpoint_insns = startpoint_block.insns

        # supported function prologues:
        #
        # push  ebp
        # mov   ebp, esp
        # sub   esp, [0-9a-f]+h
        #
        # push  ebp
        # mov   ebp, esp
        # push  eax

        if len(startpoint_insns) < 3:
            return

        insn0, insn1, insn2 = startpoint_insns[:3]

        if not (insn0.mnemonic == 'push' and insn0.op_str == 'ebp'):
            return
        if not (insn1.mnemonic == 'mov' and insn1.op_str == 'ebp, esp'):
            return
        if not (insn2.mnemonic == 'sub' and re.match(r"esp, [0-9a-fx]+", insn2.op_str)) and \
                not (insn2.mnemonic == 'push' and insn2.op_str == 'eax'):
            return

        endpoint_block = self.project.factory.block(
            function.endpoints[0].addr).capstone
        endpoint_insns = endpoint_block.insns

        # supported function epilogues:
        #
        # add   esp, [0-9a-f]+h
        # pop   ebp
        # ret

        if len(endpoint_insns) < 3:
            return

        insn3, insn4, insn5 = endpoint_insns[-3:]

        if not (insn3.mnemonic == 'add'
                and re.match(r"esp, [0-9a-fx]+", insn3.op_str)):
            return
        if not (insn4.mnemonic == 'pop' and insn4.op_str == 'ebp'):
            return
        if not insn5.mnemonic == 'ret':
            return

        # make sure esp is not used anywhere else - all stack variables must be indexed using ebp
        esp_offset = self.project.arch.registers['esp'][0]
        ebp_offset = self.project.arch.registers['ebp'][0]
        esp_variables = []
        for n in data_graph.nodes_iter():
            if isinstance(
                    n.variable,
                    SimRegisterVariable) and n.variable.reg == esp_offset:
                esp_variables.append(n)

        # find out all call instructions
        call_insns = set()
        for src, dst, data in function.transition_graph.edges_iter(data=True):
            if 'type' in data and data['type'] == 'call':
                src_block = function._get_block(src.addr)
                call_insns.add(src_block.instruction_addrs[-1])

        # there should be six esp variables + all call sites
        # push ebp (insn0 - read, insn0 - write) ; sub esp, 0xXX (insn2) ;
        # add esp, 0xXX (insn3) ; pop ebp (insn4) ; ret (insn5)

        esp_insns = set(n.location.ins_addr for n in esp_variables)
        if esp_insns != {
                insn0.address, insn2.address, insn3.address, insn4.address,
                insn5.address
        } | call_insns:
            return

        prologue_addr = insn0.address
        prologue_size = insn0.size + insn1.size + insn2.size
        epilogue_addr = insn3.address
        epilogue_size = insn3.size + insn4.size + insn5.size

        # look at consumer of those esp variables. no other instruction should be consuming them
        # esp_consumer_insns = { insn0.address, insn1.address, insn2.address, insn3.address, insn4.address,
        #                        insn5.address} | esp_insns
        # for esp_variable in esp_variables:  # type: angr.analyses.ddg.ProgramVariable
        #     consumers = data_graph.successors(esp_variable)
        #     if any([ consumer.location.ins_addr not in esp_consumer_insns for consumer in consumers ]):
        #         return

        # make sure we never get the address of those stack variables into any register
        # say, lea edx, [ebp-0x4] is forbidden
        # check all edges in data graph
        for src, dst, data in data_graph.edges_iter(data=True):
            if isinstance(dst.variable, SimRegisterVariable) and \
                    dst.variable.reg != ebp_offset and \
                    dst.variable.reg < 40:
                # to a register other than ebp
                if isinstance(src.variable, SimRegisterVariable) and \
                        src.variable.reg == ebp_offset:
                    # from ebp
                    l.debug(
                        "Found a lea operation from ebp at %#x. Function %s cannot be optimized.",
                        dst.location.ins_addr,
                        repr(function),
                    )
                    return

        # we definitely don't want to mess with fp or sse operations
        for node in data_graph.nodes_iter():
            if isinstance(node.variable, SimRegisterVariable) and \
                    72 <= node.variable.reg < 288:  # offset(mm0) <= node.variable.reg < offset(cs)
                l.debug(
                    'Found a float-point/SSE register access at %#x. Function %s cannot be optimized.',
                    node.location.ins_addr, repr(function))
                return

        l.debug("RegisterReallocation: function %s satisfies the criteria.",
                repr(function))

        # nice. let's see if we can optimize this function
        # do we have free registers?

        used_general_registers = set()
        for n in data_graph.nodes_iter():
            if isinstance(n.variable, SimRegisterVariable):
                if n.variable.reg < 40:  # this is a hardcoded limit - we only care about general registers
                    used_general_registers.add(n.variable.reg)
        registers = self.project.arch.registers
        all_general_registers = {  #registers['eax'][0], registers['ecx'][0], registers['edx'][0],
            registers['ebx'][0], registers['edi'][0], registers['esi'][0],
            registers['esp'][0], registers['ebp'][0]
        }
        unused_general_registers = all_general_registers - used_general_registers

        if not unused_general_registers:
            l.debug(
                "RegisterReallocation: function %s does not have any free register.",
                repr(function))
            return
        l.debug(
            "RegisterReallocation: function %s has %d free register(s): %s",
            repr(function), len(unused_general_registers), ", ".join([
                self.project.arch.register_names[u]
                for u in unused_general_registers
            ]))

        # find local stack variables of size 4
        stack_variables = set()
        for n in data_graph.nodes_iter():
            if isinstance(n.variable, SimStackVariable) and \
                    n.variable.base == 'bp' and \
                    n.variable.size == 4 and \
                    n.variable.offset < 0:
                stack_variables.add(n)

        # alright, now we need to make sure that stack variables are never accessed by indexes
        # in other words, they must be accessed directly in forms of 'dword ptr [ebp+x]'
        # it's easy to do this: we get mem_addr predecessors of each stack variable, and make sure there are only two of
        # them: one is ebp, the other one is a constant
        #
        # ah, also, since we do not want to mess with crazy fp registers, we further require none of the stack variable
        # sources and consumers is a FP register.

        filtered_stack_variables = set()
        for stack_variable in stack_variables:

            failed = False

            # check how they are accessed
            in_edges = data_graph.in_edges(stack_variable, data=True)
            for src, _, data in in_edges:
                if 'type' in data and data['type'] == 'mem_addr':
                    if isinstance(src.variable, SimRegisterVariable
                                  ) and src.variable.reg == ebp_offset:
                        # ebp
                        pass
                    elif isinstance(src.variable, SimConstantVariable):
                        # the constant
                        pass
                    else:
                        # ouch
                        failed = True
                        break

                if isinstance(src.variable,
                              SimRegisterVariable) and src.variable.reg >= 72:
                    # it comes from a FP register
                    failed = True
                    break

            if failed:
                continue

            # check consumers
            out_edges = data_graph.out_edges(stack_variable, data=True)
            for _, dst, data in out_edges:
                if 'type' in data and data['type'] == 'kill':
                    continue
                if isinstance(dst.variable,
                              SimRegisterVariable) and dst.variable.reg >= 72:
                    # an FP register is the consumer
                    failed = True
                    break

            if failed:
                continue

            filtered_stack_variables.add(stack_variable)

        # order the stack variables by the sum of their in and out degrees.
        stack_variable_to_degree = defaultdict(int)
        stack_variable_sources = defaultdict(list)
        for sv in filtered_stack_variables:
            stack_variable_to_degree[sv.variable] += data_graph.in_degree(sv)
            stack_variable_to_degree[sv.variable] += data_graph.out_degree(sv)
            stack_variable_sources[sv.variable].append(sv)

        sorted_stack_variables = sorted(
            stack_variable_to_degree.keys(),
            key=lambda sv: stack_variable_to_degree[sv],
            reverse=True)

        # aha these are the ones that we can replace!
        for reg, sv in zip(unused_general_registers, sorted_stack_variables):

            non_initial_sources = [
                src for src in stack_variable_sources[sv] if not src.initial
            ]

            if not non_initial_sources:
                # we failed to find any source for it, which indicates a failure in our dependence analysis
                # skip
                continue

            # get consumers
            consumers = set()
            for src in stack_variable_sources[sv]:
                out_edges = data_graph.out_edges(src, data=True)
                for _, dst, data in out_edges:
                    if 'type' not in data or data['type'] != 'kill':
                        consumers.add(dst)

            rr = RegisterReallocation(sv, SimRegisterVariable(reg, 4),
                                      non_initial_sources, list(consumers),
                                      prologue_addr, prologue_size,
                                      epilogue_addr, epilogue_size)
            self.register_reallocations.append(rr)

            l.debug("RegisterReallocation: %s will replace %s in function %s.",
                    rr.register_variable, rr.stack_variable, repr(function))