Example #1
0
    def _analyze(self):
        """
        Determine which values may end up in the return-value register.

        The register holding the return value is looked up by architecture name in `RETURN_VALUE_REGISTERS`. Every
        definition of that register that has neither consumers nor killers in the data dependence results is traced
        back to its sources, and one return-value object per source is collected into `self.return_values`.
        When no such definition exists, a warning is logged and `self.return_values` is left untouched.

        :return: None
        :raises StaticPoliceKeyNotFoundError: If no return register is specified for the project's architecture.
        """

        arch = self.project.arch

        # which register stores the return value on this architecture?
        reg_name = self.RETURN_VALUE_REGISTERS.get(arch.name, None)
        if reg_name is None:
            raise StaticPoliceKeyNotFoundError(
                'Return register is not specified for architecture %s.' %
                arch.name)

        reg_offset, reg_size = arch.registers[reg_name]
        # the register size is multiplied by 8 before it is handed to the variable
        ret_variable = SimRegisterVariable(reg_offset, reg_size * 8)

        data_dep = self._data_dep

        # keep only the "final" definitions: those that nothing consumes and nothing kills
        final_defs = [
            definition
            for definition in data_dep.find_definitions(ret_variable)
            if not (data_dep.find_consumers(definition)
                    or data_dep.find_killers(definition))
        ]

        if not final_defs:
            l.warning('Cannot find any definition for return value.')
            return

        collected = []

        # walk backwards from each final definition to whatever produced it
        for definition in final_defs:
            origins = data_dep.find_sources(definition)

            if not origins:
                # a definition without any source - nothing to report for it
                continue

            # constants are reported with their value, everything else as unknown
            collected.extend(
                ConstantReturnValue(origin.variable.value)
                if isinstance(origin.variable, SimConstantVariable)
                else UnknownReturnValue()
                for origin in origins)

        self.return_values = collected
Example #2
0
    def _track(self, state, live_defs):
        """
        Replay the actions logged on a state, adding dependence edges and updating the live definitions.

        Starting from the definitions that are live before this program point, every action in the state's action log
        is examined in order: memory, register, temporary and exit actions each contribute edges and/or kill
        definitions.

        :param state:       The input state at that program point.
        :param live_defs:   All live definitions prior to reaching this program point. A copy is made before any
                            change is applied.
        :returns:           The new live definitions (the updated copy).
        """

        # Work on a copy of live_defs
        live_defs = live_defs.copy()

        actions = list(state.log.actions)

        # Temporaries are local to this run, so a plain dict (tmp -> defining code location) is enough
        tmp_def_locs = { }

        # Edges that stem from register/tmp reads are parked in these two dicts instead of going to the graph right
        # away, because a later action may still have to attach more labels to them. A parked edge reaches the graph
        # either when a newer edge for the same key is about to replace it, or at the very end of this method.
        pending_tmp_edges = defaultdict(list)
        pending_reg_edges = defaultdict(list)

        def _label_pending(pending, key, **extra_labels):
            """
            Attach additional labels to every edge parked under a key.

            :param pending:         Either `pending_tmp_edges` or `pending_reg_edges`.
            :param key:             The key whose parked edges get labelled.
            :param extra_labels:    Labels to add. Values of one label accumulate in a tuple.
            """

            for _, _, edge_labels in pending[key]:
                for name, value in extra_labels.iteritems():
                    if name not in edge_labels:
                        # first value for this label: start a tuple
                        edge_labels[name] = (value, )
                    else:
                        edge_labels[name] = edge_labels[name] + (value, )

        def _flush_pending(pending, key, forget=True):
            """
            Add every edge parked under a key to the graph, then optionally drop the key.

            :param pending: Either `pending_tmp_edges` or `pending_reg_edges`.
            :param key:     The key whose parked edges are emitted.
            :param forget:  Whether the key is deleted from the dict afterwards.
            """

            for src_loc, dst_loc, edge_labels in pending[key]:
                self._add_edge(src_loc, dst_loc, **edge_labels)

            if forget:
                del pending[key]

        def _park_tmp_edge(tmp_id, edge):
            """
            Park a new edge for a temporary, emitting whatever was parked for it before.

            :param tmp_id:  The temporary the edge belongs to.
            :param edge:    A (source location, destination location, labels) tuple.
            """

            if tmp_id in pending_tmp_edges:
                _flush_pending(pending_tmp_edges, tmp_id)

            pending_tmp_edges[tmp_id].append(edge)

        for action in actions:

            # actions without a basic block address are attributed to their SimProcedure
            if action.bbl_addr is not None:
                here = CodeLocation(action.bbl_addr, action.stmt_idx, ins_addr=action.ins_addr)
            else:
                here = CodeLocation(None, None, sim_procedure=action.sim_procedure)

            if action.type == "mem":
                if action.actual_addrs is not None:
                    addresses = set(action.actual_addrs)
                else:
                    # For now, mem reads don't necessarily have actual_addrs set properly
                    addresses = set(state.memory.normalize_address(action.addr.ast, convert_to_valueset=True))

                for address in addresses:
                    mem_var = SimMemoryVariable(address, action.data.ast.size()) # TODO: Properly unpack the SAO

                    if action.action != "read":
                        # anything that is not a read is handled as a write: kill the existing live def
                        self._kill(live_defs, mem_var, here)

                    else:
                        # connect every reaching def site to this use site
                        reaching = self._def_lookup(live_defs, mem_var)

                        for def_loc, def_labels in reaching.iteritems():
                            self._read_edge = True
                            self._add_edge(def_loc, here, **def_labels)

                    # The registers and temporaries that fed the address or the data get their parked edges
                    # re-labelled accordingly
                    for deps, pending, subtype in (
                            (action.addr.reg_deps, pending_reg_edges, 'mem_addr'),
                            (action.addr.tmp_deps, pending_tmp_edges, 'mem_addr'),
                            (action.data.reg_deps, pending_reg_edges, 'mem_data'),
                            (action.data.tmp_deps, pending_tmp_edges, 'mem_data'),
                    ):
                        for dep in deps:
                            _label_pending(pending, dep, subtype=subtype)

            elif action.type == 'reg':
                # For now, we assume the register offset is not symbolic
                # TODO: Support symbolic register offsets

                # the variable always spans the architecture's bit width, not the size of the data
                reg_var = SimRegisterVariable(action.offset, self.project.arch.bits)

                if action.action == 'read':
                    reaching = self._def_lookup(live_defs, reg_var)

                    # edges parked by an earlier read of this register are emitted before new ones are parked
                    if action.offset in pending_reg_edges:
                        _flush_pending(pending_reg_edges, action.offset)

                    for def_loc, def_labels in reaching.iteritems():
                        pending_reg_edges[action.offset].append((def_loc, here, def_labels))

                else:
                    # write
                    self._kill(live_defs, reg_var, here)

            elif action.type == 'tmp':
                # tmp is definitely not symbolic
                if action.action != 'read':
                    # write: remember where this temporary was defined
                    tmp_def_locs[action.tmp] = here

                else:
                    def_loc = tmp_def_locs[action.tmp]
                    _park_tmp_edge(action.tmp, (def_loc, here, {'type':'tmp', 'data':action.tmp}))

            elif action.type == 'exit':
                # exits should only depend on tmps

                for tmp in action.tmp_deps:
                    _park_tmp_edge(tmp, (tmp_def_locs[tmp], here, {'type': 'exit', 'data': tmp}))

        # Whatever is still parked at this point goes to the graph now
        for reg_offset in pending_reg_edges:
            _flush_pending(pending_reg_edges, reg_offset, forget=False)
        for tmp in pending_tmp_edges:
            _flush_pending(pending_tmp_edges, tmp, forget=False)

        return live_defs
    def function_check(self, function):
        """
        Check that the return value of a function is consumed at every place the function is reached from.

        For each predecessor of the function's node in the fast CFG, a data dependency analysis is run over the
        function that contains the predecessor, and the return-value register defined at the return site (the address
        right after the predecessor block) must have at least one consumer. The check is skipped, and the policy
        considered respected, when the function does not return, is missing from the CFG, or has no predecessors.

        :param angr.knowledge.Function function: The function to be checked against.
        :return: True if the policy is respected, False otherwise.
        :rtype: bool
        """

        if function.returning is False:
            l.warning('Function %#x does not return.', function.addr)
            return True

        fast_cfg = self._fast_cfg

        # locate the function in the control flow graph
        target_node = fast_cfg.get_any_node(function.addr)
        if not target_node:
            l.warning('Function %#x is not found in the control flow graph.',
                      function.addr)
            return True

        # the predecessors of the function's node are taken to be its callers
        # NOTE(review): if get_all_predecessors() also yields indirect predecessors, nodes that are not call sites
        # will be checked as well - confirm against the CFG API
        caller_nodes = fast_cfg.get_all_predecessors(target_node)
        if not caller_nodes:
            # either nobody calls the function, or the callers could not be resolved
            l.warning(
                'Function %#x is not called by any node throughout the control flow graph.',
                function.addr)
            return True

        for caller_node in caller_nodes:  # type: angr.analyses.cfg_node.CFGNode
            # find the function the caller block lives in; skip callers that cannot be attributed to one
            caller_func_addr = caller_node.function_address
            if caller_func_addr is None:
                caller_func = None
            else:
                caller_func = fast_cfg.functions.get(
                    caller_func_addr, None)  # type: angr.knowledge.Function
            if caller_func is None:
                continue

            # build a data dependency graph restricted to the calling function, using a throw-away knowledge base
            scratch_kb = KnowledgeBase(self.project, self.project.loader.main_bin)
            caller_cfg = self.project.analyses.CFGAccurate(
                call_depth=0,
                base_graph=caller_func.graph,
                keep_state=True,
            )
            dependencies = self.project.analyses.DataDependencyAnalysis(
                caller_cfg,
                kb=scratch_kb,
            )

            # the variable to look for: the return-value register of this architecture
            reg_name = ReturnValueAnalysis.RETURN_VALUE_REGISTERS[
                self.project.arch.name]
            reg_offset, reg_size = self.project.arch.registers[reg_name]
            ret_variable = SimRegisterVariable(reg_offset, reg_size * 8)

            # the return site is the address immediately following the calling block
            return_site = caller_node.addr + caller_node.size

            # TODO: add return value nodes in DataDependencyAnalysis
            ret_definition = ProgramVariable(ret_variable,
                                             CodeLocation(return_site, -1))

            if dependencies.find_consumers(ret_definition):
                # somebody looks at the return value here; move on to the next call site
                continue

            l.warning(
                'Return value of function %#x is not checked at calling site %#x.',
                function.addr, caller_node.addr)
            return False

        return True
Example #4
0
File: ddg.py Project: yuede/angr
    def _track(self, state, live_defs, statements):
        """
        Replay the actions logged on `state`, updating the statement dependence graph, the data dependence graph and
        the live definitions.

        Given all live definitions prior to this program point, track the changes, and return the new live
        definitions. We scan through the action list of the new state to track the changes. Memory, register,
        temporary, exit and (a few) operation actions are handled; any other action type is ignored.

        :param state:       The input state at that program point. Its `log.actions` are processed in order.
        :param live_defs:   All live definitions prior to reaching this program point. `.copy()` is called on it and
                            only the copy is updated and returned.
        :param list statements: A list of VEX statements, indexed by the `stmt_idx` of an action, or None. It is only
                                consulted to recover the value of constants.
        :returns:           The new live definitions (the updated copy of `live_defs`).
        """

        # NOTE(review): this method uses `iteritems()` and relies on `/` producing integers for sizes, i.e. it looks
        # like Python 2 code - confirm before running it on Python 3.

        # Make a copy of live_defs
        live_defs = live_defs.copy()

        action_list = list(state.log.actions)

        # Since all temporary variables are local, we simply track them in local dicts:
        temp_defs = {}  # tmp -> code location of the action that wrote it
        temp_variables = {}  # tmp -> ProgramVariable created when it was written
        temp_register_symbols = {}  # tmp -> (sort, offset), where sort is 'sp' or 'bp' (see the 'reg' branch)

        # Edges on the statement dependence graph are added right away. In addition, the (source, destination) pair of
        # each edge that originates from a register or tmp read is logged here, because we sometimes have to modify a
        # previous edge (e.g. add new labels to the edge) when a later memory action depends on that register or tmp.
        temps_to_edges = defaultdict(list)
        regs_to_edges = defaultdict(list)

        last_statement_id = None
        pv_read = None  # program variables read out in the same statement. we keep a copy of those variables here so
        # we can link it to the tmp_write action right afterwards
        data_generated = None  # (sort, offset) produced by the current statement, if any; picked up by a tmp write

        # tracks stack pointer and base pointer
        #sp = state.se.any_int(state.regs.sp) if not state.regs.sp.symbolic else None
        #bp = state.se.any_int(state.regs.bp) if not state.regs.bp.symbolic else None

        for a in action_list:

            # Per-statement state is reset whenever the statement index changes (and on the very first action)
            if last_statement_id is None or last_statement_id != a.stmt_idx:
                pv_read = []
                data_generated = None
                last_statement_id = a.stmt_idx

            # Actions that come from a SimProcedure are located by the procedure instead of by an address
            if a.sim_procedure is None:
                current_code_location = CodeLocation(a.bbl_addr,
                                                     a.stmt_idx,
                                                     ins_addr=a.ins_addr)
            else:
                current_code_location = CodeLocation(
                    None, None, sim_procedure=a.sim_procedure)

            if a.type == "mem":
                if a.actual_addrs is None:
                    # For now, mem reads don't necessarily have actual_addrs set properly
                    # Fall back to asking the solver for one concrete value of the address
                    try:
                        addr_list = {state.se.any_int(a.addr.ast)}
                    except (SimSolverModeError, SimUnsatError,
                            ZeroDivisionError):
                        # FIXME: ZeroDivisionError should have been caught by claripy and simuvex.
                        # FIXME: see claripy issue #75. this is just a temporary workaround.
                        # it's symbolic... just continue
                        addr_list = {
                            0x60000000
                        }  # TODO: this is a random address that I pick. Fix it.
                else:
                    addr_list = set(a.actual_addrs)

                for addr in addr_list:

                    variable = None
                    # With exactly one address that depends on exactly one tmp, and that tmp known to hold an sp/bp
                    # based value, the access is modelled as a stack variable
                    if len(addr_list) == 1 and len(a.addr.tmp_deps) == 1:
                        addr_tmp = list(a.addr.tmp_deps)[0]
                        if addr_tmp in temp_register_symbols:
                            # it must be a stack variable
                            sort, offset = temp_register_symbols[addr_tmp]
                            variable = SimStackVariable(offset,
                                                        a.data.ast.size() / 8,
                                                        base=sort,
                                                        base_addr=addr -
                                                        offset)

                    # Otherwise it is a plain memory variable at that address
                    if variable is None:
                        variable = SimMemoryVariable(
                            addr,
                            a.data.ast.size() /
                            8)  # TODO: Properly unpack the SAO

                    # program variables touched by this action at this address
                    pvs = []

                    if a.action == "read":
                        # Create an edge between def site and use site

                        prevdefs = self._def_lookup(live_defs, variable)

                        # TODO: prevdefs should only contain location, not labels
                        for prev_code_loc, labels in prevdefs.iteritems():
                            self._stmt_graph_add_edge(prev_code_loc,
                                                      current_code_location,
                                                      **labels)

                            pvs.append(ProgramVariable(variable,
                                                       prev_code_loc))

                        if not pvs:
                            # no previous definition was found: treat this read as the initial definition
                            pvs.append(
                                ProgramVariable(variable,
                                                current_code_location,
                                                initial=True))
                            # make sure to put it into the killing set
                            self._kill(live_defs, variable,
                                       current_code_location)

                        # remember what was read so that a tmp write in the same statement can be linked to it
                        for pv in pvs:
                            pv_read.append(pv)

                    if a.action == "write":
                        # Kill the existing live def
                        self._kill(live_defs, variable, current_code_location)

                        pvs.append(
                            ProgramVariable(variable, current_code_location))

                    for pv in pvs:
                        # For each of its register dependency and data dependency, we annotate the corresponding edge
                        # on the statement graph, and add an edge from the dependency to pv on the data graph

                        # registers used to compute the address
                        for reg_offset in a.addr.reg_deps:
                            self._stmt_graph_annotate_edges(
                                regs_to_edges[reg_offset], subtype='mem_addr')
                            reg_variable = SimRegisterVariable(
                                reg_offset,
                                self._get_register_size(reg_offset))
                            prev_defs = self._def_lookup(
                                live_defs, reg_variable)
                            for loc, _ in prev_defs.iteritems():
                                v = ProgramVariable(reg_variable, loc)
                                self._data_graph_add_edge(v,
                                                          pv,
                                                          type='mem_addr')

                        # temporaries used to compute the address
                        for tmp in a.addr.tmp_deps:
                            self._stmt_graph_annotate_edges(
                                temps_to_edges[tmp], subtype='mem_addr')
                            if tmp in temp_variables:
                                self._data_graph_add_edge(temp_variables[tmp],
                                                          pv,
                                                          type='mem_addr')

                        # registers the data depends on
                        for reg_offset in a.data.reg_deps:
                            self._stmt_graph_annotate_edges(
                                regs_to_edges[reg_offset], subtype='mem_data')
                            reg_variable = SimRegisterVariable(
                                reg_offset,
                                self._get_register_size(reg_offset))
                            prev_defs = self._def_lookup(
                                live_defs, reg_variable)
                            for loc, _ in prev_defs.iteritems():
                                v = ProgramVariable(reg_variable, loc)
                                self._data_graph_add_edge(v,
                                                          pv,
                                                          type='mem_data')

                        # temporaries the data depends on
                        for tmp in a.data.tmp_deps:
                            self._stmt_graph_annotate_edges(
                                temps_to_edges[tmp], subtype='mem_data')
                            if tmp in temp_variables:
                                self._data_graph_add_edge(temp_variables[tmp],
                                                          pv,
                                                          type='mem_data')

            elif a.type == 'reg':
                # TODO: Support symbolic register offsets

                reg_offset = a.offset
                variable = SimRegisterVariable(reg_offset,
                                               a.data.ast.size() / 8)

                if a.action == 'read':
                    # What do we want to do?
                    prevdefs = self._def_lookup(live_defs, variable)

                    # add edges to the statement dependence graph
                    for prev_code_loc, labels in prevdefs.iteritems():
                        self._stmt_graph_add_edge(prev_code_loc,
                                                  current_code_location,
                                                  **labels)
                        # record the edge
                        edge_tuple = (prev_code_loc, current_code_location)
                        regs_to_edges[reg_offset].append(edge_tuple)

                        pv_read.append(ProgramVariable(variable,
                                                       prev_code_loc))

                    if not prevdefs:
                        # the register was never defined before - it must be passed in as an argument
                        pv_read.append(
                            ProgramVariable(variable,
                                            current_code_location,
                                            initial=True))
                        # make sure to put it into the killing set
                        self._kill(live_defs, variable, current_code_location)

                    # Reading the stack pointer or base pointer starts a symbolic (sort, offset) value with offset 0
                    if reg_offset == self.project.arch.sp_offset:
                        data_generated = ('sp', 0)
                    elif reg_offset == self.project.arch.bp_offset:
                        data_generated = ('bp', 0)

                else:
                    # write
                    self._kill(live_defs, variable, current_code_location)

                    if reg_offset in regs_to_edges:
                        # clear the existing edges definition
                        del regs_to_edges[reg_offset]

                    # add a node on the data dependence graph
                    pv = ProgramVariable(variable, current_code_location)
                    self._data_graph_add_node(pv)

                    if not a.reg_deps and not a.tmp_deps:
                        # moving a constant into the register
                        # try to parse out the constant from statement
                        # (a SimConstantVariable without a value is used when it cannot be recovered)
                        const_variable = SimConstantVariable()
                        if statements is not None:
                            stmt = statements[a.stmt_idx]
                            if isinstance(stmt.data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(
                                    value=stmt.data.con.value)
                        const_pv = ProgramVariable(const_variable,
                                                   current_code_location)
                        self._data_graph_add_edge(const_pv, pv)

                    # link every known temporary the written value depends on to the register
                    for tmp in a.tmp_deps:
                        if tmp in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp], pv)

            elif a.type == 'tmp':
                # tmp is definitely not symbolic
                tmp = a.tmp
                pv = ProgramVariable(SimTemporaryVariable(tmp),
                                     current_code_location)

                if a.action == 'read':
                    # raises KeyError if this tmp has not been written by an earlier action in this list
                    prev_code_loc = temp_defs[tmp]

                    self._stmt_graph_add_edge(prev_code_loc,
                                              current_code_location,
                                              type='tmp',
                                              data=a.tmp)
                    # record the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    temps_to_edges[a.tmp].append(edge_tuple)

                    # propagate the sp/bp based value of this tmp to whatever the current statement produces
                    if tmp in temp_register_symbols:
                        data_generated = temp_register_symbols[tmp]

                else:
                    # write
                    temp_defs[tmp] = current_code_location
                    temp_variables[tmp] = pv

                    # clear existing edges
                    if tmp in temps_to_edges:
                        del temps_to_edges[tmp]

                    for tmp_dep in a.tmp_deps:
                        if tmp_dep in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp_dep],
                                                      pv)

                    # the tmp inherits the sp/bp based value generated earlier in this statement, if there is one
                    if data_generated:
                        temp_register_symbols[tmp] = data_generated

                    # everything that was read in this statement flows into the tmp
                    for data in pv_read:
                        self._data_graph_add_edge(data, pv)

                    if not a.tmp_deps and not pv_read:
                        # read in a constant
                        # try to parse out the constant from statement
                        const_variable = SimConstantVariable()
                        if statements is not None:
                            stmt = statements[a.stmt_idx]
                            if isinstance(stmt, pyvex.IRStmt.Dirty):
                                l.warning(
                                    'Dirty statements are not supported in DDG for now.'
                                )
                            elif isinstance(stmt.data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(
                                    value=stmt.data.con.value)
                        const_pv = ProgramVariable(const_variable,
                                                   current_code_location)
                        self._data_graph_add_edge(const_pv, pv)

            elif a.type == 'exit':
                # exits should only depend on tmps
                for tmp in a.tmp_deps:
                    # raises KeyError if this tmp has not been written by an earlier action in this list
                    prev_code_loc = temp_defs[tmp]

                    # add the edge to the graph
                    # NOTE(review): `data` is the literal string 'tmp' here, not the tmp id - confirm this is intended
                    self._stmt_graph_add_edge(prev_code_loc,
                                              current_code_location,
                                              type='exit',
                                              data='tmp')

                    # log the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    temps_to_edges[tmp].append(edge_tuple)

            elif a.type == 'operation':
                # FIXME: we should support a more complete range of operations
                # Only 32/64-bit Sub and Add of the form "tmp op const" are handled, in order to keep track of
                # offsets relative to sp/bp. Only the first tmp dependency of the left operand is considered.

                if a.op.endswith('Sub32') or a.op.endswith('Sub64'):
                    # subtract
                    expr_0, expr_1 = a.exprs

                    if expr_0.tmp_deps and (not expr_1.tmp_deps
                                            and not expr_1.reg_deps):
                        # tmp - const
                        tmp = list(expr_0.tmp_deps)[0]
                        if tmp in temp_register_symbols:
                            sort, offset = temp_register_symbols[tmp]
                            # presumably args[0] of the AST is the concrete constant - TODO confirm
                            offset -= expr_1.ast.args[0]
                            data_generated = (sort, offset)

                elif a.op.endswith('Add32') or a.op.endswith('Add64'):
                    # add
                    expr_0, expr_1 = a.exprs

                    if expr_0.tmp_deps and (not expr_1.tmp_deps
                                            and not expr_1.reg_deps):
                        # tmp + const
                        tmp = list(expr_0.tmp_deps)[0]
                        if tmp in temp_register_symbols:
                            sort, offset = temp_register_symbols[tmp]
                            # presumably args[0] of the AST is the concrete constant - TODO confirm
                            offset += expr_1.ast.args[0]
                            data_generated = (sort, offset)

        #import pprint
        #pprint.pprint(self._data_graph.edges())
        #pprint.pprint(self.simplified_data_graph.edges())
        # import ipdb; ipdb.set_trace()

        return live_defs
Example #5
0
    def _register_reallocation(self, function, data_graph):
        """
        Find unused registers throughout the function, and use those registers to replace stack variables.

        Only functions that satisfy the following criteria can be optimized in this way:
        - The function does not call any other function.
        - The function does not use esp to index any stack variable.
        - Prologue and epilogue of the function is identifiable.
        - At least one register is not used in the entire function.

        :param angr.knowledge.Function function:
        :param networkx.MultiDiGraph data_graph:
        :return: None
        """

        # make sure this function does not call other functions
        if function.callout_sites:
            return

        if len(function.endpoints) != 1:
            return

        # identify function prologue and epilogue
        startpoint_block = self.project.factory.block(
            function.startpoint.addr).capstone
        startpoint_insns = startpoint_block.insns

        # supported function prologues:
        #
        # push  ebp
        # mov   ebp, esp
        # sub   esp, [0-9a-f]+h
        #
        # push  ebp
        # mov   ebp, esp
        # push  eax

        if len(startpoint_insns) < 3:
            return

        insn0, insn1, insn2 = startpoint_insns[:3]

        if not (insn0.mnemonic == 'push' and insn0.op_str == 'ebp'):
            return
        if not (insn1.mnemonic == 'mov' and insn1.op_str == 'ebp, esp'):
            return
        if not (insn2.mnemonic == 'sub' and re.match(r"esp, [0-9a-fx]+", insn2.op_str)) and \
                not (insn2.mnemonic == 'push' and insn2.op_str == 'eax'):
            return

        endpoint_block = self.project.factory.block(
            function.endpoints[0].addr).capstone
        endpoint_insns = endpoint_block.insns

        # supported function epilogues:
        #
        # add   esp, [0-9a-f]+h
        # pop   ebp
        # ret

        if len(endpoint_insns) < 3:
            return

        insn3, insn4, insn5 = endpoint_insns[-3:]

        if not (insn3.mnemonic == 'add'
                and re.match(r"esp, [0-9a-fx]+", insn3.op_str)):
            return
        if not (insn4.mnemonic == 'pop' and insn4.op_str == 'ebp'):
            return
        if not insn5.mnemonic == 'ret':
            return

        # make sure esp is not used anywhere else - all stack variables must be indexed using ebp
        esp_offset = self.project.arch.registers['esp'][0]
        ebp_offset = self.project.arch.registers['ebp'][0]
        esp_variables = []
        for n in data_graph.nodes_iter():
            if isinstance(
                    n.variable,
                    SimRegisterVariable) and n.variable.reg == esp_offset:
                esp_variables.append(n)

        # find out all call instructions
        call_insns = set()
        for src, dst, data in function.transition_graph.edges_iter(data=True):
            if 'type' in data and data['type'] == 'call':
                src_block = function._get_block(src.addr)
                call_insns.add(src_block.instruction_addrs[-1])

        # there should be six esp variables + all call sites
        # push ebp (insn0 - read, insn0 - write) ; sub esp, 0xXX (insn2) ;
        # add esp, 0xXX (insn3) ; pop ebp (insn4) ; ret (insn5)

        esp_insns = set(n.location.ins_addr for n in esp_variables)
        if esp_insns != {
                insn0.address, insn2.address, insn3.address, insn4.address,
                insn5.address
        } | call_insns:
            return

        prologue_addr = insn0.address
        prologue_size = insn0.size + insn1.size + insn2.size
        epilogue_addr = insn3.address
        epilogue_size = insn3.size + insn4.size + insn5.size

        # look at consumer of those esp variables. no other instruction should be consuming them
        # esp_consumer_insns = { insn0.address, insn1.address, insn2.address, insn3.address, insn4.address,
        #                        insn5.address} | esp_insns
        # for esp_variable in esp_variables:  # type: angr.analyses.ddg.ProgramVariable
        #     consumers = data_graph.successors(esp_variable)
        #     if any([ consumer.location.ins_addr not in esp_consumer_insns for consumer in consumers ]):
        #         return

        # make sure we never get the address of those stack variables into any register
        # say, lea edx, [ebp-0x4] is forbidden
        # check all edges in data graph
        for src, dst, data in data_graph.edges_iter(data=True):
            if isinstance(dst.variable, SimRegisterVariable) and \
                    dst.variable.reg != ebp_offset and \
                    dst.variable.reg < 40:
                #to a register other than ebp
                if isinstance(src.variable, SimRegisterVariable) and \
                        src.variable.reg == ebp_offset:
                    # from ebp
                    l.debug(
                        "Found a lea operation from ebp at %#x. Function %s cannot be optimized.",
                        dst.location.ins_addr,
                        repr(function),
                    )
                    return

        # we definitely don't want to mess with fp or sse operations
        for node in data_graph.nodes_iter():
            if isinstance(node.variable, SimRegisterVariable) and \
                    72 <= node.variable.reg < 288:  # offset(mm0) <= node.variable.reg < offset(cs)
                l.debug(
                    'Found a float-point/SSE register access at %#x. Function %s cannot be optimized.',
                    node.location.ins_addr, repr(function))
                return

        l.debug("RegisterReallocation: function %s satisfies the criteria.",
                repr(function))

        # nice. let's see if we can optimize this function
        # do we have free registers?

        used_general_registers = set()
        for n in data_graph.nodes_iter():
            if isinstance(n.variable, SimRegisterVariable):
                if n.variable.reg < 40:  # this is a hardcoded limit - we only care about general registers
                    used_general_registers.add(n.variable.reg)
        registers = self.project.arch.registers
        all_general_registers = {  #registers['eax'][0], registers['ecx'][0], registers['edx'][0],
            registers['ebx'][0], registers['edi'][0], registers['esi'][0],
            registers['esp'][0], registers['ebp'][0]
        }
        unused_general_registers = all_general_registers - used_general_registers

        if not unused_general_registers:
            l.debug(
                "RegisterReallocation: function %s does not have any free register.",
                repr(function))
            return
        l.debug(
            "RegisterReallocation: function %s has %d free register(s): %s",
            repr(function), len(unused_general_registers), ", ".join([
                self.project.arch.register_names[u]
                for u in unused_general_registers
            ]))

        # find local stack variables of size 4
        stack_variables = set()
        for n in data_graph.nodes_iter():
            if isinstance(n.variable, SimStackVariable) and \
                    n.variable.base == 'bp' and \
                    n.variable.size == 4 and \
                    n.variable.offset < 0:
                stack_variables.add(n)

        # alright, now we need to make sure that stack variables are never accessed by indexes
        # in other words, they must be accessed directly in forms of 'dword ptr [ebp+x]'
        # it's easy to do this: we get mem_addr predecessors of each stack variable, and make sure there are only two of
        # them: one is ebp, the other one is a constant
        #
        # ah, also, since we do not want to mess with crazy fp registers, we further require that none of the stack
        # variable's sources or consumers is an FP register.

        filtered_stack_variables = set()
        for stack_variable in stack_variables:

            failed = False

            # check how they are accessed
            in_edges = data_graph.in_edges(stack_variable, data=True)
            for src, _, data in in_edges:
                if 'type' in data and data['type'] == 'mem_addr':
                    if isinstance(src.variable, SimRegisterVariable
                                  ) and src.variable.reg == ebp_offset:
                        # ebp
                        pass
                    elif isinstance(src.variable, SimConstantVariable):
                        # the constant
                        pass
                    else:
                        # ouch
                        failed = True
                        break

                if isinstance(src.variable,
                              SimRegisterVariable) and src.variable.reg >= 72:
                    # it comes from a FP register
                    failed = True
                    break

            if failed:
                continue

            # check consumers
            out_edges = data_graph.out_edges(stack_variable, data=True)
            for _, dst, data in out_edges:
                if 'type' in data and data['type'] == 'kill':
                    continue
                if isinstance(dst.variable,
                              SimRegisterVariable) and dst.variable.reg >= 72:
                    # an FP register is the consumer
                    failed = True
                    break

            if failed:
                continue

            filtered_stack_variables.add(stack_variable)

        # order the stack variables by the sum of their in and out degrees.
        stack_variable_to_degree = defaultdict(int)
        stack_variable_sources = defaultdict(list)
        for sv in filtered_stack_variables:
            stack_variable_to_degree[sv.variable] += data_graph.in_degree(sv)
            stack_variable_to_degree[sv.variable] += data_graph.out_degree(sv)
            stack_variable_sources[sv.variable].append(sv)

        sorted_stack_variables = sorted(
            stack_variable_to_degree.keys(),
            key=lambda sv: stack_variable_to_degree[sv],
            reverse=True)

        # aha these are the ones that we can replace!
        for reg, sv in zip(unused_general_registers, sorted_stack_variables):

            non_initial_sources = [
                src for src in stack_variable_sources[sv] if not src.initial
            ]

            if not non_initial_sources:
                # we failed to find any source for it, which indicates a failure in our dependence analysis
                # skip
                continue

            # get consumers
            consumers = set()
            for src in stack_variable_sources[sv]:
                out_edges = data_graph.out_edges(src, data=True)
                for _, dst, data in out_edges:
                    if 'type' not in data or data['type'] != 'kill':
                        consumers.add(dst)

            rr = RegisterReallocation(sv, SimRegisterVariable(reg, 4),
                                      non_initial_sources, list(consumers),
                                      prologue_addr, prologue_size,
                                      epilogue_addr, epilogue_size)
            self.register_reallocations.append(rr)

            l.debug("RegisterReallocation: %s will replace %s in function %s.",
                    rr.register_variable, rr.stack_variable, repr(function))