예제 #1
0
파일: vsa_ddg.py 프로젝트: zyq8709/angr
    def _track(self, state, live_defs):
        """
        Given all live definitions prior to this program point, track the changes, and return a new list of live
        definitions. We scan through the action list of the new state to track the changes.

        :param state:       The input state at that program point.
        :param live_defs:   A list of all live definitions prior to reaching this program point.
        :returns:           A list of new live definitions.
        """

        # Make a copy of live_defs
        live_defs = live_defs.copy()

        action_list = list(state.log.actions)

        # Since all temporary variables are local, we simply track them in a local dict
        temps = { }

        # All dependence edges are added to the graph either at the end of this method, or when they are going to be
        # overwritten by a new edge. This is because we sometimes have to modify a  previous edge (e.g. add new labels
        # to the edge)
        temps_to_edges = defaultdict(list)
        regs_to_edges = defaultdict(list)

        def _annotate_edges_in_dict(dict_, key, **new_labels):
            """

            :param dict_:       The dict, can be either `temps_to_edges` or `regs_to_edges`
            :param key:         The key used in finding elements in the dict
            :param new_labels:  New labels to be added to those edges
            """

            for edge_tuple in dict_[key]:
                # unpack it
                _, _, labels = edge_tuple
                for k, v in new_labels.iteritems():
                    if k in labels:
                        labels[k] = labels[k] + (v, )
                    else:
                        # Construct a tuple
                        labels[k] = (v, )

        def _dump_edge_from_dict(dict_, key, del_key=True):
            """
            Pick an edge from the dict based on the key specified, add it to our graph, and remove the key from dict.

            :param dict_:   The dict, can be either `temps_to_edges` or `regs_to_edges`.
            :param key:     The key used in finding elements in the dict.
            """
            for edge_tuple in dict_[key]:
                # unpack it
                prev_code_loc, current_code_loc, labels = edge_tuple
                # Add the new edge
                self._add_edge(prev_code_loc, current_code_loc, **labels)

            # Clear it
            if del_key:
                del dict_[key]

        for a in action_list:

            if a.bbl_addr is None:
                current_code_loc = CodeLocation(None, None, sim_procedure=a.sim_procedure)
            else:
                current_code_loc = CodeLocation(a.bbl_addr, a.stmt_idx, ins_addr=a.ins_addr)

            if a.type == "mem":
                if a.actual_addrs is None:
                    # For now, mem reads don't necessarily have actual_addrs set properly
                    addr_list = set(state.memory.normalize_address(a.addr.ast, convert_to_valueset=True))
                else:
                    addr_list = set(a.actual_addrs)

                for addr in addr_list:
                    variable = SimMemoryVariable(addr, a.data.ast.size()) # TODO: Properly unpack the SAO

                    if a.action == "read":
                        # Create an edge between def site and use site

                        prevdefs = self._def_lookup(live_defs, variable)

                        for prev_code_loc, labels in prevdefs.iteritems():
                            self._read_edge = True
                            self._add_edge(prev_code_loc, current_code_loc, **labels)

                    else: #if a.action == "write":
                        # Kill the existing live def
                        self._kill(live_defs, variable, current_code_loc)

                    # For each of its register dependency and data dependency, we revise the corresponding edge
                    for reg_off in a.addr.reg_deps:
                        _annotate_edges_in_dict(regs_to_edges, reg_off, subtype='mem_addr')
                    for tmp in a.addr.tmp_deps:
                        _annotate_edges_in_dict(temps_to_edges, tmp, subtype='mem_addr')

                    for reg_off in a.data.reg_deps:
                        _annotate_edges_in_dict(regs_to_edges, reg_off, subtype='mem_data')
                    for tmp in a.data.tmp_deps:
                        _annotate_edges_in_dict(temps_to_edges, tmp, subtype='mem_data')

            elif a.type == 'reg':
                # For now, we assume a.offset is not symbolic
                # TODO: Support symbolic register offsets

                #variable = SimRegisterVariable(a.offset, a.data.ast.size())
                variable = SimRegisterVariable(a.offset, self.project.arch.bits)

                if a.action == 'read':
                    # What do we want to do?
                    prevdefs = self._def_lookup(live_defs, variable)

                    if a.offset in regs_to_edges:
                        _dump_edge_from_dict(regs_to_edges, a.offset)

                    for prev_code_loc, labels in prevdefs.iteritems():
                        edge_tuple = (prev_code_loc, current_code_loc, labels)
                        regs_to_edges[a.offset].append(edge_tuple)

                else:
                    # write
                    self._kill(live_defs, variable, current_code_loc)

            elif a.type == 'tmp':
                # tmp is definitely not symbolic
                if a.action == 'read':
                    prev_code_loc = temps[a.tmp]
                    edge_tuple = (prev_code_loc, current_code_loc, {'type':'tmp', 'data':a.tmp})

                    if a.tmp in temps_to_edges:
                        _dump_edge_from_dict(temps_to_edges, a.tmp)

                    temps_to_edges[a.tmp].append(edge_tuple)

                else:
                    # write
                    temps[a.tmp] = current_code_loc

            elif a.type == 'exit':
                # exits should only depend on tmps

                for tmp in a.tmp_deps:
                    prev_code_loc = temps[tmp]
                    edge_tuple = (prev_code_loc, current_code_loc, {'type': 'exit', 'data': tmp})

                    if tmp in temps_to_edges:
                        _dump_edge_from_dict(temps_to_edges, tmp)

                    temps_to_edges[tmp].append(edge_tuple)

        # In the end, dump all other edges in those two dicts
        for reg_offset in regs_to_edges:
            _dump_edge_from_dict(regs_to_edges, reg_offset, del_key=False)
        for tmp in temps_to_edges:
            _dump_edge_from_dict(temps_to_edges, tmp, del_key=False)

        return live_defs
예제 #2
0
파일: ddg.py 프로젝트: yuede/angr
    def _track(self, state, live_defs, statements):
        """
        Given all live definitions prior to this program point, track the changes, and return a new list of live
        definitions. We scan through the action list of the new state to track the changes.

        :param state:       The input state at that program point.
        :param live_defs:   A list of all live definitions prior to reaching this program point.
        :param list statements: A list of VEX statements.
        :returns:           A list of new live definitions.
        """

        # Make a copy of live_defs
        live_defs = live_defs.copy()

        action_list = list(state.log.actions)

        # Since all temporary variables are local, we simply track them in a local dict
        temp_defs = {}
        temp_variables = {}
        temp_register_symbols = {}

        # All dependence edges are added to the graph either at the end of this method, or when they are going to be
        # overwritten by a new edge. This is because we sometimes have to modify a previous edge (e.g. add new labels
        # to the edge)
        temps_to_edges = defaultdict(list)
        regs_to_edges = defaultdict(list)

        last_statement_id = None
        pv_read = None  # program variables read out in the same statement. we keep a copy of those variables here so
        # we can link it to the tmp_write action right afterwards
        data_generated = None

        # tracks stack pointer and base pointer
        #sp = state.se.any_int(state.regs.sp) if not state.regs.sp.symbolic else None
        #bp = state.se.any_int(state.regs.bp) if not state.regs.bp.symbolic else None

        for a in action_list:

            if last_statement_id is None or last_statement_id != a.stmt_idx:
                pv_read = []
                data_generated = None
                last_statement_id = a.stmt_idx

            if a.sim_procedure is None:
                current_code_location = CodeLocation(a.bbl_addr,
                                                     a.stmt_idx,
                                                     ins_addr=a.ins_addr)
            else:
                current_code_location = CodeLocation(
                    None, None, sim_procedure=a.sim_procedure)

            if a.type == "mem":
                if a.actual_addrs is None:
                    # For now, mem reads don't necessarily have actual_addrs set properly
                    try:
                        addr_list = {state.se.any_int(a.addr.ast)}
                    except (SimSolverModeError, SimUnsatError,
                            ZeroDivisionError):
                        # FIXME: ZeroDivisionError should have been caught by claripy and simuvex.
                        # FIXME: see claripy issue #75. this is just a temporary workaround.
                        # it's symbolic... just continue
                        addr_list = {
                            0x60000000
                        }  # TODO: this is a random address that I pick. Fix it.
                else:
                    addr_list = set(a.actual_addrs)

                for addr in addr_list:

                    variable = None
                    if len(addr_list) == 1 and len(a.addr.tmp_deps) == 1:
                        addr_tmp = list(a.addr.tmp_deps)[0]
                        if addr_tmp in temp_register_symbols:
                            # it must be a stack variable
                            sort, offset = temp_register_symbols[addr_tmp]
                            variable = SimStackVariable(offset,
                                                        a.data.ast.size() / 8,
                                                        base=sort,
                                                        base_addr=addr -
                                                        offset)

                    if variable is None:
                        variable = SimMemoryVariable(
                            addr,
                            a.data.ast.size() /
                            8)  # TODO: Properly unpack the SAO

                    pvs = []

                    if a.action == "read":
                        # Create an edge between def site and use site

                        prevdefs = self._def_lookup(live_defs, variable)

                        # TODO: prevdefs should only contain location, not labels
                        for prev_code_loc, labels in prevdefs.iteritems():
                            self._stmt_graph_add_edge(prev_code_loc,
                                                      current_code_location,
                                                      **labels)

                            pvs.append(ProgramVariable(variable,
                                                       prev_code_loc))

                        if not pvs:
                            pvs.append(
                                ProgramVariable(variable,
                                                current_code_location,
                                                initial=True))
                            # make sure to put it into the killing set
                            self._kill(live_defs, variable,
                                       current_code_location)

                        for pv in pvs:
                            pv_read.append(pv)

                    if a.action == "write":
                        # Kill the existing live def
                        self._kill(live_defs, variable, current_code_location)

                        pvs.append(
                            ProgramVariable(variable, current_code_location))

                    for pv in pvs:
                        # For each of its register dependency and data dependency, we annotate the corresponding edge
                        for reg_offset in a.addr.reg_deps:
                            self._stmt_graph_annotate_edges(
                                regs_to_edges[reg_offset], subtype='mem_addr')
                            reg_variable = SimRegisterVariable(
                                reg_offset,
                                self._get_register_size(reg_offset))
                            prev_defs = self._def_lookup(
                                live_defs, reg_variable)
                            for loc, _ in prev_defs.iteritems():
                                v = ProgramVariable(reg_variable, loc)
                                self._data_graph_add_edge(v,
                                                          pv,
                                                          type='mem_addr')

                        for tmp in a.addr.tmp_deps:
                            self._stmt_graph_annotate_edges(
                                temps_to_edges[tmp], subtype='mem_addr')
                            if tmp in temp_variables:
                                self._data_graph_add_edge(temp_variables[tmp],
                                                          pv,
                                                          type='mem_addr')

                        for reg_offset in a.data.reg_deps:
                            self._stmt_graph_annotate_edges(
                                regs_to_edges[reg_offset], subtype='mem_data')
                            reg_variable = SimRegisterVariable(
                                reg_offset,
                                self._get_register_size(reg_offset))
                            prev_defs = self._def_lookup(
                                live_defs, reg_variable)
                            for loc, _ in prev_defs.iteritems():
                                v = ProgramVariable(reg_variable, loc)
                                self._data_graph_add_edge(v,
                                                          pv,
                                                          type='mem_data')

                        for tmp in a.data.tmp_deps:
                            self._stmt_graph_annotate_edges(
                                temps_to_edges[tmp], subtype='mem_data')
                            if tmp in temp_variables:
                                self._data_graph_add_edge(temp_variables[tmp],
                                                          pv,
                                                          type='mem_data')

            elif a.type == 'reg':
                # TODO: Support symbolic register offsets

                reg_offset = a.offset
                variable = SimRegisterVariable(reg_offset,
                                               a.data.ast.size() / 8)

                if a.action == 'read':
                    # What do we want to do?
                    prevdefs = self._def_lookup(live_defs, variable)

                    # add edges to the statement dependence graph
                    for prev_code_loc, labels in prevdefs.iteritems():
                        self._stmt_graph_add_edge(prev_code_loc,
                                                  current_code_location,
                                                  **labels)
                        # record the edge
                        edge_tuple = (prev_code_loc, current_code_location)
                        regs_to_edges[reg_offset].append(edge_tuple)

                        pv_read.append(ProgramVariable(variable,
                                                       prev_code_loc))

                    if not prevdefs:
                        # the register was never defined before - it must be passed in as an argument
                        pv_read.append(
                            ProgramVariable(variable,
                                            current_code_location,
                                            initial=True))
                        # make sure to put it into the killing set
                        self._kill(live_defs, variable, current_code_location)

                    if reg_offset == self.project.arch.sp_offset:
                        data_generated = ('sp', 0)
                    elif reg_offset == self.project.arch.bp_offset:
                        data_generated = ('bp', 0)

                else:
                    # write
                    self._kill(live_defs, variable, current_code_location)

                    if reg_offset in regs_to_edges:
                        # clear the existing edges definition
                        del regs_to_edges[reg_offset]

                    # add a node on the data dependence graph
                    pv = ProgramVariable(variable, current_code_location)
                    self._data_graph_add_node(pv)

                    if not a.reg_deps and not a.tmp_deps:
                        # moving a constant into the register
                        # try to parse out the constant from statement
                        const_variable = SimConstantVariable()
                        if statements is not None:
                            stmt = statements[a.stmt_idx]
                            if isinstance(stmt.data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(
                                    value=stmt.data.con.value)
                        const_pv = ProgramVariable(const_variable,
                                                   current_code_location)
                        self._data_graph_add_edge(const_pv, pv)

                    for tmp in a.tmp_deps:
                        if tmp in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp], pv)

            elif a.type == 'tmp':
                # tmp is definitely not symbolic
                tmp = a.tmp
                pv = ProgramVariable(SimTemporaryVariable(tmp),
                                     current_code_location)

                if a.action == 'read':
                    prev_code_loc = temp_defs[tmp]

                    self._stmt_graph_add_edge(prev_code_loc,
                                              current_code_location,
                                              type='tmp',
                                              data=a.tmp)
                    # record the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    temps_to_edges[a.tmp].append(edge_tuple)

                    if tmp in temp_register_symbols:
                        data_generated = temp_register_symbols[tmp]

                else:
                    # write
                    temp_defs[tmp] = current_code_location
                    temp_variables[tmp] = pv

                    # clear existing edges
                    if tmp in temps_to_edges:
                        del temps_to_edges[tmp]

                    for tmp_dep in a.tmp_deps:
                        if tmp_dep in temp_variables:
                            self._data_graph_add_edge(temp_variables[tmp_dep],
                                                      pv)

                    if data_generated:
                        temp_register_symbols[tmp] = data_generated

                    for data in pv_read:
                        self._data_graph_add_edge(data, pv)

                    if not a.tmp_deps and not pv_read:
                        # read in a constant
                        # try to parse out the constant from statement
                        const_variable = SimConstantVariable()
                        if statements is not None:
                            stmt = statements[a.stmt_idx]
                            if isinstance(stmt, pyvex.IRStmt.Dirty):
                                l.warning(
                                    'Dirty statements are not supported in DDG for now.'
                                )
                            elif isinstance(stmt.data, pyvex.IRExpr.Const):
                                const_variable = SimConstantVariable(
                                    value=stmt.data.con.value)
                        const_pv = ProgramVariable(const_variable,
                                                   current_code_location)
                        self._data_graph_add_edge(const_pv, pv)

            elif a.type == 'exit':
                # exits should only depend on tmps
                for tmp in a.tmp_deps:
                    prev_code_loc = temp_defs[tmp]

                    # add the edge to the graph
                    self._stmt_graph_add_edge(prev_code_loc,
                                              current_code_location,
                                              type='exit',
                                              data='tmp')

                    # log the edge
                    edge_tuple = (prev_code_loc, current_code_location)
                    temps_to_edges[tmp].append(edge_tuple)

            elif a.type == 'operation':
                # FIXME: we should support a more complete range of operations

                if a.op.endswith('Sub32') or a.op.endswith('Sub64'):
                    # subtract
                    expr_0, expr_1 = a.exprs

                    if expr_0.tmp_deps and (not expr_1.tmp_deps
                                            and not expr_1.reg_deps):
                        # tmp - const
                        tmp = list(expr_0.tmp_deps)[0]
                        if tmp in temp_register_symbols:
                            sort, offset = temp_register_symbols[tmp]
                            offset -= expr_1.ast.args[0]
                            data_generated = (sort, offset)

                elif a.op.endswith('Add32') or a.op.endswith('Add64'):
                    # add
                    expr_0, expr_1 = a.exprs

                    if expr_0.tmp_deps and (not expr_1.tmp_deps
                                            and not expr_1.reg_deps):
                        # tmp + const
                        tmp = list(expr_0.tmp_deps)[0]
                        if tmp in temp_register_symbols:
                            sort, offset = temp_register_symbols[tmp]
                            offset += expr_1.ast.args[0]
                            data_generated = (sort, offset)

        #import pprint
        #pprint.pprint(self._data_graph.edges())
        #pprint.pprint(self.simplified_data_graph.edges())
        # import ipdb; ipdb.set_trace()

        return live_defs