Example #1
0
 def __init__(self):
     """
     Sets every graph-related attribute to None and creates the KeyCounter
     owned by this instance.
     """
     self.nodes = self.edges = self.jumps = None
     self.labels = self.start_node = None
     self.key_counter = KeyCounter()
Example #2
0
class CFGBuilder(visitors.ImplicitVisitor):
    """
    A visitor that can be used to build the control-flow graph of the given
    program as an instance of a Digraph. Nodes of the resulting control-flow
    graph will have the following data attached to it:
    - 'is_widening_point': "True" iff the node can be used as a widening
      point.
    - 'node': the corresponding IR node which this CFG node was built from,
      or None.
    """
    def __init__(self):
        # Graph under construction. These are (re)initialised at the
        # beginning of visit_program.
        self.nodes = None
        self.edges = None
        # Pending (jumping node, label) pairs recorded by visit_goto.
        self.jumps = None
        # Maps each visited label to the CFG node it is attached to.
        self.labels = None
        self.start_node = None
        self.key_counter = KeyCounter()

    def fresh(self, name):
        """
        :param str name: The prefix of the name to generate.
        :return: The prefix followed by the value that the key counter gives
            for it.
        :rtype: str
        """
        return "{}{}".format(name, self.key_counter.get_incr(name))

    @staticmethod
    def is_label(node):
        """
        :param tree.Node node: An IR node
        :return: Whether the node is a LabelStmt
        :rtype: bool
        """
        return isinstance(node, LabelStmt)

    def compute_reachable_nodes(self, start, reachables):
        """
        Computes the set of nodes that are reachable from the given "start"
        node using the set of edges registered so far. Reachable nodes are
        added to the given set. Nodes that are already in the set when they
        are encountered are not explored again.

        :param Digraph.Node start: The node from which to compute reachable
            nodes.

        :param set[Digraph.Node] reachables: The set of nodes that are found
            reachable so far.
        """
        # Index the successors once instead of scanning every edge for each
        # visited node.
        successors = {}
        for edge in self.edges:
            successors.setdefault(edge.frm, []).append(edge.to)

        # Use an explicit work list rather than recursion, so that long
        # chains of nodes cannot exhaust the interpreter's recursion limit.
        reachables.add(start)
        pending = [start]
        while pending:
            current = pending.pop()
            for node in successors.get(current, ()):
                if node not in reachables:
                    reachables.add(node)
                    pending.append(node)

    def visit_program(self, prgm):
        """
        Builds the control-flow graph of the given program.

        :param prgm: The program to visit. Its "stmts" are visited in order.
        :return: The control-flow graph, restricted to the nodes that are
            reachable from its start node.
        :rtype: Digraph
        """
        self.nodes = []
        self.edges = []
        self.jumps = []
        self.labels = {}

        start = self.build_node("start")
        self.visit_stmts(prgm.stmts, start)

        # Generate jump edges: a jumping node is connected to every successor
        # that the node attached to the targeted label has at this point.
        # The label lookup fails with a KeyError for an unknown label.
        for node, label in self.jumps:
            self.edges.extend([
                Digraph.Edge(node, edge.to)
                for edge in self.edges
                if edge.frm == self.labels[label]
            ])

        # Compute reachable nodes
        reachables = set()
        self.compute_reachable_nodes(start, reachables)

        # Remove all nodes and edges that are not reachable
        self.nodes = [n for n in self.nodes if n in reachables]
        self.edges = [e for e in self.edges if e.frm in reachables]

        # The start node is never registered in self.nodes, so it is added
        # explicitly here.
        return Digraph([start] + self.nodes, self.edges)

    def visit_split(self, splitstmt, start):
        """
        Visits every branch of the split from "start" and links the end of
        each branch to a fresh join node, which is returned.
        """
        ends = [
            self.visit_stmts(branch, start) for branch in splitstmt.branches
        ]

        join = self.build_node("split_join")
        self.register_and_link(ends, join)

        return join

    def visit_loop(self, loopstmt, start):
        """
        Visits the body of the loop from a fresh loop head flagged as a
        widening point. Both "start" and the end of the body lead to the
        loop head, and the loop head leads to a fresh join node, which is
        returned.
        """
        loop_start = self.build_node("loop_start", is_widening_point=True)

        end = self.visit_stmts(loopstmt.stmts, loop_start)
        join = self.build_node("loop_join")

        self.register_and_link([start, end], loop_start)
        self.register_and_link([loop_start], join)
        return join

    def visit_label(self, label, start):
        """
        Attaches the label to the current node and returns that node.

        When there is no current node (e.g. the label directly follows a
        goto), a placeholder node is registered and used instead.
        """
        if start is None:
            # Without a node to attach the label to, the statements that
            # follow would get no predecessor and every goto to this label
            # would resolve to no edge at all. The placeholder itself has no
            # incoming edge, so it is pruned with the unreachable nodes once
            # the jump edges have been generated from its successors.
            start = self.build_node("label")
            self.register_and_link([], start)

        self.labels[label] = start
        return start

    def visit_goto(self, goto, start):
        """
        Records the jump so that visit_program can turn it into edges once
        all labels are known. Returns None since control does not fall
        through a goto.
        """
        self.jumps.append((start, goto.label))
        return None

    def visit_assign(self, assign, start):
        """
        Appends a node built from the given assign statement after "start".
        """
        n = self.build_node("assign", orig_node=assign)
        self.register_and_link([start], n)
        return n

    def visit_read(self, read, start):
        """
        Appends a node built from the given read statement after "start".
        """
        n = self.build_node("read", orig_node=read)
        self.register_and_link([start], n)
        return n

    def visit_assume(self, assume, start):
        """
        Appends a node built from the given assume statement after "start".
        """
        n = self.build_node("assume", orig_node=assume)
        self.register_and_link([start], n)
        return n

    def visit_stmts(self, stmts, cur):
        """
        Visits the statements in sequence, each one starting from the node
        returned by the previous one.

        :return: The node returned by the last statement, or "cur" itself if
            there are no statements.
        """
        for stmt in stmts:
            cur = stmt.visit(self, cur)
        return cur

    def build_node(self, name, is_widening_point=False, orig_node=None):
        """
        Creates a node with a fresh name derived from "name". The node is
        not registered in the graph under construction.

        :param str name: The prefix of the name of the node.
        :param bool is_widening_point: The value of the 'is_widening_point'
            data of the node.
        :param orig_node: The value of the 'node' data of the node.
        :rtype: Digraph.Node
        """
        return Digraph.Node(
            name=self.fresh(name),
            is_widening_point=is_widening_point,
            node=orig_node
        )

    def register_and_link(self, froms, new_node):
        """
        Registers "new_node" and adds an edge to it from each node of
        "froms". Entries of "froms" that are None are ignored.
        """
        self.nodes.append(new_node)
        for f in froms:
            if f is not None:
                self.edges.append(Digraph.Edge(f, new_node))
Example #3
0
def compute_semantics(prog, prog_model, merge_pred_builder, arg_values=None):
    """
    Iterates the data-flow equations of the control-flow graph of "prog"
    until the state at every program point is stable, and returns the
    outcome wrapped in an AnalysisResults.

    :param prog: The program to analyze. It is visited with a CFGBuilder.
    :param prog_model: Mapping whose Variable keys are the variables to
        track. The "domain" of the entry of a variable is used as the
        domain of that variable.
    :param merge_pred_builder: Object whose "build" method is called with
        the trace domain and the variables domain. The result is handed to
        the Powerset domain.
    :param arg_values: Optional mapping from variables to their value at the
        entry of the program. Other variables start at the top of their
        domain.
    :rtype: AnalysisResults
    """
    expr_evaluator = ExprEvaluator(prog_model)
    expr_solver = ExprSolver(prog_model)

    # Widening configuration.
    visits = KeyCounter()
    widen_after = 5
    narrow_for = 3

    def do_widen(counter):
        # Widen exactly once when the delay is hit, leave room for narrowing
        # afterwards, then widen on every visit once the narrowing budget
        # is used up as well.
        if counter == widen_after:
            return True
        return counter >= narrow_for + widen_after

    cfg = prog.visit(CFGBuilder())
    roots = cfg.roots()
    non_roots = [point for point in cfg.nodes if point not in roots]

    # Variables that appear in the program.
    variables = {
        key for key in prog_model.keys() if isinstance(key, Variable)
    }

    # Index of the variables, by position.
    by_index = {}
    for var in variables:
        by_index[var.data.index] = var

    if len(by_index) > 0:
        highest = max(by_index.keys())
    else:
        highest = -1

    # Domain of the variables: one component per position, with the unit
    # domain at the positions that no variable uses.
    components = [
        prog_model[by_index[i]].domain if i in by_index else _unit_domain
        for i in range(highest + 1)
    ]
    vars_domain = domains.Product(*components)

    # Domain of the traces.
    trace_domain = _SimpleTraceLattice(cfg.nodes)

    # Domain of the states tracked at each program point.
    state_domain = domains.Product(trace_domain, vars_domain)
    merge_predicate = merge_pred_builder.build(trace_domain, vars_domain)
    # No top element is needed here, hence the None.
    lat = domains.Powerset(state_domain, merge_predicate, None)

    # The transfer function.
    transfer_func = _VarTracker(
        variables, vars_domain, expr_evaluator, expr_solver
    )

    def transfer(new_states, node, inputs):
        ir_node = node.data.node

        kept = []
        for trace, values in inputs:
            if ir_node is not None:
                values = ir_node.visit(transfer_func, values)
            if vars_domain.is_empty(values):
                # Drop the entries whose values became empty.
                continue
            kept.append((
                trace_domain.join(trace, trace_domain.build([node])),
                values
            ))

        output = lat.build(kept)

        if node.data.is_widening_point and do_widen(visits.get_incr(node)):
            output = lat.update(new_states[node], output, True)

        return output

    def it(states):
        new_states = states.copy()

        for node in non_roots:
            incoming = reduce(
                lat.join,
                (new_states[anc] for anc in cfg.ancestors(node))
            )
            new_states[node] = transfer(new_states, node, incoming)

        return new_states

    def initial_value(i):
        # Value of the variable at position "i" at the entry of the program.
        if (i in by_index and
                arg_values is not None and
                by_index[i] in arg_values):
            return arg_values[by_index[i]]
        return vars_domain.domains[i].top

    init_vars = tuple(initial_value(i) for i in range(highest + 1))

    # Initial state at the entry of the program.
    init_lat = lat.build([(trace_domain.bottom, init_vars)])

    # Previous state of the program (all program points).
    previous = concat_dicts(
        {n: transfer({}, n, init_lat) for n in roots},
        {n: lat.bottom for n in non_roots}
    )

    # Current state of the program (all program points).
    current = it(previous)

    def converged(before, after):
        return all(
            lat.eq(state, after[point])
            for point, state in before.iteritems()
        )

    # Find a fix-point.
    while not converged(previous, current):
        previous, current = current, it(current)

    formatted_results = {}
    for node, state in current.iteritems():
        per_trace = {}
        for trace, values in state:
            per_trace[trace] = {v: values[v.data.index] for v in variables}
        formatted_results[node] = per_trace

    return AnalysisResults(
        cfg,
        formatted_results,
        trace_domain,
        vars_domain,
        expr_evaluator,
        prog.data.fun_id
    )
Example #4
0
class CFGBuilder(visitors.ImplicitVisitor):
    """
    A visitor that can be used to build the control-flow graph of the given
    program as an instance of a Digraph. Nodes of the resulting control-flow
    graph will have the following data attached to it:
    - 'is_widening_point': "True" iff the node can be used as a widening
      point.
    - 'node': the corresponding IR node which this CFG node was built from,
      or None.
    """
    # Visiting states used by post_process_cfg. They are compared by
    # identity.
    NOT_VISITED = object()
    BEING_VISITED = object()
    ALREADY_VISITED = object()

    def __init__(self):
        # Graph under construction. These are (re)initialised at the
        # beginning of visit_program.
        self.nodes = None
        self.edges = None
        # Pending (jumping node, label) pairs recorded by visit_goto.
        self.jumps = None
        # Maps each visited label to the CFG node it is attached to.
        self.labels = None
        self.start_node = None
        self.key_counter = KeyCounter()

    def fresh(self, name):
        """
        :param str name: The prefix of the name to generate.
        :return: The prefix followed by the value that the key counter gives
            for it.
        :rtype: str
        """
        return "{}{}".format(name, self.key_counter.get_incr(name))

    @staticmethod
    def is_label(node):
        """
        :param tree.Node node: An IR node
        :return: Whether the node is a LabelStmt
        :rtype: bool
        """
        return isinstance(node, LabelStmt)

    def outs(self, node):
        """
        :return: The directly reachable nodes from the given node
        :rtype: iterable[Digraph.Node]
        """
        return (e.to for e in self.edges if e.frm == node)

    def _successor_index(self):
        """
        :return: A map from each node that has outgoing edges to the list of
            its direct successors, in the order of the registered edges.
        :rtype: dict[Digraph.Node, list[Digraph.Node]]
        """
        index = {}
        for edge in self.edges:
            index.setdefault(edge.frm, []).append(edge.to)
        return index

    def compute_reachable_nodes(self, start, reachables):
        """
        Computes the set of nodes that are reachable from the given "start"
        node using the set of edges registered so far. Reachable nodes are
        added to the given set. Nodes that are already in the set when they
        are encountered are not explored again.

        :param Digraph.Node start: The node from which to compute reachable
            nodes.

        :param set[Digraph.Node] reachables: The set of nodes that are found
            reachable so far.
        """
        # Index the successors once instead of scanning every edge for each
        # visited node.
        successors = self._successor_index()

        # Use an explicit work list rather than recursion, so that long
        # chains of nodes cannot exhaust the interpreter's recursion limit.
        reachables.add(start)
        pending = [start]
        while pending:
            current = pending.pop()
            for node in successors.get(current, ()):
                if node not in reachables:
                    reachables.add(node)
                    pending.append(node)

    def post_process_cfg(self, start):
        """
        Routine for post processing the resulting control-flow graph. Basically
        perform a depth-first search from the given `start` node to find out
        the reachables nodes and the widening points.

        Returns a map that indicates for each node if it is reachable or not
        (i.e. maps to CFGBuilder.ALREADY_VISITED or CFGBuilder.NOT_VISITED).

        :type start: Digraph.Node
        :rtype: map[Digraph.Node, object]
        """
        state = defaultdict(lambda: CFGBuilder.NOT_VISITED)
        successors = self._successor_index()

        # Depth-first search with an explicit stack rather than recursion,
        # so that long chains of nodes cannot exhaust the interpreter's
        # recursion limit. Each entry pairs a node that is being visited
        # with the iterator over the successors it still has to examine.
        state[start] = CFGBuilder.BEING_VISITED
        stack = [(start, iter(successors.get(start, ())))]

        while stack:
            current, remaining = stack[-1]

            for node in remaining:
                node_state = state[node]
                if node_state is CFGBuilder.NOT_VISITED:
                    # Descend into `node`. The remaining successors of
                    # `current` are examined once `node` is done.
                    state[node] = CFGBuilder.BEING_VISITED
                    stack.append((node, iter(successors.get(node, ()))))
                    break
                elif node_state is CFGBuilder.BEING_VISITED:
                    # Found a back edge, mark `node` as a widening point.
                    node.data = node.data.copy(is_widening_point=True)
            else:
                # Every successor of `current` has been examined.
                state[current] = CFGBuilder.ALREADY_VISITED
                stack.pop()

        return state

    def visit_program(self, prgm):
        """
        Builds the control-flow graph of the given program.

        :param prgm: The program to visit. Its "stmts" are visited in order.
        :return: The control-flow graph, restricted to the nodes that are
            reachable from its start node.
        :rtype: Digraph
        """
        self.nodes = []
        self.edges = []
        self.jumps = []
        self.labels = {}

        start = self.build_node("start")
        self.visit_stmts(prgm.stmts, start)

        # Generate jump edges: a jumping node is connected to every successor
        # that the node attached to the targeted label has at this point.
        # The label lookup fails with a KeyError for an unknown label.
        for node, label in self.jumps:
            self.edges.extend([
                Digraph.Edge(node, edge.to) for edge in self.edges
                if edge.frm == self.labels[label]
            ])

        # Post process the CFG to infer widening points and compute the set
        # of reachable nodes.
        visit_results = self.post_process_cfg(start)

        # Remove all nodes and edges that are not reachable
        self.nodes = [
            n for n in self.nodes
            if visit_results[n] is not CFGBuilder.NOT_VISITED
        ]
        self.edges = [
            e for e in self.edges
            if visit_results[e.frm] is not CFGBuilder.NOT_VISITED
        ]

        # The start node is never registered in self.nodes, so it is added
        # explicitly here.
        return Digraph([start] + self.nodes, self.edges)

    def visit_split(self, splitstmt, start):
        """
        Visits every branch of the split from "start" and links the end of
        each branch to a fresh join node, which is returned.
        """
        ends = [
            self.visit_stmts(branch, start) for branch in splitstmt.branches
        ]

        join = self.build_node("split_join")
        self.register_and_link(ends, join)

        return join

    def visit_loop(self, loopstmt, start):
        """
        Visits the body of the loop from a fresh loop head. Both "start" and
        the end of the body lead to the loop head, and the loop head leads
        to a fresh join node, which is returned.
        """
        loop_start = self.build_node("loop_start")

        end = self.visit_stmts(loopstmt.stmts, loop_start)
        join = self.build_node("loop_join")

        self.register_and_link([start, end], loop_start)
        self.register_and_link([loop_start], join)
        return join

    def visit_label(self, label, start):
        """
        Attaches the label to the current node and returns that node.

        When there is no current node (e.g. the label directly follows a
        goto), a placeholder node is registered and used instead.
        """
        if start is None:
            # Without a node to attach the label to, the statements that
            # follow would get no predecessor and every goto to this label
            # would resolve to no edge at all. The placeholder itself has no
            # incoming edge, so it is pruned with the unreachable nodes once
            # the jump edges have been generated from its successors.
            start = self.build_node("label")
            self.register_and_link([], start)

        self.labels[label] = start
        return start

    def visit_goto(self, goto, start):
        """
        Records the jump so that visit_program can turn it into edges once
        all labels are known. Returns None since control does not fall
        through a goto.
        """
        self.jumps.append((start, goto.label))
        return None

    def visit_assign(self, assign, start):
        """
        Appends a node built from the given assign statement after "start".
        """
        n = self.build_node("assign", orig_node=assign)
        self.register_and_link([start], n)
        return n

    def visit_read(self, read, start):
        """
        Appends a node built from the given read statement after "start".
        """
        n = self.build_node("read", orig_node=read)
        self.register_and_link([start], n)
        return n

    def visit_assume(self, assume, start):
        """
        Appends a node built from the given assume statement after "start".
        """
        n = self.build_node("assume", orig_node=assume)
        self.register_and_link([start], n)
        return n

    def visit_stmts(self, stmts, cur):
        """
        Visits the statements in sequence, each one starting from the node
        returned by the previous one.

        :return: The node returned by the last statement, or "cur" itself if
            there are no statements.
        """
        for stmt in stmts:
            cur = stmt.visit(self, cur)
        return cur

    def build_node(self, name, orig_node=None):
        """
        Creates a node with a fresh name derived from "name". The node is
        not registered in the graph under construction, and it is not a
        widening point: those are inferred by post_process_cfg.

        :param str name: The prefix of the name of the node.
        :param orig_node: The value of the 'node' data of the node.
        :rtype: Digraph.Node
        """
        return Digraph.Node(name=self.fresh(name),
                            is_widening_point=False,
                            node=orig_node)

    def register_and_link(self, froms, new_node):
        """
        Registers "new_node" and adds an edge to it from each node of
        "froms". Entries of "froms" that are None are ignored.
        """
        self.nodes.append(new_node)
        for f in froms:
            if f is not None:
                self.edges.append(Digraph.Edge(f, new_node))
Example #5
0
def compute_semantics(prog, prog_model, merge_pred_builder, arg_values=None):
    """
    Solves the data-flow equations of the control-flow graph of "prog",
    following a hierarchical ordering of its nodes, and returns the outcome
    wrapped in an AnalysisResults.

    :param prog: The program to analyze. It is visited with a CFGBuilder.
    :param prog_model: Mapping whose Variable keys are the variables to
        track. The "domain" of the entry of a variable is used as the
        domain of that variable.
    :param merge_pred_builder: Object whose "build" method is called with
        the trace domain and the variables domain. The result is handed to
        the Powerset domain.
    :param arg_values: Optional mapping from variables to their value at the
        entry of the program. Other variables start at the top of their
        domain.
    :rtype: AnalysisResults
    """
    expr_evaluator = ExprEvaluator(prog_model)
    expr_solver = ExprSolver(prog_model)

    # Widening configuration.
    visits = KeyCounter()
    widen_after = 5
    narrow_for = 3

    def do_widen(counter):
        # Widen exactly once when the delay is hit, leave room for narrowing
        # afterwards, then widen on every visit once the narrowing budget
        # is used up as well.
        if counter == widen_after:
            return True
        return counter >= narrow_for + widen_after

    cfg = prog.visit(CFGBuilder())
    roots = cfg.roots()
    non_roots = [point for point in cfg.nodes if point not in roots]

    # Variables that appear in the program.
    variables = {
        key for key in prog_model.keys() if isinstance(key, Variable)
    }

    # Index of the variables, by position.
    by_index = {}
    for var in variables:
        by_index[var.data.index] = var

    if len(by_index) > 0:
        highest = max(by_index.keys())
    else:
        highest = -1

    # Domain of the variables: one component per position, with the unit
    # domain at the positions that no variable uses.
    components = [
        prog_model[by_index[i]].domain if i in by_index else _unit_domain
        for i in range(highest + 1)
    ]
    vars_domain = domains.Product(*components)

    # Domain of the traces.
    trace_domain = _SimpleTraceLattice(cfg.nodes)

    # Domain of the states tracked at each program point.
    state_domain = domains.Product(trace_domain, vars_domain)
    merge_predicate = merge_pred_builder.build(trace_domain, vars_domain)
    # No top element is needed here, hence the None.
    lat = domains.Powerset(state_domain, merge_predicate, None)

    # The transfer function.
    transfer_func = _VarTracker(
        variables, vars_domain, expr_evaluator, expr_solver
    )

    def transfer(states, node, inputs):
        """
        Computes the state at "node" out of the combined state of its
        predecessors.

        Every (trace, values) entry of "inputs" goes through the IR node
        attached to "node", if any. Entries whose values end up empty are
        dropped, and "node" is joined into the trace of the others. On a
        widening point, the result may further be combined with the state
        currently recorded for "node".

        :param dict[Digraph.Node, object] states: The state at each program
            point. Only read when widening applies.
        :param Digraph.Node node: The node to compute the state of.
        :param object inputs: The element of the abstract domain standing
            for the combined state of all the predecessors.
        :rtype: object
        """
        ir_node = node.data.node

        kept = []
        for trace, values in inputs:
            if ir_node is not None:
                values = ir_node.visit(transfer_func, values)
            if vars_domain.is_empty(values):
                continue
            kept.append(
                (trace_domain.join(trace, trace_domain.build([node])), values)
            )

        output = lat.build(kept)

        if node.data.is_widening_point and do_widen(visits.get_incr(node)):
            output = lat.update(states[node], output, True)

        return output

    def iterate_once(states, ordering):
        """
        Goes once through the given ordering, updating "states" in-place.

        Elements flagged as nodes get their state recomputed from the join
        of the states of their ancestors. Other elements are nested
        orderings, which are solved up to their own fix-point.

        :param dict[Digraph.Node, object] states: The state at each program
            point.
        :param Digraph.HierarchicalOrdering ordering: The elements to
            process, in processing order.
        """
        for elem, is_node in ordering:
            if not is_node:
                fix(states, elem)
                continue

            incoming = reduce(
                lat.join, (states[anc] for anc in cfg.ancestors(elem))
            )
            states[elem] = transfer(states, elem, incoming)

    def is_eq(last, current):
        """
        Tells whether each program point of "last" has an equal state in
        "current". Program points that only "current" has are ignored.

        :param dict[Digraph.Node, object] last: The last state (possibly
            containing less nodes than current).
        :param dict[Digraph.Node, object] current: The current state.
        :rtype: bool
        """
        return all(
            lat.eq(state, current[point])
            for point, state in last.iteritems()
        )

    def sub_states(states, ordering):
        """
        Copies the entries of "states" for the nodes found at the first
        level of the ordering, leaving out the nested orderings.

        :type states: dict[Digraph.Node, object]
        :type ordering: Digraph.HierarchicalOrdering
        :rtype: dict[Digraph.Node, object]
        """
        snapshot = {}
        for elem, is_node in ordering:
            if is_node:
                snapshot[elem] = states[elem]
        return snapshot

    def fix(states, ordering):
        """
        Iterates over the given ordering until the states of its first-level
        nodes stop changing. At least one iteration is always performed.

        This procedure updates "states" in-place.

        :param dict[Digraph.Node, object] states: The state at each program
            point.
        :param Digraph.HierarchicalOrdering ordering: The elements to find a
            fix-point for, in processing order.
        """
        while True:
            last = sub_states(states, ordering)
            iterate_once(states, ordering)
            if is_eq(last, states):
                break

    def initial_value(i):
        # Value of the variable at position "i" at the entry of the program.
        if (i in by_index and arg_values is not None
                and by_index[i] in arg_values):
            return arg_values[by_index[i]]
        return vars_domain.domains[i].top

    init_vars = tuple(initial_value(i) for i in range(highest + 1))

    # Initial state to use at the entry of the program.
    init_lat = lat.build([(trace_domain.bottom, init_vars)])

    # Initial state at each program point of the program.
    states = concat_dicts(
        {n: transfer({}, n, init_lat) for n in roots},
        {n: lat.bottom for n in non_roots}
    )

    # Find a fix-point.
    fix(states, cfg.subgraph(non_roots).flat_topological_ordering())

    formatted_results = {}
    for node, state in states.iteritems():
        per_trace = {}
        for trace, values in state:
            per_trace[trace] = {v: values[v.data.index] for v in variables}
        formatted_results[node] = per_trace

    return AnalysisResults(cfg, formatted_results, trace_domain, vars_domain,
                           expr_evaluator, prog.data.fun_id)