def __init__(self):
    """
    Create a builder with no graph under construction.

    Every graph-related attribute starts out as None; only the key counter
    is instantiated here.
    """
    # None is immutable, so sharing it through a chained assignment is
    # equivalent to assigning each attribute separately (same order).
    self.nodes = self.edges = self.jumps = None
    self.labels = self.start_node = None
    self.key_counter = KeyCounter()
class CFGBuilder(visitors.ImplicitVisitor):
    """
    A visitor that can be used to build the control-flow graph of the given
    program as an instance of a Digraph. Nodes of the resulting control-flow
    graph will have the following data attached to it:
    - 'is_widening_point': "True" iff the node can be used as a widening
      point.
    - 'node': the corresponding IR node which this CFG node was built from,
      or None.
    """
    def __init__(self):
        self.nodes = None
        self.edges = None
        self.jumps = None
        self.labels = None
        self.start_node = None
        self.key_counter = KeyCounter()

    def fresh(self, name):
        """
        :param str name: The prefix of the name to generate.
        :return: `name` suffixed with the value that the key counter returns
            for `name`.
        :rtype: str
        """
        return "{}{}".format(name, self.key_counter.get_incr(name))

    @staticmethod
    def is_label(node):
        """
        :param tree.Node node: An IR node
        :return: Whether the node is a LabelStmt
        :rtype: bool
        """
        return isinstance(node, LabelStmt)

    def compute_reachable_nodes(self, start, reachables):
        """
        Computes the set of nodes that are reachable from the given "start"
        node using the set of edges registered so far. Reachable nodes are
        added to the given set.

        The traversal uses an explicit worklist instead of recursion, so that
        long paths in the graph (e.g. long straight-line programs) cannot
        exceed the interpreter's recursion limit.

        :param Digraph.Node start: The node from which to compute reachable
            nodes.
        :param set[Digraph.Node] reachables: The set of nodes that are found
            reachable so far. Nodes already in this set (other than `start`)
            are not explored again.
        """
        reachables.add(start)
        pending = [start]
        while pending:
            current = pending.pop()
            for edge in self.edges:
                if edge.frm == current and edge.to not in reachables:
                    # Mark on discovery so that a node is queued only once.
                    reachables.add(edge.to)
                    pending.append(edge.to)

    def visit_program(self, prgm):
        """
        Builds the control-flow graph of the given program.

        :param prgm: The program node; its `stmts` are visited in order.
        :return: A Digraph made of the start node followed by the reachable
            registered nodes, and of the edges leaving a reachable node.
        :rtype: Digraph
        """
        self.nodes = []
        self.edges = []
        self.jumps = []
        self.labels = {}

        start = self.build_node("start")
        self.visit_stmts(prgm.stmts, start)

        # Generate jump edges: the source of each jump is linked to every
        # successor of the node that was recorded for the target label.
        for node, label in self.jumps:
            # The list is fully built before "extend" is called, so
            # self.edges is never mutated while it is being iterated over.
            self.edges.extend([
                Digraph.Edge(node, edge.to)
                for edge in self.edges
                if edge.frm == self.labels[label]
            ])

        # Compute reachable nodes
        reachables = set()
        self.compute_reachable_nodes(start, reachables)

        # Remove all nodes and edges that are not reachable
        self.nodes = [n for n in self.nodes if n in reachables]
        self.edges = [e for e in self.edges if e.frm in reachables]

        return Digraph([start] + self.nodes, self.edges)

    def visit_split(self, splitstmt, start):
        """
        Visits each branch of the split from `start` and links the end of
        every branch to a fresh "split_join" node, which is returned.
        """
        ends = [
            self.visit_stmts(branch, start)
            for branch in splitstmt.branches
        ]

        join = self.build_node("split_join")
        self.register_and_link(ends, join)

        return join

    def visit_loop(self, loopstmt, start):
        """
        Visits the body of the loop from a fresh "loop_start" node, flagged
        as a widening point. That node is linked from both `start` and the
        end of the body. Returns a fresh "loop_join" node linked from
        "loop_start".
        """
        loop_start = self.build_node("loop_start", is_widening_point=True)

        end = self.visit_stmts(loopstmt.stmts, loop_start)
        join = self.build_node("loop_join")

        self.register_and_link([start, end], loop_start)
        self.register_and_link([loop_start], join)
        return join

    def visit_label(self, label, start):
        """
        Records `start` as the node associated with the given label and
        returns it unchanged.
        """
        self.labels[label] = start
        return start

    def visit_goto(self, goto, start):
        """
        Records a jump from `start` to the label of the goto. Returns None,
        which register_and_link ignores, so the statement that follows is
        not linked from this one.
        """
        self.jumps.append((start, goto.label))
        return None

    def visit_assign(self, assign, start):
        """
        Registers an "assign" node built from the statement, linked from
        `start`, and returns it.
        """
        n = self.build_node("assign", orig_node=assign)
        self.register_and_link([start], n)
        return n

    def visit_read(self, read, start):
        """
        Registers a "read" node built from the statement, linked from
        `start`, and returns it.
        """
        n = self.build_node("read", orig_node=read)
        self.register_and_link([start], n)
        return n

    def visit_assume(self, assume, start):
        """
        Registers an "assume" node built from the statement, linked from
        `start`, and returns it.
        """
        n = self.build_node("assume", orig_node=assume)
        self.register_and_link([start], n)
        return n

    def visit_stmts(self, stmts, cur):
        """
        Visits the statements in order, passing the result of each visit as
        the starting node of the next one. Returns the last result (`cur`
        itself if there are no statements).
        """
        for stmt in stmts:
            cur = stmt.visit(self, cur)
        return cur

    def build_node(self, name, is_widening_point=False, orig_node=None):
        """
        Creates (without registering it) a Digraph node with a fresh name
        derived from `name`, carrying the given widening flag and original
        IR node.
        """
        return Digraph.Node(
            name=self.fresh(name),
            is_widening_point=is_widening_point,
            node=orig_node
        )

    def register_and_link(self, froms, new_node):
        """
        Adds `new_node` to the registered nodes and creates an edge to it
        from every element of `froms` that is not None.
        """
        self.nodes.append(new_node)
        for f in froms:
            if f is not None:
                self.edges.append(Digraph.Edge(f, new_node))
def compute_semantics(prog, prog_model, merge_pred_builder, arg_values=None):
    """
    Computes the state at each node of the control-flow graph of the given
    program, by applying the transfer function of every non-root node until
    two successive iterations yield equal states (according to `lat.eq`) at
    every program point.

    :param prog: The program to analyze. It is visited with a CFGBuilder and
        its `data.fun_id` is forwarded to the results.
    :param prog_model: A mapping whose Variable keys are the variables
        tracked by the analysis; `prog_model[var].domain` gives the domain
        of a variable. It is also handed to ExprEvaluator and ExprSolver.
    :param merge_pred_builder: An object whose `build(trace_domain,
        vars_domain)` result is passed as second argument of
        domains.Powerset.
    :param arg_values: An optional mapping from variables to their initial
        value. Variables that are not in it start at the top element of
        their domain.
    :return: An AnalysisResults built from the control-flow graph, the
        per-node results, the domains and the evaluator.
    """
    evaluator = ExprEvaluator(prog_model)
    solver = ExprSolver(prog_model)

    # setup widening configuration
    visit_counter = KeyCounter()
    widening_delay = 5
    narrowing_delay = 3

    def do_widen(counter):
        # will widen when counter == widening_delay, then narrow. If it has
        # not converged after narrowing_delay is reached, widening is
        # triggered again but without a follow-up narrowing.
        # With the values above: True when counter == 5 or counter >= 8.
        return (counter == widening_delay
                or counter >= narrowing_delay + widening_delay)

    cfg = prog.visit(CFGBuilder())
    roots = cfg.roots()
    non_roots = [n for n in cfg.nodes if n not in roots]

    # find the variables that appear in the program
    var_set = set(n for n in prog_model.keys() if isinstance(n, Variable))

    # build an index
    indexed_vars = {var.data.index: var for var in var_set}
    last_index = max(indexed_vars.keys()) if len(indexed_vars) > 0 else -1

    # define the variables domain (indices that are not used by any variable
    # are filled with the unit domain)
    vars_domain = domains.Product(*(
        prog_model[indexed_vars[i]].domain
        if i in indexed_vars else _unit_domain
        for i in range(last_index + 1)
    ))

    # define the trace domain
    trace_domain = _SimpleTraceLattice(cfg.nodes)

    # define the State domain that we track at each program point.
    lat = domains.Powerset(
        domains.Product(
            trace_domain,
            vars_domain
        ),
        merge_pred_builder.build(
            trace_domain,
            vars_domain
        ),
        None  # We don't need a top element here.
    )

    # the transfer function
    transfer_func = _VarTracker(var_set, vars_domain, evaluator, solver)

    def transfer(new_states, node, inputs):
        # Computes the state at "node" from "inputs", the combined state of
        # its predecessors. "new_states" is only read to fetch the previous
        # state of "node" when it must be widened.

        # Lazily apply the IR node (if any) to the values of each input.
        transferred = (
            (
                trace,
                node.data.node.visit(transfer_func, values)
                if node.data.node is not None else values
            )
            for trace, values in inputs
        )

        # Extend each trace with this node and drop the entries whose values
        # are empty.
        output = lat.build([
            (
                trace_domain.join(trace, trace_domain.build([node])),
                values
            )
            for trace, values in transferred
            if not vars_domain.is_empty(values)
        ])

        # The visit counter of a node is only incremented for widening
        # points.
        if node.data.is_widening_point:
            if do_widen(visit_counter.get_incr(node)):
                # NOTE(review): the last argument presumably requests
                # widening -- confirm against domains.Powerset.update.
                output = lat.update(new_states[node], output, True)

        return output

    def it(states):
        # Performs one iteration over all the non-root nodes. "states" is
        # left untouched: the work is done on a copy, which is returned.
        new_states = states.copy()

        for node in non_roots:
            # Nodes updated earlier in this iteration are seen with their
            # new state by the nodes processed after them.
            new_states[node] = transfer(new_states, node, reduce(
                lat.join,
                (new_states[anc] for anc in cfg.ancestors(node))
            ))

        return new_states

    # initial state of the variables at the entry of the program
    init_vars = tuple(
        arg_values[indexed_vars[i]]
        if (i in indexed_vars and
            arg_values is not None and
            indexed_vars[i] in arg_values)
        else vars_domain.domains[i].top
        for i in range(last_index + 1)
    )

    # initial state at the entry of the program
    init_lat = lat.build([(trace_domain.bottom, init_vars)])

    # last state of the program (all program points)
    last = concat_dicts(
        {n: transfer({}, n, init_lat) for n in roots},
        {n: lat.bottom for n in non_roots}
    )

    # current state of the program (all program points)
    result = it(last)

    # find a fix-point.
    while any(not lat.eq(x, result[i]) for i, x in last.iteritems()):
        last, result = result, it(result)

    # For each node and each trace, map every variable to its value.
    formatted_results = {
        node: {
            trace: {
                v: values[v.data.index] for v in var_set
            }
            for trace, values in state
        }
        for node, state in result.iteritems()
    }

    return AnalysisResults(
        cfg,
        formatted_results,
        trace_domain,
        vars_domain,
        evaluator,
        prog.data.fun_id
    )
class CFGBuilder(visitors.ImplicitVisitor):
    """
    A visitor that can be used to build the control-flow graph of the given
    program as an instance of a Digraph. Nodes of the resulting control-flow
    graph will have the following data attached to it:
    - 'is_widening_point': "True" iff the node can be used as a widening
      point.
    - 'node': the corresponding IR node which this CFG node was built from,
      or None.
    """

    # Markers used by the depth-first search of post_process_cfg.
    NOT_VISITED = object()
    BEING_VISITED = object()
    ALREADY_VISITED = object()

    def __init__(self):
        self.nodes = None
        self.edges = None
        self.jumps = None
        self.labels = None
        self.start_node = None
        self.key_counter = KeyCounter()

    def fresh(self, name):
        """
        :param str name: The prefix of the name to generate.
        :return: `name` suffixed with the value that the key counter returns
            for `name`.
        :rtype: str
        """
        return "{}{}".format(name, self.key_counter.get_incr(name))

    @staticmethod
    def is_label(node):
        """
        :param tree.Node node: An IR node
        :return: Whether the node is a LabelStmt
        :rtype: bool
        """
        return isinstance(node, LabelStmt)

    def outs(self, node):
        """
        :return: The directly reachable nodes from the given node
        :rtype: iterable[Digraph.Node]
        """
        return (e.to for e in self.edges if e.frm == node)

    def compute_reachable_nodes(self, start, reachables):
        """
        Computes the set of nodes that are reachable from the given "start"
        node using the set of edges registered so far. Reachable nodes are
        added to the given set.

        The traversal uses an explicit worklist instead of recursion, so that
        long paths in the graph cannot exceed the interpreter's recursion
        limit.

        :param Digraph.Node start: The node from which to compute reachable
            nodes.
        :param set[Digraph.Node] reachables: The set of nodes that are found
            reachable so far. Nodes already in this set (other than `start`)
            are not explored again.
        """
        reachables.add(start)
        pending = [start]
        while pending:
            for node in self.outs(pending.pop()):
                if node not in reachables:
                    # Mark on discovery so that a node is queued only once.
                    reachables.add(node)
                    pending.append(node)

    def post_process_cfg(self, start):
        """
        Routine for post processing the resulting control-flow graph.
        Basically perform a depth-first search from the given `start` node
        to find out the reachable nodes and the widening points (the targets
        of the back edges found by the search).

        Returns a map that indicates for each node if it is reachable or not
        (i.e. maps to CFGBuilder.ALREADY_VISITED or CFGBuilder.NOT_VISITED).

        The search keeps its own stack instead of recursing, so that long
        paths in the graph cannot exceed the interpreter's recursion limit.
        Successors are explored in the same order as a recursive search
        would, which matters since the set of back edges depends on it.

        :type start: Digraph.Node
        :rtype: map[Digraph.Node, object]
        """
        state = defaultdict(lambda: CFGBuilder.NOT_VISITED)

        # Each stack entry holds a node being visited together with the
        # iterator over its successors that are still to be examined.
        state[start] = CFGBuilder.BEING_VISITED
        stack = [(start, iter(self.outs(start)))]

        while stack:
            current, successors = stack[-1]
            for node in successors:
                node_state = state[node]
                if node_state is CFGBuilder.NOT_VISITED:
                    # Descend into `node`; the iterator of `current` is
                    # resumed once `node` is completely visited.
                    state[node] = CFGBuilder.BEING_VISITED
                    stack.append((node, iter(self.outs(node))))
                    break
                elif node_state is CFGBuilder.BEING_VISITED:
                    # Found a back edge, mark `node` as a widening point.
                    node.data = node.data.copy(is_widening_point=True)
            else:
                # All the successors of `current` have been examined.
                state[current] = CFGBuilder.ALREADY_VISITED
                stack.pop()

        return state

    def visit_program(self, prgm):
        """
        Builds the control-flow graph of the given program.

        :param prgm: The program node; its `stmts` are visited in order.
        :return: A Digraph made of the start node followed by the reachable
            registered nodes, and of the edges leaving a reachable node.
        :rtype: Digraph
        """
        self.nodes = []
        self.edges = []
        self.jumps = []
        self.labels = {}

        start = self.build_node("start")
        self.visit_stmts(prgm.stmts, start)

        # Generate jump edges: the source of each jump is linked to every
        # successor of the node that was recorded for the target label.
        for node, label in self.jumps:
            # The list is fully built before "extend" is called, so
            # self.edges is never mutated while it is being iterated over.
            self.edges.extend([
                Digraph.Edge(node, edge.to)
                for edge in self.edges
                if edge.frm == self.labels[label]
            ])

        # Post process the CFG to infer widening points and compute
        # the set of reachable nodes.
        visit_results = self.post_process_cfg(start)

        # Remove all nodes and edges that are not reachable
        self.nodes = [
            n for n in self.nodes
            if visit_results[n] is not CFGBuilder.NOT_VISITED
        ]
        self.edges = [
            e for e in self.edges
            if visit_results[e.frm] is not CFGBuilder.NOT_VISITED
        ]

        return Digraph([start] + self.nodes, self.edges)

    def visit_split(self, splitstmt, start):
        """
        Visits each branch of the split from `start` and links the end of
        every branch to a fresh "split_join" node, which is returned.
        """
        ends = [
            self.visit_stmts(branch, start)
            for branch in splitstmt.branches
        ]

        join = self.build_node("split_join")
        self.register_and_link(ends, join)

        return join

    def visit_loop(self, loopstmt, start):
        """
        Visits the body of the loop from a fresh "loop_start" node, which is
        linked from both `start` and the end of the body. Returns a fresh
        "loop_join" node linked from "loop_start".
        """
        loop_start = self.build_node("loop_start")

        end = self.visit_stmts(loopstmt.stmts, loop_start)
        join = self.build_node("loop_join")

        self.register_and_link([start, end], loop_start)
        self.register_and_link([loop_start], join)
        return join

    def visit_label(self, label, start):
        """
        Records `start` as the node associated with the given label and
        returns it unchanged.
        """
        self.labels[label] = start
        return start

    def visit_goto(self, goto, start):
        """
        Records a jump from `start` to the label of the goto. Returns None,
        which register_and_link ignores, so the statement that follows is
        not linked from this one.
        """
        self.jumps.append((start, goto.label))
        return None

    def visit_assign(self, assign, start):
        """
        Registers an "assign" node built from the statement, linked from
        `start`, and returns it.
        """
        n = self.build_node("assign", orig_node=assign)
        self.register_and_link([start], n)
        return n

    def visit_read(self, read, start):
        """
        Registers a "read" node built from the statement, linked from
        `start`, and returns it.
        """
        n = self.build_node("read", orig_node=read)
        self.register_and_link([start], n)
        return n

    def visit_assume(self, assume, start):
        """
        Registers an "assume" node built from the statement, linked from
        `start`, and returns it.
        """
        n = self.build_node("assume", orig_node=assume)
        self.register_and_link([start], n)
        return n

    def visit_stmts(self, stmts, cur):
        """
        Visits the statements in order, passing the result of each visit as
        the starting node of the next one. Returns the last result (`cur`
        itself if there are no statements).
        """
        for stmt in stmts:
            cur = stmt.visit(self, cur)
        return cur

    def build_node(self, name, orig_node=None):
        """
        Creates (without registering it) a Digraph node with a fresh name
        derived from `name` and carrying the given original IR node. The
        node is not flagged as a widening point.
        """
        return Digraph.Node(name=self.fresh(name),
                            is_widening_point=False,
                            node=orig_node)

    def register_and_link(self, froms, new_node):
        """
        Adds `new_node` to the registered nodes and creates an edge to it
        from every element of `froms` that is not None.
        """
        self.nodes.append(new_node)
        for f in froms:
            if f is not None:
                self.edges.append(Digraph.Edge(f, new_node))
def compute_semantics(prog, prog_model, merge_pred_builder, arg_values=None):
    """
    Computes the state at each node of the control-flow graph of the given
    program, by solving the data-flow equations of its non-root nodes
    following a hierarchical ordering of these nodes.

    :param prog: The program to analyze. It is visited with a CFGBuilder and
        its `data.fun_id` is forwarded to the results.
    :param prog_model: A mapping whose Variable keys are the variables
        tracked by the analysis; `prog_model[var].domain` gives the domain
        of a variable. It is also handed to ExprEvaluator and ExprSolver.
    :param merge_pred_builder: An object whose `build(trace_domain,
        vars_domain)` result is passed as second argument of
        domains.Powerset.
    :param arg_values: An optional mapping from variables to their initial
        value. Variables that are not in it start at the top element of
        their domain.
    :return: An AnalysisResults built from the control-flow graph, the
        per-node results, the domains and the evaluator.
    """
    evaluator = ExprEvaluator(prog_model)
    solver = ExprSolver(prog_model)

    # setup widening configuration
    visit_counter = KeyCounter()
    widening_delay = 5
    narrowing_delay = 3

    def do_widen(counter):
        # will widen when counter == widening_delay, then narrow. If it has
        # not converged after narrowing_delay is reached, widening is
        # triggered again but without a follow-up narrowing.
        # With the values above: True when counter == 5 or counter >= 8.
        return (counter == widening_delay
                or counter >= narrowing_delay + widening_delay)

    cfg = prog.visit(CFGBuilder())
    roots = cfg.roots()
    non_roots = [n for n in cfg.nodes if n not in roots]

    # find the variables that appear in the program
    var_set = set(n for n in prog_model.keys() if isinstance(n, Variable))

    # build an index
    indexed_vars = {var.data.index: var for var in var_set}
    last_index = max(indexed_vars.keys()) if len(indexed_vars) > 0 else -1

    # define the variables domain (indices that are not used by any variable
    # are filled with the unit domain)
    vars_domain = domains.Product(*(prog_model[indexed_vars[i]].domain
                                    if i in indexed_vars else _unit_domain
                                    for i in range(last_index + 1)))

    # define the trace domain
    trace_domain = _SimpleTraceLattice(cfg.nodes)

    # define the State domain that we track at each program point.
    lat = domains.Powerset(
        domains.Product(trace_domain, vars_domain),
        merge_pred_builder.build(trace_domain, vars_domain),
        None  # We don't need a top element here.
    )

    # the transfer function
    transfer_func = _VarTracker(var_set, vars_domain, evaluator, solver)

    def transfer(states, node, inputs):
        """
        Applies the transfer function to the given node using the abstract
        domain element that represents an over-approximation of the states
        at the program points that precede it. Returns a new element of the
        abstract domain, representing the state at this node after the
        application of the transfer function.

        :param dict[Digraph.Node, object] states: The state at each program
            point. It is only read, to fetch the previous state of "node"
            when it must be widened.

        :param Digraph.Node node: The node on which to apply the transfer
            function.

        :param object inputs: The element of the abstract domain that
            represents the state of all the predecessors combined.

        :rtype: object
        """
        # Lazily apply the IR node (if any) to the values of each input.
        transferred = ((trace, node.data.node.visit(transfer_func, values)
                        if node.data.node is not None else values)
                       for trace, values in inputs)

        # Extend each trace with this node and drop the entries whose values
        # are empty.
        output = lat.build([
            (trace_domain.join(trace, trace_domain.build([node])), values)
            for trace, values in transferred
            if not vars_domain.is_empty(values)
        ])

        # The visit counter of a node is only incremented for widening
        # points.
        if node.data.is_widening_point:
            if do_widen(visit_counter.get_incr(node)):
                # NOTE(review): the last argument presumably requests
                # widening -- confirm against domains.Powerset.update.
                output = lat.update(states[node], output, True)

        return output

    def iterate_once(states, ordering):
        """
        Perform one iteration over the system of data-flow equations
        associated with the given subset of nodes. This procedure updates
        "states" in-place.

        :param dict[Digraph.Node, object] states: The state at each program
            point.

        :param Digraph.HierarchicalOrdering ordering: The ordering,
            describing:
            - The order in which to apply the transfer functions to each
              node.
            - The subset of program points to consider in this iteration.
            Elements that are not flagged as nodes are handed to "fix" as
            nested orderings.
        """
        for elem, is_node in ordering:
            if is_node:
                states[elem] = transfer(
                    states, elem,
                    reduce(lat.join,
                           (states[anc] for anc in cfg.ancestors(elem))))
            else:
                # Solve the nested component before moving on.
                fix(states, elem)

    def is_eq(last, current):
        """
        Given two dictionaries describing the state at each program point,
        returns True iff the state at each program point in `last` is equal
        to the state at *those* program points in `current`.

        :param dict[Digraph.Node, object] last: The last state (possibly
            containing fewer nodes than current).

        :param dict[Digraph.Node, object] current: The current state.

        :rtype: bool
        """
        for n, x in last.iteritems():
            if not lat.eq(x, current[n]):
                return False
        return True

    def sub_states(states, ordering):
        """
        Creates a copy of the given "states" dictionary which contains
        entries only for the nodes that are given by the first level of the
        ordering (i.e. from the current component).

        :type states: dict[Digraph.Node, object]
        :type ordering: Digraph.HierarchicalOrdering
        :rtype: dict[Digraph.Node, object]
        """
        return {elem: states[elem] for elem, is_node in ordering if is_node}

    def fix(states, ordering):
        """
        Solve the data-flow equations on the given subset of nodes of the
        CFG. Finds a fix-point by successive iterations. This procedure
        updates "states" in-place.

        :param dict[Digraph.Node, object] states: The state at each program
            point.

        :param Digraph.HierarchicalOrdering ordering: The ordering,
            describing:
            - The order in which to apply the transfer functions to each
              node.
            - The subset of program points for which to find a fix-point.
        """
        # Snapshot the first-level nodes before iterating, since
        # "iterate_once" overwrites their entries in "states".
        last = sub_states(states, ordering)
        iterate_once(states, ordering)

        # loop until the current state is equivalent to the last one.
        while not is_eq(last, states):
            last = sub_states(states, ordering)
            iterate_once(states, ordering)

    # initial state of the variables at the entry of the program
    init_vars = tuple(arg_values[indexed_vars[i]] if (
        i in indexed_vars and
        arg_values is not None and
        indexed_vars[i] in arg_values) else vars_domain.domains[i].top
                      for i in range(last_index + 1))

    # initial state to use at the entry of the program
    init_lat = lat.build([(trace_domain.bottom, init_vars)])

    # initial state at each program point of the program: roots are computed
    # from the entry state, every other node starts at bottom.
    states = concat_dicts({n: transfer({}, n, init_lat) for n in roots},
                          {n: lat.bottom for n in non_roots})

    # Find a fix-point.
    fix(states, cfg.subgraph(non_roots).flat_topological_ordering())

    # For each node and each trace, map every variable to its value.
    formatted_results = {
        node: {
            trace: {v: values[v.data.index] for v in var_set}
            for trace, values in state
        }
        for node, state in states.iteritems()
    }

    return AnalysisResults(cfg, formatted_results, trace_domain, vars_domain,
                           evaluator, prog.data.fun_id)