class ArgumentEffects(ModuleAnalysis):

    """Gathers inter-procedural effects on function arguments."""

    def __init__(self):
        # Effect graph: an edge runs from src to dest when src modifies a
        # parameter of dest.
        self.result = DiGraph()
        self.node_to_functioneffect = dict()
        super(ArgumentEffects, self).__init__(Aliases, GlobalDeclarations,
                                              Intrinsics)

    def prepare(self, node):
        """
        Seed the effect graph before the inter-procedural traversal.

        Each intrinsic is mapped to its entry in IntrinsicArgumentEffects and
        each global declaration to a fresh FunctionEffects; both kinds are
        registered as graph nodes.
        """
        super(ArgumentEffects, self).prepare(node)
        for intr in self.intrinsics:
            effects = IntrinsicArgumentEffects[intr]
            self.node_to_functioneffect[intr] = effects
            self.result.add_node(effects)
        for decl in self.global_declarations.values():
            effects = FunctionEffects(decl)
            self.node_to_functioneffect[decl] = effects
            self.result.add_node(effects)

    def run(self, node):
        """
        Visit the module, then propagate update effects along graph edges.

        Returns (and stores in self.result) a dict mapping each function to
        its list of per-argument update flags.
        """
        graph = super(ArgumentEffects, self).run(node)
        worklist = set(graph)
        while worklist:
            source = worklist.pop()
            for updated_idx, is_updated in enumerate(source.update_effects):
                if not is_updated:
                    continue
                for target in graph.successors(source):
                    call_edge = graph.edges[source, target]
                    formals = call_edge["formal_parameters"]
                    for pos, formal_idx in enumerate(formals):
                        # propagate the impurity backward if needed;
                        # a newly flagged node must be processed again
                        effective_idx = call_edge["effective_parameters"][pos]
                        if formal_idx != updated_idx:
                            continue
                        if target.update_effects[effective_idx]:
                            continue
                        target.update_effects[effective_idx] = True
                        worklist.add(target)
        self.result = {fe.func: fe.update_effects for fe in graph}
        return self.result

    def argument_index(self, node):
        """
        Return the index of the current-function argument `node` aliases.

        Subscripts are peeled off both `node` and each of its aliases.
        Returns -1 when no alias is a (possibly iterated) argument.
        """
        while isinstance(node, ast.Subscript):
            node = node.value
        for candidate in self.aliases[node]:
            while isinstance(candidate, ast.Subscript):
                candidate = candidate.value
            for table in (self.current_arguments,
                          self.current_subscripted_arguments):
                if candidate in table:
                    return table[candidate]
        return -1

    def visit_FunctionDef(self, node):
        """Make `node` the current function and record its arguments."""
        self.current_function = self.node_to_functioneffect[node]
        self.current_arguments = {
            param: position
            for position, param in enumerate(node.args.args)
        }
        self.current_subscripted_arguments = dict()
        assert self.current_function in self.result
        self.generic_visit(node)

    def visit_For(self, node):
        """Remember loop targets that iterate over an argument."""
        iterated_idx = self.argument_index(node.iter)
        if iterated_idx >= 0:
            self.current_subscripted_arguments[node.target] = iterated_idx
        self.generic_visit(node)

    def visit_AugAssign(self, node):
        """Flag the argument (if any) updated by an augmented assignment."""
        target_idx = self.argument_index(node.target)
        if target_idx >= 0:
            self.current_function.update_effects[target_idx] = True
        self.generic_visit(node)

    def visit_Assign(self, node):
        """Flag arguments updated through a subscripted assignment target."""
        for target in node.targets:
            if not isinstance(target, ast.Subscript):
                continue
            target_idx = self.argument_index(target)
            if target_idx >= 0:
                self.current_function.update_effects[target_idx] = True
        self.generic_visit(node)

    def visit_Call(self, node):
        """
        Record, for each argument passed on, an edge from callee to caller.

        The edge stores which caller argument (effective parameter) is bound
        to which positional slot of the call (formal parameter).
        """
        for formal_idx, arg in enumerate(node.args):
            effective_idx = self.argument_index(arg)
            if effective_idx < 0:
                continue

            func_aliases = self.aliases[node.func]

            # pessimistic case: no alias found
            if func_aliases is None:
                self.current_function.update_effects[effective_idx] = True
                continue

            # a callee that is itself an argument of the current function
            # is expanded to every known function
            expanded = []
            for alias in func_aliases:
                if (isinstance(alias, ast.Name) and
                        self.argument_index(alias) >= 0):
                    expanded.extend(self.node_to_functioneffect.keys())
                else:
                    expanded.append(alias)

            for func_alias in expanded:
                # special hook for bound functions
                if isinstance(func_alias, ast.Call):
                    bound_name = func_alias.args[0].id
                    func_alias = self.global_declarations[bound_name]

                if func_alias is intrinsic.UnboundValue:
                    continue
                if func_alias not in self.node_to_functioneffect:
                    continue

                fe = self.node_to_functioneffect[func_alias]
                if func_alias is MODULES['functools']['partial']:
                    # when the wrapped function is known unambiguously,
                    # use its effects instead of those of partial
                    base_func_aliases = self.aliases[node.args[0]]
                    if len(base_func_aliases) == 1:
                        base_func_alias = next(iter(base_func_aliases))
                        fe = self.node_to_functioneffect.get(
                            base_func_alias, fe)

                caller = self.current_function
                if not self.result.has_edge(fe, caller):
                    self.result.add_edge(fe,
                                         caller,
                                         effective_parameters=[],
                                         formal_parameters=[])
                edge = self.result.edges[fe, caller]
                edge["effective_parameters"].append(effective_idx)
                edge["formal_parameters"].append(formal_idx)
        self.generic_visit(node)
class GlobalEffects(ModuleAnalysis):

    """Add a flag on each function that updates a global variable."""

    def __init__(self):
        self.result = DiGraph()
        self.node_to_functioneffect = dict()
        super(GlobalEffects, self).__init__(Aliases, GlobalDeclarations,
                                            Intrinsics)

    def prepare(self, node):
        """
        Seed the call graph before the inter-procedural traversal.

        Intrinsics take their entry from IntrinsicGlobalEffects, global
        declarations get a fresh FunctionEffect, and a dedicated
        FunctionEffect is registered for intrinsic.UnboundValue.
        """
        super(GlobalEffects, self).prepare(node)
        for intr in self.intrinsics:
            effect = IntrinsicGlobalEffects[intr]
            self.node_to_functioneffect[intr] = effect
            self.result.add_node(effect)
        for decl in self.global_declarations.values():
            effect = FunctionEffect(decl)
            self.node_to_functioneffect[decl] = effect
            self.result.add_node(effect)
        unbound = intrinsic.UnboundValue
        self.node_to_functioneffect[unbound] = FunctionEffect(unbound)

    def run(self, node):
        """
        Visit the module, then spread the global-effect flag to callers.

        Iterates until no flag changes and returns (and stores in
        self.result) the set of functions having a global effect.
        """
        graph = super(GlobalEffects, self).run(node)
        changed = True
        while changed:
            changed = False
            for callee in graph:
                if not callee.global_effect:
                    continue
                for caller in self.result.predecessors(callee):
                    if not caller.global_effect:
                        caller.global_effect = True
                        changed = True
        self.result = {fe.func for fe in graph if fe.global_effect}
        return self.result

    def visit_FunctionDef(self, node):
        """Make `node` the current function before visiting its body."""
        self.current_function = self.node_to_functioneffect[node]
        assert self.current_function in self.result
        self.generic_visit(node)

    def visit_Print(self, _):
        """A print statement is a global effect of the current function."""
        self.current_function.global_effect = True

    def visit_Call(self, node):
        """Add an edge from the current function to each possible callee."""
        func_aliases = self.aliases[node.func]

        # any alias that is a plain name is expanded to every known function
        expanded = []
        for alias in func_aliases:
            if isinstance(alias, ast.Name):
                expanded.extend(self.node_to_functioneffect.keys())
            else:
                expanded.append(alias)

        for func_alias in expanded:
            # special hook for bound functions: analyse the call they wrap
            if isinstance(func_alias, ast.Call):
                fake_call = ast.Call(func_alias.args[0],
                                     func_alias.args[1:],
                                     [])
                self.visit(fake_call)
                continue

            # conservative choice: unknown callees count as UnboundValue
            if func_alias not in self.node_to_functioneffect:
                func_alias = intrinsic.UnboundValue
            callee_effect = self.node_to_functioneffect[func_alias]
            self.result.add_edge(self.current_function, callee_effect)
        self.generic_visit(node)
class GlobalEffects(ModuleAnalysis):

    """Add a flag on each function that updates a global variable."""

    class FunctionEffect(object):

        """Pair a function-like object with its global-effect flag."""

        def __init__(self, node):
            """
            Compute the initial `global_effect` flag for `node`.

            Raises NotImplementedError when `node` is of a kind this
            analysis has no rule for.
            """
            self.func = node
            # The order of these checks is significant and kept as is.
            if isinstance(node, ast.FunctionDef):
                self.global_effect = False
            elif isinstance(node, intrinsic.Intrinsic):
                self.global_effect = node.global_effects
            elif isinstance(node, ast.alias):
                self.global_effect = False
            elif isinstance(node, str):
                self.global_effect = False
            elif isinstance(node, intrinsic.Class):
                self.global_effect = False
            elif isinstance(node, intrinsic.UnboundValueType):
                self.global_effect = True  # conservative choice
            else:
                # Carry the diagnostic in the exception instead of printing
                # it to stdout.
                raise NotImplementedError(
                    "no global effect rule for {} {!r}".format(type(node),
                                                               node))

    def __init__(self):
        self.result = DiGraph()
        self.node_to_functioneffect = dict()
        super(GlobalEffects, self).__init__(Aliases, GlobalDeclarations)

    def prepare(self, node):
        """
        Initialise globals effects as this analyse is inter-procedural.

        A FunctionEffect is created for every value found in the global
        declarations and in MODULES (recursing into sub-modules and class
        fields), plus one for intrinsic.UnboundValue.
        """
        super(GlobalEffects, self).prepare(node)

        def register_node(module):
            """ Recursively save globals effect for all functions. """
            for v in module.values():
                if isinstance(v, dict):  # Submodule case
                    register_node(v)
                else:
                    fe = GlobalEffects.FunctionEffect(v)
                    self.node_to_functioneffect[v] = fe
                    self.result.add_node(fe)
                    if isinstance(v, intrinsic.Class):
                        register_node(v.fields)

        register_node(self.global_declarations)
        for module in MODULES.values():
            register_node(module)
        self.node_to_functioneffect[intrinsic.UnboundValue] = \
            GlobalEffects.FunctionEffect(intrinsic.UnboundValue)

    def run(self, node):
        """
        Visit the module, then spread the global-effect flag to callers.

        Iterates until no flag changes and returns (and stores in
        self.result) the set of functions having a global effect.
        """
        result = super(GlobalEffects, self).run(node)
        keep_going = True
        while keep_going:
            keep_going = False
            for function in result:
                if function.global_effect:
                    for pred in self.result.predecessors(function):
                        if not pred.global_effect:
                            keep_going = pred.global_effect = True
        self.result = {f.func for f in result if f.global_effect}
        return self.result

    def visit_FunctionDef(self, node):
        """Make `node` the current function before visiting its body."""
        self.current_function = self.node_to_functioneffect[node]
        assert self.current_function in self.result
        self.generic_visit(node)

    def visit_Print(self, _):
        """A print statement is a global effect of the current function."""
        self.current_function.global_effect = True

    def visit_Call(self, node):
        """Add an edge from the current function to each possible callee."""
        func_aliases = self.aliases[node.func]

        # expand argument if any: a plain name may stand for any function
        func_aliases = reduce(
            # all funcs
            lambda x, y: x + (list(self.node_to_functioneffect.keys())
                              if isinstance(y, ast.Name) else [y]),
            func_aliases,
            list())
        for func_alias in func_aliases:
            # special hook for bound functions
            if isinstance(func_alias, ast.Call):
                fake_call = ast.Call(func_alias.args[0],
                                     func_alias.args[1:],
                                     [])
                self.visit(fake_call)
                continue

            # conservative choice
            if func_alias not in self.node_to_functioneffect:
                func_alias = intrinsic.UnboundValue
            func_alias = self.node_to_functioneffect[func_alias]
            self.result.add_edge(self.current_function, func_alias)
        self.generic_visit(node)
class TypeDependencies(ModuleAnalysis):

    """
    Gathers the callees of each function required for type inference.

    This analyse produces a directed graph with functions as nodes and edges
    between nodes when a function might call another.

    Check usual behavior.
    >>> import gast as ast
    >>> from pythran import passmanager
    >>> pm = passmanager.PassManager("test")
    >>> node = ast.parse('''
    ... def foo(n):
    ...     return 1 if copy(n) else copy(n)
    ... def copy(n):
    ...     return n == 2''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    3

    foo result depend on : NoDeps and copy
    copy result depend on : NoDeps

    Check that content assignment is a dependency.
    >>> node = ast.parse('''
    ... def foo(n):
    ...     n[1] = copy(n)
    ...     return 1 if copy(n) else n
    ... def copy(n):
    ...     return n == 2''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    3

    foo result depend on : NoDeps and copy
    copy result depend on : NoDeps

    Check augassign add a dependencies but don't remove the old one.
    >>> node = ast.parse('''
    ... def bar(n):
    ...     return n
    ... def foo(n):
    ...     n[1] = copy(n)
    ...     n[1] += bar(1)
    ...     return 1 if copy(n) else n
    ... def copy(n):
    ...     return n == 2''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    5

    bar result depend on : NoDeps
    foo result depend on : NoDeps, bar and copy
    copy depend on : NoDeps

    Check a if statement handle both branches
    >>> node = ast.parse('''
    ... def bar(n):
    ...     return n
    ... def foo(n):
    ...     if n:
    ...         n = bar()
    ...     else:
    ...         n = 4
    ...     return 1 or n''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    3

    Check we do not add everything from a conditional statement.
    >>> node = ast.parse('''
    ... def bar(n):
    ...     return n
    ... def foo(n):
    ...     if n:
    ...         n = bar()
    ...         n = 3
    ...     else:
    ...         n = 4
    ...     return 1 or n''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    2

    bar result depend on : NoDeps
    foo result depend on : NoDeps only

    Check dependency on for target variable
    >>> node = ast.parse('''
    ... def bar(n):
    ...     return builtins.range(n)
    ... def foo(n):
    ...     for i in bar(n):
    ...         i = 2
    ...     return i''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    2

    bar result depend on : NoDeps
    foo result depend on : NoDeps

    Check dependency on for target variable with no deps if we don't start
    >>> node = ast.parse('''
    ... def bar(n):
    ...     return builtins.range(n)
    ... def foo(n):
    ...     i = 4
    ...     for i in bar(n):
    ...         pass
    ...     return i''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    3

    bar result depend on : NoDeps
    foo result depend on : NoDeps and bar

    Check dependency on for target variable with deps
    >>> node = ast.parse('''
    ... def bar(n):
    ...     return builtins.range(n)
    ... def foo(n):
    ...     for i in bar(n):
    ...         pass
    ...     return i''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    2

    bar result depend on : NoDeps
    foo result depend on : NoDeps and bar

    Check conditional without else branch.
    >>> node = ast.parse('''
    ... def foo(n):
    ...     res = 3
    ...     if n:
    ...         res = foo(n - 1)
    ...     return res''')
    >>> res = pm.gather(TypeDependencies, node)
    >>> len(res.edges)
    2

    foo result depend on : NoDeps and foo

    FIXME : We should use CFG to perform better function dependencies.

    Check conditional without break
    >> node = ast.parse('''
    .. def bar2(n):
    ..     return builtins.range(n)
    .. def bar(n):
    ..     return builtins.range(n)
    .. def foo(n):
    ..     for i in bar(n):
    ..         if i:
    ..             j = bar(n)
    ..             break
    ..         j = bar2(n)
    ..     return j''')
    >> res = pm.gather(TypeDependencies, node)
    >> len(res.edges)
    4

    bar result depend on : NoDeps
    bar2 result depend on : NoDeps
    foo result depend on : bar and bar2
    """

    NoDeps = "None"

    def __init__(self):
        """ Create empty result graph and gather global declarations. """
        self.result = DiGraph()
        self.current_function = None
        # variable to dependencies for current function.
        self.naming = dict()
        # variable to dependencies for current conditional statement
        self.in_cond = dict()
        ModuleAnalysis.__init__(self, GlobalDeclarations)

    def prepare(self, node):
        """
        Add nodes for each global declarations in the result graph.

        No edges are added at this point; the NoDeps marker node is
        registered alongside the declarations.
        """
        super(TypeDependencies, self).prepare(node)
        for declaration in self.global_declarations.values():
            self.result.add_node(declaration)
        self.result.add_node(TypeDependencies.NoDeps)

    @staticmethod
    def _combine(dep_lists):
        """ Union one dependency set from each list, for every combination. """
        return list({frozenset.union(*combo)
                     for combo in itertools.product(*dep_lists)})

    def _branch_naming(self, stmts):
        """
        Visit `stmts` and return the naming they produce.

        The naming in effect before the visit is restored afterwards.
        Returns None (without restoring) when a KeyError interrupts the
        visit.
        """
        try:
            saved = self.naming.copy()
            for stmt in stmts:
                self.visit(stmt)
            produced = self.naming
            self.naming = saved
        except KeyError:
            return None
        return produced

    def visit_any_conditionnal(self, node1, node2):
        """
        Visit two alternative statement lists and merge their namings.

        Compute correct dependencies on a value as both branch are possible
        path.
        """
        true_naming = self._branch_naming(node1)
        false_naming = self._branch_naming(node2)

        if true_naming and false_naming:
            # start from the false branch and add what the true one brings
            self.naming = false_naming
            for name, deps in true_naming.items():
                if name not in self.naming:
                    self.naming[name] = deps
                    continue
                for dep in deps:
                    if dep not in self.naming[name]:
                        self.naming[name].append(dep)
        elif true_naming:
            self.naming = true_naming
        elif false_naming:
            self.naming = false_naming

    def visit_FunctionDef(self, node):
        """
        Initialize variable for the current function to add edges from calls.

        We compute variable to call dependencies and add edges when returns
        are reach.
        """
        # Ensure there are no nested functions.
        assert self.current_function is None
        self.current_function = node
        self.naming = dict()
        self.in_cond = False  # True when we are in a if, while or for
        self.generic_visit(node)
        self.current_function = None

    def visit_Return(self, node):
        """
        Add edge from all possible callee to current function.

        Gather all the function call that led to the creation of the
        returned expression and add an edge to each of this function.

        When visiting an expression, one returns a list of frozensets. Each
        element of the list is linked to a possible path, each element of a
        frozenset is linked to a dependency.
        """
        if not node.value:
            # Yielding function can't return values
            return
        for dep_set in self.visit(node.value):
            # an empty set means this path depends on no function at all
            sources = dep_set or (TypeDependencies.NoDeps, )
            for source in sources:
                self.result.add_edge(source, self.current_function)

    visit_Yield = visit_Return

    def visit_Assign(self, node):
        """
        In case of assignment assign value depend on r-value type dependencies.

        It is valid for subscript, `a[i] = foo()` means `a` type depend on
        `foo` return type.
        """
        rhs_deps = self.visit(node.value)
        for target in node.targets:
            variable = get_variable(target)
            if isinstance(variable, ast.Name):
                self.naming[variable.id] = rhs_deps

    def visit_AugAssign(self, node):
        """
        AugAssigned value depend on r-value type dependencies.

        It is valid for subscript, `a[i] += foo()` means `a` type depend on
        `foo` return type and previous a types too.
        """
        previous_deps = self.naming[get_variable(node.target).id]
        rhs_deps = self.visit(node.value)
        merged = self._combine((previous_deps, rhs_deps))
        self.naming[get_variable(node.target).id] = merged

    def visit_For(self, node):
        """
        Handle iterator variable in for loops.

        Iterate variable may be the correct one at the end of the loop.
        """
        body = node.body
        if node.target.id not in self.naming:
            self.naming[node.target.id] = self.visit(node.iter)
            self.visit_any_conditionnal(body, body + node.orelse)
            return
        # target already known: model the iteration as an assignment that
        # may or may not happen
        bind_target = ast.Assign(targets=[node.target],
                                 value=node.iter,
                                 type_comment=None)
        self.visit_any_conditionnal([bind_target] + body, node.orelse)

    def visit_BoolOp(self, node):
        """ Return type may come from any boolop operand. """
        deps = []
        for value in node.values:
            deps = deps + self.visit(value)
        return deps

    def visit_BinOp(self, node):
        """ Return type depend from both operand of the binary operation. """
        operand_deps = [self.visit(node.left), self.visit(node.right)]
        return self._combine(operand_deps)

    def visit_UnaryOp(self, node):
        """ Return type depend on operand only. """
        return self.visit(node.operand)

    @staticmethod
    def visit_Lambda(_):
        """ Lambda have to be remove before. """
        assert False

    def visit_IfExp(self, node):
        """ Return value depend on both if branch. """
        return self.visit(node.body) + self.visit(node.orelse)

    @staticmethod
    def visit_Compare(_):
        """ Comparison return a bool so there are no dependencies. """
        return [frozenset()]

    def visit_Call(self, node):
        """
        Function call depend on all function use in the call.

        >> a = foo(bar(c) or foobar(d))

        Return type depend on [foo, bar] or [foo, foobar]
        """
        arg_deps = [self.visit(arg) for arg in node.args]
        func_deps = self.visit(node.func)
        return self._combine(arg_deps + [func_deps or []])

    @staticmethod
    def visit_Constant(_):
        """ Return no dependencies on others functions. """
        return [frozenset()]

    @staticmethod
    def visit_Attribute(_):
        """ Return no dependencies on others functions. """
        return [frozenset()]

    def visit_Subscript(self, node):
        """
        Return dependencies of the subscripted value.

        a = foo()[0] means `a` have a dependency on `foo` return type.
        """
        return self.visit(node.value)

    def visit_Name(self, node):
        """
        Return dependencies for given variable.

        It has to be registered first.
        """
        name = node.id
        if name in self.naming:
            return self.naming[name]
        if name in self.global_declarations:
            return [frozenset([self.global_declarations[name]])]
        if isinstance(node.ctx, ast.Param):
            deps = [frozenset()]
            self.naming[name] = deps
            return deps
        raise PythranInternalError("Variable '{}' used before assignment"
                                   "".format(name))

    def visit_List(self, node):
        """ List construction depend on each elements type dependency. """
        if not node.elts:
            return [frozenset()]
        per_element = [self.visit(elt) for elt in node.elts]
        return list(set(sum(per_element, [])))

    visit_Set = visit_List

    def visit_Dict(self, node):
        """ Dict construction depend on each element/value type dependency."""
        if not node.keys:
            return [frozenset()]
        items = node.keys + node.values
        per_item = [self.visit(item) for item in items]
        return list(set(sum(per_item, [])))

    visit_Tuple = visit_List

    @staticmethod
    def visit_Slice(_):
        """ Slice are not part of return type dependency information. """
        assert False

    @staticmethod
    def visit_Index(_):
        """ Index are not part of return type dependency information. """
        assert False

    def visit_If(self, node):
        """ Both if branches may be evaluate first. """
        return self.visit_any_conditionnal(node.body, node.orelse)

    def visit_While(self, node):
        """ Both while branches may be evaluate first. """
        return self.visit_any_conditionnal(node.body, node.orelse)

    def visit_ExceptHandler(self, node):
        """ Exception may declare a new variable. """
        if node.name:
            self.naming[node.name.id] = [frozenset()]
        for stmt in node.body:
            self.visit(stmt)
class CFG(FunctionAnalysis):

    """
    Computes the Control Flow Graph of a function.

    The processing of a node yields a pair containing

    * the OUT nodes, to be linked with the IN nodes of the successor
    * the RAISE nodes, nodes that stop the control flow
      (exception/break/...)
    """

    #: The sink node in the control flow graph.
    #:
    #: The predecessors of this node are those AST nodes that terminate
    #: control flow without a return statement.
    NIL = object()

    def __init__(self):
        self.result = DiGraph()
        super(CFG, self).__init__()

    def visit_FunctionDef(self, node):
        """OUT = node, RAISES = ()"""
        # the function itself is the entry point
        self.result.add_node(node)
        currs = (node, )
        for n in node.body:
            self.result.add_node(n)
            for curr in currs:
                self.result.add_edge(curr, n)
            currs, _ = self.visit(n)
        # add an edge to NIL for nodes that end the control flow
        # without a return
        self.result.add_node(CFG.NIL)
        for curr in currs:
            self.result.add_edge(curr, CFG.NIL)
        return (node, ), ()

    def visit_Pass(self, node):
        """OUT = node, RAISES = ()"""
        return (node, ), ()

    # All these nodes have the same behavior as pass
    visit_Assign = visit_AugAssign = visit_Import = visit_Pass
    visit_Expr = visit_Print = visit_ImportFrom = visit_Pass
    visit_Yield = visit_Delete = visit_Pass

    def visit_Return(self, node):
        """OUT = (), RAISES = ()"""
        return (), ()

    def visit_For(self, node):
        """
        OUT = (node,) + last body statements

        RAISES = body's that are not break or continue, plus everything
        raised from the else clause
        """
        currs = (node, )
        break_currs = ()
        raises = ()
        # handle body
        for n in node.body:
            self.result.add_node(n)
            for curr in currs:
                self.result.add_edge(curr, n)
            currs, nraises = self.visit(n)
            for nraise in nraises:
                if isinstance(nraise, ast.Break):
                    break_currs += (nraise, )
                elif isinstance(nraise, ast.Continue):
                    self.result.add_edge(nraise, node)
                else:
                    raises += (nraise, )
        # add the backward loop
        for curr in currs:
            self.result.add_edge(curr, node)
        # the else statement if needed
        if node.orelse:
            for n in node.orelse:
                self.result.add_node(n)
                for curr in currs:
                    self.result.add_edge(curr, n)
                currs, nraises = self.visit(n)
                # The else clause is outside the loop body: a break or
                # continue found here targets the enclosing loop, so every
                # raise is handed over to the caller untouched.
                raises += nraises
        else:
            currs = node,
        # while only: a loop whose test is always true is only left
        # through a break
        if isinstance(node, ast.While) and is_true_predicate(node.test):
            return break_currs, raises
        return break_currs + currs, raises

    visit_While = visit_For

    def visit_If(self, node):
        """
        OUT = true branch U false branch

        RAISES = true branch U false branch
        """
        raises = ()

        # true branch
        currs = (node, )
        for n in node.body:
            self.result.add_node(n)
            for curr in currs:
                self.result.add_edge(curr, n)
            currs, nraises = self.visit(n)
            raises += nraises
        tcurrs = currs

        # false branch
        currs = (node, )
        for n in node.orelse:
            self.result.add_node(n)
            for curr in currs:
                self.result.add_edge(curr, n)
            currs, nraises = self.visit(n)
            # accumulate, so that raises of every else statement are kept
            raises += nraises

        if is_true_predicate(node.test):
            # the false branch is still registered in the graph but control
            # never leaves the statement through it
            return tcurrs, raises
        return tcurrs + currs, raises

    def visit_Raise(self, node):
        """OUT = (), RAISES = (node)"""
        return (), (node, )

    visit_Break = visit_Continue = visit_Raise

    def visit_Assert(self, node):
        """OUT = RAISES = (node)"""
        return (node, ), (node, )

    def visit_Try(self, node):
        """
        OUT = body's U handler's

        RAISES = handler's

        This equation is not as good as it could be, but type information
        would be needed to be more accurate.
        """
        currs = (node, )
        raises = ()
        for handler in node.handlers:
            self.result.add_node(handler)
        for n in node.body:
            self.result.add_node(n)
            for curr in currs:
                self.result.add_edge(curr, n)
            currs, nraises = self.visit(n)
            for nraise in nraises:
                if isinstance(nraise, ast.Raise):
                    # any handler may catch the exception
                    for handler in node.handlers:
                        self.result.add_edge(nraise, handler)
                else:
                    raises += (nraise, )
        for handler in node.handlers:
            ncurrs, nraises = self.visit(handler)
            currs += ncurrs
            raises += nraises
        return currs, raises

    def visit_ExceptHandler(self, node):
        """OUT = body's, RAISES = body's"""
        currs = (node, )
        raises = ()
        for n in node.body:
            self.result.add_node(n)
            for curr in currs:
                self.result.add_edge(curr, n)
            currs, nraises = self.visit(n)
            raises += nraises
        return currs, raises