def pre_execute(self):
    """Prepare the tracker state right before a piece of code is executed.

    The collection of 'used' names (variables, functions, ...) is emptied,
    because nothing is known yet about the code that is about to run, and
    a private copy of the current ProvBuilder ticker is kept on
    ``self.pre_ticker``.
    """
    # Forget everything that was marked as used by a previous run
    self.used = set()

    # Deep-copy the ticker so the stored value is independent of the
    # object handed out by the builder
    current_ticker = ProvBuilder().get_ticker()
    self.pre_ticker = deepcopy(current_ticker)
def prov_wrapper(*args, **kwargs):
    """Run the wrapped function and record the call as a provenance activity.

    ``f`` is not a parameter: it is looked up in the surrounding scope
    (not visible in this block). ``prov_wrapper.source`` is likewise read
    as an attribute that must have been set on this function elsewhere.

    All positional and keyword arguments are forwarded to ``f`` unchanged
    and the value returned by ``f`` is handed back to the caller. The
    provenance graph produced for the call is stored on
    ``prov_wrapper.prov``.
    """
    # Map the supplied arguments onto f's parameter names *before* the
    # call, so the activity is described by name -> value pairs
    bound_arguments = inspect.getcallargs(f, *args, **kwargs)
    result = f(*args, **kwargs)

    builder = ProvBuilder()
    builder.add_activity(f.__name__, prov_wrapper.source, bound_arguments, result)
    prov_wrapper.prov = builder.get_graph()

    return result
def replace(f, input_names, output_names, *args, **kwargs):
    """Provenance-enabled replacement for arbitrary functions.

    Executes ``f(*args, **kwargs)``, registers the call as an activity on a
    ``ProvBuilder`` and stores the resulting graph on ``replace.prov``.

    Args:
        f: The callable to execute.
        input_names: Forwarded as ``input_names`` to ``add_activity``.
        output_names: Forwarded as ``output_names`` to ``add_activity``.
        *args: Positional arguments forwarded unchanged to ``f``.
        **kwargs: Keyword arguments forwarded unchanged to ``f``.

    Returns:
        Whatever ``f`` returns.

    For the activity description:
        * inputs is a dictionary of argument names and values,
        * outputs is whatever the wrapped function returns,
        * source is the source code of the function, its docstring, or
          (for builtins) its name.
    """

    def positional_inputs(values):
        # Name arguments 'x1', 'x2', ... when real names are unavailable
        return {'x{}'.format(n): value for n, value in enumerate(values, 1)}

    # A numpy universal function: only its first ``nin`` arguments are
    # inputs. Slicing (rather than indexing) lets f itself report a wrong
    # number of arguments instead of failing here with an IndexError.
    if isinstance(f, np.ufunc):
        inputs = positional_inputs(args[:f.nin])
        source = f.__doc__
    # A type (e.g. a wrapper around a C-function): the argument names cannot
    # be retrieved. ``type`` is what Python 2 exposes as ``types.TypeType``.
    elif isinstance(f, type):
        inputs = positional_inputs(args)
        source = f.__doc__
    # Any other class object (an expression that instantiates a class):
    # describe the call through the signature of its ``__init__``.
    elif inspect.isclass(f):
        inputs = inspect.getcallargs(f.__init__, f, *args, **kwargs)
        source = inspect.getsource(f)
    # A builtin function: nothing to introspect, use its name as source
    elif isinstance(f, types.BuiltinFunctionType):
        inputs = {}
        source = f.__name__
    # The 'get_ipython' function needs extra care, otherwise we introduce a
    # cycle in the provenance graph: record it without any inputs.
    elif getattr(f, '__name__', None) == 'get_ipython':
        inputs = {}
        source = inspect.getsource(f)
    # Any other function: all args and kwargs become inputs, by name
    else:
        try:
            inputs = inspect.getcallargs(f, *args, **kwargs)
            # Drop the inputs that have no value. Iterate over a snapshot:
            # removing entries while iterating the live dict raises a
            # RuntimeError on Python 3.
            for arg_name, arg_value in list(inputs.items()):
                log.debug(type(arg_value))
                try:
                    log.debug("{} {}".format(arg_name, arg_value))
                    if arg_value is None:
                        log.debug("Popping {}".format(arg_name))
                        inputs.pop(arg_name)
                except Exception as e:
                    # Formatting an exotic value must not abort the call
                    log.warning(e)
            source = inspect.getsource(f)
        except Exception:
            # Signature or source unavailable: fall back to positional
            # names and the docstring.
            log.warning('Function is not a Python function')
            inputs = positional_inputs(args)
            source = f.__doc__

    pb = ProvBuilder()
    # Copy the ticker before f runs, so add_activity receives the state
    # from before the call
    pre_ticker = deepcopy(pb.get_ticker())

    outputs = f(*args, **kwargs)

    # Every object has __str__, so str(f) is always available as a fallback
    # name. (The bound method ``f.__str__`` itself is not a usable name.)
    if hasattr(f, '__name__'):
        name = f.__name__
    else:
        name = str(f)

    pb.add_activity(name, source, inputs, outputs,
                    input_names=input_names, output_names=output_names,
                    pre_ticker=pre_ticker)

    replace.prov = pb.get_graph()

    return outputs
def post_execute(self):
    """Build the provenance for the code block that was just executed.

    The executed code becomes one activity named ``In [<position>]``:

    * inputs: names in ``self.used`` that are non-callable and present in
      ``self.environment``, with the value stored there;
    * dependencies: names in ``self.used`` that are callable, with their
      source code (or their unicode representation when no source can be
      retrieved);
    * outputs: entries of the user namespace that are new or changed with
      respect to ``self.environment``. 'Out' is always treated as changed
      and is reported as ``Out [<position>]``.

    ``self.environment`` is updated with every new or changed entry, and
    the activity is registered through ``ProvBuilder.add_activity``.
    """

    def source_or_repr(obj):
        # Source code of obj, or its unicode form when none is available
        try:
            return inspect.getsource(obj)
        except Exception:
            return unicode(obj)

    # Initialize a provenance builder
    pb = ProvBuilder()

    # Get the description (i.e. the code) from the code we just executed
    description = self.hist_to_string(1)
    position = len(get_ipython().user_ns.get('_ih')) - 1
    name = "In [{}]".format(position)

    # Inputs, outputs and dependencies for the activity generator
    inputs = {}
    outputs = {}
    dependencies = {}

    # For all nodes (variables/functions) that were recognized as used:
    #  - a *function* name is added to the dependencies,
    #  - a *global* variable is added to the inputs.
    for node in self.used:
        try:
            evaluated_node = self.shell.ev(node)
            if callable(evaluated_node):
                dependencies[node] = source_or_repr(evaluated_node)
            elif node in self.environment:
                # Use the value the variable had prior to executing the code
                inputs[node] = self.environment[node]
            # Otherwise the variable was first introduced by this code:
            # it is not an input.
        except Exception:
            # The name could not be evaluated; fall back to the value
            # stored in the environment, if there is one.
            if node in self.environment:
                evaluated_node = self.environment[node]
                if callable(evaluated_node):
                    dependencies[node] = source_or_repr(evaluated_node)
                else:
                    inputs[node] = evaluated_node

    # Loop through a snapshot of all known entities in the user namespace
    for k, v in list(self.shell.user_ns.items()):
        # Ignore standard IPython/Python variables in the user namespace.
        # 'Out' is deliberately NOT excluded.
        if k.startswith('_') or k in ['In', 'exit', 'quit', 'get_ipython']:
            continue

        # A variable counts as changed when it is new, when its value
        # differs from the stored one, when its tick moved, or when it is
        # 'Out'. Objects with an ambiguous truth value (e.g. numpy arrays)
        # make the plain comparison raise, hence the fallback below.
        try:
            if k in self.environment:
                unchanged = (v == self.environment[k] and
                             self.pre_ticker.setdefault(k, 0) == pb.get_tick(k))
                changed = (not unchanged) or k == 'Out'
            else:
                changed = True
        except Exception:
            log.debug("Caught numpy array comparison exception")
            if k not in self.environment:
                log.debug("Newly added variable")
                changed = True
            elif k == 'Out':
                log.debug("Out value")
                changed = True
            elif not numpy.asarray(v == self.environment[k]).all():
                log.debug("Not the same (value-comparison)")
                changed = True
            elif self.pre_ticker.setdefault(k, 0) != pb.get_tick(k):
                # Same value, but the tick moved since pre_execute
                log.debug("Not the same (tick-comparison)")
                changed = True
            else:
                changed = False

        if not changed:
            log.debug("'{}' skipped because it did not change (ticks: {} and {}).".format(k, self.pre_ticker[k], pb.get_tick(k)))
            continue

        log.debug("{} changed or was added".format(k))

        if not callable(v):
            # Not a function: the value itself is the output
            if k == 'Out':
                if len(v) == 0:
                    log.debug("Output value is empty: skipping...")
                    continue
                kname = 'Out [{}]'.format(position)
                log.debug("{}: {}".format(kname, v))
            else:
                kname = k
            outputs[kname] = v
        elif hasattr(v, 'source'):
            # A PROV wrapped function: its recorded source is the output
            outputs[k] = v.source
        else:
            # Callable but not wrapped: try its source, else the object
            try:
                outputs[k] = inspect.getsource(v)
            except Exception:
                outputs[k] = v

        # Remember the new value for the next execution
        self.environment[k] = v

    pb.add_activity(name, description, inputs, outputs, dependencies,
                    input_names=list(inputs.keys()),
                    output_names=list(outputs.keys()),
                    expand_output_dict=True,
                    pre_ticker=self.pre_ticker)