Beispiel #1
0
 def pre_execute(self):
     """Prepare the per-execution bookkeeping before a code block runs.

     Clears the set of 'used' variables/functions (they are not known until
     the code has been looked at) and stores a deep copy of the current
     ProvBuilder ticker in ``self.pre_ticker``.
     """
     # Start from an empty 'used' set for the upcoming execution.
     self.used = set()

     # Deep copy, so the stored ticker is independent of the object
     # that get_ticker() hands out.
     self.pre_ticker = deepcopy(ProvBuilder().get_ticker())
Beispiel #2
0
 def prov_wrapper(*args, **kwargs):
     """Call the wrapped function ``f`` and record the call as a provenance activity.

     The arguments are bound to ``f``'s parameter names first, then ``f`` is
     called, and the call is registered with a ProvBuilder together with
     ``prov_wrapper.source``. The resulting graph is stored on
     ``prov_wrapper.prov``. Returns whatever ``f`` returns.
     """
     # Map parameter names to the values of this call (done before calling f).
     bound_arguments = inspect.getcallargs(f, *args, **kwargs)

     result = f(*args, **kwargs)

     builder = ProvBuilder()
     builder.add_activity(f.__name__, prov_wrapper.source, bound_arguments, result)

     # Expose the provenance graph as an attribute of the wrapper itself.
     prov_wrapper.prov = builder.get_graph()

     return result
Beispiel #3
0
def _positional_inputs(args, count=None):
    """Label positional arguments 'x1', 'x2', ... when parameter names are unavailable.

    Only the first `count` arguments are labelled; by default all of `args`.
    Raises IndexError when `count` exceeds the number of arguments given.
    """
    if count is None:
        count = len(args)
    return {'x{}'.format(n): args[n - 1] for n in range(1, count + 1)}


def replace(f, input_names, output_names, *args, **kwargs):
    """Provenance-enabled replacement for arbitrary functions.

    Calls ``f(*args, **kwargs)`` and registers the call as an activity with a
    ProvBuilder. The resulting graph is stored on ``replace.prov``.

    For the activity, the following is collected before the call:

    * inputs -- a dictionary of argument names and values. When the parameter
      names cannot be retrieved, positional arguments are labelled
      'x1', 'x2', ...
    * source -- the source code of the function, its docstring, or (for
      builtin functions) its name.

    Parameters:
        f: the callable to execute.
        input_names, output_names: passed through unchanged to
            ``ProvBuilder.add_activity``.
        *args, **kwargs: the arguments ``f`` is called with.

    Returns:
        Whatever ``f`` returns.
    """
    # numpy universal function: label exactly `nin` positional inputs.
    if isinstance(f, np.ufunc):
        inputs = _positional_inputs(args, f.nin)
        source = f.__doc__

    # A type: argument names are not retrieved, label positionally.
    elif isinstance(f, types.TypeType):
        inputs = _positional_inputs(args)
        source = f.__doc__

    # Any other class: bind the arguments against its __init__,
    # with the class itself in the place of the first parameter.
    elif inspect.isclass(f):
        inputs = inspect.getcallargs(f.__init__, f, *args, **kwargs)
        source = inspect.getsource(f)

    # Builtin function: no inputs are recorded, the name serves as source.
    elif isinstance(f, types.BuiltinFunctionType):
        inputs = {}
        source = f.__name__

    # 'get_ipython' needs extra care, otherwise a cycle is introduced in the
    # provenance graph: record no inputs for it.
    elif getattr(f, '__name__', None) == 'get_ipython':
        inputs = {}
        source = inspect.getsource(f)

    # Any other function: all args and kwargs, keyed by parameter name.
    else:
        try:
            inputs = inspect.getcallargs(f, *args, **kwargs)

            # Drop inputs whose value is None. Iterate over a snapshot,
            # because entries are removed from `inputs` inside the loop.
            for arg_name, arg_value in list(inputs.items()):
                log.debug(type(arg_value))
                try:
                    log.debug("{} {}".format(arg_name, arg_value))
                    if arg_value is None:
                        log.debug("Popping {}".format(arg_name))
                        inputs.pop(arg_name, None)
                except Exception as e:
                    # Best effort: a value that cannot be formatted for the
                    # log is kept as an input.
                    log.warning(e)

            source = inspect.getsource(f)
        except Exception:
            # Argument binding or source retrieval failed; fall back to
            # positional labels and the docstring.
            log.warning('Function is not a Python function')
            inputs = _positional_inputs(args)
            source = f.__doc__

    pb = ProvBuilder()

    # Copy of the ticker as it is *before* f runs; handed to add_activity below.
    pre_ticker = deepcopy(pb.get_ticker())

    outputs = f(*args, **kwargs)

    # Callables without a __name__ are identified by their string
    # representation. (Every object has __str__, so no further fallback
    # can ever be reached.)
    if hasattr(f, '__name__'):
        name = f.__name__
    else:
        name = str(f)

    pb.add_activity(name, source, inputs, outputs, input_names=input_names, output_names=output_names, pre_ticker=pre_ticker)

    # Expose the provenance graph as an attribute of this function.
    replace.prov = pb.get_graph()

    return outputs
Beispiel #4
0
    def post_execute(self):
        """Build the provenance for the code block that was just executed in the IPython Notebook.

        Names in ``self.used`` are sorted into *inputs* (non-callable values
        already stored in ``self.environment``) and *dependencies* (callables,
        recorded by their source text). Every user-namespace entry that is new
        or has changed becomes an *output* and is stored in
        ``self.environment``. Everything is then registered with
        ``ProvBuilder.add_activity`` under the name ``In [<position>]``.
        """
        pb = ProvBuilder()

        # The description (i.e. the code) of what was just executed.
        description = self.hist_to_string(1)
        position = len(get_ipython().user_ns.get('_ih')) - 1
        name = "In [{}]".format(position)

        # Inputs, outputs and dependencies for the activity generator in ProvBuilder.
        inputs = {}
        outputs = {}
        dependencies = {}

        # Classify every used name as an input (variable) or a dependency (function).
        for node in self.used:
            try:
                evaluated_node = self.shell.ev(node)
            except Exception:
                # The name cannot be evaluated in the shell. Use the value
                # stored in the environment; if there is none, the name was
                # introduced by this code and is neither input nor dependency.
                if node not in self.environment:
                    continue
                evaluated_node = self.environment[node]

            if callable(evaluated_node):
                dependencies[node] = self._source_or_text(evaluated_node)
            elif node in self.environment:
                # Use the value stored in the environment, i.e. the one from
                # before this execution (the environment is only updated
                # further down).
                inputs[node] = self.environment[node]
            # Otherwise: a variable that was first introduced by this code.

        # Loop through all known entities in the user namespace.
        for k, v in list(self.shell.user_ns.items()):
            # Ignore standard IPython/Python variables. 'Out' is deliberately
            # NOT excluded (marked as a temporary experiment in the original).
            if k.startswith('_') or k in ['In', 'exit', 'quit', 'get_ipython']:
                continue

            if not self._variable_changed(k, v, pb):
                log.debug("'{}' skipped because it did not change (ticks: {} and {}).".format(k, self.pre_ticker.get(k), pb.get_tick(k)))
                continue

            log.debug("{} changed or was added".format(k))

            if not callable(v):
                # Not a function: the value itself is the output.
                if k == 'Out':
                    if len(v) == 0:
                        log.debug("Output value is empty: skipping...")
                        # Note: the environment is not updated in this case.
                        continue
                    kname = 'Out [{}]'.format(position)
                    log.debug("{}: {}".format(kname, v))
                else:
                    kname = k
                outputs[kname] = v
            elif hasattr(v, 'source'):
                # A PROV wrapped function: its stored source is the output.
                outputs[k] = v.source
            else:
                # Any other callable: its source if retrievable, else the object.
                try:
                    outputs[k] = inspect.getsource(v)
                except Exception:
                    outputs[k] = v

            # Remember the current value for the comparison after the next execution.
            self.environment[k] = v

        pb.add_activity(name, description, inputs, outputs, dependencies, input_names=inputs.keys(), output_names=outputs.keys(), expand_output_dict=True, pre_ticker=self.pre_ticker)

    def _source_or_text(self, obj):
        """Return the source code of callable `obj`, or its unicode text if no source is retrievable."""
        try:
            return inspect.getsource(obj)
        except Exception:
            return unicode(obj)

    def _variable_changed(self, k, v, pb):
        """Tell whether user-namespace entry `k`, now holding `v`, is new or changed.

        An entry counts as changed when it is not in ``self.environment``,
        when its value differs from the stored one, when its tick differs
        from the one in ``self.pre_ticker``, or when it is 'Out'.
        """
        if k not in self.environment:
            return True

        previous = self.environment[k]
        try:
            same_value = bool(v == previous)
        except Exception:
            # Objects with an ambiguous truth value (e.g. numpy arrays):
            # compare element-wise instead.
            log.debug("Caught numpy array comparison exception")
            try:
                same_value = bool(numpy.asarray(v == previous).all())
            except Exception:
                # The values cannot be compared at all; treat as changed
                # rather than failing the whole hook.
                log.debug("Not comparable, treating as changed")
                return True

        if not same_value:
            log.debug("Not the same (value-comparison)")
            return True

        # Equal values: the entry still changed if its tick moved since pre_execute.
        if self.pre_ticker.setdefault(k, 0) != pb.get_tick(k):
            log.debug("Not the same (tick-comparison)")
            return True

        # 'Out' is always reported.
        return k == 'Out'