def _spider_function(function, session, pickles=None):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Every name returned by `_get_naked_loads(function)` is looked up in
    `session`. Depending on the object found, it is pickled, an import
    statement is recorded for it, or (for objects whose `__module__` is
    `__main__`) this function recurses into it to collect its source as well.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals()
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is mutated in
        place and also carries the bookkeeping key '_objects_seen' (a list of
        the names already visited). When omitted, a fresh dictionary is
        created for this call.

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function
    pickles: dictionary
        dictionary of variable names and their values as pickled strings
        (plus the '_objects_seen' bookkeeping list)
    modules: dictionary
        merged results of `_extract_module` for the non-`__main__` modules
        that dependencies were found in
    """
    # A `{}` default would be created once and shared by every call that
    # omits `pickles`, so '_objects_seen' (and stale pickles) would leak from
    # one top-level call into the next. Build a fresh dict per call instead.
    if pickles is None:
        pickles = {}
    if '_objects_seen' not in pickles:
        pickles['_objects_seen'] = []
    pickles['_objects_seen'].append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    # source is only fetched for non-module objects
    if not isinstance(function, types.ModuleType):
        source += _get_source(function) + '\n'

    for varname in _get_naked_loads(function):
        if varname in pickles['_objects_seen']:
            continue
        pickles['_objects_seen'].append(varname)
        if varname not in session:
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            pickles[varname] = terragon.dumps_to_base64(obj)

        # NOTE: this is a separate `if`, not an `elif`: an object that was
        # pickled just above is still run through the checks below.
        if hasattr(obj, "__module__"):
            if obj.__module__ == "__main__":
                # the recursive call shares (and mutates) the same `pickles`
                # dict, which is how '_objects_seen' stops infinite recursion
                new_imports, new_source, new_pickles, new_modules = \
                    _spider_function(obj, session, pickles)
                source += new_source + '\n'
                imports += new_imports
                pickles.update(new_pickles)
                modules.update(new_modules)
            else:
                modules.update(_extract_module(obj.__module__))
                ref = inspect.getmodule(obj).__name__

                # `func_name` only exists on Python 2 functions; fall back to
                # `__name__` for plain functions so aliased imports are still
                # detected on Python 3.
                func_name = getattr(obj, "func_name", None)
                if func_name is None and isinstance(obj, types.FunctionType):
                    func_name = obj.__name__

                if func_name is not None and func_name != varname:
                    imports.append("from %s import %s as %s" %
                                   (ref, func_name, varname))
                else:
                    # There is no reliable way to know which name the module
                    # exports, so probe candidate import statements and keep
                    # the first one that actually works. Each probe runs in a
                    # throwaway namespace so it cannot touch this frame.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (
                                ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: neither candidate is importable,
                            # so no import statement is recorded
                            pass
        elif isinstance(obj, types.ModuleType):
            modules.update(_extract_module(obj.__name__))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)

    return imports, source, pickles, modules
def _spider_function(function, session, pickles=None):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Every name returned by `_get_naked_loads(function)` is looked up in
    `session`. Depending on the object found, it is serialized, an import
    statement is recorded for it, or (for objects whose `__module__` is
    `__main__`) this function recurses into it to collect its source as well.

    Objects without a `__name__` are serialized: those matched by `_is_tensor`
    are skipped, those matched by `_is_spark` go through
    `terragon.dumps_spark_to_base64` (which is handed `session['sc']`), those
    matched by `_is_pom` go through `terragon.dumps_pom_to_base64`, and
    everything else through `terragon.dumps_to_base64`.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals()
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is mutated in
        place and also carries the bookkeeping key '_objects_seen' (a list of
        the names already visited). When omitted, a fresh dictionary is
        created for this call.

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function
    pickles: dictionary
        dictionary of variable names and their values as pickled strings
        (plus the '_objects_seen' bookkeeping list)
    modules: dictionary
        merged results of `_extract_module` for the non-`__main__` modules
        that dependencies were found in
    """
    # A `{}` default would be created once and shared by every call that
    # omits `pickles`, so '_objects_seen' (and stale pickles) would leak from
    # one top-level call into the next. Build a fresh dict per call instead.
    if pickles is None:
        pickles = {}
    if '_objects_seen' not in pickles:
        pickles['_objects_seen'] = []
    pickles['_objects_seen'].append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    if isinstance(function, types.ModuleType):
        logging.debug(
            "object (%s) is a `types.ModuleType`. skipping _get_source",
            str(function))
    else:
        source += _get_source(function) + '\n'

    logging.debug("finding objects that need to be serialized")
    for varname in _get_naked_loads(function):
        logging.debug("\t%s", varname)
        if varname in pickles['_objects_seen']:
            logging.debug(
                "\t%s has already been processed. skipping.", varname)
            continue
        pickles['_objects_seen'].append(varname)
        if varname not in session:
            logging.debug("\t%s not found in session. skipping.", varname)
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            logging.debug(
                "\t%s has no attribute __name__, performing serialization checks.",
                varname)
            if _is_tensor(obj):
                logging.debug("\t%s is from tensorflow skipping.", varname)
                continue
            elif _is_spark(obj):
                logging.debug(
                    "\t%s is from spark. serializing using `dumps_spark_to_base64`.",
                    varname)
                # NOTE(review): requires an 'sc' entry in the session
                # (presumably the SparkContext); a missing key raises KeyError
                pickles[varname] = terragon.dumps_spark_to_base64(
                    session['sc'], obj)
            elif _is_pom(obj):
                logging.debug(
                    "\t%s is from pomegranate. serializing using `dumps_pom_to_base64`.",
                    varname)
                pickles[varname] = terragon.dumps_pom_to_base64(obj)
            else:
                logging.debug(
                    "\tno special serialization requirement detected for %s. using dumps_to_base64.",
                    varname)
                pickles[varname] = terragon.dumps_to_base64(obj)
        elif hasattr(obj, "__module__"):
            logging.debug("\tobject %s has attribute __module__", varname)
            if obj.__module__ == "__main__":
                logging.debug("\t%s is in module __main__", varname)
                # the recursive call shares (and mutates) the same `pickles`
                # dict, which is how '_objects_seen' stops infinite recursion
                new_imports, new_source, new_pickles, new_modules = \
                    _spider_function(obj, session, pickles)
                source += new_source + '\n'
                imports += new_imports
                pickles.update(new_pickles)
                modules.update(new_modules)
            else:
                logging.debug(
                    "\t%s is a submodule (not __main__). extracting/saving source code and import statements.",
                    varname)
                modules.update(_extract_module(obj.__module__))
                ref = inspect.getmodule(obj).__name__

                # `func_name` only exists on Python 2 functions; also accept
                # plain function objects so that aliased imports
                # (`from m import f as g`) are still detected on Python 3.
                is_function = (hasattr(obj, "func_name")
                               or isinstance(obj, types.FunctionType))
                if is_function and obj.__name__ != varname:
                    imports.append("from %s import %s as %s" %
                                   (ref, obj.__name__, varname))
                else:
                    # There is no reliable way to know which name the module
                    # exports, so probe candidate import statements and keep
                    # the first one that actually works. Each probe runs in a
                    # throwaway namespace so it cannot touch this frame.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (
                                ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: neither candidate is importable,
                            # so no import statement is recorded
                            logging.debug(
                                "\tcould not determine an import statement for %s. skipping.",
                                varname)
        elif isinstance(obj, types.ModuleType):
            logging.debug(
                "\tobject %s is a `types.ModuleType`, extracting/saving source code and import statements.",
                varname)
            modules.update(_extract_module(obj.__name__))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            logging.debug(
                "\tno special cases detected for %s. pickling using dumps_to_base64",
                varname)
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)

    return imports, source, pickles, modules
def _spider_function(function, session, pickles=None):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Every name returned by `_get_naked_loads(function)` is looked up in
    `session`. Depending on the object found, it is serialized, an import
    statement is recorded for it, or (for objects whose `__module__` is
    `__main__`) this function recurses into it to collect its source as well.

    Objects without a `__name__` are serialized: those matched by `_is_tensor`
    are skipped, those matched by `_is_spark` go through
    `terragon.dumps_spark_to_base64` (which is handed `session['sc']`), those
    matched by `_is_pom` go through `terragon.dumps_pom_to_base64`, and
    everything else through `terragon.dumps_to_base64`.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals()
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is mutated in
        place and also carries the bookkeeping key '_objects_seen' (a list of
        the names already visited). When omitted, a fresh dictionary is
        created for this call.

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function
    pickles: dictionary
        dictionary of variable names and their values as pickled strings
        (plus the '_objects_seen' bookkeeping list)
    modules: dictionary
        merged results of `_extract_module` for the non-`__main__` modules
        that dependencies were found in
    """
    # A `{}` default would be created once and shared by every call that
    # omits `pickles`, so '_objects_seen' (and stale pickles) would leak from
    # one top-level call into the next. Build a fresh dict per call instead.
    if pickles is None:
        pickles = {}
    if '_objects_seen' not in pickles:
        pickles['_objects_seen'] = []
    pickles['_objects_seen'].append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    if isinstance(function, types.ModuleType):
        logging.debug(
            "object (%s) is a `types.ModuleType`. skipping _get_source",
            str(function))
    else:
        source += _get_source(function) + '\n'

    logging.debug("finding objects that need to be serialized")
    for varname in _get_naked_loads(function):
        logging.debug("\t%s", varname)
        if varname in pickles['_objects_seen']:
            logging.debug(
                "\t%s has already been processed. skipping.", varname)
            continue
        pickles['_objects_seen'].append(varname)
        if varname not in session:
            logging.debug("\t%s not found in session. skipping.", varname)
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            logging.debug(
                "\t%s has no attribute __name__, performing serialization checks.",
                varname)
            if _is_tensor(obj):
                logging.debug("\t%s is from tensorflow skipping.", varname)
                continue
            elif _is_spark(obj):
                logging.debug(
                    "\t%s is from spark. serializing using `dumps_spark_to_base64`.",
                    varname)
                # NOTE(review): requires an 'sc' entry in the session
                # (presumably the SparkContext); a missing key raises KeyError
                pickles[varname] = terragon.dumps_spark_to_base64(
                    session['sc'], obj)
            elif _is_pom(obj):
                logging.debug(
                    "\t%s is from pomegranate. serializing using `dumps_pom_to_base64`.",
                    varname)
                pickles[varname] = terragon.dumps_pom_to_base64(obj)
            else:
                logging.debug(
                    "\tno special serialization requirement detected for %s. using dumps_to_base64.",
                    varname)
                pickles[varname] = terragon.dumps_to_base64(obj)
        elif hasattr(obj, "__module__"):
            logging.debug("\tobject %s has attribute __module__", varname)
            if obj.__module__ == "__main__":
                logging.debug("\t%s is in module __main__", varname)
                # the recursive call shares (and mutates) the same `pickles`
                # dict, which is how '_objects_seen' stops infinite recursion
                new_imports, new_source, new_pickles, new_modules = \
                    _spider_function(obj, session, pickles)
                source += new_source + '\n'
                imports += new_imports
                pickles.update(new_pickles)
                modules.update(new_modules)
            else:
                logging.debug(
                    "\t%s is a submodule (not __main__). extracting/saving source code and import statements.",
                    varname)
                modules.update(_extract_module(obj.__module__))
                ref = inspect.getmodule(obj).__name__

                # `func_name` only exists on Python 2 functions; also accept
                # plain function objects so that aliased imports
                # (`from m import f as g`) are still detected on Python 3.
                is_function = (hasattr(obj, "func_name")
                               or isinstance(obj, types.FunctionType))
                if is_function and obj.__name__ != varname:
                    imports.append("from %s import %s as %s" %
                                   (ref, obj.__name__, varname))
                else:
                    # There is no reliable way to know which name the module
                    # exports, so probe candidate import statements and keep
                    # the first one that actually works. Each probe runs in a
                    # throwaway namespace so it cannot touch this frame.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (
                                ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: neither candidate is importable,
                            # so no import statement is recorded
                            logging.debug(
                                "\tcould not determine an import statement for %s. skipping.",
                                varname)
        elif isinstance(obj, types.ModuleType):
            logging.debug(
                "\tobject %s is a `types.ModuleType`, extracting/saving source code and import statements.",
                varname)
            modules.update(_extract_module(obj.__name__))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            logging.debug(
                "\tno special cases detected for %s. pickling using dumps_to_base64",
                varname)
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)

    return imports, source, pickles, modules
def _spider_function(function, session, pickles=None, verbose=0):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Every name returned by `_get_naked_loads(function)` is looked up in
    `session`. Depending on the object found, it is pickled, an import
    statement is recorded for it, or (for objects whose `__module__` is
    `__main__`) this function recurses into it to collect its source as well.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals()
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is mutated in
        place and also carries the bookkeeping key '_objects_seen' (a list of
        the names already visited). When omitted, a fresh dictionary is
        created for this call.
    verbose: int
        log level; forwarded unchanged to `_get_naked_loads`,
        `_extract_module` and the recursive calls

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function
    pickles: dictionary
        dictionary of variable names and their values as pickled strings
        (plus the '_objects_seen' bookkeeping list)
    modules: dictionary
        merged results of `_extract_module` for the non-`__main__` modules
        that dependencies were found in
    """
    # A `{}` default would be created once and shared by every call that
    # omits `pickles`, so '_objects_seen' (and stale pickles) would leak from
    # one top-level call into the next. Build a fresh dict per call instead.
    if pickles is None:
        pickles = {}
    if '_objects_seen' not in pickles:
        pickles['_objects_seen'] = []
    pickles['_objects_seen'].append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    # source is only fetched for non-module objects
    if not isinstance(function, types.ModuleType):
        source += _get_source(function) + '\n'

    for varname in _get_naked_loads(function, verbose=verbose):
        if varname in pickles['_objects_seen']:
            continue
        pickles['_objects_seen'].append(varname)
        if varname not in session:
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            pickles[varname] = terragon.dumps_to_base64(obj)

        # NOTE: this is a separate `if`, not an `elif`: an object that was
        # pickled just above is still run through the checks below.
        if hasattr(obj, "__module__"):
            if obj.__module__ == "__main__":
                # the recursive call shares (and mutates) the same `pickles`
                # dict, which is how '_objects_seen' stops infinite recursion
                new_imports, new_source, new_pickles, new_modules = \
                    _spider_function(obj, session, pickles, verbose=verbose)
                source += new_source + '\n'
                imports += new_imports
                pickles.update(new_pickles)
                modules.update(new_modules)
            else:
                modules.update(
                    _extract_module(obj.__module__, verbose=verbose))
                ref = inspect.getmodule(obj).__name__

                # `func_name` only exists on Python 2 functions; fall back to
                # `__name__` for plain functions so aliased imports are still
                # detected on Python 3.
                func_name = getattr(obj, "func_name", None)
                if func_name is None and isinstance(obj, types.FunctionType):
                    func_name = obj.__name__

                if func_name is not None and func_name != varname:
                    imports.append("from %s import %s as %s" %
                                   (ref, func_name, varname))
                else:
                    # There is no reliable way to know which name the module
                    # exports, so probe candidate import statements and keep
                    # the first one that actually works. Each probe runs in a
                    # throwaway namespace so it cannot touch this frame.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (
                                ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: neither candidate is importable,
                            # so no import statement is recorded
                            pass
        elif isinstance(obj, types.ModuleType):
            modules.update(_extract_module(obj.__name__, verbose=verbose))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)

    return imports, source, pickles, modules