Example #1
0
def _spider_function(function, session, pickles=None):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals()
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is updated in
        place and shared with the recursive calls; when omitted, a fresh
        dictionary is created for this call.

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function, followed by the source of every
        `__main__` object it depends on
    pickles: dictionary
        dictionary of variable names and their values serialized with
        `terragon.dumps_to_base64`. It also carries the bookkeeping key
        `'_objects_seen'`, a list of the names that were already visited.
    modules: dictionary
        merged results of the `_extract_module` calls made for the
        non-`__main__` modules that were encountered
    """

    # A `{}` default would be created once and shared by every call, leaking
    # `_objects_seen` (and pickled values) from one top-level call to the next.
    if pickles is None:
        pickles = {}
    seen = pickles.setdefault('_objects_seen', [])
    seen.append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    # modules are only scanned for the names they use; no source is embedded
    if not isinstance(function, types.ModuleType):
        source += _get_source(function) + '\n'

    for varname in _get_naked_loads(function):
        if varname in seen:
            continue
        seen.append(varname)
        if varname not in session:
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            pickles[varname] = terragon.dumps_to_base64(obj)

        # NOTE: this is `if`, not `elif`, so a serialized instance still goes
        # through the handling below (which is what records the import of its
        # class, or spiders it when it lives in `__main__`).
        if hasattr(obj, "__module__"):
            if obj.__module__ == "__main__":
                # the recursive call fills `pickles` in place, so only the
                # other three results need to be merged
                new_imports, new_source, _, new_modules = _spider_function(
                    obj, session, pickles)
                source += new_source + '\n'
                imports += new_imports
                modules.update(new_modules)
            else:
                modules.update(_extract_module(obj.__module__))
                # inspect.getmodule returns None when it cannot determine the
                # module; fall back to the name the object itself reports
                module = inspect.getmodule(obj)
                ref = module.__name__ if module is not None else obj.__module__
                # `func_name` only exists on Python 2; the isinstance check
                # covers plain functions on Python 3 as well
                is_function = (hasattr(obj, "func_name") or
                               isinstance(obj, types.FunctionType))
                if is_function and obj.__name__ != varname:
                    imports.append("from %s import %s as %s" %
                                   (ref, obj.__name__, varname))
                else:
                    # we need to figure out how to import this library. i'm not
                    # sure exactly what the right way to get the module and
                    # class name, but this works just fine.
                    # Each candidate statement is tried in a scratch namespace
                    # (call form of exec works on Python 2 and 3) and only
                    # recorded when it actually imports.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (
                                ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: no working import statement found,
                            # so none is recorded for this name
                            pass

        elif isinstance(obj, types.ModuleType):
            modules.update(_extract_module(obj.__name__))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)
    return imports, source, pickles, modules
Example #2
0
def _spider_function(function, session, pickles=None):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals().
        `session['sc']` is read when a spark object has to be serialized
        (presumably the SparkContext; confirm against
        `terragon.dumps_spark_to_base64`).
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is updated in
        place and shared with the recursive calls; when omitted, a fresh
        dictionary is created for this call.

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function, followed by the source of every
        `__main__` object it depends on
    pickles: dictionary
        dictionary of variable names and their values serialized by one of
        the `terragon.dumps_*_to_base64` helpers. It also carries the
        bookkeeping key `'_objects_seen'`, a list of the names that were
        already visited.
    modules: dictionary
        merged results of the `_extract_module` calls made for the
        non-`__main__` modules that were encountered
    """

    # A `{}` default would be created once and shared by every call, leaking
    # `_objects_seen` (and pickled values) from one top-level call to the next.
    if pickles is None:
        pickles = {}
    seen = pickles.setdefault('_objects_seen', [])
    seen.append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    if isinstance(function, types.ModuleType):
        logging.debug(
            "object (%s) is a `types.ModuleType`. skipping _get_source",
            str(function))
    else:
        source += _get_source(function) + '\n'

    logging.debug("finding objects that need to be serialized")
    for varname in _get_naked_loads(function):
        logging.debug("\t%s", varname)
        if varname in seen:
            logging.debug("\t%s has already been processed. skipping.",
                          varname)
            continue
        seen.append(varname)
        if varname not in session:
            logging.debug("\t%s not found in session. skipping.", varname)
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            logging.debug(
                "\t%s has no attribute __name__, performing serialization checks.",
                varname)
            if _is_tensor(obj):
                logging.debug("\t%s is from tensorflow skipping.", varname)
                continue
            elif _is_spark(obj):
                logging.debug(
                    "\t%s is from spark. serializing using `dumps_spark_to_base64`.",
                    varname)
                pickles[varname] = terragon.dumps_spark_to_base64(
                    session['sc'], obj)
            elif _is_pom(obj):
                logging.debug(
                    "\t%s is from pomegranate. serializing using `dumps_pom_to_base64`.",
                    varname)
                pickles[varname] = terragon.dumps_pom_to_base64(obj)
            else:
                logging.debug(
                    "\tno special serialization requirement detected for %s. using dumps_to_base64.",
                    varname)
                pickles[varname] = terragon.dumps_to_base64(obj)

        elif hasattr(obj, "__module__"):
            logging.debug("\tobject %s has attribute __module__", varname)
            if obj.__module__ == "__main__":
                logging.debug("\t%s is in module __main__", varname)
                # the recursive call fills `pickles` in place, so only the
                # other three results need to be merged
                new_imports, new_source, _, new_modules = _spider_function(
                    obj, session, pickles)
                source += new_source + '\n'
                imports += new_imports
                modules.update(new_modules)
            else:
                logging.debug(
                    "\t%s is a submodule (not __main__). extracting/saving source code and import statements.",
                    varname)
                modules.update(_extract_module(obj.__module__))
                # inspect.getmodule returns None when it cannot determine the
                # module; fall back to the name the object itself reports
                module = inspect.getmodule(obj)
                ref = module.__name__ if module is not None else obj.__module__
                # `func_name` only exists on Python 2 functions; without the
                # isinstance check an aliased function import would never be
                # detected on Python 3
                is_function = (hasattr(obj, "func_name") or
                               isinstance(obj, types.FunctionType))
                if is_function and obj.__name__ != varname:
                    imports.append("from %s import %s as %s" %
                                   (ref, obj.__name__, varname))
                else:
                    # we need to figure out how to import this library. i'm not
                    # sure exactly what the right way to get the module and
                    # class name, but this works just fine.
                    # Each candidate statement is tried in a scratch namespace
                    # and only recorded when it actually imports.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (
                                ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: nothing is recorded for this name
                            logging.debug(
                                "\tcould not determine an import statement for %s. skipping.",
                                varname)

        elif isinstance(obj, types.ModuleType):
            logging.debug(
                "\tobject %s is a `types.ModuleType`, extracting/saving source code and import statements.",
                varname)
            modules.update(_extract_module(obj.__name__))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            logging.debug(
                "\tno special cases detected for %s. pickling using dumps_to_base64",
                varname)
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)

    return imports, source, pickles, modules
Example #3
0
def _spider_function(function, session, pickles=None):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals().
        `session['sc']` is read when a spark object has to be serialized
        (presumably the SparkContext; confirm against
        `terragon.dumps_spark_to_base64`).
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is updated in
        place and shared with the recursive calls; when omitted, a fresh
        dictionary is created for this call.

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function, followed by the source of every
        `__main__` object it depends on
    pickles: dictionary
        dictionary of variable names and their values serialized by one of
        the `terragon.dumps_*_to_base64` helpers. It also carries the
        bookkeeping key `'_objects_seen'`, a list of the names that were
        already visited.
    modules: dictionary
        merged results of the `_extract_module` calls made for the
        non-`__main__` modules that were encountered
    """

    # Never use a shared `{}` default here: it would keep `_objects_seen` and
    # pickled values alive between unrelated top-level calls.
    if pickles is None:
        pickles = {}
    if '_objects_seen' not in pickles:
        pickles['_objects_seen'] = []
    visited = pickles['_objects_seen']
    visited.append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    if isinstance(function, types.ModuleType):
        logging.debug("object (%s) is a `types.ModuleType`. skipping _get_source", str(function))
    else:
        source += _get_source(function) + '\n'

    logging.debug("finding objects that need to be serialized")
    for varname in _get_naked_loads(function):
        logging.debug("\t%s", varname)
        if varname in visited:
            logging.debug("\t%s has already been processed. skipping.", varname)
            continue
        visited.append(varname)
        if varname not in session:
            logging.debug("\t%s not found in session. skipping.", varname)
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            logging.debug("\t%s has no attribute __name__, performing serialization checks.", varname)
            if _is_tensor(obj):
                logging.debug("\t%s is from tensorflow skipping.", varname)
                continue
            elif _is_spark(obj):
                logging.debug("\t%s is from spark. serializing using `dumps_spark_to_base64`.", varname)
                pickles[varname] = terragon.dumps_spark_to_base64(session['sc'], obj)
            elif _is_pom(obj):
                logging.debug("\t%s is from pomegranate. serializing using `dumps_pom_to_base64`.", varname)
                pickles[varname] = terragon.dumps_pom_to_base64(obj)
            else:
                logging.debug("\tno special serialization requirement detected for %s. using dumps_to_base64.", varname)
                pickles[varname] = terragon.dumps_to_base64(obj)

        elif hasattr(obj, "__module__"):
            logging.debug("\tobject %s has attribute __module__", varname)
            if obj.__module__ == "__main__":
                logging.debug("\t%s is in module __main__", varname)
                # `pickles` is mutated in place by the recursive call, so its
                # third return value is the very same dictionary
                new_imports, new_source, _, new_modules = _spider_function(obj, session, pickles)
                source += new_source + '\n'
                imports += new_imports
                modules.update(new_modules)
            else:
                logging.debug("\t%s is a submodule (not __main__). extracting/saving source code and import statements.", varname)
                modules.update(_extract_module(obj.__module__))
                # inspect.getmodule may return None; use the module name the
                # object reports in that case
                module = inspect.getmodule(obj)
                ref = obj.__module__ if module is None else module.__name__
                # `func_name` is a Python 2 attribute; on Python 3 plain
                # functions are recognised through types.FunctionType so that
                # `from m import f as g` style aliases are preserved
                is_function = hasattr(obj, "func_name") or isinstance(obj, types.FunctionType)
                if is_function and obj.__name__ != varname:
                    imports.append("from %s import %s as %s" % (ref, obj.__name__, varname))
                else:
                    # we need to figure out how to import this library. i'm not
                    # sure exactly what the right way to get the module and
                    # class name, but this works just fine.
                    # Candidates are executed in a throwaway namespace and
                    # kept only if the import succeeds.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: nothing is recorded for this name
                            logging.debug("\tcould not determine an import statement for %s. skipping.", varname)

        elif isinstance(obj, types.ModuleType):
            logging.debug("\tobject %s is a `types.ModuleType`, extracting/saving source code and import statements.", varname)
            modules.update(_extract_module(obj.__name__))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            logging.debug("\tno special cases detected for %s. pickling using dumps_to_base64", varname)
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)

    return imports, source, pickles, modules
Example #4
0
def _spider_function(function, session, pickles=None, verbose=0):
    """
    Takes a function and global variables referenced in an environment and
    recursively finds dependencies required in order to execute the function.
    This includes references to classes, libraries, variables, functions, etc.

    Parameters
    ----------
    function: function
        a function referenced in an environment
    session: dictionary
        variables referenced from a separate environment; i.e. globals()
    pickles: dictionary, optional
        holds the variables needed to execute the function. It is updated in
        place and shared with the recursive calls; when omitted, a fresh
        dictionary is created for this call.
    verbose: int
        log level; forwarded unchanged to `_get_naked_loads`,
        `_extract_module` and the recursive calls

    Returns
    -------
    imports: list
        list of import statements required for the function to execute
    source: string
        source code of the function, followed by the source of every
        `__main__` object it depends on
    pickles: dictionary
        dictionary of variable names and their values serialized with
        `terragon.dumps_to_base64`. It also carries the bookkeeping key
        `'_objects_seen'`, a list of the names that were already visited.
    modules: dictionary
        merged results of the `_extract_module` calls made for the
        non-`__main__` modules that were encountered
    """

    # A `{}` default would be created once and shared by every call, leaking
    # `_objects_seen` (and pickled values) from one top-level call to the next.
    if pickles is None:
        pickles = {}
    seen = pickles.setdefault('_objects_seen', [])
    seen.append(str(function))

    imports = []
    modules = {}
    source = "# code for %s\n" % (str(function))
    # modules are only scanned for the names they use; no source is embedded
    if not isinstance(function, types.ModuleType):
        source += _get_source(function) + '\n'

    for varname in _get_naked_loads(function, verbose=verbose):
        if varname in seen:
            continue
        seen.append(varname)
        if varname not in session:
            continue
        obj = session[varname]

        # checking to see if this is an instance of an object
        if not hasattr(obj, "__name__"):
            pickles[varname] = terragon.dumps_to_base64(obj)

        # NOTE: this is `if`, not `elif`, so a serialized instance still goes
        # through the handling below (which is what records the import of its
        # class, or spiders it when it lives in `__main__`).
        if hasattr(obj, "__module__"):
            if obj.__module__ == "__main__":
                # the recursive call fills `pickles` in place, so only the
                # other three results need to be merged
                new_imports, new_source, _, new_modules = _spider_function(
                    obj, session, pickles, verbose=verbose)
                source += new_source + '\n'
                imports += new_imports
                modules.update(new_modules)
            else:
                modules.update(_extract_module(obj.__module__, verbose=verbose))
                # inspect.getmodule returns None when it cannot determine the
                # module; fall back to the name the object itself reports
                module = inspect.getmodule(obj)
                ref = module.__name__ if module is not None else obj.__module__
                # `func_name` only exists on Python 2; the isinstance check
                # covers plain functions on Python 3 as well
                is_function = (hasattr(obj, "func_name") or
                               isinstance(obj, types.FunctionType))
                if is_function and obj.__name__ != varname:
                    imports.append("from %s import %s as %s" % (ref, obj.__name__, varname))
                else:
                    # we need to figure out how to import this library. i'm not
                    # sure exactly what the right way to get the module and
                    # class name, but this works just fine.
                    # Each candidate statement is tried in a scratch namespace
                    # (call form of exec works on Python 2 and 3) and only
                    # recorded when it actually imports.
                    try:
                        import_statement = "from %s import %s" % (ref, varname)
                        exec(import_statement, {})
                        imports.append(import_statement)
                    except Exception:
                        try:
                            import_statement = "from %s import %s" % (ref, obj.__class__.__name__)
                            exec(import_statement, {})
                            imports.append(import_statement)
                        except Exception:
                            # best effort: no working import statement found,
                            # so none is recorded for this name
                            pass

        elif isinstance(obj, types.ModuleType):
            modules.update(_extract_module(obj.__name__, verbose=verbose))
            if obj.__name__ != varname:
                imports.append("import %s as %s" % (obj.__name__, varname))
            else:
                imports.append("import %s" % (varname))
        else:
            # catch all. if all else fails, pickle it
            pickles[varname] = terragon.dumps_to_base64(obj)
    return imports, source, pickles, modules