def _get_external_card_packages(with_paths=False):
    """
    Safely collect all external card packages provided by extensions.

    Sub-packages that fail to import are reported through `_ext_debug` and
    skipped; plain (non-package) modules are ignored.

    Args:
        with_paths (bool, optional): when `True`, each entry of the result is
            a tuple `(mf_extensions_parent_path, relative_path_to_module,
            module)`; when `False`, each entry is just the module.
            Defaults to False.

    Returns:
        `list` of `ModuleType`, or `list` of `tuple` where each tuple is of
        the form (mf_extensions_parent_path, relative_path_to_module,
        ModuleType)
    """
    import importlib

    collected = []
    for ext in get_modules("plugins.cards"):
        # Walk the sub-packages of metaflow_extensions.X.plugins.cards,
        # for example metaflow_extensions.X.plugins.cards.monitoring
        found = []
        for finder, mod_name, is_pkg in iter_namespace(ext.module):
            try:
                if is_pkg:
                    card_pkg = importlib.import_module(mod_name)
                    _ext_debug("Importing card package %s" % mod_name)
                    found.append(
                        (finder.path, card_pkg) if with_paths else card_pkg
                    )
            except Exception as e:
                _ext_debug(
                    "External Card Module Import Exception \n\n %s \n\n %s"
                    % (str(e), traceback.format_exc())
                )

        if not with_paths:
            collected.extend(found)
            continue

        located = []
        for search_path, card_pkg in found:
            # Parent path to metaflow_extensions
            ext_parent = os.path.abspath(os.path.join(search_path, "../../../../"))
            # Path of the card package relative to that parent
            rel_location = os.path.join(
                EXT_PKG,
                os.path.relpath(
                    card_pkg.__path__[0], os.path.join(search_path, "../../../")
                ),
            )
            located.append((ext_parent, rel_location, card_pkg))
        collected.extend(located)
    return collected
def _get_external_card_packages():
    """
    Safely collect all external card modules provided by extensions.

    The result is accumulated in (and served from) the module-level
    `_CARD_MODULES` list: once it is non-empty it is returned as-is without
    scanning again. Sub-packages that fail to import are reported through
    `_ext_debug` and skipped.

    Returns:
        `list` of `ModuleType`
    """
    import importlib

    # Caching card related modules.
    global _CARD_MODULES
    if _CARD_MODULES:
        return _CARD_MODULES

    for ext in get_modules("plugins.cards"):
        if getattr(ext.module, "__file__", None):
            # The module has a file of its own, which covers:
            #  - a namespace package supported through mfextinit_X.py
            #  - a regular package
            discovered = [ext.module]
        else:
            # Namespace package whose sub-packages carry an __init__.py:
            # iterate submodules of metaflow_extensions.X.plugins.cards,
            # for example metaflow_extensions.X.plugins.cards.monitoring
            discovered = []
            for _, mod_name, is_pkg in iter_namespace(ext.module):
                try:
                    if is_pkg:
                        card_pkg = importlib.import_module(mod_name)
                        _ext_debug("Importing card package %s" % mod_name)
                        discovered.append(card_pkg)
                except Exception as e:
                    _ext_debug(
                        "External Card Module Import Exception \n\n %s \n\n %s"
                        % (str(e), traceback.format_exc())
                    )
        _CARD_MODULES.extend(discovered)

    return _CARD_MODULES
class UnhandledInMergeArtifactsException(MetaflowException):
    """Merge error that records the given artifact names on `artifact_names`."""

    headline = "Unhandled artifacts in merge"

    def __init__(self, msg, unhandled):
        super(UnhandledInMergeArtifactsException, self).__init__(msg)
        self.artifact_names = unhandled


class MissingInMergeArtifactsException(MetaflowException):
    """Merge error that records the given artifact names on `artifact_names`."""

    headline = "Missing artifacts in merge"

    def __init__(self, msg, unhandled):
        super(MissingInMergeArtifactsException, self).__init__(msg)
        self.artifact_names = unhandled


# Pull in any exceptions contributed by Metaflow extension packages
try:
    from metaflow.extension_support import get_modules, multiload_globals

    multiload_globals(get_modules("exceptions"), globals())
finally:
    # Drop the helper names again so they do not leak out of this module;
    # a name that never got bound (e.g. the import failed) is simply skipped.
    for _n in ["get_modules", "multiload_globals"]:
        globals().pop(_n, None)
    del globals()["_n"]
get_modules,
    lazy_load_aliases,
    load_globals,
    load_module,
    EXT_PKG,
    _ext_debug,
)

# We load the module overrides *first* explicitly. Non overrides can be loaded
# in toplevel as well but these can be loaded first if needed. Note that those
# modules should be careful not to include anything in Metaflow at their top-level
# as it is likely to not work.
# Both lists hold fully dotted module names of the form
# <EXT_PKG>.<tl_package>.toplevel.<name>.
_override_modules = []
_tl_modules = []
try:
    _modules_to_import = get_modules("toplevel")

    for m in _modules_to_import:
        # An extension names its override module through a `module_overrides`
        # attribute on its toplevel module (absent -> None -> skipped).
        override_module = m.module.__dict__.get("module_overrides", None)
        if override_module is not None:
            _override_modules.append(".".join(
                [EXT_PKG, m.tl_package, "toplevel", override_module]))
        # Same mechanism for the regular top-level module, via `toplevel`.
        tl_module = m.module.__dict__.get("toplevel", None)
        if tl_module is not None:
            _tl_modules.append(".".join(
                [EXT_PKG, m.tl_package, "toplevel", tl_module]))
    _ext_debug("Got overrides to load: %s" % _override_modules)
    _ext_debug("Got top-level imports: %s" % _tl_modules)
except Exception as e:
    # Best effort: a failure is only reported through the debug channel.
    # Entries appended before the failure remain in the two lists.
    _ext_debug("Error in importing toplevel/overrides: %s" % e)
def generate_trampolines(out_dir):
    """
    Write environment-escape trampoline modules into `out_dir`.

    This function looks in the `configurations` directory next to this file,
    and in the `configurations` directory of every `plugins.env_escape`
    extension package, for sub-directories named `emulate_<mod>[__<mod>...]`.
    For each `<mod>` it creates `<out_dir>/<mod>.py`, a file that properly
    sets up the environment escape when imported.

    Nothing is generated when the METAFLOW_ENV_ESCAPE_DISABLED environment
    variable is set to "True". Configuration roots that do not exist (for
    example an extension that ships no `configurations` directory) are
    skipped instead of raising.

    Args:
        out_dir: directory in which the trampoline files are created.
    """
    # in some cases we may want to disable environment escape
    # functionality, in that case, set METAFLOW_ENV_ESCAPE_DISABLED
    if os.environ.get("METAFLOW_ENV_ESCAPE_DISABLED", False) in (True, "True"):
        return

    paths = [os.path.join(os.path.dirname(os.path.abspath(__file__)), "configurations")]
    for m in get_modules("plugins.env_escape"):
        paths.extend(
            [os.path.join(x, "configurations") for x in list(m.module.__path__)]
        )

    dir_prefix = "emulate_"
    for rootpath in paths:
        # os.listdir raises on a missing directory; an extension package is
        # not guaranteed to provide a `configurations` directory.
        if not os.path.isdir(rootpath):
            continue
        for path in os.listdir(rootpath):
            path = os.path.join(rootpath, path)
            if not os.path.isdir(path):
                continue
            dir_name = os.path.basename(path)
            if not dir_name.startswith(dir_prefix):
                continue
            # A single configuration can emulate several modules, separated
            # by a double underscore in the directory name.
            module_names = dir_name[len(dir_prefix):].split("__")
            for module_name in module_names:
                with open(
                    os.path.join(out_dir, module_name + ".py"),
                    mode="w",
                    encoding="utf-8",
                ) as f:
                    f.write(
                        """
import importlib
import os
import sys
from metaflow.plugins.env_escape.client_modules import ModuleImporter

# This is a trampoline file to ensure that the ModuleImporter to handle the emulated
# modules gets properly loaded. If multiple modules are emulated by a single configuration
# the first import will cause this trampoline file to be loaded. All other calls will use
# the module importer since we insert the importer at the head of the meta_path

def load():
    # We check if we are not overriding a module that already exists; to do so, we remove
    # the path we live at from sys.path and check
    old_paths = sys.path
    cur_path = os.path.dirname(__file__)
    sys.path = [p for p in old_paths if p != cur_path]
    # Handle special case where we launch a shell (including with a command)
    # and we are in the CWD (searched if '' is the first element of sys.path)
    if cur_path == os.getcwd() and sys.path[0] == '':
        sys.path = sys.path[1:]

    # Remove the module (this file) to reload it properly. Do *NOT* update sys.modules but
    # modify directly since it may be referenced elsewhere
    del sys.modules["{module_name}"]

    for prefix in {prefixes}:
        try:
            importlib.import_module(prefix)
        except ImportError:
            # Here we actually have two cases: we are being imported from the client (inner env)
            # in which case we are happy (since no module exists) OR we are being imported by the
            # server in which case we could not find the underlying module so we re-raise
            # this error.
            # We distinguish these cases by checking if the executable is the
            # python_executable the server should be using
            if sys.executable == "{python_executable}":
                raise
            # print("Env escape using executable {python_executable}")
        else:
            # Inverse logic as above here.
            if sys.executable != "{python_executable}":
                # We use the package locally and warn user.
                print("Not using environment escape for '%s' as module present" % prefix)
            # In both cases, we don't load our loader since
            # the package is locally present
            return
    sys.path = old_paths
    m = ModuleImporter("{python_executable}", "{pythonpath}", {max_pickle_version}, "{path}", {prefixes})
    sys.meta_path.insert(0, m)
    # Reload this module using the ModuleImporter
    importlib.import_module("{module_name}")

if not "{python_executable}":
    raise RuntimeError(
        "Trying to access an escaped module ({module_name}) without a valid interpreter")
load()
""".format(
                            python_executable=ENV_ESCAPE_PY,
                            pythonpath=ENV_ESCAPE_PATHS,
                            max_pickle_version=int(ENV_ESCAPE_PICKLE_VERSION),
                            path=path,
                            prefixes=module_names,
                            module_name=module_name,
                        )
                    )
# because of https://bugs.python.org/issue42853 (Py3 bug); this also helps
# keep memory consumption lower
# NOTE: For some weird reason, if you pass a large value to
# read, it delays the call so we always pass it either what
# remains or 2GB, whichever is smallest.
def read_in_chunks(dst, src, src_sz, max_chunk_size):
    """
    Copy exactly `src_sz` bytes from `src` to `dst` in bounded chunks.

    Args:
        dst: writable file-like object; receives the data through `write`.
        src: readable file-like object; consumed through `read(n)`.
        src_sz: total number of bytes to copy.
        max_chunk_size: upper bound on the size passed to a single `read`.

    Raises:
        EOFError: if `src` is exhausted before `src_sz` bytes were read.
    """
    remaining = src_sz
    while remaining > 0:
        buf = src.read(min(remaining, max_chunk_size))
        if not buf:
            # An empty read means end-of-stream. Without this check
            # `remaining` would never decrease and the loop would spin
            # forever on a source shorter than `src_sz`.
            raise EOFError(
                "Source ended after %d of %d expected bytes"
                % (src_sz - remaining, src_sz)
            )
        # Py2 doesn't return the number of bytes written so calculate size
        # separately
        dst.write(buf)
        remaining -= len(buf)


from .s3 import MetaflowS3Exception, S3

# Import any additional datatools defined by a Metaflow extensions package
try:
    from metaflow.extension_support import get_modules, multiload_all

    multiload_all(get_modules("datatools"), "datatools", globals())
finally:
    # Erase all temporary names to avoid leaking things
    for _n in ["get_modules", "multiload_all"]:
        try:
            del globals()[_n]
        except KeyError:
            # The name was never bound (e.g. the import above failed).
            pass
    del globals()["_n"]
# PINNED_CONDA_LIBS are the libraries that metaflow depends on for execution # and are needed within a conda environment def get_pinned_conda_libs(python_version): return { "requests": ">=2.21.0", "boto3": ">=1.14.0", } METAFLOW_EXTENSIONS_ADDL_SUFFIXES = set([]) # Check if there are extensions to Metaflow to load and override everything try: from metaflow.extension_support import get_modules ext_modules = get_modules("config") for m in ext_modules: # We load into globals whatever we have in extension_module # We specifically exclude any modules that may be included (like sys, os, etc) for n, o in m.module.__dict__.items(): if n == "DEBUG_OPTIONS": DEBUG_OPTIONS.extend(o) for typ in o: vars()["METAFLOW_DEBUG_%s" % typ.upper()] = from_conf( "METAFLOW_DEBUG_%s" % typ.upper()) elif n == "METAFLOW_EXTENSIONS_ADDL_SUFFIXES": METAFLOW_EXTENSIONS_ADDL_SUFFIXES.update(o) elif not n.startswith("__") and not isinstance( o, types.ModuleType): globals()[n] = o METAFLOW_EXTENSIONS_ADDL_SUFFIXES = list(METAFLOW_EXTENSIONS_ADDL_SUFFIXES)
lambda base, overrides: _merge_lists(base, overrides, "TYPE"),
    ),
    # Each entry is a (default value, merge function) pair; the merge function
    # is called as merge(base, overrides).
    "SIDECARS": ({}, lambda base, overrides: base.update(overrides)),
    "LOGGING_SIDECARS": ({}, lambda base, overrides: base.update(overrides)),
    "MONITOR_SIDECARS": ({}, lambda base, overrides: base.update(overrides)),
    "AWS_CLIENT_PROVIDERS": (
        [],
        lambda base, overrides: _merge_lists(base, overrides, "name"),
    ),
    "get_plugin_cli": (lambda l=None: [] if l is None else l(), _merge_funcs),
}

try:
    from metaflow.extension_support import get_modules, multiload_all, _ext_debug

    _modules_to_import = get_modules("plugins")

    multiload_all(_modules_to_import, "plugins", globals())

    # Build an ordered list
    # Start from the default of every expected extension point, then let each
    # extension module that defines a value of the same name merge into it.
    _ext_plugins = {k: v[0] for k, v in _expected_extensions.items()}
    for m in _modules_to_import:
        for k, v in _expected_extensions.items():
            module_override = m.module.__dict__.get(k)
            if module_override is not None:
                # The merge function's return value is discarded, so merges
                # are expected to update `_ext_plugins[k]` in place.
                v[1](_ext_plugins[k], module_override)
except Exception as e:
    # NOTE(review): `_ext_debug` is imported inside the `try` above; if that
    # import is what failed, this handler may hit a NameError unless the name
    # is also bound elsewhere in the file -- confirm.
    _ext_debug(
        "\tWARNING: ignoring all plugins due to error during import: %s" % e)
    # NOTE(review): `_ext_plugins` above shares the very objects stored as
    # defaults (no copy is made), and e.g. `base.update(overrides)` mutates
    # them in place. Merges done before the failure are therefore still
    # present in this "reset" value.
    _ext_plugins = {k: v[0] for k, v in _expected_extensions.items()}