def test_should_load_plugins_from_property(self):
    """A plugin may expose operators via a property and hooks via a class attribute."""

    class AirflowTestPropertyPlugin(AirflowPlugin):
        name = "test_property_plugin"

        @property
        def operators(self):
            from airflow.models.baseoperator import BaseOperator

            class PluginPropertyOperator(BaseOperator):
                pass

            return [PluginPropertyOperator]

        class TestNonPropertyHook(BaseHook):
            pass

        hooks = [TestNonPropertyHook]

    with mock_plugin_manager(plugins=[AirflowTestPropertyPlugin()]):
        from airflow import plugins_manager

        plugins_manager.integrate_dag_plugins()

        # Render the registries to strings once, then check each marker.
        registered_plugins = str(plugins_manager.plugins)
        operator_namespace = str(plugins_manager.operators_modules[0].__dict__)
        hook_namespace = str(plugins_manager.hooks_modules[0].__dict__)

        self.assertIn('AirflowTestPropertyPlugin', registered_plugins)
        self.assertIn('PluginPropertyOperator', operator_namespace)
        self.assertIn('TestNonPropertyHook', hook_namespace)
def dump_plugins(args):
    """Dump plugins information.

    Loads every plugin integration point, then prints the plugins-manager
    module attributes followed by each loaded plugin's attributes.

    :param args: parsed CLI arguments (unused beyond dispatch)
    """
    plugins_manager.log.setLevel(logging.DEBUG)

    # Force every category of plugin entrypoint to load so the dump is complete.
    plugins_manager.ensure_plugins_loaded()
    plugins_manager.integrate_dag_plugins()
    plugins_manager.integrate_executor_plugins()
    plugins_manager.initialize_extra_operators_links_plugins()
    plugins_manager.initialize_web_ui_plugins()

    # Fixed typo in the user-facing header (was "PLUGINS MANGER:").
    _header("PLUGINS MANAGER:", "#")
    for attr_name in PLUGINS_MANAGER_ATTRIBUTES_TO_DUMP:
        attr_value = getattr(plugins_manager, attr_name)
        print(f"{attr_name} = ", end='')
        pprint(attr_value)
    print()

    _header("PLUGINS:", "#")
    if not plugins_manager.plugins:
        print("No plugins loaded")
    else:
        print(f"Loaded {len(plugins_manager.plugins)} plugins")
        for plugin_no, plugin in enumerate(plugins_manager.plugins, 1):
            _header(f"{plugin_no}. {plugin.name}", "=")
            for attr_name in PLUGINS_ATTRIBUTES_TO_DUMP:
                attr_value = getattr(plugin, attr_name)
                print(f"{attr_name} = ", end='')
                pprint(attr_value)
            print()
def process_file(self, filepath, only_if_updated=True, safe_mode=True):
    """
    Given a path to a python module or zip file, this method imports
    the module and look for dag objects within it.
    """
    integrate_dag_plugins()

    # Nothing to do when the path was never given or the file vanished.
    # todo: raise exception?
    if filepath is None or not os.path.isfile(filepath):
        return []

    try:
        # Reading the mtime can race with e.g. a git sync removing the file.
        file_last_changed_on_disk = datetime.fromtimestamp(os.path.getmtime(filepath))
        if only_if_updated and self.file_last_changed.get(filepath) == file_last_changed_on_disk:
            # Unchanged since last pass; skip re-parsing.
            return []
    except Exception as e:  # pylint: disable=broad-except
        self.log.exception(e)
        return []

    # Zip archives and plain python files take different loaders.
    if zipfile.is_zipfile(filepath):
        mods = self._load_modules_from_zip(filepath, safe_mode)
    else:
        mods = self._load_modules_from_file(filepath, safe_mode)

    found_dags = self._process_modules(filepath, mods, file_last_changed_on_disk)

    self.file_last_changed[filepath] = file_last_changed_on_disk
    return found_dags
def test_should_load_plugins_from_property(self):
    """Property-declared plugin members must appear in the plugin registries."""
    from airflow import plugins_manager

    plugins_manager.integrate_dag_plugins()

    # (expected marker, namespace whose repr must contain it)
    expectations = [
        ('TestPluginsManager.AirflowTestPropertyPlugin', plugins_manager.plugins),
        ('PluginPropertyOperator', plugins_manager.operators_modules[0].__dict__),
        ('TestNonPropertyHook', plugins_manager.hooks_modules[0].__dict__),
    ]
    for expected_name, namespace in expectations:
        self.assertIn(expected_name, str(namespace))
def test_should_load_plugins_from_property(self):
    """A hook returned from a plugin ``hooks`` property is integrated."""

    class AirflowTestPropertyPlugin(AirflowPlugin):
        name = "test_property_plugin"

        @property
        def hooks(self):
            class TestPropertyHook(BaseHook):
                pass

            return [TestPropertyHook]

    test_plugin = AirflowTestPropertyPlugin()
    with mock_plugin_manager(plugins=[test_plugin]):
        from airflow import plugins_manager

        plugins_manager.integrate_dag_plugins()

        self.assertIn('AirflowTestPropertyPlugin', str(plugins_manager.plugins))
        self.assertIn('TestPropertyHook', str(plugins_manager.hooks_modules[0].__dict__))
def process_file(self, filepath, only_if_updated=True, safe_mode=True):
    """
    Given a path to a python module or zip file, this method imports
    the module and look for dag objects within it.

    :param filepath: path to a ``.py`` file or a zip archive of modules
    :param only_if_updated: skip the file when its mtime matches the value
        cached in ``self.file_last_changed``
    :param safe_mode: heuristically skip files whose raw bytes do not
        contain both ``b'DAG'`` and ``b'airflow'``
    :return: list of DAG objects found (subdags included)
    """
    from airflow.models.dag import DAG  # Avoid circular import

    integrate_dag_plugins()

    found_dags = []

    # if the source file no longer exists in the DB or in the filesystem,
    # return an empty list
    # todo: raise exception?
    if filepath is None or not os.path.isfile(filepath):
        return found_dags

    try:
        # This failed before in what may have been a git sync
        # race condition
        file_last_changed_on_disk = datetime.fromtimestamp(
            os.path.getmtime(filepath))
        if only_if_updated \
                and filepath in self.file_last_changed \
                and file_last_changed_on_disk == self.file_last_changed[filepath]:
            # Unchanged since last parse; nothing to re-import.
            return found_dags
    except Exception as e:
        self.log.exception(e)
        return found_dags

    mods = []
    is_zipfile = zipfile.is_zipfile(filepath)
    if not is_zipfile:
        if safe_mode:
            # Cheap pre-filter: read raw bytes and require both markers
            # before paying the cost of a real import.
            with open(filepath, 'rb') as file:
                content = file.read()
                if not all([s in content for s in (b'DAG', b'airflow')]):
                    self.file_last_changed[
                        filepath] = file_last_changed_on_disk
                    # Don't want to spam user with skip messages
                    if not self.has_logged:
                        self.has_logged = True
                        self.log.info(
                            "File %s assumed to contain no DAGs. Skipping.",
                            filepath)
                    return found_dags

        self.log.debug("Importing %s", filepath)
        org_mod_name, _ = os.path.splitext(os.path.split(filepath)[-1])
        # Path-hash prefix keeps the synthetic module name unique per file,
        # so same-named DAG files in different folders don't collide.
        mod_name = ('unusual_prefix_' +
                    hashlib.sha1(filepath.encode('utf-8')).hexdigest() +
                    '_' + org_mod_name)

        if mod_name in sys.modules:
            # Drop any stale cached module so the file is re-executed.
            del sys.modules[mod_name]

        # Bound the import so a hanging top-level statement in a DAG file
        # cannot stall the whole parse loop.
        with timeout(self.DAGBAG_IMPORT_TIMEOUT):
            try:
                loader = importlib.machinery.SourceFileLoader(
                    mod_name, filepath)
                spec = importlib.util.spec_from_loader(mod_name, loader)
                m = importlib.util.module_from_spec(spec)
                sys.modules[spec.name] = m
                loader.exec_module(m)
                mods.append(m)
            except Exception as e:
                # Record the import failure for the UI and remember the mtime
                # so the broken file is not re-parsed until it changes.
                self.log.exception("Failed to import: %s", filepath)
                self.import_errors[filepath] = str(e)
                self.file_last_changed[
                    filepath] = file_last_changed_on_disk
    else:
        # Zip archive: only top-level .py/.pyc members are considered.
        zip_file = zipfile.ZipFile(filepath)
        for mod in zip_file.infolist():
            head, _ = os.path.split(mod.filename)
            mod_name, ext = os.path.splitext(mod.filename)
            if not head and (ext == '.py' or ext == '.pyc'):
                if mod_name == '__init__':
                    self.log.warning("Found __init__.%s at root of %s",
                                     ext, filepath)
                if safe_mode:
                    with zip_file.open(mod.filename) as zf:
                        self.log.debug("Reading %s from %s",
                                       mod.filename, filepath)
                        content = zf.read()
                        if not all(
                                [s in content for s in (b'DAG', b'airflow')]):
                            self.file_last_changed[filepath] = (
                                file_last_changed_on_disk)
                            # todo: create ignore list
                            # Don't want to spam user with skip messages
                            if not self.has_logged:
                                self.has_logged = True
                                self.log.info(
                                    "File %s assumed to contain no DAGs. Skipping.",
                                    filepath)
                            # NOTE(review): unlike the non-zip branch there is
                            # no early exit here — the member is still imported
                            # below after the skip message; confirm intended.

                if mod_name in sys.modules:
                    del sys.modules[mod_name]

                try:
                    # Putting the archive on sys.path lets zipimport resolve
                    # the member as a regular module.
                    sys.path.insert(0, filepath)
                    m = importlib.import_module(mod_name)
                    mods.append(m)
                except Exception as e:
                    self.log.exception("Failed to import: %s", filepath)
                    self.import_errors[filepath] = str(e)
                    self.file_last_changed[
                        filepath] = file_last_changed_on_disk

    # Scan every imported module's namespace for DAG instances and bag them.
    for m in mods:
        for dag in list(m.__dict__.values()):
            if isinstance(dag, DAG):
                if not dag.full_filepath:
                    dag.full_filepath = filepath
                    if dag.fileloc != filepath and not is_zipfile:
                        dag.fileloc = filepath
                try:
                    dag.is_subdag = False
                    self.bag_dag(dag, parent_dag=dag, root_dag=dag)
                    # Validate string schedules as cron expressions up front.
                    if isinstance(dag.normalized_schedule_interval, str):
                        croniter(dag.normalized_schedule_interval)
                    found_dags.append(dag)
                    found_dags += dag.subdags
                except (CroniterBadCronError,
                        CroniterBadDateError,
                        CroniterNotAlphaError) as cron_e:
                    self.log.exception("Failed to bag_dag: %s",
                                       dag.full_filepath)
                    self.import_errors[dag.full_filepath] = \
                        "Invalid Cron expression: " + str(cron_e)
                    self.file_last_changed[dag.full_filepath] = \
                        file_last_changed_on_disk
                except AirflowDagCycleException as cycle_exception:
                    # Cycles in task dependencies make the DAG unusable.
                    self.log.exception("Failed to bag_dag: %s",
                                       dag.full_filepath)
                    self.import_errors[dag.full_filepath] = str(
                        cycle_exception)
                    self.file_last_changed[dag.full_filepath] = \
                        file_last_changed_on_disk

    self.file_last_changed[filepath] = file_last_changed_on_disk
    return found_dags