Example #1
0
    def test_should_load_plugins_from_property(self):
        """
        A plugin may expose ``operators`` through a property and ``hooks``
        through a plain class attribute; after ``integrate_dag_plugins()`` both
        must show up in the corresponding plugins-manager modules.
        """

        class AirflowTestPropertyPlugin(AirflowPlugin):
            name = "test_property_plugin"

            @property
            def operators(self):
                # Imported only when the property is actually read.
                from airflow.models.baseoperator import BaseOperator

                class PluginPropertyOperator(BaseOperator):
                    pass

                return [PluginPropertyOperator]

            class TestNonPropertyHook(BaseHook):
                pass

            hooks = [TestNonPropertyHook]

        plugin_under_test = AirflowTestPropertyPlugin()
        with mock_plugin_manager(plugins=[plugin_under_test]):
            from airflow import plugins_manager

            plugins_manager.integrate_dag_plugins()

            # Each rendered string is built right before its own assertion so
            # that a failure is reported at the first broken expectation.
            loaded_plugins = str(plugins_manager.plugins)
            self.assertIn('AirflowTestPropertyPlugin', loaded_plugins)

            operator_namespace = str(plugins_manager.operators_modules[0].__dict__)
            self.assertIn('PluginPropertyOperator', operator_namespace)

            hook_namespace = str(plugins_manager.hooks_modules[0].__dict__)
            self.assertIn("TestNonPropertyHook", hook_namespace)
Example #2
0
def dump_plugins(args):
    """Dump plugins information

    Sets the plugins-manager logger to DEBUG, runs the plugin loading and
    integration steps, then prints the attributes listed in
    ``PLUGINS_MANAGER_ATTRIBUTES_TO_DUMP`` followed, for every loaded plugin,
    by the attributes listed in ``PLUGINS_ATTRIBUTES_TO_DUMP``.

    :param args: not read by this function
    """

    def _print_attributes(owner, names):
        """Print ``<name> = <pretty-printed value>`` for each attribute name."""
        for name in names:
            value = getattr(owner, name)
            print(f"{name} = ", end='')
            pprint(value)

    plugins_manager.log.setLevel(logging.DEBUG)

    plugins_manager.ensure_plugins_loaded()
    plugins_manager.integrate_dag_plugins()
    plugins_manager.integrate_executor_plugins()
    plugins_manager.initialize_extra_operators_links_plugins()
    plugins_manager.initialize_web_ui_plugins()

    # NOTE(review): "MANGER" looks like a typo for "MANAGER"; left untouched
    # because this is runtime output that other code may match on.
    _header("PLUGINS MANGER:", "#")
    _print_attributes(plugins_manager, PLUGINS_MANAGER_ATTRIBUTES_TO_DUMP)
    print()

    _header("PLUGINS:", "#")
    loaded = plugins_manager.plugins
    if not loaded:
        print("No plugins loaded")
        return

    print(f"Loaded {len(loaded)} plugins")
    for position, plugin in enumerate(loaded, 1):
        _header(f"{position}. {plugin.name}", "=")
        _print_attributes(plugin, PLUGINS_ATTRIBUTES_TO_DUMP)
        print()
Example #3
0
    def process_file(self, filepath, only_if_updated=True, safe_mode=True):
        """
        Import the python module or zip file found at ``filepath`` and collect
        the DAG objects defined in it.

        :param filepath: path to the file to import; ``None`` or a path that is
            not an existing file produces an empty list
        :param only_if_updated: when true, a file whose modification time equals
            the one stored in ``self.file_last_changed`` is not processed again
        :param safe_mode: forwarded unchanged to the module loading helpers
        :return: whatever ``self._process_modules`` returns, or an empty list
            when the file is skipped
        """
        integrate_dag_plugins()

        # if the source file no longer exists in the DB or in the filesystem,
        # return an empty list
        # todo: raise exception?
        if filepath is None:
            return []
        if not os.path.isfile(filepath):
            return []

        try:
            # This failed before in what may have been a git sync
            # race condition
            mtime_on_disk = datetime.fromtimestamp(os.path.getmtime(filepath))
            already_processed = (
                only_if_updated
                and filepath in self.file_last_changed
                and mtime_on_disk == self.file_last_changed[filepath]
            )
            if already_processed:
                return []
        except Exception as e:  # pylint: disable=broad-except
            self.log.exception(e)
            return []

        # Zip archives and plain files are imported by different helpers.
        if zipfile.is_zipfile(filepath):
            mods = self._load_modules_from_zip(filepath, safe_mode)
        else:
            mods = self._load_modules_from_file(filepath, safe_mode)

        found_dags = self._process_modules(filepath, mods, mtime_on_disk)

        # Remember the timestamp so an unchanged file can be skipped next time.
        self.file_last_changed[filepath] = mtime_on_disk
        return found_dags
 def test_should_load_plugins_from_property(self):
     """
     After ``integrate_dag_plugins()`` the property-based test plugin, its
     operator and its hook must all be visible through the plugins manager.
     """
     from airflow import plugins_manager

     plugins_manager.integrate_dag_plugins()

     # Each rendered string is built right before its own assertion so that a
     # failure is reported at the first broken expectation.
     loaded_plugins = str(plugins_manager.plugins)
     self.assertIn('TestPluginsManager.AirflowTestPropertyPlugin', loaded_plugins)

     operator_namespace = str(plugins_manager.operators_modules[0].__dict__)
     self.assertIn('PluginPropertyOperator', operator_namespace)

     hook_namespace = str(plugins_manager.hooks_modules[0].__dict__)
     self.assertIn("TestNonPropertyHook", hook_namespace)
Example #5
0
    def test_should_load_plugins_from_property(self):
        """
        A plugin may expose ``hooks`` through a property; after
        ``integrate_dag_plugins()`` the hook must show up in the first hooks
        module of the plugins manager.
        """

        class AirflowTestPropertyPlugin(AirflowPlugin):
            name = "test_property_plugin"

            @property
            def hooks(self):
                # The hook class is created when the property is read.
                class TestPropertyHook(BaseHook):
                    pass

                return [TestPropertyHook]

        plugin_under_test = AirflowTestPropertyPlugin()
        with mock_plugin_manager(plugins=[plugin_under_test]):
            from airflow import plugins_manager

            plugins_manager.integrate_dag_plugins()

            loaded_plugins = str(plugins_manager.plugins)
            self.assertIn('AirflowTestPropertyPlugin', loaded_plugins)

            hook_namespace = str(plugins_manager.hooks_modules[0].__dict__)
            self.assertIn("TestPropertyHook", hook_namespace)
Example #6
0
    def process_file(self, filepath, only_if_updated=True, safe_mode=True):
        """
        Given a path to a python module or zip file, this method imports
        the module and look for dag objects within it.

        :param filepath: path to a python file or a zip archive; ``None`` or a
            path that is not an existing file yields an empty list
        :param only_if_updated: when true, a file whose modification time equals
            the one stored in ``self.file_last_changed`` is not processed again
        :param safe_mode: when true, a source is only imported if its raw bytes
            contain both ``DAG`` and ``airflow``
        :return: list of the DAGs that were bagged, including their subdags;
            import and validation failures are recorded in
            ``self.import_errors`` instead of being raised
        """
        from airflow.models.dag import DAG  # Avoid circular import

        integrate_dag_plugins()
        found_dags = []

        # if the source file no longer exists in the DB or in the filesystem,
        # return an empty list
        # todo: raise exception?
        if filepath is None or not os.path.isfile(filepath):
            return found_dags

        try:
            # This failed before in what may have been a git sync
            # race condition
            file_last_changed_on_disk = datetime.fromtimestamp(
                os.path.getmtime(filepath))
            if only_if_updated \
                    and filepath in self.file_last_changed \
                    and file_last_changed_on_disk == self.file_last_changed[filepath]:
                return found_dags

        except Exception as e:
            self.log.exception(e)
            return found_dags

        # Byte markers a source must contain to be imported in safe mode.
        dag_markers = (b'DAG', b'airflow')

        mods = []
        is_zipfile = zipfile.is_zipfile(filepath)
        if not is_zipfile:
            if safe_mode:
                with open(filepath, 'rb') as file:
                    content = file.read()
                # The handle is already closed here; only the bytes are needed.
                if not all(marker in content for marker in dag_markers):
                    self.file_last_changed[filepath] = file_last_changed_on_disk
                    # Don't want to spam user with skip messages
                    if not self.has_logged:
                        self.has_logged = True
                        self.log.info(
                            "File %s assumed to contain no DAGs. Skipping.",
                            filepath)
                    return found_dags

            self.log.debug("Importing %s", filepath)
            org_mod_name, _ = os.path.splitext(os.path.split(filepath)[-1])
            # The module name embeds a hash of the full path, so two files with
            # the same base name get different entries in sys.modules.
            mod_name = ('unusual_prefix_' +
                        hashlib.sha1(filepath.encode('utf-8')).hexdigest() +
                        '_' + org_mod_name)

            # Drop any previously imported copy so the file is executed again.
            if mod_name in sys.modules:
                del sys.modules[mod_name]

            with timeout(self.DAGBAG_IMPORT_TIMEOUT):
                try:
                    loader = importlib.machinery.SourceFileLoader(
                        mod_name, filepath)
                    spec = importlib.util.spec_from_loader(mod_name, loader)
                    m = importlib.util.module_from_spec(spec)
                    sys.modules[spec.name] = m
                    loader.exec_module(m)
                    mods.append(m)
                except Exception as e:
                    self.log.exception("Failed to import: %s", filepath)
                    self.import_errors[filepath] = str(e)
                    self.file_last_changed[filepath] = file_last_changed_on_disk

        else:
            # The context manager closes the archive handle on every path.
            with zipfile.ZipFile(filepath) as zip_file:
                for mod in zip_file.infolist():
                    head, _ = os.path.split(mod.filename)
                    mod_name, ext = os.path.splitext(mod.filename)
                    # Only python files at the root of the archive are modules.
                    if head or ext not in ('.py', '.pyc'):
                        continue

                    if mod_name == '__init__':
                        # ``ext`` already starts with a dot.
                        self.log.warning("Found __init__%s at root of %s",
                                         ext, filepath)

                    if safe_mode:
                        with zip_file.open(mod.filename) as zf:
                            self.log.debug("Reading %s from %s", mod.filename,
                                           filepath)
                            content = zf.read()
                        if not all(marker in content for marker in dag_markers):
                            self.file_last_changed[filepath] = (
                                file_last_changed_on_disk)
                            # todo: create ignore list
                            # Don't want to spam user with skip messages
                            if not self.has_logged:
                                self.has_logged = True
                                self.log.info(
                                    "File %s assumed to contain no DAGs. Skipping.",
                                    filepath)
                            # Actually skip the member, as the message says.
                            continue

                    if mod_name in sys.modules:
                        del sys.modules[mod_name]

                    try:
                        # NOTE(review): the archive path is pushed onto sys.path
                        # once per imported member; confirm that is intended.
                        sys.path.insert(0, filepath)
                        m = importlib.import_module(mod_name)
                        mods.append(m)
                    except Exception as e:
                        self.log.exception("Failed to import: %s", filepath)
                        self.import_errors[filepath] = str(e)
                        self.file_last_changed[filepath] = (
                            file_last_changed_on_disk)

        for m in mods:
            # Iterate over a snapshot of the module namespace.
            for dag in list(m.__dict__.values()):
                if not isinstance(dag, DAG):
                    continue

                if not dag.full_filepath:
                    dag.full_filepath = filepath
                    if dag.fileloc != filepath and not is_zipfile:
                        dag.fileloc = filepath
                try:
                    dag.is_subdag = False
                    self.bag_dag(dag, parent_dag=dag, root_dag=dag)
                    if isinstance(dag.normalized_schedule_interval, str):
                        # The result is discarded; only the croniter errors
                        # handled below matter here.
                        croniter(dag.normalized_schedule_interval)
                    found_dags.append(dag)
                    found_dags += dag.subdags
                except (CroniterBadCronError, CroniterBadDateError,
                        CroniterNotAlphaError) as cron_e:
                    self.log.exception("Failed to bag_dag: %s",
                                       dag.full_filepath)
                    self.import_errors[dag.full_filepath] = \
                        "Invalid Cron expression: " + str(cron_e)
                    self.file_last_changed[dag.full_filepath] = \
                        file_last_changed_on_disk
                except AirflowDagCycleException as cycle_exception:
                    self.log.exception("Failed to bag_dag: %s",
                                       dag.full_filepath)
                    self.import_errors[dag.full_filepath] = str(
                        cycle_exception)
                    self.file_last_changed[dag.full_filepath] = \
                        file_last_changed_on_disk

        self.file_last_changed[filepath] = file_last_changed_on_disk
        return found_dags