def dump_plugins(args):
    """Dump plugins information"""
    plugins_manager.ensure_plugins_loaded()
    plugins_manager.integrate_macros_plugins()
    plugins_manager.integrate_executor_plugins()
    plugins_manager.initialize_extra_operators_links_plugins()
    plugins_manager.initialize_web_ui_plugins()

    if not plugins_manager.plugins:
        print("No plugins loaded")
        return

    # One row per plugin: its name plus every attribute we dump.
    plugins_info: List[Dict[str, str]] = [
        {"name": plugin.name, **{attr: getattr(plugin, attr) for attr in PLUGINS_ATTRIBUTES_TO_DUMP}}
        for plugin in plugins_manager.plugins
    ]

    if args.output == "table":  # pylint: disable=too-many-nested-blocks
        # Drop columns that are empty for every plugin. Indexing plugins_info[0]
        # is safe because at least one plugin exists at this point.
        empty_columns = [
            col for col in list(plugins_info[0])
            if all(not bool(row[col]) for row in plugins_info)
        ]
        for row in plugins_info:
            for col in empty_columns:
                del row[col]

    AirflowConsole().print_as(plugins_info, output=args.output)
def dump_plugins(args):
    """Dump plugins information"""
    # Verbose plugin-manager logging so loading problems are visible in the dump.
    plugins_manager.log.setLevel(logging.DEBUG)
    plugins_manager.ensure_plugins_loaded()
    plugins_manager.integrate_dag_plugins()
    plugins_manager.integrate_executor_plugins()
    plugins_manager.initialize_extra_operators_links_plugins()
    plugins_manager.initialize_web_ui_plugins()

    # Fix: header previously read "PLUGINS MANGER:" (typo in user-facing output).
    _header("PLUGINS MANAGER:", "#")
    for attr_name in PLUGINS_MANAGER_ATTRIBUTES_TO_DUMP:
        attr_value = getattr(plugins_manager, attr_name)
        print(f"{attr_name} = ", end='')
        pprint(attr_value)
        print()

    _header("PLUGINS:", "#")
    if not plugins_manager.plugins:
        print("No plugins loaded")
    else:
        print(f"Loaded {len(plugins_manager.plugins)} plugins")
        for plugin_no, plugin in enumerate(plugins_manager.plugins, 1):
            _header(f"{plugin_no}. {plugin.name}", "=")
            for attr_name in PLUGINS_ATTRIBUTES_TO_DUMP:
                attr_value = getattr(plugin, attr_name)
                print(f"{attr_name} = ", end='')
                pprint(attr_value)
                print()
def global_operator_extra_link_dict(self) -> Dict[str, Any]:
    """Returns dictionary of all global extra links"""
    from airflow import plugins_manager

    plugins_manager.initialize_extra_operators_links_plugins()
    global_links = plugins_manager.global_operator_extra_links
    if global_links is None:
        raise AirflowException("Can't load operators")
    # Key each registered global link by its display name.
    result: Dict[str, Any] = {}
    for link in global_links:
        result[link.name] = link
    return result
def operator_extra_link_dict(self) -> Dict[str, Any]:
    """Returns dictionary of all extra links for the operator"""
    from airflow import plugins_manager

    plugins_manager.initialize_extra_operators_links_plugins()
    if plugins_manager.operator_extra_links is None:
        raise AirflowException("Can't load operators")

    # Links registered via plugins that target this operator class.
    links_from_plugins: Dict[str, Any] = {
        link.name: link
        for link in plugins_manager.operator_extra_links
        if link.operators and self.operator_class in link.operators
    }

    # Start from links declared on the operator itself, then let
    # plugin-defined links override any with the same name.
    all_links = {link.name: link for link in self.operator_extra_links}
    all_links.update(links_from_plugins)
    return all_links
def dump_plugins(args):
    """Dump plugins information"""
    plugins_manager.ensure_plugins_loaded()
    plugins_manager.integrate_macros_plugins()
    plugins_manager.integrate_executor_plugins()
    plugins_manager.initialize_extra_operators_links_plugins()
    plugins_manager.initialize_web_ui_plugins()

    if not plugins_manager.plugins:
        print("No plugins loaded")
        return

    console = Console()

    # Summary section: plugins folder and number of loaded plugins.
    console.print("[bold yellow]SUMMARY:[/bold yellow]")
    console.print(
        f"[bold green]Plugins directory[/bold green]: {conf.get('core', 'plugins_folder')}\n",
        highlight=False,
    )
    console.print(
        f"[bold green]Loaded plugins[/bold green]: {len(plugins_manager.plugins)}\n",
        highlight=False,
    )

    # One table per non-empty plugins-manager attribute.
    for manager_attr in PLUGINS_MANAGER_ATTRIBUTES_TO_DUMP:
        entries: Optional[List[Any]] = getattr(plugins_manager, manager_attr)
        if not entries:
            continue
        summary_table = SimpleTable(title=manager_attr.capitalize().replace("_", " "))
        summary_table.add_column(width=100)
        for entry in entries:  # pylint: disable=not-an-iterable
            summary_table.add_row(f"- {_get_name(entry)}")
        console.print(summary_table)

    # Per-plugin detail tables, skipping empty attributes.
    console.print("[bold yellow]DETAILED INFO:[/bold yellow]")
    for plugin in plugins_manager.plugins:
        detail_table = SimpleTable(title=plugin.name)
        for plugin_attr in PLUGINS_ATTRIBUTES_TO_DUMP:
            attr_value = getattr(plugin, plugin_attr)
            if not attr_value:
                continue
            detail_table.add_row(
                plugin_attr.capitalize().replace("_", " "),
                _join_plugins_names(attr_value),
            )
        console.print(detail_table)
def _deserialize_operator_extra_links(cls, encoded_op_links: list) -> Dict[str, BaseOperatorLink]:
    """
    Deserialize Operator Links if the Classes are registered in Airflow Plugins.
    Error is raised if the OperatorLink is not found in Plugins too.

    :param encoded_op_links: Serialized Operator Link
    :return: De-Serialized Operator Link
    """
    from airflow import plugins_manager

    plugins_manager.initialize_extra_operators_links_plugins()
    if plugins_manager.registered_operator_link_classes is None:
        raise AirflowException("Can't load plugins")

    deserialized_links: Dict[str, BaseOperatorLink] = {}
    for encoded_link in encoded_op_links:
        # Each entry is a single-item mapping from the OperatorLink's fully
        # qualified class name to the kwargs it was serialized with, e.g.:
        #   {'airflow.providers.google.cloud.operators.bigquery.'
        #    'BigQueryConsoleIndexableLink': {'index': 0}}
        class_path, link_kwargs = next(iter(encoded_link.items()))

        if class_path in get_operator_extra_links():
            # Built-in / provider-shipped link: import it directly.
            link_class = import_string(class_path)
        elif class_path in plugins_manager.registered_operator_link_classes:
            # Plugin-registered link: use the class recorded by the plugin manager.
            link_class = plugins_manager.registered_operator_link_classes[class_path]
        else:
            # Unregistered link class: log and bail out with no links at all
            # (matches the original behavior of discarding earlier results).
            log.error("Operator Link class %r not registered", class_path)
            return {}

        rebuilt_link: BaseOperatorLink = cattr.structure(link_kwargs, link_class)
        deserialized_links[rebuilt_link.name] = rebuilt_link

    return deserialized_links
def deserialize_operator(cls, encoded_op: Dict[str, Any]) -> BaseOperator:
    """Deserializes an operator from a JSON object.

    Rebuilds a ``SerializedBaseOperator`` from ``encoded_op``, resolving
    plugin-provided operator extra links (only when
    ``cls._load_operator_extra_links`` is set, i.e. not in the scheduler),
    and converting serialized timedeltas/datetimes/deps back to objects.

    :param encoded_op: serialized operator dictionary (JSON-decoded)
    :return: the reconstructed operator
    :raises AirflowException: when plugin operator extra links cannot be loaded
    """
    op = SerializedBaseOperator(task_id=encoded_op['task_id'])

    if "label" not in encoded_op:
        # Handle deserialization of old data before the introduction of TaskGroup
        encoded_op["label"] = encoded_op["task_id"]

    # Extra Operator Links defined in Plugins
    op_extra_links_from_plugin = {}

    # We don't want to load Extra Operator links in Scheduler
    if cls._load_operator_extra_links:  # pylint: disable=too-many-nested-blocks
        from airflow import plugins_manager

        plugins_manager.initialize_extra_operators_links_plugins()
        if plugins_manager.operator_extra_links is None:
            raise AirflowException("Can not load plugins")
        # Collect plugin links whose declared operator class matches the
        # serialized task's type and module.
        for ope in plugins_manager.operator_extra_links:
            for operator in ope.operators:
                if (operator.__name__ == encoded_op["_task_type"]
                        and operator.__module__ == encoded_op["_task_module"]):
                    op_extra_links_from_plugin.update({ope.name: ope})

        # If OperatorLinks are defined in Plugins but not in the Operator that is being Serialized
        # set the Operator links attribute
        # The case for "If OperatorLinks are defined in the operator that is being Serialized"
        # is handled in the deserialization loop where it matches k == "_operator_extra_links"
        if op_extra_links_from_plugin and "_operator_extra_links" not in encoded_op:
            setattr(op, "operator_extra_links", list(op_extra_links_from_plugin.values()))

    # Per-key deserialization: each branch converts a serialized value back
    # to its runtime representation before it is set on the operator.
    for k, v in encoded_op.items():
        if k == "_downstream_task_ids":
            v = set(v)
        elif k == "subdag":
            v = SerializedDAG.deserialize_dag(v)
        elif k in {"retry_delay", "execution_timeout", "sla", "max_retry_delay"}:
            v = cls._deserialize_timedelta(v)
        elif k in encoded_op["template_fields"]:
            # Template fields are kept as serialized; they are rendered later.
            pass
        elif k.endswith("_date"):
            v = cls._deserialize_datetime(v)
        elif k == "_operator_extra_links":
            if cls._load_operator_extra_links:
                op_predefined_extra_links = cls._deserialize_operator_extra_links(v)
                # If OperatorLinks with the same name exists, Links via Plugin have higher precedence
                op_predefined_extra_links.update(op_extra_links_from_plugin)
            else:
                op_predefined_extra_links = {}
            v = list(op_predefined_extra_links.values())
            k = "operator_extra_links"
        elif k == "deps":
            v = cls._deserialize_deps(v)
        elif k in cls._decorated_fields or k not in op.get_serialized_fields():
            v = cls._deserialize(v)
        # else use v as it is
        setattr(op, k, v)

    # Reset serializable fields that were neither serialized nor supplied by
    # the constructor defaults.
    for k in op.get_serialized_fields() - encoded_op.keys() - cls._CONSTRUCTOR_PARAMS.keys():
        setattr(op, k, None)

    # Set all the template_field to None that were not present in Serialized JSON
    for field in op.template_fields:
        if not hasattr(op, field):
            setattr(op, field, None)

    # Used to determine if an Operator is inherited from DummyOperator
    setattr(op, "_is_dummy", bool(encoded_op.get("_is_dummy", False)))

    return op
def deserialize_operator(cls, encoded_op: Dict[str, Any]) -> BaseOperator:
    """Deserializes an operator from a JSON object.

    :param encoded_op: serialized operator dictionary (JSON-decoded)
    :return: the reconstructed ``SerializedBaseOperator``
    :raises AirflowException: when plugin operator extra links cannot be loaded
    """
    from airflow import plugins_manager

    plugins_manager.initialize_extra_operators_links_plugins()
    if plugins_manager.operator_extra_links is None:
        # Fix: the message previously contained a typo ("Cnn't load plugins").
        raise AirflowException("Can't load plugins")

    op = SerializedBaseOperator(task_id=encoded_op['task_id'])

    # Extra Operator Links defined in Plugins: collect links whose declared
    # operator class matches the serialized task's type and module.
    op_extra_links_from_plugin = {}
    for ope in plugins_manager.operator_extra_links:
        for operator in ope.operators:
            if operator.__name__ == encoded_op["_task_type"] and \
                    operator.__module__ == encoded_op["_task_module"]:
                op_extra_links_from_plugin.update({ope.name: ope})

    # If OperatorLinks are defined in Plugins but not in the Operator that is being Serialized
    # set the Operator links attribute
    # The case for "If OperatorLinks are defined in the operator that is being Serialized"
    # is handled in the deserialization loop where it matches k == "_operator_extra_links"
    if op_extra_links_from_plugin and "_operator_extra_links" not in encoded_op:
        setattr(op, "operator_extra_links", list(op_extra_links_from_plugin.values()))

    # Per-key deserialization: each branch converts a serialized value back to
    # its runtime representation before it is set on the operator.
    for k, v in encoded_op.items():
        if k == "_downstream_task_ids":
            v = set(v)
        elif k == "subdag":
            v = SerializedDAG.deserialize_dag(v)
        elif k in {"retry_delay", "execution_timeout"}:
            v = cls._deserialize_timedelta(v)
        elif k in encoded_op["template_fields"]:
            # Template fields are kept as serialized; they are rendered later.
            pass
        elif k.endswith("_date"):
            v = cls._deserialize_datetime(v)
        elif k == "_operator_extra_links":
            op_predefined_extra_links = cls._deserialize_operator_extra_links(v)
            # If OperatorLinks with the same name exists, Links via Plugin have higher precedence
            op_predefined_extra_links.update(op_extra_links_from_plugin)
            v = list(op_predefined_extra_links.values())
            k = "operator_extra_links"
        elif k in cls._decorated_fields or k not in op.get_serialized_fields():
            v = cls._deserialize(v)
        # else use v as it is
        setattr(op, k, v)

    # Reset serializable fields that were neither serialized nor supplied by
    # the constructor defaults.
    for k in op.get_serialized_fields() - encoded_op.keys() - cls._CONSTRUCTOR_PARAMS.keys():
        setattr(op, k, None)

    # Set all the template_field to None that were not present in Serialized JSON
    for field in op.template_fields:
        if not hasattr(op, field):
            setattr(op, field, None)

    return op
def populate_operator(cls, op: Operator, encoded_op: Dict[str, Any]) -> None:
    """Populate ``op`` in place from the serialized dictionary ``encoded_op``.

    Handles legacy key upgrades (``_is_dummy`` -> ``_is_empty``,
    ``_downstream_task_ids`` -> ``downstream_task_ids``), converts serialized
    values back to their runtime representations, and attaches plugin-provided
    operator extra links (only when ``cls._load_operator_extra_links`` is set,
    i.e. not in the scheduler).

    :param op: the operator instance being populated (mutated in place)
    :param encoded_op: serialized operator dictionary (JSON-decoded)
    :raises AirflowException: when plugin operator extra links cannot be loaded
    """
    if "label" not in encoded_op:
        # Handle deserialization of old data before the introduction of TaskGroup
        encoded_op["label"] = encoded_op["task_id"]

    # Extra Operator Links defined in Plugins
    op_extra_links_from_plugin = {}

    # We don't want to load Extra Operator links in Scheduler
    if cls._load_operator_extra_links:
        from airflow import plugins_manager

        plugins_manager.initialize_extra_operators_links_plugins()
        if plugins_manager.operator_extra_links is None:
            raise AirflowException("Can not load plugins")
        # Collect plugin links whose declared operator class matches the
        # serialized task's type and module.
        for ope in plugins_manager.operator_extra_links:
            for operator in ope.operators:
                if (
                    operator.__name__ == encoded_op["_task_type"]
                    and operator.__module__ == encoded_op["_task_module"]
                ):
                    op_extra_links_from_plugin.update({ope.name: ope})

        # If OperatorLinks are defined in Plugins but not in the Operator that is being Serialized
        # set the Operator links attribute
        # The case for "If OperatorLinks are defined in the operator that is being Serialized"
        # is handled in the deserialization loop where it matches k == "_operator_extra_links"
        if op_extra_links_from_plugin and "_operator_extra_links" not in encoded_op:
            setattr(op, "operator_extra_links", list(op_extra_links_from_plugin.values()))

    for k, v in encoded_op.items():
        # TODO: Remove in Airflow 3.0 when dummy operator is removed
        if k == "_is_dummy":
            k = "_is_empty"
        if k == "_downstream_task_ids":
            # Upgrade from old format/name
            k = "downstream_task_ids"
        if k == "label":
            # Label shouldn't be set anymore -- it's computed from task_id now
            continue
        elif k == "downstream_task_ids":
            v = set(v)
        elif k == "subdag":
            v = SerializedDAG.deserialize_dag(v)
        elif k in {"retry_delay", "execution_timeout", "sla", "max_retry_delay"}:
            v = cls._deserialize_timedelta(v)
        elif k in encoded_op["template_fields"]:
            # Template fields are kept as serialized; they are rendered later.
            pass
        elif k == "resources":
            v = Resources.from_dict(v)
        elif k.endswith("_date"):
            v = cls._deserialize_datetime(v)
        elif k == "_operator_extra_links":
            if cls._load_operator_extra_links:
                op_predefined_extra_links = cls._deserialize_operator_extra_links(v)
                # If OperatorLinks with the same name exists, Links via Plugin have higher precedence
                op_predefined_extra_links.update(op_extra_links_from_plugin)
            else:
                op_predefined_extra_links = {}
            v = list(op_predefined_extra_links.values())
            k = "operator_extra_links"
        elif k == "deps":
            v = cls._deserialize_deps(v)
        elif k == "params":
            v = cls._deserialize_params_dict(v)
        elif k in ("mapped_kwargs", "partial_kwargs"):
            # op_kwargs is popped and deserialized separately, then restored,
            # so its nested values go through _deserialize individually.
            if "op_kwargs" not in v:
                op_kwargs: Optional[dict] = None
            else:
                op_kwargs = {arg: cls._deserialize(value) for arg, value in v.pop("op_kwargs").items()}
            v = {arg: cls._deserialize(value) for arg, value in v.items()}
            if op_kwargs is not None:
                v["op_kwargs"] = op_kwargs
        elif k == "mapped_op_kwargs":
            v = {arg: cls._deserialize(value) for arg, value in v.items()}
        elif k in cls._decorated_fields or k not in op.get_serialized_fields():
            v = cls._deserialize(v)
        # else use v as it is
        setattr(op, k, v)

    for k in op.get_serialized_fields() - encoded_op.keys() - cls._CONSTRUCTOR_PARAMS.keys():
        # TODO: refactor deserialization of BaseOperator and MappedOperator (split it out), then check
        # could go away.
        if not hasattr(op, k):
            setattr(op, k, None)

    # Set all the template_field to None that were not present in Serialized JSON
    for field in op.template_fields:
        if not hasattr(op, field):
            setattr(op, field, None)

    # Used to determine if an Operator is inherited from EmptyOperator
    setattr(op, "_is_empty", bool(encoded_op.get("_is_empty", False)))