def deserialize_operator(cls, encoded_op: Dict[str, Any]) -> Operator:
    """Deserializes an operator from a JSON object.

    When ``encoded_op`` carries a truthy ``"_is_mapped"`` entry, a
    ``MappedOperator`` is built from stand-in values; otherwise a
    ``SerializedBaseOperator`` is created from the task id alone. In both
    cases the new object is passed to ``cls.populate_operator`` together
    with ``encoded_op`` before it is returned.

    :param encoded_op: Serialized operator data. ``"task_id"`` is always
        read; the mapped branch additionally reads ``"_task_module"``,
        ``"_task_type"`` and ``"_expansion_kwargs_attr"``.
    :return: The operator instance after ``populate_operator`` has run.
    :raises KeyError: If one of the keys read by subscription is missing.
    """
    op: Operator
    if encoded_op.get("_is_mapped", False):
        # Fetch the field collection once instead of once per key of
        # ``encoded_op`` inside the comprehension filter.
        serialized_fields = BaseOperator.get_serialized_fields()
        # Most of these will be loaded later, these are just some stand-ins.
        op_data = {k: v for k, v in encoded_op.items() if k in serialized_fields}
        op = MappedOperator(
            operator_class=op_data,
            mapped_kwargs={},
            partial_kwargs={},
            task_id=encoded_op["task_id"],
            params={},
            deps=MappedOperator.deps_for(BaseOperator),
            operator_extra_links=BaseOperator.operator_extra_links,
            template_ext=BaseOperator.template_ext,
            template_fields=BaseOperator.template_fields,
            template_fields_renderers=BaseOperator.template_fields_renderers,
            ui_color=BaseOperator.ui_color,
            ui_fgcolor=BaseOperator.ui_fgcolor,
            is_empty=False,
            task_module=encoded_op["_task_module"],
            task_type=encoded_op["_task_type"],
            dag=None,
            task_group=None,
            start_date=None,
            end_date=None,
            expansion_kwargs_attr=encoded_op["_expansion_kwargs_attr"],
        )
    else:
        op = SerializedBaseOperator(task_id=encoded_op['task_id'])
    cls.populate_operator(op, encoded_op)
    return op
def serialize_mapped_operator(cls, op: MappedOperator) -> Dict[str, Any]:
    """Serialize a mapped operator into a JSON-compatible dict.

    The operator is first encoded with ``cls._serialize_node``. The result
    is then compacted: ``partial_kwargs`` entries equal to the values from
    ``_get_default_mapped_partial()`` are dropped, and the encoded
    ``op_kwargs`` / ``mapped_op_kwargs`` wrappers are flattened to their
    inner value. Finally ``"_is_mapped"`` is set to ``True``.

    :param op: The mapped operator to serialize.
    :return: The serialized representation of ``op``.
    :raises KeyError: If the serialized node has no ``"partial_kwargs"``.
    :raises AssertionError: If an ``op_kwargs`` entry is not encoded as a
        dict.
    """
    # Deps only need to be written out when they differ from the default
    # set (the deserializer in this file rebuilds mapped operators with
    # ``MappedOperator.deps_for(BaseOperator)``). The previous identity
    # test (``is``) selected the opposite case: it asked for deps only when
    # they were the very same object as that default.
    default_deps = MappedOperator.deps_for(BaseOperator)
    serialized_op = cls._serialize_node(op, include_deps=op.deps != default_deps)

    # Simplify partial_kwargs by comparing it to the most barebone object.
    # Remove all entries that are simply default values.
    serialized_partial = serialized_op["partial_kwargs"]
    for k, default in _get_default_mapped_partial().items():
        try:
            v = serialized_partial[k]
        except KeyError:
            continue
        if v == default:
            del serialized_partial[k]

    # Simplify op_kwargs format. It must be a dict, so we flatten it.
    # A missing key at any level simply leaves that entry untouched.
    with contextlib.suppress(KeyError):
        op_kwargs = serialized_op["mapped_kwargs"]["op_kwargs"]
        assert op_kwargs[Encoding.TYPE] == DAT.DICT
        serialized_op["mapped_kwargs"]["op_kwargs"] = op_kwargs[Encoding.VAR]
    with contextlib.suppress(KeyError):
        op_kwargs = serialized_op["partial_kwargs"]["op_kwargs"]
        assert op_kwargs[Encoding.TYPE] == DAT.DICT
        serialized_op["partial_kwargs"]["op_kwargs"] = op_kwargs[Encoding.VAR]
    with contextlib.suppress(KeyError):
        op_kwargs = serialized_op["mapped_op_kwargs"]
        assert op_kwargs[Encoding.TYPE] == DAT.DICT
        serialized_op["mapped_op_kwargs"] = op_kwargs[Encoding.VAR]

    serialized_op["_is_mapped"] = True
    return serialized_op
def map(self, **kwargs: "MapArgument") -> XComArg:
    """Build a mapped operator for this task and wrap it in an ``XComArg``.

    A copy of ``self.kwargs`` supplies the partial arguments. ``dag`` and
    ``task_group`` default to the current context values only when the
    corresponding key is absent from that copy. The remaining entries are
    normalized (pool, retries, retry delay, resources, and empty defaults
    for ``executor_config`` / ``op_args`` / ``op_kwargs``) before the
    operator is constructed.

    :param kwargs: Arguments to map over; passed to the operator as
        ``mapped_op_kwargs``.
    :return: An ``XComArg`` referring to the newly created operator.
    :raises TypeError: If ``task_concurrency`` is among the task kwargs.
    :raises KeyError: If the task kwargs contain no ``task_id``.
    """
    self._validate_arg_names("map", kwargs)

    partial = self.kwargs.copy()
    context_dag = DagContext.get_current_dag()
    dag = partial.pop("dag", context_dag)
    context_group = TaskGroupContext.get_current_task_group(dag)
    task_group = partial.pop("task_group", context_group)
    task_id = get_unique_task_id(partial.pop("task_id"), dag, task_group)
    params = partial.pop("params", None)

    # Logic here should be kept in sync with BaseOperatorMeta.partial().
    if "task_concurrency" in partial:
        raise TypeError("unexpected argument: task_concurrency")
    if partial.get("wait_for_downstream"):
        partial["depends_on_past"] = True

    raw_start = partial.pop("start_date", None)
    start_date = timezone.convert_to_utc(raw_start)
    raw_end = partial.pop("end_date", None)
    end_date = timezone.convert_to_utc(raw_end)

    # An explicit ``None`` pool is replaced as well, not only a missing one.
    if partial.get("pool") is None:
        partial["pool"] = Pool.DEFAULT_POOL_NAME

    raw_retries = partial.get("retries", DEFAULT_RETRIES)
    partial["retries"] = parse_retries(raw_retries)
    raw_delay = partial.get("retry_delay", DEFAULT_RETRY_DELAY)
    partial["retry_delay"] = coerce_retry_delay(raw_delay)
    partial["resources"] = coerce_resources(partial.get("resources"))

    for key, empty in (("executor_config", {}), ("op_args", []), ("op_kwargs", {})):
        partial.setdefault(key, empty)

    # Mypy does not work well with a subclassed attrs class :(
    _MappedOperator = cast(Any, DecoratedMappedOperator)
    op_cls = self.operator_class
    mapped_op = _MappedOperator(
        operator_class=op_cls,
        mapped_kwargs={},
        partial_kwargs=partial,
        task_id=task_id,
        params=params,
        deps=MappedOperator.deps_for(op_cls),
        operator_extra_links=op_cls.operator_extra_links,
        template_ext=op_cls.template_ext,
        template_fields=op_cls.template_fields,
        ui_color=op_cls.ui_color,
        ui_fgcolor=op_cls.ui_fgcolor,
        is_dummy=False,
        task_module=op_cls.__module__,
        task_type=op_cls.__name__,
        dag=dag,
        task_group=task_group,
        start_date=start_date,
        end_date=end_date,
        multiple_outputs=self.multiple_outputs,
        python_callable=self.function,
        mapped_op_kwargs=kwargs,
    )
    return XComArg(operator=mapped_op)
def expand(self, **map_kwargs: "Mappable") -> XComArg:
    """Build a mapped operator that expands over ``map_kwargs``.

    The argument names are validated and checked against ``self.kwargs``
    for duplicates. ``dag`` and ``task_group`` fall back to the current
    context values when missing or falsy. Defaults obtained from
    ``get_merged_defaults`` are overlaid with the task's own kwargs, then
    normalized (pool, retries, retry delay, resources, and empty defaults
    for ``executor_config`` / ``op_args`` / ``op_kwargs``) before the
    operator is constructed.

    :param map_kwargs: Arguments to expand over; passed to the operator as
        ``mapped_op_kwargs``.
    :return: An ``XComArg`` referring to the newly created operator.
    :raises TypeError: If ``task_concurrency`` ends up in the partial
        kwargs.
    :raises KeyError: If the merged kwargs contain no ``task_id``.
    """
    self._validate_arg_names("expand", map_kwargs)
    prevent_duplicates(self.kwargs, map_kwargs, fail_reason="mapping already partial")
    ensure_xcomarg_return_value(map_kwargs)

    own_kwargs = self.kwargs.copy()

    dag = own_kwargs.pop("dag", None)
    if not dag:
        dag = DagContext.get_current_dag()
    task_group = own_kwargs.pop("task_group", None)
    if not task_group:
        task_group = TaskGroupContext.get_current_task_group(dag)

    task_params = own_kwargs.pop("params", None)
    task_default_args = own_kwargs.pop("default_args", None)
    partial, default_params = get_merged_defaults(
        dag=dag,
        task_group=task_group,
        task_params=task_params,
        task_default_args=task_default_args,
    )
    # The task's own kwargs take precedence over the merged defaults.
    partial.update(own_kwargs)

    task_id = get_unique_task_id(partial.pop("task_id"), dag, task_group)
    params = partial.pop("params", None)
    if not params:
        params = default_params

    # Logic here should be kept in sync with BaseOperatorMeta.partial().
    if "task_concurrency" in partial:
        raise TypeError("unexpected argument: task_concurrency")
    if partial.get("wait_for_downstream"):
        partial["depends_on_past"] = True

    raw_start = partial.pop("start_date", None)
    start_date = timezone.convert_to_utc(raw_start)
    raw_end = partial.pop("end_date", None)
    end_date = timezone.convert_to_utc(raw_end)

    # An explicit ``None`` pool is replaced as well, not only a missing one.
    if partial.get("pool") is None:
        partial["pool"] = Pool.DEFAULT_POOL_NAME

    raw_retries = partial.get("retries", DEFAULT_RETRIES)
    partial["retries"] = parse_retries(raw_retries)
    raw_delay = partial.get("retry_delay", DEFAULT_RETRY_DELAY)
    partial["retry_delay"] = coerce_retry_delay(raw_delay)
    partial["resources"] = coerce_resources(partial.get("resources"))

    for key, empty in (("executor_config", {}), ("op_args", []), ("op_kwargs", {})):
        partial.setdefault(key, empty)

    # Mypy does not work well with a subclassed attrs class :(
    _MappedOperator = cast(Any, DecoratedMappedOperator)
    op_cls = self.operator_class
    mapped_op = _MappedOperator(
        operator_class=op_cls,
        mapped_kwargs={},
        partial_kwargs=partial,
        task_id=task_id,
        params=params,
        deps=MappedOperator.deps_for(op_cls),
        operator_extra_links=op_cls.operator_extra_links,
        template_ext=op_cls.template_ext,
        template_fields=op_cls.template_fields,
        template_fields_renderers=op_cls.template_fields_renderers,
        ui_color=op_cls.ui_color,
        ui_fgcolor=op_cls.ui_fgcolor,
        is_empty=False,
        task_module=op_cls.__module__,
        task_type=op_cls.__name__,
        dag=dag,
        task_group=task_group,
        start_date=start_date,
        end_date=end_date,
        multiple_outputs=self.multiple_outputs,
        python_callable=self.function,
        mapped_op_kwargs=map_kwargs,
        # Different from classic operators, kwargs passed to a taskflow
        # task's expand() contribute to the op_kwargs operator argument, not
        # the operator arguments themselves, and should expand against it.
        expansion_kwargs_attr="mapped_op_kwargs",
    )
    return XComArg(operator=mapped_op)
def _expand(self, expand_input: ExpandInput, *, strict: bool) -> XComArg:
    """Build a mapped operator that expands over ``expand_input``.

    ``dag`` and ``task_group`` fall back to the current context values when
    missing or falsy. Defaults obtained from ``get_merged_defaults`` are
    overlaid with the task's own kwargs, then normalized (pool, retries,
    retry delays, resources, and empty defaults for ``executor_config`` /
    ``op_args`` / ``op_kwargs``) before the operator is constructed.

    :param expand_input: The input to expand over; passed to the operator
        as ``op_kwargs_expand_input``.
    :param strict: Forwarded to the operator as
        ``disallow_kwargs_override``.
    :return: An ``XComArg`` referring to the newly created operator.
    :raises TypeError: If ``task_concurrency`` ends up in the partial
        kwargs.
    :raises KeyError: If the merged kwargs contain no ``task_id``.
    """
    ensure_xcomarg_return_value(expand_input.value)

    own_kwargs = self.kwargs.copy()

    dag = own_kwargs.pop("dag", None)
    if not dag:
        dag = DagContext.get_current_dag()
    task_group = own_kwargs.pop("task_group", None)
    if not task_group:
        task_group = TaskGroupContext.get_current_task_group(dag)

    task_params = own_kwargs.pop("params", None)
    task_default_args = own_kwargs.pop("default_args", None)
    partial, default_params = get_merged_defaults(
        dag=dag,
        task_group=task_group,
        task_params=task_params,
        task_default_args=task_default_args,
    )
    # The task's own kwargs take precedence over the merged defaults.
    partial.update(own_kwargs)

    task_id = get_unique_task_id(partial.pop("task_id"), dag, task_group)
    params = partial.pop("params", None)
    if not params:
        params = default_params

    # Logic here should be kept in sync with BaseOperatorMeta.partial().
    if "task_concurrency" in partial:
        raise TypeError("unexpected argument: task_concurrency")
    if partial.get("wait_for_downstream"):
        partial["depends_on_past"] = True

    raw_start = partial.pop("start_date", None)
    start_date = timezone.convert_to_utc(raw_start)
    raw_end = partial.pop("end_date", None)
    end_date = timezone.convert_to_utc(raw_end)

    # An explicit ``None`` pool is replaced as well, not only a missing one.
    if partial.get("pool") is None:
        partial["pool"] = Pool.DEFAULT_POOL_NAME

    raw_retries = partial.get("retries", DEFAULT_RETRIES)
    partial["retries"] = parse_retries(raw_retries)
    raw_delay = partial.get("retry_delay", DEFAULT_RETRY_DELAY)
    partial["retry_delay"] = coerce_timedelta(raw_delay, key="retry_delay")

    # The key is always written; ``None`` is stored as-is without coercion.
    max_retry_delay = partial.get("max_retry_delay")
    if max_retry_delay is not None:
        max_retry_delay = coerce_timedelta(max_retry_delay, key="max_retry_delay")
    partial["max_retry_delay"] = max_retry_delay

    partial["resources"] = coerce_resources(partial.get("resources"))

    for key, empty in (("executor_config", {}), ("op_args", []), ("op_kwargs", {})):
        partial.setdefault(key, empty)

    # Mypy does not work well with a subclassed attrs class :(
    _MappedOperator = cast(Any, DecoratedMappedOperator)

    # Use the class's custom operator name when it defines one, otherwise
    # its plain class name.
    try:
        operator_name = self.operator_class.custom_operator_name  # type: ignore
    except AttributeError:
        operator_name = self.operator_class.__name__

    op_cls = self.operator_class
    mapped_op = _MappedOperator(
        operator_class=op_cls,
        # Don't use this; mapped values go to op_kwargs_expand_input.
        expand_input=EXPAND_INPUT_EMPTY,
        partial_kwargs=partial,
        task_id=task_id,
        params=params,
        deps=MappedOperator.deps_for(op_cls),
        operator_extra_links=op_cls.operator_extra_links,
        template_ext=op_cls.template_ext,
        template_fields=op_cls.template_fields,
        template_fields_renderers=op_cls.template_fields_renderers,
        ui_color=op_cls.ui_color,
        ui_fgcolor=op_cls.ui_fgcolor,
        is_empty=False,
        task_module=op_cls.__module__,
        task_type=op_cls.__name__,
        operator_name=operator_name,
        dag=dag,
        task_group=task_group,
        start_date=start_date,
        end_date=end_date,
        multiple_outputs=self.multiple_outputs,
        python_callable=self.function,
        op_kwargs_expand_input=expand_input,
        disallow_kwargs_override=strict,
        # Different from classic operators, kwargs passed to a taskflow
        # task's expand() contribute to the op_kwargs operator argument, not
        # the operator arguments themselves, and should expand against it.
        expand_input_attr="op_kwargs_expand_input",
    )
    return XComArg(operator=mapped_op)