def get_unique_task_id( task_id: str, dag: Optional[DAG] = None, task_group: Optional[TaskGroup] = None ) -> str: """ Generate unique task id given a DAG (or if run in a DAG context) Ids are generated by appending a unique number to the end of the original task id. Example: task_id task_id__1 task_id__2 ... task_id__20 """ dag = dag or DagContext.get_current_dag() if not dag: return task_id # We need to check if we are in the context of TaskGroup as the task_id may # already be altered task_group = task_group or TaskGroupContext.get_current_task_group(dag) tg_task_id = task_group.child_id(task_id) if task_group else task_id if tg_task_id not in dag.task_ids: return task_id core = re.split(r'__\d+$', task_id)[0] suffixes = sorted( int(re.split(r'^.+__', task_id)[1]) for task_id in dag.task_ids if re.match(rf'^{core}__\d+$', task_id) ) if not suffixes: return f'{core}__1' return f'{core}__{suffixes[-1] + 1}'
def map(self, **kwargs: "MapArgument") -> XComArg: self._validate_arg_names("map", kwargs) partial_kwargs = self.kwargs.copy() dag = partial_kwargs.pop("dag", DagContext.get_current_dag()) task_group = partial_kwargs.pop( "task_group", TaskGroupContext.get_current_task_group(dag)) task_id = get_unique_task_id(partial_kwargs.pop("task_id"), dag, task_group) params = partial_kwargs.pop("params", None) # Logic here should be kept in sync with BaseOperatorMeta.partial(). if "task_concurrency" in partial_kwargs: raise TypeError("unexpected argument: task_concurrency") if partial_kwargs.get("wait_for_downstream"): partial_kwargs["depends_on_past"] = True start_date = timezone.convert_to_utc( partial_kwargs.pop("start_date", None)) end_date = timezone.convert_to_utc(partial_kwargs.pop( "end_date", None)) if partial_kwargs.get("pool") is None: partial_kwargs["pool"] = Pool.DEFAULT_POOL_NAME partial_kwargs["retries"] = parse_retries( partial_kwargs.get("retries", DEFAULT_RETRIES)) partial_kwargs["retry_delay"] = coerce_retry_delay( partial_kwargs.get("retry_delay", DEFAULT_RETRY_DELAY), ) partial_kwargs["resources"] = coerce_resources( partial_kwargs.get("resources")) partial_kwargs.setdefault("executor_config", {}) partial_kwargs.setdefault("op_args", []) partial_kwargs.setdefault("op_kwargs", {}) # Mypy does not work well with a subclassed attrs class :( _MappedOperator = cast(Any, DecoratedMappedOperator) operator = _MappedOperator( operator_class=self.operator_class, mapped_kwargs={}, partial_kwargs=partial_kwargs, task_id=task_id, params=params, deps=MappedOperator.deps_for(self.operator_class), operator_extra_links=self.operator_class.operator_extra_links, template_ext=self.operator_class.template_ext, template_fields=self.operator_class.template_fields, ui_color=self.operator_class.ui_color, ui_fgcolor=self.operator_class.ui_fgcolor, is_dummy=False, task_module=self.operator_class.__module__, task_type=self.operator_class.__name__, dag=dag, task_group=task_group, start_date=start_date, end_date=end_date, multiple_outputs=self.multiple_outputs, python_callable=self.function, mapped_op_kwargs=kwargs, ) return XComArg(operator=operator)
def map( self, *, dag: Optional["DAG"] = None, task_group: Optional["TaskGroup"] = None, **kwargs ) -> XComArg: self._validate_arg_names("map", kwargs) dag = dag or DagContext.get_current_dag() task_group = task_group or TaskGroupContext.get_current_task_group(dag) task_id = get_unique_task_id(self.kwargs['task_id'], dag, task_group) operator = MappedOperator.from_decorator( decorator=self, dag=dag, task_group=task_group, task_id=task_id, mapped_kwargs=kwargs, ) return XComArg(operator=operator)
def get_unique_task_id( task_id: str, dag: Optional[DAG] = None, task_group: Optional[TaskGroup] = None, ) -> str: """ Generate unique task id given a DAG (or if run in a DAG context) Ids are generated by appending a unique number to the end of the original task id. Example: task_id task_id__1 task_id__2 ... task_id__20 """ dag = dag or DagContext.get_current_dag() if not dag: return task_id # We need to check if we are in the context of TaskGroup as the task_id may # already be altered task_group = task_group or TaskGroupContext.get_current_task_group(dag) tg_task_id = task_group.child_id(task_id) if task_group else task_id if tg_task_id not in dag.task_ids: return task_id def _find_id_suffixes(dag: DAG) -> Iterator[int]: prefix = re.split(r"__\d+$", tg_task_id)[0] for task_id in dag.task_ids: match = re.match(rf"^{prefix}__(\d+)$", task_id) if match is None: continue yield int(match.group(1)) yield 0 # Default if there's no matching task ID. core = re.split(r"__\d+$", task_id)[0] return f"{core}__{max(_find_id_suffixes(dag)) + 1}"
def map(self, *, dag: Optional["DAG"] = None, task_group: Optional["TaskGroup"] = None, **kwargs) -> XComArg: dag = dag or DagContext.get_current_dag() task_group = task_group or TaskGroupContext.get_current_task_group(dag) task_id = get_unique_task_id(self.kwargs['task_id'], dag, task_group) self._validate_arg_names("map", kwargs) operator = MappedOperator( operator_class=self.operator_class, task_id=task_id, dag=dag, task_group=task_group, partial_kwargs=self.kwargs, # Set them to empty to bypass the validation, as for decorated stuff we validate ourselves mapped_kwargs={}, ) operator.mapped_kwargs.update(kwargs) return XComArg(operator=operator)
def expand(self, **map_kwargs: "Mappable") -> XComArg: self._validate_arg_names("expand", map_kwargs) prevent_duplicates(self.kwargs, map_kwargs, fail_reason="mapping already partial") ensure_xcomarg_return_value(map_kwargs) task_kwargs = self.kwargs.copy() dag = task_kwargs.pop("dag", None) or DagContext.get_current_dag() task_group = task_kwargs.pop( "task_group", None) or TaskGroupContext.get_current_task_group(dag) partial_kwargs, default_params = get_merged_defaults( dag=dag, task_group=task_group, task_params=task_kwargs.pop("params", None), task_default_args=task_kwargs.pop("default_args", None), ) partial_kwargs.update(task_kwargs) task_id = get_unique_task_id(partial_kwargs.pop("task_id"), dag, task_group) params = partial_kwargs.pop("params", None) or default_params # Logic here should be kept in sync with BaseOperatorMeta.partial(). if "task_concurrency" in partial_kwargs: raise TypeError("unexpected argument: task_concurrency") if partial_kwargs.get("wait_for_downstream"): partial_kwargs["depends_on_past"] = True start_date = timezone.convert_to_utc( partial_kwargs.pop("start_date", None)) end_date = timezone.convert_to_utc(partial_kwargs.pop( "end_date", None)) if partial_kwargs.get("pool") is None: partial_kwargs["pool"] = Pool.DEFAULT_POOL_NAME partial_kwargs["retries"] = parse_retries( partial_kwargs.get("retries", DEFAULT_RETRIES)) partial_kwargs["retry_delay"] = coerce_retry_delay( partial_kwargs.get("retry_delay", DEFAULT_RETRY_DELAY), ) partial_kwargs["resources"] = coerce_resources( partial_kwargs.get("resources")) partial_kwargs.setdefault("executor_config", {}) partial_kwargs.setdefault("op_args", []) partial_kwargs.setdefault("op_kwargs", {}) # Mypy does not work well with a subclassed attrs class :( _MappedOperator = cast(Any, DecoratedMappedOperator) operator = _MappedOperator( operator_class=self.operator_class, mapped_kwargs={}, partial_kwargs=partial_kwargs, task_id=task_id, params=params, deps=MappedOperator.deps_for(self.operator_class), operator_extra_links=self.operator_class.operator_extra_links, template_ext=self.operator_class.template_ext, template_fields=self.operator_class.template_fields, template_fields_renderers=self.operator_class. template_fields_renderers, ui_color=self.operator_class.ui_color, ui_fgcolor=self.operator_class.ui_fgcolor, is_empty=False, task_module=self.operator_class.__module__, task_type=self.operator_class.__name__, dag=dag, task_group=task_group, start_date=start_date, end_date=end_date, multiple_outputs=self.multiple_outputs, python_callable=self.function, mapped_op_kwargs=map_kwargs, # Different from classic operators, kwargs passed to a taskflow # task's expand() contribute to the op_kwargs operator argument, not # the operator arguments themselves, and should expand against it. expansion_kwargs_attr="mapped_op_kwargs", ) return XComArg(operator=operator)
def _expand(self, expand_input: ExpandInput, *, strict: bool) -> XComArg: ensure_xcomarg_return_value(expand_input.value) task_kwargs = self.kwargs.copy() dag = task_kwargs.pop("dag", None) or DagContext.get_current_dag() task_group = task_kwargs.pop( "task_group", None) or TaskGroupContext.get_current_task_group(dag) partial_kwargs, default_params = get_merged_defaults( dag=dag, task_group=task_group, task_params=task_kwargs.pop("params", None), task_default_args=task_kwargs.pop("default_args", None), ) partial_kwargs.update(task_kwargs) task_id = get_unique_task_id(partial_kwargs.pop("task_id"), dag, task_group) params = partial_kwargs.pop("params", None) or default_params # Logic here should be kept in sync with BaseOperatorMeta.partial(). if "task_concurrency" in partial_kwargs: raise TypeError("unexpected argument: task_concurrency") if partial_kwargs.get("wait_for_downstream"): partial_kwargs["depends_on_past"] = True start_date = timezone.convert_to_utc( partial_kwargs.pop("start_date", None)) end_date = timezone.convert_to_utc(partial_kwargs.pop( "end_date", None)) if partial_kwargs.get("pool") is None: partial_kwargs["pool"] = Pool.DEFAULT_POOL_NAME partial_kwargs["retries"] = parse_retries( partial_kwargs.get("retries", DEFAULT_RETRIES)) partial_kwargs["retry_delay"] = coerce_timedelta( partial_kwargs.get("retry_delay", DEFAULT_RETRY_DELAY), key="retry_delay", ) max_retry_delay = partial_kwargs.get("max_retry_delay") partial_kwargs["max_retry_delay"] = ( max_retry_delay if max_retry_delay is None else coerce_timedelta( max_retry_delay, key="max_retry_delay")) partial_kwargs["resources"] = coerce_resources( partial_kwargs.get("resources")) partial_kwargs.setdefault("executor_config", {}) partial_kwargs.setdefault("op_args", []) partial_kwargs.setdefault("op_kwargs", {}) # Mypy does not work well with a subclassed attrs class :( _MappedOperator = cast(Any, DecoratedMappedOperator) try: operator_name = self.operator_class.custom_operator_name # type: ignore except AttributeError: operator_name = self.operator_class.__name__ operator = _MappedOperator( operator_class=self.operator_class, expand_input= EXPAND_INPUT_EMPTY, # Don't use this; mapped values go to op_kwargs_expand_input. partial_kwargs=partial_kwargs, task_id=task_id, params=params, deps=MappedOperator.deps_for(self.operator_class), operator_extra_links=self.operator_class.operator_extra_links, template_ext=self.operator_class.template_ext, template_fields=self.operator_class.template_fields, template_fields_renderers=self.operator_class. template_fields_renderers, ui_color=self.operator_class.ui_color, ui_fgcolor=self.operator_class.ui_fgcolor, is_empty=False, task_module=self.operator_class.__module__, task_type=self.operator_class.__name__, operator_name=operator_name, dag=dag, task_group=task_group, start_date=start_date, end_date=end_date, multiple_outputs=self.multiple_outputs, python_callable=self.function, op_kwargs_expand_input=expand_input, disallow_kwargs_override=strict, # Different from classic operators, kwargs passed to a taskflow # task's expand() contribute to the op_kwargs operator argument, not # the operator arguments themselves, and should expand against it. expand_input_attr="op_kwargs_expand_input", ) return XComArg(operator=operator)