def add(self, task: DAGNode) -> None:
    """Add a task (or an empty TaskGroup) as a direct child of this TaskGroup.

    :param task: the node to register under its ``node_id``.
    :raises TaskAlreadyInTaskGroup: if ``task`` is an ``AbstractOperator`` whose
        current ``task_group`` is set and is not this group.
    :raises DuplicateTaskIdFound: if a child with the same ``node_id`` already exists.
    :raises RuntimeError: if ``task`` is a TaskGroup bound to a different DAG than
        this group.
    :raises AirflowException: if ``task`` is a TaskGroup that already has children.

    :meta private:
    """
    from airflow.models.abstractoperator import AbstractOperator

    existing_tg = task.task_group
    if isinstance(task, AbstractOperator) and existing_tg is not None and existing_tg != self:
        raise TaskAlreadyInTaskGroup(task.node_id, existing_tg.node_id, self.node_id)

    # Set the TG first, as setting it might change the return value of node_id!
    # A weak proxy is stored rather than a strong reference to this group.
    task.task_group = weakref.proxy(self)
    key = task.node_id

    if key in self.children:
        node_type = "Task" if hasattr(task, 'task_id') else "Task Group"
        raise DuplicateTaskIdFound(f"{node_type} id '{key}' has already been added to the DAG")

    if isinstance(task, TaskGroup):
        if self.dag:
            if task.dag is not None and self.dag is not task.dag:
                # Exceptions do not %-format their arguments, so build the
                # message eagerly instead of passing a format string + args.
                raise RuntimeError(
                    f"Cannot mix TaskGroups from different DAGs: {self.dag} and {task.dag}"
                )
            # The nested group inherits this group's DAG.
            task.dag = self.dag
        if task.children:
            raise AirflowException("Cannot add a non-empty TaskGroup")

    self.children[key] = task
def __init__(
    self,
    group_id: Optional[str],
    prefix_group_id: bool = True,
    parent_group: Optional["TaskGroup"] = None,
    dag: Optional["DAG"] = None,
    tooltip: str = "",
    ui_color: str = "CornflowerBlue",
    ui_fgcolor: str = "#000",
):
    """Create a TaskGroup and register it with its parent group.

    :param group_id: id of the group; ``None`` creates a root TaskGroup.
    :param prefix_group_id: stored on the instance as ``prefix_group_id``.
    :param parent_group: explicit parent; when falsy the current group from
        ``TaskGroupContext`` is used instead.
    :param dag: DAG to look the current group up in; when falsy the current
        DAG from ``DagContext`` is used.
    :param tooltip: stored on the instance as ``tooltip``.
    :param ui_color: stored on the instance as ``ui_color``.
    :param ui_fgcolor: stored on the instance as ``ui_fgcolor``.
    :raises ValueError: if a non-root ``group_id`` is not a ``str`` or is empty.
    :raises AirflowException: if a root group is given a parent, or a non-root
        group has neither a DAG nor a resolvable parent group.
    :raises DuplicateTaskIdFound: if the resulting group id is already in use.
    """
    from airflow.models.dag import DagContext

    self.prefix_group_id = prefix_group_id

    if group_id is not None:
        if not isinstance(group_id, str):
            raise ValueError("group_id must be str")
        if not group_id:
            raise ValueError("group_id must not be empty")

        dag = dag or DagContext.get_current_dag()
        if not (parent_group or dag):
            raise AirflowException("TaskGroup can only be used inside a dag")

        resolved_parent = parent_group or TaskGroupContext.get_current_task_group(dag)
        self._parent_group = resolved_parent
        if not resolved_parent:
            raise AirflowException("TaskGroup must have a parent_group except for the root TaskGroup")

        # Share the parent's registry of used ids (same set object).
        self.used_group_ids: Set[Optional[str]] = resolved_parent.used_group_ids
    else:
        # This creates a root TaskGroup.
        if parent_group:
            raise AirflowException("Root TaskGroup cannot have parent_group")
        # used_group_ids is shared across all TaskGroups in the same DAG to keep track
        # of used group_id to avoid duplication.
        self.used_group_ids = set()
        self._parent_group = None

    self._group_id = group_id

    reserved_ids = self.used_group_ids
    if self.group_id in reserved_ids:
        raise DuplicateTaskIdFound(f"group_id '{self.group_id}' has already been added to the DAG")
    # Reserve the group id together with its two join ids.
    reserved_ids.add(self.group_id)
    reserved_ids.add(self.downstream_join_id)
    reserved_ids.add(self.upstream_join_id)

    # Must exist before the parent's add() runs, since add() inspects it.
    self.children: Dict[str, Union["BaseOperator", "TaskGroup"]] = {}
    if self._parent_group:
        self._parent_group.add(self)

    self.tooltip = tooltip
    self.ui_color = ui_color
    self.ui_fgcolor = ui_fgcolor

    # Keep track of TaskGroups or tasks that depend on this entire TaskGroup separately
    # so that we can optimize the number of edges when entire TaskGroups depend on each other.
    self.upstream_group_ids: Set[Optional[str]] = set()
    self.downstream_group_ids: Set[Optional[str]] = set()
    self.upstream_task_ids: Set[Optional[str]] = set()
    self.downstream_task_ids: Set[Optional[str]] = set()
def add(self, task: Union["BaseOperator", "TaskGroup"]) -> None:
    """Register ``task`` as a direct child of this TaskGroup.

    The child is keyed by ``group_id`` when it is a TaskGroup and by
    ``task_id`` otherwise.

    :raises DuplicateTaskIdFound: if a child with the same key already exists.
    :raises AirflowException: if ``task`` is a TaskGroup that already has children.
    """
    adding_group = isinstance(task, TaskGroup)
    key = task.group_id if adding_group else task.task_id

    if key in self.children:
        raise DuplicateTaskIdFound(f"Task id '{key}' has already been added to the DAG")

    # Only empty groups may be attached.
    if adding_group and task.children:
        raise AirflowException("Cannot add a non-empty TaskGroup")

    self.children[key] = task  # type: ignore
def _check_for_group_id_collisions(self, add_suffix_on_collision: bool):
    """Resolve a clash between ``self._group_id`` and ``self.used_group_ids``.

    Does nothing when ``_group_id`` is ``None`` or not yet used. On a clash the
    id is rewritten in place to ``<base>__<n>``, where ``n`` is one greater than
    the largest numeric suffix already used for the same base (or ``1`` when no
    suffixed id exists).
    Example : task_group ==> task_group__1 -> task_group__2 -> task_group__3

    :param add_suffix_on_collision: when false, a clash raises instead of
        renaming the group.
    :raises DuplicateTaskIdFound: on a clash when ``add_suffix_on_collision``
        is false.
    """
    if self._group_id is None:
        return
    if self._group_id not in self.used_group_ids:
        return
    if not add_suffix_on_collision:
        raise DuplicateTaskIdFound(f"group_id '{self._group_id}' has already been added to the DAG")

    # Strip an existing numeric suffix so "tg__2" and "tg" share the base "tg".
    base = re.split(r'__\d+$', self._group_id)[0]
    # Escape the base: ids may contain regex metacharacters such as "." and
    # must only match themselves, not arbitrary look-alike ids.
    suffix_pattern = re.compile(rf'^{re.escape(base)}__(\d+)$')
    candidates = (
        suffix_pattern.match(used_group_id)
        for used_group_id in self.used_group_ids
        if used_group_id is not None
    )
    suffixes = [int(found.group(1)) for found in candidates if found]

    if suffixes:
        self._group_id = f'{base}__{max(suffixes) + 1}'
    else:
        self._group_id += '__1'
def dag(self, dag):
    """
    Bind this operator to ``dag``.

    An operator can be bound to a single DAG only; binding it again to that
    same DAG object is accepted. If the DAG has no task under this operator's
    ``task_id`` yet, the operator is added to it via ``dag.add_task``.

    :raises TypeError: if ``dag`` is not a ``DAG`` instance.
    :raises AirflowException: if a different DAG is already assigned.
    :raises DuplicateTaskIdFound: if the DAG already holds another task
        under the same ``task_id``.
    """
    if not isinstance(dag, DAG):
        received = dag.__class__.__name__
        raise TypeError('Expected DAG; received {}'.format(received))

    if self.has_dag() and self.dag is not dag:
        raise AirflowException(
            "The DAG assigned to {} can not be changed.".format(self))

    if self.task_id not in dag.task_dict:
        dag.add_task(self)
    elif dag.task_dict[self.task_id] != self:
        # The id is taken by a task that does not compare equal to this one.
        raise DuplicateTaskIdFound(
            "Task id '{}' has already been added to the DAG".format(self.task_id))

    self._dag = dag  # pylint: disable=attribute-defined-outside-init
def add(self, task: DAGNode) -> None:
    """Add a task (or an empty TaskGroup) as a direct child of this TaskGroup.

    :param task: the node to register under its ``node_id``.
    :raises DuplicateTaskIdFound: if a child with the same ``node_id`` already exists.
    :raises RuntimeError: if ``task`` is a TaskGroup bound to a different DAG than
        this group.
    :raises AirflowException: if ``task`` is a TaskGroup that already has children.
    """
    key = task.node_id

    if key in self.children:
        node_type = "Task" if hasattr(task, 'task_id') else "Task Group"
        raise DuplicateTaskIdFound(f"{node_type} id '{key}' has already been added to the DAG")

    if isinstance(task, TaskGroup):
        if self.dag:
            if task.dag is not None and self.dag is not task.dag:
                # Exceptions do not %-format their arguments, so build the
                # message eagerly instead of passing a format string + args.
                raise RuntimeError(
                    f"Cannot mix TaskGroups from different DAGs: {self.dag} and {task.dag}"
                )
            # The nested group inherits this group's DAG.
            task.dag = self.dag
        if task.children:
            raise AirflowException("Cannot add a non-empty TaskGroup")

    self.children[key] = task
    # Back-reference is a weak proxy rather than a strong reference to this group.
    task.task_group = weakref.proxy(self)
def __init__(
    self,
    group_id: Optional[str],
    prefix_group_id: bool = True,
    parent_group: Optional["TaskGroup"] = None,
    dag: Optional["DAG"] = None,
    default_args: Optional[Dict] = None,
    tooltip: str = "",
    ui_color: str = "CornflowerBlue",
    ui_fgcolor: str = "#000",
    add_suffix_on_collision: bool = False,
):
    """Create a TaskGroup and register it with its parent group.

    :param group_id: id of the group; ``None`` creates a root TaskGroup.
    :param prefix_group_id: stored on the instance as ``prefix_group_id``.
    :param parent_group: explicit parent; when falsy the current group from
        ``TaskGroupContext`` is used instead.
    :param dag: DAG to look the current group up in; when falsy the current
        DAG from ``DagContext`` is used.
    :param default_args: deep-copied and stored as ``default_args`` (an empty
        dict when omitted).
    :param tooltip: stored on the instance as ``tooltip``.
    :param ui_color: stored on the instance as ``ui_color``.
    :param ui_fgcolor: stored on the instance as ``ui_fgcolor``.
    :param add_suffix_on_collision: when true, an already-used ``group_id`` is
        renamed to ``<base>__<n>`` instead of raising.
    :raises ValueError: if a non-root ``group_id`` is not a ``str`` or is empty.
    :raises AirflowException: if a root group is given a parent, or a non-root
        group has neither a DAG nor a resolvable parent group.
    :raises DuplicateTaskIdFound: if ``group_id`` is already used and
        ``add_suffix_on_collision`` is false.
    """
    from airflow.models.dag import DagContext

    self.prefix_group_id = prefix_group_id
    self.default_args = copy.deepcopy(default_args or {})

    if group_id is None:
        # This creates a root TaskGroup.
        if parent_group:
            raise AirflowException("Root TaskGroup cannot have parent_group")
        # used_group_ids is shared across all TaskGroups in the same DAG to keep track
        # of used group_id to avoid duplication.
        self.used_group_ids: Set[Optional[str]] = set()
        self._parent_group = None
    else:
        if not isinstance(group_id, str):
            raise ValueError("group_id must be str")
        if not group_id:
            raise ValueError("group_id must not be empty")

        dag = dag or DagContext.get_current_dag()

        if not parent_group and not dag:
            raise AirflowException("TaskGroup can only be used inside a dag")

        self._parent_group = parent_group or TaskGroupContext.get_current_task_group(dag)
        if not self._parent_group:
            raise AirflowException("TaskGroup must have a parent_group except for the root TaskGroup")

        # Share the parent's registry of used ids (same set object).
        self.used_group_ids = self._parent_group.used_group_ids

    self._group_id = group_id

    # if given group_id already used assign suffix by incrementing largest used suffix integer
    # Example : task_group ==> task_group__1 -> task_group__2 -> task_group__3
    if group_id in self.used_group_ids:
        if not add_suffix_on_collision:
            raise DuplicateTaskIdFound(f"group_id '{self.group_id}' has already been added to the DAG")

        # Strip an existing numeric suffix so "tg__2" and "tg" share the base "tg".
        base = re.split(r'__\d+$', group_id)[0]
        # Escape the base: ids may contain regex metacharacters such as "." and
        # must only match themselves, not arbitrary look-alike ids.
        suffix_pattern = re.compile(rf'^{re.escape(base)}__(\d+)$')
        candidates = (
            suffix_pattern.match(used_group_id)
            for used_group_id in self.used_group_ids
            if used_group_id is not None
        )
        suffixes = [int(found.group(1)) for found in candidates if found]

        if suffixes:
            self._group_id = f'{base}__{max(suffixes) + 1}'
        else:
            self._group_id += '__1'

    # Reserve the group id together with its two join ids.
    self.used_group_ids.add(self.group_id)
    self.used_group_ids.add(self.downstream_join_id)
    self.used_group_ids.add(self.upstream_join_id)

    # Must exist before the parent's add() runs, since add() inspects it.
    self.children: Dict[str, Union["BaseOperator", "TaskGroup"]] = {}
    if self._parent_group:
        self._parent_group.add(self)

    self.tooltip = tooltip
    self.ui_color = ui_color
    self.ui_fgcolor = ui_fgcolor

    # Keep track of TaskGroups or tasks that depend on this entire TaskGroup separately
    # so that we can optimize the number of edges when entire TaskGroups depend on each other.
    self.upstream_group_ids: Set[Optional[str]] = set()
    self.downstream_group_ids: Set[Optional[str]] = set()
    self.upstream_task_ids: Set[Optional[str]] = set()
    self.downstream_task_ids: Set[Optional[str]] = set()