def __init__(
    self,
    group_id: Optional[str],
    prefix_group_id: bool = True,
    parent_group: Optional["TaskGroup"] = None,
    dag: Optional["DAG"] = None,
    default_args: Optional[Dict] = None,
    tooltip: str = "",
    ui_color: str = "CornflowerBlue",
    ui_fgcolor: str = "#000",
    add_suffix_on_collision: bool = False,
):
    """Create a TaskGroup and register it with its parent group.

    :param group_id: Identifier of the group. ``None`` creates a root
        TaskGroup, which must not be given a ``parent_group``.
    :param prefix_group_id: When true, ``group_id`` is checked with
        ``validate_group_key`` because it is used as a prefix in task ids.
        When false, ``group_id`` only has to be a non-empty ``str``.
    :param parent_group: Group this one is added to. Falls back to
        ``TaskGroupContext.get_current_task_group(dag)`` for non-root groups.
    :param dag: DAG the group belongs to. Falls back to
        ``DagContext.get_current_dag()``.
    :param default_args: Stored on the instance as a deep copy; ``None``
        becomes an empty dict.
    :param tooltip: Stored as ``self.tooltip``.
    :param ui_color: Stored as ``self.ui_color``.
    :param ui_fgcolor: Stored as ``self.ui_fgcolor``.
    :param add_suffix_on_collision: Forwarded to
        ``_check_for_group_id_collisions``, which handles a ``group_id``
        that is already in use.
    :raises AirflowException: If a root group is given a ``parent_group``,
        or a non-root group has neither a DAG nor a parent group available.
    :raises ValueError: If ``prefix_group_id`` is false and ``group_id`` is
        not a non-empty ``str``.
    :raises RuntimeError: If ``dag`` is not the parent group's DAG.
    """
    # Imported here rather than at module level
    # (presumably to avoid a circular import -- TODO confirm).
    from airflow.models.dag import DagContext

    self.prefix_group_id = prefix_group_id
    # Deep copy so later mutation by the caller does not leak into the group.
    self.default_args = copy.deepcopy(default_args or {})

    dag = dag or DagContext.get_current_dag()

    if group_id is None:
        # This creates a root TaskGroup.
        if parent_group:
            raise AirflowException("Root TaskGroup cannot have parent_group")
        # used_group_ids is shared across all TaskGroups in the same DAG to keep track
        # of used group_id to avoid duplication.
        self.used_group_ids = set()
        self.dag = dag
    else:
        if prefix_group_id:
            # If group id is used as prefix, it should not contain spaces nor dots
            # because it is used as prefix in the task_id
            validate_group_key(group_id)
        else:
            if not isinstance(group_id, str):
                raise ValueError("group_id must be str")
            if not group_id:
                raise ValueError("group_id must not be empty")

        if not parent_group and not dag:
            raise AirflowException("TaskGroup can only be used inside a dag")

        parent_group = parent_group or TaskGroupContext.get_current_task_group(dag)
        if not parent_group:
            raise AirflowException("TaskGroup must have a parent_group except for the root TaskGroup")
        if dag is not parent_group.dag:
            # Exceptions do not interpolate logging-style "%s" arguments, so the
            # message has to be formatted before it is handed to RuntimeError.
            raise RuntimeError(
                f"Cannot mix TaskGroups from different DAGs: {dag} and {parent_group.dag}"
            )

        # Share the parent's registry so ids stay unique across the whole DAG.
        self.used_group_ids = parent_group.used_group_ids

    # if given group_id already used assign suffix by incrementing largest used suffix integer
    # Example : task_group ==> task_group__1 -> task_group__2 -> task_group__3
    self._group_id = group_id
    self._check_for_group_id_collisions(add_suffix_on_collision)

    self.children: Dict[str, DAGNode] = {}
    if parent_group:
        parent_group.add(self)

    self.used_group_ids.add(self.group_id)
    # Join ids are only reserved when the group has a truthy group_id.
    if self.group_id:
        self.used_group_ids.add(self.downstream_join_id)
        self.used_group_ids.add(self.upstream_join_id)

    self.tooltip = tooltip
    self.ui_color = ui_color
    self.ui_fgcolor = ui_fgcolor

    # Keep track of TaskGroups or tasks that depend on this entire TaskGroup separately
    # so that we can optimize the number of edges when entire TaskGroups depend on each other.
    self.upstream_group_ids: Set[Optional[str]] = set()
    self.downstream_group_ids: Set[Optional[str]] = set()
    self.upstream_task_ids = set()
    self.downstream_task_ids = set()
def __init__(
    self,
    group_id: Optional[str],
    prefix_group_id: bool = True,
    parent_group: Optional["TaskGroup"] = None,
    dag: Optional["DAG"] = None,
    default_args: Optional[Dict] = None,
    tooltip: str = "",
    ui_color: str = "CornflowerBlue",
    ui_fgcolor: str = "#000",
    add_suffix_on_collision: bool = False,
):
    """Create a TaskGroup, resolve id collisions and register it with its parent.

    :param group_id: Identifier of the group. ``None`` creates a root
        TaskGroup, which must not be given a ``parent_group``.
    :param prefix_group_id: When true, ``group_id`` is checked with
        ``validate_group_key`` because it is used as a prefix in task ids.
        When false, ``group_id`` only has to be a non-empty ``str``.
    :param parent_group: Group this one is added to. Falls back to
        ``TaskGroupContext.get_current_task_group(dag)`` for non-root groups.
    :param dag: DAG used to look up the current task group for non-root
        groups. Falls back to ``DagContext.get_current_dag()``.
    :param default_args: Stored on the instance as a deep copy; ``None``
        becomes an empty dict.
    :param tooltip: Stored as ``self.tooltip``.
    :param ui_color: Stored as ``self.ui_color``.
    :param ui_fgcolor: Stored as ``self.ui_fgcolor``.
    :param add_suffix_on_collision: When ``group_id`` is already used,
        append ``__<n>`` (one more than the largest used suffix) instead of
        raising.
    :raises AirflowException: If a root group is given a ``parent_group``,
        or a non-root group has neither a DAG nor a parent group available.
    :raises ValueError: If ``prefix_group_id`` is false and ``group_id`` is
        not a non-empty ``str``.
    :raises DuplicateTaskIdFound: If ``group_id`` is already used and
        ``add_suffix_on_collision`` is false.
    """
    # Imported here rather than at module level
    # (presumably to avoid a circular import -- TODO confirm).
    from airflow.models.dag import DagContext

    self.prefix_group_id = prefix_group_id
    # Deep copy so later mutation by the caller does not leak into the group.
    self.default_args = copy.deepcopy(default_args or {})

    if group_id is None:
        # This creates a root TaskGroup.
        if parent_group:
            raise AirflowException("Root TaskGroup cannot have parent_group")
        # used_group_ids is shared across all TaskGroups in the same DAG to keep track
        # of used group_id to avoid duplication.
        self.used_group_ids: Set[Optional[str]] = set()
        self._parent_group = None
    else:
        if prefix_group_id:
            # If group id is used as prefix, it should not contain spaces nor dots
            # because it is used as prefix in the task_id
            validate_group_key(group_id)
        else:
            if not isinstance(group_id, str):
                raise ValueError("group_id must be str")
            if not group_id:
                raise ValueError("group_id must not be empty")

        dag = dag or DagContext.get_current_dag()

        if not parent_group and not dag:
            raise AirflowException("TaskGroup can only be used inside a dag")

        self._parent_group = parent_group or TaskGroupContext.get_current_task_group(dag)
        if not self._parent_group:
            raise AirflowException("TaskGroup must have a parent_group except for the root TaskGroup")
        # Share the parent's registry so ids stay unique across the whole DAG.
        self.used_group_ids = self._parent_group.used_group_ids

    self._group_id = group_id
    # if given group_id already used assign suffix by incrementing largest used suffix integer
    # Example : task_group ==> task_group__1 -> task_group__2 -> task_group__3
    if group_id in self.used_group_ids:
        if not add_suffix_on_collision:
            raise DuplicateTaskIdFound(f"group_id '{self.group_id}' has already been added to the DAG")
        # Strip an existing numeric suffix so "x__2" collides within the "x" family.
        base = re.split(r'__\d+$', group_id)[0]
        # Escape the base: with prefix_group_id=False the id is not restricted
        # to a safe character set and may contain regex metacharacters.
        suffix_pattern = re.compile(rf'^{re.escape(base)}__(\d+)$')
        suffix_matches = (
            suffix_pattern.match(used_group_id)
            for used_group_id in self.used_group_ids
            if used_group_id is not None
        )
        suffixes = [int(found.group(1)) for found in suffix_matches if found]
        if suffixes:
            self._group_id = f'{base}__{max(suffixes) + 1}'
        else:
            self._group_id += '__1'

    self.used_group_ids.add(self.group_id)
    self.used_group_ids.add(self.downstream_join_id)
    self.used_group_ids.add(self.upstream_join_id)
    self.children: Dict[str, Union["BaseOperator", "TaskGroup"]] = {}
    if self._parent_group:
        self._parent_group.add(self)

    self.tooltip = tooltip
    self.ui_color = ui_color
    self.ui_fgcolor = ui_fgcolor

    # Keep track of TaskGroups or tasks that depend on this entire TaskGroup separately
    # so that we can optimize the number of edges when entire TaskGroups depend on each other.
    self.upstream_group_ids: Set[Optional[str]] = set()
    self.downstream_group_ids: Set[Optional[str]] = set()
    self.upstream_task_ids: Set[Optional[str]] = set()
    self.downstream_task_ids: Set[Optional[str]] = set()