def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Look up a DAG by id and, when a task id is given, verify the task exists.

    :param dag_id: id of the DAG to load
    :param task_id: optional id of a task that must be part of the DAG
    :return: the object returned by ``DagBag.get_dag`` for ``dag_id``
    :raises DagNotFound: if ``DagModel.get_current`` returns ``None`` or the
        bag yields no DAG for ``dag_id``
    :raises TaskNotFound: if ``task_id`` is given and the DAG has no such task
    """
    current_model = DagModel.get_current(dag_id)
    if current_model is None:
        raise DagNotFound(f"Dag id {dag_id} not found in DagModel")

    # The bag is scoped to the file recorded on the model, with
    # ``read_dags_from_db`` switched on.
    bag = DagBag(dag_folder=current_model.fileloc, read_dags_from_db=True)
    dag = bag.get_dag(dag_id)
    if not dag:
        raise DagNotFound(f"Dag id {dag_id} not found")

    if not task_id:
        return dag
    if dag.has_task(task_id):
        return dag
    raise TaskNotFound(f'Task {task_id} not found in dag {dag_id}')
def delete_dag(self, keep_records_in_log: bool = True, session=None):
    """Delete the database records that belong to ``self.dag_id``.

    Every mapped model exposing a ``dag_id`` attribute is purged of rows whose
    ``dag_id`` equals ``self.dag_id`` or starts with ``"<self.dag_id>."``.
    Import-error rows recorded for the DAG's file are removed as well.

    :param keep_records_in_log: when True, rows of the ``Log`` model are kept
    :param session: session used for all queries; it is dereferenced
        unconditionally, so a real session must be supplied
        (NOTE(review): presumably injected by a decorator outside this view)
    :raises DagNotFound: if no ``DagModel`` row exists for ``self.dag_id``
    """
    dag = session.query(DagModel).filter(DagModel.dag_id == self.dag_id).first()
    if dag is None:
        raise DagNotFound(f"Dag id {self.dag_id} not found")

    # Explicitly remove the serialized DAG here.
    if STORE_SERIALIZED_DAGS and SerializedDagModel.has_dag(dag_id=self.dag_id, session=session):
        SerializedDagModel.remove_dag(dag_id=self.dag_id, session=session)

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():
        if not hasattr(model, "dag_id"):
            continue
        # The former debug ``print(model.__name__)`` was dropped: library code
        # should not write every model name to stdout.
        if keep_records_in_log and model.__name__ == "Log":
            continue
        cond = or_(model.dag_id == self.dag_id, model.dag_id.like(self.dag_id + ".%"))
        session.query(model).filter(cond).delete(synchronize_session="fetch")

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc
    ).delete(synchronize_session="fetch")
def delete_dag(dag_id: str, keep_records_in_log: bool = True, session=None) -> int:
    """
    Delete the database records of a DAG whose file has already been removed.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: number of rows deleted across all models
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    :raises DagFileExists: if the file recorded for the DAG still exists
    """
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    if dag.fileloc and os.path.exists(dag.fileloc):
        raise DagFileExists("Dag id {} is still in DagBag. "
                            "Remove the DAG file first: {}".format(dag_id, dag.fileloc))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if hasattr(model, "dag_id"):
            if keep_records_in_log and model.__name__ == 'Log':
                continue
            # Match the DAG itself plus every id of the form "<dag_id>.<suffix>".
            cond = or_(model.dag_id == dag_id, model.dag_id.like(dag_id + ".%"))
            count += session.query(model).filter(cond).delete(synchronize_session='fetch')

    if dag.is_subdag:
        # A subdag id is "<parent_dag_id>.<task_id>"; also purge the rows of
        # the parent task that wraps it.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this tuple: it has no ``task_id``
        # attribute, so filtering on it raised AttributeError for every subdag.
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(model.dag_id == parent_dag_id,
                                                 model.task_id == task_id).delete()

    return count
def delete_dag(dag_id):
    """Delete all database records of ``dag_id`` and commit.

    Opens its own session via ``settings.Session()``. If anything fails, the
    session is rolled back so that partially issued deletes cannot be
    committed later by another user of the same session.

    :param dag_id: the dag_id of the DAG to delete
    :return: number of rows deleted across all models
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    :raises DagFileExists: if a freshly built ``DagBag`` still contains the DAG
    """
    session = settings.Session()
    try:
        DM = models.DagModel
        dag = session.query(DM).filter(DM.dag_id == dag_id).first()
        if dag is None:
            raise DagNotFound("Dag id {} not found".format(dag_id))

        dagbag = models.DagBag()
        if dag_id in dagbag.dags:
            raise DagFileExists("Dag id {} is still in DagBag. "
                                "Remove the DAG file first.".format(dag_id))

        count = 0

        # noinspection PyUnresolvedReferences,PyProtectedMember
        for m in models.Base._decl_class_registry.values():
            if hasattr(m, "dag_id"):
                # Match the DAG itself plus every id of the form "<dag_id>.<suffix>".
                cond = or_(m.dag_id == dag_id, m.dag_id.like(dag_id + ".%"))
                count += session.query(m).filter(cond).delete(synchronize_session='fetch')

        if dag.is_subdag:
            # A subdag id is "<parent_dag_id>.<task_id>".
            p, c = dag_id.rsplit(".", 1)
            # DagRun is not included: it has no ``task_id`` attribute, so
            # filtering on it raised AttributeError for every subdag.
            for m in models.TaskFail, models.TaskInstance:
                count += session.query(m).filter(m.dag_id == p, m.task_id == c).delete()

        session.commit()
    except Exception:
        # Discard partial deletes and release the transaction, then re-raise.
        session.rollback()
        raise

    return count
def trigger_dag(
    dag_id: str,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> Optional[DagRun]:
    """Trigger a run of the DAG identified by ``dag_id``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: the first dag run returned by ``_trigger_dag`` (even if several
        were created), or None when it returned nothing
    :raises DagNotFound: if ``DagModel.get_current`` finds no row for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Only the file recorded for this DAG is handed to the bag.
    bag = DagBag(dag_folder=model.fileloc)
    created_runs = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not created_runs:
        return None
    return created_runs[0]
def get_task_instance(dag_id, task_id, execution_date):
    """Return the task instance of ``task_id`` in the run of ``dag_id`` at ``execution_date``.

    :raises DagNotFound: if the DagBag has no DAG with ``dag_id``
    :raises TaskNotFound: if the DAG has no task with ``task_id``
    :raises DagRunNotFound: if the DAG has no run for ``execution_date``
    :raises TaskInstanceNotFound: if the run has no instance of the task
    """
    bag = DagBag()

    # The DAG must be known to the bag.
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # The task must be part of the DAG.
    dag = bag.get_dag(dag_id)
    if not dag.has_task(task_id):
        raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))

    # There must be a run for the requested date.
    run = dag.get_dagrun(execution_date=execution_date)
    if not run:
        raise DagRunNotFound(
            'Dag Run for date {} not found in dag {}'.format(execution_date, dag_id)
        )

    # And that run must hold an instance of the task.
    instance = run.get_task_instance(task_id)
    if instance:
        return instance
    raise TaskInstanceNotFound(
        'Task {} instance for date {} not found'.format(task_id, execution_date)
    )
def execute(self, context: Dict):
    """Trigger the target DAG and optionally wait for the run to finish.

    The execution date is resolved once: a datetime is used as-is, a string
    is parsed (and stored back on ``self.execution_date``), anything else
    falls back to ``timezone.utcnow()``. That single resolved value feeds the
    run id, the trigger call and the clear call, so they always agree.

    :param context: task context (not read by this method)
    :raises DagRunAlreadyExists: if the run exists and ``reset_dag_run`` is falsy
    :raises DagNotFound: if the target DAG has no ``DagModel`` row when clearing
    :raises AirflowException: if the awaited run reaches one of ``failed_states``
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Pass the resolved date, not self.execution_date: the latter is None
        # when no date was configured, which made the run id and the run's
        # execution date diverge.
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists:
        if not self.reset_dag_run:
            raise

        self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

        dag_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
        dag = dag_bag.get_dag(self.trigger_dag_id)
        # Bound the clear to the resolved date; None bounds would not restrict it.
        dag.clear(start_date=execution_date, end_date=execution_date)
        dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

    if self.wait_for_completion:
        # wait for dag to complete
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            # Failed states are checked first, so a state listed in both
            # collections counts as a failure.
            if state in self.failed_states:
                raise AirflowException(f"{self.trigger_dag_id} failed with failed states {state}")
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
                return
def _trigger_dag(
        dag_id,
        dag_bag,
        dag_run,
        run_id,
        conf,
        execution_date,
        replace_microseconds,
):
    """Create a dag run for ``dag_id`` and for each of its subdags.

    :param dag_id: DAG ID
    :param dag_bag: bag the DAG is looked up in
    :param dag_run: object whose ``find`` method is used to detect an existing run
    :param run_id: ID of the dag_run; derived from the execution date when falsy
    :param conf: run configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``datetime.datetime.now()`` when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of the dag runs that were created
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags``
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Fetch the DAG object for dag_id.
    dag = dag_bag.get_dag(dag_id)

    # Resolve the execution date; the fallback is a naive local timestamp.
    if not execution_date:
        execution_date = datetime.datetime.now()

    # Only the type is verified here; timezone awareness is not checked.
    assert isinstance(execution_date, datetime.datetime)

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    # Resolve the run id. By default it is derived from the execution date,
    # but one execution date may also carry several distinct run ids.
    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    # (dag_id, run_id) identifies a dag run uniquely; refuse duplicates.
    dr = dag_run.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise DagRunAlreadyExists("Run id {} already exists for dag id {}".format(
            run_id,
            dag_id
        ))

    # Resolve the run configuration. A dict is used as-is (json.loads would
    # raise TypeError on it); anything else is parsed as JSON.
    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    # Stack-based walk over the DAG and all nested subdags.
    dags_to_trigger = [dag]
    while dags_to_trigger:
        dag = dags_to_trigger.pop()
        trigger = dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if dag.subdags:
            dags_to_trigger.extend(dag.subdags)
    return triggers
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Look up a DAG by id and, when a task id is given, verify the task exists.

    :param dag_id: id of the DAG to load
    :param task_id: optional id of a task that must be part of the DAG
    :return: the object returned by ``DagBag.get_dag`` for ``dag_id``
    :raises DagNotFound: if there is no ``DagModel`` row or the bag does not
        hold the DAG after loading
    :raises TaskNotFound: if ``task_id`` is given and the DAG has no such task
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    bag = DagBag(
        dag_folder=model.fileloc,
        store_serialized_dags=conf.getboolean('core', 'store_serialized_dags'),
    )
    # Fetch first: for serialized storage this is what populates ``bag.dags``.
    dag = bag.get_dag(dag_id)
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    if not task_id:
        return dag
    if dag.has_task(task_id):
        return dag
    raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
def delete_dag(dag_id: str, keep_records_in_log: bool = True, session=None) -> int:
    """
    Delete the database records of a DAG and of all its subdags.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: number of rows deleted from the models carrying a ``dag_id``
        (import-error rows are removed too but not counted)
    :raises AirflowException: if a task instance of the DAG is still running
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    """
    log.info("Deleting DAG: %s", dag_id)

    task_instance = models.TaskInstance
    still_running = (
        session.query(task_instance.state)
        .filter(task_instance.dag_id == dag_id)
        .filter(task_instance.state == State.RUNNING)
        .first()
    )
    if still_running:
        raise AirflowException("TaskInstances still running")

    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound(f"Dag id {dag_id} not found")

    # deleting a DAG should also delete all of its subdags
    is_target = DagModel.dag_id == dag_id
    is_subdag_of_target = and_(DagModel.dag_id.like(f"{dag_id}.%"), DagModel.is_subdag)
    id_rows = session.query(DagModel.dag_id).filter(or_(is_target, is_subdag_of_target))
    dags_to_delete = [row[0] for row in id_rows]

    # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
    # There may be a lag, so explicitly removes serialized DAG here.
    if SerializedDagModel.has_dag(dag_id=dag_id, session=session):
        SerializedDagModel.remove_dag(dag_id=dag_id, session=session)

    count = 0

    for model in get_sqla_model_classes():
        if not hasattr(model, "dag_id"):
            continue
        if keep_records_in_log and model.__name__ == 'Log':
            continue
        matching = session.query(model).filter(model.dag_id.in_(dags_to_delete))
        count += matching.delete(synchronize_session='fetch')

    if dag.is_subdag:
        # A subdag id is "<parent_dag_id>.<task_id>"; purge the parent task's rows.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        for model in TaskFail, models.TaskInstance:
            parent_rows = session.query(model).filter(
                model.dag_id == parent_dag_id, model.task_id == task_id
            )
            count += parent_rows.delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    import_errors = session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc
    )
    import_errors.delete(synchronize_session='fetch')

    return count
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Fetch a DAG from a fresh ``DagBag`` and optionally verify one of its tasks.

    :param dag_id: id of the DAG to load
    :param task_id: optional id of a task that must be part of the DAG
    :return: the object returned by ``DagBag.get_dag`` for ``dag_id``
    :raises DagNotFound: if ``dag_id`` is not among the bag's DAGs
    :raises TaskNotFound: if ``task_id`` is given and the DAG has no such task
    """
    bag = DagBag()
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dag = bag.get_dag(dag_id)
    if not task_id:
        return dag
    if dag.has_task(task_id):
        return dag
    raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
def _trigger_dag(
        dag_id,
        dag_bag,
        dag_run,
        run_id,
        conf,
        execution_date,
        replace_microseconds,
):
    """Create a dag run for ``dag_id`` and for each of its subdags.

    :param dag_id: DAG ID
    :param dag_bag: bag the DAG is looked up in
    :param dag_run: object whose ``find`` method is used to detect an existing run
    :param run_id: ID of the dag_run; derived from the execution date when falsy
    :param conf: run configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of the dag runs that were created
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags``
    :raises ValueError: if the execution date is not localized
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dag = dag_bag.get_dag(dag_id)

    if not execution_date:
        execution_date = timezone.utcnow()

    # Raise explicitly rather than ``assert``: assertions vanish under ``-O``.
    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    dr = dag_run.find(dag_id=dag_id, run_id=run_id)
    if dr:
        raise DagRunAlreadyExists(
            "Run id {} already exists for dag id {}".format(run_id, dag_id))

    # A dict (or dict subclass) is used as-is; anything else is parsed as JSON.
    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    # Stack-based walk over the DAG and all nested subdags.
    dags_to_trigger = [dag]
    while dags_to_trigger:
        dag = dags_to_trigger.pop()
        trigger = dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if dag.subdags:
            dags_to_trigger.extend(dag.subdags)
    return triggers
def delete_dag(session, model, dag_id):
    """Delete the ``DagModel`` and ``XCom`` rows of a DAG and of ids prefixed by it.

    :param session: session used for all queries
    :param model: model queried to confirm that ``dag_id`` exists
    :param dag_id: id of the DAG to delete
    :raises DagNotFound: if ``model`` holds no row for ``dag_id``
    """
    existing = session.query(model).filter(model.dag_id == dag_id).first()
    if existing is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    purged_models = ("DagModel", "XCom")
    for module in Base._decl_class_registry.values():
        if not hasattr(module, "dag_id"):
            continue
        if module.__name__ not in purged_models:
            continue
        # Match the DAG itself plus every id of the form "<dag_id>.<suffix>".
        cond = or_(module.dag_id == dag_id, module.dag_id.like(dag_id + ".%"))
        session.query(module).filter(cond).delete(synchronize_session='fetch')
def delete_dag(dag_id: str, keep_records_in_log: bool = True, session=None) -> int:
    """
    Delete the database records of a DAG and of ids prefixed by it.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: number of rows deleted from the models carrying a ``dag_id``
        (import-error rows are removed too but not counted)
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    """
    logger = LoggingMixin()
    logger.log.info("Deleting DAG: %s", dag_id)
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # Scheduler removes DAGs without files from serialized_dag table every dag_dir_list_interval.
    # There may be a lag, so explicitly removes serialized DAG here.
    if STORE_SERIALIZED_DAGS and SerializedDagModel.has_dag(dag_id=dag_id, session=session):
        SerializedDagModel.remove_dag(dag_id=dag_id, session=session)

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if hasattr(model, "dag_id"):
            if keep_records_in_log and model.__name__ == 'Log':
                continue
            # Match the DAG itself plus every id of the form "<dag_id>.<suffix>".
            cond = or_(model.dag_id == dag_id, model.dag_id.like(dag_id + ".%"))
            count += session.query(model).filter(cond).delete(synchronize_session='fetch')

    if dag.is_subdag:
        # A subdag id is "<parent_dag_id>.<task_id>"; purge the parent task's rows.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this tuple: it has no ``task_id``
        # attribute, so filtering on it raised AttributeError for every subdag.
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(model.dag_id == parent_dag_id,
                                                 model.task_id == task_id).delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc
    ).delete(synchronize_session='fetch')

    return count
def get_task(dag_id, task_id):
    """Return the task object identified by the given dag_id and task_id.

    :raises DagNotFound: if the DagBag has no DAG with ``dag_id``
    :raises TaskNotFound: if the DAG has no task with ``task_id``
    """
    bag = DagBag()

    # The DAG must be known to the bag.
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    # The task must be part of the DAG before it is fetched.
    dag = bag.get_dag(dag_id)
    if dag.has_task(task_id):
        return dag.get_task(task_id)
    raise TaskNotFound('Task {} not found in dag {}'.format(task_id, dag_id))
def execute(self, context: Dict):
    """Trigger the target DAG, clearing the existing run instead if so configured.

    The execution date is resolved once: a datetime is used as-is, a string
    is parsed (and stored back on ``self.execution_date``), anything else
    falls back to ``timezone.utcnow()``. That single resolved value feeds the
    run id, the trigger call and the clear call, so they always agree.

    :param context: task context (not read by this method)
    :raises DagRunAlreadyExists: if the run exists and ``reset_dag_run`` is falsy
    :raises DagNotFound: if the target DAG has no ``DagModel`` row when clearing
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Pass the resolved date, not self.execution_date: the latter is None
        # when no date was configured, which made the run id and the run's
        # execution date diverge.
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists:
        if not self.reset_dag_run:
            raise

        self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(
                f"Dag id {self.trigger_dag_id} not found in DagModel")

        dag_bag = DagBag(
            dag_folder=dag_model.fileloc,
            store_serialized_dags=settings.STORE_SERIALIZED_DAGS)

        dag = dag_bag.get_dag(self.trigger_dag_id)
        # Bound the clear to the resolved date; None bounds would not restrict it.
        dag.clear(start_date=execution_date, end_date=execution_date)
def get_code(dag_id):
    """Return python code of a given dag_id.

    :param dag_id: id of the DAG whose source file is read
    :return: the content of the file recorded in the DAG's ``fileloc``
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    :raises AirflowException: if reading the file fails with ``IOError``
    """
    session = settings.Session()
    try:
        DM = models.DagModel
        dag = session.query(DM).filter(DM.dag_id == dag_id).first()
    finally:
        # Close even when the query raises, so the session is never left open.
        session.close()

    # Check DAG exists.
    if dag is None:
        error_message = "Dag id {} not found".format(dag_id)
        raise DagNotFound(error_message)

    try:
        with wwwutils.open_maybe_zipped(dag.fileloc, 'r') as f:
            code = f.read()
            return code
    except IOError as e:
        error_message = "Error {} while reading Dag id {} Code".format(str(e), dag_id)
        raise AirflowException(error_message)
def delete_dag(dag_id, keep_records_in_log=True):
    """
    Delete the database records of a DAG whose file has already been removed.

    Opens its own session via ``settings.Session()`` and commits on success.
    If anything fails, the session is rolled back so that partially issued
    deletes cannot be committed later by another user of the same session.

    :param dag_id: the dag_id of the DAG to delete
    :type dag_id: str
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :type keep_records_in_log: bool
    :return: number of rows deleted across all models
    :rtype: int
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    :raises DagFileExists: if the file recorded for the DAG still exists
    """
    session = settings.Session()
    try:
        DM = models.DagModel
        dag = session.query(DM).filter(DM.dag_id == dag_id).first()
        if dag is None:
            raise DagNotFound("Dag id {} not found".format(dag_id))

        # Refuse while the DAG file is still present. The previous check was
        # inverted (``not os.path.exists``) and raised exactly when the file
        # had been removed, making deletion impossible in the valid case.
        if dag.fileloc and os.path.exists(dag.fileloc):
            raise DagFileExists("Dag id {} is still in DagBag. "
                                "Remove the DAG file first: {}".format(dag_id, dag.fileloc))

        count = 0

        # noinspection PyUnresolvedReferences,PyProtectedMember
        for m in models.base.Base._decl_class_registry.values():
            if hasattr(m, "dag_id"):
                if keep_records_in_log and m.__name__ == 'Log':
                    continue
                # Match the DAG itself plus every id of the form "<dag_id>.<suffix>".
                cond = or_(m.dag_id == dag_id, m.dag_id.like(dag_id + ".%"))
                count += session.query(m).filter(cond).delete(synchronize_session='fetch')

        if dag.is_subdag:
            # A subdag id is "<parent_dag_id>.<task_id>".
            p, c = dag_id.rsplit(".", 1)
            # DagRun is not included: it has no ``task_id`` attribute, so
            # filtering on it raised AttributeError for every subdag.
            for m in models.TaskFail, models.TaskInstance:
                count += session.query(m).filter(m.dag_id == p, m.task_id == c).delete()

        session.commit()
    except Exception:
        # Discard partial deletes and release the transaction, then re-raise.
        session.rollback()
        raise

    return count
def _trigger_dag(self, dag_id: str, dag_bag: DagBag, dag_run: DagRun):
    """Create a running, externally triggered dag run for ``dag_id`` dated now.

    :param dag_id: DAG ID
    :param dag_bag: bag the DAG is fetched from
    :param dag_run: object whose ``find`` method is used to detect an existing run
    :raises DagNotFound: if the bag does not hold ``dag_id`` after fetching
    :raises DagRunAlreadyExists: if a run with the generated run id exists
    """
    # Fetch first: for serialized storage this is what populates ``dag_bag.dags``.
    dag = dag_bag.get_dag(dag_id)
    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    # The run id embeds the current UTC timestamp.
    execution_date = timezone.utcnow()
    run_id = f"rb_status_manual__{execution_date.isoformat()}"

    existing_runs = dag_run.find(dag_id=dag_id, run_id=run_id)
    if existing_runs:
        raise DagRunAlreadyExists(
            f"Run id {run_id} already exists for dag id {dag_id}")

    run_kwargs = dict(
        run_id=run_id,
        execution_date=execution_date,
        state=State.RUNNING,
        external_trigger=True,
    )
    dag.create_dagrun(**run_kwargs)
def get_dag_run_state(dag_id, execution_date):
    """Return the state of the run of ``dag_id`` at ``execution_date``.

    :return: a dict with the single key ``'state'``
    :raises DagNotFound: if the DagBag has no DAG with ``dag_id``
    :raises DagRunNotFound: if the DAG has no run for ``execution_date``
    """
    bag = DagBag()

    # The DAG must be known to the bag.
    if dag_id not in bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    dag = bag.get_dag(dag_id)

    # There must be a run for the requested date.
    run = dag.get_dagrun(execution_date=execution_date)
    if run:
        return {'state': run.get_state()}
    raise DagRunNotFound(
        'Dag Run for date {} not found in dag {}'.format(execution_date, dag_id)
    )
def trigger_dag(
        dag_id,  # type: str
        run_id=None,  # type: Optional[str]
        conf=None,  # type: Optional[Union[dict, str]]
        execution_date=None,  # type: Optional[datetime]
        replace_microseconds=True,  # type: bool
):
    """Trigger a run of the DAG identified by ``dag_id``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: the first dag run returned by ``_trigger_dag`` (even if several
        were created), or None when it returned nothing
    :raises DagNotFound: if ``DagModel.get_current`` finds no row for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    dag_file = model.fileloc

    # Imported under an alias because the ``conf`` parameter shadows the
    # configuration object of the same name.
    from airflow.configuration import conf as airflow_conf
    use_serialized = airflow_conf.getboolean('core', 'store_serialized_dags')

    bag = DagBag(dag_folder=dag_file, store_serialized_dags=use_serialized)
    created_runs = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not created_runs:
        return None
    return created_runs[0]
def delete_dag(dag_id, keep_records_in_log=True, session=None):
    """
    Delete the database records of a DAG and of ids prefixed by it.

    :param dag_id: the dag_id of the DAG to delete
    :param keep_records_in_log: whether keep records of the given dag_id
        in the Log table in the backend database (for reasons like auditing).
        The default value is True.
    :param session: session used
    :return: number of rows deleted from the models carrying a ``dag_id``
        (import-error rows are removed too but not counted)
    :raises DagNotFound: if no ``DagModel`` row exists for ``dag_id``
    """
    dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
    if dag is None:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    count = 0

    # noinspection PyUnresolvedReferences,PyProtectedMember
    for model in models.base.Base._decl_class_registry.values():  # pylint: disable=protected-access
        if hasattr(model, "dag_id"):
            if keep_records_in_log and model.__name__ == 'Log':
                continue
            # Match the DAG itself plus every id of the form "<dag_id>.<suffix>".
            cond = or_(model.dag_id == dag_id, model.dag_id.like(dag_id + ".%"))
            count += session.query(model).filter(cond).delete(synchronize_session='fetch')

    if dag.is_subdag:
        # A subdag id is "<parent_dag_id>.<task_id>"; purge the parent task's rows.
        parent_dag_id, task_id = dag_id.rsplit(".", 1)
        # DagRun is deliberately not in this tuple: it has no ``task_id``
        # attribute, so filtering on it raised AttributeError for every subdag.
        for model in TaskFail, models.TaskInstance:
            count += session.query(model).filter(model.dag_id == parent_dag_id,
                                                 model.task_id == task_id).delete()

    # Delete entries in Import Errors table for a deleted DAG
    # This handles the case when the dag_id is changed in the file
    session.query(models.ImportError).filter(
        models.ImportError.filename == dag.fileloc
    ).delete(synchronize_session='fetch')

    return count
def trigger_dag(self):
    """Trigger a run of the DAG identified by ``self.dag_id``.

    Resolves the DAG's ``DagModel`` row, builds a ``DagBag`` for the file
    recorded on it and delegates to ``self._trigger_dag`` with a fresh
    ``DagRun`` instance.

    :raises DagNotFound: if ``DagModel.get_current`` finds no row for ``self.dag_id``
    """
    model = DagModel.get_current(self.dag_id)
    if model is None:
        raise DagNotFound(f"Dag id {self.dag_id} not found in DagModel")

    # Whether serialized DAGs are used is taken from the [core] configuration.
    bag = DagBag(
        dag_folder=model.fileloc,
        store_serialized_dags=conf.getboolean("core", "store_serialized_dags"),
    )
    self._trigger_dag(dag_id=self.dag_id, dag_bag=bag, dag_run=DagRun())
def trigger_dag(
        dag_id,
        run_id=None,
        conf=None,
        execution_date=None,
        replace_microseconds=True,
):
    """Trigger a run of the DAG identified by ``dag_id``.

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: the first dag run returned by ``_trigger_dag``, or None when it
        returned nothing
    :raises DagNotFound: if ``DagModel.get_current`` finds no row for ``dag_id``
    """
    model = DagModel.get_current(dag_id)
    if model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    # Only the file recorded for this DAG is handed to the bag.
    bag = DagBag(dag_folder=model.fileloc)
    created_runs = _trigger_dag(
        dag_id=dag_id,
        dag_run=DagRun(),
        dag_bag=bag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    if not created_runs:
        return None
    return created_runs[0]
def execute(self, context: Context):
    """Trigger the target DAG, publish the run's identity and optionally wait for it.

    :param context: task context; its ``'task_instance'`` entry is used to
        push the run's execution date and run id to XCom
    :raises DagRunAlreadyExists: if the run exists and ``reset_dag_run`` is falsy
    :raises DagNotFound: if the target DAG has no ``DagModel`` row when clearing
    :raises RuntimeError: if no dag run is available after triggering
    :raises AirflowException: if the awaited run reaches one of ``failed_states``
    """
    requested_date = self.execution_date
    if isinstance(requested_date, datetime.datetime):
        parsed_execution_date = requested_date
    elif isinstance(requested_date, str):
        parsed_execution_date = timezone.parse(requested_date)
    else:
        parsed_execution_date = timezone.utcnow()

    # An explicitly configured run id wins over a generated one.
    run_id = self.trigger_run_id
    if not run_id:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, parsed_execution_date)

    try:
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=parsed_execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if not self.reset_dag_run:
            raise e

        self.log.info("Clearing %s on %s", self.trigger_dag_id, parsed_execution_date)

        # Load the target DAG and clear it for exactly the requested date.
        target_model = DagModel.get_current(self.trigger_dag_id)
        if target_model is None:
            raise DagNotFound(
                f"Dag id {self.trigger_dag_id} not found in DagModel")

        bag = DagBag(dag_folder=target_model.fileloc, read_dags_from_db=True)
        dag = bag.get_dag(self.trigger_dag_id)
        dag.clear(start_date=parsed_execution_date, end_date=parsed_execution_date)
        dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

    if dag_run is None:
        raise RuntimeError("The dag_run should be set here!")

    # Store the execution date from the dag run (either created or found above) to
    # be used when creating the extra link on the webserver.
    ti = context['task_instance']
    ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO, value=dag_run.execution_date.isoformat())
    ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

    if not self.wait_for_completion:
        return

    # Poll until the run reaches a failed or an allowed state.
    while True:
        self.log.info(
            'Waiting for %s on %s to become allowed state %s ...',
            self.trigger_dag_id,
            dag_run.execution_date,
            self.allowed_states,
        )
        time.sleep(self.poke_interval)

        dag_run.refresh_from_db()
        state = dag_run.state
        # Failed states are checked first, so a state listed in both
        # collections counts as a failure.
        if state in self.failed_states:
            raise AirflowException(
                f"{self.trigger_dag_id} failed with failed states {state}"
            )
        if state in self.allowed_states:
            self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
            return
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    dag_run: DagModel,
    run_id: Optional[str],
    conf: Optional[Union[dict, str]],
    execution_date: Optional[datetime],
    replace_microseconds: bool,
) -> List[DagRun]:  # pylint: disable=too-many-arguments
    """Triggers DAG run.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: DAG Run model; its ``find`` method is used to detect an
        existing run
    :param run_id: ID of the dag_run; generated from the execution date when falsy
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if the bag does not hold ``dag_id`` after fetching
    :raises ValueError: if the execution date is not localized or precedes the
        ``start_date`` in the DAG's ``default_args``
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    execution_date = execution_date if execution_date else timezone.utcnow()

    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if dag.default_args and 'start_date' in dag.default_args:
        min_dag_start_date = dag.default_args["start_date"]
        if min_dag_start_date and execution_date < min_dag_start_date:
            raise ValueError(
                "The execution_date [{0}] should be >= start_date [{1}] from DAG's default_args".format(
                    execution_date.isoformat(), min_dag_start_date.isoformat()
                )
            )

    run_id = run_id or DagRun.generate_run_id(DagRunType.MANUAL, execution_date)

    # ``find`` yields a collection of runs, which has no ``run_id`` attribute;
    # reading it turned the duplicate case into an AttributeError. The run id
    # that was searched for is exactly the one to report.
    existing_runs = dag_run.find(dag_id=dag_id, run_id=run_id)
    if existing_runs:
        raise DagRunAlreadyExists(
            f"Run id {run_id} already exists for dag id {dag_id}")

    # A dict is used as-is; anything else is parsed as JSON.
    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    # The DAG itself first, then each of its subdags.
    dags_to_trigger = [dag] + dag.subdags
    for _dag in dags_to_trigger:
        trigger = _dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)

    return triggers
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    dag_run: DagModel,
    run_id: Optional[str],
    conf: Optional[Union[dict, str]],
    execution_date: Optional[datetime],
    replace_microseconds: bool,
) -> List[DagRun]:  # pylint: disable=too-many-arguments
    """Create a dag run for ``dag_id`` and for each of its subdags.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: DAG Run model; its ``find`` method is used to detect an
        existing run
    :param run_id: ID of the dag_run; derived from the execution date when falsy
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags``
    :raises ValueError: if the execution date is not localized
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))
    root_dag = dag_bag.get_dag(dag_id)

    if not execution_date:
        execution_date = timezone.utcnow()
    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")
    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    run_id = run_id or "manual__{0}".format(execution_date.isoformat())

    if dag_run.find(dag_id=dag_id, run_id=run_id):
        raise DagRunAlreadyExists(
            "Run id {} already exists for dag id {}".format(run_id, dag_id))

    # A dict is used as-is; any other truthy value is parsed as JSON.
    if not conf:
        run_conf = None
    elif isinstance(conf, dict):
        run_conf = conf
    else:
        run_conf = json.loads(conf)

    created = []
    # Stack-based walk: the most recently pushed DAG is handled first.
    pending = [root_dag]
    while pending:
        current = pending.pop()
        created.append(
            current.create_dagrun(
                run_id=run_id,
                execution_date=execution_date,
                state=State.RUNNING,
                conf=run_conf,
                external_trigger=True,
            )
        )
        if current.subdags:
            pending.extend(current.subdags)
    return created
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> List[Optional[DagRun]]:
    """Create a queued dag run for ``dag_id`` and for each of its subdags.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param run_id: ID of the dag_run; generated from the execution date when falsy
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if the bag does not hold ``dag_id`` after fetching
    :raises ValueError: if the execution date is not localized or precedes the
        ``start_date`` in the DAG's ``default_args``
    :raises DagRunAlreadyExists: if ``DagRun.find_duplicate`` reports a run
    """
    # Fetch first: for serialized storage this is what populates ``dag_bag.dags``.
    dag = dag_bag.get_dag(dag_id)
    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    if not execution_date:
        execution_date = timezone.utcnow()
    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")
    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    # A run may not start before the start_date declared in default_args.
    if dag.default_args and 'start_date' in dag.default_args:
        min_dag_start_date = dag.default_args["start_date"]
        if min_dag_start_date and execution_date < min_dag_start_date:
            raise ValueError(
                f"The execution_date [{execution_date.isoformat()}] should be >= start_date "
                f"[{min_dag_start_date.isoformat()}] from DAG's default_args")

    if not run_id:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)

    if DagRun.find_duplicate(dag_id=dag_id, execution_date=execution_date, run_id=run_id):
        raise DagRunAlreadyExists(
            f"A Dag Run already exists for dag id {dag_id} at {execution_date} with run id {run_id}"
        )

    # A dict is used as-is; any other truthy value is parsed as JSON.
    if not conf:
        run_conf = None
    elif isinstance(conf, dict):
        run_conf = conf
    else:
        run_conf = json.loads(conf)

    # The DAG itself first, then each of its subdags; every run is stamped
    # with the hash recorded for the top-level dag_id.
    return [
        _dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.QUEUED,
            conf=run_conf,
            external_trigger=True,
            dag_hash=dag_bag.dags_hash.get(dag_id),
        )
        for _dag in [dag] + dag.subdags
    ]
def _trigger_dag(
        dag_id,  # type: str
        dag_bag,  # type: DagBag
        dag_run,  # type: DagModel
        run_id,  # type: Optional[str]
        conf,  # type: Optional[Union[dict, str]]
        execution_date,  # type: Optional[datetime]
        replace_microseconds,  # type: bool
):  # pylint: disable=too-many-arguments
    # type: (...) -> List[DagRun]
    """Triggers DAG run.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: DAG Run model; its ``find`` method is used to detect an
        existing run
    :param run_id: ID of the dag_run; derived from the execution date when falsy
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; ``timezone.utcnow()`` when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if the bag does not hold ``dag_id`` after fetching
    :raises ValueError: if the execution date is not localized or precedes the
        ``start_date`` in the DAG's ``default_args``
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound("Dag id {} not found".format(dag_id))

    execution_date = execution_date if execution_date else timezone.utcnow()

    # Raise explicitly rather than ``assert``: assertions vanish under ``-O``.
    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if dag.default_args and 'start_date' in dag.default_args:
        min_dag_start_date = dag.default_args["start_date"]
        if min_dag_start_date and execution_date < min_dag_start_date:
            raise ValueError(
                "The execution_date [{0}] should be >= start_date [{1}] from DAG's default_args".format(
                    execution_date.isoformat(),
                    min_dag_start_date.isoformat()))

    if not run_id:
        run_id = "manual__{0}".format(execution_date.isoformat())

    dag_run_id = dag_run.find(dag_id=dag_id, run_id=run_id)
    if dag_run_id:
        raise DagRunAlreadyExists("Run id {} already exists for dag id {}".format(
            run_id,
            dag_id
        ))

    # A dict is used as-is; anything else is parsed as JSON.
    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    triggers = []
    # Stack-based walk over the DAG and all nested subdags.
    dags_to_trigger = [dag]
    while dags_to_trigger:
        dag = dags_to_trigger.pop()
        trigger = dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)
        if dag.subdags:
            dags_to_trigger.extend(dag.subdags)
    return triggers