def test_dagrun_update_state_with_handle_callback_failure(self):
    """Check the failure callback request returned by ``DagRun.update_state``.

    Builds a two-task DAG whose downstream task is already FAILED, creates a
    RUNNING dag run for it and calls ``update_state(execute_callbacks=False)``.
    The run is expected to end up FAILED and the second element of the
    returned pair is expected to equal a ``DagCallbackRequest`` flagged as a
    failure callback with the message ``"task_failure"``.
    """
    # NOTE(review): another method with this exact name is defined later in
    # this file. If both live in the same class, the later definition replaces
    # this one and this body never runs -- confirm and rename/remove one.
    dag_id = 'test_dagrun_update_state_with_handle_callback_failure'

    def on_failure_callable(context):
        # Registered as the DAG's on_failure_callback; checks it is handed
        # the dag run belonging to this DAG.
        self.assertEqual(context['dag_run'].dag_id, dag_id)

    dag = DAG(
        dag_id=dag_id,
        start_date=datetime.datetime(2017, 1, 1),
        on_failure_callback=on_failure_callable,
    )

    # succeeded1 -> failed2
    upstream_task = DummyOperator(task_id='test_state_succeeded1', dag=dag)
    downstream_task = DummyOperator(task_id='test_state_failed2', dag=dag)
    upstream_task.set_downstream(downstream_task)

    states_by_task_id = {
        'test_state_succeeded1': State.SUCCESS,
        'test_state_failed2': State.FAILED,
    }
    dag_run = self.create_dag_run(
        dag=dag,
        state=State.RUNNING,
        task_states=states_by_task_id,
    )

    _, callback = dag_run.update_state(execute_callbacks=False)
    self.assertEqual(State.FAILED, dag_run.state)

    # With execute_callbacks=False this test expects update_state() to hand
    # the failure callback back to the caller as a DagCallbackRequest.
    expected_request = DagCallbackRequest(
        full_filepath=dag_run.dag.fileloc,
        dag_id=dag_id,
        execution_date=dag_run.execution_date,
        is_failure_callback=True,
        msg="task_failure",
    )
    assert callback == expected_request
def test_dagrun_update_state_with_handle_callback_failure(self):
    """Check the failure callback request returned for a serialized DAG's run.

    Builds a two-task DAG whose downstream task is already FAILED, round-trips
    the DAG through ``SerializedDAG``, creates a RUNNING dag run for it and
    calls ``update_state(execute_callbacks=False)``. The run is expected to
    end up FAILED and the second element of the returned pair is expected to
    equal a ``DagCallbackRequest`` flagged as a failure callback with the
    message ``"task_failure"``.
    """
    dag_id = 'test_dagrun_update_state_with_handle_callback_failure'

    def on_failure_callable(context):
        # Registered as the DAG's on_failure_callback; checks it is handed
        # the dag run belonging to this DAG.
        assert context['dag_run'].dag_id == dag_id

    dag = DAG(
        dag_id=dag_id,
        start_date=datetime.datetime(2017, 1, 1),
        on_failure_callback=on_failure_callable,
    )

    # succeeded1 -> failed2
    upstream_task = DummyOperator(task_id='test_state_succeeded1', dag=dag)
    downstream_task = DummyOperator(task_id='test_state_failed2', dag=dag)
    upstream_task.set_downstream(downstream_task)

    states_by_task_id = {
        'test_state_succeeded1': State.SUCCESS,
        'test_state_failed2': State.FAILED,
    }

    # The scheduler works on the serialized form of a DAG, so exercise that
    # representation rather than the in-memory DAG object.
    serialized_form = SerializedDAG.to_dict(dag)
    dag = SerializedDAG.from_dict(serialized_form)

    dag_run = self.create_dag_run(
        dag=dag,
        state=State.RUNNING,
        task_states=states_by_task_id,
    )

    _, callback = dag_run.update_state(execute_callbacks=False)
    assert State.FAILED == dag_run.state

    # With execute_callbacks=False this test expects update_state() to hand
    # the failure callback back to the caller as a DagCallbackRequest.
    expected_request = DagCallbackRequest(
        full_filepath=dag_run.dag.fileloc,
        dag_id=dag_id,
        execution_date=dag_run.execution_date,
        is_failure_callback=True,
        msg="task_failure",
    )
    assert callback == expected_request
def _schedule_dag_run(
    self,
    dag_run: DagRun,
    session: Session,
) -> Optional[DagCallbackRequest]:
    """
    Make scheduling decisions about an individual dag run

    Returns ``None`` without scheduling anything when:

    * the DAG for the run cannot be found in the DagBag/DB;
    * the run has exceeded ``dag.dagrun_timeout`` -- the run is set to FAILED,
      its unfinished task instances are set to SKIPPED and a ``timed_out``
      failure callback is sent to the processor directly from here;
    * the run's execution date is in the future and the DAG does not allow
      future execution dates.

    :param dag_run: The DagRun to schedule
    :param session: Session used to look up the DAG, query/merge task
        instances and update the dag run state
    :return: Callback that needs to be executed, or ``None`` if there is none
    """
    dag = dag_run.dag = self.dagbag.get_dag(dag_run.dag_id, session=session)

    if not dag:
        self.log.error("Couldn't find dag %s in DagBag/DB!", dag_run.dag_id)
        # The declared return type is Optional[DagCallbackRequest]; "nothing
        # to execute" is None, not the integer 0.
        return None

    if (
        dag_run.start_date
        and dag.dagrun_timeout
        and dag_run.start_date < timezone.utcnow() - dag.dagrun_timeout
    ):
        dag_run.set_state(State.FAILED)
        unfinished_task_instances = (
            session.query(TI)
            .filter(TI.dag_id == dag_run.dag_id)
            .filter(TI.run_id == dag_run.run_id)
            .filter(TI.state.in_(State.unfinished))
        )
        for task_instance in unfinished_task_instances:
            task_instance.state = State.SKIPPED
            session.merge(task_instance)
        session.flush()
        self.log.info("Run %s of %s has timed-out", dag_run.run_id, dag_run.dag_id)

        callback_to_execute = DagCallbackRequest(
            full_filepath=dag.fileloc,
            dag_id=dag.dag_id,
            run_id=dag_run.run_id,
            is_failure_callback=True,
            msg='timed_out',
        )

        # Send SLA & DAG Success/Failure Callbacks to be executed
        self._send_dag_callbacks_to_processor(dag_run, callback_to_execute)

        # The timeout callback was already dispatched above, so there is
        # nothing left for the caller to execute.
        return None

    if dag_run.execution_date > timezone.utcnow() and not dag.allow_future_exec_dates:
        self.log.error("Execution date is in future: %s", dag_run.execution_date)
        return None

    self._verify_integrity_if_dag_changed(dag_run=dag_run, session=session)
    # TODO[HA]: Rename update_state -> schedule_dag_run, ?? something else?
    schedulable_tis, callback_to_run = dag_run.update_state(session=session, execute_callbacks=False)

    # This will do one query per dag run. We "could" build up a complex
    # query to update all the TIs across all the execution dates and dag
    # IDs in a single query, but it turns out that can be _very very slow_
    # see #11147/commit ee90807ac for more details
    dag_run.schedule_tis(schedulable_tis, session)

    return callback_to_run