def test_clear_task_instances_without_dag(self):
    """Clear task instances without handing the DAG to ``clear_task_instances``.

    Runs one task without an explicit ``retries`` and one with ``retries=2``,
    clears both task instances with no ``dag`` argument, and checks the
    resulting ``try_number`` / ``max_tries`` values.
    """
    dag = DAG(
        'test_clear_task_instances_without_dag',
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=10),
    )
    task0 = DummyOperator(task_id='task_0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='task_1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    ti0.run()
    ti1.run()

    session = settings.Session()
    try:
        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session)
        session.commit()
    finally:
        # Always release the session, even if clearing or committing fails.
        session.close()

    # When dag is None, max_tries will be maximum of original max_tries or try_number.
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # Next try to run will be try 2
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 2)
    self.assertEqual(ti1.max_tries, 2)
def test_clear_task_instances_without_task(self):
    """Clear task instances whose tasks have been removed from the DAG.

    Runs two tasks, empties ``dag.task_dict`` so the DAG no longer knows
    them, clears the task instances while passing that DAG, and checks the
    resulting ``try_number`` / ``max_tries`` values.
    """
    dag = DAG(
        'test_clear_task_instances_without_task',
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=10),
    )
    task0 = DummyOperator(task_id='task0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='task1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    ti0.run()
    ti1.run()

    # Remove the task from dag.
    dag.task_dict = {}
    self.assertFalse(dag.has_task(task0.task_id))
    self.assertFalse(dag.has_task(task1.task_id))

    session = settings.Session()
    try:
        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        # Pass the DAG explicitly: without it this test would only repeat the
        # "no dag" scenario and never hit the "task missing from dag" case.
        clear_task_instances(qry, session, dag=dag)
        session.commit()
    finally:
        # Always release the session, even if clearing or committing fails.
        session.close()

    # The tasks are missing from the dag, so max_tries is expected to be the
    # maximum of the original max_tries or try_number (as when dag is None).
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # Next try to run will be try 2
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 2)
    self.assertEqual(ti1.max_tries, 2)
def test_clear_task_instances_without_task(self):
    """Clear task instances whose tasks have been removed from the DAG.

    Runs two tasks, empties ``dag.task_dict`` so the DAG no longer knows
    them, clears the task instances while passing that DAG, and checks the
    resulting ``try_number`` / ``max_tries`` values.
    """
    dag = DAG(
        'test_clear_task_instances_without_task',
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=10),
    )
    task0 = DummyOperator(task_id='task0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='task1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    ti0.run()
    ti1.run()

    # Remove the task from dag.
    dag.task_dict = {}
    self.assertFalse(dag.has_task(task0.task_id))
    self.assertFalse(dag.has_task(task1.task_id))

    with create_session() as session:
        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        # Pass the DAG explicitly: without it this test would only repeat the
        # "no dag" scenario and never hit the "task missing from dag" case.
        clear_task_instances(qry, session, dag=dag)

    # The tasks are missing from the dag, so max_tries is expected to be the
    # maximum of the original max_tries or try_number (as when dag is None).
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # Next try to run will be try 2
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 2)
    self.assertEqual(ti1.max_tries, 2)
def test_clear_task_instances(self):
    """Clear two executed task instances while supplying their DAG.

    After clearing, both task instances are expected to report
    ``try_number == 2``; ``max_tries`` is expected to be 1 for the task
    without an explicit ``retries`` and 3 for the task with ``retries=2``.
    """
    window_end = DEFAULT_DATE + datetime.timedelta(days=10)
    dag = DAG('test_clear_task_instances', start_date=DEFAULT_DATE, end_date=window_end)

    plain_task = DummyOperator(task_id='0', owner='test', dag=dag)
    retrying_task = DummyOperator(task_id='1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=plain_task, execution_date=DEFAULT_DATE)
    ti1 = TI(task=retrying_task, execution_date=DEFAULT_DATE)
    executed = (ti0, ti1)

    dag.create_dagrun(
        execution_date=ti0.execution_date,
        state=State.RUNNING,
        run_type=DagRunType.SCHEDULED,
    )

    for instance in executed:
        instance.run()

    with create_session() as session:
        in_this_dag = session.query(TI).filter(TI.dag_id == dag.dag_id)
        clear_task_instances(in_this_dag.all(), session, dag=dag)

    for instance in executed:
        instance.refresh_from_db()

    # Next try to run will be try 2
    assert ti0.try_number == 2
    assert ti0.max_tries == 1
    assert ti1.try_number == 2
    assert ti1.max_tries == 3
def test_clear_task_instances_without_dag(self):
    """Clear two executed task instances without supplying their DAG.

    After clearing, both task instances are expected to report
    ``try_number == 2``; ``max_tries`` is expected to be 1 for the task
    without an explicit ``retries`` and 2 for the task with ``retries=2``.
    """
    window_end = DEFAULT_DATE + datetime.timedelta(days=10)
    dag = DAG(
        'test_clear_task_instances_without_dag',
        start_date=DEFAULT_DATE,
        end_date=window_end,
    )

    plain_task = DummyOperator(task_id='task_0', owner='test', dag=dag)
    retrying_task = DummyOperator(task_id='task_1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=plain_task, execution_date=DEFAULT_DATE)
    ti1 = TI(task=retrying_task, execution_date=DEFAULT_DATE)

    dag.create_dagrun(
        execution_date=ti0.execution_date,
        state=State.RUNNING,
        run_type=DagRunType.SCHEDULED,
    )

    for instance in (ti0, ti1):
        instance.run()

    with create_session() as session:
        in_this_dag = session.query(TI).filter(TI.dag_id == dag.dag_id)
        clear_task_instances(in_this_dag.all(), session)

    # When dag is None, max_tries will be maximum of original max_tries or try_number.
    for instance in (ti0, ti1):
        instance.refresh_from_db()

    # Next try to run will be try 2
    expectations = (
        (ti0, 2, 1),
        (ti1, 2, 2),
    )
    for instance, wanted_try_number, wanted_max_tries in expectations:
        self.assertEqual(instance.try_number, wanted_try_number)
        self.assertEqual(instance.max_tries, wanted_max_tries)
def test_clear_task_instances_for_backfill_dagrun(self):
    """Clear a task instance that belongs to a backfill dag run.

    Creates a dag run through ``self.create_dag_run(..., is_backfill=True)``,
    runs and clears one task instance, and asserts that the dag run's state
    is ``State.RUNNING`` afterwards.
    """
    now = timezone.utcnow()
    dag_id = 'test_clear_task_instances_for_backfill_dagrun'
    dag = DAG(dag_id=dag_id, start_date=now)

    session = settings.Session()
    try:
        self.create_dag_run(dag, execution_date=now, is_backfill=True)

        task0 = DummyOperator(task_id='backfill_task_0', owner='test', dag=dag)
        ti0 = TI(task=task0, execution_date=now)
        ti0.run()

        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session)
        session.commit()
        ti0.refresh_from_db()

        dr0 = (
            session.query(DagRun)
            .filter(DagRun.dag_id == dag_id, DagRun.execution_date == now)
            .first()
        )
        # Fail with a clear assertion rather than an AttributeError on None
        # if the dag run row cannot be found.
        assert dr0 is not None
        assert dr0.state == State.RUNNING
    finally:
        # Always release the session, even if an assertion above fails.
        session.close()
def test_clear_task_instances_with_task_reschedule(self):
    """Test that TaskReschedules are deleted correctly when TaskInstances are cleared"""
    window_end = DEFAULT_DATE + datetime.timedelta(days=10)
    with DAG(
        'test_clear_task_instances_with_task_reschedule',
        start_date=DEFAULT_DATE,
        end_date=window_end,
    ) as dag:
        # Both sensors never succeed and run in "reschedule" mode.
        task0 = PythonSensor(task_id='0', python_callable=lambda: False, mode="reschedule")
        task1 = PythonSensor(task_id='1', python_callable=lambda: False, mode="reschedule")

    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)

    dag.create_dagrun(
        execution_date=ti0.execution_date,
        state=State.RUNNING,
        run_type=DagRunType.SCHEDULED,
    )

    for instance in (ti0, ti1):
        instance.run()

    with create_session() as session:

        def count_task_reschedule(task_id):
            """Count first-try TaskReschedule rows of ``task_id`` at DEFAULT_DATE."""
            matching = session.query(TaskReschedule).filter(
                TaskReschedule.dag_id == dag.dag_id,
                TaskReschedule.task_id == task_id,
                TaskReschedule.execution_date == DEFAULT_DATE,
                TaskReschedule.try_number == 1,
            )
            return matching.count()

        # Each sensor has one reschedule row before anything is cleared.
        assert count_task_reschedule(ti0.task_id) == 1
        assert count_task_reschedule(ti1.task_id) == 1

        # Clear only the first task instance.
        only_ti0 = session.query(TI).filter(
            TI.dag_id == dag.dag_id,
            TI.task_id == ti0.task_id,
        )
        clear_task_instances(only_ti0.all(), session, dag=dag)

        # Its reschedule row is gone; the other task's row is untouched.
        assert count_task_reschedule(ti0.task_id) == 0
        assert count_task_reschedule(ti1.task_id) == 1
def test_clear_task_instances(self):
    """Clear two executed task instances while supplying their DAG.

    Runs one task without an explicit ``retries`` and one with ``retries=2``,
    clears both task instances with ``dag=dag``, and checks the resulting
    ``try_number`` / ``max_tries`` values.
    """
    dag = DAG(
        'test_clear_task_instances',
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=10),
    )
    task0 = DummyOperator(task_id='0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)

    ti0.run()
    ti1.run()

    session = settings.Session()
    try:
        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session, dag=dag)
        session.commit()
    finally:
        # Always release the session, even if clearing or committing fails.
        session.close()

    ti0.refresh_from_db()
    ti1.refresh_from_db()
    self.assertEqual(ti0.try_number, 1)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 1)
    self.assertEqual(ti1.max_tries, 3)
def test_clear_task_instances(self):
    """Clear two executed task instances while supplying their DAG.

    Runs one task without an explicit ``retries`` and one with ``retries=2``,
    clears both task instances with ``dag=dag``, and checks the resulting
    ``try_number`` / ``max_tries`` values.
    """
    dag = DAG(
        'test_clear_task_instances',
        start_date=DEFAULT_DATE,
        end_date=DEFAULT_DATE + datetime.timedelta(days=10),
    )
    task0 = DummyOperator(task_id='0', owner='test', dag=dag)
    task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)

    ti0.run()
    ti1.run()

    session = settings.Session()
    try:
        qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all()
        clear_task_instances(qry, session, dag=dag)
        session.commit()
    finally:
        # Always release the session, even if clearing or committing fails.
        session.close()

    ti0.refresh_from_db()
    ti1.refresh_from_db()
    self.assertEqual(ti0.try_number, 1)
    self.assertEqual(ti0.max_tries, 1)
    self.assertEqual(ti1.try_number, 1)
    self.assertEqual(ti1.max_tries, 3)
def get(self):
    """
    Apply ``self.action`` to the selected task instance(s) and report the result.

    The request values are read from ``self.dag``, ``self.task``,
    ``self.execution_date``, ``self.action`` and ``self.options``.

    :param dag_id: dag_id of target dag run
    :param task_id: task_id of target task run
    :param execution_date: the execution date from which to start looking
    :param action: which action to perform
        - stop running task: kill
        - restart running task: restart
        - mark success: mark_success
    :param options (comma separated):
        - upstream
        - downstream
    :return: the ``jsonify`` response built from a dict with a ``message`` key
    """
    try:
        dag = self.dagbag.get_dag(self.dag)
        task = dag.get_task(self.task)
        task_list = [task]
    except Exception as err:
        # Any failure while resolving the dag/task is reported to the caller.
        self.session.rollback()
        return jsonify({"message": str(err)})
    else:
        # ``get_flat_relatives(upstream=True)`` yields the upstream relatives,
        # ``upstream=False`` the downstream ones.
        if "upstream" in self.options:
            task_list.extend(task.get_flat_relatives(upstream=True))
        if "downstream" in self.options:
            task_list.extend(task.get_flat_relatives(upstream=False))

        # Use a distinct loop name so ``task`` keeps pointing at the target task.
        task_ids = [item.task_id for item in task_list]
        tis = self.session.query(self.TI).filter(
            self.TI.dag_id == self.dag,
            self.TI.execution_date == self.execution_date)

        if self.action == "restart":
            tis_all = tis.filter(self.TI.state != "running",
                                 self.TI.task_id.in_(task_ids)).all()
            clear_task_instances(tis_all, self.session, dag=task.dag)
            self.session.commit()
        elif self.action == "kill":
            tis_all = tis.filter(self.TI.task_id == self.task).all()
            if not tis_all:
                # Nothing matched: answer with a message instead of failing
                # with an IndexError on ``tis_all[0]``.
                return jsonify({
                    "message": "Can't kill/stop task. No task instance found."
                })
            this_state = tis_all[0].state
            # NOTE(review): this refuses to kill a task that is *running*,
            # which looks inverted for a "stop running task" action - confirm
            # the intended condition before changing it.
            if this_state in (None, "running"):
                message = {
                    "message": "Can't kill/stop task. Current state is {}.".format(
                        this_state)
                }
                return jsonify(message)
            clear_task_instances(tis_all, self.session, activate_dag_runs=False)
            self.session.commit()
        elif self.action == "mark_success":
            tis_all = tis.filter(self.TI.state != "running",
                                 self.TI.task_id.in_(task_ids)).all()
            for ti in tis_all:
                ti.state = State.SUCCESS
            if not self.options:
                self._set_dag_run_state(State.RUNNING)
            else:
                self._set_dag_run_state(State.SUCCESS)
        else:
            # Unsupported action: previously this fell through and raised a
            # NameError because ``tis_all`` was never assigned.
            return jsonify({
                "message": "Unknown action: {}".format(self.action)
            })

        modified_tasks = '\n'.join('{}'.format(item) for item in tis_all)
        message = {
            "message": "The following tasks had status set to: {}\n{}".format(
                self.action.upper(), modified_tasks)
        }
        self.session.commit()
        return jsonify(message)
    finally:
        # Runs on every exit path, including the early returns above.
        self.session.close()