def test_mark_tasks_past(self): # set one task to success towards end of scheduled dag runs snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") altered = set_state(tasks=[task], execution_date=self.execution_dates[1], upstream=False, downstream=False, future=False, past=True, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 2) self.verify_state(self.dag1, [task.task_id], self.execution_dates, State.SUCCESS, snapshot) snapshot = TestMarkTasks.snapshot_state(self.dag3, self.dag3_execution_dates) task = self.dag3.get_task("run_this") altered = set_state(tasks=[task], execution_date=self.dag3_execution_dates[1], upstream=False, downstream=False, future=False, past=True, state=State.FAILED, commit=True) self.assertEqual(len(altered), 2) self.verify_state(self.dag3, [task.task_id], self.dag3_execution_dates[:2], State.FAILED, snapshot) self.verify_state(self.dag3, [task.task_id], [self.dag3_execution_dates[2]], None, snapshot)
def test_mark_tasks_future(self): # set one task to success towards end of scheduled dag runs snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=True, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 2) self.verify_state(self.dag1, [task.task_id], self.execution_dates, State.SUCCESS, snapshot)
def test_mark_tasks_past(self): # set one task to success towards end of scheduled dag runs snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") altered = set_state(task=task, execution_date=self.execution_dates[1], upstream=False, downstream=False, future=False, past=True, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 2) self.verify_state(self.dag1, [task.task_id], self.execution_dates, State.SUCCESS, snapshot)
def test_mark_tasks_now(self): # set one task to success but do not commit snapshot = self.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=False) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], None, snapshot) # set one and only one task to success altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.SUCCESS, snapshot) # set no tasks altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 0) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.SUCCESS, snapshot) # set task to other than success altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.FAILED, commit=True) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.FAILED, snapshot) # dont alter other tasks snapshot = self.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_0") altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.SUCCESS, snapshot)
def test_mark_tasks_multiple(self): # set multiple tasks to success snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) tasks = [self.dag1.get_task("runme_1"), self.dag1.get_task("runme_2")] altered = set_state(tasks=tasks, execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 2) self.verify_state(self.dag1, [task.task_id for task in tasks], [self.execution_dates[0]], State.SUCCESS, snapshot)
def test_mark_downstream(self): # test downstream snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") relatives = task.get_flat_relatives(upstream=False) task_ids = [t.task_id for t in relatives] task_ids.append(task.task_id) altered = set_state(tasks=[task], execution_date=self.execution_dates[0], upstream=False, downstream=True, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 3) self.verify_state(self.dag1, task_ids, [self.execution_dates[0]], State.SUCCESS, snapshot)
def test_mark_downstream(self): # test downstream snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") relatives = task.get_flat_relatives(upstream=False) task_ids = [t.task_id for t in relatives] task_ids.append(task.task_id) altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=True, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 3) self.verify_state(self.dag1, task_ids, [self.execution_dates[0]], State.SUCCESS, snapshot)
def test_mark_tasks_subdag(self): # set one task to success towards end of scheduled dag runs task = self.dag2.get_task("section-1") relatives = task.get_flat_relatives(upstream=False) task_ids = [t.task_id for t in relatives] task_ids.append(task.task_id) altered = set_state(task=task, execution_date=self.execution_dates[0], upstream=False, downstream=True, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 14) # cannot use snapshot here as that will require drilling down the # the sub dag tree essentially recreating the same code as in the # tested logic. self.verify_state(self.dag2, task_ids, [self.execution_dates[0]], State.SUCCESS, [])
def post_set_task_instances_state(dag_id, session): """Set a state of task instances.""" body = request.get_json() try: data = set_task_instance_state_form.load(body) except ValidationError as err: raise BadRequest(detail=str(err.messages)) error_message = f"Dag ID {dag_id} not found" try: dag = current_app.dag_bag.get_dag(dag_id) if not dag: raise NotFound(error_message) except SerializedDagNotFound: # If DAG is not found in serialized_dag table raise NotFound(error_message) task_id = data['task_id'] task = dag.task_dict.get(task_id) if not task: error_message = f"Task ID {task_id} not found" raise NotFound(error_message) tis = set_state( tasks=[task], execution_date=data["execution_date"], upstream=data["include_upstream"], downstream=data["include_downstream"], future=data["include_future"], past=data["include_past"], state=data["new_state"], commit=not data["dry_run"], ) execution_dates = {ti.execution_date for ti in tis} execution_date_to_run_id_map = dict( session.query(DR.execution_date, DR.run_id).filter( DR.dag_id == dag_id, DR.execution_date.in_(execution_dates) ) ) tis_with_run_id = [(ti, execution_date_to_run_id_map.get(ti.execution_date)) for ti in tis] return task_instance_reference_collection_schema.dump( TaskInstanceReferenceCollection(task_instances=tis_with_run_id) )
def test_mark_tasks_now(self): # set one task to success but do not commit snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_1") altered = set_state(tasks=[task], execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=False) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], None, snapshot) # set one and only one task to success altered = set_state(tasks=[task], execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.SUCCESS, snapshot) # set no tasks altered = set_state(tasks=[task], execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 0) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.SUCCESS, snapshot) # set task to other than success altered = set_state(tasks=[task], execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.FAILED, commit=True) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.FAILED, snapshot) # dont alter other tasks snapshot = TestMarkTasks.snapshot_state(self.dag1, self.execution_dates) task = self.dag1.get_task("runme_0") altered = set_state(tasks=[task], execution_date=self.execution_dates[0], upstream=False, downstream=False, future=False, past=False, state=State.SUCCESS, commit=True) self.assertEqual(len(altered), 1) self.verify_state(self.dag1, [task.task_id], [self.execution_dates[0]], State.SUCCESS, snapshot) # set one task as FAILED. dag3 has schedule_interval None snapshot = TestMarkTasks.snapshot_state(self.dag3, self.dag3_execution_dates) task = self.dag3.get_task("run_this") altered = set_state(tasks=[task], execution_date=self.dag3_execution_dates[1], upstream=False, downstream=False, future=False, past=False, state=State.FAILED, commit=True) # exactly one TaskInstance should have been altered self.assertEqual(len(altered), 1) # task should have been marked as failed self.verify_state(self.dag3, [task.task_id], [self.dag3_execution_dates[1]], State.FAILED, snapshot) # tasks on other days should be unchanged self.verify_state(self.dag3, [task.task_id], [self.dag3_execution_dates[0]], None, snapshot) self.verify_state(self.dag3, [task.task_id], [self.dag3_execution_dates[2]], None, snapshot)