def _create_task_state(dag_id, task_id, execution_date):
    """
    Merge a ``TaskState`` for the given task into the database and commit.

    :param dag_id: value passed to ``TaskState`` as ``dag_id``
    :param task_id: value passed to ``TaskState`` as ``task_id``
    :param execution_date: value passed to ``TaskState`` as ``execution_date``
    """
    with create_session() as db_session:
        # merge() rather than add(): presumably so an already-stored row with
        # the same key is reused instead of conflicting - TODO confirm.
        db_session.merge(
            TaskState(
                dag_id=dag_id,
                task_id=task_id,
                execution_date=execution_date,
            )
        )
        db_session.commit()
def verify_integrity(self, session=None):
    """
    Verifies the DagRun by checking for removed tasks or tasks that are not in the
    database yet. It will set state to removed or add the task if required.

    For every existing task instance of this run:

    * if its task can no longer be found in the DAG, the instance is marked
      ``State.REMOVED`` (only when this run is not running and the DAG is not
      partial);
    * if its task is in the DAG but the instance is ``State.REMOVED``, the
      instance is reset to ``State.NONE``.

    For every task of the DAG that has no task instance yet, a ``TaskInstance``
    and a matching ``TaskState`` are added to the session.

    :param session: database session used for adding rows and committing.
        NOTE(review): the ``None`` default is presumably replaced by a
        session-providing decorator outside this view - confirm; the body
        requires a real session.
    """
    from airflow.models.taskinstance import TaskInstance  # Avoid circular import

    dag = self.get_dag()
    tis = self.get_task_instances(session=session)

    # check for removed or restored tasks
    # A set gives constant-time membership tests in the second loop.
    task_ids = set()
    for ti in tis:
        task_ids.add(ti.task_id)
        task = None
        try:
            task = dag.get_task(ti.task_id)
        except AirflowException:
            # A ti that is already REMOVED is simply ignored.  Otherwise it is
            # marked removed, but only when the run is not running and the
            # DAG is not partial.  The state is compared with ``!=``: an
            # identity check (``is not``) is unreliable for values that do
            # not come from the very same constant object, e.g. a state
            # string loaded from the database.
            if (ti.state != State.REMOVED
                    and self.state != State.RUNNING
                    and not dag.partial):
                self.log.warning("Failed to get task '{}' for dag '{}'. "
                                 "Marking it as removed.".format(ti, dag))
                Stats.incr(
                    "task_removed_from_dag.{}".format(dag.dag_id), 1, 1)
                ti.state = State.REMOVED

        is_task_in_dag = task is not None
        should_restore_task = is_task_in_dag and ti.state == State.REMOVED
        if should_restore_task:
            self.log.info("Restoring task '{}' which was previously "
                          "removed from DAG '{}'".format(ti, dag))
            Stats.incr("task_restored_to_dag.{}".format(dag.dag_id), 1, 1)
            ti.state = State.NONE

    # check for missing tasks
    for task in six.itervalues(dag.task_dict):
        # Tasks starting after this run's execution date are skipped unless
        # the run is a backfill.
        if task.start_date > self.execution_date and not self.is_backfill:
            continue

        if task.task_id not in task_ids:
            Stats.incr(
                "task_instance_created-{}".format(task.__class__.__name__),
                1, 1)
            # add TaskState to db
            ti = TaskInstance(task, self.execution_date)
            ts = TaskState(ti)
            # Ask the task for its handler once and reuse the result.
            event_handler = task.event_met_handler()
            if event_handler is not None:
                ts.event_handler = event_handler
            session.add(ti)
            session.add(ts)

    session.commit()
def _operator_handle_event(event, operator, execution_date) -> SchedulingAction:
    """
    Pass ``event`` to the operator's events handler and store the new state.

    The ``TaskState`` for the operator and ``execution_date`` is looked up; if
    the lookup yields nothing, a new one is created and added to the database
    first. The handler is then called with the event and the current
    ``task_state`` value, the state it returns is written back, and
    ``update_task_state()`` is called on the record.

    :param event: the event forwarded to the handler
    :param operator: object providing ``dag_id``, ``task_id`` and
        ``get_events_handler()``
    :param execution_date: execution date identifying the task state record
    :return: the scheduling action returned by the handler
    """
    dag_id = operator.dag_id
    task_id = operator.task_id

    record = TaskState.get_task_state(dag_id, task_id, execution_date)
    handler = operator.get_events_handler()

    if not record:
        # Nothing usable came back from the lookup: create the record and
        # save it before the handler runs.
        record = TaskState(task_id=task_id,
                           dag_id=dag_id,
                           execution_date=execution_date)
        with create_session() as db_session:
            db_session.add(record)
            db_session.commit()

    # The handler returns a (scheduling action, new state) pair; the new state
    # goes straight back onto the record.
    outcome = handler.handle_event(event, record.task_state)
    scheduling_action, record.task_state = outcome
    record.update_task_state()
    return scheduling_action
def create_task_state(dag_run: DagRun, task_id: str):
    """
    Build a ``TaskState`` for ``task_id`` in ``dag_run`` and update it.

    The ``dag_id`` and ``execution_date`` are taken from ``dag_run``;
    ``update_task_state()`` is then called on the new object (presumably this
    persists it - confirm against ``TaskState``).

    :param dag_run: the DAG run supplying ``dag_id`` and ``execution_date``
    :param task_id: id of the task the state belongs to
    """
    new_state = TaskState(
        dag_id=dag_run.dag_id,
        task_id=task_id,
        execution_date=dag_run.execution_date,
    )
    new_state.update_task_state()