Ejemplo n.º 1
0
def get_all_dag_task_query(
    dag: DAG,
    session: SASession,
    state: TaskInstanceState,
    task_ids: Union[List[str], List[Tuple[str, int]]],
    confirmed_dates: Iterable[datetime],
):
    """Get all tasks of the main dag that will be affected by a state change.

    The returned query selects task instances that belong to ``dag``, whose
    DAG run has an execution date in ``confirmed_dates``, that match
    ``task_ids``, and whose state is either unset or different from ``state``.

    :param dag: DAG whose ``dag_id`` the task instances must carry.
    :param session: session used to build the query.
    :param state: target state; task instances already in it are excluded.
    :param task_ids: either plain task ids or ``(task_id, map_index)`` pairs.
        Which form is in use is decided from the first element only. An empty
        list produces a query that matches no rows.
    :param confirmed_dates: execution dates of the DAG runs to consider.
    :return: the (not yet executed) query, with ``TaskInstance.dag_run``
        populated from the join.
    """
    # Treat an empty ``task_ids`` as a list of plain ids: indexing the first
    # element would raise IndexError, whereas ``in_([])`` simply matches nothing.
    is_string_list = not task_ids or isinstance(task_ids[0], str)

    qry_dag = (
        session.query(TaskInstance)
        .join(TaskInstance.dag_run)
        .filter(
            TaskInstance.dag_id == dag.dag_id,
            DagRun.execution_date.in_(confirmed_dates),
        )
    )

    if is_string_list:
        qry_dag = qry_dag.filter(TaskInstance.task_id.in_(task_ids))
    else:
        qry_dag = qry_dag.filter(
            tuple_in_condition((TaskInstance.task_id, TaskInstance.map_index), task_ids)
        )

    # ``state != X`` does not match NULL in SQL, so rows without a state must
    # be included explicitly.
    qry_dag = qry_dag.filter(
        or_(TaskInstance.state.is_(None), TaskInstance.state != state)
    ).options(contains_eager(TaskInstance.dag_run))
    return qry_dag
Ejemplo n.º 2
0
    def schedule_tis(self, schedulable_tis: Iterable[TI], session: Session = NEW_SESSION) -> int:
        """
        Set the given task instances into the scheduled state.

        Each element of ``schedulable_tis`` should have its ``task`` attribute already set.

        Any EmptyOperator without callbacks is instead set straight to the success state.

        All the TIs should belong to this DagRun, but this code is in the hot-path, this is not checked -- it
        is the caller's responsibility to call this function only with TIs from a single dag run.

        :param schedulable_tis: task instances to move to the scheduled (or success) state.
        :param session: session used to issue the bulk updates.
        :return: sum of the row counts reported by the two bulk updates.
        """
        # Split the TIs into those that do not need to be executed (tasks using
        # EmptyOperator and without on_execute_callback / on_success_callback)
        # and those that do. Both lists are keyed by (task_id, map_index) so that
        # only the exact task instances passed in are touched; keying by task_id
        # alone would also update every other map index of a mapped task.
        dummy_ti_ids = []
        schedulable_ti_ids = []
        for ti in schedulable_tis:
            ti_key = (ti.task_id, ti.map_index)
            if (
                ti.task.inherits_from_empty_operator
                and not ti.task.on_execute_callback
                and not ti.task.on_success_callback
            ):
                dummy_ti_ids.append(ti_key)
            else:
                schedulable_ti_ids.append(ti_key)

        count = 0

        if schedulable_ti_ids:
            count += (
                session.query(TI)
                .filter(
                    TI.dag_id == self.dag_id,
                    TI.run_id == self.run_id,
                    tuple_in_condition((TI.task_id, TI.map_index), schedulable_ti_ids),
                )
                .update({TI.state: State.SCHEDULED}, synchronize_session=False)
            )

        # Tasks using EmptyOperator should not be executed, mark them as success
        if dummy_ti_ids:
            # Use one timestamp for both columns so start_date == end_date,
            # consistent with the zero duration written below.
            now = timezone.utcnow()
            count += (
                session.query(TI)
                .filter(
                    TI.dag_id == self.dag_id,
                    TI.run_id == self.run_id,
                    tuple_in_condition((TI.task_id, TI.map_index), dummy_ti_ids),
                )
                .update(
                    {
                        TI.state: State.SUCCESS,
                        TI.start_date: now,
                        TI.end_date: now,
                        TI.duration: 0,
                    },
                    synchronize_session=False,
                )
            )

        return count