Example 1
    def _get_ready_tis(
        self,
        scheduleable_tasks: List[TI],
        finished_tasks: List[TI],
        session: Session,
    ) -> Tuple[List[TI], bool]:
        old_states = {}
        ready_tis: List[TI] = []
        changed_tis = False

        if not scheduleable_tasks:
            return ready_tis, changed_tis

        # Check dependencies
        for st in scheduleable_tasks:
            old_state = st.state
            if st.are_dependencies_met(
                dep_context=DepContext(
                    flag_upstream_failed=True, finished_tasks=finished_tasks
                ),
                session=session,
            ):
                ready_tis.append(st)
            else:
                old_states[st.key] = old_state

        # Check if any ti changed state
        tis_filter = TI.filter_for_tis(old_states.keys())
        if tis_filter is not None:
            fresh_tis = session.query(TI).filter(tis_filter).all()
            changed_tis = any(ti.state != old_states[ti.key]
                              for ti in fresh_tis)

        return ready_tis, changed_tis
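
The heart of this example is a snapshot-and-recheck pattern: remember the old state of every task instance whose dependencies are not met, then re-read those rows and report whether anything changed underneath the scheduler. Below is a minimal sketch of that pattern with plain Python objects, assuming nothing from Airflow (Task, split_ready, and fresh_state are made-up names for illustration).

from dataclasses import dataclass
from typing import Callable, Dict, List, Tuple


@dataclass
class Task:
    key: str
    state: str
    deps_met: bool


def split_ready(
    tasks: List[Task], fresh_state: Callable[[str], str]
) -> Tuple[List[Task], bool]:
    """Partition tasks into ready/not-ready and detect state drift in the latter."""
    old_states: Dict[str, str] = {}
    ready: List[Task] = []
    for task in tasks:
        if task.deps_met:
            ready.append(task)
        else:
            old_states[task.key] = task.state
    # Re-read the not-ready tasks from the source of truth (the database in
    # Airflow's case) and flag whether any of them changed state in the meantime.
    changed = any(fresh_state(key) != state for key, state in old_states.items())
    return ready, changed


# Example: "b" was queued when snapshotted but is now running, so changed is True.
ready, changed = split_ready(
    [Task("a", "scheduled", True), Task("b", "queued", False)],
    fresh_state=lambda key: "running",
)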
Example 2
    def _send_stalled_tis_back_to_scheduler(
        self,
        keys: List[TaskInstanceKey],
        session: Session = NEW_SESSION,
    ) -> None:
        try:
            session.query(TaskInstance).filter(
                TaskInstance.filter_for_tis(keys),
                TaskInstance.state == State.QUEUED,
                TaskInstance.queued_by_job_id == self.job_id,
            ).update(
                {
                    TaskInstance.state: State.SCHEDULED,
                    TaskInstance.queued_dttm: None,
                    TaskInstance.queued_by_job_id: None,
                    TaskInstance.external_executor_id: None,
                },
                synchronize_session=False,
            )
            session.commit()
        except Exception:
            self.log.exception("Error sending tasks back to scheduler")
            session.rollback()
            return

        for key in keys:
            self._set_celery_pending_task_timeout(key, None)
            self.running.discard(key)
            celery_async_result = self.tasks.pop(key, None)
            if celery_async_result:
                try:
                    app.control.revoke(celery_async_result.task_id)
                except Exception as ex:
                    self.log.error(
                        "Error revoking task instance %s from celery: %s", key, ex
                    )
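
Stripped of the Celery-specific cleanup, the first half of this method is a bulk "send back" update wrapped in commit/rollback. Here is a hedged SQLAlchemy sketch of just that half; the Job model, the state strings, and send_back are assumptions made for illustration, while query/filter/update/commit are standard SQLAlchemy calls.

from typing import List

from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Job(Base):
    __tablename__ = "job"
    id = Column(Integer, primary_key=True)
    state = Column(String, default="queued")


def send_back(session: Session, ids: List[int]) -> None:
    try:
        # One bulk UPDATE; synchronize_session=False skips reconciling objects
        # already loaded into the session, mirroring the executor code above.
        session.query(Job).filter(Job.id.in_(ids), Job.state == "queued").update(
            {Job.state: "scheduled"}, synchronize_session=False
        )
        session.commit()
    except Exception:
        session.rollback()
        raise  # the executor above logs and returns instead of re-raising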
Example 3
    def _get_ready_tis(
        self,
        schedulable_tis: List[TI],
        finished_tis: List[TI],
        session: Session,
    ) -> Tuple[List[TI], bool]:
        old_states = {}
        ready_tis: List[TI] = []
        changed_tis = False

        if not schedulable_tis:
            return ready_tis, changed_tis

        # If we expand TIs, we need a new list so that we iterate over them too. (We can't alter
        # `schedulable_tis` in place and have the `for` loop pick them up.)
        expanded_tis: List[TI] = []
        dep_context = DepContext(
            flag_upstream_failed=True,
            ignore_unmapped_tasks=True,  # Ignore this Dep, as we will expand it if we can.
            finished_tis=finished_tis,
        )

        # Check dependencies
        for schedulable in itertools.chain(schedulable_tis, expanded_tis):
            old_state = schedulable.state
            if schedulable.are_dependencies_met(
                session=session, dep_context=dep_context
            ):
                ready_tis.append(schedulable)
            else:
                old_states[schedulable.key] = old_state
                continue

            # Expansion of last resort! This is ideally handled in the mini-scheduler in LocalTaskJob, but if
            # for any reason it wasn't, we need to expand it now
            if schedulable.map_index < 0 and schedulable.task.is_mapped:
                # HACK. This needs a better way, one that copes with multiple upstreams!
                for ti in finished_tis:
                    if schedulable.task_id in ti.task.downstream_task_ids:
                        assert isinstance(schedulable.task, MappedOperator)
                        new_tis = schedulable.task.expand_mapped_task(
                            self.run_id, session=session
                        )
                        if schedulable.state == TaskInstanceState.SKIPPED:
                            # Task is now skipped (likely because upstream returned 0 tasks)
                            continue
                        assert new_tis[0] is schedulable
                        expanded_tis.extend(new_tis[1:])
                        break

        # Check if any ti changed state
        tis_filter = TI.filter_for_tis(old_states.keys())
        if tis_filter is not None:
            fresh_tis = session.query(TI).filter(tis_filter).all()
            changed_tis = any(ti.state != old_states[ti.key]
                              for ti in fresh_tis)

        return ready_tis, changed_tis
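
The subtle part of this version is iterating over itertools.chain(schedulable_tis, expanded_tis), where expanded_tis is empty when the loop starts and is only filled during iteration; chain consumes its second iterable lazily, so task instances created by expansion are still visited in the same pass. A standalone sketch of just that mechanism (expand and the example strings are invented for illustration):

import itertools
from typing import List


def expand(item: str) -> List[str]:
    # Stand-in for "expand a mapped placeholder into concrete entries".
    return [f"{item}[{i}]" for i in range(3)]


original = ["plain", "mapped"]
extra: List[str] = []  # empty now, extended while we iterate

visited = []
for item in itertools.chain(original, extra):
    visited.append(item)
    if item == "mapped":
        # The first expanded entry conceptually reuses the placeholder itself;
        # the remainder are appended so the chain picks them up later in this pass.
        extra.extend(expand(item)[1:])

print(visited)  # ['plain', 'mapped', 'mapped[1]', 'mapped[2]']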
Example 4
    def _get_ready_tis(
        self,
        schedulable_tis: List[TI],
        finished_tis: List[TI],
        session: Session,
    ) -> Tuple[List[TI], bool, bool]:
        old_states = {}
        ready_tis: List[TI] = []
        changed_tis = False

        if not schedulable_tis:
            return ready_tis, changed_tis, False

        # If we expand TIs, we need a new list so that we iterate over them too. (We can't alter
        # `schedulable_tis` in place and have the `for` loop pick them up
        additional_tis: List[TI] = []
        dep_context = DepContext(
            flag_upstream_failed=True,
            ignore_unmapped_tasks=True,  # Ignore this Dep, as we will expand it if we can.
            finished_tis=finished_tis,
        )

        # Check dependencies.
        expansion_happened = False
        for schedulable in itertools.chain(schedulable_tis, additional_tis):
            old_state = schedulable.state
            if not schedulable.are_dependencies_met(
                session=session, dep_context=dep_context
            ):
                old_states[schedulable.key] = old_state
                continue
            # If schedulable is from a mapped task, but not yet expanded, do it
            # now. This is called in two places: First and ideally in the mini
            # scheduler at the end of LocalTaskJob, and then as an "expansion of
            # last resort" in the scheduler to ensure that the mapped task is
            # correctly expanded before executed.
            if schedulable.map_index < 0 and isinstance(
                schedulable.task, MappedOperator
            ):
                expanded_tis, _ = schedulable.task.expand_mapped_task(
                    self.run_id, session=session
                )
                if expanded_tis:
                    assert expanded_tis[0] is schedulable
                    additional_tis.extend(expanded_tis[1:])
                expansion_happened = True
            if schedulable.state in SCHEDULEABLE_STATES:
                ready_tis.append(schedulable)

        # Check if any ti changed state
        tis_filter = TI.filter_for_tis(old_states)
        if tis_filter is not None:
            fresh_tis = session.query(TI).filter(tis_filter).all()
            changed_tis = any(ti.state != old_states[ti.key]
                              for ti in fresh_tis)

        return ready_tis, changed_tis, expansion_happened
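
Compared with the previous version, this one re-checks the state after expansion (expansion may have skipped the placeholder) and surfaces an expansion_happened flag to the caller. A compact, Airflow-free sketch of that delta, building on the chain mechanism shown above (the dict items, SCHEDULABLE, and get_ready are illustrative only):

import itertools
from typing import Dict, List, Tuple

SCHEDULABLE = {"scheduled", "queued"}


def get_ready(items: List[Dict]) -> Tuple[List[Dict], bool]:
    extra: List[Dict] = []
    ready: List[Dict] = []
    expansion_happened = False
    for item in itertools.chain(items, extra):
        if item.get("needs_expansion"):
            # Placeholder becomes the first concrete entry; its siblings are
            # appended and visited later in this same pass.
            item["needs_expansion"] = False
            extra.extend({"state": "scheduled"} for _ in range(item["width"] - 1))
            expansion_happened = True
        # Only keep entries that are still schedulable *after* expansion,
        # since expansion can flip the placeholder to e.g. "skipped".
        if item["state"] in SCHEDULABLE:
            ready.append(item)
    return ready, expansion_happened


ready, expanded = get_ready(
    [{"state": "scheduled", "needs_expansion": True, "width": 3}]
)
print(len(ready), expanded)  # 3 True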
Example 5
        def query(result, items):
            if not items:
                return result

            filter_for_tis = TaskInstance.filter_for_tis(items)
            reset_tis = (
                session.query(TaskInstance)
                .filter(filter_for_tis, TaskInstance.state.in_(resettable_states))
                .with_for_update()
                .all()
            )

            for ti in reset_tis:
                ti.state = State.NONE
                session.merge(ti)

            return result + reset_tis
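
query here is shaped as a reducer (an accumulator plus a chunk of keys), which implies the caller folds it over the full key list in bounded chunks so the generated IN clause stays small. A minimal sketch of such a fold; chunked, reset_in_chunks, and the chunk size are assumptions, not the actual Airflow caller:

from functools import reduce
from typing import Iterator, List, TypeVar

T = TypeVar("T")


def chunked(items: List[T], size: int) -> Iterator[List[T]]:
    for start in range(0, len(items), size):
        yield items[start:start + size]


def reset_in_chunks(query, all_keys: List[T], chunk_size: int = 100) -> list:
    # Each call to `query` sees at most `chunk_size` keys and returns the
    # accumulated list of reset task instances.
    return reduce(query, chunked(all_keys, chunk_size), [])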