def _get_ready_tis(
    self,
    scheduleable_tasks: List[TI],
    finished_tasks: List[TI],
    session: Session,
) -> Tuple[List[TI], bool]:
    old_states = {}
    ready_tis: List[TI] = []
    changed_tis = False

    if not scheduleable_tasks:
        return ready_tis, changed_tis

    # Check dependencies
    for st in scheduleable_tasks:
        old_state = st.state
        if st.are_dependencies_met(
            dep_context=DepContext(flag_upstream_failed=True, finished_tasks=finished_tasks),
            session=session,
        ):
            ready_tis.append(st)
        else:
            old_states[st.key] = old_state

    # Check if any ti changed state
    tis_filter = TI.filter_for_tis(old_states.keys())
    if tis_filter is not None:
        fresh_tis = session.query(TI).filter(tis_filter).all()
        changed_tis = any(ti.state != old_states[ti.key] for ti in fresh_tis)

    return ready_tis, changed_tis

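# Hedged usage sketch (assumption, not from the source): one way a DagRun-level
# caller might feed the method above. `unfinished_tasks`/`finished_tasks` and the
# wrapper name are hypothetical; SCHEDULEABLE_STATES is the same constant the later
# revision of this method relies on.
def _schedule_tis_sketch(dag_run, unfinished_tasks, finished_tasks, session):
    scheduleable_tasks = [ut for ut in unfinished_tasks if ut.state in SCHEDULEABLE_STATES]
    ready_tis, changed_tis = dag_run._get_ready_tis(scheduleable_tasks, finished_tasks, session)
    if changed_tis:
        # A not-yet-ready TI changed state while dependencies were being checked;
        # a caller would typically trigger another scheduling pass here.
        pass
    return ready_tis, changed_tis
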
def _send_stalled_tis_back_to_scheduler(
    self, keys: List[TaskInstanceKey], session: Session = NEW_SESSION
) -> None:
    try:
        session.query(TaskInstance).filter(
            TaskInstance.filter_for_tis(keys),
            TaskInstance.state == State.QUEUED,
            TaskInstance.queued_by_job_id == self.job_id,
        ).update(
            {
                TaskInstance.state: State.SCHEDULED,
                TaskInstance.queued_dttm: None,
                TaskInstance.queued_by_job_id: None,
                TaskInstance.external_executor_id: None,
            },
            synchronize_session=False,
        )
        session.commit()
    except Exception:
        self.log.exception("Error sending tasks back to scheduler")
        session.rollback()
        return

    for key in keys:
        self._set_celery_pending_task_timeout(key, None)
        self.running.discard(key)
        celery_async_result = self.tasks.pop(key, None)
        if celery_async_result:
            try:
                app.control.revoke(celery_async_result.task_id)
            except Exception as ex:
                self.log.error("Error revoking task instance %s from celery: %s", key, ex)

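# Hedged sketch (assumption, not from the source): a periodic check that collects
# queued task keys whose Celery message has been pending longer than a timeout and
# hands them to _send_stalled_tis_back_to_scheduler. `_pending_since` and the
# function name are hypothetical, used only for illustration.
from datetime import timedelta

from airflow.utils import timezone


def _check_for_stalled_tasks_sketch(executor, stalled_task_timeout=timedelta(minutes=5)):
    now = timezone.utcnow()
    stalled_keys = [
        key
        for key, queued_at in executor._pending_since.items()
        if queued_at is not None and now - queued_at > stalled_task_timeout
    ]
    if stalled_keys:
        executor._send_stalled_tis_back_to_scheduler(stalled_keys)
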
def _get_ready_tis(
    self,
    schedulable_tis: List[TI],
    finished_tis: List[TI],
    session: Session,
) -> Tuple[List[TI], bool]:
    old_states = {}
    ready_tis: List[TI] = []
    changed_tis = False

    if not schedulable_tis:
        return ready_tis, changed_tis

    # If we expand TIs, we need a new list so that we can iterate over them too. (We can't alter
    # `schedulable_tis` in place and have the `for` loop pick them up.)
    expanded_tis: List[TI] = []
    dep_context = DepContext(
        flag_upstream_failed=True,
        ignore_unmapped_tasks=True,  # Ignore this dep, as we will expand it if we can.
        finished_tis=finished_tis,
    )

    # Check dependencies
    for schedulable in itertools.chain(schedulable_tis, expanded_tis):
        old_state = schedulable.state
        if schedulable.are_dependencies_met(session=session, dep_context=dep_context):
            ready_tis.append(schedulable)
        else:
            old_states[schedulable.key] = old_state
            continue

        # Expansion of last resort! This is ideally handled in the mini-scheduler in LocalTaskJob, but
        # if for any reason it wasn't, we need to expand it now.
        if schedulable.map_index < 0 and schedulable.task.is_mapped:
            # HACK. This needs a better way, one that copes with multiple upstreams!
            for ti in finished_tis:
                if schedulable.task_id in ti.task.downstream_task_ids:
                    assert isinstance(schedulable.task, MappedOperator)
                    new_tis = schedulable.task.expand_mapped_task(self.run_id, session=session)
                    if schedulable.state == TaskInstanceState.SKIPPED:
                        # Task is now skipped (likely because the upstream returned 0 tasks).
                        continue
                    assert new_tis[0] is schedulable
                    expanded_tis.extend(new_tis[1:])
                    break

    # Check if any ti changed state
    tis_filter = TI.filter_for_tis(old_states.keys())
    if tis_filter is not None:
        fresh_tis = session.query(TI).filter(tis_filter).all()
        changed_tis = any(ti.state != old_states[ti.key] for ti in fresh_tis)

    return ready_tis, changed_tis

def _get_ready_tis(
    self,
    schedulable_tis: List[TI],
    finished_tis: List[TI],
    session: Session,
) -> Tuple[List[TI], bool, bool]:
    old_states = {}
    ready_tis: List[TI] = []
    changed_tis = False

    if not schedulable_tis:
        return ready_tis, changed_tis, False

    # If we expand TIs, we need a new list so that we can iterate over them too. (We can't alter
    # `schedulable_tis` in place and have the `for` loop pick them up.)
    additional_tis: List[TI] = []
    dep_context = DepContext(
        flag_upstream_failed=True,
        ignore_unmapped_tasks=True,  # Ignore this dep, as we will expand it if we can.
        finished_tis=finished_tis,
    )

    # Check dependencies.
    expansion_happened = False
    for schedulable in itertools.chain(schedulable_tis, additional_tis):
        old_state = schedulable.state
        if not schedulable.are_dependencies_met(session=session, dep_context=dep_context):
            old_states[schedulable.key] = old_state
            continue
        # If schedulable is from a mapped task, but not yet expanded, do it
        # now. This is called in two places: first and ideally in the mini
        # scheduler at the end of LocalTaskJob, and then as an "expansion of
        # last resort" in the scheduler to ensure that the mapped task is
        # correctly expanded before it is executed.
        if schedulable.map_index < 0 and isinstance(schedulable.task, MappedOperator):
            expanded_tis, _ = schedulable.task.expand_mapped_task(self.run_id, session=session)
            if expanded_tis:
                assert expanded_tis[0] is schedulable
                additional_tis.extend(expanded_tis[1:])
            expansion_happened = True
        if schedulable.state in SCHEDULEABLE_STATES:
            ready_tis.append(schedulable)

    # Check if any ti changed state
    tis_filter = TI.filter_for_tis(old_states)
    if tis_filter is not None:
        fresh_tis = session.query(TI).filter(tis_filter).all()
        changed_tis = any(ti.state != old_states[ti.key] for ti in fresh_tis)

    return ready_tis, changed_tis, expansion_happened

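# Hedged sketch (assumption, not from the source): consuming the three-value return.
# When an expansion happened, the freshly created mapped TIs can change the picture,
# so a caller could refresh the finished TIs and evaluate dependencies once more.
# `dag_run` and the wrapper name are hypothetical; DagRun.get_task_instances and
# State.finished are existing Airflow APIs.
def _get_ready_tis_with_retry(dag_run, schedulable_tis, finished_tis, session):
    ready_tis, changed, expansion_happened = dag_run._get_ready_tis(schedulable_tis, finished_tis, session)
    if expansion_happened:
        # expand_mapped_task may have added (or removed) TI rows; re-evaluate once.
        finished_tis = dag_run.get_task_instances(state=State.finished, session=session)
        ready_tis, changed, _ = dag_run._get_ready_tis(schedulable_tis, finished_tis, session)
    return ready_tis, changed
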
def query(result, items):
    if not items:
        return result

    filter_for_tis = TaskInstance.filter_for_tis(items)
    reset_tis = (
        session.query(TaskInstance)
        .filter(filter_for_tis, TaskInstance.state.in_(resettable_states))
        .with_for_update()
        .all()
    )

    for ti in reset_tis:
        ti.state = State.NONE
        session.merge(ti)

    return result + reset_tis

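# Hedged sketch (assumption, not from the source): the closure above has the reducer
# signature expected by airflow.utils.helpers.reduce_in_chunks, which applies it to
# the candidate TIs in DB-friendly batches and accumulates the reset rows.
# `tis_to_reset`, `max_tis_per_query`, and the wrapper name are hypothetical.
from airflow.utils import helpers


def reset_in_chunks(tis_to_reset, max_tis_per_query=512):
    return helpers.reduce_in_chunks(query, tis_to_reset, [], max_tis_per_query)
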