def test_prohibit_commit(self):
    """Check that ``prohibit_commit`` rejects direct commits but allows ``guard.commit()``."""
    session = self.session
    with prohibit_commit(session) as commit_guard:
        # A plain commit on the guarded session must be refused.
        session.execute('SELECT 1')
        with pytest.raises(RuntimeError):
            session.commit()
        session.rollback()

        # Committing through the guard object is the permitted route.
        session.execute('SELECT 1')
        commit_guard.commit()

        # Check the expected_commit is reset
        with pytest.raises(RuntimeError):
            session.execute('SELECT 1')
            session.commit()
def _create_dag_run(self, dag_id, session, run_type=DagRunType.SCHEDULED) -> DagRun:
    """
    Unconditionally create a DAG run for the given DAG, and update the dag_model's
    fields to control if/when the next DAGRun should be created.

    Nothing is created unless ``settings.USE_JOB_SCHEDULE`` is truthy.

    :param dag_id: id of the DAG to create a run for
    :param session: session used for the queries; it is committed through the
        ``prohibit_commit`` guard once the run has been created
    :param run_type: for ``DagRunType.MANUAL`` the run uses the current time as
        execution date and is flagged as externally triggered; otherwise the
        DagModel's ``next_dagrun`` is used. For ``DagRunType.SCHEDULED`` (the
        default) ``_update_dag_next_dagrun`` is called afterwards.
    :return: the created DagRun, or ``None`` when job scheduling is disabled,
        when no DagModel row exists for ``dag_id``, or when creation failed
        (the error is logged, not re-raised).
        NOTE(review): the annotation says ``DagRun`` although ``None`` is a
        possible result; callers must handle ``None``.
    """
    with prohibit_commit(session) as guard:
        if not settings.USE_JOB_SCHEDULE:
            return None

        try:
            dag = self.dagbag.get_dag(dag_id, session=session)
            dag_model = session.query(DagModel).filter(DagModel.dag_id == dag_id).first()
            if dag_model is None:
                return None

            next_dagrun = dag_model.next_dagrun
            dag_hash = self.dagbag.dags_hash.get(dag.dag_id)
            external_trigger = False
            if run_type == DagRunType.MANUAL:
                # Manual runs execute "now" instead of at the scheduled slot.
                next_dagrun = timezone.utcnow()
                external_trigger = True

            dag_run = dag.create_dagrun(
                run_type=run_type,
                execution_date=next_dagrun,
                start_date=timezone.utcnow(),
                state=State.RUNNING,
                external_trigger=external_trigger,
                session=session,
                dag_hash=dag_hash,
                creating_job_id=self.id,
            )
            if run_type == DagRunType.SCHEDULED:
                self._update_dag_next_dagrun(dag_id, session)

            # commit the session - Release the write lock on DagModel table.
            guard.commit()
            # END: create dagrun

            # register periodic task
            self._register_periodic_events(dag_run.run_id, dag)
            return dag_run
        except SerializedDagNotFound:
            self.log.exception("DAG '%s' not found in serialized_dag table", dag_id)
            return None
        except Exception:
            # Deliberate best-effort: log and report failure as None.
            # NOTE(review): the session is not rolled back here - confirm that
            # the caller takes care of it.
            self.log.exception("Error occurred when create dag_run of dag: %s", dag_id)
            return None
def test_prohibit_commit_specific_session_only(self):
    """
    Verify that "prohibit_commit" guards only the session it was given;
    a separate session object must remain free to commit.
    """
    # Deliberately build a second, distinct session: by default we would
    # be handed back the very same session we already hold.
    unguarded_session = Session.session_factory()
    guarded_session = self.session
    assert unguarded_session is not guarded_session

    with prohibit_commit(guarded_session):
        guarded_session.execute('SELECT 1')
        with pytest.raises(RuntimeError):
            guarded_session.commit()
        guarded_session.rollback()

        # The second session is not guarded, so this commit is expected to succeed.
        unguarded_session.execute('SELECT 1')
        unguarded_session.commit()
def _fetch_callbacks(self, max_callbacks: int, session: Session = NEW_SESSION):
    """
    Load up to ``max_callbacks`` callback requests from the database and put
    them on the internal queue for execution.

    Rows are read in ascending ``priority_weight`` order under row locks. Every
    row that is queued successfully is marked for deletion; a failure while
    handling a row is logged as a warning and the loop moves on to the next
    one. The session is committed once, through the ``prohibit_commit`` guard,
    after all rows have been handled.

    :param max_callbacks: upper bound on the number of rows fetched
    :param session: session used for the query, the deletes and the commit
    """
    self.log.debug("Fetching callbacks from the database.")
    with prohibit_commit(session) as commit_guard:
        by_priority = session.query(DbCallbackRequest).order_by(DbCallbackRequest.priority_weight.asc())
        limited = by_priority.limit(max_callbacks)
        lock_options = skip_locked(session=session)
        locked_rows = with_row_locks(
            limited, of=DbCallbackRequest, session=session, **lock_options
        ).all()

        for db_request in locked_rows:
            try:
                request = db_request.get_callback_request()
                self._add_callback_to_queue(request)
                session.delete(db_request)
            except Exception as err:
                self.log.warning("Error adding callback for execution: %s, %s", db_request, err)

        commit_guard.commit()
def _do_scheduling(self, session) -> int:
    """
    This function is where the main scheduling decisions take place. It:

    - Creates any necessary DAG runs by examining the next_dagrun_create_after column of DagModel

      Since creating Dag Runs is a relatively time consuming process, we select only 10 dags by default
      (configurable via ``scheduler.max_dagruns_to_create_per_loop`` setting) - putting this higher will
      mean one scheduler could spend a chunk of time creating dag runs, and not ever get around to
      scheduling tasks.

    - Finds the "next n oldest" running DAG Runs to examine for scheduling (n=20 by default, configurable
      via ``scheduler.max_dagruns_per_loop_to_schedule`` config setting) and tries to progress state (TIs
      to SCHEDULED, or DagRuns to SUCCESS/FAILURE etc)

      By "next oldest", we mean hasn't been examined/scheduled in the most time.

      The reason we don't select all dagruns at once is because the rows are selected with row locks,
      meaning that only one scheduler can "process them", even if it is waiting behind other dags.
      Increasing this limit will allow more throughput for smaller DAGs but will likely slow down
      throughput for larger (>500 tasks.) DAGs

    - Then, via a Critical Section (locking the rows of the Pool model) we queue tasks, and then send them
      to the executor.

      See docs of _critical_section_execute_task_instances for more.

    :param session: session used for every query in this iteration; commits go through
        the ``prohibit_commit`` guard only
    :return: Number of TIs enqueued in this iteration
    :rtype: int
    :raises OperationalError: re-raised from the critical section when it is not a
        "lock not available" error
    """
    # Put a check in place to make sure we don't commit unexpectedly
    with prohibit_commit(session) as guard:
        if settings.USE_JOB_SCHEDULE:
            self._create_dagruns_for_dags(guard, session)

        self._start_queued_dagruns(session)
        guard.commit()

        dag_runs = self._get_next_dagruns_to_examine(State.RUNNING, session)

        # Schedule each examined run, remembering the callback (if any) it produced
        # so that it can be sent once the scheduling decisions are committed.
        callback_tuples = []
        for dag_run in dag_runs:
            callback_to_run = self._schedule_dag_run(dag_run, session)
            callback_tuples.append((dag_run, callback_to_run))

        guard.commit()

        # Send the callbacks after we commit to ensure the context is up to date when it gets run
        for dag_run, callback_to_run in callback_tuples:
            self._send_dag_callbacks_to_processor(dag_run, callback_to_run)

        # Without this, the session has an invalid view of the DB
        session.expunge_all()
        # END: schedule TIs

        # The slot check and the timer set-up live outside the ``try`` so that the
        # ``except`` handler below can always rely on ``timer`` being bound.
        if self.executor.slots_available <= 0:
            # We know we can't do anything here, so don't even try!
            self.log.debug("Executor full, skipping critical section")
            return 0

        timer = Stats.timer('scheduler.critical_section_duration')
        timer.start()

        try:
            # Find anything TIs in state SCHEDULED, try to QUEUE it (send it to the executor)
            num_queued_tis = self._critical_section_execute_task_instances(session=session)

            # Make sure we only sent this metric if we obtained the lock, otherwise we'll skew the
            # metric, way down
            timer.stop(send=True)
        except OperationalError as e:
            timer.stop(send=False)

            if is_lock_not_available_error(error=e):
                self.log.debug("Critical section lock held by another Scheduler")
                Stats.incr('scheduler.critical_section_busy')
                session.rollback()
                return 0
            raise

        guard.commit()
        return num_queued_tis