def test_scheduler_process_check_heartrate(self):
    """
    Test if process dag honors the heartrate
    """
    dag = DAG(
        dag_id='test_scheduler_process_check_heartrate',
        start_date=DEFAULT_DATE)
    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.last_scheduler_run = datetime.datetime.now()
    session.merge(orm_dag)
    session.commit()
    session.close()

    scheduler = SchedulerJob()
    scheduler.heartrate = 1000

    dag.clear()

    dr = scheduler.schedule_dag(dag)
    self.assertIsNotNone(dr)

    queue = mock.Mock()
    scheduler.process_dag(dag, queue=queue)

    queue.put.assert_not_called()
def test_scheduler_verify_pool_full(self, mock_pool_full):
    """
    Test task instances not queued when pool is full
    """
    mock_pool_full.return_value = False

    dag = DAG(
        dag_id='test_scheduler_verify_pool_full',
        start_date=DEFAULT_DATE)

    DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow',
        pool='test_scheduler_verify_pool_full')

    session = settings.Session()
    pool = Pool(pool='test_scheduler_verify_pool_full', slots=1)
    session.add(pool)
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.is_paused = False
    session.merge(orm_dag)
    session.commit()

    scheduler = SchedulerJob()
    dag.clear()

    # Create 2 dagruns, which will create 2 task instances.
    dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dr)
    self.assertEquals(dr.execution_date, DEFAULT_DATE)
    dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dr)
    queue = []
    scheduler._process_task_instances(dag, queue=queue)
    self.assertEquals(len(queue), 2)
    dagbag = SimpleDagBag([dag])

    # Recreated part of the scheduler here, to kick off tasks -> executor
    for ti_key in queue:
        task = dag.get_task(ti_key[1])
        ti = models.TaskInstance(task, ti_key[2])
        # Task starts out in the scheduled state. All tasks in the
        # scheduled state will be sent to the executor
        ti.state = State.SCHEDULED

        # Also save this task instance to the DB.
        session.merge(ti)
        session.commit()

    scheduler._execute_task_instances(dagbag,
                                      (State.SCHEDULED,
                                       State.UP_FOR_RETRY))

    self.assertEquals(len(scheduler.executor.queued_tasks), 1)
def test_scheduler_reschedule(self):
    """
    Checks if tasks that are not taken up by the executor
    get rescheduled
    """
    executor = TestExecutor()

    dagbag = DagBag(executor=executor)
    dagbag.dags.clear()
    dagbag.executor = executor

    dag = DAG(
        dag_id='test_scheduler_reschedule',
        start_date=DEFAULT_DATE)
    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    dag.clear()
    dag.is_subdag = False

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.is_paused = False
    session.merge(orm_dag)
    session.commit()

    dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

    @mock.patch('airflow.models.DagBag', return_value=dagbag)
    @mock.patch('airflow.models.DagBag.collect_dags')
    def do_schedule(function, function2):
        # Use an empty file since the above mock will return the
        # expected DAGs. Also specify only a single file so that it doesn't
        # try to schedule the above DAG repeatedly.
        scheduler = SchedulerJob(num_runs=1,
                                 executor=executor,
                                 subdir=os.path.join(models.DAGS_FOLDER,
                                                     "no_dags.py"))
        scheduler.heartrate = 0
        scheduler.run()

    do_schedule()
    self.assertEquals(1, len(executor.queued_tasks))
    executor.queued_tasks.clear()

    do_schedule()
    self.assertEquals(2, len(executor.queued_tasks))
def test_scheduler_reschedule(self):
    """
    Checks if tasks that are not taken up by the executor
    get rescheduled
    """
    executor = TestExecutor()

    dagbag = DagBag(executor=executor)
    dagbag.dags.clear()
    dagbag.executor = executor

    dag = DAG(
        dag_id='test_scheduler_reschedule',
        start_date=DEFAULT_DATE)
    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    dag.clear()
    dag.is_subdag = False

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    orm_dag.is_paused = False
    session.merge(orm_dag)
    session.commit()

    dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

    @mock.patch('airflow.models.DagBag', return_value=dagbag)
    @mock.patch('airflow.models.DagBag.collect_dags')
    def do_schedule(function, function2):
        scheduler = SchedulerJob(num_runs=1,
                                 executor=executor)
        scheduler.heartrate = 0
        scheduler.run()

    do_schedule()
    self.assertEquals(1, len(executor.queued_tasks))
    executor.queued_tasks.clear()

    do_schedule()
    self.assertEquals(2, len(executor.queued_tasks))
def test_dag_is_deactivated_upon_dagfile_deletion(self):
    dag_id = 'old_existing_dag'
    dag_fileloc = "/usr/local/airflow/dags/non_existing_path.py"
    dag = DAG(
        dag_id,
        is_paused_upon_creation=True,
    )
    dag.fileloc = dag_fileloc
    session = settings.Session()
    dag.sync_to_db(session=session)

    orm_dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).one()

    self.assertTrue(orm_dag.is_active)
    self.assertEqual(orm_dag.fileloc, dag_fileloc)

    DagModel.deactivate_deleted_dags(list_py_file_paths(settings.DAGS_FOLDER))

    orm_dag = session.query(DagModel).filter(DagModel.dag_id == dag_id).one()
    self.assertFalse(orm_dag.is_active)

    # CleanUp
    session.execute(DagModel.__table__.delete().where(DagModel.dag_id == dag_id))
    session.close()
def test_is_paused_subdag(self, mock_dag_bag):
    subdag_id = 'dag.subdag'
    subdag = DAG(
        subdag_id,
        start_date=DEFAULT_DATE,
    )
    with subdag:
        DummyOperator(
            task_id='dummy_task',
        )

    dag_id = 'dag'
    dag = DAG(
        dag_id,
        start_date=DEFAULT_DATE,
    )

    with dag:
        SubDagOperator(task_id='subdag', subdag=subdag)

    mock_dag_bag.return_value.get_dag.return_value = dag

    session = settings.Session()
    dag.sync_to_db(session=session)

    unpaused_dags = session.query(DagModel).filter(
        DagModel.dag_id.in_([subdag_id, dag_id]),
    ).filter(
        DagModel.is_paused.is_(False)
    ).count()

    self.assertEqual(2, unpaused_dags)

    DagModel.get_dagmodel(dag.dag_id).set_is_paused(is_paused=True)

    paused_dags = session.query(DagModel).filter(
        DagModel.dag_id.in_([subdag_id, dag_id]),
    ).filter(
        DagModel.is_paused.is_(True)
    ).count()

    self.assertEqual(2, paused_dags)
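# A minimal, hedged sketch of the DagModel pause toggle that the test above
# exercises (assumed: a configured Airflow metadata DB; 'example_dag' is a
# hypothetical dag_id, not from the snippet). get_dagmodel returns None for
# unknown DAGs, so guard before toggling; per the test, pausing a parent DAG
# also pauses its subdags.
from airflow.models import DagModel

dag_model = DagModel.get_dagmodel('example_dag')
if dag_model is not None:
    dag_model.set_is_paused(is_paused=True)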
def test_delete_dag(self):
    url_template = '/api/experimental/dags/{}'

    from airflow import settings
    session = settings.Session()
    key = "my_dag_id"
    session.add(DagModel(dag_id=key))
    session.commit()
    response = self.app.delete(url_template.format(key),
                               content_type="application/json")
    self.assertEqual(200, response.status_code)

    response = self.app.delete(url_template.format('does_not_exist_dag'),
                               content_type="application/json")
    self.assertEqual(404, response.status_code)
def setUp(self):
    # Airflow relies on reading the DAG from disk when triggering it.
    # Therefore write a temp file holding the DAG to trigger.
    with tempfile.NamedTemporaryFile(mode="w", delete=False) as f:
        self._tmpfile = f.name
        f.write(DAG_SCRIPT)
        f.flush()

    with create_session() as session:
        session.add(DagModel(dag_id=TRIGGERED_DAG_ID, fileloc=self._tmpfile))
        session.commit()

    self.dag = DAG(TEST_DAG_ID, default_args={"owner": "airflow", "start_date": DEFAULT_DATE})
    dagbag = DagBag(f.name, read_dags_from_db=False, include_examples=False)
    dagbag.bag_dag(self.dag, root_dag=self.dag)
    dagbag.sync_to_db()
def _create_dag_runs(self, count):
    dag_runs = [
        DagRun(
            dag_id="TEST_DAG_ID",
            run_id="TEST_DAG_RUN_ID" + str(i),
            run_type=DagRunType.MANUAL.value,
            execution_date=timezone.parse(self.default_time) + timedelta(minutes=i),
            start_date=timezone.parse(self.default_time),
            external_trigger=True,
        )
        for i in range(1, count + 1)
    ]
    dag = DagModel(dag_id="TEST_DAG_ID")
    with create_session() as session:
        session.add_all(dag_runs)
        session.add(dag)
def test_emit_scheduling_delay(self, schedule_interval, expected):
    """
    Tests that dag scheduling delay stat is set properly once running scheduled dag.
    dag_run.update_state() invokes the _emit_true_scheduling_delay_stats_for_finished_state method.
    """
    dag = DAG(dag_id='test_emit_dag_stats', start_date=days_ago(1), schedule_interval=schedule_interval)
    dag_task = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

    session = settings.Session()
    try:
        orm_dag = DagModel(
            dag_id=dag.dag_id,
            has_task_concurrency_limits=False,
            next_dagrun=dag.start_date,
            next_dagrun_create_after=dag.following_schedule(dag.start_date),
            is_active=True,
        )
        session.add(orm_dag)
        session.flush()
        dag_run = dag.create_dagrun(
            run_type=DagRunType.SCHEDULED,
            state=State.SUCCESS,
            execution_date=dag.start_date,
            start_date=dag.start_date,
            session=session,
        )
        ti = dag_run.get_task_instance(dag_task.task_id, session)
        ti.set_state(State.SUCCESS, session)
        session.flush()

        with mock.patch.object(Stats, 'timing') as stats_mock:
            dag_run.update_state(session)

        metric_name = f'dagrun.{dag.dag_id}.first_task_scheduling_delay'

        if expected:
            true_delay = ti.start_date - dag.following_schedule(dag_run.execution_date)
            sched_delay_stat_call = call(metric_name, true_delay)
            assert sched_delay_stat_call in stats_mock.mock_calls
        else:
            # Assert that we never passed the metric
            sched_delay_stat_call = call(metric_name, mock.ANY)
            assert sched_delay_stat_call not in stats_mock.mock_calls
    finally:
        # Don't write anything to the DB
        session.rollback()
        session.close()
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Checks that the DAG exists and, if a task_id is given, that the task exists in it"""
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    dagbag = DagBag(
        dag_folder=dag_model.fileloc,
        store_serialized_dags=conf.getboolean('core', 'store_serialized_dags'))
    dag = dagbag.get_dag(dag_id)  # prefetch dag if it is stored serialized
    if dag_id not in dagbag.dags:
        error_message = "Dag id {} not found".format(dag_id)
        raise DagNotFound(error_message)
    if task_id and not dag.has_task(task_id):
        error_message = 'Task {} not found in dag {}'.format(task_id, dag_id)
        raise TaskNotFound(error_message)
    return dag
def test_stop_dag(self):
    t = threading.Thread(target=self.stop_dag_function)
    t.setDaemon(True)
    t.start()
    self.start_scheduler('../../dags/test_event_based_scheduler.py')
    with create_session() as session:
        from airflow.models import DagModel
        dag_model: DagModel = DagModel.get_dagmodel(EVENT_BASED_SCHEDULER_DAG)
        self.assertTrue(dag_model.is_paused)
        self.assertEqual(dag_model.get_last_dagrun().state, "killed")
        for ti in session.query(TaskInstance).filter(
            TaskInstance.dag_id == EVENT_BASED_SCHEDULER_DAG
        ):
            self.assertTrue(ti.state in [State.SUCCESS, State.KILLED])
        for te in session.query(TaskExecution).filter(
            TaskExecution.dag_id == EVENT_BASED_SCHEDULER_DAG
        ):
            self.assertTrue(te.state in [State.SUCCESS, State.KILLED])
def check_and_get_dag(dag_id: str, task_id: Optional[str] = None) -> DagModel:
    """Checks that the DAG exists and, if a task_id is given, that the task exists in it"""
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    dagbag = DagBag(
        dag_folder=dag_model.fileloc,
        read_dags_from_db=True
    )
    dag = dagbag.get_dag(dag_id)
    if not dag:
        error_message = "Dag id {} not found".format(dag_id)
        raise DagNotFound(error_message)
    if task_id and not dag.has_task(task_id):
        error_message = 'Task {} not found in dag {}'.format(task_id, dag_id)
        raise TaskNotFound(error_message)
    return dag
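# A hedged sketch of calling check_and_get_dag above (assumed: Airflow is
# installed and the metadata DB holds the DAG); 'example_dag' and 'extract'
# are hypothetical identifiers, not from the snippet. The helper either
# returns the DAG or raises, so callers can map the exceptions to
# 404-style responses.
from airflow.exceptions import DagNotFound, TaskNotFound

try:
    dag = check_and_get_dag(dag_id='example_dag', task_id='extract')
    task = dag.get_task('extract')
except (DagNotFound, TaskNotFound) as err:
    print(f"lookup failed: {err}")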
def dag_state(args, session=NEW_SESSION):
    """
    Returns the state (and conf if exists) of a DagRun at the command line.
    >>> airflow dags state tutorial 2015-01-01T00:00:00.000000
    running
    >>> airflow dags state a_dag_with_conf_passed 2015-01-01T00:00:00.000000
    failed, {"name": "bob", "age": "42"}
    """
    dag = DagModel.get_dagmodel(args.dag_id, session=session)

    if not dag:
        raise SystemExit(f"DAG: {args.dag_id} does not exist in 'dag' table")

    dr = session.query(DagRun).filter_by(
        dag_id=args.dag_id, execution_date=args.execution_date
    ).one_or_none()
    out = dr.state if dr else None
    conf_out = ''
    if out and dr.conf:
        conf_out = ', ' + json.dumps(dr.conf)
    print(str(out) + conf_out)
def test_delete_dag(self):
    key = "my_dag_id"

    with create_session() as session:
        self.assertEqual(
            session.query(DagModel).filter(DagModel.dag_id == key).count(), 0)
        session.add(DagModel(dag_id=key))

    with create_session() as session:
        self.assertEqual(
            session.query(DagModel).filter(DagModel.dag_id == key).count(), 1)

        self.client.delete_dag(dag_id=key)
        self.assertEqual(
            session.query(DagModel).filter(DagModel.dag_id == key).count(), 0)
def execute(self, context: Dict):
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Ignore MyPy type for self.execution_date
        # because it doesn't pick up the timezone.parse() for strings
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=self.execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if self.reset_dag_run:
            self.log.info("Clearing %s on %s", self.trigger_dag_id, self.execution_date)

            # Get target dag object and call clear()
            dag_model = DagModel.get_current(self.trigger_dag_id)
            if dag_model is None:
                raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

            dag_bag = DagBag(
                dag_folder=dag_model.fileloc,
                store_serialized_dags=settings.STORE_SERIALIZED_DAGS)
            dag = dag_bag.get_dag(self.trigger_dag_id)
            dag.clear(start_date=self.execution_date, end_date=self.execution_date)
        else:
            raise e
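# A hedged usage sketch of the operator whose execute() is shown above
# (assumed: an Airflow 2.x-style import path; 'controller_dag' and
# 'target_dag' are hypothetical ids, not from the snippet). With
# reset_dag_run=True, re-running the task for an execution date that already
# has a run clears and re-triggers it instead of failing with
# DagRunAlreadyExists.
from airflow import DAG
from airflow.operators.trigger_dagrun import TriggerDagRunOperator
from airflow.utils.dates import days_ago

with DAG(dag_id='controller_dag', start_date=days_ago(1), schedule_interval=None) as dag:
    TriggerDagRunOperator(
        task_id='trigger_target',
        trigger_dag_id='target_dag',  # hypothetical downstream DAG
        conf={'message': 'hello'},
        reset_dag_run=True,
    )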
def test_process_dag_to_response(self):
    dag_id = "test_dag"
    fileloc = "my_file"
    is_paused = True
    dag = DagModel(dag_id=dag_id,
                   is_paused=is_paused,
                   is_active=True,
                   is_subdag=False,
                   last_scheduler_run=datetime.now(),
                   last_pickled=datetime.now(),
                   last_expired=datetime.now(),
                   scheduler_lock=False,
                   pickle_id=1,
                   fileloc=fileloc,
                   owners="test")
    processed_response = _process_dag_to_response(dag)
    assert dag_id == processed_response[DAG_ID_KEY]
    assert fileloc == processed_response[FILE_LOCATION_KEY]
    assert is_paused == processed_response[IS_PAUSED_KEY]
def test_should_respond_200(self, name, request_json, session):
    del name
    dag_instance = DagModel(dag_id="TEST_DAG_ID")
    session.add(dag_instance)
    session.commit()
    response = self.client.post(
        "api/v1/dags/TEST_DAG_ID/dagRuns",
        json=request_json,
        environ_overrides={'REMOTE_USER': "******"},
    )
    assert response.status_code == 200
    assert {
        "conf": {},
        "dag_id": "TEST_DAG_ID",
        "dag_run_id": response.json["dag_run_id"],
        "end_date": None,
        "execution_date": response.json["execution_date"],
        "external_trigger": True,
        "start_date": response.json["start_date"],
        "state": "running",
    } == response.json
def test_create_dag_specific_permissions(self):
    dag_id = 'some_dag_id'
    dag_permission_name = self.security_manager.prefixed_dag_id(dag_id)
    assert ('can_read', dag_permission_name) not in self.security_manager.get_all_permissions()

    dag_model = DagModel(
        dag_id=dag_id, fileloc='/tmp/dag_.py', schedule_interval='2 2 * * *', is_paused=True
    )
    self.session.add(dag_model)
    self.session.commit()

    self.security_manager.create_dag_specific_permissions()
    self.session.commit()

    assert ('can_read', dag_permission_name) in self.security_manager.get_all_permissions()

    # Make sure we short circuit when the perms already exist
    with assert_queries_count(2):  # One query to get DagModels, one query to get all perms
        self.security_manager.create_dag_specific_permissions()
def index(self):
    logging.info("REST_API.index() called")

    # get the information that we want to display on the page
    # regarding the dags that are available
    dagbag = self.get_dagbag()
    dags = []
    for dag_id in dagbag.dags:
        orm_dag = DagModel.get_current(dag_id)
        dags.append({
            "dag_id": dag_id,
            "is_active": (not orm_dag.is_paused) if orm_dag is not None else False
        })

    return self.render("rest_api_plugin/index.html",
                       dags=dags,
                       airflow_webserver_base_url=airflow_webserver_base_url,
                       rest_api_endpoint=rest_api_endpoint,
                       apis_metadata=apis_metadata,
                       airflow_version=airflow_version,
                       )
def test_response_409(self, session):
    dag_instance = DagModel(dag_id="TEST_DAG_ID")
    session.add(dag_instance)
    session.add_all(self._create_test_dag_run())
    session.commit()
    response = self.client.post(
        "api/v1/dags/TEST_DAG_ID/dagRuns",
        json={"dag_run_id": "TEST_DAG_RUN_ID_1", "execution_date": self.default_time},
        environ_overrides={'REMOTE_USER': "******"},
    )
    self.assertEqual(response.status_code, 409, response.data)
    self.assertEqual(
        response.json,
        {
            "detail": "DAGRun with DAG ID: 'TEST_DAG_ID' and "
            "DAGRun ID: 'TEST_DAG_RUN_ID_1' already exists",
            "status": 409,
            "title": "Conflict",
            "type": EXCEPTIONS_LINK_MAP[409],
        },
    )
def index(self):
    logging.info("REST_API.index() called")
    dagbag = self.get_dagbag()
    dags = []
    for dag_id in dagbag.dags:
        orm_dag = DagModel.get_current(dag_id)
        dags.append({
            "dag_id": dag_id,
            "is_active": (not orm_dag.is_paused) if orm_dag is not None else False
        })
    return self.render(
        "rest_api_plugin/index.html",
        dags=dags,
        airflow_webserver_base_url=airflow_webserver_base_url,
        rest_api_endpoint=rest_api_endpoint,
        apis=apis,
        airflow_version=airflow_version,
        rest_api_plugin_version=rest_api_plugin_version)
def test_should_response_200(self, name, request_json, session):
    del name
    dag_instance = DagModel(dag_id="TEST_DAG_ID")
    session.add(dag_instance)
    session.commit()
    response = self.client.post("api/v1/dags/TEST_DAG_ID/dagRuns", json=request_json)
    self.assertEqual(response.status_code, 200)
    self.assertEqual(
        {
            "conf": {},
            "dag_id": "TEST_DAG_ID",
            "dag_run_id": response.json["dag_run_id"],
            "end_date": None,
            "execution_date": response.json["execution_date"],
            "external_trigger": True,
            "start_date": response.json["start_date"],
            "state": "running",
        },
        response.json,
    )
def test_scheduler_do_not_schedule_too_early(self):
    dag = DAG(dag_id='test_scheduler_do_not_schedule_too_early',
              start_date=datetime.datetime(2200, 1, 1))
    dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    session.merge(orm_dag)
    session.commit()
    session.close()

    scheduler = SchedulerJob()
    dag.clear()

    dr = scheduler.create_dag_run(dag)
    self.assertIsNone(dr)

    queue = mock.Mock()
    scheduler._process_task_instances(dag, queue=queue)

    queue.put.assert_not_called()
def _create_xcom_entries(self, dag_id, dag_run_id, execution_date, task_id, session=None):
    for i in [1, 2]:
        XCom.set(
            key=f'test-xcom-key-{i}',
            value="TEST",
            execution_date=execution_date,
            task_id=task_id,
            dag_id=dag_id,
        )
    dag = DagModel(dag_id=dag_id)
    session.add(dag)
    dagrun = DR(
        dag_id=dag_id,
        run_id=dag_run_id,
        execution_date=execution_date,
        start_date=execution_date,
        run_type=DagRunType.MANUAL,
    )
    session.add(dagrun)
def test_deactivate_unknown_dags(self):
    """
    Test that dag_ids not passed into deactivate_unknown_dags
    are deactivated when function is invoked
    """
    dagbag = DagBag(include_examples=True)
    dag_id = "test_deactivate_unknown_dags"
    expected_active_dags = dagbag.dags.keys()

    model_before = DagModel(dag_id=dag_id, is_active=True)
    with create_session() as session:
        session.merge(model_before)

    models.DAG.deactivate_unknown_dags(expected_active_dags)

    after_model = DagModel.get_dagmodel(dag_id)
    self.assertTrue(model_before.is_active)
    self.assertFalse(after_model.is_active)

    # clean up
    with create_session() as session:
        session.query(DagModel).filter(DagModel.dag_id == 'test_deactivate_unknown_dags').delete()
def trigger_dag(
    dag_id: str,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> Optional[DagRun]:
    """Triggers execution of DAG specified by dag_id

    :param dag_id: DAG ID
    :param run_id: ID of the dag_run
    :param conf: configuration
    :param execution_date: date of execution
    :param replace_microseconds: whether microseconds should be zeroed
    :return: first dag run triggered - even if more than one Dag Runs were triggered or None
    """
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    def read_store_serialized_dags():
        from airflow.configuration import conf
        return conf.getboolean('core', 'store_serialized_dags')

    dagbag = DagBag(
        dag_folder=dag_model.fileloc,
        store_serialized_dags=read_store_serialized_dags()
    )
    dag_run = DagRun()
    triggers = _trigger_dag(
        dag_id=dag_id,
        dag_run=dag_run,
        dag_bag=dagbag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    return triggers[0] if triggers else None
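# A hedged usage sketch of the trigger_dag API above (assumed: it is
# importable in the current scope and the metadata DB is initialized);
# 'example_dag' is a hypothetical dag_id, not from the snippet. conf may be a
# dict or a JSON string and is exposed to the triggered run as dag_run.conf.
from airflow.utils import timezone

run = trigger_dag(
    dag_id='example_dag',  # hypothetical DAG id
    run_id='manual__' + timezone.utcnow().isoformat(),
    conf={'key': 'value'},
    execution_date=timezone.utcnow(),
    replace_microseconds=False,
)
print(run.run_id if run else 'no run triggered')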
def test_should_respond_200_with_schedule_interval_none(self, session=None):
    dag_model = DagModel(
        dag_id="TEST_DAG_1",
        fileloc="/tmp/dag_1.py",
        schedule_interval=None,
    )
    session.add(dag_model)
    session.commit()
    response = self.client.get("/api/v1/dags/TEST_DAG_1", environ_overrides={'REMOTE_USER': "******"})
    assert response.status_code == 200
    assert {
        "dag_id": "TEST_DAG_1",
        "description": None,
        "fileloc": "/tmp/dag_1.py",
        "file_token": 'Ii90bXAvZGFnXzEucHki.EnmIdPaUPo26lHQClbWMbDFD1Pk',
        "is_paused": False,
        "is_subdag": False,
        "owners": [],
        "root_dag_id": None,
        "schedule_interval": None,
        "tags": [],
    } == response.json
def trigger_dag(self, dag_id, run_id, conf):
    try:
        dag_path = DagModel.get_current(dag_id).fileloc
    except Exception:
        dag_path = path.join(DAGS_FOLDER, dag_id + ".py")
    dag_bag = DagBag(dag_folder=dag_path)
    if not dag_bag.dags:
        logging.info("Failed to import dag due to the following errors")
        logging.info(dag_bag.import_errors)
        logging.info("Sleep for 3 seconds and give it a second try")
        sleep(3)
        dag_bag = DagBag(dag_folder=dag_path)
    triggers = trigger_dag._trigger_dag(dag_id=dag_id,
                                        dag_run=DagRun(),
                                        dag_bag=dag_bag,
                                        run_id=run_id,
                                        conf=conf,
                                        execution_date=None,
                                        replace_microseconds=False)
    return triggers[0] if triggers else None
def test_scheduler_max_active_runs_respected_after_clear(self):
    """
    Test if _process_task_instances only schedules ti's up to max_active_runs
    (related to issue AIRFLOW-137)
    """
    dag = DAG(
        dag_id='test_scheduler_max_active_runs_respected_after_clear',
        start_date=DEFAULT_DATE)
    dag.max_active_runs = 3

    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    session.merge(orm_dag)
    session.commit()
    session.close()

    scheduler = SchedulerJob()
    dag.clear()

    # First create up to 3 dagruns in RUNNING state.
    scheduler.create_dag_run(dag)

    # Reduce max_active_runs to 1
    dag.max_active_runs = 1

    queue = mock.Mock()
    # and schedule them in, so we can check how many
    # tasks are put on the queue (should be one, not 3)
    scheduler._process_task_instances(dag, queue=queue)

    queue.append.assert_called_with(
        (dag.dag_id, dag_task1.task_id, DEFAULT_DATE)
    )
def test_scheduler_verify_max_active_runs_and_dagrun_timeout(self):
    """
    Test that a dagrun will not be scheduled if max_dag_runs has been reached
    and dagrun_timeout has not been reached

    Test that a dagrun will be scheduled if max_dag_runs has been reached
    but dagrun_timeout is also reached
    """
    dag = DAG(
        dag_id='test_scheduler_verify_max_active_runs_and_dagrun_timeout',
        start_date=DEFAULT_DATE)
    dag.max_active_runs = 1
    dag.dagrun_timeout = datetime.timedelta(seconds=60)

    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    session.merge(orm_dag)
    session.commit()
    session.close()

    scheduler = SchedulerJob()
    dag.clear()

    dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dr)

    # Should not be scheduled as the DagRun has not timed out and max_active_runs is reached
    new_dr = scheduler.create_dag_run(dag)
    self.assertIsNone(new_dr)

    # Should be scheduled as dagrun_timeout has passed
    dr.start_date = datetime.datetime.now() - datetime.timedelta(days=1)
    session.merge(dr)
    session.commit()
    new_dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(new_dr)
def test_scheduler_add_new_task(self):
    """
    Test if a task instance will be added if the dag is updated
    """
    dag = DAG(
        dag_id='test_scheduler_add_new_task',
        start_date=DEFAULT_DATE)

    dag_task1 = DummyOperator(
        task_id='dummy',
        dag=dag,
        owner='airflow')

    session = settings.Session()
    orm_dag = DagModel(dag_id=dag.dag_id)
    session.merge(orm_dag)
    session.commit()
    session.close()

    scheduler = SchedulerJob()
    dag.clear()

    dr = scheduler.create_dag_run(dag)
    self.assertIsNotNone(dr)

    tis = dr.get_task_instances()
    self.assertEquals(len(tis), 1)

    dag_task2 = DummyOperator(
        task_id='dummy2',
        dag=dag,
        owner='airflow')

    queue = mock.Mock()
    scheduler._process_task_instances(dag, queue=queue)

    tis = dr.get_task_instances()
    self.assertEquals(len(tis), 2)
def trigger_dag(self):
    """
    Triggers execution of the DAG interpreted from the report's dag_id:
    looks up the DagModel to locate the dag file, builds a DagBag from it,
    and passes an empty DagRun to _trigger_dag.
    """
    dag_model = DagModel.get_current(self.dag_id)
    if dag_model is None:
        raise DagNotFound(f"Dag id {self.dag_id} not found in DagModel")
    dagbag = DagBag(
        dag_folder=dag_model.fileloc,
        store_serialized_dags=conf.getboolean("core", "store_serialized_dags"),
    )
    dag_run = DagRun()
    self._trigger_dag(dag_id=self.dag_id, dag_bag=dagbag, dag_run=dag_run)
def trigger_dag(
    dag_id,
    run_id=None,
    conf=None,
    execution_date=None,
    replace_microseconds=True,
):
    dag_model = DagModel.get_current(dag_id)
    if dag_model is None:
        raise DagNotFound("Dag id {} not found in DagModel".format(dag_id))

    dagbag = DagBag(dag_folder=dag_model.fileloc)
    dag_run = DagRun()
    triggers = _trigger_dag(
        dag_id=dag_id,
        dag_run=dag_run,
        dag_bag=dagbag,
        run_id=run_id,
        conf=conf,
        execution_date=execution_date,
        replace_microseconds=replace_microseconds,
    )

    return triggers[0] if triggers else None
def test_response_409(self, session):
    dag_instance = DagModel(dag_id="TEST_DAG_ID")
    session.add(dag_instance)
    session.add_all(self._create_test_dag_run())
    session.commit()
    response = self.client.post(
        "api/v1/dags/TEST_DAG_ID/dagRuns",
        json={
            "dag_run_id": "TEST_DAG_RUN_ID_1",
            "execution_date": self.default_time,
        },
    )
    self.assertEqual(response.status_code, 409, response.data)
    self.assertEqual(
        response.json,
        {
            "detail": "DAGRun with DAG ID: 'TEST_DAG_ID' and "
            "DAGRun ID: 'TEST_DAG_RUN_ID_1' already exists",
            "status": 409,
            "title": "Object already exists",
            "type": "about:blank",
        },
    )