def _get_airflow_executor(self): """Creates a new instance of the configured executor if none exists and returns it""" if self.task_executor_type == AirflowTaskExecutorType.airflow_inprocess: return InProcessExecutor() if (self.task_executor_type == AirflowTaskExecutorType.airflow_multiprocess_local): if self.run.context.settings.run.parallel: return LocalExecutor() else: return SequentialExecutor() if self.task_executor_type == AirflowTaskExecutorType.airflow_kubernetes: assert_plugin_enabled("dbnd-docker") from dbnd_airflow.executors.kubernetes_executor.kubernetes_executor import ( DbndKubernetesExecutor, ) from dbnd_docker.kubernetes.kubernetes_engine_config import ( KubernetesEngineConfig, ) if not isinstance(self.target_engine, KubernetesEngineConfig): raise friendly_error.executor_k8s.kubernetes_with_non_compatible_engine( self.target_engine) kube_dbnd = self.target_engine.build_kube_dbnd() if kube_dbnd.engine_config.debug: logging.getLogger("airflow.contrib.kubernetes").setLevel( logging.DEBUG) return DbndKubernetesExecutor(kube_dbnd=kube_dbnd)
def test_localtaskjob_essential_attr(self): """ Check whether essential attributes of LocalTaskJob can be assigned with proper values without intervention """ dag = DAG('test_localtaskjob_essential_attr', start_date=DEFAULT_DATE, default_args={'owner': 'owner1'}) with dag: op1 = DummyOperator(task_id='op1') dag.clear() dr = dag.create_dagrun(run_id="test", state=State.SUCCESS, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE) ti = dr.get_task_instance(task_id=op1.task_id) job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) essential_attr = ["dag_id", "job_type", "start_date", "hostname"] check_result_1 = [hasattr(job1, attr) for attr in essential_attr] self.assertTrue(all(check_result_1)) check_result_2 = [ getattr(job1, attr) is not None for attr in essential_attr ] self.assertTrue(all(check_result_2))
def test_terminate_task(self): """If a task instance's db state get deleted, it should fail""" TI = TaskInstance dag = self.dagbag.dags.get('test_utils') task = dag.task_dict.get('sleeps_forever') ti = TI(task=task, execution_date=DEFAULT_DATE) job = jobs.LocalTaskJob( task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) # Running task instance asynchronously p = multiprocessing.Process(target=job.run) p.start() sleep(5) settings.engine.dispose() session = settings.Session() ti.refresh_from_db(session=session) # making sure it's actually running self.assertEqual(State.RUNNING, ti.state) ti = session.query(TI).filter_by( dag_id=task.dag_id, task_id=task.task_id, execution_date=DEFAULT_DATE ).one() # deleting the instance should result in a failure session.delete(ti) session.commit() # waiting for the async task to finish p.join() # making sure that the task ended up as failed ti.refresh_from_db(session=session) self.assertEqual(State.FAILED, ti.state) session.close()
def get_executor_for_test(): try: from dbnd_airflow.executors.simple_executor import InProcessExecutor return InProcessExecutor() except Exception: return SequentialExecutor()
def test_mark_failure_on_failure_callback(self): """ Test that ensures that mark_failure in the UI fails the task, and executes on_failure_callback """ data = {'called': False} def check_failure(context): self.assertEqual(context['dag_run'].dag_id, 'test_mark_failure') data['called'] = True def task_function(ti, **context): with create_session() as session: self.assertEqual(State.RUNNING, ti.state) ti.log.info("Marking TI as failed 'externally'") ti.state = State.FAILED session.merge(ti) session.commit() time.sleep(60) # This should not happen -- the state change should be noticed and the task should get killed data['reached_end_of_sleep'] = True with DAG(dag_id='test_mark_failure', start_date=DEFAULT_DATE) as dag: task = PythonOperator(task_id='test_state_succeeded1', python_callable=task_function, provide_context=True, on_failure_callback=check_failure) session = settings.Session() dag.clear() dag.create_dagrun(run_id="test", state=State.RUNNING, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session) ti = TI(task=task, execution_date=DEFAULT_DATE) ti.refresh_from_db() job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) with timeout(30): # This should be _much_ shorter to run. # If you change this limit, make the timeout in the callbable above bigger job1.run() ti.refresh_from_db() self.assertEqual(ti.state, State.FAILED) self.assertTrue(data['called']) self.assertNotIn( 'reached_end_of_sleep', data, 'Task should not have been allowed to run to completion')
def test_class_with_logger_should_have_logger_with_correct_name(self): # each class should automatically receive a logger with a correct name class Blah(LoggingMixin): pass assert Blah().logger.name == "tests.core.Blah" assert SequentialExecutor( ).logger.name == "airflow.executors.sequential_executor.SequentialExecutor" assert LocalExecutor( ).logger.name == "airflow.executors.local_executor.LocalExecutor"
def test_mark_success_on_success_callback(self): """ Test that ensures that where a task is marked suceess in the UI on_success_callback gets executed """ data = {'called': False} def success_callback(context): self.assertEqual(context['dag_run'].dag_id, 'test_mark_success') data['called'] = True dag = DAG(dag_id='test_mark_success', start_date=DEFAULT_DATE, default_args={'owner': 'owner1'}) task = DummyOperator(task_id='test_state_succeeded1', dag=dag, on_success_callback=success_callback) session = settings.Session() dag.clear() dag.create_dagrun(run_id="test", state=State.RUNNING, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session) ti = TI(task=task, execution_date=DEFAULT_DATE) ti.refresh_from_db() job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) from airflow.task.task_runner.standard_task_runner import StandardTaskRunner job1.task_runner = StandardTaskRunner(job1) process = multiprocessing.Process(target=job1.run) process.start() ti.refresh_from_db() for _ in range(0, 50): if ti.state == State.RUNNING: break time.sleep(0.1) ti.refresh_from_db() self.assertEqual(State.RUNNING, ti.state) ti.state = State.SUCCESS session.merge(ti) session.commit() job1.heartbeat_callback(session=None) self.assertTrue(data['called']) process.join(timeout=10) self.assertFalse(process.is_alive())
def test_localtaskjob_maintain_heart_rate(self): dagbag = models.DagBag( dag_folder=TEST_DAG_FOLDER, include_examples=False, ) dag = dagbag.dags.get('test_localtaskjob_double_trigger') task = dag.get_task('test_localtaskjob_double_trigger_task') session = settings.Session() dag.clear() dag.create_dagrun(run_id="test", state=State.SUCCESS, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session) ti_run = TI(task=task, execution_date=DEFAULT_DATE) ti_run.refresh_from_db() job1 = LocalTaskJob(task_instance=ti_run, executor=SequentialExecutor()) # this should make sure we only heartbeat once and exit at the second # loop in _execute() return_codes = [None, 0] def multi_return_code(): return return_codes.pop(0) time_start = time.time() from airflow.task.task_runner.standard_task_runner import StandardTaskRunner with patch.object(StandardTaskRunner, 'start', return_value=None) as mock_start: with patch.object(StandardTaskRunner, 'return_code') as mock_ret_code: mock_ret_code.side_effect = multi_return_code job1.run() self.assertEqual(mock_start.call_count, 1) self.assertEqual(mock_ret_code.call_count, 2) time_end = time.time() self.assertEqual(self.mock_base_job_sleep.call_count, 1) self.assertEqual(job1.state, State.SUCCESS) # Consider we have patched sleep call, it should not be sleeping to # keep up with the heart rate in other unpatched places # # We already make sure patched sleep call is only called once self.assertLess(time_end - time_start, job1.heartrate) session.close()
def test_localtaskjob_double_trigger(self): dagbag = models.DagBag( dag_folder=TEST_DAG_FOLDER, include_examples=False, ) dag = dagbag.dags.get('test_localtaskjob_double_trigger') task = dag.get_task('test_localtaskjob_double_trigger_task') session = settings.Session() dag.clear() dr = dag.create_dagrun(run_id="test", state=State.SUCCESS, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session) ti = dr.get_task_instance(task_id=task.task_id, session=session) ti.state = State.RUNNING ti.hostname = get_hostname() ti.pid = 1 session.merge(ti) session.commit() ti_run = TI(task=task, execution_date=DEFAULT_DATE) ti_run.refresh_from_db() job1 = LocalTaskJob(task_instance=ti_run, executor=SequentialExecutor()) from airflow.task.task_runner.standard_task_runner import StandardTaskRunner with patch.object(StandardTaskRunner, 'start', return_value=None) as mock_method: job1.run() mock_method.assert_not_called() ti = dr.get_task_instance(task_id=task.task_id, session=session) self.assertEqual(ti.pid, 1) self.assertEqual(ti.state, State.RUNNING) session.close()
def test_localtaskjob_heartbeat(self, mock_pid): session = settings.Session() dag = DAG( 'test_localtaskjob_heartbeat', start_date=DEFAULT_DATE, default_args={'owner': 'owner1'}) with dag: op1 = DummyOperator(task_id='op1') dag.clear() dr = dag.create_dagrun(run_id="test", state=State.SUCCESS, execution_date=DEFAULT_DATE, start_date=DEFAULT_DATE, session=session) ti = dr.get_task_instance(task_id=op1.task_id, session=session) ti.state = State.RUNNING ti.hostname = "blablabla" session.commit() job1 = LocalTaskJob(task_instance=ti, ignore_ti_state=True, executor=SequentialExecutor()) self.assertRaises(AirflowException, job1.heartbeat_callback) mock_pid.return_value = 1 ti.state = State.RUNNING ti.hostname = get_hostname() ti.pid = 1 session.merge(ti) session.commit() ret = job1.heartbeat_callback() self.assertEqual(ret, None) mock_pid.return_value = 2 self.assertRaises(AirflowException, job1.heartbeat_callback)
DEFAULT_DATE = datetime(2016, 1, 1) default_args = { 'owner': 'airflow', 'start_date': DEFAULT_DATE, 'run_as_user': '******' } dag = DAG(dag_id='impersonation_subdag', default_args=default_args) def print_today(): print('Today is {}'.format(datetime.utcnow())) subdag = DAG('impersonation_subdag.test_subdag_operation', default_args=default_args) PythonOperator(python_callable=print_today, task_id='exec_python_fn', dag=subdag) BashOperator(task_id='exec_bash_operator', bash_command='echo "Running within SubDag"', dag=subdag) subdag_operator = SubDagOperator(task_id='test_subdag_operation', subdag=subdag, executor=SequentialExecutor(), dag=dag)