def execution_parallelism(self, parallelism=0):
    """Run TEST_SUCCESS_COMMANDS succeeding commands plus one failing command
    through a LocalExecutor and verify per-key result states and the number
    of workers used for the requested parallelism (0 = unlimited)."""
    executor = LocalExecutor(parallelism=parallelism)
    executor.start()

    success_key = 'success {}'
    success_command = ['true', 'some_parameter']
    fail_command = ['false', 'some_parameter']

    # Nothing should be queued before we submit work.
    self.assertTrue(executor.result_queue.empty())

    for idx in range(self.TEST_SUCCESS_COMMANDS):
        task_key = success_key.format(idx)
        executor.running[task_key] = True
        executor.execute_async(key=task_key, command=success_command)

    executor.running['fail'] = True
    executor.execute_async(key='fail', command=fail_command)

    executor.end()
    # By that time Queues are already shutdown so we cannot check if they are empty
    self.assertEqual(len(executor.running), 0)

    for idx in range(self.TEST_SUCCESS_COMMANDS):
        self.assertEqual(executor.event_buffer[success_key.format(idx)], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    # Unlimited parallelism spawns one worker per command; otherwise the pool
    # is capped at `parallelism` workers.
    expected = self.TEST_SUCCESS_COMMANDS + 1 if parallelism == 0 else parallelism
    self.assertEqual(executor.workers_used, expected)
def execution_parallelism(self, parallelism=0):
    """Run TEST_SUCCESS_COMMANDS succeeding commands plus one failing command
    and verify result states, queue drainage, and workers used for the
    requested parallelism (0 = unlimited)."""
    executor = LocalExecutor(parallelism=parallelism)
    executor.start()

    success_key = 'success {}'
    success_command = ['true', 'some_parameter']
    fail_command = ['false', 'some_parameter']

    # Nothing should be queued before we submit work.
    self.assertTrue(executor.result_queue.empty())

    for idx in range(self.TEST_SUCCESS_COMMANDS):
        task_key = success_key.format(idx)
        executor.running[task_key] = True
        executor.execute_async(key=task_key, command=success_command)

    executor.running['fail'] = True
    executor.execute_async(key='fail', command=fail_command)

    executor.end()

    # Only the bounded-parallelism implementation keeps a task queue that
    # survives end(); check it drained fully in that mode.
    if isinstance(executor.impl, LocalExecutor._LimitedParallelism):
        self.assertTrue(executor.queue.empty())
    self.assertEqual(len(executor.running), 0)
    self.assertTrue(executor.result_queue.empty())

    for idx in range(self.TEST_SUCCESS_COMMANDS):
        self.assertEqual(executor.event_buffer[success_key.format(idx)], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    # Unlimited parallelism spawns one worker per command; otherwise the pool
    # is capped at `parallelism` workers.
    expected = self.TEST_SUCCESS_COMMANDS + 1 if parallelism == 0 else parallelism
    self.assertEqual(executor.workers_used, expected)
def _test_execute(self, parallelism, success_command, fail_command):
    """Shared driver: submit TEST_SUCCESS_COMMANDS copies of `success_command`
    and one `fail_command` under TaskInstance-style tuple keys, then verify
    event states and worker usage for the given parallelism (0 = unlimited)."""
    executor = LocalExecutor(parallelism=parallelism)
    executor.start()

    success_key = 'success {}'
    # Nothing should be queued before we submit work.
    self.assertTrue(executor.result_queue.empty())

    execution_date = datetime.datetime.now()
    for idx in range(self.TEST_SUCCESS_COMMANDS):
        # Keys mimic TaskInstance keys: (task_id, dag_id, execution_date, try_number).
        key = (success_key.format(idx), 'fake_ti', execution_date, 0)
        executor.running.add(key)
        executor.execute_async(key=key, command=success_command)

    fail_key = ('fail', 'fake_ti', execution_date, 0)
    executor.running.add(fail_key)
    executor.execute_async(key=fail_key, command=fail_command)

    executor.end()
    # By that time Queues are already shutdown so we cannot check if they are empty
    self.assertEqual(len(executor.running), 0)

    for idx in range(self.TEST_SUCCESS_COMMANDS):
        key = (success_key.format(idx), 'fake_ti', execution_date, 0)
        self.assertEqual(executor.event_buffer[key][0], State.SUCCESS)
    self.assertEqual(executor.event_buffer[fail_key][0], State.FAILED)

    # Unlimited parallelism spawns one worker per command; otherwise the pool
    # is capped at `parallelism` workers.
    expected = self.TEST_SUCCESS_COMMANDS + 1 if parallelism == 0 else parallelism
    self.assertEqual(executor.workers_used, expected)
def execution_parallelism(self, mock_check_call, parallelism=0):
    """Like the unmocked variant, but stubs the subprocess call so the test
    never shells out: the stub succeeds only for `success_command` and raises
    CalledProcessError for anything else. Verifies event states and worker
    usage for the given parallelism (0 = unlimited)."""
    success_command = ['airflow', 'tasks', 'run', 'true', 'some_parameter']
    fail_command = ['airflow', 'tasks', 'run', 'false']

    def fake_execute_command(command, close_fds=True):  # pylint: disable=unused-argument
        # Succeed only for the designated success command.
        if command == success_command:
            return 0
        raise subprocess.CalledProcessError(returncode=1, cmd=command)

    mock_check_call.side_effect = fake_execute_command

    executor = LocalExecutor(parallelism=parallelism)
    executor.start()

    success_key = 'success {}'
    # Nothing should be queued before we submit work.
    self.assertTrue(executor.result_queue.empty())

    execution_date = datetime.datetime.now()
    for idx in range(self.TEST_SUCCESS_COMMANDS):
        # Keys mimic TaskInstance keys: (task_id, dag_id, execution_date, try_number).
        key = (success_key.format(idx), 'fake_ti', execution_date, 0)
        executor.running.add(key)
        executor.execute_async(key=key, command=success_command)

    fail_key = ('fail', 'fake_ti', execution_date, 0)
    executor.running.add(fail_key)
    executor.execute_async(key=fail_key, command=fail_command)

    executor.end()
    # By that time Queues are already shutdown so we cannot check if they are empty
    self.assertEqual(len(executor.running), 0)

    for idx in range(self.TEST_SUCCESS_COMMANDS):
        key = (success_key.format(idx), 'fake_ti', execution_date, 0)
        self.assertEqual(executor.event_buffer[key][0], State.SUCCESS)
    self.assertEqual(executor.event_buffer[fail_key][0], State.FAILED)

    # Unlimited parallelism spawns one worker per command; otherwise the pool
    # is capped at `parallelism` workers.
    expected = self.TEST_SUCCESS_COMMANDS + 1 if parallelism == 0 else parallelism
    self.assertEqual(executor.workers_used, expected)
def execution_parallelism(self, parallelism=0):
    """Run TEST_SUCCESS_COMMANDS succeeding shell commands plus one failing
    command and verify per-key result states, cleanup of `running`, and the
    number of workers used for the given parallelism (0 = unlimited).

    Fix: the state checks used ``assertTrue(buffer[key], State.X)``, which
    treats ``State.X`` as the assertion *message* and therefore only checked
    that the buffered value was truthy — State.FAILED would also have passed.
    They now use ``assertEqual``, matching the sibling tests in this file.
    """
    executor = LocalExecutor(parallelism=parallelism)
    executor.start()

    success_key = 'success {}'
    success_command = 'echo {}'
    fail_command = 'exit 1'

    for i in range(self.TEST_SUCCESS_COMMANDS):
        key, command = success_key.format(i), success_command.format(i)
        executor.execute_async(key=key, command=command)
        executor.running[key] = True

    # errors are propagated for some reason; failure is still asserted via
    # event_buffer below, so a best-effort swallow is intentional — but only
    # of ordinary exceptions, not KeyboardInterrupt/SystemExit.
    try:
        executor.execute_async(key='fail', command=fail_command)
    except Exception:
        pass
    executor.running['fail'] = True

    if parallelism == 0:
        # Unlimited parallelism should finish promptly; guard against hangs.
        with timeout(seconds=5):
            executor.end()
    else:
        executor.end()

    for i in range(self.TEST_SUCCESS_COMMANDS):
        key = success_key.format(i)
        # was assertTrue(..., State.SUCCESS): the second argument was the
        # failure message, so only truthiness was checked; compare for real.
        self.assertEqual(executor.event_buffer[key], State.SUCCESS)
    self.assertEqual(executor.event_buffer['fail'], State.FAILED)

    for i in range(self.TEST_SUCCESS_COMMANDS):
        self.assertNotIn(success_key.format(i), executor.running)
    self.assertNotIn('fail', executor.running)

    # Unlimited parallelism spawns one worker per command; otherwise the pool
    # is capped at `parallelism` workers.
    expected = self.TEST_SUCCESS_COMMANDS + 1 if parallelism == 0 else parallelism
    self.assertEqual(executor.workers_used, expected)
def test_scheduler_task(self):
    """End-to-end check of LocalExecutor's event-based scheduling actions:
    START a long-sleeping task, RESTART it (old process tree must die, a new
    one must appear, task-execution seq_num increments), then STOP it (state
    becomes KILLED and the whole process tree is gone).

    NOTE(review): relies on fixed sleeps (30s/10s) for the executor to act;
    assumes the environment is fast enough — flaky on loaded machines.
    """
    TEST_DAG_FOLDER = os.environ['AIRFLOW__CORE__DAGS_FOLDER']
    DEFAULT_DATE = timezone.datetime(2020, 1, 1)
    dag_id = 'test_event_based_dag'
    task_id = 'sleep_1000_secs'
    # --- DB setup: a RUNNING dagrun with one SCHEDULED task instance ---
    with create_session() as session:
        dag_bag = DagBag(
            dag_folder=TEST_DAG_FOLDER,
            include_examples=False,
        )
        dag = dag_bag.get_dag(dag_id)
        task = dag.get_task(task_id)
        dag.create_dagrun(
            run_id="sleep_1000_secs_run",
            state=State.RUNNING,
            execution_date=DEFAULT_DATE,
            start_date=DEFAULT_DATE,
            session=session,
        )
        ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        ti.state = State.SCHEDULED
        dag_model = DagModel(
            dag_id=dag_id,
            is_paused=False,
            concurrency=5,
            has_task_concurrency_limits=False,
        )
        session.merge(dag_model)
        session.merge(ti)
        session.commit()
    # NOTE(review): session scope reconstructed to end here — confirm against
    # the original formatting; the executor below does not use `session`.
    executor = LocalExecutor(2)
    executor.start()
    executor.heartbeat()
    # --- START the task and verify its process tree ---
    executor.schedule_task(ti.key, SchedulingAction.START)
    executor.heartbeat()
    time.sleep(30)  # wait for task instance started
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.RUNNING)
    process = psutil.Process(ti.pid)
    self.assertIsNotNone(process)
    # Expect exactly one child and one grandchild (runner -> actual task).
    child = process.children(recursive=False)
    self.assertEqual(1, len(child))
    grandchild = child[0].children(recursive=False)
    self.assertEqual(1, len(grandchild))
    tes = self._check_task_execution(ti)
    self.assertEqual(1, len(tes))
    # restart the task instance
    executor.schedule_task(ti.key, SchedulingAction.RESTART)
    executor.heartbeat()
    time.sleep(30)
    # The old process tree must be fully terminated ...
    self.assertFalse(self._check_process_exist(process.pid))
    self.assertFalse(self._check_process_exist(child[0].pid))
    self.assertFalse(self._check_process_exist(grandchild[0].pid))
    # ... and a fresh one must be running with the same tree shape.
    ti.refresh_from_db()
    self.assertEqual(ti.state, State.RUNNING)
    process = psutil.Process(ti.pid)
    self.assertIsNotNone(process)
    child = process.children(recursive=False)
    self.assertEqual(1, len(child))
    grandchild = child[0].children(recursive=False)
    self.assertEqual(1, len(grandchild))
    # A second task-execution record appears; seq_num tracks the attempt.
    tes = self._check_task_execution(ti)
    self.assertEqual(2, len(tes))
    self.assertEqual(2, tes[0].seq_num)
    # --- STOP the task: state goes to KILLED and the tree disappears ---
    executor.schedule_task(ti.key, SchedulingAction.STOP)
    ti.refresh_from_db()
    time.sleep(10)
    self.assertEqual(State.KILLED, ti.state)
    self.assertFalse(self._check_process_exist(process.pid))
    self.assertFalse(self._check_process_exist(child[0].pid))
    self.assertFalse(self._check_process_exist(grandchild[0].pid))
    self._check_task_execution(ti)
    executor.end()