def test_parse_once(self):
    """Run the agent with a run limit of 1 and expect the DAG to be harvested once."""

    def build_processor(file_path, zombies):
        return DagFileProcessorProcess(file_path, False, [], zombies)

    dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    # Async mode is switched off whenever the configured connection mentions sqlite.
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    agent = DagFileProcessorAgent(
        dag_file,
        [dag_file],
        1,
        build_processor,
        timedelta.max,
        async_mode,
    )
    agent.start()

    harvested = []
    if not async_mode:
        # In synchronous mode the agent has to be driven explicitly.
        agent.heartbeat()
    while not agent.done:
        if not async_mode:
            agent.wait_until_finished()
        harvested.extend(agent.harvest_simple_dags())

    seen_dag_ids = [simple_dag.dag_id for simple_dag in harvested]
    self.assertEqual(seen_dag_ids.count('test_start_date_scheduling'), 1)
def test_handle_failure_callback_with_zobmies_are_correctly_passed_to_dag_file_processor(self):
    """
    Verify that the failure callback requests built for zombie task instances
    reach the dag file processors unchanged until the next zombie detection
    pass is triggered.
    """
    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
    overrides = {
        ('scheduler', 'max_threads'): '1',
        ('core', 'load_examples'): 'False',
    }
    with conf_vars(overrides):
        dagbag = DagBag(test_dag_path)
        with create_session() as session:
            session.query(LJ).delete()
            dag = dagbag.get_dag('test_example_bash_operator')
            dag.sync_to_db()
            task = dag.get_task(task_id='run_this_last')

            # A running task instance attached to a job that is already shut down.
            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            local_job = LJ(ti)
            local_job.state = State.SHUTDOWN
            local_job.id = 1
            ti.job_id = local_job.id

            session.add(local_job)
            session.add(ti)
            session.commit()

            fake_failure_callback_requests = [
                FailureCallbackRequest(
                    full_filepath=dag.full_filepath,
                    simple_task_instance=SimpleTaskInstance(ti),
                    msg="Message",
                ),
            ]

        test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
        agent = DagFileProcessorAgent(
            test_dag_path,
            1,
            FakeDagFileProcessorRunner._fake_dag_processor_factory,
            timedelta.max,
            [],
            False,
            async_mode,
        )
        agent.start()

        harvested = []
        if not async_mode:
            agent.run_single_parsing_loop()
        while not agent.done:
            if not async_mode:
                agent.wait_until_finished()
            harvested.extend(agent.harvest_simple_dags())

        self.assertEqual(len(fake_failure_callback_requests), len(harvested))
        expected_keys = {
            request.simple_task_instance.key for request in fake_failure_callback_requests
        }
        actual_keys = {item.simple_task_instance.key for item in harvested}
        self.assertEqual(expected_keys, actual_keys)
def test_reload_module(self):
    """
    With core.logging_config_class pointing at a fake logging class path,
    reloading the logging module must leave the airflow.processor_manager
    logger unconfigured, so its log file is never created.
    """
    with settings_context(SETTINGS_FILE_VALID):
        # The manager process launched via DagFileProcessorAgent is what
        # attempts to reload the logging module.
        def build_processor(file_path, zombies):
            return DagFileProcessor(file_path, False, [], zombies)

        dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
        log_file_loc = conf.get('core', 'DAG_PROCESSOR_MANAGER_LOG_LOCATION')

        # Begin from a clean slate; a log file that is already absent is fine.
        try:
            os.remove(log_file_loc)
        except OSError:
            pass

        # max_runs is 0 so that no redundant processing takes place.
        agent = DagFileProcessorAgent(dag_file, [], 0, build_processor, async_mode)
        manager_process = agent._launch_process(
            agent._dag_directory,
            agent._file_paths,
            agent._max_runs,
            agent._processor_factory,
            agent._child_signal_conn,
            agent._stat_queue,
            agent._result_queue,
            agent._async_mode,
        )
        if not async_mode:
            agent.heartbeat()

        manager_process.join()

        # The reloaded logging config does not create this file, so it
        # must still be missing.
        self.assertFalse(os.path.isfile(log_file_loc))
def test_reload_module(self):
    """
    With logging.logging_config_class pointing at a fake logging class path,
    reloading the logging module must leave the airflow.processor_manager
    logger unconfigured, so its log file is never created.
    """
    with settings_context(SETTINGS_FILE_VALID):
        # Starting the DagFileProcessorAgent launches a process that will
        # attempt to reload the logging module.
        dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
        log_file_loc = conf.get('logging', 'DAG_PROCESSOR_MANAGER_LOG_LOCATION')

        # Begin from a clean slate; a log file that is already absent is fine.
        try:
            os.remove(log_file_loc)
        except OSError:
            pass

        # max_runs is 0 so that no redundant processing takes place.
        agent = DagFileProcessorAgent(
            dag_file,
            0,
            type(self)._processor_factory,
            timedelta.max,
            [],
            False,
            async_mode,
        )
        agent.start()
        if not async_mode:
            agent.run_single_parsing_loop()

        agent._process.join()

        # The reloaded logging config does not create this file, so it
        # must still be missing.
        assert not os.path.isfile(log_file_loc)
def test_launch_process(self):
    """Starting the agent must produce the DAG processor manager log file."""

    def build_processor(file_path, zombies):
        return DagFileProcessorProcess(file_path, False, [], zombies)

    dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    log_file_loc = conf.get('logging', 'DAG_PROCESSOR_MANAGER_LOG_LOCATION')

    # Drop any log file left behind earlier; it is fine if none exists.
    try:
        os.remove(log_file_loc)
    except OSError:
        pass

    # max_runs is 0 so that no redundant processing takes place.
    agent = DagFileProcessorAgent(
        dag_file,
        0,
        build_processor,
        timedelta.max,
        async_mode,
    )
    agent.start()
    if not async_mode:
        agent.run_single_parsing_loop()

    agent._process.join()

    self.assertTrue(os.path.isfile(log_file_loc))
def test_parse_once(self):
    """Run the agent with a run limit of 1 and expect the DAG to be harvested once."""
    dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    # Async mode is switched off whenever the configured connection mentions sqlite.
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    agent = DagFileProcessorAgent(
        dag_file,
        1,
        type(self)._processor_factory,
        timedelta.max,
        [],
        False,
        async_mode,
    )
    agent.start()

    harvested = []
    if not async_mode:
        # In synchronous mode the parsing loop has to be triggered explicitly.
        agent.run_single_parsing_loop()
    while not agent.done:
        if not async_mode:
            agent.wait_until_finished()
        harvested.extend(agent.harvest_serialized_dags())

    seen_dag_ids = [serialized.dag_id for serialized in harvested]
    self.assertEqual(seen_dag_ids.count('test_start_date_scheduling'), 1)
def test_launch_process(self):
    """Launching the manager process must produce the DAG processor manager log file."""

    def build_processor(file_path, zombies):
        return DagFileProcessor(file_path, False, [], zombies)

    dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    log_file_loc = conf.get('core', 'DAG_PROCESSOR_MANAGER_LOG_LOCATION')

    # Drop any log file left behind earlier; it is fine if none exists.
    try:
        os.remove(log_file_loc)
    except OSError:
        pass

    # max_runs is 0 so that no redundant processing takes place.
    agent = DagFileProcessorAgent(dag_file, [], 0, build_processor, async_mode)
    manager_process = agent._launch_process(
        agent._dag_directory,
        agent._file_paths,
        agent._max_runs,
        agent._processor_factory,
        agent._child_signal_conn,
        agent._stat_queue,
        agent._result_queue,
        agent._async_mode,
    )
    if not async_mode:
        agent.heartbeat()

    manager_process.join()

    self.assertTrue(os.path.isfile(log_file_loc))
def start(self):
    """
    Start DAG file processing and the background harvesting thread.

    Lists the Python files under ``self.subdir``, starts a
    ``DagFileProcessorAgent`` over them, stores the start timestamp in
    ``self.execute_start_time`` and runs ``self.run_parse_dags`` in a
    daemon thread kept in ``self.dag_process_thread``.
    """
    self.log.info("Running dag trigger loop for %s seconds", self.run_duration)
    self.log.info("Processing each file at most %s times", self.num_runs)

    # Build up a list of Python files that could contain DAGs
    self.log.info("Searching for files in %s", self.subdir)
    known_file_paths = list_py_file_paths(self.subdir)
    self.log.info("There are %s files in %s", len(known_file_paths), self.subdir)
    self.log.info("known files are %s.", str(known_file_paths))

    def processor_factory(file_path, zombies):
        return EventDagFileProcessor(file_path, self.pickle_dags, [], zombies)

    # When using sqlite, we do not use async_mode
    # so the scheduler job and DAG parser don't access the DB at the same time.
    async_mode = not self.using_sqlite

    processor_timeout_seconds = conf.getint('core', 'dag_file_processor_timeout')
    processor_timeout = timedelta(seconds=processor_timeout_seconds)
    self.processor_agent = DagFileProcessorAgent(
        self.subdir,
        known_file_paths,
        self.num_runs,
        processor_factory,
        processor_timeout,
        async_mode,
    )
    self.processor_agent.start()
    self.execute_start_time = timezone.utcnow()

    # Thread.setDaemon() is deprecated since Python 3.10; pass the flag to
    # the constructor instead.
    self.dag_process_thread = threading.Thread(target=self.run_parse_dags, args=(), daemon=True)
    self.dag_process_thread.start()
def test_parse_once(self):
    """Run the agent with a run limit of 1 and expect the DAG to be harvested once."""

    def build_processor(file_path, zombies):
        return DagFileProcessor(file_path, False, [], zombies)

    dag_file = os.path.join(TEST_DAG_FOLDER, 'test_scheduler_dags.py')
    # Async mode is switched off whenever the configured connection mentions sqlite.
    async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
    agent = DagFileProcessorAgent(dag_file, [dag_file], 1, build_processor, async_mode)
    agent.start()

    harvested = []
    while not agent.done:
        if not async_mode:
            # In synchronous mode each iteration drives the agent explicitly.
            agent.heartbeat()
            agent.wait_until_finished()
        harvested.extend(agent.harvest_simple_dags())

    seen_dag_ids = [simple_dag.dag_id for simple_dag in harvested]
    self.assertEqual(seen_dag_ids.count('test_start_date_scheduling'), 1)
def test_handle_failure_callback_with_zobmies_are_correctly_passed_to_dag_file_processor(self):
    """
    Verify that the failure callback requests built for zombie task instances
    reach the dag file processors unchanged until the next zombie detection
    pass is triggered.
    """
    test_dag_path = os.path.join(TEST_DAG_FOLDER, 'test_example_bash_operator.py')
    overrides = {
        ('scheduler', 'max_threads'): '1',
        ('core', 'load_examples'): 'False',
    }
    with conf_vars(overrides):
        dagbag = DagBag(test_dag_path)
        with create_session() as session:
            session.query(LJ).delete()
            dag = dagbag.get_dag('test_example_bash_operator')
            dag.sync_to_db()
            task = dag.get_task(task_id='run_this_last')

            # A running task instance attached to a job that is already shut down.
            ti = TI(task, DEFAULT_DATE, State.RUNNING)
            local_job = LJ(ti)
            local_job.state = State.SHUTDOWN
            local_job.id = 1
            ti.job_id = local_job.id

            session.add(local_job)
            session.add(ti)
            session.commit()

            fake_failure_callback_requests = [
                FailureCallbackRequest(
                    full_filepath=dag.full_filepath,
                    simple_task_instance=SimpleTaskInstance(ti),
                    msg="Message",
                ),
            ]

        class FakeDagFileProcessorRunner(DagFileProcessorProcess):
            # Stand-in processor: performs no parsing and simply reports the
            # requests handed to its constructor as its processing result.
            def __init__(self, file_path, pickle_dags, dag_id_white_list, failure_callback_requests):
                super().__init__(file_path, pickle_dags, dag_id_white_list, failure_callback_requests)
                self._result = failure_callback_requests, 0

            def start(self):
                pass

            @property
            def start_time(self):
                return DEFAULT_DATE

            @property
            def pid(self):
                return 1234

            @property
            def done(self):
                return True

            @property
            def result(self):
                return self._result

        def build_processor(file_path, failure_callback_requests):
            return FakeDagFileProcessorRunner(file_path, False, [], failure_callback_requests)

        async_mode = 'sqlite' not in conf.get('core', 'sql_alchemy_conn')
        agent = DagFileProcessorAgent(
            test_dag_path,
            1,
            build_processor,
            timedelta.max,
            async_mode,
        )
        agent.start()

        harvested = []
        if not async_mode:
            agent.heartbeat()
        while not agent.done:
            if not async_mode:
                agent.wait_until_finished()
            harvested.extend(agent.harvest_simple_dags())

        self.assertEqual(len(fake_failure_callback_requests), len(harvested))
        expected_keys = {
            request.simple_task_instance.key for request in fake_failure_callback_requests
        }
        actual_keys = {item.simple_task_instance.key for item in harvested}
        self.assertEqual(expected_keys, actual_keys)
class DagTrigger(Trigger):
    """
    process dag files and send DAG_RUN event to notification service.
    """

    def __init__(
        self,
        subdir,
        mailbox,
        run_duration=None,
        using_sqlite=True,
        num_runs=conf.getint('scheduler', 'num_runs', fallback=-1),
        processor_poll_interval=conf.getfloat('scheduler', 'processor_poll_interval', fallback=1)
    ) -> None:
        """
        Store the configuration and install SIGINT/SIGTERM handlers.

        :param subdir: directory searched for DAG files by ``start``
        :param mailbox: object whose ``send_message`` receives each DagRunEvent
        :param run_duration: logged loop duration; read from the
            ``scheduler.run_duration`` config option when None
        :param using_sqlite: when True the processor agent runs without async mode
        :param num_runs: upper bound on parsing loops; a negative value is
            replaced by ``sys.maxsize`` in ``run_parse_dags``
        :param processor_poll_interval: seconds slept between polling rounds
        """
        self.mailbox = mailbox
        self.pickle_dags = True
        self.using_sqlite = using_sqlite
        self.subdir = subdir
        self.num_runs = num_runs
        self.processor_agent = None
        self.execute_start_time = None
        if run_duration is None:
            self.run_duration = conf.getint('scheduler', 'run_duration')
        else:
            self.run_duration = run_duration
        self._processor_poll_interval = processor_poll_interval
        self.running = True
        self.dag_process_thread = None
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)

    def _exit_gracefully(self, signum, frame):
        """
        Helper method to clean up processor_agent to avoid leaving orphan processes.
        """
        self.log.info("Exiting gracefully upon receiving signal %s", signum)
        if self.processor_agent:
            self.processor_agent.end()
        sys.exit(os.EX_OK)

    def start(self):
        """
        Start the DagFileProcessorAgent and the background harvesting thread.

        Lists the Python files under ``self.subdir``, starts the agent over
        them, records ``self.execute_start_time`` and runs
        ``run_parse_dags`` in a daemon thread.
        """
        self.log.info("Running dag trigger loop for %s seconds", self.run_duration)
        self.log.info("Processing each file at most %s times", self.num_runs)

        # Build up a list of Python files that could contain DAGs
        self.log.info("Searching for files in %s", self.subdir)
        known_file_paths = list_py_file_paths(self.subdir)
        self.log.info("There are %s files in %s", len(known_file_paths), self.subdir)
        self.log.info("known files are %s.", str(known_file_paths))

        def processor_factory(file_path, zombies):
            return EventDagFileProcessor(file_path, self.pickle_dags, [], zombies)

        # When using sqlite, we do not use async_mode
        # so the scheduler job and DAG parser don't access the DB at the same time.
        async_mode = not self.using_sqlite

        processor_timeout_seconds = conf.getint('core', 'dag_file_processor_timeout')
        processor_timeout = timedelta(seconds=processor_timeout_seconds)
        self.processor_agent = DagFileProcessorAgent(
            self.subdir,
            known_file_paths,
            self.num_runs,
            processor_factory,
            processor_timeout,
            async_mode)
        self.processor_agent.start()
        self.execute_start_time = timezone.utcnow()

        # Thread.setDaemon() is deprecated since Python 3.10; pass the flag
        # to the constructor instead.
        self.dag_process_thread = threading.Thread(target=self.run_parse_dags, args=(), daemon=True)
        self.dag_process_thread.start()

    def _get_simple_dags(self):
        """Return whatever the processor agent has harvested since the last call."""
        return self.processor_agent.harvest_simple_dags()

    def run_parse_dags(self):
        """
        Poll the processor agent and forward every harvested DAG run as a
        DagRunEvent to the mailbox.

        Loops until ``self.running`` is cleared, ``self.num_runs`` productive
        iterations have happened, or the agent reports it is done; afterwards
        it idles until ``self.running`` is cleared.
        """
        # For the execute duration, parse and schedule DAGs
        num_runs = 0
        if self.num_runs < 0:
            self.num_runs = sys.maxsize
        while self.running and num_runs < self.num_runs:
            self.log.debug(
                "Starting Loop... num_runs: {0} max_runs {1}".format(
                    num_runs, self.num_runs))
            loop_start_time = time.time()

            if self.using_sqlite:
                self.processor_agent.heartbeat()
                # For the sqlite case w/ 1 thread, wait until the processor
                # is finished to avoid concurrent access to the DB.
                self.log.debug(
                    "Waiting for processors to finish since we're using sqlite"
                )
                self.processor_agent.wait_until_finished()

            self.log.debug("Harvesting DAG parsing results")
            simple_dag_runs = self._get_simple_dags()
            self.log.debug("Harvested {} SimpleDAGRuns".format(
                len(simple_dag_runs)))

            if 0 == len(simple_dag_runs):
                # Nothing harvested: back off and poll again without
                # counting this iteration as a run.
                sleep(self._processor_poll_interval)
                continue
            else:
                for simple_dag_run in simple_dag_runs:
                    event = DagRunEvent(dag_run_id=simple_dag_run.dag_run_id,
                                        simple_dag=simple_dag_run.simple_dag)
                    self.mailbox.send_message(event)
            num_runs += 1

            is_unit_test = conf.getboolean('core', 'unit_test_mode')
            loop_end_time = time.time()
            loop_duration = loop_end_time - loop_start_time
            self.log.debug("Ran scheduling loop in %.2f seconds", loop_duration)

            if not is_unit_test:
                self.log.debug("Sleeping for %.2f seconds", self._processor_poll_interval)
                time.sleep(self._processor_poll_interval)

            if self.processor_agent.done:
                self.log.info("Exiting scheduler loop as all files"
                              " have been processed {} times".format(
                                  self.num_runs))
                break

            if loop_duration < 1 and not is_unit_test:
                sleep_length = 1 - loop_duration
                self.log.debug(
                    "Sleeping for {0:.2f} seconds to prevent excessive logging"
                    .format(sleep_length))
                sleep(sleep_length)
            if is_unit_test:
                sleep(1)

        # Keep the thread alive until stop() clears the running flag.
        while self.running:
            time.sleep(1)

    def stop(self):
        """
        Stop the harvesting thread and shut the processor agent down.

        Safe to call even if ``start`` was never invoked: the thread and the
        agent are only touched when they exist.
        """
        self.running = False
        if self.dag_process_thread is not None:
            self.dag_process_thread.join()

        if self.processor_agent is None:
            # start() never ran, so there is nothing left to tear down.
            return

        # Stop any processors
        self.processor_agent.terminate()

        # Verify that all files were processed, and if so, deactivate DAGs that
        # haven't been touched by the scheduler as they likely have been
        # deleted.
        if self.processor_agent.all_files_processed:
            self.log.info(
                "Deactivating DAGs that haven't been touched since %s",
                self.execute_start_time.isoformat())
            models.DAG.deactivate_stale_dags(self.execute_start_time)

        self.processor_agent.end()