def log(self):
    """Return logger for a sensor instance object."""
    # The created log_id is used inside of smart sensor as the key to fetch
    # the corresponding in memory log handler.
    si = self.si
    si.raw = False  # Otherwise set_context will fail
    log_id = "-".join(
        [
            si.dag_id,
            si.task_id,
            si.execution_date.strftime("%Y_%m_%dT%H_%M_%S_%f"),
            str(si.try_number),
        ]
    )
    logger = logging.getLogger(f'airflow.task.{log_id}')

    if len(logger.handlers) == 0:
        handler = self.create_new_task_handler()
        logger.addHandler(handler)
        set_context(logger, si)

        line_break = "-" * 120
        logger.info(line_break)
        logger.info(
            "Processing sensor task %s in smart sensor service on host: %s",
            self.ti_key,
            get_hostname(),
        )
        logger.info(line_break)
    return logger
def _run_task_instance(self, ti, mark_success, pool, session=None):
    # set proper state and try number to keep logger in sync
    if isinstance(ti, SimpleTaskInstance):
        from airflow.models import TaskInstance

        dag = self.dag_bag.get_dag(ti.dag_id)
        task = dag.get_task(ti.task_id)
        ti = TaskInstance(task, ti.execution_date)
    ti.state = State.RUNNING
    ti._try_number += 1
    # let save state
    session.merge(ti)
    session.commit()

    # backward compatible with airflow loggers
    from airflow.utils.log import logging_mixin

    logging_mixin.set_context(logging.root, ti)

    try:
        ti._run_raw_task(mark_success=mark_success, job_id=ti.job_id, pool=pool)
    finally:
        for handler in logging.root.handlers:
            if handler.name == "task":
                handler.close()
def test_file_task_handler(self):
    def task_callable(ti, **kwargs):
        ti.log.info("test")

    dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='task_for_testing_file_log_handler',
        dag=dag,
        python_callable=task_callable,
        provide_context=True
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

    logger = ti.log
    ti.log.disabled = False

    file_handler = next(
        (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
    )
    self.assertIsNotNone(file_handler)

    set_context(logger, ti)
    self.assertIsNotNone(file_handler.handler)
    # We expect set_context generates a file locally.
    log_filename = file_handler.handler.baseFilename
    self.assertTrue(os.path.isfile(log_filename))
    self.assertTrue(log_filename.endswith("1.log"), log_filename)

    ti.run(ignore_ti_state=True)

    file_handler.flush()
    file_handler.close()

    self.assertTrue(hasattr(file_handler, 'read'))
    # Return value of read must be a tuple of list and list.
    logs, metadatas = file_handler.read(ti)
    self.assertTrue(isinstance(logs, list))
    self.assertTrue(isinstance(metadatas, list))
    self.assertEqual(len(logs), 1)
    self.assertEqual(len(logs), len(metadatas))
    self.assertTrue(isinstance(metadatas[0], dict))
    target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n'

    # We should expect our log line from the callable above to appear in
    # the logs we read back
    six.assertRegex(self, logs[0], target_re, "Logs were " + str(logs))

    # Remove the generated tmp log file.
    os.remove(log_filename)
def test_formatter(self):
    dag = DAG('test_dag', start_date=DEFAULT_DATE)
    task = DummyOperator(task_id='test_task', dag=dag)
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

    logger = ti.log
    ti.log.disabled = False

    handler = next(
        (handler for handler in logger.handlers if handler.name == TASK_HANDLER), None
    )
    self.assertIsNotNone(handler)

    # setting the expected value of the formatter
    expected_formatter_value = "test_dag-test_task:" + handler.formatter._fmt
    set_context(logger, ti)
    self.assertEqual(expected_formatter_value, handler.formatter._fmt)
def test_file_task_handler(self):
    def task_callable(ti, **kwargs):
        ti.log.info("test")

    dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
    dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='task_for_testing_file_log_handler',
        dag=dag,
        python_callable=task_callable,
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

    logger = ti.log
    ti.log.disabled = False

    file_handler = next(
        (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
    )
    assert file_handler is not None

    set_context(logger, ti)
    assert file_handler.handler is not None
    # We expect set_context generates a file locally.
    log_filename = file_handler.handler.baseFilename
    assert os.path.isfile(log_filename)
    assert log_filename.endswith("1.log"), log_filename

    ti.run(ignore_ti_state=True)

    file_handler.flush()
    file_handler.close()

    assert hasattr(file_handler, 'read')
    # Return value of read must be a tuple of list and list.
    logs, metadatas = file_handler.read(ti)
    assert isinstance(logs, list)
    assert isinstance(metadatas, list)
    assert len(logs) == 1
    assert len(logs) == len(metadatas)
    assert isinstance(metadatas[0], dict)
    target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n'

    # We should expect our log line from the callable above to appear in
    # the logs we read back
    assert re.search(target_re, logs[0][0][-1]), "Logs were " + str(logs)

    # Remove the generated tmp log file.
    os.remove(log_filename)
def test_set_context(self):
    handler1 = mock.MagicMock()
    handler2 = mock.MagicMock()
    parent = mock.MagicMock()
    parent.propagate = False
    parent.handlers = [handler1]
    log = mock.MagicMock()
    log.handlers = [handler2]
    log.parent = parent
    log.propagate = True

    value = "test"
    set_context(log, value)

    handler1.set_context.assert_called_with(value)
    handler2.set_context.assert_called_with(value)
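# For reference, the behaviour exercised by test_set_context above is roughly the
# following: set_context walks up the logger hierarchy while `propagate` is true and
# calls set_context on every handler it encounters along the way. This is a hedged
# sketch of that logic, not the verbatim library implementation; the helper name
# _set_context_sketch is illustrative only.
def _set_context_sketch(logger, value):
    _logger = logger
    while _logger:
        for handler in _logger.handlers:
            try:
                handler.set_context(value)
            except AttributeError:
                # Handlers without a set_context method are simply skipped.
                pass
        # Keep climbing only while the current logger propagates to its parent.
        _logger = _logger.parent if _logger.propagate else None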
def test_file_task_handler_when_ti_value_is_invalid(self):
    def task_callable(ti, **kwargs):
        ti.log.info("test")

    dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
    dag.create_dagrun(run_type=DagRunType.MANUAL, state=State.RUNNING, execution_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='task_for_testing_file_log_handler',
        dag=dag,
        python_callable=task_callable,
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)

    logger = ti.log
    ti.log.disabled = False

    file_handler = next(
        (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
    )
    assert file_handler is not None

    set_context(logger, ti)
    assert file_handler.handler is not None
    # We expect set_context generates a file locally.
    log_filename = file_handler.handler.baseFilename
    assert os.path.isfile(log_filename)
    assert log_filename.endswith("1.log"), log_filename

    ti.run(ignore_ti_state=True)

    file_handler.flush()
    file_handler.close()

    assert hasattr(file_handler, 'read')
    # Return value of read must be a tuple of list and list.
    # passing invalid `try_number` to read function
    logs, metadatas = file_handler.read(ti, 0)
    assert isinstance(logs, list)
    assert isinstance(metadatas, list)
    assert len(logs) == 1
    assert len(logs) == len(metadatas)
    assert isinstance(metadatas[0], dict)
    assert logs[0][0][0] == "default_host"
    assert logs[0][0][1] == "Error fetching the logs. Try number 0 is invalid."

    # Remove the generated tmp log file.
    os.remove(log_filename)
def test_file_task_handler_running(self):
    def task_callable(ti, **kwargs):
        ti.log.info("test")

    dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='task_for_testing_file_log_handler',
        dag=dag,
        python_callable=task_callable,
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.try_number = 2
    ti.state = State.RUNNING

    logger = ti.log
    ti.log.disabled = False

    file_handler = next(
        (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
    )
    assert file_handler is not None

    set_context(logger, ti)
    assert file_handler.handler is not None
    # We expect set_context generates a file locally.
    log_filename = file_handler.handler.baseFilename
    assert os.path.isfile(log_filename)
    assert log_filename.endswith("2.log"), log_filename

    logger.info("Test")

    # Return value of read must be a tuple of list and list.
    logs, metadatas = file_handler.read(ti)
    assert isinstance(logs, list)
    # Logs for running tasks should show up too.
    assert isinstance(logs, list)
    assert isinstance(metadatas, list)
    assert len(logs) == 2
    assert len(logs) == len(metadatas)
    assert isinstance(metadatas[0], dict)

    # Remove the generated tmp log file.
    os.remove(log_filename)
def test_file_task_handler_running(self):
    def task_callable(ti, **kwargs):
        ti.log.info("test")

    dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='task_for_testing_file_log_handler',
        dag=dag,
        python_callable=task_callable,
        provide_context=True
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.try_number = 2
    ti.state = State.RUNNING

    logger = ti.log
    ti.log.disabled = False

    file_handler = next(
        (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
    )
    self.assertIsNotNone(file_handler)

    set_context(logger, ti)
    self.assertIsNotNone(file_handler.handler)
    # We expect set_context generates a file locally.
    log_filename = file_handler.handler.baseFilename
    self.assertTrue(os.path.isfile(log_filename))
    self.assertTrue(log_filename.endswith("2.log"), log_filename)

    logger.info("Test")

    # Return value of read must be a tuple of list and list.
    logs, metadatas = file_handler.read(ti)
    self.assertTrue(isinstance(logs, list))
    # Logs for running tasks should show up too.
    self.assertTrue(isinstance(logs, list))
    self.assertTrue(isinstance(metadatas, list))
    self.assertEqual(len(logs), 2)
    self.assertEqual(len(logs), len(metadatas))
    self.assertTrue(isinstance(metadatas[0], dict))

    # Remove the generated tmp log file.
    os.remove(log_filename)
def test_file_task_handler_running(self):
    def task_callable(ti, **kwargs):
        ti.log.info("test")

    dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE)
    task = PythonOperator(
        task_id='task_for_testing_file_log_handler',
        dag=dag,
        python_callable=task_callable,
        provide_context=True
    )
    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.try_number = 2
    ti.state = State.RUNNING

    logger = ti.log
    ti.log.disabled = False

    file_handler = next(
        (handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None
    )
    self.assertIsNotNone(file_handler)

    set_context(logger, ti)
    self.assertIsNotNone(file_handler.handler)
    # We expect set_context generates a file locally.
    log_filename = file_handler.handler.baseFilename
    self.assertTrue(os.path.isfile(log_filename))
    self.assertTrue(log_filename.endswith("2.log"), log_filename)

    logger.info("Test")

    # Return value of read must be a list.
    logs = file_handler.read(ti)
    self.assertTrue(isinstance(logs, list))
    # Logs for running tasks should show up too.
    self.assertEqual(len(logs), 2)

    # Remove the generated tmp log file.
    os.remove(log_filename)
def _run_file_processor(
    result_channel: MultiprocessingConnection,
    parent_channel: MultiprocessingConnection,
    file_path: str,
    pickle_dags: bool,
    dag_ids: Optional[List[str]],
    thread_name: str,
    callback_requests: List[CallbackRequest],
) -> None:
    """
    Process the given file.

    :param result_channel: the connection to use for passing back the result
    :type result_channel: multiprocessing.Connection
    :param parent_channel: the parent end of the channel to close in the child
    :type parent_channel: multiprocessing.Connection
    :param file_path: the file to process
    :type file_path: str
    :param pickle_dags: whether to pickle the DAGs found in the file and save them to the DB
    :type pickle_dags: bool
    :param dag_ids: if specified, only examine DAG IDs that are in this list
    :type dag_ids: list[str]
    :param thread_name: the name to use for the process that is launched
    :type thread_name: str
    :param callback_requests: failure callbacks to execute
    :type callback_requests: List[airflow.utils.callback_requests.CallbackRequest]
    :return: None; the result is sent back over ``result_channel``
    """
    # This helper runs in the newly created process
    log: logging.Logger = logging.getLogger("airflow.processor")

    # Since we share all open FDs from the parent, we need to close the parent side of the pipe
    # here in the child, else it won't get closed properly until we exit.
    log.info("Closing parent pipe")
    parent_channel.close()
    del parent_channel

    set_context(log, file_path)
    setproctitle(f"airflow scheduler - DagFileProcessor {file_path}")

    try:
        # redirect stdout/stderr to log
        with redirect_stdout(StreamLogWriter(log, logging.INFO)), redirect_stderr(
            StreamLogWriter(log, logging.WARN)
        ), Stats.timer() as timer:
            # Re-configure the ORM engine as there are issues with multiple processes
            settings.configure_orm()

            # Change the thread name to differentiate log lines. This is
            # really a separate process, but changing the name of the
            # process doesn't work, so changing the thread name instead.
            threading.current_thread().name = thread_name

            log.info("Started process (PID=%s) to work on %s", os.getpid(), file_path)
            dag_file_processor = DagFileProcessor(dag_ids=dag_ids, log=log)
            result: Tuple[int, int] = dag_file_processor.process_file(
                file_path=file_path,
                pickle_dags=pickle_dags,
                callback_requests=callback_requests,
            )
            result_channel.send(result)
        log.info("Processing %s took %.3f seconds", file_path, timer.duration)
    except Exception:  # pylint: disable=broad-except
        # Log exceptions through the logging framework.
        log.exception("Got an exception! Propagating...")
        raise
    finally:
        # We re-initialized the ORM within this Process above so we need to
        # tear it down manually here
        settings.dispose_orm()
        result_channel.close()
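# A minimal, self-contained illustration of the stdout redirection used in
# _run_file_processor above: StreamLogWriter wraps a logger as a file-like object,
# so anything printed inside the redirect_stdout block is emitted through that
# logger instead of the real stdout. This is illustrative usage only; the logger
# name "example" is an arbitrary choice, not something from the original code.
import logging
from contextlib import redirect_stdout

from airflow.utils.log.logging_mixin import StreamLogWriter

logging.basicConfig(level=logging.INFO)
example_log = logging.getLogger("example")

with redirect_stdout(StreamLogWriter(example_log, logging.INFO)):
    print("this line is routed to the 'example' logger at INFO level")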