@mock.patch("airflow.utils.log.file_task_handler.FileTaskHandler.read")  # patch target assumed from the injected mock_read argument
def test_read_log_stream_should_read_each_try_in_turn(self, mock_read):
    # Each mocked read() returns (logs, metadatas) and immediately signals
    # end_of_log, so the reader should advance to the next try number.
    first_return = ([[('', "try_number=1.")]], [{"end_of_log": True}])
    second_return = ([[('', "try_number=2.")]], [{"end_of_log": True}])
    third_return = ([[('', "try_number=3.")]], [{"end_of_log": True}])
    fourth_return = ([[('', "should never be read")]], [{"end_of_log": True}])
    mock_read.side_effect = [first_return, second_return, third_return, fourth_return]

    task_log_reader = TaskLogReader()
    log_stream = task_log_reader.read_log_stream(ti=self.ti, try_number=None, metadata={})
    self.assertEqual(
        ['\ntry_number=1.\n', '\ntry_number=2.\n', '\ntry_number=3.\n'],
        list(log_stream),
    )

    mock_read.assert_has_calls(
        [
            mock.call(self.ti, 1, metadata={}),
            mock.call(self.ti, 2, metadata={}),
            mock.call(self.ti, 3, metadata={}),
        ],
        any_order=False,
    )
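# Why the expected items are framed as "\n<message>\n": for each (host, log)
# tuple the stream joins host and log with a newline and appends one more.
# A minimal sketch of that framing, inferred from the assertions above (the
# helper name is illustrative, not Airflow's):
def frame(host: str, log: str) -> str:
    return "\n".join([host, log]) + "\n"

assert frame("", "try_number=1.") == "\ntry_number=1.\n"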
def test_read_log_chunks_should_read_all_files(self):
    task_log_reader = TaskLogReader()
    logs, metadatas = task_log_reader.read_log_chunks(ti=self.ti, try_number=None, metadata={})

    assert [
        [
            (
                '',
                "*** Reading local file: "
                f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/1.log\n"
                "try_number=1.\n",
            )
        ],
        [
            (
                '',
                "*** Reading local file: "
                f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/2.log\n"
                "try_number=2.\n",
            )
        ],
        [
            (
                '',
                "*** Reading local file: "
                f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/3.log\n"
                "try_number=3.\n",
            )
        ],
    ] == logs
    assert {"end_of_log": True} == metadatas
@mock.patch("airflow.utils.log.file_task_handler.FileTaskHandler.read")  # patch target assumed from the injected mock_read argument
def test_read_log_stream_should_support_multiple_chunks(self, mock_read):
    # The first two chunks do not signal end_of_log, so the same try is read
    # again, carrying the previous call's metadata forward.
    first_return = ([[('', "1st line")]], [{}])
    second_return = ([[('', "2nd line")]], [{"end_of_log": False}])
    third_return = ([[('', "3rd line")]], [{"end_of_log": True}])
    fourth_return = ([[('', "should never be read")]], [{"end_of_log": True}])
    mock_read.side_effect = [first_return, second_return, third_return, fourth_return]

    task_log_reader = TaskLogReader()
    log_stream = task_log_reader.read_log_stream(ti=self.ti, try_number=1, metadata={})
    self.assertEqual(["\n1st line\n", "\n2nd line\n", "\n3rd line\n"], list(log_stream))

    mock_read.assert_has_calls(
        [
            mock.call(self.ti, 1, metadata={}),
            mock.call(self.ti, 1, metadata={}),
            mock.call(self.ti, 1, metadata={"end_of_log": False}),
        ],
        any_order=False,
    )
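# A minimal sketch (assumed helper, not Airflow source) of the continuation
# protocol the test above exercises: the metadata dict returned by
# read_log_chunks is passed back into the next call until it reports
# end_of_log, which is what read_log_stream does internally.
def consume_log_chunks(task_log_reader, ti, try_number):
    metadata = {}
    while not metadata.get("end_of_log"):
        logs, metadata = task_log_reader.read_log_chunks(ti, try_number, metadata)
        yield logs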
@provide_session  # injects the SQLAlchemy session argument
def get_log(session, dag_id, dag_run_id, task_id, task_try_number, full_content=False, token=None):
    """Get logs for a specific task instance."""
    key = current_app.config["SECRET_KEY"]
    if not token:
        metadata = {}
    else:
        try:
            metadata = URLSafeSerializer(key).loads(token)
        except BadSignature:
            raise BadRequest("Bad Signature. Please use only the tokens provided by the API.")

    if metadata.get('download_logs'):
        full_content = True
    metadata['download_logs'] = full_content

    task_log_reader = TaskLogReader()
    if not task_log_reader.supports_read:
        raise BadRequest("Task log handler does not support read logs.")

    ti = (
        session.query(TaskInstance)
        .filter(
            TaskInstance.dag_id == dag_id,
            TaskInstance.task_id == task_id,
            TaskInstance.run_id == dag_run_id,
        )
        .join(TaskInstance.dag_run)
        .options(eagerload(TaskInstance.dag_run))
        .one_or_none()
    )
    if ti is None:
        metadata['end_of_log'] = True
        raise NotFound(title="TaskInstance not found")

    dag = current_app.dag_bag.get_dag(dag_id)
    if dag:
        try:
            ti.task = dag.get_task(ti.task_id)
        except TaskNotFound:
            pass

    return_type = request.accept_mimetypes.best_match(['text/plain', 'application/json'])

    # return_type is one of the two mimetypes above, or None
    if return_type == 'application/json' or return_type is None:  # default
        logs, metadata = task_log_reader.read_log_chunks(ti, task_try_number, metadata)
        logs = logs[0] if task_try_number is not None else logs
        token = URLSafeSerializer(key).dumps(metadata)
        return logs_schema.dump(LogResponseObject(continuation_token=token, content=logs))

    # text/plain: stream the logs
    logs = task_log_reader.read_log_stream(ti, task_try_number, metadata)
    return Response(logs, headers={"Content-Type": return_type})
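# Hypothetical client for the endpoint above (base_url/auth and the stop
# heuristic are assumptions, not Airflow source): page through a task's log by
# round-tripping the continuation_token from each JSON response.
import requests

def iter_log_pages(base_url, dag_id, dag_run_id, task_id, try_number, auth):
    url = (
        f"{base_url}/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}"
        f"/taskInstances/{task_id}/logs/{try_number}"
    )
    token = None
    while True:
        params = {"token": token} if token else {}
        resp = requests.get(url, params=params, auth=auth, headers={"Accept": "application/json"})
        resp.raise_for_status()
        payload = resp.json()
        yield payload["content"]
        new_token = payload.get("continuation_token")
        if not new_token or new_token == token:
            break  # heuristic: stop once the server stops advancing the token
        token = new_token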
def test_read_log_stream_should_read_one_try(self):
    task_log_reader = TaskLogReader()
    stream = task_log_reader.read_log_stream(ti=self.ti, try_number=1, metadata={})
    assert [
        "\n*** Reading local file: "
        f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/1.log\n"
        "try_number=1.\n"
        "\n"
    ] == list(stream)
def assert_remote_logs(self, expected_message, ti):
    with provide_gcp_context(GCP_STACKDDRIVER), conf_vars(
        {
            ('logging', 'remote_logging'): 'True',
            ('logging', 'remote_base_log_folder'): f"stackdriver://{self.log_name}",
        }
    ):
        # Reload the logging config so the remote-logging settings take effect.
        from airflow.config_templates import airflow_local_settings

        importlib.reload(airflow_local_settings)
        settings.configure_logging()

        task_log_reader = TaskLogReader()
        logs = "\n".join(task_log_reader.read_log_stream(ti, try_number=None, metadata={}))
        self.assertIn(expected_message, logs)
def test_read_log_chunks_should_read_one_try(self):
    task_log_reader = TaskLogReader()
    logs, metadatas = task_log_reader.read_log_chunks(ti=self.ti, try_number=1, metadata={})

    self.assertEqual(
        [
            (
                '',
                "*** Reading local file: "
                f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/1.log\n"
                "try_number=1.\n",
            )
        ],
        logs[0],
    )
    self.assertEqual({"end_of_log": True}, metadatas)
def assert_remote_logs(self, expected_message, ti):
    with provide_gcp_context(GCP_GCS_KEY), conf_vars(
        {
            ('logging', 'remote_logging'): 'True',
            ('logging', 'remote_base_log_folder'): f"gs://{self.bucket_name}/path/to/logs",
            ('logging', 'remote_log_conn_id'): "google_cloud_default",
        }
    ):
        # Reload the logging config so the remote-logging settings take effect.
        from airflow.config_templates import airflow_local_settings

        importlib.reload(airflow_local_settings)
        settings.configure_logging()

        task_log_reader = TaskLogReader()
        logs = "\n".join(task_log_reader.read_log_stream(ti, try_number=None, metadata={}))
        self.assertIn(expected_message, logs)
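# The conf_vars overrides in the two helpers above correspond to this
# airflow.cfg block (the paths and connection id are the tests' placeholders,
# not recommendations):
#
#   [logging]
#   remote_logging = True
#   remote_base_log_folder = gs://<bucket>/path/to/logs   # or stackdriver://<log_name>
#   remote_log_conn_id = google_cloud_default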
def test_read_log_stream_should_read_all_logs(self):
    task_log_reader = TaskLogReader()
    stream = task_log_reader.read_log_stream(ti=self.ti, try_number=None, metadata={})
    self.assertEqual(
        [
            "\n*** Reading local file: "
            f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/1.log\n"
            "try_number=1.\n"
            "\n",
            "\n*** Reading local file: "
            f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/2.log\n"
            "try_number=2.\n"
            "\n",
            "\n*** Reading local file: "
            f"{self.log_dir}/dag_log_reader/task_log_reader/2017-09-01T00.00.00+00.00/3.log\n"
            "try_number=3.\n"
            "\n",
        ],
        list(stream),
    )
@provide_session  # injects the SQLAlchemy session argument
def get_log(
    *,
    dag_id: str,
    dag_run_id: str,
    task_id: str,
    task_try_number: int,
    full_content: bool = False,
    map_index: int = -1,
    token: Optional[str] = None,
    session: Session = NEW_SESSION,
) -> APIResponse:
    """Get logs for a specific task instance."""
    key = get_airflow_app().config["SECRET_KEY"]
    if not token:
        metadata = {}
    else:
        try:
            metadata = URLSafeSerializer(key).loads(token)
        except BadSignature:
            raise BadRequest("Bad Signature. Please use only the tokens provided by the API.")

    if metadata.get('download_logs'):
        full_content = True
    metadata['download_logs'] = full_content

    task_log_reader = TaskLogReader()
    if not task_log_reader.supports_read:
        raise BadRequest("Task log handler does not support read logs.")

    ti = (
        session.query(TaskInstance)
        .filter(
            TaskInstance.task_id == task_id,
            TaskInstance.dag_id == dag_id,
            TaskInstance.run_id == dag_run_id,
            TaskInstance.map_index == map_index,
        )
        .join(TaskInstance.dag_run)
        .one_or_none()
    )
    if ti is None:
        metadata['end_of_log'] = True
        raise NotFound(title="TaskInstance not found")

    dag = get_airflow_app().dag_bag.get_dag(dag_id)
    if dag:
        try:
            ti.task = dag.get_task(ti.task_id)
        except TaskNotFound:
            pass

    return_type = request.accept_mimetypes.best_match(['text/plain', 'application/json'])

    # return_type is one of the two mimetypes above, or None
    logs: Any
    if return_type == 'application/json' or return_type is None:  # default
        logs, metadata = task_log_reader.read_log_chunks(ti, task_try_number, metadata)
        logs = logs[0] if task_try_number is not None else logs
        # dumps() returns str while token is Optional[str]; the reassignment is safe
        token = URLSafeSerializer(key).dumps(metadata)  # type: ignore[assignment]
        return logs_schema.dump(LogResponseObject(continuation_token=token, content=logs))

    # text/plain: stream the logs
    logs = task_log_reader.read_log_stream(ti, task_try_number, metadata)
    return Response(logs, headers={"Content-Type": return_type})
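# Hypothetical illustration (not Airflow source) of the content negotiation in
# the handler above: the same URL serves structured JSON (read_log_chunks plus
# a continuation_token) or a plain-text stream (read_log_stream), depending on
# the Accept header. BASE_URL, AUTH, and the dag/run/task names are placeholders.
import requests

BASE_URL = "http://localhost:8080/api/v1"
AUTH = ("admin", "admin")
url = f"{BASE_URL}/dags/my_dag/dagRuns/my_run/taskInstances/my_task/logs/1"

as_json = requests.get(url, auth=AUTH, headers={"Accept": "application/json"}).json()
print(as_json["continuation_token"], as_json["content"])

# For a mapped task, select one expanded instance via the map_index query
# parameter (assumed to correspond to the endpoint's map_index argument above).
as_text = requests.get(url, auth=AUTH, headers={"Accept": "text/plain"}, params={"map_index": 0})
print(as_text.text)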