def __init__(self, logs_dir, gcs_address):
    """Set up a log monitor that publishes through a GCS publisher.

    Args:
        logs_dir: Stored unchanged as ``self.logs_dir``.
        gcs_address: Forwarded as ``address`` when constructing
            ``gcs_pubsub.GcsPublisher``.
    """
    # Resolve the node IP first, exactly as the first step of construction.
    node_ip = services.get_node_ip_address()
    self.ip = node_ip
    self.logs_dir = logs_dir

    gcs_publisher = gcs_pubsub.GcsPublisher(address=gcs_address)
    self.publisher = gcs_publisher

    # File-tracking state: everything starts empty, and opening further
    # files is permitted until something flips the flag.
    self.log_filenames = set()
    self.open_file_infos = []
    self.closed_file_infos = []
    self.can_open_more_files = True
def __init__(self, logs_dir, redis_address, redis_password=None):
    """Set up a log monitor backed by a Redis client.

    A GCS publisher is additionally created when
    ``gcs_pubsub.gcs_pubsub_enabled()`` returns a truthy value; otherwise
    ``self.publisher`` stays ``None``.

    Args:
        logs_dir: Stored unchanged as ``self.logs_dir``.
        redis_address: Passed positionally to
            ``ray._private.services.create_redis_client``.
        redis_password: Passed as ``password`` to the same call.
            Defaults to ``None``.
    """
    node_ip = services.get_node_ip_address()
    self.ip = node_ip
    self.logs_dir = logs_dir

    redis_client = ray._private.services.create_redis_client(
        redis_address, password=redis_password
    )
    self.redis_client = redis_client

    # Default to "no publisher"; only replaced when GCS pubsub is enabled.
    self.publisher = None
    if gcs_pubsub.gcs_pubsub_enabled():
        # The GCS address is looked up through the Redis client.
        resolved_gcs_address = gcs_utils.get_gcs_address_from_redis(
            redis_client
        )
        self.publisher = gcs_pubsub.GcsPublisher(address=resolved_gcs_address)

    # File-tracking state: everything starts empty, and opening further
    # files is permitted until something flips the flag.
    self.log_filenames = set()
    self.open_file_infos = []
    self.closed_file_infos = []
    self.can_open_more_files = True
log_dir=args.logs_dir, filename=args.logging_filename, max_bytes=args.logging_rotate_bytes, backup_count=args.logging_rotate_backup_count, ) def is_proc_alive(pid): try: os.kill(pid, 0) return True except OSError: # If OSError is raised, the process is not alive. return False log_monitor = LogMonitor(args.logs_dir, gcs_pubsub.GcsPublisher(address=args.gcs_address), is_proc_alive) try: log_monitor.run() except Exception as e: # Something went wrong, so push an error to all drivers. gcs_publisher = GcsPublisher(address=args.gcs_address) traceback_str = ray._private.utils.format_error_message( traceback.format_exc()) message = (f"The log monitor on node {platform.node()} " f"failed with the following error:\n{traceback_str}") ray._private.utils.publish_error_to_driver( ray_constants.LOG_MONITOR_DIED_ERROR, message, gcs_publisher=gcs_publisher,