예제 #1
0
 def __init__(self, logs_dir, gcs_address):
     """Set up a log monitor that publishes through GCS.

     Args:
         logs_dir: Stored unchanged as ``self.logs_dir``.
         gcs_address: Passed as ``address`` to ``gcs_pubsub.GcsPublisher``.
     """
     # Resolve the external pieces first, in the same order as before.
     node_ip = services.get_node_ip_address()
     publisher = gcs_pubsub.GcsPublisher(address=gcs_address)

     self.ip = node_ip
     self.logs_dir = logs_dir
     self.publisher = publisher

     # File tracking state starts out empty, with opening files allowed.
     self.log_filenames = set()
     self.open_file_infos, self.closed_file_infos = [], []
     self.can_open_more_files = True
예제 #2
0
 def __init__(self, logs_dir, redis_address, redis_password=None):
     """Set up a log monitor backed by a Redis client.

     A GCS publisher is created only when
     ``gcs_pubsub.gcs_pubsub_enabled()`` returns a true value; otherwise
     ``self.publisher`` stays ``None``.

     Args:
         logs_dir: Stored unchanged as ``self.logs_dir``.
         redis_address: Address handed to ``create_redis_client``.
         redis_password: Password forwarded to ``create_redis_client``
             (defaults to ``None``).
     """
     self.ip = services.get_node_ip_address()
     self.logs_dir = logs_dir

     redis_client = ray._private.services.create_redis_client(
         redis_address, password=redis_password)
     self.redis_client = redis_client

     self.publisher = None
     if gcs_pubsub.gcs_pubsub_enabled():
         # The GCS address is looked up through the Redis client above.
         self.publisher = gcs_pubsub.GcsPublisher(
             address=gcs_utils.get_gcs_address_from_redis(redis_client))

     # File tracking state starts out empty, with opening files allowed.
     self.log_filenames = set()
     self.open_file_infos, self.closed_file_infos = [], []
     self.can_open_more_files = True
예제 #3
0
        log_dir=args.logs_dir,
        filename=args.logging_filename,
        max_bytes=args.logging_rotate_bytes,
        backup_count=args.logging_rotate_backup_count,
    )

    def is_proc_alive(pid):
        """Return whether a process with the given pid currently exists.

        The probe sends signal 0 through ``os.kill``; on POSIX this runs
        the existence and permission checks without delivering a signal.

        NOTE(review): on Windows ``os.kill`` does not treat signal 0 as a
        harmless probe -- confirm this helper is only used on POSIX.

        Args:
            pid: Process id to probe.

        Returns:
            True if the process exists, including when it exists but the
            caller is not permitted to signal it; False otherwise.
        """
        try:
            os.kill(pid, 0)
        except PermissionError:
            # EPERM: the process exists, we just may not signal it.
            return True
        except OSError:
            # Any other OSError (e.g. no such process): not alive.
            return False
        return True

    log_monitor = LogMonitor(args.logs_dir,
                             gcs_pubsub.GcsPublisher(address=args.gcs_address),
                             is_proc_alive)

    try:
        log_monitor.run()
    except Exception as e:
        # Something went wrong, so push an error to all drivers.
        gcs_publisher = GcsPublisher(address=args.gcs_address)
        traceback_str = ray._private.utils.format_error_message(
            traceback.format_exc())
        message = (f"The log monitor on node {platform.node()} "
                   f"failed with the following error:\n{traceback_str}")
        ray._private.utils.publish_error_to_driver(
            ray_constants.LOG_MONITOR_DIED_ERROR,
            message,
            gcs_publisher=gcs_publisher,