Beispiel #1
0
    def get_log_file_handles(self, name, unique=False):
        """Return opened (stdout, stderr) log file handles for `name`.

        Whether files are opened at all is controlled by
        `self._ray_params.redirect_output`. When that setting is `None`,
        the `GLOG_logtostderr` environment variable decides: redirection
        stays enabled unless the variable equals "1".

        Args:
            name (str): descriptive string for this log file.
            unique (bool): forwarded to `_get_log_file_names`; if true, a
                counter will be attached to `name` to ensure the returned
                filename is not already used.

        Returns:
            A tuple of two file handles for redirecting (stdout, stderr), or
            `(None, None)` if output redirection is disabled.
        """
        should_redirect = self._ray_params.redirect_output
        if should_redirect is None:
            # No explicit setting: mirror glog, which logs to stderr when
            # GLOG_logtostderr is "1".
            should_redirect = os.getenv("GLOG_logtostderr") != "1"

        if should_redirect:
            stdout_path, stderr_path = self._get_log_file_names(
                name, unique=unique)
            return open_log(stdout_path), open_log(stderr_path)

        return None, None
Beispiel #2
0
    def start_dashboard(self, require_dashboard):
        """Launch the dashboard process and publish its URL.

        The dashboard's stdout/stderr are sent to freshly opened "dashboard"
        log files. If a process was started, it is recorded in
        `all_processes` and the web UI URL is written to the "webui" hash
        in Redis.

        Args:
            require_dashboard (bool): If true, this will raise an exception
                if we fail to start the dashboard. Otherwise it will print
                a warning if we fail to start the dashboard.
        """
        out_path, err_path = self.get_log_file_names("dashboard", unique=True)
        dashboard_stdout = open_log(out_path)
        dashboard_stderr = open_log(err_path)
        self._webui_url, dashboard_process = ray.services.start_dashboard(
            require_dashboard,
            self._ray_params.dashboard_host,
            self.redis_address,
            self._temp_dir,
            stdout_file=dashboard_stdout,
            stderr_file=dashboard_stderr,
            redis_password=self._ray_params.redis_password,
            fate_share=self.kernel_fate_share,
            port=self._ray_params.dashboard_port)

        process_type = ray_constants.PROCESS_TYPE_DASHBOARD
        assert process_type not in self.all_processes
        if dashboard_process is None:
            # Nothing was started, so there is nothing to register.
            return

        self.all_processes[process_type] = [dashboard_process]
        redis_client = self.create_redis_client()
        redis_client.hmset("webui", {"url": self._webui_url})
Beispiel #3
0
    def start_redis(self):
        """Start the primary Redis server and its shards.

        One (stdout, stderr) pair of log files is opened for the primary
        server, followed by one pair per shard. On return the Redis address
        is stored on the node and the started processes are recorded in
        `all_processes`.
        """
        assert self._redis_address is None

        def open_log_pair(label):
            # Resolve unique log file names for `label` and open both files.
            out_path, err_path = self.get_log_file_names(label, unique=True)
            return open_log(out_path), open_log(err_path)

        # The primary server's files come first, then one pair per shard.
        log_file_pairs = [open_log_pair("redis")]
        log_file_pairs.extend(
            open_log_pair("redis-shard_{}".format(shard_index))
            for shard_index in range(self._ray_params.num_redis_shards))

        started = ray.services.start_redis(
            self._node_ip_address,
            log_file_pairs,
            self.get_resource_spec(),
            port=self._ray_params.redis_port,
            redis_shard_ports=self._ray_params.redis_shard_ports,
            num_redis_shards=self._ray_params.num_redis_shards,
            redis_max_clients=self._ray_params.redis_max_clients,
            redirect_worker_output=True,
            password=self._ray_params.redis_password,
            include_java=self._ray_params.include_java,
            fate_share=self.kernel_fate_share)
        self._redis_address, _redis_shards, redis_processes = started

        process_type = ray_constants.PROCESS_TYPE_REDIS_SERVER
        assert process_type not in self.all_processes
        self.all_processes[process_type] = redis_processes
def get_logs() -> Tuple[IO, IO]:
    """Open the error and output log files under ``LOG_DIR``.

    The log directory is created first if it does not already exist.

    Returns:
        A tuple ``(err_handle, out_handle)``: the results of ``open_log``
        for the ``ERR_NAME`` path and the ``OUT_NAME`` path, in that order.

    Raises:
        OSError: if the log directory cannot be created (for example
            because of missing permissions, or because ``LOG_DIR`` exists
            but is not a directory).
    """
    # exist_ok=True tolerates an already-existing directory while still
    # surfacing genuine failures, which a blanket `except OSError: pass`
    # would silently hide.
    os.makedirs(LOG_DIR, exist_ok=True)

    err_path = os.path.join(LOG_DIR, ERR_NAME)
    out_path = os.path.join(LOG_DIR, OUT_NAME)

    return open_log(err_path), open_log(out_path)
Beispiel #5
0
 def start_monitor(self):
     """Launch the monitor process and record it in `all_processes`.

     Freshly opened "monitor" log files are passed to the process as its
     stdout and stderr files.
     """
     out_path, err_path = self.get_log_file_names("monitor", unique=True)
     monitor_stdout = open_log(out_path)
     monitor_stderr = open_log(err_path)
     monitor_process = ray.services.start_monitor(
         self._redis_address,
         stdout_file=monitor_stdout,
         stderr_file=monitor_stderr,
         autoscaling_config=self._ray_params.autoscaling_config,
         redis_password=self._ray_params.redis_password,
         fate_share=self.kernel_fate_share)

     process_type = ray_constants.PROCESS_TYPE_MONITOR
     assert process_type not in self.all_processes
     self.all_processes[process_type] = [monitor_process]
Beispiel #6
0
 def start_reporter(self):
     """Launch the reporter process.

     Freshly opened "reporter" log files are passed to the process as its
     stdout and stderr files. The process is recorded in `all_processes`
     only if one was actually started.
     """
     out_path, err_path = self.get_log_file_names("reporter", unique=True)
     reporter_stdout = open_log(out_path)
     reporter_stderr = open_log(err_path)
     reporter_process = ray.services.start_reporter(
         self.redis_address,
         stdout_file=reporter_stdout,
         stderr_file=reporter_stderr,
         redis_password=self._ray_params.redis_password,
         fate_share=self.kernel_fate_share)

     process_type = ray_constants.PROCESS_TYPE_REPORTER
     assert process_type not in self.all_processes
     if reporter_process is None:
         # Nothing was started, so there is nothing to register.
         return
     self.all_processes[process_type] = [reporter_process]
Beispiel #7
0
 def start_log_monitor(self):
     """Launch the log monitor process and record it in `all_processes`.

     Freshly opened "log_monitor" log files are passed to the process as
     its stdout and stderr files.
     """
     out_path, err_path = self.get_log_file_names(
         "log_monitor", unique=True)
     log_monitor_stdout = open_log(out_path)
     log_monitor_stderr = open_log(err_path)
     log_monitor_process = ray.services.start_log_monitor(
         self.redis_address,
         self._logs_dir,
         stdout_file=log_monitor_stdout,
         stderr_file=log_monitor_stderr,
         redis_password=self._ray_params.redis_password,
         fate_share=self.kernel_fate_share)

     process_type = ray_constants.PROCESS_TYPE_LOG_MONITOR
     assert process_type not in self.all_processes
     self.all_processes[process_type] = [log_monitor_process]
Beispiel #8
0
 def start_gcs_server(self):
     """Launch the gcs server process and record it in `all_processes`.

     Freshly opened "gcs_server" log files are passed to the process as
     its stdout and stderr files.
     """
     out_path, err_path = self.get_log_file_names(
         "gcs_server", unique=True)
     gcs_stdout = open_log(out_path)
     gcs_stderr = open_log(err_path)
     gcs_process = ray.services.start_gcs_server(
         self._redis_address,
         stdout_file=gcs_stdout,
         stderr_file=gcs_stderr,
         redis_password=self._ray_params.redis_password,
         config=self._config,
         fate_share=self.kernel_fate_share)

     process_type = ray_constants.PROCESS_TYPE_GCS_SERVER
     assert process_type not in self.all_processes
     self.all_processes[process_type] = [gcs_process]
Beispiel #9
0
    def start_raylet(self, use_valgrind=False, use_profiler=False):
        """Launch the raylet process and record it in `all_processes`.

        Freshly opened "raylet" log files are passed to the process as its
        stdout and stderr files.

        Args:
            use_valgrind (bool): True if we should start the process in
                valgrind.
            use_profiler (bool): True if we should start the process in the
                valgrind profiler.
        """
        out_path, err_path = self.get_log_file_names("raylet", unique=True)
        raylet_stdout = open_log(out_path)
        raylet_stderr = open_log(err_path)
        params = self._ray_params
        raylet_process = ray.services.start_raylet(
            self._redis_address,
            self._node_ip_address,
            params.node_manager_port,
            self._raylet_socket_name,
            self._plasma_store_socket_name,
            params.worker_path,
            self._temp_dir,
            self._session_dir,
            self.get_resource_spec(),
            params.min_worker_port,
            params.max_worker_port,
            params.object_manager_port,
            params.redis_password,
            params.metrics_agent_port,
            use_valgrind=use_valgrind,
            use_profiler=use_profiler,
            stdout_file=raylet_stdout,
            stderr_file=raylet_stderr,
            config=self._config,
            include_java=params.include_java,
            java_worker_options=params.java_worker_options,
            load_code_from_local=params.load_code_from_local,
            plasma_directory=params.plasma_directory,
            huge_pages=params.huge_pages,
            fate_share=self.kernel_fate_share,
            socket_to_use=self.socket,
            head_node=self.head)

        process_type = ray_constants.PROCESS_TYPE_RAYLET
        assert process_type not in self.all_processes
        self.all_processes[process_type] = [raylet_process]
Beispiel #10
0
 def start_plasma_store(self):
     """Launch the plasma store process and record it in `all_processes`.

     Freshly opened "plasma_store" log files are passed to the process as
     its stdout and stderr files.
     """
     out_path, err_path = self.get_log_file_names(
         "plasma_store", unique=True)
     plasma_stdout = open_log(out_path)
     plasma_stderr = open_log(err_path)
     plasma_process = ray.services.start_plasma_store(
         self.get_resource_spec(),
         self._plasma_store_socket_name,
         stdout_file=plasma_stdout,
         stderr_file=plasma_stderr,
         plasma_directory=self._ray_params.plasma_directory,
         huge_pages=self._ray_params.huge_pages,
         # Truthiness of the "plasma_store_as_thread" config entry.
         keep_idle=bool(self._config.get("plasma_store_as_thread")),
         fate_share=self.kernel_fate_share)

     process_type = ray_constants.PROCESS_TYPE_PLASMA_STORE
     assert process_type not in self.all_processes
     self.all_processes[process_type] = [plasma_process]