예제 #1
0
 def destroy_external_storage(self):
     """Destroy the external storage used for object spilling, if configured.

     Does nothing when ``self._config`` has no (or an empty)
     ``object_spilling_config`` entry. Otherwise the JSON-encoded config is
     decoded, the storage is set up from it, and that storage is destroyed.
     """
     serialized_config = self._config.get("object_spilling_config", {})
     if not serialized_config:
         return

     spilling_config = json.loads(serialized_config)
     # Imported inside the method, only once a spilling config is present.
     from ray import external_storage
     external_storage.setup_external_storage(
         spilling_config).destroy_external_storage()
예제 #2
0
    def validate_external_storage(self):
        """Check that the object spilling external storage can be set up.

        This is a no-op when ``automatic_object_spilling_enabled`` is false
        in ``self._config``. Otherwise, if no ``object_spilling_config`` was
        given, a default filesystem config pointing at ``self._session_dir``
        is filled in. The serialized config and a flag telling whether the
        storage type is "filesystem" are then written to both
        ``self._config`` and the ray params' system config, and the storage
        is set up once and reset again.
        """
        if not self._config.get("automatic_object_spilling_enabled", True):
            return

        serialized_config = self._config.get("object_spilling_config", {})
        if not serialized_config:
            # Nothing was specified, so fall back to the default.
            default_config = {
                "type": "filesystem",
                "params": {
                    "directory_path": self._session_dir
                }
            }
            serialized_config = json.dumps(default_config)

        parsed_config = json.loads(serialized_config)

        # The ray params' system config and self._config could have diverged
        # by this point, so every setting is stored in both of them.
        system_config = self._ray_params._system_config
        system_config["object_spilling_config"] = serialized_config
        self._config["object_spilling_config"] = serialized_config

        uses_filesystem = parsed_config["type"] == "filesystem"
        system_config["is_external_storage_type_fs"] = uses_filesystem
        self._config["is_external_storage_type_fs"] = uses_filesystem

        # Validate external storage usage: set it up, then reset it.
        from ray import external_storage
        external_storage.setup_external_storage(parsed_config)
        external_storage.reset_external_storage()
예제 #3
0
        mode = ray.UTIL_WORKER_MODE
    else:
        raise ValueError("Unknown worker type: " + args.worker_type)

    # NOTE(suquark): We must initialize the external storage before we
    # connect to raylet. Otherwise we may receive requests before the
    # external storage is initialized.
    if mode == ray.RESTORE_WORKER_MODE or mode == ray.SPILL_WORKER_MODE:
        from ray import external_storage
        if args.object_spilling_config:
            object_spilling_config = base64.b64decode(
                args.object_spilling_config)
            object_spilling_config = json.loads(object_spilling_config)
        else:
            object_spilling_config = {}
        external_storage.setup_external_storage(object_spilling_config)

    raylet_ip_address = args.raylet_ip_address
    if raylet_ip_address is None:
        raylet_ip_address = args.node_ip_address

    ray_params = RayParams(
        node_ip_address=args.node_ip_address,
        raylet_ip_address=raylet_ip_address,
        node_manager_port=args.node_manager_port,
        redis_address=args.redis_address,
        redis_password=args.redis_password,
        plasma_store_socket_name=args.object_store_name,
        raylet_socket_name=args.raylet_name,
        temp_dir=args.temp_dir,
        metrics_agent_port=args.metrics_agent_port,
예제 #4
0
    def _check_usage(self):
        """Validate the parameters stored on this object.

        Checks, in order: ``worker_port_list``, the ``min_worker_port`` /
        ``max_worker_port`` pair, ``resources``, the deprecated
        ``redirect_worker_output`` / ``redirect_output`` arguments, the
        installed numpy version (warning only) and the object spilling
        configuration found in ``self._system_config``.

        When the ``RAY_USE_RANDOM_PORTS`` environment variable has a
        non-empty value and neither worker port bound was given, both bounds
        are set to 0.

        Raises:
            ValueError: If ``worker_port_list`` is not a comma-separated list
                of integers in [1024, 65535], or if ``min_worker_port`` /
                ``max_worker_port`` are out of range or inconsistent.
            AssertionError: If ``resources`` contains "CPU" or "GPU".
            DeprecationWarning: If ``redirect_worker_output`` or
                ``redirect_output`` is set.
        """
        if self.worker_port_list is not None:
            for port_str in self.worker_port_list.split(","):
                try:
                    port = int(port_str)
                except ValueError as e:
                    raise ValueError(
                        "worker_port_list must be a comma-separated " +
                        "list of integers: {}".format(e)) from None

                if port < 1024 or port > 65535:
                    raise ValueError(
                        "Ports in worker_port_list must be "
                        "between 1024 and 65535. Got: {}".format(port))

        # Used primarily for testing.
        if os.environ.get("RAY_USE_RANDOM_PORTS", False):
            # Only fall back to random ports when *neither* bound was given;
            # an explicitly supplied max_worker_port must not be overwritten.
            if self.min_worker_port is None and self.max_worker_port is None:
                self.min_worker_port = 0
                self.max_worker_port = 0

        if self.min_worker_port is not None:
            if self.min_worker_port != 0 and (self.min_worker_port < 1024
                                              or self.min_worker_port > 65535):
                raise ValueError("min_worker_port must be 0 or an integer "
                                 "between 1024 and 65535.")

        if self.max_worker_port is not None:
            if self.min_worker_port is None:
                raise ValueError("If max_worker_port is set, min_worker_port "
                                 "must also be set.")
            elif self.max_worker_port != 0:
                if self.max_worker_port < 1024 or self.max_worker_port > 65535:
                    raise ValueError(
                        "max_worker_port must be 0 or an integer between "
                        "1024 and 65535.")
                elif self.max_worker_port <= self.min_worker_port:
                    raise ValueError("max_worker_port must be higher than "
                                     "min_worker_port.")

        if self.resources is not None:
            assert "CPU" not in self.resources, (
                "'CPU' should not be included in the resource dictionary. Use "
                "num_cpus instead.")
            assert "GPU" not in self.resources, (
                "'GPU' should not be included in the resource dictionary. Use "
                "num_gpus instead.")

        # The deprecated arguments are rejected outright by raising the
        # warning class as an exception.
        if self.redirect_worker_output is not None:
            raise DeprecationWarning(
                "The redirect_worker_output argument is deprecated. To "
                "control logging to the driver, use the 'log_to_driver' "
                "argument to 'ray.init()'")

        if self.redirect_output is not None:
            raise DeprecationWarning(
                "The redirect_output argument is deprecated.")

        # Parse the numpy version. Comparing (major, minor) as a tuple keeps
        # the check correct for every major version, not just 1.x.
        numpy_version = np.__version__.split(".")
        numpy_major, numpy_minor = int(numpy_version[0]), int(numpy_version[1])
        if (numpy_major, numpy_minor) < (1, 16):
            logger.warning("Using ray with numpy < 1.16.0 will result in slow "
                           "serialization. Upgrade numpy if using with ray.")

        # Make sure object spilling configuration is applicable.
        object_spilling_config = self._system_config.get(
            "object_spilling_config", {})
        if object_spilling_config:
            object_spilling_config = json.loads(object_spilling_config)
            from ray import external_storage
            # Validate external storage usage by setting the storage up once
            # and resetting it again.
            external_storage.setup_external_storage(object_spilling_config)
            external_storage.reset_external_storage()
예제 #5
0
def main(args):
    """Run a worker process of the kind named by ``args.worker_type``.

    Sets up logging, maps the worker type ("WORKER", "SPILL_WORKER" or
    "RESTORE_WORKER") to a ray worker mode, initializes external storage for
    spill/restore workers, extends ``sys.path`` from ``args.code_search_path``,
    builds a connect-only ``Node`` from the parsed arguments and connects the
    worker to it. With stdout/stderr redirected to the worker interceptor
    loggers, it then either runs the worker main loop or sleeps forever.

    Raises:
        ValueError: If ``args.worker_type`` is not one of the known types.
    """
    ray.ray_logging.setup_logger(args.logging_level, args.logging_format)

    modes_by_worker_type = {
        "WORKER": ray.WORKER_MODE,
        "SPILL_WORKER": ray.SPILL_WORKER_MODE,
        "RESTORE_WORKER": ray.RESTORE_WORKER_MODE,
    }
    if args.worker_type not in modes_by_worker_type:
        raise ValueError("Unknown worker type: " + args.worker_type)
    mode = modes_by_worker_type[args.worker_type]
    is_io_worker = mode in (ray.RESTORE_WORKER_MODE, ray.SPILL_WORKER_MODE)

    # NOTE(suquark): The external storage has to be initialized before the
    # connection to raylet is made; otherwise requests may arrive before the
    # external storage is initialized.
    if is_io_worker:
        from ray import external_storage
        spilling_config = {}
        if args.object_spilling_config:
            # The config is passed in as base64-encoded JSON.
            spilling_config = json.loads(
                base64.b64decode(args.object_spilling_config))
        external_storage.setup_external_storage(spilling_config)

    # Fall back to the node IP when no raylet IP was given.
    raylet_ip_address = (args.node_ip_address
                         if args.raylet_ip_address is None else
                         args.raylet_ip_address)

    if args.code_search_path is not None:
        for entry in args.code_search_path.split(":"):
            # For a file entry, its containing directory is added instead.
            sys.path.append(
                os.path.dirname(entry) if os.path.isfile(entry) else entry)

    ray_params = RayParams(
        node_ip_address=args.node_ip_address,
        raylet_ip_address=raylet_ip_address,
        node_manager_port=args.node_manager_port,
        redis_address=args.redis_address,
        redis_password=args.redis_password,
        plasma_store_socket_name=args.object_store_name,
        raylet_socket_name=args.raylet_name,
        temp_dir=args.temp_dir,
        load_code_from_local=args.load_code_from_local,
        metrics_agent_port=args.metrics_agent_port,
    )

    node = ray.node.Node(
        ray_params,
        head=False,
        shutdown_at_exit=False,
        spawn_reaper=False,
        connect_only=True)
    ray.worker._global_node = node
    ray.worker.connect(node, mode=mode)

    # Send stdout and stderr through the default worker interceptor loggers.
    # NOTE: The redirect_worker_output arg is deprecated, so it needs no
    # handling here.
    stdout_interceptor = StandardStreamInterceptor(
        setup_and_get_worker_interceptor_logger(is_for_stdout=True),
        intercept_stdout=True)
    stderr_interceptor = StandardStreamInterceptor(
        setup_and_get_worker_interceptor_logger(is_for_stdout=False),
        intercept_stdout=False)
    with redirect_stdout(stdout_interceptor), \
            redirect_stderr(stderr_interceptor):
        if mode == ray.WORKER_MODE:
            ray.worker.global_worker.main_loop()
        elif is_io_worker:
            # The work is handled by another thread in the C++ core worker;
            # this thread only has to keep the worker alive.
            while True:
                time.sleep(100000)
        else:
            raise ValueError(f"Unexcepted worker mode: {mode}")