Esempio n. 1
0
 def spawn_updater(self, node_id, init_commands, ray_start_commands,
                   node_resources, docker_config):
     """Start a ``NodeUpdaterThread`` for ``node_id`` and keep track of it.

     The thread is assembled from the cluster config held on ``self`` plus
     the per-call arguments, started right away, and then stored in
     ``self.updaters`` under ``node_id``.

     Args:
         node_id: Node the updater is created for; also the key used in
             ``self.updaters``.
         init_commands: Passed through ``with_head_node_ip`` and handed to
             the updater as ``setup_commands``.
         ray_start_commands: Passed through ``with_head_node_ip`` and
             handed to the updater as ``ray_start_commands``.
         node_resources: Forwarded unchanged as ``node_resources``.
         docker_config: Forwarded unchanged as ``docker_config``.
     """
     logger.info(
         f"Creating new (spawn_updater) updater thread for node {node_id}.")

     config = self.config
     # Entries are listed in the order the updater arguments are evaluated,
     # so config lookups and helper calls happen in a fixed sequence.
     updater_kwargs = {
         "node_id": node_id,
         "provider_config": config["provider"],
         "provider": self.provider,
         "auth_config": config["auth"],
         "cluster_name": config["cluster_name"],
         "file_mounts": config["file_mounts"],
         # Initialization commands are looked up per node rather than taken
         # from the arguments.
         "initialization_commands": with_head_node_ip(
             self._get_node_type_specific_fields(
                 node_id, "initialization_commands")),
         "setup_commands": with_head_node_ip(init_commands),
         "ray_start_commands": with_head_node_ip(ray_start_commands),
         "runtime_hash": self.runtime_hash,
         "file_mounts_contents_hash": self.file_mounts_contents_hash,
         "is_head_node": False,
         "cluster_synced_files": config["cluster_synced_files"],
         "process_runner": self.process_runner,
         "use_internal_ip": True,
         "docker_config": docker_config,
         "node_resources": node_resources,
     }

     thread = NodeUpdaterThread(**updater_kwargs)
     thread.start()
     self.updaters[node_id] = thread
Esempio n. 2
0
 def recover_if_needed(self, node_id, now):
     """Restart Ray on ``node_id`` when its heartbeat is too old.

     Does nothing if ``self.can_update(node_id)`` is false. Otherwise the
     node's internal IP is used to look up its last heartbeat time in
     ``self.load_metrics.last_heartbeat_time_by_ip``; an IP with no entry
     is recorded as having last reported at ``now``. If the time since the
     last heartbeat has reached ``AUTOSCALER_HEARTBEAT_TIMEOUT_S``, a
     warning is logged and a ``NodeUpdaterThread`` that only runs the
     worker Ray start commands is started and stored in ``self.updaters``.

     Args:
         node_id: Node to check and, if needed, recover.
         now: Current time, on the same scale as the recorded heartbeat
             times (presumably seconds, per the log message -- confirm
             against the caller).
     """
     if not self.can_update(node_id):
         return

     ip = self.provider.internal_ip(node_id)
     heartbeats = self.load_metrics.last_heartbeat_time_by_ip
     if ip not in heartbeats:
         # First sighting of this IP: start its clock now.
         heartbeats[ip] = now

     elapsed = now - heartbeats[ip]
     if elapsed < AUTOSCALER_HEARTBEAT_TIMEOUT_S:
         return

     message = ("StandardAutoscaler: "
                "{}: No heartbeat in {}s, "
                "restarting Ray to recover...").format(node_id, elapsed)
     logger.warning(message)

     config = self.config
     # No file mounts, initialization or setup commands are passed: only
     # the worker Ray start commands are re-run.
     updater_kwargs = {
         "node_id": node_id,
         "provider_config": config["provider"],
         "provider": self.provider,
         "auth_config": config["auth"],
         "cluster_name": config["cluster_name"],
         "file_mounts": {},
         "initialization_commands": [],
         "setup_commands": [],
         "ray_start_commands": with_head_node_ip(
             config["worker_start_ray_commands"]),
         "runtime_hash": self.runtime_hash,
         "process_runner": self.process_runner,
         "use_internal_ip": True,
         "docker_config": config.get("docker"),
     }

     thread = NodeUpdaterThread(**updater_kwargs)
     thread.start()
     self.updaters[node_id] = thread
Esempio n. 3
0
 def spawn_updater(self, node_id, init_commands, ray_start_commands):
     """Start a ``NodeUpdaterThread`` for ``node_id`` and keep track of it.

     The thread is assembled from the cluster config held on ``self`` plus
     the per-call arguments, started right away, and then stored in
     ``self.updaters`` under ``node_id``.

     Args:
         node_id: Node the updater is created for; also the key used in
             ``self.updaters``.
         init_commands: Passed through ``with_head_node_ip`` and handed to
             the updater as ``setup_commands``.
         ray_start_commands: Passed through ``with_head_node_ip`` and
             handed to the updater as ``ray_start_commands``.
     """
     config = self.config
     # Entries are listed in the order the updater arguments are evaluated,
     # so config lookups and helper calls happen in a fixed sequence.
     updater_kwargs = {
         "node_id": node_id,
         "provider_config": config["provider"],
         "provider": self.provider,
         "auth_config": config["auth"],
         "cluster_name": config["cluster_name"],
         "file_mounts": config["file_mounts"],
         # Initialization commands come from the cluster-wide config here.
         "initialization_commands": with_head_node_ip(
             config["initialization_commands"]),
         "setup_commands": with_head_node_ip(init_commands),
         "ray_start_commands": with_head_node_ip(ray_start_commands),
         "runtime_hash": self.runtime_hash,
         "process_runner": self.process_runner,
         "use_internal_ip": True,
     }

     thread = NodeUpdaterThread(**updater_kwargs)
     thread.start()
     self.updaters[node_id] = thread