Ejemplo n.º 1
0
Archivo: mock.py Proyecto: yuan776/ray
 def on_step_begin(self, **info):
     """Occasionally kill a random cluster node, retrying on CLI errors.

     Does nothing when ``self.config_path`` does not exist on disk or when
     ``self.disable`` is set. Otherwise, with probability
     ``self.probability``, ``kill_node`` is called; a second draw against
     the same probability decides the value passed as ``hard``.

     An attempt that raises ``click.exceptions.ClickException`` is logged
     and retried, for at most three attempts in total. The method returns
     as soon as one attempt succeeds, so a single call triggers at most
     one successful ``kill_node``.

     Args:
         **info: Callback keyword arguments; not used here.
     """
     if not os.path.exists(self.config_path):
         return
     import click
     from ray.autoscaler._private.commands import kill_node
     max_failures = 3
     # The draw is made before ``disable`` is consulted, matching the
     # original evaluation order.
     if not (random.random() < self.probability) or self.disable:
         return
     # Second draw: decides whether the kill is requested with hard=True.
     should_terminate = random.random() < self.probability
     for failures in range(1, max_failures + 1):
         try:
             kill_node(
                 self.config_path,
                 yes=True,
                 hard=should_terminate,
                 override_cluster_name=None)
         except click.exceptions.ClickException:
             logger.exception(
                 "Killing random node failed in attempt %s. "
                 "Retrying %s more times", failures,
                 max_failures - failures)
         else:
             # Success: stop here. Without this exit the loop would keep
             # calling kill_node until three attempts had failed.
             return
Ejemplo n.º 2
0
 def on_step_begin(self, **info):
     """Randomly kill a cluster node before a step, unless disabled.

     With probability ``self.probability`` (and only when ``self.disable``
     is falsy) ``kill_node`` is invoked on ``self.config_path``. A second
     draw against the same probability supplies the ``hard`` argument.

     Args:
         **info: Callback keyword arguments; not used here.
     """
     from ray.autoscaler._private.commands import kill_node
     # Draw first, then look at ``disable`` -- same order as a plain
     # ``draw < p and not disable`` test.
     if not (random.random() < self.probability) or self.disable:
         return
     # Second draw: whether to request a hard kill.
     hard_kill = random.random() < self.probability
     kill_node(
         self.config_path,
         yes=True,
         hard=hard_kill,
         override_cluster_name=None)
Ejemplo n.º 3
0
 def on_step_begin(self, trial_runner):
     """Refresh available resources, then maybe kill a node before step().

     Args:
         trial_runner: Passed in by the caller; not used here.
     """
     self._update_avail_resources()
     # Only about 10% of calls go on to kill a node, and never when
     # ``args.smoke_test`` is set.
     if random.random() >= 0.1 or args.smoke_test:
         return
     # A further 10% draw chooses the value passed as ``hard``.
     hard_kill = random.random() < 0.1
     kill_node(
         "/home/ubuntu/ray_bootstrap_config.yaml",
         yes=True,
         hard=hard_kill,
         override_cluster_name=None)
Ejemplo n.º 4
0
    def remove_host(self, hosts):
        """Record one more host as removed, if any candidate is left.

        Candidates are the entries of ``hosts`` that are not already in
        ``self._removed_hosts``. When there are none, the call is a no-op.

        * If ``self._graceful`` is truthy, a random candidate is chosen and
          recorded; ``kill_node`` is not called.
        * Otherwise ``kill_node`` is called on ``~/ray_bootstrap_config.yaml``
          and whatever it returns is recorded (presumably an identifier of
          the killed node -- confirm against ``kill_node``). That value is
          not checked against ``hosts``.

        Args:
            hosts: Iterable of host identifiers to choose from.
        """
        good_hosts = [k for k in hosts if k not in self._removed_hosts]
        if not good_hosts:
            # Nothing left to remove. Previously this path fell through to
            # ``add(host)`` with ``host`` unbound and raised
            # UnboundLocalError.
            return

        if self._graceful:
            host = random.choice(good_hosts)
        else:
            # Imported only where it is used, so the graceful and no-op
            # paths do not depend on the autoscaler commands module.
            from ray.autoscaler._private.commands import kill_node
            host = kill_node(
                os.path.expanduser("~/ray_bootstrap_config.yaml"), True,
                False, None)
        self._removed_hosts.add(host)
Ejemplo n.º 5
0
def kill_random_node(cluster_config_file, yes, hard, cluster_name):
    """Kills a random Ray node. For testing purposes only."""
    # All four arguments are forwarded positionally, unchanged. The result
    # is concatenated as-is, so kill_node must return a str.
    killed_ip = kill_node(cluster_config_file, yes, hard, cluster_name)
    click.echo("Killed node with IP " + killed_ip)