def on_step_begin(self, trial_runner):
    """Refresh available resources, then occasionally inject a node failure.

    Runs before each step. After updating the resource view, a failure is
    injected with 10% probability unless ``args.smoke_test`` is set. When a
    failure is injected, a second 10% roll decides whether the node is
    killed with ``hard=True`` (full termination) instead of a regular kill.

    Args:
        trial_runner: Supplied by the caller; not used by this method.
    """
    self._update_avail_resources()

    # Roll the dice first and consult the smoke-test flag second, so the
    # random draw happens on every call regardless of the flag.
    inject_failure = random.random() < 0.1 and not args.smoke_test
    if not inject_failure:
        return

    # A second, independent 10% roll escalates the kill to a hard one.
    terminate_fully = random.random() < 0.1
    kill_node(
        "/home/ubuntu/ray_bootstrap_config.yaml",
        yes=True,
        hard=terminate_fully,
        override_cluster_name=None,
    )
def kill_random_node(cluster_config_file, yes, hard, cluster_name):
    """Kill a random Ray node and report its IP. For testing purposes only.

    All four arguments are forwarded positionally, in order, to
    ``kill_node``; its return value is echoed as the killed node's IP.
    """
    killed_ip = kill_node(cluster_config_file, yes, hard, cluster_name)
    click.echo("Killed node with IP " + killed_ip)
def kill_random_node(cluster_config_file, yes, cluster_name, hard=False):
    """Kill a random Ray node and report its IP. For testing purposes only.

    Args:
        cluster_config_file: Cluster config file, forwarded to ``kill_node``.
        yes: Confirmation flag, forwarded to ``kill_node`` unchanged.
        cluster_name: Forwarded as ``kill_node``'s ``override_cluster_name``.
        hard: Forwarded as ``kill_node``'s ``hard`` flag. Defaults to
            ``False`` so existing three-argument callers keep working.
    """
    # Pass the trailing arguments by keyword: ``kill_node`` takes ``hard``
    # before ``override_cluster_name``, so a positional ``cluster_name`` in
    # third place would be interpreted as the ``hard`` flag.
    killed_ip = kill_node(
        cluster_config_file,
        yes,
        hard=hard,
        override_cluster_name=cluster_name,
    )
    click.echo("Killed node with IP " + killed_ip)