def destroy_autoscaler_workers(self):
    """Clean up the autoscaler after an exception in the run() method.

    Only the worker nodes are killed. The head node is retained so that its
    logs stay around while keeping costs minimal; this monitor process runs
    on the head node anyway, so doing the cleanup from here is more reliable.

    Behavior:
      * Returns immediately when ``self.autoscaler`` is None (nothing to
        clean up).
      * Logs an error and returns when ``self.autoscaling_config`` is None,
        because teardown cannot be attempted without a config.
      * Otherwise calls ``teardown_cluster`` repeatedly, sleeping 2 seconds
        after each failed attempt, and only returns once an attempt
        completes without raising. There is no retry limit.
    """
    if self.autoscaler is None:
        return  # Nothing to clean up.

    if self.autoscaling_config is None:
        # This is a logic error in the program. Can't do anything.
        logger.error(
            "Monitor: Cleanup failed due to lack of autoscaler config.")
        return

    logger.info("Monitor: Exception caught. Taking down workers...")
    while True:
        try:
            teardown_cluster(
                config_file=self.autoscaling_config,
                yes=True,  # Non-interactive.
                workers_only=True,  # Retain head node for logs.
                override_cluster_name=None,
                keep_min_workers=True,  # Retain minimal amount of workers.
            )
        except Exception:
            # Record the traceback along with the message: without it, a
            # teardown that keeps failing produces an endless stream of
            # identical log lines with no hint of the underlying cause.
            logger.exception("Monitor: Cleanup exception. Trying again...")
            time.sleep(2)
        else:
            logger.info("Monitor: Workers taken down.")
            break
def stop():
    """Tear down the cluster named by the current project definition.

    The project definition is obtained from ``load_project_or_throw()`` and
    its ``"cluster"`` entry is handed to ``teardown_cluster`` as the cluster
    configuration. Nothing is returned.
    """
    cluster_config = load_project_or_throw()["cluster"]
    teardown_cluster(
        cluster_config,
        yes=True,  # Skip the confirmation step.
        workers_only=False,  # Not restricted to worker nodes.
        override_cluster_name=None,
    )
def stop(name):
    """Tear down the current project's cluster under an overriding name.

    The project definition is obtained from ``load_project_or_throw()``; the
    result of its ``cluster_yaml()`` method is handed to ``teardown_cluster``
    as the cluster configuration. Nothing is returned.

    Args:
        name: Forwarded unchanged as ``override_cluster_name``.
    """
    cluster_yaml = load_project_or_throw().cluster_yaml()
    teardown_cluster(
        cluster_yaml,
        yes=True,  # Skip the confirmation step.
        workers_only=False,  # Not restricted to worker nodes.
        override_cluster_name=name,
    )
def __do_destroy(self):
    """Tear down the cluster and record any failure on ``self.destroyer``.

    On success, ``self.ready`` is set to False and ``self.config`` to None.

    Nothing is raised to the caller: any exception, including those that
    derive only from ``BaseException``, is wrapped in ``CannotDestroyCluster``
    and stored in ``self.destroyer.exc``. Unless ``self.destroyer.silent`` is
    truthy, the traceback is also written to standard error.
    """
    try:
        teardown_cluster(
            self.config_file,
            yes=True,
            workers_only=False,
            override_cluster_name=None,
            keep_min_workers=0,
        )
        # Only reached when the teardown call did not raise.
        self.ready = False
        self.config = None
    except BaseException as err:
        failure = CannotDestroyCluster(
            "Cannot destroy cluster",
            cause=err,
            traceback=traceback.format_exc(),
        )
        self.destroyer.exc = failure
        if self.destroyer.silent:
            return
        sys.stderr.write(f"Cannot destroy cluster:\n{traceback.format_exc()}\n")
def teardown(cluster_config_file, yes, workers_only, cluster_name,
             keep_min_workers):
    """Tear down a Ray cluster."""
    # Thin pass-through: all five arguments are forwarded to teardown_cluster
    # positionally, unchanged and in declaration order. Nothing is returned.
    # NOTE(review): the docstring is left untouched in case this function is
    # registered as a CLI command whose help text is taken from it -- confirm
    # against the decorators, which are not visible here.
    teardown_cluster(
        cluster_config_file,
        yes,
        workers_only,
        cluster_name,
        keep_min_workers,
    )
def teardown(cluster_config_file, yes, workers_only, cluster_name):
    """Tear down the Ray cluster."""
    # Thin pass-through: the four arguments are forwarded to teardown_cluster
    # positionally, unchanged and in declaration order. Nothing is returned.
    # NOTE(review): the docstring is left untouched in case this function is
    # registered as a CLI command whose help text is taken from it -- confirm
    # against the decorators, which are not visible here.
    teardown_cluster(
        cluster_config_file,
        yes,
        workers_only,
        cluster_name,
    )
def teardown(cluster_config_file, yes, workers_only, cluster_name):
    # Thin pass-through to teardown_cluster: the four arguments are forwarded
    # positionally, unchanged and in declaration order. Nothing is returned.
    # NOTE(review): documented with comments rather than a new docstring in
    # case this function is registered as a CLI command whose help text is
    # derived from the docstring -- confirm against the decorators.
    teardown_cluster(
        cluster_config_file,
        yes,
        workers_only,
        cluster_name,
    )
def teardown(cluster_config_file, yes):
    # Thin pass-through to teardown_cluster: both arguments are forwarded
    # positionally, unchanged and in declaration order. Nothing is returned.
    # NOTE(review): documented with comments rather than a new docstring in
    # case this function is registered as a CLI command whose help text is
    # derived from the docstring -- confirm against the decorators.
    teardown_cluster(
        cluster_config_file,
        yes,
    )
def down(cluster_config_file, yes, workers_only, cluster_name,
         keep_min_workers, log_old_style, log_color, verbose):
    """Tear down a Ray cluster."""
    # Thin pass-through: all eight arguments are forwarded to teardown_cluster
    # positionally, unchanged and in declaration order, so this signature must
    # stay aligned with the positional order teardown_cluster expects.
    # Nothing is returned.
    # NOTE(review): the docstring is left untouched in case this function is
    # registered as a CLI command whose help text is taken from it -- confirm
    # against the decorators, which are not visible here.
    teardown_cluster(
        cluster_config_file,
        yes,
        workers_only,
        cluster_name,
        keep_min_workers,
        log_old_style,
        log_color,
        verbose,
    )