def shutdown_cluster():
    """Stop the report server and shut down the distributed cluster.

    Returns immediately when ``General._parallel`` is falsy. Otherwise the
    ``ReportServer`` is stopped, the master address is loaded, and a
    temporary ``distributed.Client`` is used to shut the cluster down.
    If either the ip or the port is ``None``, the run is treated as
    stand-alone and no client is created.

    Any exception raised along the way is logged (message plus traceback)
    and not propagated to the caller.
    """
    if not General._parallel:
        # Not running in parallel mode: nothing to tear down.
        return

    try:
        logging.info("Try to shutdown cluster.")

        # The report server is stopped before the master address is
        # looked up, so it is stopped even in stand-alone mode.
        from zeus.report import ReportServer
        ReportServer.stop()

        # Imports are local to this function, as in the original code.
        from zeus.trainer.utils import load_master_ip
        from distributed import Client
        master_ip, master_port = load_master_ip()
        if not (master_ip is not None and master_port is not None):
            logging.info("Stand-alone mode, no need to shut down the cluster.")
            return

        master_address = "{}:{}".format(master_ip, master_port)
        cluster_client = Client(master_address)
        logging.info("Cluster will be shut down.")
        cluster_client.shutdown()
        cluster_client.close()
        del cluster_client
        # Fixed pause before reporting success; presumably gives the
        # cluster processes time to exit -- TODO confirm the duration.
        time.sleep(15)
        logging.info("Cluster is shut down.")
    except Exception as err:
        message = "Pipeline's cluster shutdown error: {}".format(str(err))
        logging.error(message)
        logging.error(traceback.format_exc())
def close_client(self):
    """Stop the report server and close this object's cluster client.

    Clears the ``_thread_runing`` flag, pauses for one second, closes and
    removes ``self.client`` when it exists and is not ``None``, then pauses
    for another second.
    """
    ReportServer.stop()

    # Clear the running flag, then pause so the thread can exit.
    self._thread_runing = False
    time.sleep(1)

    active_client = getattr(self, "client", None)
    if active_client is not None:
        active_client.close()
        del self.client

    # Pause so the cluster side can finish closing.
    time.sleep(1)