Exemple #1
0
def shutdown_cluster():
    """Shut down the distributed cluster when running in parallel mode.

    Does nothing when ``General._parallel`` is falsy.  Otherwise the
    ReportServer is stopped first, then the master address is read via
    ``load_master_ip()``.  When no address is available the function returns
    (stand-alone mode); when one is, a ``distributed.Client`` is connected to
    it and asked to shut the cluster down.

    This is a best-effort operation: any exception raised along the way is
    logged together with its traceback and is not propagated to the caller.
    """
    # Nothing to tear down unless parallel mode is enabled.
    if not General._parallel:
        return
    try:
        logging.info("Try to shutdown cluster.")

        # Stop the ReportServer before touching the master.
        from zeus.report import ReportServer
        ReportServer.stop()

        # Locate the master; imports stay local so they are only attempted
        # when a shutdown is actually requested.
        from zeus.trainer.utils import load_master_ip
        from distributed import Client
        ip, port = load_master_ip()
        if ip is None or port is None:
            logging.info("Stand-alone mode, no need to shut down the cluster.")
            return

        shutdown_client = Client("{}:{}".format(ip, port))
        try:
            logging.info("Cluster will be shut down.")
            shutdown_client.shutdown()
        finally:
            # Always release the client connection, even when the shutdown
            # request itself fails; otherwise the connection is leaked.
            shutdown_client.close()
            del shutdown_client
        # Fixed pause after the shutdown request -- presumably to give the
        # cluster processes time to exit before the caller continues.
        time.sleep(15)
        logging.info("Cluster is shut down.")
    except Exception as e:
        logging.error("Pipeline's cluster shutdown error: {}".format(str(e)))
        logging.error(traceback.format_exc())
Exemple #2
0
 def close_client(self):
     """Stop the ReportServer and release this object's cluster client.

     Clears the ``_thread_runing`` flag, pauses for one second, closes and
     removes ``self.client`` when that attribute exists and is not ``None``,
     then pauses for one more second.
     """
     ReportServer.stop()

     # Clear the flag, then pause so a thread polling it can exit
     # (presumably a background thread owned by this object -- verify).
     self._thread_runing = False
     time.sleep(1)

     # A missing attribute is treated the same as an unset (None) client.
     if getattr(self, "client", None) is not None:
         self.client.close()
         del self.client

     # Pause once more to let the cluster side finish closing.
     time.sleep(1)