def broadcast(client: skein.ApplicationClient, key: str, value: str = "") -> None:
    """Publish ``value`` under ``key`` in the skein application's kv store.

    The kv store holds bytes, so a ``str`` value is encoded first; anything
    without an ``.encode`` method (presumably already ``bytes`` — confirm
    with callers) is stored as-is.
    """
    _logger.info(f"Broadcasting {key} = {value!r}")
    try:
        payload = value.encode()
    except AttributeError:
        # No .encode on the value: fall back to storing it untouched.
        client.kv[key] = value
    else:
        client.kv[key] = payload
def _shutdown_on_exception(app: skein.ApplicationClient):
    """Generator used to shut the skein application down on failure.

    NOTE(review): contains a bare ``yield``, so this is presumably wrapped
    with ``@contextlib.contextmanager`` at its (unseen) decoration site —
    confirm. On Ctrl-C / SystemExit the app is killed and the exception is
    swallowed; on any other exception the app is marked FAILED and the
    exception is re-raised.
    """
    def _best_effort_shutdown(status):
        # The application may already be gone; ignore skein-level errors.
        with suppress(SkeinError):
            app.shutdown(status)

    # Ensure SIGINT is not masked to enable kill on C-c.
    import signal
    signal.signal(signal.SIGINT, signal.default_int_handler)

    try:
        yield
    except (KeyboardInterrupt, SystemExit):
        _best_effort_shutdown(FinalStatus.KILLED)
        logger.error("Application killed on user request")
    except Exception:
        _best_effort_shutdown(FinalStatus.FAILED)
        logger.exception("Application shutdown due to an exception")
        raise
def __init__(self):
    """Connect to the current skein application and set up tracking state."""
    # from_current() only works inside a running skein container; the assert
    # guards against the optional skein import having failed at module load.
    assert ApplicationClient is not None
    self._app_client = ApplicationClient.from_current()
    # Bookkeeping containers, all empty until events arrive.
    self._role_to_events = defaultdict(list)
    self._nodes = defaultdict(set)
    # Populated later when the supervisor watch is started.
    self._supervisor_watch_task = None
def _shutdown_on_exception(app: skein.ApplicationClient, path_to_log_hdfs: str = None):
    """Generator guarding a run: shuts the skein application down on failure.

    NOTE(review): bare ``yield`` — presumably decorated with
    ``@contextlib.contextmanager`` at the (unseen) decoration site; confirm.
    NOTE(review): ``path_to_log_hdfs`` defaults to ``None`` while annotated
    ``str`` — should be ``Optional[str]``; left untouched here.

    Ctrl-C / SystemExit kills the app and swallows the exception; any other
    exception marks the app FAILED and re-raises. In all cases, if
    ``path_to_log_hdfs`` is set, the YARN logs are uploaded there.
    """
    # Ensure SIGINT is not masked to enable kill on C-c.
    import signal
    signal.signal(signal.SIGINT, signal.default_int_handler)
    try:
        yield
    except (KeyboardInterrupt, SystemExit):
        # Best-effort shutdown: ignore skein errors if the app is already gone.
        with suppress(SkeinError):
            app.shutdown(FinalStatus.KILLED)
        logger.error("Application killed on user request")
    except Exception:
        with suppress(SkeinError):
            app.shutdown(FinalStatus.FAILED)
        logger.exception("Application shutdown due to an exception")
        raise
    finally:
        # Runs on success and failure alike, so logs are preserved either way.
        if path_to_log_hdfs:
            with tf.gfile.GFile(f'{path_to_log_hdfs}/yarn_logs.txt', 'wb') as fd:
                fd.write(app_logs(app))
def _setup_cluster_tasks(task_instances: List[Tuple[str, int]],
                         app: skein.ApplicationClient,
                         standalone_client_mode: bool) -> tf.train.ClusterSpec:
    """Publish the cluster membership to the kv store and build a ClusterSpec.

    Evaluators never join the TF cluster; in standalone client mode the
    chief stays out of it as well.
    """
    excluded_roles = {'evaluator', 'chief'} if standalone_client_mode else {'evaluator'}
    cluster_instances = [
        instance for instance in task_instances if instance[0] not in excluded_roles
    ]
    # Broadcast the membership so every container can discover its peers.
    app.kv[KV_CLUSTER_INSTANCES] = json.dumps(cluster_instances).encode()
    tasks = list(iter_tasks(cluster_instances))
    return tf.train.ClusterSpec(aggregate_spec(app, tasks))
def broadcast(client: skein.ApplicationClient, key: str, value: str = "") -> None:
    """Store ``value`` under ``key`` in the application's kv store, UTF-8 encoded."""
    tf.logging.info(f"Broadcasting {key} = {value!r}")
    encoded = value.encode()
    client.kv[key] = encoded
def _setup_cluster_tasks(task_instances: List[Tuple[str, int]],
                         app: skein.ApplicationClient) -> tf.train.ClusterSpec:
    """Publish the task instances to the kv store and build the ClusterSpec."""
    # Broadcast membership first so peers can discover each other.
    serialized = json.dumps(task_instances).encode()
    app.kv[KV_CLUSTER_INSTANCES] = serialized
    tasks = list(iter_tasks(task_instances))
    return tf.train.ClusterSpec(aggregate_spec(app, tasks))
def app_client(self):
    """Return the skein ApplicationClient, creating it lazily on first use."""
    try:
        # Fast path: already connected.
        return self._app_client
    except AttributeError:
        # First access: connect to the current skein application and cache it.
        self._app_client = ApplicationClient.from_current()
        return self._app_client