예제 #1
0
def broadcast(client: skein.ApplicationClient,
              key: str,
              value: str = "") -> None:
    """Publish ``value`` under ``key`` in the application's key-value store.

    The value is encoded with ``.encode()`` when it supports it; objects
    without an ``encode`` method (e.g. ``bytes``) are stored unchanged.

    Args:
        client: Application client whose ``kv`` mapping receives the entry.
        key: Key to write.
        value: Value to publish; defaults to the empty string.
    """
    _logger.info(f"Broadcasting {key} = {value!r}")
    # Guard only the encoding step: an AttributeError raised by the store
    # itself must propagate instead of silently triggering a second write
    # with the unencoded value.
    try:
        payload = value.encode()
    except AttributeError:
        payload = value
    client.kv[key] = payload
예제 #2
0
def _shutdown_on_exception(app: skein.ApplicationClient):
    """Run the wrapped section and shut ``app`` down if it raises.

    This is a single-``yield`` generator; it is presumably wrapped with
    ``contextlib.contextmanager`` outside this view (TODO confirm).

    * ``KeyboardInterrupt`` / ``SystemExit``: the application is shut down
      with ``FinalStatus.KILLED``, an error is logged and the exception is
      not re-raised.
    * Any other ``Exception``: the application is shut down with
      ``FinalStatus.FAILED``, the traceback is logged and the exception is
      re-raised.
    """
    import signal

    def _shutdown_quietly(final_status):
        # Best effort: a SkeinError raised by the shutdown call is ignored.
        with suppress(SkeinError):
            app.shutdown(final_status)

    # Ensure SIGINT is not masked to enable kill on C-c.
    signal.signal(signal.SIGINT, signal.default_int_handler)

    try:
        yield
    except (KeyboardInterrupt, SystemExit):
        _shutdown_quietly(FinalStatus.KILLED)
        logger.error("Application killed on user request")
    except Exception:
        _shutdown_quietly(FinalStatus.FAILED)
        # Must stay inside the handler so the active traceback is logged.
        logger.exception("Application shutdown due to an exception")
        raise
예제 #3
0
    def __init__(self):
        """Attach to the current application and set up empty state.

        The client comes from ``ApplicationClient.from_current()``; the
        remaining attributes start out empty.
        """
        # Guard against ApplicationClient being unset at module level.
        assert ApplicationClient is not None
        current_app = ApplicationClient.from_current()
        self._app_client = current_app

        # Missing keys default to an empty set.
        self._nodes = defaultdict(set)
        # No supervisor watch task has been created yet.
        self._supervisor_watch_task = None
        # Missing keys default to an empty list.
        self._role_to_events = defaultdict(list)
예제 #4
0
def _shutdown_on_exception(app: skein.ApplicationClient,
                           path_to_log_hdfs: str = None):
    """Run the wrapped section, shut ``app`` down on error, then save logs.

    This is a single-``yield`` generator; it is presumably wrapped with
    ``contextlib.contextmanager`` outside this view (TODO confirm).

    * ``KeyboardInterrupt`` / ``SystemExit``: shut down with
      ``FinalStatus.KILLED``, log an error, do not re-raise.
    * Any other ``Exception``: shut down with ``FinalStatus.FAILED``, log
      the traceback and re-raise.
    * In every case, when ``path_to_log_hdfs`` is truthy, the result of
      ``app_logs(app)`` is written to ``<path_to_log_hdfs>/yarn_logs.txt``.

    Args:
        app: Application client to shut down and collect logs from.
        path_to_log_hdfs: Directory receiving ``yarn_logs.txt``; nothing is
            written when it is ``None`` or empty.
    """
    import signal

    def _stop_app(final_status):
        # Best effort: a SkeinError raised by the shutdown call is ignored.
        with suppress(SkeinError):
            app.shutdown(final_status)

    # Ensure SIGINT is not masked to enable kill on C-c.
    signal.signal(signal.SIGINT, signal.default_int_handler)

    try:
        yield
    except (KeyboardInterrupt, SystemExit):
        _stop_app(FinalStatus.KILLED)
        logger.error("Application killed on user request")
    except Exception:
        _stop_app(FinalStatus.FAILED)
        # Must stay inside the handler so the active traceback is logged.
        logger.exception("Application shutdown due to an exception")
        raise
    finally:
        if path_to_log_hdfs:
            log_file = f'{path_to_log_hdfs}/yarn_logs.txt'
            # The file is opened before the logs are fetched.
            with tf.gfile.GFile(log_file, 'wb') as fd:
                fd.write(app_logs(app))
예제 #5
0
def _setup_cluster_tasks(task_instances: List[Tuple[str, int]],
                         app: skein.ApplicationClient,
                         standalone_client_mode: bool) -> tf.train.ClusterSpec:
    """Publish the in-cluster task instances and build their ClusterSpec.

    Entries whose first element is ``'evaluator'`` (and also ``'chief'``
    in standalone client mode) are dropped. The remaining entries are
    JSON-encoded into ``app.kv[KV_CLUSTER_INSTANCES]`` and used to build
    the returned ``tf.train.ClusterSpec``.

    Args:
        task_instances: Entries whose first element is the task type.
        app: Application client whose ``kv`` store receives the list.
        standalone_client_mode: When true, ``'chief'`` is excluded as well.

    Returns:
        The ``tf.train.ClusterSpec`` built from the retained entries.
    """
    # In standalone client mode the chief is also not part of the cluster
    if standalone_client_mode:
        excluded_types = ('evaluator', 'chief')
    else:
        excluded_types = ('evaluator',)

    cluster_instances = []
    for instance in task_instances:
        if instance[0] in excluded_types:
            continue
        cluster_instances.append(instance)

    app.kv[KV_CLUSTER_INSTANCES] = json.dumps(cluster_instances).encode()
    cluster_tasks = list(iter_tasks(cluster_instances))
    return tf.train.ClusterSpec(aggregate_spec(app, cluster_tasks))
예제 #6
0
def broadcast(client: skein.ApplicationClient,
              key: str,
              value: str = "") -> None:
    """Log ``key``/``value`` and store the encoded value in ``client.kv``.

    Args:
        client: Application client whose ``kv`` mapping receives the entry.
        key: Key to write.
        value: Value to publish; it is stored as ``value.encode()``.
    """
    tf.logging.info(f"Broadcasting {key} = {value!r}")
    encoded_value = value.encode()
    client.kv[key] = encoded_value
예제 #7
0
def _setup_cluster_tasks(task_instances: List[Tuple[str, int]],
                         app: skein.ApplicationClient) -> tf.train.ClusterSpec:
    """Publish ``task_instances`` and build the matching ClusterSpec.

    The instances are JSON-encoded into ``app.kv[KV_CLUSTER_INSTANCES]``,
    then expanded with ``iter_tasks`` and passed through ``aggregate_spec``
    to build the returned ``tf.train.ClusterSpec``.
    """
    serialized_instances = json.dumps(task_instances).encode()
    app.kv[KV_CLUSTER_INSTANCES] = serialized_instances

    expanded_tasks = list(iter_tasks(task_instances))
    return tf.train.ClusterSpec(aggregate_spec(app, expanded_tasks))
예제 #8
0
 def app_client(self):
     """Return the application client, creating it on first access.

     The client is obtained from ``ApplicationClient.from_current()`` and
     stored on ``self._app_client`` so later calls reuse it.
     """
     if hasattr(self, '_app_client'):
         return self._app_client
     self._app_client = ApplicationClient.from_current()
     return self._app_client