Ejemplo n.º 1
0
    def _build_remote_agent(self, zoo_manager_conns):
        """Spawn a remote worker via a zoo manager and wrap it in a RemoteAgent.

        One connection is picked at random from `zoo_manager_conns` and asked,
        up to three times, to spawn a worker process.

        Args:
            zoo_manager_conns: Sequence of zoo manager connection records. Each
                record is indexed with the keys "stub" and "address".

        Raises:
            RemoteAgentException: If every spawn attempt failed with a
                `grpc.RpcError`. The last such error is attached as the cause.

        Returns:
            RemoteAgent: Constructed from the chosen manager's address and a
                pair of (first element of that address, spawned worker port).
        """
        # Get a random zoo manager connection.
        zoo_manager_conn = random.choice(zoo_manager_conns)

        # Spawn remote worker and get its port.
        retries = 3
        worker_port = None
        last_error = None
        for retry in range(retries):
            try:
                response = zoo_manager_conn["stub"].spawn_worker(
                    manager_pb2.Machine())
            except grpc.RpcError as e:
                # Keep the error so it can be reported as the cause if all
                # attempts fail (`e` itself is unbound after this block).
                last_error = e
                self._log.debug(
                    f"Failed {retry+1}/{retries} times in attempt to spawn a remote worker process. {e}"
                )
            else:
                worker_port = response.num
                break

        # Identity check: `None` is a singleton and `==` can be overridden.
        if worker_port is None:
            raise RemoteAgentException(
                "Remote worker process could not be spawned by the zoo manager."
            ) from last_error

        # Instantiate and return a local RemoteAgent.
        return RemoteAgent(zoo_manager_conn["address"],
                           (zoo_manager_conn["address"][0], worker_port))
Ejemplo n.º 2
0
    def acquire_remote_agent(self,
                             retries: int = 3,
                             timeout: Optional[float] = None) -> RemoteAgent:
        """Creates RemoteAgent objects.

        Args:
            retries (int, optional): Number of attempts in creating or connecting to an available
                RemoteAgent. Defaults to 3.
            timeout (Optional[float], optional): Time (seconds) to wait in acquiring a RemoteAgent.
                Defaults to None, in which case `self._timeout` is used instead.

        Raises:
            RemoteAgentException: If fail to acquire a RemoteAgent. The exception from the
                last failed attempt, if any, is attached as the cause.

        Returns:
            RemoteAgent: A new RemoteAgent object.
        """
        # Identity check: `None` is a singleton and `==` can be overridden.
        if timeout is None:
            timeout = self._timeout

        last_error = None
        for retry in range(retries):
            try:
                return self._try_to_acquire_remote_agent(timeout)
            except Exception as e:
                # Deliberately broad: any failure in one attempt is retried.
                # Keep it so the final error can report the cause (`e` itself
                # is unbound once this block ends).
                last_error = e
                self._log.debug(
                    f"Failed {retry+1}/{retries} times in acquiring remote agent. {repr(e)}"
                )
                # Brief pause before the next attempt.
                time.sleep(0.1)

        raise RemoteAgentException(
            "Failed to acquire remote agent.") from last_error
Ejemplo n.º 3
0
def get_manager_channel_stub(addr, timeout=30):
    """Connect to the gRPC server at `addr` and return the channel and stub.

    Args:
        addr: Indexable pair whose first two elements are formatted as
            "<addr[0]>:<addr[1]>" to form the gRPC target.
        timeout: Seconds to wait for the channel to become ready.
            Defaults to 30.

    Raises:
        RemoteAgentException: If the channel is not ready within `timeout`.
            The channel is closed before raising.

    Returns:
        A `(channel, stub)` tuple: the open gRPC channel and a
        `manager_pb2_grpc.ManagerStub` bound to it.
    """
    channel = grpc.insecure_channel(f"{addr[0]}:{addr[1]}")
    try:
        # Wait until the grpc server is ready or the timeout elapses.
        grpc.channel_ready_future(channel).result(timeout=timeout)
    except grpc.FutureTimeoutError as e:
        # The channel never escapes this function on failure, so release it
        # here instead of leaking it.
        channel.close()
        raise RemoteAgentException(
            "Timeout in connecting to remote zoo manager.") from e
    stub = manager_pb2_grpc.ManagerStub(channel)
    return channel, stub
Ejemplo n.º 4
0
    def acquire_remote_agent(self, retries=3) -> RemoteAgent:
        """Try to acquire a RemoteAgent, retrying on any failure.

        Args:
            retries: Maximum number of acquisition attempts. Defaults to 3.

        Raises:
            RemoteAgentException: If no attempt succeeded.

        Returns:
            RemoteAgent: The value returned by the first successful attempt.
        """
        for retry in range(retries):
            try:
                agent = self._try_to_acquire_remote_agent()
            except Exception as e:
                # Any error counts as a failed attempt: log it, pause briefly,
                # and move on to the next attempt.
                self._log.debug(
                    f"Failed {retry+1}/{retries} times in acquiring remote agent. {repr(e)}"
                )
                time.sleep(0.1)
                continue
            return agent

        # Every attempt failed.
        raise RemoteAgentException("Failed to acquire remote agent.")
Ejemplo n.º 5
0
def get_manager_channel_stub(addr: Tuple[str, int], timeout: float = 10):
    """Connects to the gRPC server at `addr` and returns the channel and stub.

    Args:
        addr (Tuple[str,int]): gRPC server address.
        timeout (float, optional): Time to wait for the gRPC server to be ready. Defaults to 10.

    Raises:
        RemoteAgentException: If timeout occurs while connecting to the gRPC server.
            The channel is closed before raising.

    Returns:
        grpc.Channel: Channel to the gRPC server.
        manager_pb2_grpc.ManagerStub : gRPC stub.
    """
    channel = grpc.insecure_channel(f"{addr[0]}:{addr[1]}")
    try:
        grpc.channel_ready_future(channel).result(timeout=timeout)
    except grpc.FutureTimeoutError as e:
        # The channel never escapes this function on failure, so release it
        # here instead of leaking it.
        channel.close()
        raise RemoteAgentException(
            "Timeout in connecting to remote zoo manager.") from e
    stub = manager_pb2_grpc.ManagerStub(channel)
    return channel, stub