def _slave_wait(self):
    """
    Run the slave side of the barrier conversation with the master.

    Reads control messages (at most 4 bytes per read) from the master
    connection stored under our own host id, answers every "ping" with
    "pong" (or with "abrt" when self._abort is set) and remembers the most
    recent message.  When the master closes the connection, that most
    recent message decides the outcome: "rlse" means success, anything
    else is reported as an error.

    :raise error.BarrierError: The connection closed after any message
            other than "rlse" or "abrt" (or before any message at all).
    :raise BarrierAbortError: The last message from the master was "abrt".
    """
    master = self._waiting[self._hostid][0]
    last_msg = "wait"

    while True:
        # All control messages are the same size so that individual
        # messages can be split apart easily.
        master.settimeout(self._remaining())
        data = master.recv(4)
        if not data:
            # An empty read means the master closed the connection.
            break

        last_msg = data.strip("\r\n")
        logging.info("master said: %s", last_msg)

        if last_msg == "ping":
            # Ensure we have sufficient time for the ping/pong/rlse
            # cycle to complete normally.
            self._update_timeout(10 + 10 * len(self._members))

            answer = "abrt" if self._abort else "pong"
            logging.info(answer)
            master.settimeout(self._remaining())
            master.send(answer)
        elif last_msg in ("rlse", "abrt"):
            # Ensure we have sufficient time for the ping/pong/rlse
            # cycle to complete normally.
            self._update_timeout(10 + 10 * len(self._members))

            logging.info("was released, waiting for close")

    # The connection is closed; the last message tells us why.
    if last_msg == "rlse":
        return
    if last_msg == "abrt":
        raise BarrierAbortError("Client requested abort")

    failures = {
        "wait": "master abort -- barrier timeout",
        "ping": "master abort -- client lost",
        "!tag": "master abort -- incorrect tag",
        "!dup": "master abort -- duplicate client",
    }
    if last_msg in failures:
        raise error.BarrierError(failures[last_msg])
    raise error.BarrierError("master handshake failure: " + last_msg)
def get_host_from_id(hostid):
    """
    Return the host part of a barrier member id.

    A member id may carry a local identifier after a '#'; everything from
    the first '#' onward is dropped.  This allows multiple members per
    host, which is particularly helpful in testing.

    :param hostid: Member id, e.g. "host" or "host#tag".
    :return: The text of hostid that precedes the first '#'.
    :raise error.BarrierError: hostid starts with '#', i.e. it carries no
            host part at all.
    """
    if hostid.startswith('#'):
        raise error.BarrierError(
            "Invalid Host id: Host Address should be specified")

    host, _sep, _local_id = hostid.partition('#')
    return host
def _remaining(self):
    """
    Return how many seconds are left before the barrier times out.

    :return: None when no timeout is configured; the full configured
            timeout when waiting has not started yet (self._start_time is
            None); otherwise the configured timeout minus the time elapsed
            since self._start_time.
    :raise error.BarrierError: The configured timeout has already elapsed.
    """
    limit = self._timeout_secs
    if limit is None:
        # No timeout configured: nothing to compute and nothing to log.
        return None

    left = limit
    if self._start_time is not None:
        left = limit - (time() - self._start_time)
        if left <= 0:
            raise error.BarrierError(
                "timeout waiting for barrier: %s" % self._tag)

    logging.info("seconds remaining: %d", left)
    return left
def _master_release(self):
    """
    Ping every waiting client, then release (or abort) all of them.

    Phase one sends "ping" to each client in self._waiting and reads its
    reply with a 5 second socket timeout.  A reply of "abrt" marks the
    barrier as aborted; any reply other than "pong" or "abrt" (including
    no reply because of a timeout) marks that client as lost.

    Phase two sends "abrt" to every client if an abort was requested
    (by us via self._abort, or by any client), otherwise "rlse".  A send
    timeout in this phase is logged and otherwise ignored.

    :raise error.BarrierError: At least one client was lost during the
            ping phase; in that case nothing is sent in phase two.
    :raise BarrierAbortError: An abort was requested; raised after "abrt"
            has been sent to the clients.
    """
    # Check everyone is still there, that they have not
    # crashed or disconnected in the meantime.
    all_present = True
    abort = self._abort
    for name, (client, _addr) in self._waiting.items():
        logging.info("checking client present: %s", name)

        client.settimeout(5)
        # Sentinel that is neither "pong" nor "abrt", so a timed-out
        # client is counted as lost below.
        reply = 'none'
        try:
            client.send("ping")
            reply = client.recv(1024)
        except socket.timeout:
            # NOTE(review): only timeouts are handled here; other socket
            # errors (e.g. a reset connection) propagate to the caller.
            logging.warning("ping/pong timeout: %s", name)

        if reply == 'abrt':
            logging.warning("Client %s requested abort", name)
            abort = True
        elif reply != "pong":
            all_present = False

    if not all_present:
        raise error.BarrierError("master lost client")

    if abort:
        logging.info("Aborting the clients")
        msg = 'abrt'
    else:
        logging.info("Releasing clients")
        msg = 'rlse'

    # If everyone checks in then commit the release.
    for name, (client, _addr) in self._waiting.items():
        client.settimeout(5)
        try:
            client.send(msg)
        except socket.timeout:
            # Best effort: keep going so the remaining clients still
            # receive the message.
            logging.warning("release timeout: %s", name)

    if abort:
        raise BarrierAbortError("Client requested abort")
def __init__(self, hostid, tag, timeout=None, port=None,
             listen_server=None):
    """
    :param hostid: My hostname/IP address + optional tag.
    :param tag: Symbolic name of the barrier in progress.
    :param timeout: Maximum seconds to wait for the barrier to meet.
    :param port: Port number to listen on.
    :param listen_server: External listen_server instance to use instead
            of creating our own.  Create a listen_server instance and
            reuse it across multiple barrier instances so that the
            barrier code doesn't try to quickly re-bind on the same port
            (packets still in transit for the previous barrier may reset
            new connections).
    :raise error.BarrierError: Both port and listen_server were given.
    """
    self._hostid = hostid
    self._tag = tag
    if listen_server:
        if port:
            raise error.BarrierError(
                '"port" and "listen_server" are mutually exclusive.')
        # Listen on whatever port the shared server already uses.
        self._port = listen_server.port
    else:
        self._port = port or _DEFAULT_PORT
    self._server = listen_server  # A listen_server instance or None.
    self._members = []  # List of hosts we expect to find at the barrier.
    self._timeout_secs = timeout
    self._start_time = None  # Timestamp of when we started waiting.
    self._masterid = None  # Host/IP + optional tag of selected master.
    logging.info("tag=%s port=%d timeout=%r",
                 self._tag, self._port, self._timeout_secs)

    # Number of clients seen (should be the length of self._waiting).
    self._seen = 0

    # Clients who have checked in and are waiting (if we are a master).
    self._waiting = {}  # Maps from hostname -> (client, addr) tuples.

    # Abort flag read by _slave_wait and _master_release.  Start from a
    # defined "no abort" state so those methods never hit an unset
    # attribute.
    self._abort = False