def __init__(self, zk_hosts, hostname, port, join_cluster):
    """Connect to ZooKeeper and prepare the master lock and the sender party.

    Args:
        zk_hosts: Host string handed to ``KazooClient`` as ``hosts``.
        hostname: Host part of this node's ``host:port`` identifier.
        port: Port part of this node's ``host:port`` identifier.
        join_cluster: When truthy, the client is created with
            ``read_only=False``, the connection-state listener is
            registered and the party is joined. Otherwise the client is
            created with ``read_only=True`` and nothing is joined.
    """
    # Identifier used for both the lock and the party membership.
    self.me = '%s:%s' % (hostname, port)

    self.started_shutdown = False
    self.is_master = None
    self.slave_count = 0
    self.slaves = cycle([])

    self.zk = KazooClient(
        hosts=zk_hosts,
        handler=SequentialGeventHandler(),
        read_only=not join_cluster,
    )
    # Start asynchronously and wait at most five seconds on the start event.
    self.zk.start_async().wait(timeout=5)

    self.lock = self.zk.Lock(path='/iris/sender_master', identifier=self.me)

    # Used to keep track of slaves / senders present in cluster
    self.party = Party(client=self.zk, path='/iris/sender_nodes', identifier=self.me)

    if not join_cluster:
        return

    self.zk.add_listener(self.event_listener)
    self.party.join()
def join_parties(self, parties=None):
    """Join each named party and keep the Party object in ``self.parties``.

    ``parties`` is a collection of party names; ``None`` or an empty
    collection only logs a debug message. Each party lives under
    ``<BASE_ZNODE>/<session>/parties/<name>`` and is joined with
    ``self.whoami`` as the identifier.
    """
    if not parties:
        self.log.debug("No parties to join specified")
        return

    self.log.debug("Joining %s parties: %s", len(parties), ", ".join(parties))

    for name in parties:
        znode = '%s/%s/parties/%s' % (self.BASE_ZNODE, self.session, name)
        membership = Party(self, znode, self.whoami)
        membership.join()
        self.parties[name] = membership
def __init__(self, ip_port, name):
    '''Connect the player to ZooKeeper and build its queue and party handles.

    ip_port is passed to KazooClient as the hosts string and name is used
    as the party identifier. If creating or starting the client raises,
    an error message is printed and the process exits with status -1.
    '''
    logging.basicConfig()
    self.name = name

    try:
        # Create client
        self.zk = KazooClient(hosts=ip_port, logger=logging)
        self.zk.start()
    except Exception:
        print(
            'Error connecting the Zookeeper Service, Please make sure the service is up or the IP:PORT provided is correct'
        )
        sys.exit(-1)

    # Ensure Paths
    for znode in ('/csjain_queue', '/csjain_players'):
        self.zk.ensure_path(znode)

    # Create Data structures
    self.my_queue = Queue(self.zk, '/csjain_queue')
    self.party = Party(self.zk, '/csjain_players', self.name)
def __init__(self, ip_port, score_board_size):
    '''Connect the watcher to ZooKeeper and start watching scores and players.

    ip_port is passed to KazooClient as the hosts string; score_board_size
    is stored on the instance unchanged. If creating or starting the
    client raises, an error message is printed and the process exits with
    status -1.
    '''
    logging.basicConfig()

    self.score_board_size = score_board_size
    self.is_dump = False
    self.is_init_client = True

    try:
        # Create client
        self.zk = KazooClient(hosts=ip_port, logger=logging)
        self.zk.start()
    except Exception:
        print(
            'Error connecting the Zookeeper Service, Please make sure the service is up or the IP:PORT provided is correct'
        )
        sys.exit(-1)

    # Ensure Paths
    for znode in ('/csjain_queue', '/csjain_players'):
        self.zk.ensure_path(znode)

    # Create Data structures
    self.score_queue = Queue(self.zk, '/csjain_queue')
    self.party = Party(self.zk, '/csjain_players')
    self.online_players = set(self.party)

    # Nothing queued yet: print the headings of an empty score board.
    if not len(self.score_queue):
        headings = (
            'Most recent scores',
            '------------------',
            '\n',
            'Highest scores',
            '--------------',
        )
        for heading in headings:
            print(heading)

    # Create Watchers
    ChildrenWatch(self.zk, '/csjain_queue', self.process_score)
    ChildrenWatch(self.zk, '/csjain_players', self.process_client)
class Player:
    '''A game player that announces itself in a party and posts scores to a queue.'''

    name = ''

    def __init__(self, ip_port, name):
        '''Connect the player to ZooKeeper and build its queue and party handles.

        ip_port is passed to KazooClient as the hosts string and name is
        used as the party identifier. If creating or starting the client
        raises, an error message is printed and the process exits with
        status -1.
        '''
        logging.basicConfig()
        self.name = name

        try:
            # Create client
            self.zk = KazooClient(hosts=ip_port, logger=logging)
            self.zk.start()
        except Exception:
            print(
                'Error connecting the Zookeeper Service, Please make sure the service is up or the IP:PORT provided is correct'
            )
            sys.exit(-1)

        # Ensure Paths
        for znode in ('/csjain_queue', '/csjain_players'):
            self.zk.ensure_path(znode)

        # Create Data structures
        self.my_queue = Queue(self.zk, '/csjain_queue')
        self.party = Party(self.zk, '/csjain_players', self.name)

    def join_party(self):
        '''Add this player to the party of online players.'''
        self.party.join()

    def leave_party(self):
        '''Remove this player from the party of online players.'''
        self.party.leave()

    def post_score(self, score):
        '''Queue "<name>:<score>" as UTF-8 bytes, joining the party first if absent.'''
        online = set(self.party)
        if self.name not in online:
            self.party.join()

        entry = '{}:{}'.format(self.name, str(score))
        self.my_queue.put(entry.encode('utf-8'))
class Coordinator(object):
    """Master election and membership tracking for senders via ZooKeeper.

    One node holds the lock at ``/iris/sender_master`` and is the master;
    nodes register themselves in the party at ``/iris/sender_nodes``.
    Every node is identified by its ``host:port`` string.
    """

    def __init__(self, zk_hosts, hostname, port, join_cluster):
        """Connect to ZooKeeper and prepare the master lock and sender party.

        Args:
            zk_hosts: Host string handed to ``KazooClient`` as ``hosts``.
            hostname: Host part of this node's ``host:port`` identifier.
            port: Port part of this node's ``host:port`` identifier.
            join_cluster: When truthy, the client is read-write, the
                connection-state listener is registered and the party is
                joined. Otherwise the client is created read-only and
                nothing is joined.
        """
        self.me = '%s:%s' % (hostname, port)
        self.is_master = None
        self.slaves = cycle([])
        self.slave_count = 0
        self.started_shutdown = False

        read_only = not join_cluster

        self.zk = KazooClient(hosts=zk_hosts, handler=SequentialGeventHandler(), read_only=read_only)
        event = self.zk.start_async()
        # Wait at most five seconds on the start event.
        event.wait(timeout=5)

        self.lock = self.zk.Lock(path='/iris/sender_master', identifier=self.me)

        # Used to keep track of slaves / senders present in cluster
        self.party = Party(client=self.zk, path='/iris/sender_nodes', identifier=self.me)

        if join_cluster:
            self.zk.add_listener(self.event_listener)
            self.party.join()

    def am_i_master(self):
        """Return the cached master flag (True, False, or None when unknown)."""
        return self.is_master

    # Used for API to get the current master
    def get_current_master(self):
        """Return the first lock contender as ``(host, port)``.

        Returns None when fetching the contenders fails, when there are
        none, or when the first contender cannot be parsed.
        """
        try:
            contenders = self.lock.contenders()
        except kazoo.exceptions.KazooException:
            logger.exception('Failed getting contenders')
            return None

        if contenders:
            return self.address_to_tuple(contenders[0])
        return None

    # Used for API to get the current slaves if master can't be reached
    def get_current_slaves(self):
        """Return every party member parsed with ``address_to_tuple``."""
        return [self.address_to_tuple(host) for host in self.party]

    def address_to_tuple(self, address):
        """Parse a ``host:port`` string into a ``(host, port)`` tuple.

        The split happens on the last colon only, so a host that itself
        contains colons (for example an IPv6 literal) is kept intact.

        Returns:
            ``(host, int(port))``, or None after logging an error when the
            address has no colon or the port is not an integer.
        """
        try:
            host, port = address.rsplit(':', 1)
            return host, int(port)
        except ValueError:
            logger.error('Failed getting address tuple from %s', address)
            return None

    def update_status(self):
        """Refresh ``is_master``, ``slaves`` and ``slave_count`` from ZooKeeper.

        Does nothing once shutdown has started. While the connection is
        not in the connected state the node treats itself as not master
        and keeps an empty slave list.
        """
        if self.started_shutdown:
            return

        if self.zk.state == KazooState.CONNECTED:
            if self.lock.is_acquired:
                self.is_master = True
            else:
                try:
                    self.is_master = self.lock.acquire(blocking=False, timeout=2)
                # This one is expected when we're recovering from ZK being down
                except kazoo.exceptions.CancelledError:
                    self.is_master = False
                except kazoo.exceptions.LockTimeout:
                    self.is_master = False
                    logger.exception(
                        'Failed trying to acquire lock (shouldn\'t happen as we\'re using nonblocking locks)'
                    )
                except kazoo.exceptions.KazooException:
                    self.is_master = False
                    logger.exception('ZK problem while Failed trying to acquire lock')
        else:
            logger.error('ZK connection is in %s state', self.zk.state)
            self.is_master = False

        # The state is read again on purpose: it may have changed while the
        # lock was being acquired above.
        if self.zk.state == KazooState.CONNECTED:
            if self.is_master:
                slaves = [self.address_to_tuple(host) for host in self.party if host != self.me]
                self.slave_count = len(slaves)
                self.slaves = cycle(slaves)
            else:
                self.slaves = cycle([])
                self.slave_count = 0

            # Keep us as part of the party, so the current master sees us as a slave
            if not self.party.participating:
                try:
                    self.party.join()
                except kazoo.exceptions.KazooException:
                    logger.exception('ZK problem while trying to join party')
        else:
            self.slaves = cycle([])
            self.slave_count = 0

    def update_forever(self):
        """Refresh status, log the role and publish metrics until shutdown.

        The role is logged at info level when it changed during this
        iteration and at debug level otherwise. Each iteration ends with a
        sleep of ``UPDATE_FREQUENCY``.
        """
        while True:
            if self.started_shutdown:
                return

            old_status = self.is_master
            self.update_status()
            new_status = self.is_master

            log = logger.info if old_status != new_status else logger.debug

            if self.is_master:
                log('I am the master sender')
            else:
                log('I am a slave sender')

            metrics.set('slave_instance_count', self.slave_count)
            # is_master may be None, so compare against True explicitly.
            metrics.set('is_master_sender', int(self.is_master is True))

            sleep(UPDATE_FREQUENCY)

    def leave_cluster(self):
        """Mark shutdown as started, then leave the party and release the lock.

        Leaving and releasing are only attempted while the connection is
        in the connected state.
        """
        self.started_shutdown = True

        # cancel any attempts to acquire master lock which could make us hang
        self.lock.cancel()

        if self.zk.state == KazooState.CONNECTED:
            if self.party and self.party.participating:
                logger.info('Leaving party')
                self.party.leave()

            if self.lock and self.lock.is_acquired:
                logger.info('Releasing lock')
                self.lock.release()

    def event_listener(self, state):
        """Reset lock, party and master flags when the connection is lost or suspended."""
        if state == KazooState.LOST or state == KazooState.SUSPENDED:
            logger.info('ZK state transitioned to %s. Resetting master status.', state)

            # cancel pending attempts to acquire lock which will break and leave
            # us in bad state
            self.lock.cancel()

            # make us try to re-acquire lock during next iteration when we're connected
            if self.lock.is_acquired:
                self.lock.is_acquired = False

            # make us try to rejoin the party during next iteration when we're connected
            if self.party.participating:
                self.party.participating = False

            # in the meantime we're not master
            self.is_master = None
class Coordinator(object):
    """Master election and membership tracking for senders via ZooKeeper.

    One node holds the lock at ``/iris/sender_master`` and is the master;
    nodes register themselves in the party at ``/iris/sender_nodes``.
    Every node is identified by its ``host:port`` string.
    """

    def __init__(self, zk_hosts, hostname, port, join_cluster):
        """Connect to ZooKeeper and prepare the master lock and sender party.

        Args:
            zk_hosts: Host string handed to ``KazooClient`` as ``hosts``.
            hostname: Host part of this node's ``host:port`` identifier.
            port: Port part of this node's ``host:port`` identifier.
            join_cluster: When truthy, the client is read-write and the
                party is joined. Otherwise the client is created read-only
                and the party is not joined.
        """
        self.me = '%s:%s' % (hostname, port)
        self.is_master = None
        self.slaves = cycle([])
        self.slave_count = 0
        self.started_shutdown = False

        read_only = not join_cluster

        self.zk = KazooClient(hosts=zk_hosts, handler=SequentialGeventHandler(), read_only=read_only)
        event = self.zk.start_async()
        # Wait at most five seconds on the start event.
        event.wait(timeout=5)

        self.lock = self.zk.Lock(path='/iris/sender_master', identifier=self.me)

        # Used to keep track of slaves / senders present in cluster
        self.party = Party(client=self.zk, path='/iris/sender_nodes', identifier=self.me)

        if join_cluster:
            self.party.join()

    def am_i_master(self):
        """Return the cached master flag (True, False, or None when unknown)."""
        return self.is_master

    # Used for API to get the current master
    def get_current_master(self):
        """Return the first lock contender as ``(host, port)``.

        Returns None when fetching the contenders fails, when there are
        none, or when the first contender cannot be parsed.
        """
        try:
            contenders = self.lock.contenders()
        except kazoo.exceptions.KazooException:
            logger.exception('Failed getting contenders')
            return None

        if contenders:
            return self.address_to_tuple(contenders[0])
        return None

    # Used for API to get the current slaves if master can't be reached
    def get_current_slaves(self):
        """Return every party member parsed with ``address_to_tuple``."""
        return [self.address_to_tuple(host) for host in self.party]

    def address_to_tuple(self, address):
        """Parse a ``host:port`` string into a ``(host, port)`` tuple.

        The split happens on the last colon only, so a host that itself
        contains colons (for example an IPv6 literal) is kept intact.

        Returns:
            ``(host, int(port))``, or None after logging an error when the
            address has no colon or the port is not an integer.
        """
        try:
            host, port = address.rsplit(':', 1)
            return host, int(port)
        except ValueError:
            logger.error('Failed getting address tuple from %s', address)
            return None

    def update_status(self):
        """Refresh ``is_master``, ``slaves`` and ``slave_count`` from ZooKeeper.

        Does nothing once shutdown has started. While the connection is
        not in the connected state the node treats itself as not master.
        """
        if self.started_shutdown:
            return

        if self.zk.state == KazooState.CONNECTED:
            if self.is_master:
                # Already master: just confirm the lock is still marked held.
                self.is_master = self.lock.is_acquired
            else:
                try:
                    self.is_master = self.lock.acquire(blocking=False, timeout=2)
                except kazoo.exceptions.LockTimeout:
                    self.is_master = False
                    logger.exception(
                        'Failed trying to acquire lock (shouldn\'t happen as we\'re using nonblocking locks)'
                    )
                except kazoo.exceptions.KazooException:
                    self.is_master = False
                    logger.exception('ZK problem while Failed trying to acquire lock')
        else:
            logger.error('ZK connection is not in connected state')
            self.is_master = False

        if self.is_master:
            slaves = [self.address_to_tuple(host) for host in self.party if host != self.me]
            self.slave_count = len(slaves)
            self.slaves = cycle(slaves)
        else:
            self.slaves = cycle([])
            self.slave_count = 0

    def update_forever(self):
        """Refresh status, log the role and publish metrics until shutdown.

        The role is logged at info level when it changed during this
        iteration and at debug level otherwise. Each iteration ends with a
        sleep of ``UPDATE_FREQUENCY``.
        """
        while True:
            if self.started_shutdown:
                return

            old_status = self.is_master
            self.update_status()
            new_status = self.is_master

            log = logger.info if old_status != new_status else logger.debug

            if self.is_master:
                log('I am the master sender')
            else:
                log('I am a slave sender')

            metrics.set('slave_instance_count', self.slave_count)
            # bool() first: int(None) would raise if the flag is still unset.
            metrics.set('is_master_sender', int(bool(self.is_master)))

            sleep(UPDATE_FREQUENCY)

    def leave_cluster(self):
        """Mark shutdown as started, then leave the party and release the lock."""
        self.started_shutdown = True

        if self.party and self.party.participating:
            logger.info('Leaving party')
            self.party.leave()

        if self.lock and self.lock.is_acquired:
            logger.info('Releasing lock')
            self.lock.release()
TimeoutError: The port isn't accepting connection after time specified in `timeout`. """ start_time = time.perf_counter() while True: try: with socket.create_connection((host, port), timeout=timeout): break except OSError as ex: time.sleep(0.01) if time.perf_counter() - start_time >= timeout: raise TimeoutError( "Waited too long for the port {} on host {} to start accepting " "connections.".format(port, host)) from ex if __name__ == "__main__": # args = get_args() port = 80 hostname = "127.0.0.1" host = f"{hostname}:{port}" zk = KazooClient( hosts=host, connection_retry=KazooRetry(20), ) zk.start() party = Party(zk, "/celery/workers", identifier=1) print(zk.get_children("/celery/")) # for member in party: # print(member) zk.stop()
elapsed = datetime.datetime.utcnow() - begun logging.info('assayed %d keys and %d values on server %d in %f ms', c_count, v_count, server, elapsed.total_seconds * 1000.0) def summarize_me(signum, frame): party_size = len(party) logging.info('there are %s clients in the party', party_size) my_position = get_my_position(party, ME) logging.info('my place in the party is: %d', (my_position)) targets = get_work(party_size, my_position) logging.debug('%d assigned targets: %s', len(targets), targets) compute_averages(targets) # reset the alarm signal.alarm(1) zk = KazooClient(hosts=ZK_HOSTS) zk.start() zk.add_listener(zk_state) zk.ensure_path(PREFIX) party = Party(zk, PREFIX, identifier=ME) atexit.register(exit_handler, party) party.join() signal.signal(signal.SIGALRM, summarize_me) signal.alarm(5) while True: time.sleep(300)