class ZKClient(object):
    """Publish this server's heartbeat as an ephemeral ZooKeeper znode.

    The znode path is ``<server_info_path>/<host>:<port>`` and its data is
    the Unix time, in whole seconds, of the latest heartbeat.
    """

    def __init__(self, zk_hosts, local_port, local_host=None,
                 server_info_path='/crawlers'):
        """Store the connection settings and connect right away.

        Args:
            zk_hosts: Value passed to ``KazooClient(hosts=...)``.
            local_port: Port used in this server's znode name.
            local_host: Host used in the znode name; when falsy the value
                returned by ``get_local_host()`` is used instead.
            server_info_path: Parent znode under which the server znode is
                created.
        """
        self.zk_hosts = zk_hosts
        self.local_host = local_host
        if not self.local_host:
            self.local_host = get_local_host()
        self.server_info_path = server_info_path
        self.server_info_znode = '{server_info_path}/{ip}:{port}'.format(
            server_info_path=server_info_path,
            ip=self.local_host,
            port=local_port)
        self.connect_zk()

    def connect_zk(self):
        """Create the Kazoo client, attach the state listener and start it."""
        self.zk = KazooClient(hosts=self.zk_hosts)
        self.zk.add_listener(self.state_listener)
        self.zk.start()

    def update_heartbeat(self):
        """Write the current timestamp to this server's znode, asynchronously.

        An existence check is issued first; its completion callback updates
        the znode when it exists and otherwise creates it as an ephemeral
        node (creating missing parents).
        """
        # Kazoo only accepts byte strings as znode data; a text string is
        # rejected with TypeError on Python 3, so encode explicitly.
        ts = str(int(time.time())).encode('ascii')

        def callback(async_stat):
            stat = async_stat.get()
            if stat:
                self.zk.set_async(self.server_info_znode, ts)
            else:
                self.zk.create_async(self.server_info_znode, ts,
                                     ephemeral=True, makepath=True)

        async_stat = self.zk.exists_async(self.server_info_znode, watch=None)
        async_stat.rawlink(callback)

    def state_listener(self, state):
        """React to Kazoo connection state changes.

        ``LOST`` and ``SUSPENDED`` are currently ignored; every other state
        triggers a heartbeat so the znode is (re)published.
        """
        if state == KazooState.LOST:
            # Register somewhere that the session was lost
            pass
        elif state == KazooState.SUSPENDED:
            # Handle being disconnected from Zookeeper
            pass
        else:
            self.update_heartbeat()

    def add_watcher(self, request_handler):
        """Watch ``ZOO_CONFIG_PROXY_PATH`` and forward its data to the handler.

        Args:
            request_handler: Object whose ``config_proxy_via_zookeeper``
                method receives the watched znode's data.
        """
        @self.zk.DataWatch(ZOO_CONFIG_PROXY_PATH)
        def proxy_change(data, stat):
            # Handed to run_in_thread (defined elsewhere) rather than called
            # inline -- presumably to keep the watch callback short.
            run_in_thread(request_handler.config_proxy_via_zookeeper, data)

    def close(self):
        """Stop the Kazoo client."""
        self.zk.stop()
def main(hosts, path):
    """Print the summed data length of ``path`` and all znodes below it.

    Two worker threads walk the tree with asynchronous requests: one
    consumes ``exists`` results (accumulating ``dataLength`` and requesting
    child listings), the other consumes child listings and issues an
    ``exists`` request per child.

    Args:
        hosts: Host string passed to ``KazooClient``.
        path: Root znode of the subtree to measure.

    ``Request`` is defined outside this block; it is used here as an object
    exposing ``.path`` and a blocking ``.get()``.
    """
    exists = Queue()
    children = Queue()
    # "child" is the number of znodes discovered but not yet processed; it
    # starts at 1 for the root. Only exists_worker mutates this dict.
    counters = {"child": 1, "total": 0}
    done = Event()

    zk = KazooClient(hosts)
    zk.start()

    exists.put(Request(path, zk.exists_async(path)))

    def exists_worker(zk, counters):
        while not done.is_set():
            try:
                # Bounded wait so the loop re-checks ``done`` periodically.
                e = exists.get(timeout=1)
            except Empty:
                continue

            stat = e.get()
            # NOTE(review): assumes Request.get() forwards the Kazoo result,
            # which is None for a missing node. Such a node contributes
            # nothing but must still be counted as processed, otherwise
            # ``done`` is never set.
            if stat is not None:
                counters["total"] += stat.dataLength
                counters["child"] += stat.numChildren
                if stat.numChildren > 0:
                    children.put(
                        Request(e.path, zk.get_children_async(e.path)))

            counters["child"] -= 1
            if counters["child"] == 0:
                done.set()

    def children_worker(zk):
        while not done.is_set():
            try:
                # A blocking get with a timeout: with block=False the
                # timeout is ignored and the loop spins at full CPU.
                c = children.get(timeout=1)
            except Empty:
                continue

            ppath = c.path if c.path == "/" else c.path + "/"
            for child in c.get():
                cpath = "%s%s" % (ppath, child)
                exists.put(Request(cpath, zk.exists_async(cpath)))

    texists = Thread(target=exists_worker, args=(zk, counters))
    tchildren = Thread(target=children_worker, args=(zk,))
    texists.start()
    tchildren.start()

    try:
        done.wait()
        print("Total = %d" % (counters["total"]))
    finally:
        # Release the workers and the connection even if waiting was
        # interrupted.
        done.set()
        zk.stop()
        texists.join()
        tchildren.join()
def main(hosts, path):
    """Print the summed data length of ``path`` and all znodes below it.

    Two worker threads walk the tree with asynchronous requests: one
    consumes ``exists`` results (accumulating ``dataLength`` and requesting
    child listings), the other consumes child listings and issues an
    ``exists`` request per child.

    Args:
        hosts: Host string passed to ``KazooClient``.
        path: Root znode of the subtree to measure.

    ``Request`` is defined outside this block; it is used here as an object
    exposing ``.path`` and a blocking ``.get()``.
    """
    exists = Queue()
    children = Queue()
    # "child" is the number of znodes discovered but not yet processed; it
    # starts at 1 for the root. Only exists_worker mutates this dict.
    counters = {"child": 1, "total": 0}
    done = Event()

    zk = KazooClient(hosts)
    zk.start()

    exists.put(Request(path, zk.exists_async(path)))

    def exists_worker(zk, counters):
        while not done.is_set():
            try:
                # Bounded wait so the loop re-checks ``done`` periodically.
                e = exists.get(timeout=1)
            except Empty:
                continue

            stat = e.get()
            # NOTE(review): assumes Request.get() forwards the Kazoo result,
            # which is None for a missing node. Such a node contributes
            # nothing but must still be counted as processed, otherwise
            # ``done`` is never set.
            if stat is not None:
                counters["total"] += stat.dataLength
                counters["child"] += stat.numChildren
                if stat.numChildren > 0:
                    children.put(
                        Request(e.path, zk.get_children_async(e.path)))

            counters["child"] -= 1
            if counters["child"] == 0:
                done.set()

    def children_worker(zk):
        while not done.is_set():
            try:
                # A blocking get with a timeout: with block=False the
                # timeout is ignored and the loop spins at full CPU.
                c = children.get(timeout=1)
            except Empty:
                continue

            ppath = c.path if c.path == "/" else c.path + "/"
            for child in c.get():
                cpath = "%s%s" % (ppath, child)
                exists.put(Request(cpath, zk.exists_async(cpath)))

    texists = Thread(target=exists_worker, args=(zk, counters))
    tchildren = Thread(target=children_worker, args=(zk,))
    texists.start()
    tchildren.start()

    try:
        done.wait()
        print("Total = %d" % (counters["total"]))
    finally:
        # Release the workers and the connection even if waiting was
        # interrupted.
        done.set()
        zk.stop()
        texists.join()
        tchildren.join()
class ZooHandler(object):
    """Register this server as an ephemeral ZooKeeper node.

    When ``settings.ZOOKEEPER_SETTING['enable']`` is falsy no client is
    created and ``zookeeper_client`` stays ``None``; the other methods then
    do nothing beyond logging.
    """

    def __init__(self):
        """Create the Kazoo client, attach the listener, start connecting."""
        self.zookeeper_client = None
        if not settings.ZOOKEEPER_SETTING['enable']:
            logging.info('zookeeper disabled')
            return

        self.zoo_hosts = settings.ZOOKEEPER_SETTING['server_address']
        logging.info('start zookeeper client, zoo hosts: %s', self.zoo_hosts)
        self.base_dir = settings.ZOOKEEPER_SETTING['base_dir']

        self.zookeeper_client = KazooClient(hosts=self.zoo_hosts)
        self.zookeeper_client.add_listener(self.state_listener)
        # Registration itself is driven by state_listener once the client
        # reports CONNECTED.
        self.zookeeper_client.start_async()

    def state_listener(self, state):
        """Log connection state changes and register when connected."""
        if state == KazooState.LOST:
            # session was lost
            logging.error('zookeeper lost!')
        elif state == KazooState.SUSPENDED:
            # disconnected from Zookeeper
            logging.error('zookeeper disconnected!')
        elif state == KazooState.CONNECTED:
            # connected/reconnected to Zookeeper
            self.register_node()
            logging.warning('zookeeper reconnected! try to register')
        else:
            logging.error('unexpected zookeeper state!!!')
            logging.critical('unexpected zookeeper state!!!')

    def register_node(self):
        """Create this server's ephemeral node if missing and watch it.

        Does nothing (apart from logging) when there is no client or the
        client is not connected. Errors are logged, never raised.
        """
        if not self.zookeeper_client or not self.zookeeper_client.connected:
            logging.error('zoo not connected, register cancel')
            return

        # Resolved through the instance so a subclass can override the path.
        path = self.get_register_path()

        # Try to register the node once the existence check completes.
        def try_to_create_node(result):
            logging.info('zoo try_to_create_noe called')
            try:
                # The failure case must be tested first: a failed async
                # result also has value None and would otherwise be mistaken
                # for "node does not exist".
                if result.exception is not None:
                    logging.critical(
                        'critical error when try to check node when reconnected, %s',
                        result.exception)
                elif result.value is None:
                    # None means the node does not exist.
                    self.zookeeper_client.create_async(
                        path, makepath=True, ephemeral=True)
                else:
                    logging.warning(
                        'node already exists when reconnect and try to register'
                    )
            except Exception as e:
                logging.exception('critical error, %s', e)

        # Watch for changes to the node.
        def node_watcher(watch_event):
            logging.info('zoo node_watcher called')
            try:
                event_type = watch_event.type
                if event_type == EventType.DELETED:
                    logging.warning('zoo nodes deleted, try recreate')
                    self.zookeeper_client.create_async(
                        path, makepath=True, ephemeral=True)
                elif event_type == EventType.CHANGED:
                    logging.warning('zoo nodes changed,do nothing')
                elif event_type == EventType.CHILD:
                    logging.warning('zoo nodes childed,do nothing')
                elif event_type == EventType.CREATED:
                    logging.info('zoo nodes success created')
                elif event_type == EventType.NONE:
                    logging.error('zoo nodes status return None')
            finally:
                # Re-arm the watch after every event, whatever happened above.
                self.zookeeper_client.exists_async(path, watch=node_watcher)

        try:
            future = self.zookeeper_client.exists_async(
                path, watch=node_watcher)
            future.rawlink(try_to_create_node)
        except ZookeeperError as e:
            logging.exception(
                'zookeeper exception when register node: %s', e)
        except Exception:
            logging.exception('critical error!')

    def stop(self):
        """Detach the state listener and stop the client, if one exists."""
        logging.info('stopping zookeeper client')
        if self.zookeeper_client:
            self.zookeeper_client.remove_listener(self.state_listener)
            self.zookeeper_client.stop()
            logging.info('zookeeper stopped')

    @staticmethod
    def get_register_path():
        """Return ``<base_dir>/<local_name>:<local_ip>:<port>`` from settings.

        A single trailing ``/`` on ``base_dir`` is dropped first.
        """
        base_dir = settings.ZOOKEEPER_SETTING['base_dir']
        # endswith() is safe for an empty base_dir, unlike indexing [-1].
        if base_dir.endswith('/'):
            base_dir = base_dir[:-1]
        register_name = "%s/%s:%s:%s" % (
            base_dir,
            settings.ZOOKEEPER_SETTING['local_name'],
            settings.ZOOKEEPER_SETTING['local_ip'],
            settings.HTTP_SERVER_SETTING['port'])
        return register_name