def __init__(self, host='127.0.0.1:2181', handler=None, ignore_expire=False):
    if handler == 'gevent':
        import gevent
        kr = KazooRetry(max_tries=-1, delay=0.2, sleep_func=gevent.sleep,
                        ignore_expire=ignore_expire)
        KazooClient.__init__(self, hosts=host, connection_retry=kr,
                             handler=SequentialGeventHandler())
    else:
        kr = KazooRetry(max_tries=-1, delay=0.2, ignore_expire=ignore_expire)
        KazooClient.__init__(
            self,
            hosts=host,
            connection_retry=kr,
        )
    self.start()
    self.add_listener(self._conn_state_listener)
def __init__(self, module, server_list):
    # logging
    logger = logging.getLogger(module)
    logger.setLevel(logging.INFO)
    try:
        handler = logging.handlers.RotatingFileHandler(
            '/var/log/contrail/' + module + '-zk.log',
            maxBytes=10 * 1024 * 1024, backupCount=5)
    except IOError:
        print("Cannot open log file in /var/log/contrail/")
    else:
        log_format = logging.Formatter(
            '%(asctime)s [%(name)s]: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p')
        handler.setFormatter(log_format)
        logger.addHandler(handler)

    self._zk_client = \
        kazoo.client.KazooClient(
            server_list,
            timeout=20,
            handler=kazoo.handlers.gevent.SequentialGeventHandler(),
            logger=logger)
    self._zk_client.add_listener(self._zk_listener)
    self._logger = logger
    self._election = None
    # KazooRetry to retry keeper CRUD operations
    self._retry = KazooRetry(max_tries=None)
    self.connect()
def __init__(self, client, keys, txid=None):
    """ Create an entity lock.

    Args:
      client: A kazoo client.
      keys: A list of entity Reference objects.
      txid: An integer specifying the transaction ID.
    """
    self.client = client
    self.paths = [zk_group_path(key) for key in keys]

    # The txid is written to the contender nodes for deadlock resolution.
    self.data = str(txid or '')

    self.wake_event = client.handler.event_object()

    # Give the contender nodes a uniquely identifiable prefix in case its
    # existence is in question.
    self.prefix = uuid.uuid4().hex + self._NODE_NAME

    self.create_paths = [path + '/' + self.prefix for path in self.paths]

    self.create_tried = False
    self.is_acquired = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None,
                             sleep_func=client.handler.sleep_func)
    self._lock = client.handler.lock_object()
def _get_hiveserver2_info_with_zookeeper(self, host, port,
                                         zookeeper_name_space):
    """Get hiveserver2 URL information from zookeeper."""
    from kazoo.client import KazooClient
    from kazoo.retry import KazooRetry

    hosts = host.split(',')
    zk_hosts = ','.join(
        list(map(lambda x: ':'.join([x, str(port)]), hosts)))

    conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
    cmd_retry_policy = KazooRetry(max_tries=3, delay=0.3, backoff=1,
                                  max_delay=1, ignore_expire=False)
    zk = KazooClient(hosts=zk_hosts, connection_retry=conn_retry_policy,
                     command_retry=cmd_retry_policy)
    zk.start()

    children = zk.get_children('/' + zookeeper_name_space)
    nodes = self.get_hiveserver2_info(children)
    zk.stop()
    zk.close()

    if len(nodes) == 0:
        from kazoo.exceptions import ZookeeperError
        raise ZookeeperError(
            "Can not find child in zookeeper path({}).".format(
                zookeeper_name_space))

    return nodes
def __init__(self, client, path, identifier=None):
    """Create a Kazoo lock.

    :param client: A :class:`~kazoo.client.KazooClient` instance.
    :param path: The lock path to use.
    :param identifier: Name to use for this lock contender. This can be
        useful for querying to see who the current lock contenders are.

    """
    self.client = client
    self.path = path

    # some data is written to the node. this can be queried via
    # contenders() to see who is contending for the lock
    self.data = str(identifier or "").encode('utf-8')

    self.wake_event = client.handler.event_object()

    # props to Netflix Curator for this trick. It is possible for our
    # create request to succeed on the server, but for a failure to
    # prevent us from getting back the full path name. We prefix our
    # lock name with a uuid and can check for its presence on retry.
    self.prefix = uuid.uuid4().hex + self._NODE_NAME
    self.create_path = self.path + "/" + self.prefix

    self.create_tried = False
    self.is_acquired = False
    self.assured_path = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None,
                             sleep_func=client.handler.sleep_func)
    self._lock = client.handler.lock_object()
def __init__(self, module, server_list, host_ip, logging_fn=None,
             zk_timeout=400, log_response_time=None):
    self.host_ip = host_ip
    # logging
    logger = logging.getLogger(module)
    logger.setLevel(logging.DEBUG)
    try:
        handler = logging.handlers.RotatingFileHandler(
            LOG_DIR + module + '-zk.log',
            maxBytes=10 * 1024 * 1024, backupCount=5)
    except IOError:
        print("Cannot open log file in %s" % LOG_DIR)
    else:
        log_format = logging.Formatter(
            '%(asctime)s [%(name)s]: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p')
        handler.setFormatter(log_format)
        logger.addHandler(handler)
    if logging_fn:
        self.log = logging_fn
    else:
        self.log = self.syslog
    self.log_response_time = log_response_time
    # KazooRetry to retry keeper CRUD operations
    self._retry = KazooRetry(max_tries=None, max_delay=300,
                             sleep_func=gevent.sleep)
    self._zk_client = kazoo.client.KazooClient(
        server_list,
        timeout=zk_timeout,
        handler=kazoo.handlers.gevent.SequentialGeventHandler(),
        logger=logger,
        connection_retry=self._retry,
        command_retry=self._retry)
    self._zk_client.add_listener(self._zk_listener)
    self._logger = logger
    self._election = None
    self._server_list = server_list
    self._conn_state = None
    self._sandesh_connection_info_update(status='INIT', message='')
    self._lost_cb = None
    self._suspend_cb = None

    self.delete_node = self._response_time(self.delete_node, "DELETE")
    self.create_node = self._response_time(self.create_node, "CREATE")
    self.read_node = self._response_time(self.read_node, "READ")
    self.get_children = self._response_time(self.get_children,
                                            "GET_CHILDREN")
    self.exists = self._response_time(self.exists, "EXISTS")
    self.connect()
def zk_connect(zk_addr: str,
               zk_user: Optional[str] = None,
               zk_secret: Optional[str] = None) -> KazooClient:
    """Connect to ZooKeeper.

    On connection failure, the function attempts to reconnect
    indefinitely with exponential backoff up to 3 seconds. If a command
    fails, that command is retried every 300ms for 3 attempts before
    failing.

    These values are chosen to suit a human-interactive time.

    Args:
        zk_addr: The address to connect to
        zk_user: The username to use when connecting to ZooKeeper or
            `None` if no authentication is necessary.
        zk_secret: The secret to use when connecting to ZooKeeper or
            `None` if no authentication is necessary.

    Returns:
        A ZooKeeper client connection in the form of a
        `kazoo.client.KazooClient`.
    """
    # Try to reconnect indefinitely, with time between updates going
    # exponentially to ~3s. Then every retry occurs every ~3 seconds.
    conn_retry_policy = KazooRetry(
        max_tries=-1,
        delay=0.3,
        backoff=1.3,
        max_delay=3,
        ignore_expire=True,
    )

    # Retry commands every 0.3 seconds, for a total of <1s (usually 0.9)
    cmd_retry_policy = KazooRetry(
        max_tries=3,
        delay=0.3,
        backoff=1,
        max_delay=1,
        ignore_expire=False,
    )

    default_acl = None
    auth_data = None
    if zk_user and zk_secret:
        default_acl = [make_digest_acl(zk_user, zk_secret, all=True)]
        scheme = 'digest'
        credential = "{}:{}".format(zk_user, zk_secret)
        auth_data = [(scheme, credential)]

    zk = KazooClient(
        hosts=zk_addr,
        timeout=30,
        connection_retry=conn_retry_policy,
        command_retry=cmd_retry_policy,
        default_acl=default_acl,
        auth_data=auth_data,
    )

    zk.start()
    return zk
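# A minimal usage sketch for zk_connect above; the address and the
# credentials are assumptions for illustration, not values from this
# codebase.
zk = zk_connect('127.0.0.1:2181', zk_user='admin', zk_secret='secret')
try:
    zk.ensure_path('/example')        # retried per cmd_retry_policy
    value, stat = zk.get('/example')  # returns (bytes, ZnodeStat)
finally:
    zk.stop()
    zk.close()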
def __init__(self, zk_hosts):
    conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
    cmd_retry_policy = KazooRetry(max_tries=3, delay=0.3, backoff=1,
                                  max_delay=1, ignore_expire=False)
    self._zk = KazooClient(hosts=zk_hosts,
                           connection_retry=conn_retry_policy,
                           command_retry=cmd_retry_policy)
def create_zk_client(zk_hosts: str) -> KazooClient:
    conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
    cmd_retry_policy = KazooRetry(max_tries=3, delay=0.3, backoff=1,
                                  max_delay=1, ignore_expire=False)
    return KazooClient(
        hosts=zk_hosts,
        connection_retry=conn_retry_policy,
        command_retry=cmd_retry_policy,
    )
def zk() -> KazooClient:
    conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
    cmd_retry_policy = KazooRetry(
        max_tries=3, delay=0.3, backoff=1, max_delay=1, ignore_expire=False)
    zk = KazooClient(
        hosts='zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181',
        connection_retry=conn_retry_policy,
        command_retry=cmd_retry_policy,
    )
    zk.start()
    yield zk
    zk.stop()
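# A sketch assuming the zk() generator above is registered as a pytest
# fixture (e.g. decorated with @pytest.fixture); the test body is an
# assumption for illustration.
def test_zookeeper_reachable(zk):
    # The fixture yields a started client and stops it during teardown.
    assert zk.exists('/') is not None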
def __init__(self, name: str = None,
             server_host: str = get_host_ip() + ':2181',
             activate_distributor: bool = True,
             distributor_kafka_host: Union[str, List[str]] =
             get_host_ip() + ':9092'):
    self.is_virtual = False
    self.randomize_location()
    self.latency = random.uniform(0, 10)
    try:
        from jtop import jtop
        self.jetson = jtop()
        self.jetson.open()
    except Exception:
        self.jetson = None
    if 'virtual' in os.environ:
        self.is_virtual = True
    if name is None:
        if 'NODE_NAME' in os.environ:
            name = os.environ['NODE_NAME']
        else:
            name = get_local_ip()
    self.name = name
    print('I am ' + self.name)
    retry = KazooRetry(max_tries=-1)
    self.zk = KazooClient(hosts=server_host, connection_retry=retry,
                          timeout=5.0)
    while True:
        try:
            self.zk.start()
            break
        except Exception as e:
            print(e)
    self.zk.ensure_path('/nodes/' + self.name)
    if activate_distributor:
        self.distributor = Distributor(
            name=name,
            kafka_bootstrap_servers=distributor_kafka_host,
            trigger=lambda x: self.randomize_location())
def __init__(self, config):
    self.zk = KazooClient(config.hosts.zookeeper.connection_string)
    self.zk.start()
    credentials = ":".join(
        (config.hosts.zookeeper.username, config.hosts.zookeeper.password))
    self.zk.add_auth("digest", credentials)
    self.retry = KazooRetry(max_tries=3)
class ZkOpers(object):

    zk = None

    DEFAULT_RETRY_POLICY = KazooRetry(
        max_tries=None,
        max_delay=10000,
    )

    rootPath = "/letv/nginx"
    confOpers = ConfigFileOpers()

    '''
    classdocs
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.zkaddress, self.zkport = get_zk_address()
        if "" != self.zkaddress and "" != self.zkport:
            self.zk = KazooClient(
                hosts=self.zkaddress + ':' + str(self.zkport),
                connection_retry=self.DEFAULT_RETRY_POLICY,
                timeout=20)
            self.zk.add_listener(self.listener)
            self.zk.start()
            logging.info("instance zk client (%s:%s)" %
                         (self.zkaddress, self.zkport))

    def close(self):
        try:
            self.zk.stop()
            self.zk.close()
        except Exception as e:
            logging.error(e)
def get_job_from_path(self, path):
    kr = KazooRetry(max_tries=0, ignore_expire=False)
    try:
        result = kr(self._inner_get_for_update, path)
    except RetryFailedError:
        return None
    return result
def _get_retry():
    """
    ZooKeeper connection retry is weird and needs a particular object to
    achieve. Create that object with its appropriate settings here and
    return it.

    :return: KazooRetry object.
    """
    return KazooRetry(max_tries=5, backoff=2, max_delay=30)
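# A minimal sketch of wiring the retry object above into a client; the
# host string is an assumption for illustration.
from kazoo.client import KazooClient

client = KazooClient(hosts='127.0.0.1:2181', connection_retry=_get_retry())
client.start()
# With backoff=2, the reconnect delay roughly doubles on each of the five
# attempts (from kazoo's default 0.1s initial delay), capped at 30s.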
def __init__(self, host='127.0.0.1:2181',
             lock_path_prefix='/mastermind/locks/'):
    self.client = KazooClient(host, timeout=3)
    logger.info(
        'Connecting to zookeeper host {}, lock_path_prefix: {}'.format(
            host, lock_path_prefix))
    try:
        self.client.start()
    except Exception as e:
        logger.error(e)
        raise
    self._retry = KazooRetry(max_tries=self.RETRIES)
    self.lock_path_prefix = lock_path_prefix
def main():
    """ Starts the groomer. """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Output debug-level logging')
    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    zk_hosts = appscale_info.get_zk_node_ips()
    zk_client = KazooClient(hosts=','.join(zk_hosts),
                            connection_retry=ZK_PERSISTENT_RECONNECTS,
                            command_retry=KazooRetry(max_tries=-1))
    zk_client.start()

    db_access = DatastoreProxy()

    thread_pool = ThreadPoolExecutor(4)

    TransactionGroomer(zk_client, db_access, thread_pool)
    logger.info('Starting transaction groomer')

    IOLoop.current().start()
def __init__(self, host, port, ishead=False, istail=False, previousid=0,
             nextid=0, chaintailid=0):
    self.inet = "{}:{}".format(host, port)
    self.retry = KazooRetry(max_tries=1000, delay=0.5)
    self.zk = KazooClient(hosts=self.inet, logger=logging,
                          connection_retry=self.retry)
    self.ishead = ishead          # am I head?
    self.istail = istail          # am I tail?
    self.replicaid = 0            # id of this replica
    self.previousid = previousid  # id of predecessor
    self.nextid = nextid          # id of successor
    self.chaintailid = chaintailid
    self.hashtable = {}           # table of values updated by client requests
    self.client = {}              # client stubs for future use
    self.sentlist = []            # sent update requests not yet processed by tail
    self.previous = None
    self.next = None
    self.chaintail = None
    self.currxid = 0              # current xid server has seen so far
    self.host = host
    self.port = port
def _zookeeper_resolver(self, cfg):
    hosts, path = cfg[5:].split("/", 1)
    path = "/" + path

    retry = KazooRetry(max_tries=10)
    with zookeeper.client(hosts=hosts, read_only=True,
                          connection_retry=retry) as zk:
        def master_id(key):
            return int(key.split("_")[-1])

        def get_masters():
            return [x for x in zk.get_children(path)
                    if re.search(r"\d+", x)]

        leader = sorted(get_masters(), key=lambda x: master_id(x))

        if len(leader) == 0:
            raise exceptions.MasterNotAvailableException(
                "cannot find any masters at {0}".format(cfg,))
        data, stat = zk.get(os.path.join(path, leader[0]))

        if not data:
            # raise was missing here in the original; without it the
            # empty-data case fell through to the JSON parsing below
            raise exceptions.MasterNotAvailableException(
                "Cannot retrieve valid MasterInfo data from ZooKeeper")

        try:
            parsed = json.loads(data)
            if parsed and "address" in parsed:
                ip = parsed["address"].get("ip")
                port = parsed["address"].get("port")
                if ip and port:
                    return "{ip}:{port}".format(ip=ip, port=port)
        except ValueError as parse_error:
            log.debug("[WARN] No JSON content, probably connecting to "
                      "older Mesos version. Reason: {}".format(parse_error))
            raise exceptions.MasterNotAvailableException(
                "Failed to parse mesos master ip from ZK")
class DefaultAnnouncerCheckerProvider(AnnouncerCheckerProvider):
    DEFAULT_RETRY_MAX_DELAY = Amount(5, Time.MINUTES)
    DEFAULT_RETRY_POLICY = KazooRetry(
        max_tries=None,
        ignore_expire=True,
        max_delay=DEFAULT_RETRY_MAX_DELAY.as_(Time.SECONDS),
    )

    def __init__(self, ensemble, root='/aurora',
                 allow_custom_serverset_path=False):
        self.__ensemble = ensemble
        self.__root = root
        super(DefaultAnnouncerCheckerProvider, self).__init__(
            allow_custom_serverset_path)

    def make_zk_client(self):
        return KazooClient(self.__ensemble,
                           connection_retry=self.DEFAULT_RETRY_POLICY)

    def make_zk_path(self, assigned_task):
        config = assigned_task.task
        role, environment, name = (config.job.role, config.job.environment,
                                   config.job.name)
        return posixpath.join(self.__root, role, environment, name)
def __init__(self, host=DEFAULT_HOST, db_access=None,
             log_level=logging.INFO):
    """ Creates a new ZKTransaction, which will communicate with
    Zookeeper on the given host.

    Args:
      host: A str that indicates which machine runs the Zookeeper service.
      db_access: A DatastoreProxy instance.
      log_level: A logging constant that specifies the instance logging
        level.
    """
    retry_policy = KazooRetry(max_tries=5)

    class_name = self.__class__.__name__
    self.logger = logging.getLogger(class_name)
    self.logger.setLevel(log_level)
    self.logger.info('Starting {}'.format(class_name))

    # Connection instance variables.
    self.host = host
    self.handle = kazoo.client.KazooClient(
        hosts=host, connection_retry=ZK_PERSISTENT_RECONNECTS,
        command_retry=retry_policy)
    self.run_with_retry = self.handle.retry
    self.handle.start()

    self.__counter_cache = {}

    self.db_access = db_access
def zk_client(static_three_master_cluster: Cluster) -> KazooClient:
    """
    ZooKeeper client connected to a given DC/OS cluster.
    """
    zk_hostports = ','.join([
        '{}:2181'.format(m.public_ip_address)
        for m in static_three_master_cluster.masters
    ])
    retry_policy = KazooRetry(
        max_tries=-1,
        delay=1,
        backoff=1,
        max_delay=600,
        ignore_expire=True,
    )
    zk_client = KazooClient(
        hosts=zk_hostports,
        # Avoid failure due to client session timeout.
        timeout=40,
        # Work around https://github.com/python-zk/kazoo/issues/374
        connection_retry=retry_policy,
        command_retry=retry_policy,
    )
    zk_client.start()
    try:
        yield zk_client
    finally:
        zk_client.stop()
        zk_client.close()
def rebalance(self, partition_ids=None):
    if partition_ids is None:
        partition_ids = [
            str(p_id)
            for p_id in self.consumer_partitions[self._identifier]
        ]
    kr = KazooRetry(max_tries=3)
    kr.retry_exceptions = kr.retry_exceptions + tuple([NodeExistsError])
    my_partitions = self.consumer_partitions[self._identifier]
    self.logger.info('My partitions (%d): %s',
                     len(my_partitions), my_partitions)

    # Clean up old ownership data first, so we don't block
    # the joining node(s)
    self._release_locks()

    nodes = sorted([node for node in self._group], key=lambda x: hash(x))
    my_new_partitions = [
        partition for partition in partition_ids
        if nodes[int(partition) % len(nodes)] == self._identifier and
        int(partition) not in my_partitions
    ]
    self.logger.info('My new partitions (%d): %s',
                     len(my_new_partitions), my_new_partitions)
    for partition in my_new_partitions:
        c_id = nodes[int(partition) % len(nodes)]
        self.consumer_partitions[c_id].append(int(partition))
        p_path = self.path_formats['owner'].format(group=self.group,
                                                   topic=self.topic,
                                                   partition=partition)
        try:
            self.logger.debug('Acquiring ownership of partition %s',
                              partition)
            kr(self._client.create, p_path, value=self._identifier,
               ephemeral=True, makepath=True)
        except RetryFailedError as err:
            # A different consumer had been registered as the owner
            expired_cid, zstat = self._client.get(p_path)
            msg = 'Acquiring ownership of partition %s (was owned by %s)'
            self.logger.warn(msg, partition, expired_cid)
            # We need to delete / create, so that the node is created
            # ephemeral and owned by us
            self._client.delete(p_path)
            self._client.create(p_path, value=self._identifier,
                                ephemeral=True, makepath=True)
    if self.partitions_changed_cb:
        self.partitions_changed_cb(
            self.consumer_partitions[self._identifier])
def rebalance(self, partition_ids=None):
    if partition_ids is None:
        partition_ids = [
            str(p_id)
            for p_id in self.consumer_partitions[self._identifier]
        ]
    kr = KazooRetry(max_tries=3)
    kr.retry_exceptions = kr.retry_exceptions + tuple([NodeExistsError])
    my_partitions = self.consumer_partitions[self._identifier]
    self.logger.info('My partitions (%d): %s',
                     len(my_partitions), my_partitions)

    # Clean up old ownership data first, so we don't block
    # the joining node(s)
    self._release_locks()

    nodes = sorted([node for node in self._group])
    self.logger.info('Connected nodes (%d): %s', len(nodes), nodes)
    my_new_partitions = [
        partition for partition in partition_ids
        if nodes[int(partition) % len(nodes)] == self._identifier and
        int(partition) not in my_partitions
    ]
    self.logger.info('My new partitions (%d): %s',
                     len(my_new_partitions), my_new_partitions)
    for partition in my_new_partitions:
        c_id = nodes[int(partition) % len(nodes)]
        self.consumer_partitions[c_id].append(int(partition))
        p_path = self.path_formats['owner'].format(group=self.group,
                                                   topic=self.topic,
                                                   partition=partition)
        try:
            self.logger.debug('Acquiring ownership of partition %s',
                              partition)
            kr(self._client.create, p_path, value=self._identifier,
               ephemeral=True, makepath=True)
        except RetryFailedError:
            # A different consumer is still connected and owns this,
            # try to gracefully release everything else and fail out
            self.finish()
    if self.partitions_changed_cb:
        self.partitions_changed_cb(
            self.consumer_partitions[self._identifier])
def get_job(self, entry):
    path = self.unowned_path + "/" + str(entry)
    kr = KazooRetry(max_tries=3, ignore_expire=False)
    try:
        result = kr(self._inner_get, path)
    except RetryFailedError:
        return None
    return result
def __init__(self, client, path, identifier=None, extra_lock_patterns=()):
    """Create a Kazoo lock.

    :param client: A :class:`~kazoo.client.KazooClient` instance.
    :param path: The lock path to use.
    :param identifier: Name to use for this lock contender. This can be
        useful for querying to see who the current lock contenders are.
    :param extra_lock_patterns: Strings that will be used to identify
        other znode in the path that should be considered contenders
        for this lock. Use this for cross-implementation compatibility.

    .. versionadded:: 2.7.1
        The extra_lock_patterns option.
    """
    self.client = client
    self.path = path
    self._exclude_names = set(
        self._EXCLUDE_NAMES + list(extra_lock_patterns))
    self._contenders_re = re.compile(
        r"(?:{patterns})(-?\d{{10}})$".format(
            patterns="|".join(self._exclude_names)))

    # some data is written to the node. this can be queried via
    # contenders() to see who is contending for the lock
    self.data = str(identifier or "").encode("utf-8")
    self.node = None

    self.wake_event = client.handler.event_object()

    # props to Netflix Curator for this trick. It is possible for our
    # create request to succeed on the server, but for a failure to
    # prevent us from getting back the full path name. We prefix our
    # lock name with a uuid and can check for its presence on retry.
    self.prefix = uuid.uuid4().hex + self._NODE_NAME
    self.create_path = self.path + "/" + self.prefix

    self.create_tried = False
    self.is_acquired = False
    self.assured_path = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None,
                             sleep_func=client.handler.sleep_func)
    self._lock = client.handler.lock_object()
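# A sketch of the extra_lock_patterns option above (added in kazoo 2.7.1):
# znodes whose names match the extra patterns are treated as contenders,
# which allows interoperating with locks created by another implementation
# sharing the same path. The host, path, identifier, and "-lock-" pattern
# are assumptions for illustration.
from kazoo.client import KazooClient

client = KazooClient(hosts='127.0.0.1:2181')
client.start()
lock = client.Lock('/app/lock', identifier='worker-1',
                   extra_lock_patterns=('-lock-',))
with lock:
    pass  # critical section, also serialized against the foreign locks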
def __enter__(self):
    """Initialize zk connection."""
    kazooRetry = KazooRetry(max_tries=5)
    self.zk = KazooClient(
        hosts=self.hosts,
        read_only=True,
        connection_retry=kazooRetry
    )
    self.zk.start()
    return self
def __init__(self, hosts, config):
    self._section_name = utils.get_module(__name__)
    self._max_delay = config.getint(
        self._section_name, "max_retry_delay",
        default=settings.DEFAULT_ZK_RETRY_MAX_DELAY)
    self._timeout = config.getint(
        self._section_name, "time_out",
        default=settings.DEFAULT_ZK_CONNECTION_TIMEOUT)

    connection_retry = KazooRetry(max_tries=-1, max_delay=self._max_delay)
    super(prpcZKClientManager, self).__init__(
        hosts=hosts, timeout=self._timeout,
        connection_retry=connection_retry)
def __init__(self) -> None:
    hosts = settings.ZOO_HOSTS
    retry = KazooRetry(max_tries=-1, max_delay=60)
    self._zk = KazooClient(hosts, connection_retry=retry,
                           command_retry=retry)
    # establish the connection
    self._zk.start()
def __init__(self, module, server_list, logging_fn=None):
    # logging
    logger = logging.getLogger(module)
    logger.setLevel(logging.INFO)
    try:
        handler = logging.handlers.RotatingFileHandler(
            LOG_DIR + module + '-zk.log',
            maxBytes=10 * 1024 * 1024, backupCount=5)
    except IOError:
        print("Cannot open log file in %s" % LOG_DIR)
    else:
        log_format = logging.Formatter(
            '%(asctime)s [%(name)s]: %(message)s',
            datefmt='%m/%d/%Y %I:%M:%S %p')
        handler.setFormatter(log_format)
        logger.addHandler(handler)
    if logging_fn:
        self.log = logging_fn
    else:
        self.log = self.syslog

    self._zk_client = \
        kazoo.client.KazooClient(
            server_list,
            timeout=400,
            handler=kazoo.handlers.gevent.SequentialGeventHandler(),
            logger=logger)
    self._zk_client.add_listener(self._zk_listener)
    self._logger = logger
    self._election = None
    self._server_list = server_list
    # KazooRetry to retry keeper CRUD operations
    self._retry = KazooRetry(max_tries=None, max_delay=300,
                             sleep_func=gevent.sleep)
    self._conn_state = None
    self._sandesh_connection_info_update(status='INIT', message='')
    self._lost_cb = None
    self.connect()
def __init__(self, server_list):
    self._retry = KazooRetry(max_tries=None, max_delay=300,
                             sleep_func=gevent.sleep)
    self._zk_client = KazooClient(
        hosts=','.join(server_list),
        timeout=400,
        handler=SequentialGeventHandler(),
        logger=logger,
        connection_retry=self._retry,
        command_retry=self._retry)
def __init__(self, client, path, identifier=None, exclusive=True):
    """Create a Kazoo lock.

    :param client: The Kazoo client
    :type client: :class:`~kazoo.client.KazooClient`
    :param path: The lock path to use. May not contain the strings
        ``__SHARED__`` or ``__EXCLUSIVE__``, as they are used internally
    :type path: str
    :param identifier: Name to use for this lock contender, which may be
        useful for querying to see who the current lock
        :py:meth:`contenders` are. May not contain the string
        ``__UNLOCK__``, as this is used internally.
    :type identifier: str
    :param exclusive: Whether this is an exclusive lock (``False`` means
        a "shared lock" as described above)
    :type exclusive: bool

    .. versionadded:: 1.4
        The exclusive option.
    """
    if self._MODE_SHARED in path or self._MODE_EXCLUSIVE in path:
        raise ValueError('Path "{}" contains a reserved word'.format(path))
    if identifier and self._UNLOCK_REQUEST in str(identifier):
        raise ValueError(
            'Identifier "{}" contains a reserved word'.format(identifier))

    self.client = client
    self.path = path
    self.exclusive = exclusive

    # some data is written to the node. this can be queried via
    # contenders() to see who is contending for the lock
    self.data = str(identifier or "").encode('utf-8')

    self.wake_event = client.handler.event_object()

    mode_suffix = self._MODE_EXCLUSIVE if exclusive else self._MODE_SHARED

    # props to Netflix Curator for this trick. It is possible for our
    # create request to succeed on the server, but for a failure to
    # prevent us from getting back the full path name. We prefix our
    # lock name with a uuid and can check for its presence on retry.
    self.prefix = uuid.uuid4().hex + mode_suffix
    self.create_path = self.path + "/" + self.prefix

    self.create_tried = False
    self.is_acquired = False
    self.assured_path = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None)
def __init__(self, host='127.0.0.1:2181',
             lock_path_prefix='/mastermind/locks/'):
    self.client = KazooClient(host, timeout=3)
    logger.info('Connecting to zookeeper host {0}, '
                'lock_path_prefix: {1}'.format(host, lock_path_prefix))
    try:
        self.client.start()
    except Exception as e:
        logger.error(e)
        raise
    self._retry = KazooRetry(max_tries=self.RETRIES)
    self.lock_path_prefix = lock_path_prefix
def __init__(self, client, path, identifier=None, node_name="__lock__",
             exclude_names=None):
    """Create a Kazoo lock.

    node_name and exclude_names are typically only used internally to
    implement read/write locks. They should be left unset for exclusive
    locks.

    :param client: A :class:`~kazoo.client.KazooClient` instance.
    :param path: The lock path to use.
    :param identifier: Name to use for this lock contender. This can be
        useful for querying to see who the current lock contenders are.
    :param node_name: Node name, after the contender UUID, before the
        sequence number. Involved in read/write locks. For a normal
        (exclusive) lock, leave unset.
    :param exclude_names: Node names which exclude this contender when
        present at a lower sequence number. Involved in read/write
        locks. For a normal (exclusive) lock, leave unset.
    """
    self.client = client
    self.path = path

    # some data is written to the node. this can be queried via
    # contenders() to see who is contending for the lock
    self.data = str(identifier or "").encode('utf-8')

    self.wake_event = client.handler.event_object()

    self.node_name = node_name

    if exclude_names is None:
        exclude_names = [self.node_name]

    self.exclude_names = exclude_names

    # props to Netflix Curator for this trick. It is possible for our
    # create request to succeed on the server, but for a failure to
    # prevent us from getting back the full path name. We prefix our
    # lock name with a uuid and can check for its presence on retry.
    self.prefix = uuid.uuid4().hex + self.node_name
    self.create_path = self.path + "/" + self.prefix

    self.create_tried = False
    self.is_acquired = False
    self.assured_path = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None,
                             sleep_func=client.handler.sleep_func)
def __init__(self, server_list):
    self._retry = KazooRetry(max_tries=None, max_delay=300,
                             sleep_func=gevent.sleep)
    self._zk_client = kazoo.client.KazooClient(
        server_list,
        timeout=400,
        handler=kazoo.handlers.gevent.SequentialGeventHandler(),
        connection_retry=self._retry,
        command_retry=self._retry)
    self._zk_client.add_listener(self._zk_listener)
    self._election = None
    self._server_list = server_list
    self._conn_state = None
    self._lost_cb = None
    self.connect()
class Lock(object):
    """Kazoo Lock

    Example usage with a :class:`~kazoo.client.KazooClient` instance:

    .. code-block:: python

        zk = KazooClient()
        zk.start()
        lock = zk.Lock("/lockpath", "my-identifier")
        with lock:  # blocks waiting for lock acquisition
            # do something with the lock

    Note: This lock is not *re-entrant*. Repeated calls after already
    acquired will block.

    """
    _NODE_NAME = "__lock__"

    def __init__(self, client, path, identifier=None):
        """Create a Kazoo lock.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This can
            be useful for querying to see who the current lock
            contenders are.

        """
        self.client = client
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode("utf-8")

        self.wake_event = client.handler.event_object()

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()

    def _ensure_path(self):
        self.client.ensure_path(self.path)
        self.assured_path = True

    def cancel(self):
        """Cancel a pending lock acquire."""
        self.cancelled = True
        self.wake_event.set()

    def acquire(self, blocking=True, timeout=None):
        """
        Acquire the lock. By default, blocks and waits forever.

        :param blocking: Block until lock is obtained or return
            immediately.
        :type blocking: bool
        :param timeout: Don't wait forever to acquire the lock.
        :type timeout: float or None

        :returns: Was the lock acquired?
        :rtype: bool

        :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
            wasn't acquired within `timeout` seconds.

        .. versionadded:: 1.1
            The timeout option.
        """

        def _acquire_lock():
            got_it = self._lock.acquire(False)
            if not got_it:
                raise ForceRetryError()
            return True

        retry = self._retry.copy()
        retry.deadline = timeout

        # Ensure we are locked so that we avoid multiple threads in
        # this acquisition routine at the same time...
        locked = self._lock.acquire(False)
        if not locked and not blocking:
            return False
        if not locked:
            # Lock acquire doesn't take a timeout, so simulate it...
            try:
                locked = retry(_acquire_lock)
            except RetryFailedError:
                return False
        already_acquired = self.is_acquired
        try:
            gotten = False
            try:
                gotten = retry(self._inner_acquire,
                               blocking=blocking, timeout=timeout)
            except RetryFailedError:
                if not already_acquired:
                    self._best_effort_cleanup()
            except KazooException:
                # if we did ultimately fail, attempt to clean up
                exc_info = sys.exc_info()
                if not already_acquired:
                    self._best_effort_cleanup()
                    self.cancelled = False
                six.reraise(exc_info[0], exc_info[1], exc_info[2])
            if gotten:
                self.is_acquired = gotten
            if not gotten and not already_acquired:
                self._delete_node(self.node)
            return gotten
        finally:
            self._lock.release()

    def _watch_session(self, state):
        self.wake_event.set()
        return True

    def _inner_acquire(self, blocking, timeout):
        # wait until it's our chance to get it..
        if self.is_acquired:
            if not blocking:
                return False
            raise ForceRetryError()

        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        node = None
        if self.create_tried:
            node = self._find_node()
        else:
            self.create_tried = True

        if not node:
            node = self.client.create(self.create_path, self.data,
                                      ephemeral=True, sequence=True)
            # strip off path to node
            node = node[len(self.path) + 1:]

        self.node = node

        while True:
            self.wake_event.clear()

            # bail out with an exception if cancellation has been
            # requested
            if self.cancelled:
                raise CancelledError()

            children = self._get_sorted_children()

            try:
                our_index = children.index(node)
            except ValueError:  # pragma: nocover
                # somehow we aren't in the children -- probably we are
                # recovering from a session failure and our ephemeral
                # node was removed
                raise ForceRetryError()

            if self.acquired_lock(children, our_index):
                return True

            if not blocking:
                return False

            # otherwise we are in the mix. watch predecessor and bide
            # our time
            predecessor = self.path + "/" + children[our_index - 1]
            self.client.add_listener(self._watch_session)
            try:
                if self.client.exists(predecessor,
                                      self._watch_predecessor):
                    self.wake_event.wait(timeout)
                    if not self.wake_event.isSet():
                        raise LockTimeout(
                            "Failed to acquire lock on %s after "
                            "%s seconds" % (self.path, timeout))
            finally:
                self.client.remove_listener(self._watch_session)

    def acquired_lock(self, children, index):
        return index == 0

    def _watch_predecessor(self, event):
        self.wake_event.set()

    def _get_sorted_children(self):
        children = self.client.get_children(self.path)

        # can't just sort directly: the node names are prefixed by uuids
        lockname = self._NODE_NAME
        children.sort(key=lambda c: c[c.find(lockname) + len(lockname):])
        return children

    def _find_node(self):
        children = self.client.get_children(self.path)
        for child in children:
            if child.startswith(self.prefix):
                return child
        return None

    def _delete_node(self, node):
        self.client.delete(self.path + "/" + node)

    def _best_effort_cleanup(self):
        try:
            node = self._find_node()
            if node:
                self._delete_node(node)
        except KazooException:  # pragma: nocover
            pass

    def release(self):
        """Release the lock immediately."""
        return self.client.retry(self._inner_release)

    def _inner_release(self):
        if not self.is_acquired:
            return False

        try:
            self._delete_node(self.node)
        except NoNodeError:  # pragma: nocover
            pass

        self.is_acquired = False
        self.node = None
        return True

    def contenders(self):
        """Return an ordered list of the current contenders for the
        lock.

        .. note::

            If the contenders did not set an identifier, it will appear
            as a blank string.

        """
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        children = self._get_sorted_children()

        contenders = []
        for child in children:
            try:
                data, stat = self.client.get(self.path + "/" + child)
                contenders.append(data.decode("utf-8"))
            except NoNodeError:  # pragma: nocover
                pass
        return contenders

    def __enter__(self):
        self.acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
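# A usage sketch for the Lock recipe above; the host, path, and identifier
# are assumptions for illustration.
from kazoo.client import KazooClient
from kazoo.exceptions import LockTimeout

client = KazooClient(hosts='127.0.0.1:2181')
client.start()
lock = client.Lock('/app/lock', identifier='worker-1')
try:
    if lock.acquire(timeout=5):
        try:
            print(lock.contenders())  # ordered contender identifiers
        finally:
            lock.release()
except LockTimeout:
    print('could not acquire the lock within 5 seconds')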
class Lock(object):
    """Kazoo Lock

    Kazoo `Lock` supports three different locking strategies for a
    lock path.

    **Exclusive locks** represent the least complex locking strategy and
    guarantee that only a single ``Lock`` instance can acquire a lock
    path at any given time. This applies even if other locking
    strategies (as described below) are simultaneously in use for the
    same lock path. Exclusive locks are the default and will be provided
    if :py:meth:`__init__` is invoked with the default
    ``exclusive=True`` parameter.

    **Shared locks** allow different ``Lock`` instances to
    simultaneously acquire locks to the same lock path. In this
    strategy, a ``Lock`` instance is constructed with either
    ``exclusive=True`` (which is known as an "exclusive lock" and is
    described above) or ``exclusive=False`` (which is known as a "shared
    lock"). A shared lock will only be acquired if no exclusive locks
    are pending at the time acquisition is attempted. This means
    multiple shared locks can be acquired simultaneously, however
    additional shared locks will not be acquired once any exclusive lock
    for the lock path is pending. The shared lock strategy is most
    useful when multiple clients require read-only access to a resource
    but writing to that resource requires exclusive access. To use the
    shared locks strategy, invoke :py:meth:`__init__` and indicate a
    shared or exclusive lock via the ``exclusive`` parameter.

    **Revocable shared locks** provide the same locking guarantees and
    usage behavior as the shared locks strategy described above, however
    add the ability for any blocked lock acquisition request to signal
    to the blocking locks (or other lock requests which would be granted
    before it) to revoke. This is useful if shared lock holders do not
    routinely release resources (eg they are long-running readers) but
    are able to do so on request. Given cooperation from earlier lock
    holders or requestors is required, a callback is used to signal a
    revocation request. In the callback any resources should be released
    and then :py:meth:`cancel` and :py:meth:`release` invoked so the
    lock is removed. Note that a callback may safely ignore the callback
    notification if desired. To use the revocable shared locks strategy,
    invoke :py:meth:`acquire` with ``revoke=True``. This indicates a
    blocked lock request should request the revocation of any earlier
    blocking locks. For locks that can be interrupted and respond to
    such revocation requests, use the ``unlock`` parameter of
    :py:meth:`acquire` to provide the callback function that should be
    invoked on the first (and only first) revocation request.

    Example exclusive lock usage with a
    :class:`~kazoo.client.KazooClient` instance:

    .. code-block:: python

        zk = KazooClient()
        lock = zk.Lock("/lockpath", "my-identifier")
        with lock:  # blocks waiting for exclusive lock acquisition
            # do something with the lock

    """
    _MODE_SHARED = '__SHARED__'
    _MODE_EXCLUSIVE = '__EXCLUSIVE__'
    _UNLOCK_REQUEST = '__UNLOCK__'
    _UNLOCK_SUFFIX = ' ' + _UNLOCK_REQUEST

    def __init__(self, client, path, identifier=None, exclusive=True):
        """Create a Kazoo lock.

        :param client: The Kazoo client
        :type client: :class:`~kazoo.client.KazooClient`
        :param path: The lock path to use. May not contain the strings
            ``__SHARED__`` or ``__EXCLUSIVE__``, as they are used
            internally
        :type path: str
        :param identifier: Name to use for this lock contender, which
            may be useful for querying to see who the current lock
            :py:meth:`contenders` are. May not contain the string
            ``__UNLOCK__``, as this is used internally.
        :type identifier: str
        :param exclusive: Whether this is an exclusive lock (``False``
            means a "shared lock" as described above)
        :type exclusive: bool

        .. versionadded:: 1.4
            The exclusive option.
        """
        if self._MODE_SHARED in path or self._MODE_EXCLUSIVE in path:
            raise ValueError(
                'Path "{}" contains a reserved word'.format(path))
        if identifier and self._UNLOCK_REQUEST in str(identifier):
            raise ValueError(
                'Identifier "{}" contains a reserved word'.format(
                    identifier))

        self.client = client
        self.path = path
        self.exclusive = exclusive

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')

        self.wake_event = client.handler.event_object()

        mode_suffix = (self._MODE_EXCLUSIVE if exclusive
                       else self._MODE_SHARED)

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + mode_suffix
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None)

    def _ensure_path(self):
        self.client.ensure_path(self.path)
        self.assured_path = True

    def cancel(self):
        """Cancel a pending lock acquire."""
        self.cancelled = True
        self.wake_event.set()

    def acquire(self, blocking=True, timeout=None, revoke=False,
                unlock=None):
        """
        Acquire the lock. By default, blocks and waits forever.

        :param blocking: Block until lock is obtained or return
            immediately.
        :type blocking: bool
        :param timeout: Don't wait forever to acquire the lock.
        :type timeout: float or None
        :param revoke: Identify all existing locks and lock requests
            that prevent this lock being acquired and immediately
            request them to unlock (this does not mean they will unlock
            or are even listening for such requests though)
        :type revoke: bool
        :param unlock: The callback which will be invoked exactly once
            if another lock used ``revoke=True`` and this lock or lock
            request is blocking that lock from being acquired (it is
            legal to use ``None`` to ignore revocation requests, or
            provide a callback which takes no action)
        :type unlock: a zero-parameter function

        :returns: Was the lock acquired?
        :rtype: bool

        :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
            wasn't acquired within `timeout` seconds.

        .. versionadded:: 1.1
            The timeout option.

        .. versionadded:: 1.4
            The revoke and unlock options.
        """
        try:
            retry = self._retry.copy()
            retry.deadline = timeout
            self.is_acquired = retry(self._inner_acquire,
                                     blocking=blocking, timeout=timeout,
                                     revoke=revoke, unlock=unlock)
        except KazooException:
            # if we did ultimately fail, attempt to clean up
            self._best_effort_cleanup()
            self.cancelled = False
            raise
        except RetryFailedError:  # pragma: nocover
            self._best_effort_cleanup()

        if not self.is_acquired:
            self._delete_node(self.node)

        return self.is_acquired

    def _inner_acquire(self, blocking, timeout, revoke, unlock):
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        node = None
        if self.create_tried:
            node = self._find_node()
        else:
            self.create_tried = True

        if not node:
            node = self.client.create(self.create_path, self.data,
                                      ephemeral=True, sequence=True)

            if unlock:
                # watch this node for its first data change (the only
                # other events would be deletion or additional data
                # change events, so either way the absence of additional
                # events is fine)
                def unlock_callback(event):
                    if event.type == EventType.CHANGED:
                        unlock()

                data, _ = self.client.get(node, unlock_callback)
                if self._UNLOCK_REQUEST in data.decode('utf-8'):
                    # a request to revoke our request has already been
                    # received (we let the callback know about this, but
                    # we keep going given the callback is under no
                    # obligation to comply)
                    unlock()  # pragma: nocover

            # strip off path to node
            node = node[len(self.path) + 1:]

        self.node = node

        while True:
            self.wake_event.clear()

            # bail out with an exception if cancellation has been
            # requested
            if self.cancelled:
                raise CancelledError()

            children = self._get_sorted_children()

            try:
                our_index = children.index(node)
            except ValueError:  # pragma: nocover
                # somehow we aren't in the children -- probably we are
                # recovering from a session failure and our ephemeral
                # node was removed
                raise ForceRetryError()

            acquired, blockers = self.acquired_lock(children, our_index)
            if acquired:
                return True

            if not blocking:
                return False

            # we are in the mix
            if revoke:
                for child in blockers:
                    try:
                        child_path = self.path + "/" + child
                        data, stat = self.client.get(child_path)
                        decoded_data = data.decode('utf-8')
                        if self._UNLOCK_REQUEST not in decoded_data:
                            data = str(
                                decoded_data +
                                self._UNLOCK_SUFFIX).encode('utf-8')
                            self.client.set(child_path, data)
                    except NoNodeError:  # pragma: nocover
                        pass

            # watch the last blocker and bide our time
            predecessor = self.path + "/" + blockers[-1]
            if self.client.exists(predecessor, self._watch_predecessor):
                self.wake_event.wait(timeout)
                if not self.wake_event.isSet():
                    raise LockTimeout(
                        "Failed to acquire lock on %s after %s "
                        "seconds" % (self.path, timeout))

    def acquired_lock(self, children, index):
        """Return if we acquired the lock, and if not, the blocking
        contenders.
        """
        prior_nodes = children[:index]
        if self.exclusive:
            return (index == 0, prior_nodes)

        # Shared locks are only unavailable if a prior lock is exclusive
        prior_exclusive = [x for x in prior_nodes
                           if self._MODE_EXCLUSIVE in x]
        if prior_exclusive:
            return (False, prior_exclusive)

        return (True, None)

    def _watch_predecessor(self, event):
        self.wake_event.set()

    def _get_sorted_children(self):
        children = self.client.get_children(self.path)
        # zookeeper sequence node suffix of %010d is relied upon for
        # sorting
        children.sort(key=lambda c: c[-10:])
        return children

    def _find_node(self):
        children = self.client.get_children(self.path)
        for child in children:
            if child.startswith(self.prefix):
                return child
        return None

    def _delete_node(self, node):
        self.client.delete(self.path + "/" + node)

    def _best_effort_cleanup(self):
        try:
            node = self._find_node()
            if node:
                self._delete_node(node)
        except KazooException:  # pragma: nocover
            pass

    def release(self):
        """Release the lock immediately."""
        return self.client.retry(self._inner_release)

    def _inner_release(self):
        if not self.is_acquired:
            return False

        try:
            self._delete_node(self.node)
        except NoNodeError:  # pragma: nocover
            pass

        self.is_acquired = False
        self.node = None
        return True

    def contenders(self, unlocks_only=False):
        """Return an ordered list of the current contenders for the
        lock.

        .. note::

            If the contenders did not set an identifier, it will appear
            as a blank string.

        :param unlocks_only: indicates whether to only return those
            contenders which have been requested to revoke their locks
            or lock requests
        :type unlocks_only: bool
        :return: a list of contender identifiers
        :type: list

        .. versionadded:: 1.4
            The unlocks_only option.
        """
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        children = self._get_sorted_children()

        contenders = []
        for child in children:
            try:
                data, stat = self.client.get(self.path + "/" + child)
                identifier = data.decode('utf-8')
                if not unlocks_only or self._UNLOCK_REQUEST in identifier:
                    identifier = identifier.replace(
                        self._UNLOCK_SUFFIX, '')
                    contenders.append(identifier)
            except NoNodeError:  # pragma: nocover
                pass
        return contenders

    def __enter__(self):
        self.acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
class KazooClient(object): """An Apache Zookeeper Python client supporting alternate callback handlers and high-level functionality. Watch functions registered with this class will not get session events, unlike the default Zookeeper watches. They will also be called with a single argument, a :class:`~kazoo.protocol.states.WatchedEvent` instance. """ def __init__(self, hosts='127.0.0.1:2181', timeout=10.0, client_id=None, handler=None, default_acl=None, auth_data=None, read_only=None, randomize_hosts=True, connection_retry=None, command_retry=None, logger=None, **kwargs): """Create a :class:`KazooClient` instance. All time arguments are in seconds. :param hosts: Comma-separated list of hosts to connect to (e.g. 127.0.0.1:2181,127.0.0.1:2182,[::1]:2183). :param timeout: The longest to wait for a Zookeeper connection. :param client_id: A Zookeeper client id, used when re-establishing a prior session connection. :param handler: An instance of a class implementing the :class:`~kazoo.interfaces.IHandler` interface for callback handling. :param default_acl: A default ACL used on node creation. :param auth_data: A list of authentication credentials to use for the connection. Should be a list of (scheme, credential) tuples as :meth:`add_auth` takes. :param read_only: Allow connections to read only servers. :param randomize_hosts: By default randomize host selection. :param connection_retry: A :class:`kazoo.retry.KazooRetry` object to use for retrying the connection to Zookeeper. Also can be a dict of options which will be used for creating one. :param command_retry: A :class:`kazoo.retry.KazooRetry` object to use for the :meth:`KazooClient.retry` method. Also can be a dict of options which will be used for creating one. :param logger: A custom logger to use instead of the module global `log` instance. Basic Example: .. code-block:: python zk = KazooClient() zk.start() children = zk.get_children('/') zk.stop() As a convenience all recipe classes are available as attributes and get automatically bound to the client. For example:: zk = KazooClient() zk.start() lock = zk.Lock('/lock_path') .. versionadded:: 0.6 The read_only option. Requires Zookeeper 3.4+ .. versionadded:: 0.6 The retry_max_delay option. .. versionadded:: 0.6 The randomize_hosts option. .. versionchanged:: 0.8 Removed the unused watcher argument (was second argument). .. versionadded:: 1.2 The connection_retry, command_retry and logger options. 
""" self.logger = logger or log # Record the handler strategy used self.handler = handler if handler else SequentialThreadingHandler() if inspect.isclass(self.handler): raise ConfigurationError("Handler must be an instance of a class, " "not the class: %s" % self.handler) self.auth_data = auth_data if auth_data else set([]) self.default_acl = default_acl self.randomize_hosts = randomize_hosts self.hosts = None self.chroot = None self.set_hosts(hosts) # Curator like simplified state tracking, and listeners for # state transitions self._state = KeeperState.CLOSED self.state = KazooState.LOST self.state_listeners = set() self._reset() self.read_only = read_only if client_id: self._session_id = client_id[0] self._session_passwd = client_id[1] else: self._reset_session() # ZK uses milliseconds self._session_timeout = int(timeout * 1000) # We use events like twitter's client to track current and # desired state (connected, and whether to shutdown) self._live = self.handler.event_object() self._writer_stopped = self.handler.event_object() self._stopped = self.handler.event_object() self._stopped.set() self._writer_stopped.set() self.retry = self._conn_retry = None if type(connection_retry) is dict: self._conn_retry = KazooRetry(**connection_retry) elif type(connection_retry) is KazooRetry: self._conn_retry = connection_retry if type(command_retry) is dict: self.retry = KazooRetry(**command_retry) elif type(command_retry) is KazooRetry: self.retry = command_retry if type(self._conn_retry) is KazooRetry: if self.handler.sleep_func != self._conn_retry.sleep_func: raise ConfigurationError("Retry handler and event handler " " must use the same sleep func") if type(self.retry) is KazooRetry: if self.handler.sleep_func != self.retry.sleep_func: raise ConfigurationError("Command retry handler and event " "handler must use the same sleep func") if self.retry is None or self._conn_retry is None: old_retry_keys = dict(_RETRY_COMPAT_DEFAULTS) for key in old_retry_keys: try: old_retry_keys[key] = kwargs.pop(key) warnings.warn('Passing retry configuration param %s to the' ' client directly is deprecated, please pass a' ' configured retry object (using param %s)' % ( key, _RETRY_COMPAT_MAPPING[key]), DeprecationWarning, stacklevel=2) except KeyError: pass retry_keys = {} for oldname, value in old_retry_keys.items(): retry_keys[_RETRY_COMPAT_MAPPING[oldname]] = value if self._conn_retry is None: self._conn_retry = KazooRetry( sleep_func=self.handler.sleep_func, **retry_keys) if self.retry is None: self.retry = KazooRetry( sleep_func=self.handler.sleep_func, **retry_keys) self._conn_retry.interrupt = lambda: self._stopped.is_set() self._connection = ConnectionHandler(self, self._conn_retry.copy(), logger=self.logger) # Every retry call should have its own copy of the retry helper # to avoid shared retry counts self._retry = self.retry def _retry(*args, **kwargs): return self._retry.copy()(*args, **kwargs) self.retry = _retry self.Barrier = partial(Barrier, self) self.Counter = partial(Counter, self) self.DoubleBarrier = partial(DoubleBarrier, self) self.ChildrenWatch = partial(ChildrenWatch, self) self.DataWatch = partial(DataWatch, self) self.Election = partial(Election, self) self.Lock = partial(Lock, self) self.Party = partial(Party, self) self.Queue = partial(Queue, self) self.LockingQueue = partial(LockingQueue, self) self.SetPartitioner = partial(SetPartitioner, self) self.Semaphore = partial(Semaphore, self) self.ShallowParty = partial(ShallowParty, self) # If we got any unhandled keywords, complain like 
python would if kwargs: raise TypeError('__init__() got unexpected keyword arguments: %s' % (kwargs.keys(),)) def _reset(self): """Resets a variety of client states for a new connection.""" self._queue = deque() self._pending = deque() self._reset_watchers() self._reset_session() self.last_zxid = 0 self._protocol_version = None def _reset_watchers(self): self._child_watchers = defaultdict(set) self._data_watchers = defaultdict(set) def _reset_session(self): self._session_id = None self._session_passwd = b'\x00' * 16 @property def client_state(self): """Returns the last Zookeeper client state. This is the non-simplified state information and is generally not as useful as the simplified KazooState information. """ return self._state @property def client_id(self): """Returns the client id for this Zookeeper session if connected. :returns: client id which consists of the session id and password. :rtype: tuple """ if self._live.is_set(): return (self._session_id, self._session_passwd) return None @property def connected(self): """Returns whether the Zookeeper connection has been established.""" return self._live.is_set() def set_hosts(self, hosts, randomize_hosts=None): """Sets the list of hosts used by this client. This function accepts the same format hosts parameter as the init function and sets the client to use the new hosts the next time it needs to look up a set of hosts. This function does not affect the current connected status. It is not currently possible to change the chroot with this function; setting a host list with a new chroot will raise a ConfigurationError. :param hosts: see description in :meth:`KazooClient.__init__` :param randomize_hosts: override client default for host randomization :raises: :exc:`ConfigurationError` if the hosts argument changes the chroot .. versionadded:: 1.4 .. warning:: Using this function to point a client to a completely disparate zookeeper server cluster has undefined behavior. """ if randomize_hosts is None: randomize_hosts = self.randomize_hosts self.hosts, chroot = collect_hosts(hosts, randomize_hosts) if chroot: new_chroot = normpath(chroot) else: new_chroot = '' if self.chroot is not None and new_chroot != self.chroot: raise ConfigurationError("Changing chroot at runtime is not " "currently supported") self.chroot = new_chroot def add_listener(self, listener): """Add a function to be called for connection state changes. This function will be called with a :class:`~kazoo.protocol.states.KazooState` instance indicating the new connection state on state transitions. .. warning:: This function must not block. If it's at all likely that it might need data or a value that could result in blocking, then the :meth:`~kazoo.interfaces.IHandler.spawn` method should be used so that the listener can return immediately. 
""" if not (listener and callable(listener)): raise ConfigurationError("listener must be callable") self.state_listeners.add(listener) def remove_listener(self, listener): """Remove a listener function""" self.state_listeners.discard(listener) def _make_state_change(self, state): # skip if state is current if self.state == state: return self.state = state # Create copy of listeners for iteration in case one needs to # remove itself for listener in list(self.state_listeners): try: remove = listener(state) if remove is True: self.remove_listener(listener) except Exception: self.logger.exception("Error in connection state listener") def _session_callback(self, state): if state == self._state: return # Note that we don't check self.state == LOST since that's also # the client's initial state dead_state = self._state in LOST_STATES self._state = state # If we were previously closed or had an expired session, and # are now connecting, don't bother with the rest of the # transitions since they only apply after # we've established a connection if dead_state and state == KeeperState.CONNECTING: self.logger.log(BLATHER, "Skipping state change") return if state in (KeeperState.CONNECTED, KeeperState.CONNECTED_RO): self.logger.info("Zookeeper connection established, state: %s", state) self._live.set() self._make_state_change(KazooState.CONNECTED) elif state in LOST_STATES: self.logger.info("Zookeeper session lost, state: %s", state) self._live.clear() self._make_state_change(KazooState.LOST) self._notify_pending(state) self._reset() else: self.logger.info("Zookeeper connection lost") # Connection lost self._live.clear() self._notify_pending(state) self._make_state_change(KazooState.SUSPENDED) self._reset_watchers() def _notify_pending(self, state): """Used to clear a pending response queue and request queue during connection drops.""" if state == KeeperState.AUTH_FAILED: exc = AuthFailedError() elif state == KeeperState.EXPIRED_SESSION: exc = SessionExpiredError() else: exc = ConnectionLoss() while True: try: request, async_object, xid = self._pending.popleft() if async_object: async_object.set_exception(exc) except IndexError: break while True: try: request, async_object = self._queue.popleft() if async_object: async_object.set_exception(exc) except IndexError: break def _safe_close(self): self.handler.stop() timeout = self._session_timeout // 1000 if timeout < 10: timeout = 10 if not self._connection.stop(timeout): raise WriterNotClosedException( "Writer still open from prior connection " "and wouldn't close after %s seconds" % timeout) def _call(self, request, async_object): """Ensure there's an active connection and put the request in the queue if there is. Returns False if the call short circuits due to AUTH_FAILED, CLOSED, EXPIRED_SESSION or CONNECTING state. 
""" if self._state == KeeperState.AUTH_FAILED: async_object.set_exception(AuthFailedError()) return False elif self._state == KeeperState.CLOSED: async_object.set_exception(ConnectionClosedError( "Connection has been closed")) return False elif self._state in (KeeperState.EXPIRED_SESSION, KeeperState.CONNECTING): async_object.set_exception(SessionExpiredError()) return False self._queue.append((request, async_object)) # wake the connection, guarding against a race with close() write_pipe = self._connection._write_pipe if write_pipe is None: async_object.set_exception(ConnectionClosedError( "Connection has been closed")) try: os.write(write_pipe, b'\0') except: async_object.set_exception(ConnectionClosedError( "Connection has been closed")) def start(self, timeout=15): """Initiate connection to ZK. :param timeout: Time in seconds to wait for connection to succeed. :raises: :attr:`~kazoo.interfaces.IHandler.timeout_exception` if the connection wasn't established within `timeout` seconds. """ event = self.start_async() event.wait(timeout=timeout) if not self.connected: # We time-out, ensure we are disconnected self.stop() raise self.handler.timeout_exception("Connection time-out") if self.chroot and not self.exists("/"): warnings.warn("No chroot path exists, the chroot path " "should be created before normal use.") def start_async(self): """Asynchronously initiate connection to ZK. :returns: An event object that can be checked to see if the connection is alive. :rtype: :class:`~threading.Event` compatible object. """ # If we're already connected, ignore if self._live.is_set(): return self._live # Make sure we're safely closed self._safe_close() # We've been asked to connect, clear the stop and our writer # thread indicator self._stopped.clear() self._writer_stopped.clear() # Start the handler self.handler.start() # Start the connection self._connection.start() return self._live def stop(self): """Gracefully stop this Zookeeper session. This method can be called while a reconnection attempt is in progress, which will then be halted. Once the connection is closed, its session becomes invalid. All the ephemeral nodes in the ZooKeeper server associated with the session will be removed. The watches left on those nodes (and on their parents) will be triggered. """ if self._stopped.is_set(): return self._stopped.set() self._queue.append((CloseInstance, None)) os.write(self._connection._write_pipe, b'\0') self._safe_close() def restart(self): """Stop and restart the Zookeeper session.""" self.stop() self.start() def close(self): """Free any resources held by the client. This method should be called on a stopped client before it is discarded. Not doing so may result in filehandles being leaked. .. versionadded:: 1.0 """ self._connection.close() def command(self, cmd=b'ruok'): """Sent a management command to the current ZK server. Examples are `ruok`, `envi` or `stat`. :returns: An unstructured textual response. :rtype: str :raises: :exc:`ConnectionLoss` if there is no connection open, or possibly a :exc:`socket.error` if there's a problem with the connection used just for this command. .. versionadded:: 0.5 """ if not self._live.is_set(): raise ConnectionLoss("No connection to server") peer = self._connection._socket.getpeername() sock = self.handler.create_connection( peer, timeout=self._session_timeout / 1000.0) sock.sendall(cmd) result = sock.recv(8192) sock.close() return result.decode('utf-8', 'replace') def server_version(self): """Get the version of the currently connected ZK server. 
:returns: The server version, for example (3, 4, 3). :rtype: tuple .. versionadded:: 0.5 """ data = self.command(b'envi') string = ENVI_VERSION.match(data).group(1) return tuple([int(i) for i in string.split('.')]) def add_auth(self, scheme, credential): """Send credentials to server. :param scheme: authentication scheme (default supported: "digest"). :param credential: the credential -- value depends on scheme. :returns: True if it was successful. :rtype: bool :raises: :exc:`~kazoo.exceptions.AuthFailedError` if it failed though the session state will be set to AUTH_FAILED as well. """ return self.add_auth_async(scheme, credential).get() def add_auth_async(self, scheme, credential): """Asynchronously send credentials to server. Takes the same arguments as :meth:`add_auth`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(scheme, basestring): raise TypeError("Invalid type for scheme") if not isinstance(credential, basestring): raise TypeError("Invalid type for credential") # we need this auth data to re-authenticate on reconnect self.auth_data.add((scheme, credential)) async_result = self.handler.async_result() self._call(Auth(0, scheme, credential), async_result) return async_result def unchroot(self, path): """Strip the chroot if applicable from the path.""" if not self.chroot: return path if path.startswith(self.chroot): return path[len(self.chroot):] else: return path def sync_async(self, path): """Asynchronous sync. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ async_result = self.handler.async_result() self._call(Sync(_prefix_root(self.chroot, path)), async_result) return async_result def sync(self, path): """Sync, blocks until response is acknowledged. Flushes channel between process and leader. :param path: path of node. :returns: The node path that was synced. :raises: :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. .. versionadded:: 0.5 """ return self.sync_async(path).get() def create(self, path, value=b"", acl=None, ephemeral=False, sequence=False, makepath=False): """Create a node with the given value as its data. Optionally set an ACL on the node. The ephemeral and sequence arguments determine the type of the node. An ephemeral node will be automatically removed by ZooKeeper when the session associated with the creation of the node expires. A sequential node will be given the specified path plus a suffix `i` where i is the current sequential number of the node. The sequence number is always fixed length of 10 digits, 0 padded. Once such a node is created, the sequential number will be incremented by one. If a node with the same actual path already exists in ZooKeeper, a NodeExistsError will be raised. Note that since a different actual path is used for each invocation of creating sequential nodes with the same path argument, the call will never raise NodeExistsError. If the parent node does not exist in ZooKeeper, a NoNodeError will be raised. Setting the optional `makepath` argument to `True` will create all missing parent nodes instead. An ephemeral node cannot have children. If the parent node of the given path is ephemeral, a NoChildrenForEphemeralsError will be raised. This operation, if successful, will trigger all the watches left on the node of the given path by :meth:`exists` and :meth:`get` API calls, and the watches left on the parent node by :meth:`get_children` API calls. The maximum allowable size of the node value is 1 MB. Values larger than this will cause a ZookeeperError to be raised. 
:param path: Path of node. :param value: Initial bytes value of node. :param acl: :class:`~kazoo.security.ACL` list. :param ephemeral: Boolean indicating whether node is ephemeral (tied to this session). :param sequence: Boolean indicating whether path is suffixed with a unique index. :param makepath: Whether the path should be created if it doesn't exist. :returns: Real path of the new node. :rtype: str :raises: :exc:`~kazoo.exceptions.NodeExistsError` if the node already exists. :exc:`~kazoo.exceptions.NoNodeError` if parent nodes are missing. :exc:`~kazoo.exceptions.NoChildrenForEphemeralsError` if the parent node is an ephemeral node. :exc:`~kazoo.exceptions.ZookeeperError` if the provided value is too large. :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. """ acl = acl or self.default_acl return self.create_async(path, value, acl=acl, ephemeral=ephemeral, sequence=sequence, makepath=makepath).get() def create_async(self, path, value=b"", acl=None, ephemeral=False, sequence=False, makepath=False): """Asynchronously create a ZNode. Takes the same arguments as :meth:`create`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` .. versionadded:: 1.1 The makepath option. """ if acl is None and self.default_acl: acl = self.default_acl if not isinstance(path, basestring): raise TypeError("path must be a string") if acl and (isinstance(acl, ACL) or not isinstance(acl, (tuple, list))): raise TypeError("acl must be a tuple/list of ACL's") if value is not None and not isinstance(value, bytes): raise TypeError("value must be a byte string") if not isinstance(ephemeral, bool): raise TypeError("ephemeral must be a bool") if not isinstance(sequence, bool): raise TypeError("sequence must be a bool") if not isinstance(makepath, bool): raise TypeError("makepath must be a bool") flags = 0 if ephemeral: flags |= 1 if sequence: flags |= 2 if acl is None: acl = OPEN_ACL_UNSAFE async_result = self.handler.async_result() @capture_exceptions(async_result) def do_create(): result = self._create_async_inner(path, value, acl, flags, trailing=sequence) result.rawlink(create_completion) @capture_exceptions(async_result) def retry_completion(result): result.get() do_create() @wrap(async_result) def create_completion(result): try: return self.unchroot(result.get()) except NoNodeError: if not makepath: raise if sequence and path.endswith('/'): parent = path.rstrip('/') else: parent, _ = split(path) self.ensure_path_async(parent, acl).rawlink(retry_completion) do_create() return async_result def _create_async_inner(self, path, value, acl, flags, trailing=False): async_result = self.handler.async_result() call_result = self._call( Create(_prefix_root(self.chroot, path, trailing=trailing), value, acl, flags), async_result) if call_result is False: # We hit a short-circuit exit on the _call. Because we are # not using the original async_result here, we bubble the # exception upwards to the do_create function in # KazooClient.create so that it gets set on the correct # async_result object raise async_result.exception return async_result def ensure_path(self, path, acl=None): """Recursively create a path if it doesn't exist. :param path: Path of node. :param acl: Permissions for node. """ return self.ensure_path_async(path, acl).get() def ensure_path_async(self, path, acl=None): """Recursively create a path asynchronously if it doesn't exist. Takes the same arguments as :meth:`ensure_path`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` .. 
versionadded:: 1.1 """ acl = acl or self.default_acl async_result = self.handler.async_result() @wrap(async_result) def create_completion(result): try: return result.get() except NodeExistsError: return True @capture_exceptions(async_result) def prepare_completion(next_path, result): result.get() self.create_async(next_path, acl=acl).rawlink(create_completion) @wrap(async_result) def exists_completion(path, result): if result.get(): return True parent, node = split(path) if node: self.ensure_path_async(parent, acl=acl).rawlink( partial(prepare_completion, path)) else: self.create_async(path, acl=acl).rawlink(create_completion) self.exists_async(path).rawlink(partial(exists_completion, path)) return async_result def exists(self, path, watch=None): """Check if a node exists. If a watch is provided, it will be left on the node with the given path. The watch will be triggered by a successful operation that creates/deletes the node or sets the data on the node. :param path: Path of node. :param watch: Optional watch callback to set for future changes to this path. :returns: ZnodeStat of the node if it exists, else None if the node does not exist. :rtype: :class:`~kazoo.protocol.states.ZnodeStat` or `None`. :raises: :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. """ return self.exists_async(path, watch).get() def exists_async(self, path, watch=None): """Asynchronously check if a node exists. Takes the same arguments as :meth:`exists`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") if watch and not callable(watch): raise TypeError("watch must be a callable") async_result = self.handler.async_result() self._call(Exists(_prefix_root(self.chroot, path), watch), async_result) return async_result def get(self, path, watch=None): """Get the value of a node. If a watch is provided, it will be left on the node with the given path. The watch will be triggered by a successful operation that sets data on the node, or deletes the node. :param path: Path of node. :param watch: Optional watch callback to set for future changes to this path. :returns: Tuple (value, :class:`~kazoo.protocol.states.ZnodeStat`) of node. :rtype: tuple :raises: :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't exist :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code """ return self.get_async(path, watch).get() def get_async(self, path, watch=None): """Asynchronously get the value of a node. Takes the same arguments as :meth:`get`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") if watch and not callable(watch): raise TypeError("watch must be a callable") async_result = self.handler.async_result() self._call(GetData(_prefix_root(self.chroot, path), watch), async_result) return async_result def get_children(self, path, watch=None, include_data=False): """Get a list of child nodes of a path. If a watch is provided it will be left on the node with the given path. The watch will be triggered by a successful operation that deletes the node of the given path or creates/deletes a child under the node. The list of children returned is not sorted and no guarantee is provided as to its natural or lexical order. :param path: Path of node to list. :param watch: Optional watch callback to set for future changes to this path. 
:param include_data: Include the :class:`~kazoo.protocol.states.ZnodeStat` of the node in addition to the children. This option changes the return value to be a tuple of (children, stat). :returns: List of child node names, or tuple if `include_data` is `True`. :rtype: list :raises: :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't exist. :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. .. versionadded:: 0.5 The `include_data` option. """ return self.get_children_async(path, watch, include_data).get() def get_children_async(self, path, watch=None, include_data=False): """Asynchronously get a list of child nodes of a path. Takes the same arguments as :meth:`get_children`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") if watch and not callable(watch): raise TypeError("watch must be a callable") if not isinstance(include_data, bool): raise TypeError("include_data must be a bool") async_result = self.handler.async_result() if include_data: req = GetChildren2(_prefix_root(self.chroot, path), watch) else: req = GetChildren(_prefix_root(self.chroot, path), watch) self._call(req, async_result) return async_result def get_acls(self, path): """Return the ACL and stat of the node of the given path. :param path: Path of the node. :returns: The ACL array of the given node and its :class:`~kazoo.protocol.states.ZnodeStat`. :rtype: tuple of (:class:`~kazoo.security.ACL` list, :class:`~kazoo.protocol.states.ZnodeStat`) :raises: :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't exist. :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code .. versionadded:: 0.5 """ return self.get_acls_async(path).get() def get_acls_async(self, path): """Return the ACL and stat of the node of the given path. Takes the same arguments as :meth:`get_acls`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") async_result = self.handler.async_result() self._call(GetACL(_prefix_root(self.chroot, path)), async_result) return async_result def set_acls(self, path, acls, version=-1): """Set the ACL for the node of the given path. Set the ACL for the node of the given path if such a node exists and the given version matches the version of the node. :param path: Path for the node. :param acls: List of :class:`~kazoo.security.ACL` objects to set. :param version: The expected node version that must match. :returns: The stat of the node. :raises: :exc:`~kazoo.exceptions.BadVersionError` if version doesn't match. :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't exist. :exc:`~kazoo.exceptions.InvalidACLError` if the ACL is invalid. :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. .. versionadded:: 0.5 """ return self.set_acls_async(path, acls, version).get() def set_acls_async(self, path, acls, version=-1): """Set the ACL for the node of the given path. Takes the same arguments as :meth:`set_acls`. 
:rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") if isinstance(acls, ACL) or not isinstance(acls, (tuple, list)): raise TypeError("acl must be a tuple/list of ACL's") if not isinstance(version, int): raise TypeError("version must be an int") async_result = self.handler.async_result() self._call(SetACL(_prefix_root(self.chroot, path), acls, version), async_result) return async_result def set(self, path, value, version=-1): """Set the value of a node. If the version of the node being updated is newer than the supplied version (and the supplied version is not -1), a BadVersionError will be raised. This operation, if successful, will trigger all the watches on the node of the given path left by :meth:`get` API calls. The maximum allowable size of the value is 1 MB. Values larger than this will cause a ZookeeperError to be raised. :param path: Path of node. :param value: New data value. :param version: Version of node being updated, or -1. :returns: Updated :class:`~kazoo.protocol.states.ZnodeStat` of the node. :raises: :exc:`~kazoo.exceptions.BadVersionError` if version doesn't match. :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't exist. :exc:`~kazoo.exceptions.ZookeeperError` if the provided value is too large. :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. """ return self.set_async(path, value, version).get() def set_async(self, path, value, version=-1): """Set the value of a node. Takes the same arguments as :meth:`set`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") if value is not None and not isinstance(value, bytes): raise TypeError("value must be a byte string") if not isinstance(version, int): raise TypeError("version must be an int") async_result = self.handler.async_result() self._call(SetData(_prefix_root(self.chroot, path), value, version), async_result) return async_result def transaction(self): """Create and return a :class:`TransactionRequest` object Creates a :class:`TransactionRequest` object. A Transaction can consist of multiple operations which can be committed as a single atomic unit. Either all of the operations will succeed or none of them. :returns: A TransactionRequest. :rtype: :class:`TransactionRequest` .. versionadded:: 0.6 Requires Zookeeper 3.4+ """ return TransactionRequest(self) def delete(self, path, version=-1, recursive=False): """Delete a node. The call will succeed if such a node exists, and the given version matches the node's version (if the given version is -1, the default, it matches any node's versions). This operation, if successful, will trigger all the watches on the node of the given path left by `exists` API calls, and the watches on the parent node left by `get_children` API calls. :param path: Path of node to delete. :param version: Version of node to delete, or -1 for any. :param recursive: Recursively delete node and all its children, defaults to False. :type recursive: bool :raises: :exc:`~kazoo.exceptions.BadVersionError` if version doesn't match. :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't exist. :exc:`~kazoo.exceptions.NotEmptyError` if the node has children. :exc:`~kazoo.exceptions.ZookeeperError` if the server returns a non-zero error code. 
""" if not isinstance(recursive, bool): raise TypeError("recursive must be a bool") if recursive: return self._delete_recursive(path) else: return self.delete_async(path, version).get() def delete_async(self, path, version=-1): """Asynchronously delete a node. Takes the same arguments as :meth:`delete`, with the exception of `recursive`. :rtype: :class:`~kazoo.interfaces.IAsyncResult` """ if not isinstance(path, basestring): raise TypeError("path must be a string") if not isinstance(version, int): raise TypeError("version must be an int") async_result = self.handler.async_result() self._call(Delete(_prefix_root(self.chroot, path), version), async_result) return async_result def _delete_recursive(self, path): try: children = self.get_children(path) except NoNodeError: return True if children: for child in children: if path == "/": child_path = path + child else: child_path = path + "/" + child self._delete_recursive(child_path) try: self.delete(path) except NoNodeError: # pragma: nocover pass
class Lock(object): """Kazoo Lock Example usage with a :class:`~kazoo.client.KazooClient` instance: .. code-block:: python zk = KazooClient() lock = zk.Lock("/lockpath", "my-identifier") with lock: # blocks waiting for lock acquisition # do something with the lock Note: This lock is not *re-entrant*. Repeated calls after already acquired will raise a ``RuntimeError``. This is an exclusive lock. For a read/write lock, see :class:`WLock` and :class:`RLock`. """ def __init__(self, client, path, identifier=None, node_name="__lock__", exclude_names=None): """Create a Kazoo lock. node_name and exclude_names are typically only used internally to implement read/write locks. They should be left unset for exclusive locks. :param client: A :class:`~kazoo.client.KazooClient` instance. :param path: The lock path to use. :param identifier: Name to use for this lock contender. This can be useful for querying to see who the current lock contenders are. :param node_name: Node name, after the contender UUID, before the sequence number. Involved in read/write locks. For a normal (exclusive) lock, leave unset. :param exclude_names: Node names which exclude this contender when present at a lower sequence number. Involved in read/write locks. For a normal (exclusive) lock, leave unset. """ self.client = client self.path = path # some data is written to the node. this can be queried via # contenders() to see who is contending for the lock self.data = str(identifier or "").encode('utf-8') self.wake_event = client.handler.event_object() self.node_name = node_name if exclude_names is None: exclude_names = [self.node_name] self.exclude_names = exclude_names # props to Netflix Curator for this trick. It is possible for our # create request to succeed on the server, but for a failure to # prevent us from getting back the full path name. We prefix our # lock name with a uuid and can check for its presence on retry. self.prefix = uuid.uuid4().hex + self.node_name self.create_path = self.path + "/" + self.prefix self.create_tried = False self.is_acquired = False self.assured_path = False self.cancelled = False self._retry = KazooRetry(max_tries=None, sleep_func=client.handler.sleep_func) def _ensure_path(self): self.client.ensure_path(self.path) self.assured_path = True def cancel(self): """Cancel a pending lock acquire.""" self.cancelled = True self.wake_event.set() def acquire(self, blocking=True, timeout=None): """Acquire the lock. By default, blocks and waits forever. :param blocking: Block until lock is obtained or return immediately. :type blocking: bool :param timeout: Don't wait forever to acquire the lock. :type timeout: float or None :returns: Was the lock acquired? :rtype: bool :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock wasn't acquired within `timeout` seconds. .. versionadded:: 1.1 The timeout option. 
""" if self.is_acquired: raise RuntimeError("Lock at path '%s' has already been" " acquired" % self.path) try: retry = self._retry.copy() retry.deadline = timeout self.is_acquired = retry(self._inner_acquire, blocking=blocking, timeout=timeout) except RetryFailedError: self._best_effort_cleanup() except KazooException: # if we did ultimately fail, attempt to clean up self._best_effort_cleanup() self.cancelled = False raise if not self.is_acquired: self._delete_node(self.node) return self.is_acquired def _watch_session(self, state): self.wake_event.set() return True def _inner_acquire(self, blocking, timeout): # make sure our election parent node exists if not self.assured_path: self._ensure_path() node = None if self.create_tried: node = self._find_node() else: self.create_tried = True if not node: node = self.client.create(self.create_path, self.data, ephemeral=True, sequence=True) # strip off path to node node = node[len(self.path) + 1:] self.node = node while True: self.wake_event.clear() # bail out with an exception if cancellation has been requested if self.cancelled: raise CancelledError() children = self._get_sorted_children() try: our_index = children.index(node) except ValueError: # pragma: nocover # somehow we aren't in the children -- probably we are # recovering from a session failure and our ephemeral # node was removed raise ForceRetryError() predecessor = self.predecessor(children, our_index) if not predecessor: return True if not blocking: return False # otherwise we are in the mix. watch predecessor and bide our time predecessor = self.path + "/" + predecessor self.client.add_listener(self._watch_session) try: if self.client.exists(predecessor, self._watch_predecessor): self.wake_event.wait(timeout) if not self.wake_event.isSet(): raise LockTimeout("Failed to acquire lock on %s after " "%s seconds" % (self.path, timeout)) finally: self.client.remove_listener(self._watch_session) def predecessor(self, children, index): for c in children[:index]: if any(n in c for n in self.exclude_names): return c return None def _watch_predecessor(self, event): self.wake_event.set() def _get_sorted_children(self): children = self.client.get_children(self.path) # Node names are prefixed by a type: strip the prefix first, which may # be one of multiple values in case of a read-write lock, and return # only the sequence number (as a string since it is padded and will sort # correctly anyway). # # In some cases, the lock path may contain nodes with other prefixes # (eg. in case of a lease), just sort them last ('~' sorts after all # ASCII digits). def _seq(c): for name in ["__lock__", "__rlock__"]: idx = c.find(name) if idx != -1: return c[idx + len(name):] # Sort unknown node names eg. "lease_holder" last. return '~' children.sort(key=_seq) return children def _find_node(self): children = self.client.get_children(self.path) for child in children: if child.startswith(self.prefix): return child return None def _delete_node(self, node): self.client.delete(self.path + "/" + node) def _best_effort_cleanup(self): try: node = self._find_node() if node: self._delete_node(node) except KazooException: # pragma: nocover pass def release(self): """Release the lock immediately.""" return self.client.retry(self._inner_release) def _inner_release(self): if not self.is_acquired: return False try: self._delete_node(self.node) except NoNodeError: # pragma: nocover pass self.is_acquired = False self.node = None return True def contenders(self): """Return an ordered list of the current contenders for the lock. 
.. note:: If the contenders did not set an identifier, it will appear as a blank string. """ # make sure our election parent node exists if not self.assured_path: self._ensure_path() children = self._get_sorted_children() contenders = [] for child in children: try: data, stat = self.client.get(self.path + "/" + child) contenders.append(data.decode('utf-8')) except NoNodeError: # pragma: nocover pass return contenders def __enter__(self): self.acquire() def __exit__(self, exc_type, exc_value, traceback): self.release()
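A small usage sketch of this Lock recipe (path, identifier, and timeout are illustrative); ``acquire(timeout=...)`` raises ``LockTimeout`` on expiry, at which point ``contenders()`` shows who is queued:

.. code-block:: python

    from kazoo.client import KazooClient
    from kazoo.exceptions import LockTimeout

    zk = KazooClient()
    zk.start()

    lock = zk.Lock('/lockpath', 'my-identifier')
    try:
        if lock.acquire(timeout=5):
            try:
                pass  # critical section
            finally:
                lock.release()
    except LockTimeout:
        # Identifiers written by each contender, in queue order.
        print('contenders:', lock.contenders())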
class ZookeeperClient(object): def __init__(self, module, server_list): # logging logger = logging.getLogger(module) logger.setLevel(logging.INFO) try: handler = logging.handlers.RotatingFileHandler('/var/log/contrail/' + module + '-zk.log', maxBytes=10*1024*1024, backupCount=5) except IOError: print "Cannot open log file in /var/log/contrail/" else: log_format = logging.Formatter('%(asctime)s [%(name)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') handler.setFormatter(log_format) logger.addHandler(handler) self._zk_client = \ kazoo.client.KazooClient( server_list, timeout=20, handler=kazoo.handlers.gevent.SequentialGeventHandler(), logger=logger) self._zk_client.add_listener(self._zk_listener) self._logger = logger self._election = None # KazooRetry to retry keeper CRUD operations self._retry = KazooRetry(max_tries=None) self.connect() # end __init__ # start def connect(self): while True: try: self._zk_client.start() break except gevent.event.Timeout as e: self.syslog( 'Failed to connect with Zookeeper - will retry in a second') gevent.sleep(1) # Zookeeper is also throwing exception due to delay in master election except Exception as e: self.syslog('%s - will retry in a second' % (str(e))) gevent.sleep(1) self.syslog('Connected to ZooKeeper!') # end def is_connected(self): return self._zk_client.state == KazooState.CONNECTED # end is_connected def syslog(self, msg): if not self._logger: return self._logger.info(msg) # end syslog def _zk_listener(self, state): if state == KazooState.CONNECTED: if self._election: self._election.cancel() elif state == KazooState.LOST: # Lost the session with ZooKeeper Server # Best option we have is to exit the process and restart all # over again os._exit(2) # end def _zk_election_callback(self, func, *args, **kwargs): func(*args, **kwargs) # Exit if running master encounters error or exception exit(1) # end def master_election(self, path, identifier, func, *args, **kwargs): while True: self._election = self._zk_client.Election(path, identifier) self._election.run(self._zk_election_callback, func, *args, **kwargs) # end master_election def create_node(self, path, value=None): try: if value is None: value = uuid.uuid4() retry = self._retry.copy() retry(self._zk_client.create, path, str(value), makepath=True) except kazoo.exceptions.NodeExistsError: current_value = self.read_node(path) if current_value == value: return True raise ResourceExistsError(path, str(current_value)) # end create_node def delete_node(self, path, recursive=False): try: retry = self._retry.copy() retry(self._zk_client.delete, path, recursive=recursive) except kazoo.exceptions.NoNodeError: pass except Exception: raise # end delete_node def read_node(self, path): try: retry = self._retry.copy() value = retry(self._zk_client.get, path) return value[0] except Exception: return None # end read_node def get_children(self, path): try: retry = self._retry.copy() return retry(self._zk_client.get_children, path) except Exception: return []
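For illustration, a sketch of driving this wrapper, assuming a reachable ensemble and the gevent/Contrail environment the class already imports; ``serve_as_master`` is a hypothetical work function, and the module name and server list are placeholders:

.. code-block:: python

    import gevent

    def serve_as_master():
        # Hypothetical master loop; _zk_election_callback exits the
        # process if this function ever returns or raises.
        while True:
            gevent.sleep(1)

    client = ZookeeperClient('my-module', '10.0.0.1:2181,10.0.0.2:2181')
    client.create_node('/my-module/id')       # stores a generated uuid
    print client.read_node('/my-module/id')
    client.master_election('/my-module/election', 'instance-1',
                           serve_as_master)   # blocks; never returns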
class ZkSyncManager(object): RETRIES = 2 LOCK_TIMEOUT = 3 def __init__(self, host='127.0.0.1:2181', lock_path_prefix='/mastermind/locks/'): self.client = KazooClient(host, timeout=3) logger.info('Connecting to zookeeper host {0}, ' 'lock_path_prefix: {1}'.format(host, lock_path_prefix)) try: self.client.start() except Exception as e: logger.error(e) raise self._retry = KazooRetry(max_tries=self.RETRIES) self.lock_path_prefix = lock_path_prefix @contextmanager def lock(self, lockid, blocking=True, timeout=LOCK_TIMEOUT): # with self.__locks_lock: lock = Lock(self.client, self.lock_path_prefix + lockid) try: acquired = lock.acquire(blocking=blocking, timeout=timeout) logger.debug('Lock {0} acquired: {1}'.format(lockid, acquired)) if not acquired: raise LockFailedError(lock_id=lockid) yield except LockTimeout: logger.info('Failed to acquire lock {0} due to timeout ' '({1} seconds)'.format(lockid, timeout)) raise LockFailedError(lock_id=lockid) except LockFailedError: raise except Exception as e: logger.error('Failed to acquire lock {0}: {1}\n{2}'.format( lockid, e, traceback.format_exc())) raise finally: lock.release() def persistent_locks_acquire(self, locks, data=''): try: retry = self._retry.copy() result = retry(self._inner_persistent_locks_acquire, locks=locks, data=data) except RetryFailedError: raise LockError except KazooException as e: logger.error('Failed to fetch persistent locks {0}: {1}\n{2}'.format( locks, e, traceback.format_exc())) raise LockError return result def _inner_persistent_locks_acquire(self, locks, data): ensured_paths = set() tr = self.client.transaction() for lockid in locks: path = self.lock_path_prefix + lockid parts = path.rsplit('/', 1) if len(parts) == 2 and parts[0] not in ensured_paths: self.client.ensure_path(parts[0]) ensured_paths.add(parts[0]) tr.create(path, data) failed = False failed_locks = [] result = tr.commit() for i, res in enumerate(result): if isinstance(res, ZookeeperError): failed = True if isinstance(res, NodeExistsError): failed_locks.append(locks[i]) if failed_locks: holders = [] for f in failed_locks: # TODO: fetch all holders with 1 transaction request holders.append((f, self.client.get(self.lock_path_prefix + f))) foreign_holders = [(l, h) for l, h in holders if h[0] != data] failed_lock, holder_resp = foreign_holders and foreign_holders[0] or holders[0] holder = holder_resp[0] holders_ids = list(set([h[0] for _, h in holders])) logger.warn('Persistent lock {0} is already set by {1}'.format(failed_lock, holder)) raise LockAlreadyAcquiredError( 'Lock for {0} is already acquired by job {1}'.format(failed_lock, holder), lock_id=failed_lock, holder_id=holder, holders_ids=holders_ids) elif failed: logger.error('Failed to set persistent locks {0}, result: {1}'.format( locks, result)) raise LockError return True def get_children_locks(self, lock_prefix): try: retry = self._retry.copy() result = retry(self.__inner_get_children_locks, lock_prefix) except RetryFailedError: raise LockError return result def __inner_get_children_locks(self, lock_prefix): full_path = self.lock_path_prefix + lock_prefix self.client.ensure_path(os.path.normpath(full_path)) result = self.client.get_children(full_path) return ['{0}{1}'.format(lock_prefix, lock) for lock in result] def persistent_locks_release(self, locks, check=''): try: retry = self._retry.copy() result = retry(self.__inner_persistent_locks_release, locks=locks, check=check) except RetryFailedError: raise LockError except KazooException as e: logger.error('Failed to remove persistent locks {0}: 
{1}\n{2}'.format( locks, e, traceback.format_exc())) raise LockError return result def __inner_persistent_locks_release(self, locks, check): for lockid in locks: try: if check: data = self.client.get(self.lock_path_prefix + lockid) if data[0] != check: logger.error('Lock {0} has inconsistent data: {1}, ' 'expected {2}'.format(lockid, data[0], check)) raise InconsistentLockError(lock_id=lockid, holder_id=data[0]) self.client.delete(self.lock_path_prefix + lockid) except NoNodeError: logger.warning('Persistent lock {0} is already removed'.format(lockid)) return True
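A usage sketch under the same assumptions as the class itself (a reachable ensemble plus the module-level ``logger`` and exception types); the lock ids and job id are illustrative:

.. code-block:: python

    manager = ZkSyncManager(host='127.0.0.1:2181',
                            lock_path_prefix='/mastermind/locks/')

    # Short-lived exclusive section; LockFailedError is raised on timeout.
    with manager.lock('couple-42', timeout=3):
        pass  # do guarded work here

    # Persistent locks survive the session; they are tagged with the
    # holder's id and released only against a matching check value.
    manager.persistent_locks_acquire(['couple-42'], data='job-1')
    manager.persistent_locks_release(['couple-42'], check='job-1')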
def __init__(self, hosts='127.0.0.1:2181', timeout=10.0, client_id=None, handler=None, default_acl=None, auth_data=None, read_only=None, randomize_hosts=True, retry=None, logger=None, **kwargs): """Create a :class:`KazooClient` instance. All time arguments are in seconds. :param hosts: Comma-separated list of hosts to connect to (e.g. 127.0.0.1:2181,127.0.0.1:2182). :param timeout: The longest to wait for a Zookeeper connection. :param client_id: A Zookeeper client id, used when re-establishing a prior session connection. :param handler: An instance of a class implementing the :class:`~kazoo.interfaces.IHandler` interface for callback handling. :param default_acl: A default ACL used on node creation. :param auth_data: A list of authentication credentials to use for the connection. Should be a list of (scheme, credential) tuples as :meth:`add_auth` takes. :param read_only: Allow connections to read only servers. :param randomize_hosts: By default randomize host selection. :param retry: The configured retry object to use. Retry parameters will be used for connection establishment attempts and reconnects. Basic Example: .. code-block:: python zk = KazooClient() zk.start() children = zk.get_children('/') zk.stop() As a convenience all recipe classes are available as attributes and get automatically bound to the client. For example:: zk = KazooClient() zk.start() lock = zk.Lock('/lock_path') .. versionadded:: 0.6 The read_only option. Requires Zookeeper 3.4+ .. versionadded:: 0.6 The retry_max_delay option. .. versionadded:: 0.6 The randomize_hosts option. .. versionchanged:: 0.8 Removed the unused watcher argument (was second argument). """ self.logger = logger or log # Record the handler strategy used self.handler = handler if handler else SequentialThreadingHandler() if inspect.isclass(self.handler): raise ConfigurationError("Handler must be an instance of a class, " "not the class: %s" % self.handler) self.auth_data = auth_data if auth_data else set([]) self.default_acl = default_acl self.randomize_hosts = randomize_hosts self.hosts, chroot = collect_hosts(hosts, randomize_hosts) if chroot: self.chroot = normpath(chroot) else: self.chroot = '' # Curator like simplified state tracking, and listeners for # state transitions self._state = KeeperState.CLOSED self.state = KazooState.LOST self.state_listeners = set() self._reset() self.read_only = read_only if client_id: self._session_id = client_id[0] self._session_passwd = client_id[1] else: self._reset_session() # ZK uses milliseconds self._session_timeout = int(timeout * 1000) # We use events like twitter's client to track current and # desired state (connected, and whether to shutdown) self._live = self.handler.async_result() self._live.set(False) self._writer_stopped = self.handler.event_object() self._stopped = self.handler.event_object() self._stopped.set() self._writer_stopped.set() if retry is not None: self.retry = retry assert self.handler.sleep_func == self.retry.sleep_func, \ 'retry handler and event handler must use the same sleep func' else: retry_keys = dict(_RETRY_COMPAT_DEFAULTS) for key in retry_keys: try: retry_keys[key] = kwargs.pop(key) warnings.warn('Passing retry configuration param %s to the' ' client directly is deprecated, please pass a' ' configured retry object (using param %s)' % ( key, _RETRY_COMPAT_MAPPING[key]), DeprecationWarning, stacklevel=2) except KeyError: pass retry_keys = {_RETRY_COMPAT_MAPPING[oldname]: value for oldname, value in retry_keys.items()} self.retry = KazooRetry( 
sleep_func=self.handler.sleep_func, **retry_keys) self._connection = ConnectionHandler( self, self.retry.copy(), logger=self.logger) # convenience API from kazoo.recipe.barrier import Barrier from kazoo.recipe.barrier import DoubleBarrier from kazoo.recipe.counter import Counter from kazoo.recipe.election import Election from kazoo.recipe.lock import Lock from kazoo.recipe.lock import Semaphore from kazoo.recipe.partitioner import SetPartitioner from kazoo.recipe.party import Party from kazoo.recipe.party import ShallowParty from kazoo.recipe.queue import Queue from kazoo.recipe.queue import LockingQueue from kazoo.recipe.watchers import ChildrenWatch from kazoo.recipe.watchers import DataWatch self.Barrier = partial(Barrier, self) self.Counter = partial(Counter, self) self.DoubleBarrier = partial(DoubleBarrier, self) self.ChildrenWatch = partial(ChildrenWatch, self) self.DataWatch = partial(DataWatch, self) self.Election = partial(Election, self) self.Lock = partial(Lock, self) self.Party = partial(Party, self) self.Queue = partial(Queue, self) self.LockingQueue = partial(LockingQueue, self) self.SetPartitioner = partial(SetPartitioner, self) self.Semaphore = partial(Semaphore, self) self.ShallowParty = partial(ShallowParty, self) # If we got any unhandled keywords, complain like python would if kwargs: raise TypeError('__init__() got unexpected keyword arguments: %s' % (kwargs.keys(),))
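With this older signature a single ``KazooRetry`` governs both connection attempts and reconnects; a brief sketch (values illustrative), keeping in mind the asserted requirement that the retry object and handler share a sleep function:

.. code-block:: python

    from kazoo.client import KazooClient
    from kazoo.retry import KazooRetry

    # KazooRetry's sleep_func defaults to time.sleep, which matches the
    # default SequentialThreadingHandler, so the assertion above passes.
    retry = KazooRetry(max_tries=-1, delay=0.2, backoff=2, max_delay=30)
    zk = KazooClient(hosts='127.0.0.1:2181', retry=retry)
    zk.start()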
class ZookeeperClient(object): def __init__(self, module, server_list, logging_fn=None): # logging logger = logging.getLogger(module) logger.setLevel(logging.DEBUG) try: handler = logging.handlers.RotatingFileHandler( LOG_DIR + module + '-zk.log', maxBytes=10*1024*1024, backupCount=5) except IOError: print "Cannot open log file in %s" %(LOG_DIR) else: log_format = logging.Formatter('%(asctime)s [%(name)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p') handler.setFormatter(log_format) logger.addHandler(handler) if logging_fn: self.log = logging_fn else: self.log = self.syslog # KazooRetry to retry keeper CRUD operations self._retry = KazooRetry(max_tries=None, max_delay=300, sleep_func=gevent.sleep) self._zk_client = kazoo.client.KazooClient( server_list, timeout=400, handler=kazoo.handlers.gevent.SequentialGeventHandler(), logger=logger, connection_retry=self._retry, command_retry=self._retry) self._zk_client.add_listener(self._zk_listener) self._logger = logger self._election = None self._server_list = server_list self._conn_state = None self._sandesh_connection_info_update(status='INIT', message='') self._lost_cb = None self.connect() # end __init__ # start def connect(self): while True: try: self._zk_client.start() break except gevent.event.Timeout as e: # Update connection info self._sandesh_connection_info_update(status='DOWN', message=str(e)) gevent.sleep(1) # Zookeeper is also throwing exception due to delay in master election except Exception as e: # Update connection info self._sandesh_connection_info_update(status='DOWN', message=str(e)) gevent.sleep(1) # Update connection info self._sandesh_connection_info_update(status='UP', message='') # end def is_connected(self): return self._zk_client.state == KazooState.CONNECTED # end is_connected def syslog(self, msg, *args, **kwargs): if not self._logger: return level = kwargs.get('level', 'info') if isinstance(level, int): from pysandesh.sandesh_logger import SandeshLogger level = SandeshLogger.get_py_logger_level(level) log_method = getattr(self._logger, level, self._logger.info) log_method(msg) # end syslog def set_lost_cb(self, lost_cb=None): # set a callback to be called when kazoo state is lost # set to None for default action self._lost_cb = lost_cb # end set_lost_cb def _zk_listener(self, state): if state == KazooState.CONNECTED: if self._election: self._election.cancel() # Update connection info self._sandesh_connection_info_update(status='UP', message='') elif state == KazooState.LOST: # Lost the session with ZooKeeper Server # Best of option we have is to exit the process and restart all # over again self._sandesh_connection_info_update(status='DOWN', message='Connection to Zookeeper lost') if self._lost_cb: self._lost_cb() else: os._exit(2) elif state == KazooState.SUSPENDED: # Update connection info self._sandesh_connection_info_update(status='INIT', message = 'Connection to zookeeper lost. 
Retrying') # end def _zk_election_callback(self, func, *args, **kwargs): func(*args, **kwargs) # Exit if running master encounters error or exception exit(1) # end def master_election(self, path, identifier, func, *args, **kwargs): while True: self._election = self._zk_client.Election(path, identifier) self._election.run(self._zk_election_callback, func, *args, **kwargs) # end master_election def create_node(self, path, value=None): try: if value is None: value = uuid.uuid4() retry = self._retry.copy() retry(self._zk_client.create, path, str(value), makepath=True) except kazoo.exceptions.NodeExistsError: current_value = self.read_node(path) if current_value == value: return True raise ResourceExistsError(path, str(current_value), 'zookeeper') # end create_node def delete_node(self, path, recursive=False): try: retry = self._retry.copy() retry(self._zk_client.delete, path, recursive=recursive) except kazoo.exceptions.NoNodeError: pass except Exception: raise # end delete_node def read_node(self, path, include_timestamp=False): try: retry = self._retry.copy() value = retry(self._zk_client.get, path) if include_timestamp: return value return value[0] except Exception: return None # end read_node def get_children(self, path): try: retry = self._retry.copy() return retry(self._zk_client.get_children, path) except Exception: return [] # end get_children def _sandesh_connection_info_update(self, status, message): from pysandesh.connection_info import ConnectionState from pysandesh.gen_py.process_info.ttypes import ConnectionStatus from pysandesh.gen_py.process_info.ttypes import ConnectionType as ConnType from pysandesh.gen_py.sandesh.ttypes import SandeshLevel new_conn_state = getattr(ConnectionStatus, status) ConnectionState.update(conn_type = ConnType.ZOOKEEPER, name = 'Zookeeper', status = new_conn_state, message = message, server_addrs = self._server_list.split(',')) if (self._conn_state and self._conn_state != ConnectionStatus.DOWN and new_conn_state == ConnectionStatus.DOWN): msg = 'Connection to Zookeeper down: %s' %(message) self.log(msg, level=SandeshLevel.SYS_ERR) if (self._conn_state and self._conn_state != new_conn_state and new_conn_state == ConnectionStatus.UP): msg = 'Connection to Zookeeper ESTABLISHED' self.log(msg, level=SandeshLevel.SYS_NOTICE) self._conn_state = new_conn_state
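As a sketch of the ``set_lost_cb`` hook above (module name, server list, and handling are assumptions, and the Contrail/pysandesh environment is presumed available): installing a callback replaces the default ``os._exit(2)`` on session loss:

.. code-block:: python

    client = ZookeeperClient('my-module', '127.0.0.1:2181')

    def on_session_lost():
        # Runs instead of os._exit(2) when KazooState.LOST is seen.
        client.log('zookeeper session lost, reconnecting', level='error')
        client.connect()

    client.set_lost_cb(on_session_lost)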
# Assumed imports for this snippet.
import os
import uuid

import gevent
import kazoo.client
import kazoo.exceptions
import kazoo.handlers.gevent
from kazoo.protocol.states import KazooState
from kazoo.retry import KazooRetry


class zkClient(object):

    def __init__(self, server_list):
        self._retry = KazooRetry(max_tries=None, max_delay=300,
                                 sleep_func=gevent.sleep)
        self._zk_client = kazoo.client.KazooClient(
            server_list,
            timeout=400,
            handler=kazoo.handlers.gevent.SequentialGeventHandler(),
            connection_retry=self._retry,
            command_retry=self._retry)
        self._zk_client.add_listener(self._zk_listener)
        self._election = None
        self._server_list = server_list
        self._conn_state = None
        self._lost_cb = None
        self.connect()
    # end __init__

    def _zk_listener(self, state):
        if state == KazooState.CONNECTED:
            if self._election:
                self._election.cancel()
        elif state == KazooState.LOST:
            if self._lost_cb:
                self._lost_cb()
            else:
                os._exit(2)
        elif state == KazooState.SUSPENDED:
            pass
    # end _zk_listener

    def connect(self):
        while True:
            try:
                self._zk_client.start()
                break
            except gevent.event.Timeout:
                gevent.sleep(1)
            except Exception:
                gevent.sleep(1)
    # end connect

    def is_connected(self):
        return self._zk_client.state == KazooState.CONNECTED
    # end is_connected

    def master_election(self, path, identifier, func, *args, **kwargs):
        self._election = self._zk_client.Election(path, identifier)
        self._election.run(func, *args, **kwargs)
    # end master_election

    def create_node(self, path, value=None):
        try:
            if value is None:
                value = uuid.uuid4()
            retry = self._retry.copy()
            retry(self._zk_client.create, path, str(value), makepath=True)
        except kazoo.exceptions.NodeExistsError:
            current_value = self.read_node(path)
            # Compare against the string form, which is what was stored.
            if current_value == str(value):
                return True
            raise Exception("create node path %s, value %s" % (path, value))
    # end create_node

    def delete_node(self, path, recursive=False):
        try:
            retry = self._retry.copy()
            retry(self._zk_client.delete, path, recursive=recursive)
        except kazoo.exceptions.NoNodeError:
            pass
    # end delete_node

    def read_node(self, path, include_timestamp=False):
        try:
            retry = self._retry.copy()
            value = retry(self._zk_client.get, path)
            if include_timestamp:
                return value
            return value[0]
        except Exception:
            return None
    # end read_node
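The zkClient variant above is the same pattern stripped of the sandesh state reporting. A short sketch, again with an illustrative address and paths; note that without a lost callback the listener terminates the process with os._exit(2) when the session is lost:

def on_session_lost():
    print 'ZooKeeper session lost'

client = zkClient('127.0.0.1:2181')
client._lost_cb = on_session_lost   # this variant has no set_lost_cb() setter
print client.is_connected()         # True once start() has succeeded
client.create_node('/demo/leader', value='node-a')
print client.read_node('/demo/leader')  # 'node-a'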
# Assumed imports for this snippet; zk_group_path() and LOCK_TIMEOUT are
# defined elsewhere in the surrounding module.
import uuid

from kazoo.exceptions import (CancelledError, KazooException, LockTimeout,
                              NoNodeError, NotEmptyError)
from kazoo.retry import ForceRetryError, KazooRetry, RetryFailedError
from tornado import gen, ioloop
from tornado.locks import Lock as TornadoLock


class EntityLock(object):
    """ A ZooKeeper-based entity lock that allows test-and-set operations.

    This is based on kazoo's lock recipe, and has been modified to lock
    multiple entity groups. This lock is not re-entrant. Repeated calls after
    the lock is already acquired will block.
    """
    _NODE_NAME = '__lock__'

    # Tornado lock which allows tornado to switch to a different coroutine
    # while the current one is waiting for an entity group lock.
    _tornado_lock = TornadoLock()

    def __init__(self, client, keys, txid=None):
        """ Create an entity lock.

        Args:
          client: A kazoo client.
          keys: A list of entity Reference objects.
          txid: An integer specifying the transaction ID.
        """
        self.client = client
        self.paths = [zk_group_path(key) for key in keys]

        # The txid is written to the contender nodes for deadlock resolution.
        self.data = str(txid or '')

        self.wake_event = client.handler.event_object()

        # Give the contender nodes a uniquely identifiable prefix in case
        # their existence is in question.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_paths = [path + '/' + self.prefix for path in self.paths]

        self.create_tried = False
        self.is_acquired = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()

    def _ensure_path(self):
        """ Make sure the ZooKeeper lock paths have been created. """
        for path in self.paths:
            self.client.ensure_path(path)

    def cancel(self):
        """ Cancel a pending lock acquire. """
        self.cancelled = True
        self.wake_event.set()

    @gen.coroutine
    def acquire(self):
        """ Acquire the global tornado lock, then the ZooKeeper lock. """
        now = ioloop.IOLoop.current().time()
        yield EntityLock._tornado_lock.acquire(now + LOCK_TIMEOUT)
        try:
            locked = self.unsafe_acquire()
            raise gen.Return(locked)
        finally:
            if not self.is_acquired:
                EntityLock._tornado_lock.release()

    def unsafe_acquire(self):
        """ Acquire the lock, blocking up to the retry deadline
        (LOCK_TIMEOUT).

        Returns:
          A boolean indicating whether or not the lock was acquired.
        """

        def _acquire_lock():
            """ Acquire a kazoo thread lock. """
            got_it = self._lock.acquire(False)
            if not got_it:
                raise ForceRetryError()
            return True

        retry = self._retry.copy()
        retry.deadline = LOCK_TIMEOUT

        # Prevent other threads from acquiring the lock at the same time.
        locked = self._lock.acquire(False)
        if not locked:
            try:
                retry(_acquire_lock)
            except RetryFailedError:
                return False

        already_acquired = self.is_acquired
        try:
            gotten = False
            try:
                gotten = retry(self._inner_acquire)
            except RetryFailedError:
                if not already_acquired:
                    self._best_effort_cleanup()
            except KazooException:
                if not already_acquired:
                    self._best_effort_cleanup()
                    self.cancelled = False
                raise
            if gotten:
                self.is_acquired = gotten
            if not gotten and not already_acquired:
                self._delete_nodes(self.nodes)
            return gotten
        finally:
            self._lock.release()

    def _watch_session(self, state):
        """ A callback function for handling connection state changes.

        Args:
          state: The new connection state.
        """
        self.wake_event.set()
        return True

    def _resolve_deadlocks(self, children_list):
        """ Check if there are any concurrent cross-group locks.

        Args:
          children_list: A list of current transactions for each group.
        """
        current_txid = int(self.data)
        for index, children in enumerate(children_list):
            our_index = children.index(self.nodes[index])

            # Skip groups where this lock already has the earliest contender.
            if our_index == 0:
                continue

            # Get transaction IDs for all earlier contenders.
            for child in children[:our_index]:
                try:
                    data, _ = self.client.get(
                        self.paths[index] + '/' + child)
                except NoNodeError:
                    continue

                # If data is not set, it doesn't belong to a cross-group
                # transaction.
                if not data:
                    continue

                child_txid = int(data)

                # As an arbitrary rule, require later transactions to
                # resolve deadlocks.
                if current_txid > child_txid:
                    # TODO: Implement more graceful deadlock detection.
                    # Pass the callable and its argument to retry separately.
                    self.client.retry(self._delete_nodes, self.nodes)
                    raise ForceRetryError()

    def _inner_acquire(self):
        """ Create contender node(s) and wait until the lock is acquired. """

        # Make sure the group lock node exists.
        self._ensure_path()

        nodes = [None for _ in self.paths]
        if self.create_tried:
            nodes = self._find_nodes()
        else:
            self.create_tried = True

        for index, node in enumerate(nodes):
            if node is not None:
                continue

            # The entity group lock root may have been deleted, so try a few
            # times.
            try_num = 0
            while True:
                try:
                    node = self.client.create(
                        self.create_paths[index], self.data, sequence=True)
                    break
                except NoNodeError:
                    self.client.ensure_path(self.paths[index])
                    if try_num > 3:
                        raise ForceRetryError()
                try_num += 1

            # Strip off the path prefix, leaving only the node name.
            node = node[len(self.paths[index]) + 1:]
            nodes[index] = node

        self.nodes = nodes

        while True:
            self.wake_event.clear()

            # Bail out with an exception if cancellation has been requested.
            if self.cancelled:
                raise CancelledError()

            children_list = self._get_sorted_children()

            predecessors = []
            for index, children in enumerate(children_list):
                try:
                    our_index = children.index(nodes[index])
                except ValueError:
                    raise ForceRetryError()

                # If the lock for this group hasn't been acquired, get the
                # predecessor.
                if our_index != 0:
                    predecessors.append(
                        self.paths[index] + "/" + children[our_index - 1])

            if not predecessors:
                return True

            if len(nodes) > 1:
                self._resolve_deadlocks(children_list)

            # Wait for each predecessor to be removed.
            # TODO: Listen for all of them at the same time.
            for index, predecessor in enumerate(predecessors):
                self.client.add_listener(self._watch_session)
                try:
                    if self.client.exists(predecessor,
                                          self._watch_predecessor):
                        self.wake_event.wait(LOCK_TIMEOUT)
                        if not self.wake_event.isSet():
                            error = ('Failed to acquire lock on {} after {} '
                                     'seconds'.format(
                                         self.paths,
                                         LOCK_TIMEOUT * (index + 1)))
                            raise LockTimeout(error)
                finally:
                    self.client.remove_listener(self._watch_session)

    def _watch_predecessor(self, event):
        """ A callback function for handling contender deletions.

        Args:
          event: A ZooKeeper event.
        """
        self.wake_event.set()

    def _get_sorted_children(self):
        """ Retrieve a list of sorted contenders for each group.

        Returns:
          A list of contenders for each group.
        """
        children = []
        for path in self.paths:
            try:
                children.append(self.client.get_children(path))
            except NoNodeError:
                children.append([])

        # Ignore the lock path prefix when sorting contenders.
        lockname = self._NODE_NAME
        for child_list in children:
            child_list.sort(
                key=lambda c: c[c.find(lockname) + len(lockname):])
        return children

    def _find_nodes(self):
        """ Retrieve a list of paths this lock has created.

        Returns:
          A list of ZooKeeper paths.
        """
        nodes = []
        for path in self.paths:
            try:
                children = self.client.get_children(path)
            except NoNodeError:
                children = []

            node = None
            for child in children:
                if child.startswith(self.prefix):
                    node = child
            nodes.append(node)
        return nodes

    def _delete_nodes(self, nodes):
        """ Remove ZooKeeper nodes.

        Args:
          nodes: A list of nodes to delete.
        """
        for index, node in enumerate(nodes):
            if node is None:
                continue
            self.client.delete(self.paths[index] + "/" + node)

    def _best_effort_cleanup(self):
        """ Attempt to delete nodes that this lock has created. """
        try:
            nodes = self._find_nodes()
            self._delete_nodes(nodes)
        except KazooException:
            pass

    def release(self):
        """ Release the lock immediately. """
        try:
            self.client.retry(self._inner_release)

            # Try to clean up the group lock paths.
            for path in self.paths:
                try:
                    self.client.delete(path)
                except (NotEmptyError, NoNodeError):
                    pass
            return
        finally:
            if not self.is_acquired:
                EntityLock._tornado_lock.release()

    def ensure_release_tornado_lock(self):
        """ Ensure that the tornado lock (which is global to the datastore
        server) is released.

        This MUST be called whenever the lock has been acquired, even if the
        entity group lock in ZooKeeper was left acquired after a failure.
        """
        if self.is_acquired:
            EntityLock._tornado_lock.release()

    def _inner_release(self):
        """ Release the lock by removing the created nodes. """
        if not self.is_acquired:
            return False

        try:
            self._delete_nodes(self.nodes)
        except NoNodeError:
            pass

        self.is_acquired = False
        self.nodes = [None for _ in self.paths]
        return True

    def __enter__(self):
        self.unsafe_acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()