Example #1
    def __init__(self,
                 host='127.0.0.1:2181',
                 handler=None,
                 ignore_expire=False):

        if handler == 'gevent':
            import gevent

            kr = KazooRetry(max_tries=-1,
                            delay=0.2,
                            sleep_func=gevent.sleep,
                            ignore_expire=ignore_expire)
            KazooClient.__init__(self,
                                 hosts=host,
                                 connection_retry=kr,
                                 handler=SequentialGeventHandler())
        else:
            kr = KazooRetry(max_tries=-1,
                            delay=0.2,
                            ignore_expire=ignore_expire)
            KazooClient.__init__(
                self,
                hosts=host,
                connection_retry=kr,
            )

        self.start()
        self.add_listener(self._conn_state_listener)
Example #2
    def __init__(self, module, server_list):
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.INFO)
        try:
            handler = logging.handlers.RotatingFileHandler(
                '/var/log/contrail/' + module + '-zk.log',
                maxBytes=10 * 1024 * 1024,
                backupCount=5)
        except IOError:
            print "Cannot open log file in /var/log/contrail/"
        else:
            log_format = logging.Formatter(
                '%(asctime)s [%(name)s]: %(message)s',
                datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        self._zk_client = \
            kazoo.client.KazooClient(
                server_list,
                timeout=20,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None)
        self.connect()
Example #3
    def __init__(self, client, keys, txid=None):
        """ Create an entity lock.

    Args:
      client: A kazoo client.
      keys: A list of entity Reference objects.
      txid: An integer specifying the transaction ID.
    """
        self.client = client
        self.paths = [zk_group_path(key) for key in keys]

        # The txid is written to the contender nodes for deadlock resolution.
        self.data = str(txid or '')

        self.wake_event = client.handler.event_object()

        # Give the contender nodes a uniquely identifiable prefix in case their
        # existence is in question.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME

        self.create_paths = [path + '/' + self.prefix for path in self.paths]

        self.create_tried = False
        self.is_acquired = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()
Example #4
    def _get_hiveserver2_info_with_zookeeper(self, host, port,
                                             zookeeper_name_space):
        """Get hiveserver2 URL information from zookeeper."""
        from kazoo.client import KazooClient
        from kazoo.retry import KazooRetry

        hosts = host.split(',')
        zk_hosts = ','.join(
            list(map(lambda x: ':'.join([x, str(port)]), hosts)))

        conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
        cmd_retry_policy = KazooRetry(max_tries=3,
                                      delay=0.3,
                                      backoff=1,
                                      max_delay=1,
                                      ignore_expire=False)
        zk = KazooClient(hosts=zk_hosts,
                         connection_retry=conn_retry_policy,
                         command_retry=cmd_retry_policy)

        zk.start()
        children = zk.get_children('/' + zookeeper_name_space)
        nodes = self.get_hiveserver2_info(children)
        zk.stop()
        zk.close()

        if len(nodes) == 0:
            from kazoo.exceptions import ZookeeperError
            raise ZookeeperError(
                "Can not find child in zookeeper path({}).".format(
                    zookeeper_name_space))

        return nodes
Example #5
    def __init__(self, client, path, identifier=None):
        """Create a Kazoo lock.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This
                           can be useful for querying to see who the
                           current lock contenders are.

        """
        self.client = client
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')

        self.wake_event = client.handler.event_object()

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()
Example #6
    def __init__(self,
                 module,
                 server_list,
                 host_ip,
                 logging_fn=None,
                 zk_timeout=400,
                 log_response_time=None):
        self.host_ip = host_ip
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.DEBUG)
        try:
            handler = logging.handlers.RotatingFileHandler(
                LOG_DIR + module + '-zk.log',
                maxBytes=10 * 1024 * 1024,
                backupCount=5)
        except IOError:
            print "Cannot open log file in %s" % (LOG_DIR)
        else:
            log_format = logging.Formatter(
                '%(asctime)s [%(name)s]: %(message)s',
                datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        if logging_fn:
            self.log = logging_fn
        else:
            self.log = self.syslog
        self.log_response_time = log_response_time
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None,
                                 max_delay=300,
                                 sleep_func=gevent.sleep)
        self._zk_client = kazoo.client.KazooClient(
            server_list,
            timeout=zk_timeout,
            handler=kazoo.handlers.gevent.SequentialGeventHandler(),
            logger=logger,
            connection_retry=self._retry,
            command_retry=self._retry)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        self._server_list = server_list

        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')
        self._lost_cb = None
        self._suspend_cb = None

        self.delete_node = self._response_time(self.delete_node, "DELETE")
        self.create_node = self._response_time(self.create_node, "CREATE")
        self.read_node = self._response_time(self.read_node, "READ")
        self.get_children = self._response_time(self.get_children,
                                                "GET_CHILDREN")
        self.exists = self._response_time(self.exists, "EXISTS")
        self.connect()
Example #7
def zk_connect(zk_addr: str,
               zk_user: Optional[str] = None,
               zk_secret: Optional[str] = None) -> KazooClient:
    """Connect to ZooKeeper.

    On connection failure, the function attempts to reconnect indefinitely with
    exponential backoff up to 3 seconds. If a command fails, that command is
    retried every 300ms for 3 attempts before failing.

    These values are chosen to suit human-interactive timescales.

    Args:
        zk_addr: The address to connect to
        zk_user: The username to use when connecting to ZooKeeper or `None`
            if no authentication is necessary.
        zk_secret: The secret to use when connecting to ZooKeeper or `None`
            if no authentication is necessary.

    Returns:
        A ZooKeeper client connection in the form of a `kazoo.client.KazooClient`.
    """
    # Try to reconnect indefinitely, with the delay between attempts growing
    # exponentially up to ~3s; after that, retries occur roughly every 3 seconds.
    conn_retry_policy = KazooRetry(
        max_tries=-1,
        delay=0.3,
        backoff=1.3,
        max_delay=3,
        ignore_expire=True,
    )

    # Retry commands every 0.3 seconds, for a total of <1s (usually 0.9)
    cmd_retry_policy = KazooRetry(
        max_tries=3,
        delay=0.3,
        backoff=1,
        max_delay=1,
        ignore_expire=False,
    )

    default_acl = None
    auth_data = None
    if zk_user and zk_secret:
        default_acl = [make_digest_acl(zk_user, zk_secret, all=True)]
        scheme = 'digest'
        credential = "{}:{}".format(zk_user, zk_secret)
        auth_data = [(scheme, credential)]

    zk = KazooClient(
        hosts=zk_addr,
        timeout=30,
        connection_retry=conn_retry_policy,
        command_retry=cmd_retry_policy,
        default_acl=default_acl,
        auth_data=auth_data,
    )

    zk.start()
    return zk
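A minimal usage sketch for zk_connect() above; the znode path is illustrative, not from the source:

# Hedged sketch: assumes a reachable ZooKeeper; '/example' is a hypothetical path.
zk = zk_connect('127.0.0.1:2181')
try:
    zk.ensure_path('/example')        # goes through cmd_retry_policy
    data, stat = zk.get('/example')   # retried up to 3 times on failure
finally:
    zk.stop()
    zk.close()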
Example #8
 def __init__(self, zk_hosts):
     conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
     cmd_retry_policy = KazooRetry(max_tries=3,
                                   delay=0.3,
                                   backoff=1,
                                   max_delay=1,
                                   ignore_expire=False)
     self._zk = KazooClient(hosts=zk_hosts,
                            connection_retry=conn_retry_policy,
                            command_retry=cmd_retry_policy)
Example #9
def create_zk_client(zk_hosts: str) -> KazooClient:
    conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
    cmd_retry_policy = KazooRetry(max_tries=3,
                                  delay=0.3,
                                  backoff=1,
                                  max_delay=1,
                                  ignore_expire=False)
    return KazooClient(
        hosts=zk_hosts,
        connection_retry=conn_retry_policy,
        command_retry=cmd_retry_policy,
    )
Example #10
def zk() -> KazooClient:
    conn_retry_policy = KazooRetry(max_tries=-1, delay=0.1, max_delay=0.1)
    cmd_retry_policy = KazooRetry(
        max_tries=3, delay=0.3, backoff=1, max_delay=1, ignore_expire=False)
    zk = KazooClient(
        hosts='zk-1.zk:2181,zk-2.zk:2181,zk-3.zk:2181',
        connection_retry=conn_retry_policy,
        command_retry=cmd_retry_policy,
    )
    zk.start()
    yield zk
    zk.stop()
Example #11
 def __init__(self, name: str = None, server_host: str = get_host_ip() + ':2181', activate_distributor: bool = True,
              distributor_kafka_host: Union[str, List[str]] = get_host_ip() + ':9092'):
     self.is_virtual = False
     self.randomize_location()
     self.latency = random.uniform(0, 10)
     try:
         from jtop import jtop
         self.jetson = jtop()
         self.jetson.open()
     except Exception:
         self.jetson = None
     if 'virtual' in os.environ:
         self.is_virtual = True
     if name is None:
         if 'NODE_NAME' in os.environ:
             name = os.environ['NODE_NAME']
         else:
             name = get_local_ip()
     self.name = name
     print('I am ' + self.name)
     retry = KazooRetry(max_tries=-1)
     self.zk = KazooClient(hosts=server_host, connection_retry=retry, timeout=5.0)
     while True:
         try:
             self.zk.start()
             break
         except Exception as e:
             print(e)
     self.zk.ensure_path('/nodes/' + self.name)
     if activate_distributor:
         self.distributor = Distributor(name=name, kafka_bootstrap_servers=distributor_kafka_host,
                                        trigger=lambda x: self.randomize_location())
Example #12
File: lock.py Project: lmtwga/kazoo
    def __init__(self, client, path, identifier=None):
        """Create a Kazoo lock.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This
                           can be useful for querying to see who the
                           current lock contenders are.

        """
        self.client = client
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode("utf-8")

        self.wake_event = client.handler.event_object()

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None, sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()
Example #13
 def __init__(self, config):
     self.zk = KazooClient(config.hosts.zookeeper.connection_string)
     self.zk.start()
     credentials = ":".join(
         (config.hosts.zookeeper.username, config.hosts.zookeeper.password))
     self.zk.add_auth("digest", credentials)
     self.retry = KazooRetry(max_tries=3)
Example #14
    def __init__(self, module, server_list):
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.INFO)
        try:
            handler = logging.handlers.RotatingFileHandler('/var/log/contrail/' + module + '-zk.log', maxBytes=10*1024*1024, backupCount=5)
        except IOError:
            print "Cannot open log file in /var/log/contrail/"
        else:
            log_format = logging.Formatter('%(asctime)s [%(name)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        self._zk_client = \
            kazoo.client.KazooClient(
                server_list,
                timeout=20,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None)
        self.connect()
Example #15
class ZkOpers(object):

    zk = None

    DEFAULT_RETRY_POLICY = KazooRetry(
        max_tries=None,
        max_delay=10000,
    )

    rootPath = "/letv/nginx"
    confOpers = ConfigFileOpers()
    '''
    classdocs
    '''
    def __init__(self):
        '''
        Constructor
        '''
        self.zkaddress, self.zkport = get_zk_address()
        if "" != self.zkaddress and "" != self.zkport:
            self.zk = KazooClient(hosts=self.zkaddress + ':' +
                                  str(self.zkport),
                                  connection_retry=self.DEFAULT_RETRY_POLICY,
                                  timeout=20)
            self.zk.add_listener(self.listener)
            self.zk.start()
            logging.info("instance zk client (%s:%s)" %
                         (self.zkaddress, self.zkport))

    def close(self):
        try:
            self.zk.stop()
            self.zk.close()
        except Exception as e:
            logging.error(e)
Example #16
 def get_job_from_path(self, path):
     kr = KazooRetry(max_tries=0, ignore_expire=False)
     try:
         result = kr(self._inner_get_for_update, path)
     except RetryFailedError:
         return None
     return result
Example #17
 def _get_retry():
     """
     ZooKeeper connection retry is unusual and requires a dedicated retry object.
     Create that object with its appropriate settings here and return it.
     :return: KazooRetry object.
     """
     return KazooRetry(max_tries=5, backoff=2, max_delay=30)
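A hedged sketch (not part of the source project) of how the retry object returned by _get_retry() is typically wired into a client:

# Hedged sketch: the KazooRetry becomes the connection and/or command retry policy.
from kazoo.client import KazooClient

retry = _get_retry()
client = KazooClient(hosts='127.0.0.1:2181',
                     connection_retry=retry,
                     command_retry=retry.copy())  # use an independent copy per policy
client.start()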
Example #18
  def __init__(self, client, keys, txid=None):
    """ Create an entity lock.

    Args:
      client: A kazoo client.
      keys: A list of entity Reference objects.
      txid: An integer specifying the transaction ID.
    """
    self.client = client
    self.paths = [zk_group_path(key) for key in keys]

    # The txid is written to the contender nodes for deadlock resolution.
    self.data = str(txid or '')

    self.wake_event = client.handler.event_object()

    # Give the contender nodes a uniquely identifiable prefix in case their
    # existence is in question.
    self.prefix = uuid.uuid4().hex + self._NODE_NAME

    self.create_paths = [path + '/' + self.prefix for path in self.paths]

    self.create_tried = False
    self.is_acquired = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None,
                             sleep_func=client.handler.sleep_func)
    self._lock = client.handler.lock_object()
Example #19
    def __init__(self,
                 host='127.0.0.1:2181',
                 lock_path_prefix='/mastermind/locks/'):
        self.client = KazooClient(host, timeout=3)
        logger.info(
            'Connecting to zookeeper host {}, lock_path_prefix: {}'.format(
                host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise

        self._retry = KazooRetry(max_tries=self.RETRIES)

        self.lock_path_prefix = lock_path_prefix
Example #20
def main():
    """ Starts the groomer. """
    logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)

    parser = argparse.ArgumentParser()
    parser.add_argument('-v',
                        '--verbose',
                        action='store_true',
                        help='Output debug-level logging')
    args = parser.parse_args()

    if args.verbose:
        logger.setLevel(logging.DEBUG)

    zk_hosts = appscale_info.get_zk_node_ips()
    zk_client = KazooClient(hosts=','.join(zk_hosts),
                            connection_retry=ZK_PERSISTENT_RECONNECTS,
                            command_retry=KazooRetry(max_tries=-1))
    zk_client.start()

    db_access = DatastoreProxy()

    thread_pool = ThreadPoolExecutor(4)

    TransactionGroomer(zk_client, db_access, thread_pool)
    logger.info('Starting transaction groomer')

    IOLoop.current().start()
Example #21
    def __init__(self,
                 host,
                 port,
                 ishead=False,
                 istail=False,
                 previousid=0,
                 nextid=0,
                 chaintailid=0):
        self.inet = "{}:{}".format(host, port)
        self.retry = KazooRetry(max_tries=1000, delay=0.5)
        self.zk = KazooClient(hosts=self.inet,
                              logger=logging,
                              connection_retry=self.retry)

        self.ishead = ishead  # am I head?
        self.istail = istail  # am I tail?
        self.replicaid = 0  # id of this replica
        self.previousid = previousid  # id of predecessor
        self.nextid = nextid  # id of successor
        self.chaintailid = chaintailid
        self.hashtable = {}  # table of values updated by client requests
        self.client = {}  # client stubs for future use
        self.sentlist = []  # sent update requests not yet processed by tail
        self.previous = None
        self.next = None
        self.chaintail = None
        self.currxid = 0  # current xid server has seen so far
        self.host = host
        self.port = port
Example #22
    def _zookeeper_resolver(self, cfg):
        hosts, path = cfg[5:].split("/", 1)
        path = "/" + path

        retry = KazooRetry(max_tries=10)
        with zookeeper.client(hosts=hosts, read_only=True, connection_retry=retry) as zk:
            def master_id(key):
                return int(key.split("_")[-1])

            def get_masters():
                return [x for x in zk.get_children(path)
                        if re.search(r"\d+", x)]

            leader = sorted(get_masters(), key=lambda x: master_id(x))

            if len(leader) == 0:
                raise exceptions.MasterNotAvailableException("cannot find any masters at {0}".format(cfg,))
            data, stat = zk.get(os.path.join(path, leader[0]))

            if not data:
                raise exceptions.MasterNotAvailableException("Cannot retrieve valid MasterInfo data from ZooKeeper")

            try:
                parsed = json.loads(data)
                if parsed and "address" in parsed:
                    ip = parsed["address"].get("ip")
                    port = parsed["address"].get("port")
                    if ip and port:
                        return "{ip}:{port}".format(ip=ip, port=port)
            except ValueError as parse_error:
                log.debug("[WARN] No JSON content, probably connecting to older Mesos version. "
                          "Reason: {}".format(parse_error))
                raise exceptions.MasterNotAvailableException("Failed to parse mesos master ip from ZK")
Example #23
class DefaultAnnouncerCheckerProvider(AnnouncerCheckerProvider):
    DEFAULT_RETRY_MAX_DELAY = Amount(5, Time.MINUTES)
    DEFAULT_RETRY_POLICY = KazooRetry(
        max_tries=None,
        ignore_expire=True,
        max_delay=DEFAULT_RETRY_MAX_DELAY.as_(Time.SECONDS),
    )

    def __init__(self,
                 ensemble,
                 root='/aurora',
                 allow_custom_serverset_path=False):
        self.__ensemble = ensemble
        self.__root = root
        super(DefaultAnnouncerCheckerProvider,
              self).__init__(allow_custom_serverset_path)

    def make_zk_client(self):
        return KazooClient(self.__ensemble,
                           connection_retry=self.DEFAULT_RETRY_POLICY)

    def make_zk_path(self, assigned_task):
        config = assigned_task.task
        role, environment, name = (config.job.role, config.job.environment,
                                   config.job.name)
        return posixpath.join(self.__root, role, environment, name)
Example #24
    def __init__(self,
                 host=DEFAULT_HOST,
                 db_access=None,
                 log_level=logging.INFO):
        """ Creates a new ZKTransaction, which will communicate with Zookeeper
    on the given host.

    Args:
      host: A str that indicates which machine runs the Zookeeper service.
      db_access: A DatastoreProxy instance.
      log_level: A logging constant that specifies the instance logging level.
    """
        retry_policy = KazooRetry(max_tries=5)

        class_name = self.__class__.__name__
        self.logger = logging.getLogger(class_name)
        self.logger.setLevel(log_level)
        self.logger.info('Starting {}'.format(class_name))

        # Connection instance variables.
        self.host = host
        self.handle = kazoo.client.KazooClient(
            hosts=host,
            connection_retry=ZK_PERSISTENT_RECONNECTS,
            command_retry=retry_policy)
        self.run_with_retry = self.handle.retry
        self.handle.start()

        self.__counter_cache = {}

        self.db_access = db_access
Example #25
def zk_client(static_three_master_cluster: Cluster) -> KazooClient:
    """
    ZooKeeper client connected to a given DC/OS cluster.
    """
    zk_hostports = ','.join([
        '{}:2181'.format(m.public_ip_address)
        for m in static_three_master_cluster.masters
    ])
    retry_policy = KazooRetry(
        max_tries=-1,
        delay=1,
        backoff=1,
        max_delay=600,
        ignore_expire=True,
    )
    zk_client = KazooClient(
        hosts=zk_hostports,
        # Avoid failure due to client session timeout.
        timeout=40,
        # Work around https://github.com/python-zk/kazoo/issues/374
        connection_retry=retry_policy,
        command_retry=retry_policy,
    )
    zk_client.start()
    try:
        yield zk_client
    finally:
        zk_client.stop()
        zk_client.close()
Example #26
    def rebalance(self, partition_ids=None):
        if partition_ids is None:
            partition_ids = [
                str(p_id)
                for p_id in self.consumer_partitions[self._identifier]
            ]
        kr = KazooRetry(max_tries=3)
        kr.retry_exceptions = kr.retry_exceptions + tuple([NodeExistsError])

        my_partitions = self.consumer_partitions[self._identifier]
        self.logger.info('My partitions (%d): %s', len(my_partitions), my_partitions)

        # Clean up old ownership data first, so we don't block
        # the joining node(s)
        self._release_locks()

        nodes = sorted([node for node in self._group], key=lambda x: hash(x))
        my_new_partitions = [
            partition
            for partition in partition_ids
            if nodes[int(partition) % len(nodes)] == self._identifier and
               int(partition) not in my_partitions
        ]
        self.logger.info('My new partitions (%d): %s', len(my_new_partitions), my_new_partitions)
        for partition in my_new_partitions:
            c_id = nodes[int(partition) % len(nodes)]
            self.consumer_partitions[c_id].append(int(partition))
            p_path = self.path_formats['owner'].format(group=self.group,
                                                       topic=self.topic,
                                                       partition=partition)
            try:
                self.logger.debug('Acquiring ownership of partition %s',
                                  partition)
                kr(self._client.create, p_path,
                   value=self._identifier, ephemeral=True, makepath=True)
            except RetryFailedError as err:
                # A different consumer had been registered as the owner
                expired_cid, zstat = self._client.get(p_path)
                msg = 'Acquiring ownership of partition %s (was owned by %s)'
                self.logger.warn(msg, partition, expired_cid)
                # We need to delete / create, so that the node is created
                # ephemeral and owned by us
                self._client.delete(p_path)
                self._client.create(p_path, value=self._identifier,
                                    ephemeral=True, makepath=True)
        if self.partitions_changed_cb:
            self.partitions_changed_cb(self.consumer_partitions[self._identifier])
Example #27
    def rebalance(self, partition_ids=None):
        if partition_ids is None:
            partition_ids = [
                str(p_id)
                for p_id in self.consumer_partitions[self._identifier]
            ]
        kr = KazooRetry(max_tries=3)
        kr.retry_exceptions = kr.retry_exceptions + tuple([NodeExistsError])

        my_partitions = self.consumer_partitions[self._identifier]
        self.logger.info('My partitions (%d): %s', len(my_partitions),
                         my_partitions)

        # Clean up old ownership data first, so we don't block
        # the joining node(s)
        self._release_locks()

        nodes = sorted([node for node in self._group])
        self.logger.info('Connected nodes (%d): %s', len(nodes), nodes)
        my_new_partitions = [
            partition for partition in partition_ids
            if nodes[int(partition) % len(nodes)] == self._identifier
            and int(partition) not in my_partitions
        ]
        self.logger.info('My new partitions (%d): %s', len(my_new_partitions),
                         my_new_partitions)
        for partition in my_new_partitions:
            c_id = nodes[int(partition) % len(nodes)]
            self.consumer_partitions[c_id].append(int(partition))
            p_path = self.path_formats['owner'].format(group=self.group,
                                                       topic=self.topic,
                                                       partition=partition)
            try:
                self.logger.debug('Acquiring ownership of partition %s',
                                  partition)
                kr(self._client.create,
                   p_path,
                   value=self._identifier,
                   ephemeral=True,
                   makepath=True)
            except RetryFailedError:
                # A different consumer is still connected and owns this,
                # try to gracefully release everything else and fail out
                self.finish()
        if self.partitions_changed_cb:
            self.partitions_changed_cb(
                self.consumer_partitions[self._identifier])
Example #28
 def get_job(self, entry):
     path = self.unowned_path + "/" + str(entry)
     kr = KazooRetry(max_tries=3, ignore_expire=False)
     try:
         result = kr(self._inner_get, path)
     except RetryFailedError:
         return None
     return result
Example #29
    def __init__(self, client, path, identifier=None, extra_lock_patterns=()):
        """Create a Kazoo lock.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This can be
                          useful for querying to see who the current lock
                          contenders are.
        :param extra_lock_patterns: Strings that will be used to
                                    identify other znodes in the path
                                    that should be considered contenders
                                    for this lock.
                                    Use this for cross-implementation
                                    compatibility.

        .. versionadded:: 2.7.1
            The extra_lock_patterns option.
        """
        self.client = client
        self.path = path
        self._exclude_names = set(self._EXCLUDE_NAMES +
                                  list(extra_lock_patterns))
        self._contenders_re = re.compile(r"(?:{patterns})(-?\d{{10}})$".format(
            patterns="|".join(self._exclude_names)))

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode("utf-8")
        self.node = None

        self.wake_event = client.handler.event_object()

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()
Example #30
 def __enter__(self):
     """Initialize zk connnection."""
     kazooRetry = KazooRetry(max_tries=5)
     self.zk = KazooClient(
         hosts=self.hosts, read_only=True, connection_retry=kazooRetry
     )
     self.zk.start()
     return self
Example #31
    def rebalance(self, partition_ids=None):
        if partition_ids is None:
            partition_ids = [
                str(p_id)
                for p_id in self.consumer_partitions[self._identifier]
            ]
        kr = KazooRetry(max_tries=3)
        kr.retry_exceptions = kr.retry_exceptions + tuple([NodeExistsError])

        my_partitions = self.consumer_partitions[self._identifier]
        self.logger.info('My partitions (%d): %s',
                         len(my_partitions), my_partitions)

        # Clean up old ownership data first, so we don't block
        # the joining node(s)
        self._release_locks()

        nodes = sorted([node for node in self._group])
        self.logger.info('Connected nodes (%d): %s',
                         len(nodes), nodes)
        my_new_partitions = [
            partition
            for partition in partition_ids
            if nodes[int(partition) % len(nodes)] == self._identifier and
               int(partition) not in my_partitions
        ]
        self.logger.info('My new partitions (%d): %s',
                         len(my_new_partitions), my_new_partitions)
        for partition in my_new_partitions:
            c_id = nodes[int(partition) % len(nodes)]
            self.consumer_partitions[c_id].append(int(partition))
            p_path = self.path_formats['owner'].format(group=self.group,
                                                       topic=self.topic,
                                                       partition=partition)
            try:
                self.logger.debug('Acquiring ownership of partition %s',
                                  partition)
                kr(self._client.create, p_path,
                   value=self._identifier, ephemeral=True, makepath=True)
            except RetryFailedError:
                # A different consumer is still connected and owns this,
                # try to gracefully release everything else and fail out
                self.finish()
        if self.partitions_changed_cb:
            self.partitions_changed_cb(self.consumer_partitions[self._identifier])
Example #32
    def __init__(self, hosts, config):
        self._section_name = utils.get_module(__name__)
        self._max_delay = config.getint(self._section_name, "max_retry_delay",
                                        default=settings.DEFAULT_ZK_RETRY_MAX_DELAY)

        self._timeout = config.getint(self._section_name, "time_out", default=settings.DEFAULT_ZK_CONNECTION_TIMEOUT)
        connection_retry = KazooRetry(max_tries=-1, max_delay=self._max_delay)
        super(prpcZKClientManager, self).__init__(hosts=hosts, timeout=self._timeout,
                                                   connection_retry=connection_retry)
Example #33
    def __init__(self) -> None:
        hosts = settings.ZOO_HOSTS
        retry = KazooRetry(max_tries=-1, max_delay=60)
        self._zk = KazooClient(hosts,
                               connection_retry=retry,
                               command_retry=retry)

        # establish the connection
        self._zk.start()
Example #34
    def __init__(self, module, server_list, logging_fn=None):
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.INFO)
        try:
            handler = logging.handlers.RotatingFileHandler(
                LOG_DIR + module + '-zk.log',
                maxBytes=10 * 1024 * 1024,
                backupCount=5)
        except IOError:
            print "Cannot open log file in %s" % (LOG_DIR)
        else:
            log_format = logging.Formatter(
                '%(asctime)s [%(name)s]: %(message)s',
                datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        if logging_fn:
            self.log = logging_fn
        else:
            self.log = self.syslog

        self._zk_client = \
            kazoo.client.KazooClient(
                server_list,
                timeout=400,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        self._server_list = server_list
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None,
                                 max_delay=300,
                                 sleep_func=gevent.sleep)

        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')
        self._lost_cb = None

        self.connect()
Example #35
 def __init__(self, server_list):
     self._retry = KazooRetry(max_tries=None,
                              max_delay=300,
                              sleep_func=gevent.sleep)
     self._zk_client = KazooClient(hosts=','.join(server_list),
                                   timeout=400,
                                   handler=SequentialGeventHandler(),
                                   logger=logger,
                                   connection_retry=self._retry,
                                   command_retry=self._retry)
Example #36
    def __init__(self, client, path, identifier=None, exclusive=True):
        """Create a Kazoo lock.

        :param client: The Kazoo client
        :type client: :class:`~kazoo.client.KazooClient`
        :param path: The lock path to use. May not contain the strings
                     ``__SHARED__`` or ``__EXCLUSIVE__``, as they are used
                     internally
        :type path: str
        :param identifier: Name to use for this lock contender, which may be
                           useful for querying to see who the current lock
                           :py:meth:`contenders` are. May not contain the
                           string ``__UNLOCK__``, as this is used internally.
        :type identifier: str
        :param exclusive: Whether this is an exclusive lock (``False`` means
                          a "shared lock" as described above)
        :type exclusive: bool

        .. versionadded:: 1.4
            The exclusive option.
        """
        if self._MODE_SHARED in path or self._MODE_EXCLUSIVE in path:
            raise ValueError('Path "{}" contains a reserved word'.format(path))

        if identifier and self._UNLOCK_REQUEST in str(identifier):
            raise ValueError('Identifier "{}" contains a reserved word'.format(
                identifier))

        self.client = client
        self.path = path
        self.exclusive = exclusive

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')

        self.wake_event = client.handler.event_object()

        mode_suffix = self._MODE_EXCLUSIVE if exclusive else self._MODE_SHARED

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + mode_suffix
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None)
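A hedged usage sketch for the shared/exclusive variant above, assuming the class keeps kazoo's context-manager support; the client setup and lock path are illustrative:

# Hedged sketch: a shared (read) lock and an exclusive (write) lock on one path.
zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

read_lock = Lock(zk, '/locks/resource', identifier='reader', exclusive=False)
write_lock = Lock(zk, '/locks/resource', identifier='writer', exclusive=True)

with read_lock:
    # several shared holders may proceed while no exclusive lock is pending
    pass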
Example #37
    def __init__(self, host='127.0.0.1:2181', lock_path_prefix='/mastermind/locks/'):
        self.client = KazooClient(host, timeout=3)
        logger.info('Connecting to zookeeper host {0}, '
            'lock_path_prefix: {1}'.format(host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise

        self._retry = KazooRetry(max_tries=self.RETRIES)

        self.lock_path_prefix = lock_path_prefix
Example #38
    def __init__(self, client, path, identifier=None, node_name="__lock__",
                 exclude_names=None):
        """Create a Kazoo lock.

        node_name and exclude_names are typically only used internally to
        implement read/write locks. They should be left unset for exclusive
        locks.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This
                           can be useful for querying to see who the
                           current lock contenders are.
        :param node_name: Node name, after the contender UUID, before the
                          sequence number. Involved in read/write locks. For a
                          normal (exclusive) lock, leave unset.
        :param exclude_names: Node names which exclude this contender when
                              present at a lower sequence number. Involved in
                              read/write locks. For a normal (exclusive) lock,
                              leave unset.
        """
        self.client = client
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')

        self.wake_event = client.handler.event_object()

        self.node_name = node_name

        if exclude_names is None:
            exclude_names = [self.node_name]
        self.exclude_names = exclude_names

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self.node_name
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)
Example #39
    def __init__(self, module, server_list, host_ip, logging_fn=None, zk_timeout=400,
                 log_response_time=None):
        self.host_ip = host_ip
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.DEBUG)
        try:
            handler = logging.handlers.RotatingFileHandler(
                LOG_DIR + module + '-zk.log', maxBytes=10*1024*1024, backupCount=5)
        except IOError:
            print "Cannot open log file in %s" %(LOG_DIR)
        else:
            log_format = logging.Formatter('%(asctime)s [%(name)s]: %(message)s',
                                           datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        if logging_fn:
            self.log = logging_fn
        else:
            self.log = self.syslog
        self.log_response_time = log_response_time
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None, max_delay=300,
                                 sleep_func=gevent.sleep)
        self._zk_client = kazoo.client.KazooClient(
                server_list,
                timeout=zk_timeout,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger,
                connection_retry=self._retry,
                command_retry=self._retry)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        self._server_list = server_list

        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')
        self._lost_cb = None
        self._suspend_cb = None

        self.delete_node = self._response_time(self.delete_node, "DELETE")
        self.create_node = self._response_time(self.create_node, "CREATE")
        self.read_node = self._response_time(self.read_node, "READ")
        self.get_children= self._response_time(self.get_children, "GET_CHILDREN")
        self.exists = self._response_time(self.exists, "EXISTS")
        self.connect()
Example #40
    def __init__(self,server_list):
        self._retry = KazooRetry(max_tries=None, max_delay=300,
                                 sleep_func=gevent.sleep)
        self._zk_client = kazoo.client.KazooClient(
                server_list,
                timeout=400,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                connection_retry=self._retry,
                command_retry=self._retry)

        self._zk_client.add_listener(self._zk_listener)
        self._election = None
        self._server_list = server_list

        self._conn_state = None
        self._lost_cb = None

        self.connect()
Example #41
    def __init__(self, module, server_list, logging_fn=None):
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.INFO)
        try:
            handler = logging.handlers.RotatingFileHandler(LOG_DIR + module + '-zk.log', maxBytes=10*1024*1024, backupCount=5)
        except IOError:
            print "Cannot open log file in %s" %(LOG_DIR)
        else:
            log_format = logging.Formatter('%(asctime)s [%(name)s]: %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        if logging_fn:
            self.log = logging_fn
        else:
            self.log = self.syslog

        self._zk_client = \
            kazoo.client.KazooClient(
                server_list,
                timeout=400,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        self._server_list = server_list
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None, max_delay=300,
                                 sleep_func=gevent.sleep)

        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')
        self._lost_cb = None

        self.connect()
Example #42
File: lock.py Project: lmtwga/kazoo
class Lock(object):
    """Kazoo Lock

    Example usage with a :class:`~kazoo.client.KazooClient` instance:

    .. code-block:: python

        zk = KazooClient()
        zk.start()
        lock = zk.Lock("/lockpath", "my-identifier")
        with lock:  # blocks waiting for lock acquisition
            # do something with the lock

    Note: This lock is not *re-entrant*. Repeated calls to acquire after the
    lock is already held will block.

    """

    _NODE_NAME = "__lock__"

    def __init__(self, client, path, identifier=None):
        """Create a Kazoo lock.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This
                           can be useful for querying to see who the
                           current lock contenders are.

        """
        self.client = client
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode("utf-8")

        self.wake_event = client.handler.event_object()

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None, sleep_func=client.handler.sleep_func)
        self._lock = client.handler.lock_object()

    def _ensure_path(self):
        self.client.ensure_path(self.path)
        self.assured_path = True

    def cancel(self):
        """Cancel a pending lock acquire."""
        self.cancelled = True
        self.wake_event.set()

    def acquire(self, blocking=True, timeout=None):
        """
        Acquire the lock. By default, this blocks and waits forever.

        :param blocking: Block until lock is obtained or return immediately.
        :type blocking: bool
        :param timeout: Don't wait forever to acquire the lock.
        :type timeout: float or None

        :returns: Was the lock acquired?
        :rtype: bool

        :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
                 wasn't acquired within `timeout` seconds.

        .. versionadded:: 1.1
            The timeout option.
        """

        def _acquire_lock():
            got_it = self._lock.acquire(False)
            if not got_it:
                raise ForceRetryError()
            return True

        retry = self._retry.copy()
        retry.deadline = timeout

        # Ensure we are locked so that we avoid multiple threads in
        # this acquisition routine at the same time...
        locked = self._lock.acquire(False)
        if not locked and not blocking:
            return False
        if not locked:
            # Lock acquire doesn't take a timeout, so simulate it...
            try:
                locked = retry(_acquire_lock)
            except RetryFailedError:
                return False
        already_acquired = self.is_acquired
        try:
            gotten = False
            try:
                gotten = retry(self._inner_acquire, blocking=blocking, timeout=timeout)
            except RetryFailedError:
                if not already_acquired:
                    self._best_effort_cleanup()
            except KazooException:
                # if we did ultimately fail, attempt to clean up
                exc_info = sys.exc_info()
                if not already_acquired:
                    self._best_effort_cleanup()
                    self.cancelled = False
                six.reraise(exc_info[0], exc_info[1], exc_info[2])
            if gotten:
                self.is_acquired = gotten
            if not gotten and not already_acquired:
                self._delete_node(self.node)
            return gotten
        finally:
            self._lock.release()

    def _watch_session(self, state):
        self.wake_event.set()
        return True

    def _inner_acquire(self, blocking, timeout):

        # wait until it's our chance to get it..
        if self.is_acquired:
            if not blocking:
                return False
            raise ForceRetryError()

        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        node = None
        if self.create_tried:
            node = self._find_node()
        else:
            self.create_tried = True

        if not node:
            node = self.client.create(self.create_path, self.data, ephemeral=True, sequence=True)
            # strip off path to node
            node = node[len(self.path) + 1 :]

        self.node = node

        while True:
            self.wake_event.clear()

            # bail out with an exception if cancellation has been requested
            if self.cancelled:
                raise CancelledError()

            children = self._get_sorted_children()

            try:
                our_index = children.index(node)
            except ValueError:  # pragma: nocover
                # somehow we aren't in the children -- probably we are
                # recovering from a session failure and our ephemeral
                # node was removed
                raise ForceRetryError()

            if self.acquired_lock(children, our_index):
                return True

            if not blocking:
                return False

            # otherwise we are in the mix. watch predecessor and bide our time
            predecessor = self.path + "/" + children[our_index - 1]
            self.client.add_listener(self._watch_session)
            try:
                if self.client.exists(predecessor, self._watch_predecessor):
                    self.wake_event.wait(timeout)
                    if not self.wake_event.isSet():
                        raise LockTimeout("Failed to acquire lock on %s after " "%s seconds" % (self.path, timeout))
            finally:
                self.client.remove_listener(self._watch_session)

    def acquired_lock(self, children, index):
        return index == 0

    def _watch_predecessor(self, event):
        self.wake_event.set()

    def _get_sorted_children(self):
        children = self.client.get_children(self.path)

        # can't just sort directly: the node names are prefixed by uuids
        lockname = self._NODE_NAME
        children.sort(key=lambda c: c[c.find(lockname) + len(lockname) :])
        return children

    def _find_node(self):
        children = self.client.get_children(self.path)
        for child in children:
            if child.startswith(self.prefix):
                return child
        return None

    def _delete_node(self, node):
        self.client.delete(self.path + "/" + node)

    def _best_effort_cleanup(self):
        try:
            node = self._find_node()
            if node:
                self._delete_node(node)
        except KazooException:  # pragma: nocover
            pass

    def release(self):
        """Release the lock immediately."""
        return self.client.retry(self._inner_release)

    def _inner_release(self):
        if not self.is_acquired:
            return False

        try:
            self._delete_node(self.node)
        except NoNodeError:  # pragma: nocover
            pass

        self.is_acquired = False
        self.node = None
        return True

    def contenders(self):
        """Return an ordered list of the current contenders for the
        lock.

        .. note::

            If the contenders did not set an identifier, it will appear
            as a blank string.

        """
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        children = self._get_sorted_children()

        contenders = []
        for child in children:
            try:
                data, stat = self.client.get(self.path + "/" + child)
                contenders.append(data.decode("utf-8"))
            except NoNodeError:  # pragma: nocover
                pass
        return contenders

    def __enter__(self):
        self.acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
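A hedged sketch of the non-blocking and timeout forms of acquire() documented above; connection details are illustrative:

# Hedged sketch: non-blocking and timed acquisition with kazoo's Lock recipe.
from kazoo.client import KazooClient
from kazoo.exceptions import LockTimeout

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()
lock = zk.Lock('/lockpath', 'my-identifier')

if lock.acquire(blocking=False):      # returns immediately if already contended
    try:
        pass  # critical section
    finally:
        lock.release()

try:
    if lock.acquire(timeout=5):       # raises LockTimeout after ~5 seconds
        lock.release()
except LockTimeout:
    pass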
Example #43
class Lock(object):
    """Kazoo Lock

    Kazoo `Lock` supports three different locking strategies for a lock path.

    **Exclusive locks** represent the least complex locking strategy and
    guarantee that only a single ``Lock`` instance can acquire a lock path at
    any given time. This applies even if other locking strategies (as described
    below) are simultaneously in use for the same lock path. Exclusive locks
    are the default and will be provided if :py:meth:`__init__` is invoked
    with the default ``exclusive=True`` parameter.

    **Shared locks** allow different ``Lock`` instances to simultaneously
    acquire locks to the same lock path. In this strategy, a ``Lock`` instance
    is constructed with either ``exclusive=True`` (which is known as an
    "exclusive lock" and is described above) or ``exclusive=False`` (which is
    known as a "shared lock"). A shared lock will only be acquired if no
    exclusive locks are pending at the time acquisition is attempted. This
    means multiple shared locks can be acquired simultaneously, however
    additional shared locks will not be acquired once any exclusive lock for
    the lock path is pending. The shared lock strategy is most useful when
    multiple clients require read-only access to a resource but writing to that
    resource requires exclusive access. To use the shared locks strategy,
    invoke :py:meth:`__init__` and indicate a shared or exclusive lock via the
    ``exclusive`` parameter.

    **Revocable shared locks** provide the same locking guarantees and usage
    behavior as the shared locks strategy described above, however add the
    ability for any blocked lock acquisition request to signal to the blocking
    locks (or other lock requests which would be granted before it) to revoke.
    This is useful if shared lock holders do not routinely release resources
    (e.g. they are long-running readers) but are able to do so on request. Given
    cooperation from earlier lock holders or requestors is required, a callback
    is used to signal a revocation request. In the callback any resources
    should be released and then :py:meth:`cancel` and :py:meth:`release`
    invoked so the lock is removed. Note that a callback may safely ignore the
    callback notification if desired. To use the revocable shared locks
    strategy, invoke :py:meth:`acquire` with ``revoke=True``. This indicates a
    blocked lock request should request the revocation of any earlier blocking
    locks. For locks that can be interrupted and respond to such revocation
    requests, use the ``unlock`` parameter of :py:meth:`acquire` to provide the
    callback function that should be invoked on the first (and only first)
    revocation request.

    Example exclusive lock usage with a :class:`~kazoo.client.KazooClient`
    instance:

    .. code-block:: python

        zk = KazooClient()
        lock = zk.Lock("/lockpath", "my-identifier")
        with lock:  # blocks waiting for exclusive lock acquisition
            # do something with the lock
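
    Example shared and revocable lock usage (a minimal sketch; the
    ``stop_reading`` callback and the ``"reader-1"`` / ``"writer-1"``
    identifiers are illustrative, and the reader and writer would normally
    live in separate processes):

    .. code-block:: python

        zk = KazooClient()
        zk.start()

        # a shared (read) lock that withdraws itself if asked to revoke
        read_lock = Lock(zk, "/lockpath", "reader-1", exclusive=False)

        def stop_reading():
            # free any resources held, then drop the lock as prescribed above
            read_lock.cancel()
            read_lock.release()

        read_lock.acquire(unlock=stop_reading)

        # an exclusive (write) lock that asks earlier blockers to unlock
        write_lock = Lock(zk, "/lockpath", "writer-1")
        write_lock.acquire(revoke=True)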

    """
    _MODE_SHARED = '__SHARED__'
    _MODE_EXCLUSIVE = '__EXCLUSIVE__'
    _UNLOCK_REQUEST = '__UNLOCK__'
    _UNLOCK_SUFFIX = ' ' + _UNLOCK_REQUEST

    def __init__(self, client, path, identifier=None, exclusive=True):
        """Create a Kazoo lock.

        :param client: The Kazoo client
        :type client: :class:`~kazoo.client.KazooClient`
        :param path: The lock path to use. May not contain the strings
                     ``__SHARED__`` or ``__EXCLUSIVE__``, as they are used
                     internally
        :type path: str
        :param identifier: Name to use for this lock contender, which may be
                           useful for querying to see who the current lock
                           :py:meth:`contenders` are. May not contain the
                           string ``__UNLOCK__``, as this is used internally.
        :type identifier: str
        :param exclusive: Whether this is an exclusive lock (``False`` means
                          a "shared lock" as described above)
        :type exclusive: bool

        .. versionadded:: 1.4
            The exclusive option.
        """
        if self._MODE_SHARED in path or self._MODE_EXCLUSIVE in path:
            raise ValueError('Path "{}" contains a reserved word'.format(path))

        if identifier and self._UNLOCK_REQUEST in str(identifier):
            raise ValueError('Identifier "{}" contains a reserved word'.format(
                identifier))

        self.client = client
        self.path = path
        self.exclusive = exclusive

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')

        self.wake_event = client.handler.event_object()

        mode_suffix = self._MODE_EXCLUSIVE if exclusive else self._MODE_SHARED

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + mode_suffix
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None)

    def _ensure_path(self):
        self.client.ensure_path(self.path)
        self.assured_path = True

    def cancel(self):
        """Cancel a pending lock acquire."""
        self.cancelled = True
        self.wake_event.set()

    def acquire(self, blocking=True, timeout=None, revoke=False, unlock=None):
        """
        Acquire the lock. By default, blocks and waits forever.

        :param blocking: Block until lock is obtained or return immediately.
        :type blocking: bool
        :param timeout: Don't wait forever to acquire the lock.
        :type timeout: float or None
        :param revoke: Identify all existing locks and lock requests that
                       prevent this lock being acquired and immediately request
                       them to unlock (this does not mean they will unlock or
                       are even listening for such requests though)
        :type revoke: bool
        :param unlock: The callback which will be invoked exactly once if
                       another lock used ``revoke=True`` and this lock or lock
                       request is blocking that lock from being acquired (it is
                       legal to use ``None`` to ignore revocation requests, or
                       provide a callback which takes no action)
        :type unlock: a zero-parameter function

        :returns: Was the lock acquired?
        :rtype: bool

        :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
                 wasn't acquired within `timeout` seconds.

        .. versionadded:: 1.1
            The timeout option.
        .. versionadded:: 1.4
            The revoke and unlock options.
        """
        try:
            retry = self._retry.copy()
            retry.deadline = timeout
            self.is_acquired = retry(self._inner_acquire, blocking=blocking,
                                     timeout=timeout, revoke=revoke,
                                     unlock=unlock)
        except KazooException:
            # if we did ultimately fail, attempt to clean up
            self._best_effort_cleanup()
            self.cancelled = False
            raise
        except RetryFailedError:  # pragma: nocover
            self._best_effort_cleanup()

        if not self.is_acquired:
            self._delete_node(self.node)

        return self.is_acquired

    def _inner_acquire(self, blocking, timeout, revoke, unlock):
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        node = None
        if self.create_tried:
            node = self._find_node()
        else:
            self.create_tried = True

        if not node:
            node = self.client.create(self.create_path, self.data,
                                      ephemeral=True, sequence=True)
            if unlock:
                # watch this node for its first data change (the only other
                # events would be deletion or additional data change events, so
                # either way the absence of additional events is fine)
                def unlock_callback(event):
                    if event.type == EventType.CHANGED:
                        unlock()

                data, _ = self.client.get(node, unlock_callback)
                if self._UNLOCK_REQUEST in data.decode('utf-8'):
                    # a request to revoke our request has already been received
                    # (we let the callback know about this, but we keep going
                    # given the callback is under no obligation to comply)
                    unlock()  # pragma: nocover

            # strip off path to node
            node = node[len(self.path) + 1:]

        self.node = node

        while True:
            self.wake_event.clear()

            # bail out with an exception if cancellation has been requested
            if self.cancelled:
                raise CancelledError()

            children = self._get_sorted_children()

            try:
                our_index = children.index(node)
            except ValueError:  # pragma: nocover
                # somehow we aren't in the children -- probably we are
                # recovering from a session failure and our ephemeral
                # node was removed
                raise ForceRetryError()

            acquired, blockers = self.acquired_lock(children, our_index)
            if acquired:
                return True

            if not blocking:
                return False

            # we are in the mix
            if revoke:
                for child in blockers:
                    try:
                        child_path = self.path + "/" + child
                        data, stat = self.client.get(child_path)
                        decoded_data = data.decode('utf-8')
                        if self._UNLOCK_REQUEST not in decoded_data:
                            data = str(decoded_data +
                                       self._UNLOCK_SUFFIX).encode('utf-8')
                            self.client.set(child_path, data)
                    except NoNodeError:  # pragma: nocover
                        pass

            # watch the last blocker and bide our time
            predecessor = self.path + "/" + blockers[-1]
            if self.client.exists(predecessor, self._watch_predecessor):
                self.wake_event.wait(timeout)
                if not self.wake_event.isSet():
                    raise LockTimeout("Failed to acquire lock on %s after %s "
                                      "seconds" % (self.path, timeout))

    def acquired_lock(self, children, index):
        """Return whether we acquired the lock and, if not, the list of
        contenders blocking us.

        """
        prior_nodes = children[:index]
        if self.exclusive:
            return (index == 0, prior_nodes)

        # Shared locks are only unavailable if a prior lock is exclusive
        prior_exclusive = [x for x in prior_nodes
                           if self._MODE_EXCLUSIVE in x]
        if prior_exclusive:
            return (False, prior_exclusive)
        return (True, None)

    def _watch_predecessor(self, event):
        self.wake_event.set()

    def _get_sorted_children(self):
        children = self.client.get_children(self.path)

        # zookeeper sequence node suffix of %010d is relied upon for sorting
        children.sort(key=lambda c: c[-10:])
        return children

    def _find_node(self):
        children = self.client.get_children(self.path)
        for child in children:
            if child.startswith(self.prefix):
                return child
        return None

    def _delete_node(self, node):
        self.client.delete(self.path + "/" + node)

    def _best_effort_cleanup(self):
        try:
            node = self._find_node()
            if node:
                self._delete_node(node)
        except KazooException:  # pragma: nocover
            pass

    def release(self):
        """Release the lock immediately."""
        return self.client.retry(self._inner_release)

    def _inner_release(self):
        if not self.is_acquired:
            return False

        try:
            self._delete_node(self.node)
        except NoNodeError:  # pragma: nocover
            pass

        self.is_acquired = False
        self.node = None

        return True

    def contenders(self, unlocks_only=False):
        """Return an ordered list of the current contenders for the
        lock.

        .. note::

            If the contenders did not set an identifier, it will appear
            as a blank string.

        :param unlocks_only: indicates whether to only return those contenders
                             which have been requested to revoke their locks or
                             lock requests
        :type unlocks_only: bool
        :return: a list of contender identifiers
        :rtype: list

        .. versionadded:: 1.4
            The unlocks_only option.
        """
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        children = self._get_sorted_children()

        contenders = []
        for child in children:
            try:
                data, stat = self.client.get(self.path + "/" + child)
                identifier = data.decode('utf-8')
                if not unlocks_only or self._UNLOCK_REQUEST in identifier:
                    identifier = identifier.replace(self._UNLOCK_SUFFIX, '')
                    contenders.append(identifier)
            except NoNodeError:  # pragma: nocover
                pass
        return contenders

    def __enter__(self):
        self.acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
Example #44
class KazooClient(object):
    """An Apache Zookeeper Python client supporting alternate callback
    handlers and high-level functionality.

    Watch functions registered with this class will not get session
    events, unlike the default Zookeeper watches. They will also be
    called with a single argument, a
    :class:`~kazoo.protocol.states.WatchedEvent` instance.

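    A sketch of registering a watch function (``my_watch`` is an
    illustrative name):

    .. code-block:: python

        def my_watch(event):
            # event.path, event.type and event.state describe the change
            print(event.path)

        zk = KazooClient()
        zk.start()
        children = zk.get_children("/", watch=my_watch)
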
    """
    def __init__(self, hosts='127.0.0.1:2181',
                 timeout=10.0, client_id=None, handler=None,
                 default_acl=None, auth_data=None, read_only=None,
                 randomize_hosts=True, connection_retry=None,
                 command_retry=None, logger=None, **kwargs):
        """Create a :class:`KazooClient` instance. All time arguments
        are in seconds.

        :param hosts: Comma-separated list of hosts to connect to
                      (e.g. 127.0.0.1:2181,127.0.0.1:2182,[::1]:2183).
        :param timeout: The longest to wait for a Zookeeper connection.
        :param client_id: A Zookeeper client id, used when
                          re-establishing a prior session connection.
        :param handler: An instance of a class implementing the
                        :class:`~kazoo.interfaces.IHandler` interface
                        for callback handling.
        :param default_acl: A default ACL used on node creation.
        :param auth_data:
            A list of authentication credentials to use for the
            connection. Should be a list of (scheme, credential)
            tuples as :meth:`add_auth` takes.
        :param read_only: Allow connections to read only servers.
        :param randomize_hosts: By default randomize host selection.
        :param connection_retry:
            A :class:`kazoo.retry.KazooRetry` object to use for
            retrying the connection to Zookeeper. Also can be a dict of
            options which will be used for creating one.
        :param command_retry:
            A :class:`kazoo.retry.KazooRetry` object to use for
            the :meth:`KazooClient.retry` method. Also can be a dict of
            options which will be used for creating one.
        :param logger: A custom logger to use instead of the module
            global `log` instance.

        Basic Example:

        .. code-block:: python

            zk = KazooClient()
            zk.start()
            children = zk.get_children('/')
            zk.stop()

        As a convenience all recipe classes are available as attributes
        and get automatically bound to the client. For example::

            zk = KazooClient()
            zk.start()
            lock = zk.Lock('/lock_path')

        .. versionadded:: 0.6
            The read_only option. Requires Zookeeper 3.4+

        .. versionadded:: 0.6
            The retry_max_delay option.

        .. versionadded:: 0.6
            The randomize_hosts option.

        .. versionchanged:: 0.8
            Removed the unused watcher argument (was second argument).

        .. versionadded:: 1.2
            The connection_retry, command_retry and logger options.

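        A sketch of providing retry configuration (``connection_retry``
        and ``command_retry`` accept either a
        :class:`kazoo.retry.KazooRetry` instance or a dict of its options;
        the values shown are illustrative):

        .. code-block:: python

            from kazoo.retry import KazooRetry

            conn_retry = KazooRetry(max_tries=-1, delay=0.5, max_delay=60)
            zk = KazooClient(
                hosts='127.0.0.1:2181',
                connection_retry=conn_retry,
                command_retry={'max_tries': 3, 'delay': 0.2},
            )
            zk.start()
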
        """
        self.logger = logger or log

        # Record the handler strategy used
        self.handler = handler if handler else SequentialThreadingHandler()
        if inspect.isclass(self.handler):
            raise ConfigurationError("Handler must be an instance of a class, "
                                     "not the class: %s" % self.handler)

        self.auth_data = auth_data if auth_data else set([])
        self.default_acl = default_acl
        self.randomize_hosts = randomize_hosts
        self.hosts = None
        self.chroot = None
        self.set_hosts(hosts)

        # Curator like simplified state tracking, and listeners for
        # state transitions
        self._state = KeeperState.CLOSED
        self.state = KazooState.LOST
        self.state_listeners = set()

        self._reset()
        self.read_only = read_only

        if client_id:
            self._session_id = client_id[0]
            self._session_passwd = client_id[1]
        else:
            self._reset_session()

        # ZK uses milliseconds
        self._session_timeout = int(timeout * 1000)

        # We use events like twitter's client to track current and
        # desired state (connected, and whether to shutdown)
        self._live = self.handler.event_object()
        self._writer_stopped = self.handler.event_object()
        self._stopped = self.handler.event_object()
        self._stopped.set()
        self._writer_stopped.set()

        self.retry = self._conn_retry = None

        if type(connection_retry) is dict:
            self._conn_retry = KazooRetry(**connection_retry)
        elif type(connection_retry) is KazooRetry:
            self._conn_retry = connection_retry

        if type(command_retry) is dict:
            self.retry = KazooRetry(**command_retry)
        elif type(command_retry) is KazooRetry:
            self.retry = command_retry

        if type(self._conn_retry) is KazooRetry:
            if self.handler.sleep_func != self._conn_retry.sleep_func:
                raise ConfigurationError("Retry handler and event handler "
                                         "must use the same sleep func")

        if type(self.retry) is KazooRetry:
            if self.handler.sleep_func != self.retry.sleep_func:
                raise ConfigurationError("Command retry handler and event "
                                         "handler must use the same sleep func")

        if self.retry is None or self._conn_retry is None:
            old_retry_keys = dict(_RETRY_COMPAT_DEFAULTS)
            for key in old_retry_keys:
                try:
                    old_retry_keys[key] = kwargs.pop(key)
                    warnings.warn('Passing retry configuration param %s to the'
                            ' client directly is deprecated, please pass a'
                            ' configured retry object (using param %s)' % (
                                key, _RETRY_COMPAT_MAPPING[key]),
                            DeprecationWarning, stacklevel=2)
                except KeyError:
                    pass

            retry_keys = {}
            for oldname, value in old_retry_keys.items():
                retry_keys[_RETRY_COMPAT_MAPPING[oldname]] = value

            if self._conn_retry is None:
                self._conn_retry = KazooRetry(
                    sleep_func=self.handler.sleep_func,
                    **retry_keys)
            if self.retry is None:
                self.retry = KazooRetry(
                    sleep_func=self.handler.sleep_func,
                    **retry_keys)

        self._conn_retry.interrupt = lambda: self._stopped.is_set()
        self._connection = ConnectionHandler(self, self._conn_retry.copy(),
            logger=self.logger)

        # Every retry call should have its own copy of the retry helper
        # to avoid shared retry counts
        self._retry = self.retry
        def _retry(*args, **kwargs):
            return self._retry.copy()(*args, **kwargs)
        self.retry = _retry

        self.Barrier = partial(Barrier, self)
        self.Counter = partial(Counter, self)
        self.DoubleBarrier = partial(DoubleBarrier, self)
        self.ChildrenWatch = partial(ChildrenWatch, self)
        self.DataWatch = partial(DataWatch, self)
        self.Election = partial(Election, self)
        self.Lock = partial(Lock, self)
        self.Party = partial(Party, self)
        self.Queue = partial(Queue, self)
        self.LockingQueue = partial(LockingQueue, self)
        self.SetPartitioner = partial(SetPartitioner, self)
        self.Semaphore = partial(Semaphore, self)
        self.ShallowParty = partial(ShallowParty, self)

        # If we got any unhandled keywords, complain like python would
        if kwargs:
            raise TypeError('__init__() got unexpected keyword arguments: %s'
                            % (kwargs.keys(),))

    def _reset(self):
        """Resets a variety of client states for a new connection."""
        self._queue = deque()
        self._pending = deque()

        self._reset_watchers()
        self._reset_session()
        self.last_zxid = 0
        self._protocol_version = None

    def _reset_watchers(self):
        self._child_watchers = defaultdict(set)
        self._data_watchers = defaultdict(set)

    def _reset_session(self):
        self._session_id = None
        self._session_passwd = b'\x00' * 16

    @property
    def client_state(self):
        """Returns the last Zookeeper client state

        This is the non-simplified state information and is generally
        not as useful as the simplified KazooState information.

        """
        return self._state

    @property
    def client_id(self):
        """Returns the client id for this Zookeeper session if
        connected.

        :returns: client id which consists of the session id and
                  password.
        :rtype: tuple
        """
        if self._live.is_set():
            return (self._session_id, self._session_passwd)
        return None

    @property
    def connected(self):
        """Returns whether the Zookeeper connection has been
        established."""
        return self._live.is_set()

    def set_hosts(self, hosts, randomize_hosts=None):
        """Set the list of hosts used by this client.

        This function accepts the same format hosts parameter as the init
        function and sets the client to use the new hosts the next time it
        needs to look up a set of hosts. This function does not affect the
        current connected status.

        It is not currently possible to change the chroot with this function,
        setting a host list with a new chroot will raise a ConfigurationError.

        :param hosts: see description in :meth:`KazooClient.__init__`
        :param randomize_hosts: override client default for host randomization
        :raises:
            :exc:`ConfigurationError` if the hosts argument changes the chroot

        .. versionadded:: 1.4

        .. warning::

            Using this function to point a client to a completely disparate
            zookeeper server cluster has undefined behavior.

        """

        if randomize_hosts is None:
            randomize_hosts = self.randomize_hosts

        self.hosts, chroot = collect_hosts(hosts, randomize_hosts)

        if chroot:
            new_chroot = normpath(chroot)
        else:
            new_chroot = ''

        if self.chroot is not None and new_chroot != self.chroot:
            raise ConfigurationError("Changing chroot at runtime is not "
                                     "currently supported")

        self.chroot = new_chroot

    def add_listener(self, listener):
        """Add a function to be called for connection state changes.

        This function will be called with a
        :class:`~kazoo.protocol.states.KazooState` instance indicating
        the new connection state on state transitions.

        .. warning::

            This function must not block. If it's at all likely that it
            might need data or a value that could result in blocking,
            then the :meth:`~kazoo.interfaces.IHandler.spawn` method
            should be used so that the listener can return immediately.

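        A sketch of a listener (``state_listener`` is an illustrative name;
        ``KazooState`` comes from :mod:`kazoo.protocol.states`):

        .. code-block:: python

            def state_listener(state):
                if state == KazooState.LOST:
                    pass  # session expired; re-create ephemeral nodes
                elif state == KazooState.SUSPENDED:
                    pass  # connection dropped; pause non-critical work
                else:
                    pass  # KazooState.CONNECTED

            zk.add_listener(state_listener)
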
        """
        if not (listener and callable(listener)):
            raise ConfigurationError("listener must be callable")
        self.state_listeners.add(listener)

    def remove_listener(self, listener):
        """Remove a listener function"""
        self.state_listeners.discard(listener)

    def _make_state_change(self, state):
        # skip if state is current
        if self.state == state:
            return

        self.state = state

        # Create copy of listeners for iteration in case one needs to
        # remove itself
        for listener in list(self.state_listeners):
            try:
                remove = listener(state)
                if remove is True:
                    self.remove_listener(listener)
            except Exception:
                self.logger.exception("Error in connection state listener")

    def _session_callback(self, state):
        if state == self._state:
            return

        # Note that we don't check self.state == LOST since that's also
        # the client's initial state
        dead_state = self._state in LOST_STATES
        self._state = state

        # If we were previously closed or had an expired session, and
        # are now connecting, don't bother with the rest of the
        # transitions since they only apply after
        # we've established a connection
        if dead_state and state == KeeperState.CONNECTING:
            self.logger.log(BLATHER, "Skipping state change")
            return

        if state in (KeeperState.CONNECTED, KeeperState.CONNECTED_RO):
            self.logger.info("Zookeeper connection established, state: %s", state)
            self._live.set()
            self._make_state_change(KazooState.CONNECTED)
        elif state in LOST_STATES:
            self.logger.info("Zookeeper session lost, state: %s", state)
            self._live.clear()
            self._make_state_change(KazooState.LOST)
            self._notify_pending(state)
            self._reset()
        else:
            self.logger.info("Zookeeper connection lost")
            # Connection lost
            self._live.clear()
            self._notify_pending(state)
            self._make_state_change(KazooState.SUSPENDED)
            self._reset_watchers()

    def _notify_pending(self, state):
        """Used to clear a pending response queue and request queue
        during connection drops."""
        if state == KeeperState.AUTH_FAILED:
            exc = AuthFailedError()
        elif state == KeeperState.EXPIRED_SESSION:
            exc = SessionExpiredError()
        else:
            exc = ConnectionLoss()

        while True:
            try:
                request, async_object, xid = self._pending.popleft()
                if async_object:
                    async_object.set_exception(exc)
            except IndexError:
                break

        while True:
            try:
                request, async_object = self._queue.popleft()
                if async_object:
                    async_object.set_exception(exc)
            except IndexError:
                break

    def _safe_close(self):
        self.handler.stop()
        timeout = self._session_timeout // 1000
        if timeout < 10:
            timeout = 10
        if not self._connection.stop(timeout):
            raise WriterNotClosedException(
                "Writer still open from prior connection "
                "and wouldn't close after %s seconds" % timeout)

    def _call(self, request, async_object):
        """Ensure there's an active connection and put the request in
        the queue if there is.

        Returns False if the call short circuits due to AUTH_FAILED,
        CLOSED, EXPIRED_SESSION or CONNECTING state.

        """

        if self._state == KeeperState.AUTH_FAILED:
            async_object.set_exception(AuthFailedError())
            return False
        elif self._state == KeeperState.CLOSED:
            async_object.set_exception(ConnectionClosedError(
                "Connection has been closed"))
            return False
        elif self._state in (KeeperState.EXPIRED_SESSION,
                             KeeperState.CONNECTING):
            async_object.set_exception(SessionExpiredError())
            return False

        self._queue.append((request, async_object))

        # wake the connection, guarding against a race with close()
        write_pipe = self._connection._write_pipe
        if write_pipe is None:
            async_object.set_exception(ConnectionClosedError(
                "Connection has been closed"))
            return False
        try:
            os.write(write_pipe, b'\0')
        except:
            async_object.set_exception(ConnectionClosedError(
                "Connection has been closed"))

    def start(self, timeout=15):
        """Initiate connection to ZK.

        :param timeout: Time in seconds to wait for connection to
                        succeed.
        :raises: :attr:`~kazoo.interfaces.IHandler.timeout_exception`
                 if the connection wasn't established within `timeout`
                 seconds.

        """
        event = self.start_async()
        event.wait(timeout=timeout)
        if not self.connected:
            # We time-out, ensure we are disconnected
            self.stop()
            raise self.handler.timeout_exception("Connection time-out")

        if self.chroot and not self.exists("/"):
            warnings.warn("No chroot path exists, the chroot path "
                          "should be created before normal use.")

    def start_async(self):
        """Asynchronously initiate connection to ZK.

        :returns: An event object that can be checked to see if the
                  connection is alive.
        :rtype: :class:`~threading.Event` compatible object.

        """
        # If we're already connected, ignore
        if self._live.is_set():
            return self._live

        # Make sure we're safely closed
        self._safe_close()

        # We've been asked to connect, clear the stop and our writer
        # thread indicator
        self._stopped.clear()
        self._writer_stopped.clear()

        # Start the handler
        self.handler.start()

        # Start the connection
        self._connection.start()
        return self._live

    def stop(self):
        """Gracefully stop this Zookeeper session.

        This method can be called while a reconnection attempt is in
        progress, which will then be halted.

        Once the connection is closed, its session becomes invalid. All
        the ephemeral nodes in the ZooKeeper server associated with the
        session will be removed. The watches left on those nodes (and
        on their parents) will be triggered.

        """
        if self._stopped.is_set():
            return

        self._stopped.set()
        self._queue.append((CloseInstance, None))
        os.write(self._connection._write_pipe, b'\0')
        self._safe_close()

    def restart(self):
        """Stop and restart the Zookeeper session."""
        self.stop()
        self.start()

    def close(self):
        """Free any resources held by the client.

        This method should be called on a stopped client before it is
        discarded. Not doing so may result in filehandles being leaked.

        .. versionadded:: 1.0
        """
        self._connection.close()

    def command(self, cmd=b'ruok'):
        """Send a management command to the current ZK server.

        Examples are `ruok`, `envi` or `stat`.

        :returns: An unstructured textual response.
        :rtype: str

        :raises:
            :exc:`ConnectionLoss` if there is no connection open, or
            possibly a :exc:`socket.error` if there's a problem with
            the connection used just for this command.

        .. versionadded:: 0.5

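        A minimal usage sketch:

        .. code-block:: python

            zk = KazooClient()
            zk.start()
            print(zk.command(b'ruok'))  # 'imok' when the server is healthy
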
        """
        if not self._live.is_set():
            raise ConnectionLoss("No connection to server")

        peer = self._connection._socket.getpeername()
        sock = self.handler.create_connection(
            peer, timeout=self._session_timeout / 1000.0)
        sock.sendall(cmd)
        result = sock.recv(8192)
        sock.close()
        return result.decode('utf-8', 'replace')

    def server_version(self):
        """Get the version of the currently connected ZK server.

        :returns: The server version, for example (3, 4, 3).
        :rtype: tuple

        .. versionadded:: 0.5

        """
        data = self.command(b'envi')
        string = ENVI_VERSION.match(data).group(1)
        return tuple([int(i) for i in string.split('.')])

    def add_auth(self, scheme, credential):
        """Send credentials to server.

        :param scheme: authentication scheme (default supported:
                       "digest").
        :param credential: the credential -- value depends on scheme.

        :returns: True if it was successful.
        :rtype: bool

        :raises:
            :exc:`~kazoo.exceptions.AuthFailedError` if it failed though
            the session state will be set to AUTH_FAILED as well.

        """
        return self.add_auth_async(scheme, credential).get()

    def add_auth_async(self, scheme, credential):
        """Asynchronously send credentials to server. Takes the same
        arguments as :meth:`add_auth`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(scheme, basestring):
            raise TypeError("Invalid type for scheme")
        if not isinstance(credential, basestring):
            raise TypeError("Invalid type for credential")

        # we need this auth data to re-authenticate on reconnect
        self.auth_data.add((scheme, credential))

        async_result = self.handler.async_result()
        self._call(Auth(0, scheme, credential), async_result)
        return async_result

    def unchroot(self, path):
        """Strip the chroot if applicable from the path."""
        if not self.chroot:
            return path

        if path.startswith(self.chroot):
            return path[len(self.chroot):]
        else:
            return path

    def sync_async(self, path):
        """Asynchronous sync.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        async_result = self.handler.async_result()
        self._call(Sync(_prefix_root(self.chroot, path)), async_result)
        return async_result

    def sync(self, path):
        """Sync, blocks until response is acknowledged.

        Flushes channel between process and leader.

        :param path: path of node.
        :returns: The node path that was synced.
        :raises:
            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

        .. versionadded:: 0.5

        """
        return self.sync_async(path).get()

    def create(self, path, value=b"", acl=None, ephemeral=False,
               sequence=False, makepath=False):
        """Create a node with the given value as its data. Optionally
        set an ACL on the node.

        The ephemeral and sequence arguments determine the type of the
        node.

        An ephemeral node will be automatically removed by ZooKeeper
        when the session associated with the creation of the node
        expires.

        A sequential node will be given the specified path plus a
        suffix `i` where i is the current sequential number of the
        node. The sequence number is always fixed length of 10 digits,
        0 padded. Once such a node is created, the sequential number
        will be incremented by one.

        If a node with the same actual path already exists in
        ZooKeeper, a NodeExistsError will be raised. Note that since a
        different actual path is used for each invocation of creating
        sequential nodes with the same path argument, the call will
        never raise NodeExistsError.

        If the parent node does not exist in ZooKeeper, a NoNodeError
        will be raised. Setting the optional `makepath` argument to
        `True` will create all missing parent nodes instead.

        An ephemeral node cannot have children. If the parent node of
        the given path is ephemeral, a NoChildrenForEphemeralsError
        will be raised.

        This operation, if successful, will trigger all the watches
        left on the node of the given path by :meth:`exists` and
        :meth:`get` API calls, and the watches left on the parent node
        by :meth:`get_children` API calls.

        The maximum allowable size of the node value is 1 MB. Values
        larger than this will cause a ZookeeperError to be raised.

        :param path: Path of node.
        :param value: Initial bytes value of node.
        :param acl: :class:`~kazoo.security.ACL` list.
        :param ephemeral: Boolean indicating whether node is ephemeral
                          (tied to this session).
        :param sequence: Boolean indicating whether path is suffixed
                         with a unique index.
        :param makepath: Whether the path should be created if it
                         doesn't exist.
        :returns: Real path of the new node.
        :rtype: str

        :raises:
            :exc:`~kazoo.exceptions.NodeExistsError` if the node
            already exists.

            :exc:`~kazoo.exceptions.NoNodeError` if parent nodes are
            missing.

            :exc:`~kazoo.exceptions.NoChildrenForEphemeralsError` if
            the parent node is an ephemeral node.

            :exc:`~kazoo.exceptions.ZookeeperError` if the provided
            value is too large.

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

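        A usage sketch (the paths and values are illustrative):

        .. code-block:: python

            zk.create("/app/config", b"some data", makepath=True)

            # ephemeral, sequential child; the returned path carries the
            # 10-digit, zero-padded sequence suffix
            real_path = zk.create("/app/workers/worker-", b"",
                                  ephemeral=True, sequence=True)
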
        """
        acl = acl or self.default_acl
        return self.create_async(path, value, acl=acl, ephemeral=ephemeral,
            sequence=sequence, makepath=makepath).get()

    def create_async(self, path, value=b"", acl=None, ephemeral=False,
                     sequence=False, makepath=False):
        """Asynchronously create a ZNode. Takes the same arguments as
        :meth:`create`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        .. versionadded:: 1.1
            The makepath option.

        """
        if acl is None and self.default_acl:
            acl = self.default_acl

        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if acl and (isinstance(acl, ACL) or
                    not isinstance(acl, (tuple, list))):
            raise TypeError("acl must be a tuple/list of ACL's")
        if value is not None and not isinstance(value, bytes):
            raise TypeError("value must be a byte string")
        if not isinstance(ephemeral, bool):
            raise TypeError("ephemeral must be a bool")
        if not isinstance(sequence, bool):
            raise TypeError("sequence must be a bool")
        if not isinstance(makepath, bool):
            raise TypeError("makepath must be a bool")

        flags = 0
        if ephemeral:
            flags |= 1
        if sequence:
            flags |= 2
        if acl is None:
            acl = OPEN_ACL_UNSAFE

        async_result = self.handler.async_result()

        @capture_exceptions(async_result)
        def do_create():
            result = self._create_async_inner(path, value, acl, flags, trailing=sequence)
            result.rawlink(create_completion)

        @capture_exceptions(async_result)
        def retry_completion(result):
            result.get()
            do_create()

        @wrap(async_result)
        def create_completion(result):
            try:
                return self.unchroot(result.get())
            except NoNodeError:
                if not makepath:
                    raise
                if sequence and path.endswith('/'):
                    parent = path.rstrip('/')
                else:
                    parent, _ = split(path)
                self.ensure_path_async(parent, acl).rawlink(retry_completion)

        do_create()
        return async_result

    def _create_async_inner(self, path, value, acl, flags, trailing=False):
        async_result = self.handler.async_result()
        call_result = self._call(
            Create(_prefix_root(self.chroot, path, trailing=trailing),
                   value, acl, flags), async_result)
        if call_result is False:
            # We hit a short-circuit exit on the _call. Because we are
            # not using the original async_result here, we bubble the
            # exception upwards to the do_create function in
            # KazooClient.create so that it gets set on the correct
            # async_result object
            raise async_result.exception
        return async_result

    def ensure_path(self, path, acl=None):
        """Recursively create a path if it doesn't exist.

        :param path: Path of node.
        :param acl: Permissions for node.

        """
        return self.ensure_path_async(path, acl).get()

    def ensure_path_async(self, path, acl=None):
        """Recursively create a path asynchronously if it doesn't
        exist. Takes the same arguments as :meth:`ensure_path`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        .. versionadded:: 1.1

        """
        acl = acl or self.default_acl
        async_result = self.handler.async_result()

        @wrap(async_result)
        def create_completion(result):
            try:
                return result.get()
            except NodeExistsError:
                return True

        @capture_exceptions(async_result)
        def prepare_completion(next_path, result):
            result.get()
            self.create_async(next_path, acl=acl).rawlink(create_completion)

        @wrap(async_result)
        def exists_completion(path, result):
            if result.get():
                return True
            parent, node = split(path)
            if node:
                self.ensure_path_async(parent, acl=acl).rawlink(
                    partial(prepare_completion, path))
            else:
                self.create_async(path, acl=acl).rawlink(create_completion)

        self.exists_async(path).rawlink(partial(exists_completion, path))

        return async_result

    def exists(self, path, watch=None):
        """Check if a node exists.

        If a watch is provided, it will be left on the node with the
        given path. The watch will be triggered by a successful
        operation that creates/deletes the node or sets the data on the
        node.

        :param path: Path of node.
        :param watch: Optional watch callback to set for future changes
                      to this path.
        :returns: ZnodeStat of the node if it exists, else None if the
                  node does not exist.
        :rtype: :class:`~kazoo.protocol.states.ZnodeStat` or `None`.

        :raises:
            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

        """
        return self.exists_async(path, watch).get()

    def exists_async(self, path, watch=None):
        """Asynchronously check if a node exists. Takes the same
        arguments as :meth:`exists`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if watch and not callable(watch):
            raise TypeError("watch must be a callable")

        async_result = self.handler.async_result()
        self._call(Exists(_prefix_root(self.chroot, path), watch),
                   async_result)
        return async_result

    def get(self, path, watch=None):
        """Get the value of a node.

        If a watch is provided, it will be left on the node with the
        given path. The watch will be triggered by a successful
        operation that sets data on the node, or deletes the node.

        :param path: Path of node.
        :param watch: Optional watch callback to set for future changes
                      to this path.
        :returns:
            Tuple (value, :class:`~kazoo.protocol.states.ZnodeStat`) of
            node.
        :rtype: tuple

        :raises:
            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
            exist

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code

        """
        return self.get_async(path, watch).get()

    def get_async(self, path, watch=None):
        """Asynchronously get the value of a node. Takes the same
        arguments as :meth:`get`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if watch and not callable(watch):
            raise TypeError("watch must be a callable")

        async_result = self.handler.async_result()
        self._call(GetData(_prefix_root(self.chroot, path), watch),
                   async_result)
        return async_result

    def get_children(self, path, watch=None, include_data=False):
        """Get a list of child nodes of a path.

        If a watch is provided it will be left on the node with the
        given path. The watch will be triggered by a successful
        operation that deletes the node of the given path or
        creates/deletes a child under the node.

        The list of children returned is not sorted and no guarantee is
        provided as to its natural or lexical order.

        :param path: Path of node to list.
        :param watch: Optional watch callback to set for future changes
                      to this path.
        :param include_data:
            Include the :class:`~kazoo.protocol.states.ZnodeStat` of
            the node in addition to the children. This option changes
            the return value to be a tuple of (children, stat).

        :returns: List of child node names, or tuple if `include_data`
                  is `True`.
        :rtype: list

        :raises:
            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
            exist.

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

        .. versionadded:: 0.5
            The `include_data` option.

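        A sketch of the `include_data` option (the path is illustrative):

        .. code-block:: python

            children = zk.get_children("/app")
            children, stat = zk.get_children("/app", include_data=True)
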
        """
        return self.get_children_async(path, watch, include_data).get()

    def get_children_async(self, path, watch=None, include_data=False):
        """Asynchronously get a list of child nodes of a path. Takes
        the same arguments as :meth:`get_children`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if watch and not callable(watch):
            raise TypeError("watch must be a callable")
        if not isinstance(include_data, bool):
            raise TypeError("include_data must be a bool")

        async_result = self.handler.async_result()
        if include_data:
            req = GetChildren2(_prefix_root(self.chroot, path), watch)
        else:
            req = GetChildren(_prefix_root(self.chroot, path), watch)
        self._call(req, async_result)
        return async_result

    def get_acls(self, path):
        """Return the ACL and stat of the node of the given path.

        :param path: Path of the node.
        :returns: The ACL array of the given node and its
            :class:`~kazoo.protocol.states.ZnodeStat`.
        :rtype: tuple of (:class:`~kazoo.security.ACL` list,
                :class:`~kazoo.protocol.states.ZnodeStat`)
        :raises:
            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
            exist.

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code

        .. versionadded:: 0.5

        """
        return self.get_acls_async(path).get()

    def get_acls_async(self, path):
        """Return the ACL and stat of the node of the given path. Takes
        the same arguments as :meth:`get_acls`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")

        async_result = self.handler.async_result()
        self._call(GetACL(_prefix_root(self.chroot, path)), async_result)
        return async_result

    def set_acls(self, path, acls, version=-1):
        """Set the ACL for the node of the given path.

        Set the ACL for the node of the given path if such a node
        exists and the given version matches the version of the node.

        :param path: Path for the node.
        :param acls: List of :class:`~kazoo.security.ACL` objects to
                     set.
        :param version: The expected node version that must match.
        :returns: The stat of the node.
        :raises:
            :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
            match.

            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
            exist.

            :exc:`~kazoo.exceptions.InvalidACLError` if the ACL is
            invalid.

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

        .. versionadded:: 0.5

        """
        return self.set_acls_async(path, acls, version).get()

    def set_acls_async(self, path, acls, version=-1):
        """Set the ACL for the node of the given path. Takes the same
        arguments as :meth:`set_acls`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if isinstance(acls, ACL) or not isinstance(acls, (tuple, list)):
            raise TypeError("acl must be a tuple/list of ACL's")
        if not isinstance(version, int):
            raise TypeError("version must be an int")

        async_result = self.handler.async_result()
        self._call(SetACL(_prefix_root(self.chroot, path), acls, version),
                   async_result)
        return async_result

    def set(self, path, value, version=-1):
        """Set the value of a node.

        If the version of the node being updated is newer than the
        supplied version (and the supplied version is not -1), a
        BadVersionError will be raised.

        This operation, if successful, will trigger all the watches on
        the node of the given path left by :meth:`get` API calls.

        The maximum allowable size of the value is 1 MB. Values larger
        than this will cause a ZookeeperError to be raised.

        :param path: Path of node.
        :param value: New data value.
        :param version: Version of node being updated, or -1.
        :returns: Updated :class:`~kazoo.protocol.states.ZnodeStat` of
                  the node.

        :raises:
            :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
            match.

            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
            exist.

            :exc:`~kazoo.exceptions.ZookeeperError` if the provided
            value is too large.

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

        """
        return self.set_async(path, value, version).get()

    def set_async(self, path, value, version=-1):
        """Set the value of a node. Takes the same arguments as
        :meth:`set`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if value is not None and not isinstance(value, bytes):
            raise TypeError("value must be a byte string")
        if not isinstance(version, int):
            raise TypeError("version must be an int")

        async_result = self.handler.async_result()
        self._call(SetData(_prefix_root(self.chroot, path), value, version),
                   async_result)
        return async_result

    def transaction(self):
        """Create and return a :class:`TransactionRequest` object

        Creates a :class:`TransactionRequest` object. A Transaction can
        consist of multiple operations which can be committed as a
        single atomic unit. Either all of the operations will succeed
        or none of them.

        :returns: A TransactionRequest.
        :rtype: :class:`TransactionRequest`

        .. versionadded:: 0.6
            Requires Zookeeper 3.4+

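        A usage sketch (the paths and values are illustrative):

        .. code-block:: python

            transaction = zk.transaction()
            transaction.create('/fresh-node', b'a-value')
            transaction.set_data('/existing-node', b'new-value')
            results = transaction.commit()  # per-operation results, in order
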
        """
        return TransactionRequest(self)

    def delete(self, path, version=-1, recursive=False):
        """Delete a node.

        The call will succeed if such a node exists, and the given
        version matches the node's version (if the given version is -1,
        the default, it matches any node's versions).

        This operation, if successful, will trigger all the watches on
        the node of the given path left by `exists` API calls, and the
        watches on the parent node left by `get_children` API calls.

        :param path: Path of node to delete.
        :param version: Version of node to delete, or -1 for any.
        :param recursive: Recursively delete node and all its children,
                          defaults to False.
        :type recursive: bool

        :raises:
            :exc:`~kazoo.exceptions.BadVersionError` if version doesn't
            match.

            :exc:`~kazoo.exceptions.NoNodeError` if the node doesn't
            exist.

            :exc:`~kazoo.exceptions.NotEmptyError` if the node has
            children.

            :exc:`~kazoo.exceptions.ZookeeperError` if the server
            returns a non-zero error code.

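        A usage sketch (the path is illustrative):

        .. code-block:: python

            zk.delete("/app/some/node", recursive=True)
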
        """
        if not isinstance(recursive, bool):
            raise TypeError("recursive must be a bool")

        if recursive:
            return self._delete_recursive(path)
        else:
            return self.delete_async(path, version).get()

    def delete_async(self, path, version=-1):
        """Asynchronously delete a node. Takes the same arguments as
        :meth:`delete`, with the exception of `recursive`.

        :rtype: :class:`~kazoo.interfaces.IAsyncResult`

        """
        if not isinstance(path, basestring):
            raise TypeError("path must be a string")
        if not isinstance(version, int):
            raise TypeError("version must be an int")
        async_result = self.handler.async_result()
        self._call(Delete(_prefix_root(self.chroot, path), version),
                   async_result)
        return async_result

    def _delete_recursive(self, path):
        try:
            children = self.get_children(path)
        except NoNodeError:
            return True

        if children:
            for child in children:
                if path == "/":
                    child_path = path + child
                else:
                    child_path = path + "/" + child

                self._delete_recursive(child_path)
        try:
            self.delete(path)
        except NoNodeError:  # pragma: nocover
            pass
Example #45
    def __init__(self, hosts='127.0.0.1:2181',
                 timeout=10.0, client_id=None, handler=None,
                 default_acl=None, auth_data=None, read_only=None,
                 randomize_hosts=True, connection_retry=None,
                 command_retry=None, logger=None, **kwargs):
        """Create a :class:`KazooClient` instance. All time arguments
        are in seconds.

        :param hosts: Comma-separated list of hosts to connect to
                      (e.g. 127.0.0.1:2181,127.0.0.1:2182,[::1]:2183).
        :param timeout: The longest to wait for a Zookeeper connection.
        :param client_id: A Zookeeper client id, used when
                          re-establishing a prior session connection.
        :param handler: An instance of a class implementing the
                        :class:`~kazoo.interfaces.IHandler` interface
                        for callback handling.
        :param default_acl: A default ACL used on node creation.
        :param auth_data:
            A list of authentication credentials to use for the
            connection. Should be a list of (scheme, credential)
            tuples as :meth:`add_auth` takes.
        :param read_only: Allow connections to read only servers.
        :param randomize_hosts: By default randomize host selection.
        :param connection_retry:
            A :class:`kazoo.retry.KazooRetry` object to use for
            retrying the connection to Zookeeper. Also can be a dict of
            options which will be used for creating one.
        :param command_retry:
            A :class:`kazoo.retry.KazooRetry` object to use for
            the :meth:`KazooClient.retry` method. Also can be a dict of
            options which will be used for creating one.
        :param logger: A custom logger to use instead of the module
            global `log` instance.

        Basic Example:

        .. code-block:: python

            zk = KazooClient()
            zk.start()
            children = zk.get_children('/')
            zk.stop()

        As a convenience all recipe classes are available as attributes
        and get automatically bound to the client. For example::

            zk = KazooClient()
            zk.start()
            lock = zk.Lock('/lock_path')

        .. versionadded:: 0.6
            The read_only option. Requires Zookeeper 3.4+

        .. versionadded:: 0.6
            The retry_max_delay option.

        .. versionadded:: 0.6
            The randomize_hosts option.

        .. versionchanged:: 0.8
            Removed the unused watcher argument (was second argument).

        .. versionadded:: 1.2
            The connection_retry, command_retry and logger options.

        """
        self.logger = logger or log

        # Record the handler strategy used
        self.handler = handler if handler else SequentialThreadingHandler()
        if inspect.isclass(self.handler):
            raise ConfigurationError("Handler must be an instance of a class, "
                                     "not the class: %s" % self.handler)

        self.auth_data = auth_data if auth_data else set([])
        self.default_acl = default_acl
        self.randomize_hosts = randomize_hosts
        self.hosts = None
        self.chroot = None
        self.set_hosts(hosts)

        # Curator like simplified state tracking, and listeners for
        # state transitions
        self._state = KeeperState.CLOSED
        self.state = KazooState.LOST
        self.state_listeners = set()

        self._reset()
        self.read_only = read_only

        if client_id:
            self._session_id = client_id[0]
            self._session_passwd = client_id[1]
        else:
            self._reset_session()

        # ZK uses milliseconds
        self._session_timeout = int(timeout * 1000)

        # We use events like twitter's client to track current and
        # desired state (connected, and whether to shutdown)
        self._live = self.handler.event_object()
        self._writer_stopped = self.handler.event_object()
        self._stopped = self.handler.event_object()
        self._stopped.set()
        self._writer_stopped.set()

        self.retry = self._conn_retry = None

        if type(connection_retry) is dict:
            self._conn_retry = KazooRetry(**connection_retry)
        elif type(connection_retry) is KazooRetry:
            self._conn_retry = connection_retry

        if type(command_retry) is dict:
            self.retry = KazooRetry(**command_retry)
        elif type(command_retry) is KazooRetry:
            self.retry = command_retry

        if type(self._conn_retry) is KazooRetry:
            if self.handler.sleep_func != self._conn_retry.sleep_func:
                raise ConfigurationError("Retry handler and event handler "
                                         " must use the same sleep func")

        if type(self.retry) is KazooRetry:
            if self.handler.sleep_func != self.retry.sleep_func:
                raise ConfigurationError("Command retry handler and event "
                                         "handler must use the same sleep func")

        if self.retry is None or self._conn_retry is None:
            old_retry_keys = dict(_RETRY_COMPAT_DEFAULTS)
            for key in old_retry_keys:
                try:
                    old_retry_keys[key] = kwargs.pop(key)
                    warnings.warn('Passing retry configuration param %s to the'
                            ' client directly is deprecated, please pass a'
                            ' configured retry object (using param %s)' % (
                                key, _RETRY_COMPAT_MAPPING[key]),
                            DeprecationWarning, stacklevel=2)
                except KeyError:
                    pass

            retry_keys = {}
            for oldname, value in old_retry_keys.items():
                retry_keys[_RETRY_COMPAT_MAPPING[oldname]] = value

            if self._conn_retry is None:
                self._conn_retry = KazooRetry(
                    sleep_func=self.handler.sleep_func,
                    **retry_keys)
            if self.retry is None:
                self.retry = KazooRetry(
                    sleep_func=self.handler.sleep_func,
                    **retry_keys)

        self._conn_retry.interrupt = lambda: self._stopped.is_set()
        self._connection = ConnectionHandler(self, self._conn_retry.copy(),
            logger=self.logger)

        # Every retry call should have its own copy of the retry helper
        # to avoid shared retry counts
        self._retry = self.retry
        def _retry(*args, **kwargs):
            return self._retry.copy()(*args, **kwargs)
        self.retry = _retry

        self.Barrier = partial(Barrier, self)
        self.Counter = partial(Counter, self)
        self.DoubleBarrier = partial(DoubleBarrier, self)
        self.ChildrenWatch = partial(ChildrenWatch, self)
        self.DataWatch = partial(DataWatch, self)
        self.Election = partial(Election, self)
        self.Lock = partial(Lock, self)
        self.Party = partial(Party, self)
        self.Queue = partial(Queue, self)
        self.LockingQueue = partial(LockingQueue, self)
        self.SetPartitioner = partial(SetPartitioner, self)
        self.Semaphore = partial(Semaphore, self)
        self.ShallowParty = partial(ShallowParty, self)

        # If we got any unhandled keywords, complain like python would
        if kwargs:
            raise TypeError('__init__() got unexpected keyword arguments: %s'
                            % (kwargs.keys(),))
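
A hedged sketch of the connection_retry and command_retry options accepted by this constructor; the hosts and retry values are placeholders. Either a dict of KazooRetry options or a ready KazooRetry instance works, and both retries must use the handler's sleep function.

from kazoo.client import KazooClient
from kazoo.retry import KazooRetry

# Dict form: the constructor expands these into KazooRetry(**options).
zk = KazooClient(
    hosts='127.0.0.1:2181,127.0.0.1:2182',
    connection_retry={'max_tries': -1, 'delay': 0.5, 'backoff': 2},
    command_retry={'max_tries': 3, 'delay': 0.3})

# Equivalent form with explicit KazooRetry objects.
zk = KazooClient(
    hosts='127.0.0.1:2181,127.0.0.1:2182',
    connection_retry=KazooRetry(max_tries=-1, delay=0.5, backoff=2),
    command_retry=KazooRetry(max_tries=3, delay=0.3))

zk.start()
# client.retry() wraps a command with a fresh copy of the command retry,
# so attempt counts are never shared between calls.
children = zk.retry(zk.get_children, '/')
zk.stop()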
Example #46
0
class Lock(object):
    """Kazoo Lock

    Example usage with a :class:`~kazoo.client.KazooClient` instance:

    .. code-block:: python

        zk = KazooClient()
        lock = zk.Lock("/lockpath", "my-identifier")
        with lock:  # blocks waiting for lock acquisition
            # do something with the lock

    Note: This lock is not *re-entrant*. Repeated calls after already
    acquired will raise a ``RuntimeError``.

    This is an exclusive lock. For a read/write lock, see :class:`WLock` and
    :class:`RLock`.

    """

    def __init__(self, client, path, identifier=None, node_name="__lock__",
                 exclude_names=None):
        """Create a Kazoo lock.

        node_name and exclude_names are typically only used internally to
        implement read/write locks. They should be left unset for exclusive
        locks.

        :param client: A :class:`~kazoo.client.KazooClient` instance.
        :param path: The lock path to use.
        :param identifier: Name to use for this lock contender. This
                           can be useful for querying to see who the
                           current lock contenders are.
        :param node_name: Node name, after the contender UUID, before the
                          sequence number. Involved in read/write locks. For a
                          normal (exclusive) lock, leave unset.
        :param exclude_names: Node names which exclude this contender when
                              present at a lower sequence number. Involved in
                              read/write locks. For a normal (exclusive) lock,
                              leave unset.
        """
        self.client = client
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')

        self.wake_event = client.handler.event_object()

        self.node_name = node_name

        if exclude_names is None:
            exclude_names = [self.node_name]
        self.exclude_names = exclude_names

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self.node_name
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = KazooRetry(max_tries=None,
                                 sleep_func=client.handler.sleep_func)

    def _ensure_path(self):
        self.client.ensure_path(self.path)
        self.assured_path = True

    def cancel(self):
        """Cancel a pending lock acquire."""
        self.cancelled = True
        self.wake_event.set()

    def acquire(self, blocking=True, timeout=None):
        """
        Acquire the lock. By default blocks and waits forever.

        :param blocking: Block until lock is obtained or return immediately.
        :type blocking: bool
        :param timeout: Don't wait forever to acquire the lock.
        :type timeout: float or None

        :returns: Was the lock acquired?
        :rtype: bool

        :raises: :exc:`~kazoo.exceptions.LockTimeout` if the lock
                 wasn't acquired within `timeout` seconds.

        .. versionadded:: 1.1
            The timeout option.
        """
        if self.is_acquired:
            raise RuntimeError("Lock at path '%s' has already been"
                               " acquired" % self.path)
        try:
            retry = self._retry.copy()
            retry.deadline = timeout
            self.is_acquired = retry(self._inner_acquire,
                                     blocking=blocking, timeout=timeout)
        except RetryFailedError:
            self._best_effort_cleanup()
        except KazooException:
            # if we did ultimately fail, attempt to clean up
            self._best_effort_cleanup()
            self.cancelled = False
            raise

        if not self.is_acquired:
            self._delete_node(self.node)

        return self.is_acquired

    def _watch_session(self, state):
        self.wake_event.set()
        return True

    def _inner_acquire(self, blocking, timeout):
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        node = None
        if self.create_tried:
            node = self._find_node()
        else:
            self.create_tried = True

        if not node:
            node = self.client.create(self.create_path, self.data,
                                      ephemeral=True, sequence=True)
            # strip off path to node
            node = node[len(self.path) + 1:]

        self.node = node

        while True:
            self.wake_event.clear()

            # bail out with an exception if cancellation has been requested
            if self.cancelled:
                raise CancelledError()

            children = self._get_sorted_children()

            try:
                our_index = children.index(node)
            except ValueError:  # pragma: nocover
                # somehow we aren't in the children -- probably we are
                # recovering from a session failure and our ephemeral
                # node was removed
                raise ForceRetryError()

            predecessor = self.predecessor(children, our_index)
            if not predecessor:
                return True

            if not blocking:
                return False

            # otherwise we are in the mix. watch predecessor and bide our time
            predecessor = self.path + "/" + predecessor
            self.client.add_listener(self._watch_session)
            try:
                if self.client.exists(predecessor, self._watch_predecessor):
                    self.wake_event.wait(timeout)
                    if not self.wake_event.isSet():
                        raise LockTimeout("Failed to acquire lock on %s after "
                                          "%s seconds" % (self.path, timeout))
            finally:
                self.client.remove_listener(self._watch_session)

    def predecessor(self, children, index):
        for c in children[:index]:
            if any(n in c for n in self.exclude_names):
                return c
        return None

    def _watch_predecessor(self, event):
        self.wake_event.set()

    def _get_sorted_children(self):
        children = self.client.get_children(self.path)

        # Node names are prefixed by a type: strip the prefix first, which may
        # be one of multiple values in case of a read-write lock, and return
        # only the sequence number (as a string since it is padded and will sort
        # correctly anyway).
        #
        # In some cases, the lock path may contain nodes with other prefixes
        # (eg. in case of a lease), just sort them last ('~' sorts after all
        # ASCII digits).
        def _seq(c):
            for name in ["__lock__", "__rlock__"]:
                idx = c.find(name)
                if idx != -1:
                    return c[idx + len(name):]
            # Sort unknown node names eg. "lease_holder" last.
            return '~'
        children.sort(key=_seq)
        return children

    def _find_node(self):
        children = self.client.get_children(self.path)
        for child in children:
            if child.startswith(self.prefix):
                return child
        return None

    def _delete_node(self, node):
        self.client.delete(self.path + "/" + node)

    def _best_effort_cleanup(self):
        try:
            node = self._find_node()
            if node:
                self._delete_node(node)
        except KazooException:  # pragma: nocover
            pass

    def release(self):
        """Release the lock immediately."""
        return self.client.retry(self._inner_release)

    def _inner_release(self):
        if not self.is_acquired:
            return False

        try:
            self._delete_node(self.node)
        except NoNodeError:  # pragma: nocover
            pass

        self.is_acquired = False
        self.node = None

        return True

    def contenders(self):
        """Return an ordered list of the current contenders for the
        lock.

        .. note::

            If the contenders did not set an identifier, it will appear
            as a blank string.

        """
        # make sure our election parent node exists
        if not self.assured_path:
            self._ensure_path()

        children = self._get_sorted_children()

        contenders = []
        for child in children:
            try:
                data, stat = self.client.get(self.path + "/" + child)
                contenders.append(data.decode('utf-8'))
            except NoNodeError:  # pragma: nocover
                pass
        return contenders

    def __enter__(self):
        self.acquire()

    def __exit__(self, exc_type, exc_value, traceback):
        self.release()
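
A short usage sketch for the Lock recipe above, assuming a started client; the lock path and identifier are illustrative, and LockTimeout is imported from the recipe module here.

from kazoo.client import KazooClient
from kazoo.recipe.lock import LockTimeout

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()
lock = zk.Lock('/app/leader', 'worker-1')

# Non-blocking attempt: returns immediately with True or False.
if lock.acquire(blocking=False):
    try:
        pass  # critical section
    finally:
        lock.release()

# Blocking attempt bounded by a timeout; LockTimeout is raised on expiry.
try:
    acquired = lock.acquire(timeout=5)
except LockTimeout:
    acquired = False  # another contender still held the lock
if acquired:
    holders = lock.contenders()  # identifiers of current contenders, in order
    lock.release()

zk.stop()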
Example #47
0
class ZookeeperClient(object):

    def __init__(self, module, server_list):
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.INFO)
        try:
            handler = logging.handlers.RotatingFileHandler(
                '/var/log/contrail/' + module + '-zk.log',
                maxBytes=10 * 1024 * 1024,
                backupCount=5)
        except IOError:
            print "Cannot open log file in /var/log/contrail/"
        else:
            log_format = logging.Formatter(
                '%(asctime)s [%(name)s]: %(message)s',
                datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        self._zk_client = \
            kazoo.client.KazooClient(
                server_list,
                timeout=20,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None)
        self.connect()
    # end __init__

    # start
    def connect(self):
        while True:
            try:
                self._zk_client.start()
                break
            except gevent.event.Timeout as e:
                self.syslog(
                    'Failed to connect with ZooKeeper - will retry in a second')
                gevent.sleep(1)
            # Zookeeper is also throwing exception due to delay in master election
            except Exception as e:
                self.syslog('%s - will retry in a second' % (str(e)))
                gevent.sleep(1)
        self.syslog('Connected to ZooKeeper!')
    # end

    def is_connected(self):
        return self._zk_client.state == KazooState.CONNECTED
    # end is_connected

    def syslog(self, msg):
        if not self._logger:
            return
        self._logger.info(msg)
    # end syslog

    def _zk_listener(self, state):
        if state == KazooState.CONNECTED:
            if self._election:
                self._election.cancel()
        elif state == KazooState.LOST:
            # Lost the session with the ZooKeeper server. The best option
            # we have is to exit the process and start all over again.
            os._exit(2)
    # end

    def _zk_election_callback(self, func, *args, **kwargs):
        func(*args, **kwargs)
        # Exit if running master encounters error or exception
        exit(1)
    # end

    def master_election(self, path, identifier, func, *args, **kwargs):
        while True:
            self._election = self._zk_client.Election(path, identifier)
            self._election.run(self._zk_election_callback, func, *args, **kwargs)
    # end master_election

    def create_node(self, path, value=None):
        try:
            if value is None:
                value = uuid.uuid4()
            retry = self._retry.copy()
            retry(self._zk_client.create, path, str(value), makepath=True)
        except kazoo.exceptions.NodeExistsError:
            current_value = self.read_node(path)
            if current_value == value:
                return True
            raise ResourceExistsError(path, str(current_value))
    # end create_node

    def delete_node(self, path, recursive=False):
        try:
            retry = self._retry.copy()
            retry(self._zk_client.delete, path, recursive=recursive)
        except kazoo.exceptions.NoNodeError:
            pass
        except Exception as e:
            raise e
    # end delete_node

    def read_node(self, path):
        try:
            retry = self._retry.copy()
            value = retry(self._zk_client.get, path)
            return value[0]
        except Exception:
            return None
    # end read_node

    def get_children(self, path):
        try:
            retry = self._retry.copy()
            return retry(self._zk_client.get_children, path)
        except Exception:
            return []
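
The CRUD helpers above copy a shared KazooRetry before every ZooKeeper call so that attempt counts and backoff state never leak between operations. A minimal sketch of that pattern on its own, with placeholder paths and values:

from kazoo.client import KazooClient
from kazoo.retry import KazooRetry
import kazoo.exceptions

zk = KazooClient(hosts='127.0.0.1:2181')
zk.start()

# One template object; max_tries=-1 means retry indefinitely.
retry_template = KazooRetry(max_tries=-1, max_delay=300)

def safe_create(path, value):
    retry = retry_template.copy()  # fresh retry state for this call only
    try:
        retry(zk.create, path, value, makepath=True)
    except kazoo.exceptions.NodeExistsError:
        pass

def safe_read(path):
    retry = retry_template.copy()
    try:
        return retry(zk.get, path)[0]
    except kazoo.exceptions.NoNodeError:
        return None

safe_create('/demo/config', b'enabled')
value = safe_read('/demo/config')
zk.stop()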
Example #48
0
class ZkSyncManager(object):

    RETRIES = 2
    LOCK_TIMEOUT = 3

    def __init__(self, host='127.0.0.1:2181', lock_path_prefix='/mastermind/locks/'):
        self.client = KazooClient(host, timeout=3)
        logger.info('Connecting to zookeeper host {0}, '
            'lock_path_prefix: {1}'.format(host, lock_path_prefix))
        try:
            self.client.start()
        except Exception as e:
            logger.error(e)
            raise

        self._retry = KazooRetry(max_tries=self.RETRIES)

        self.lock_path_prefix = lock_path_prefix

    @contextmanager
    def lock(self, lockid, blocking=True, timeout=LOCK_TIMEOUT):
        # with self.__locks_lock:
        lock = Lock(self.client, self.lock_path_prefix + lockid)
        try:
            acquired = lock.acquire(blocking=blocking, timeout=timeout)
            logger.debug('Lock {0} acquired: {1}'.format(lockid, acquired))
            if not acquired:
                raise LockFailedError(lock_id=lockid)
            yield
        except LockTimeout:
            logger.info('Failed to acquire lock {0} due to timeout '
                '({1} seconds)'.format(lockid, timeout))
            raise LockFailedError(lock_id=lockid)
        except LockFailedError:
            raise
        except Exception as e:
            logger.error('Failed to acquire lock {0}: {1}\n{2}'.format(
                lockid, e, traceback.format_exc()))
            raise
        finally:
            lock.release()

    def persistent_locks_acquire(self, locks, data=''):
        try:
            retry = self._retry.copy()
            result = retry(self._inner_persistent_locks_acquire, locks=locks, data=data)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error('Failed to fetch persistent locks {0}: {1}\n{2}'.format(
                locks, e, traceback.format_exc()))
            raise LockError
        return result

    def _inner_persistent_locks_acquire(self, locks, data):

        ensured_paths = set()

        tr = self.client.transaction()
        for lockid in locks:
            path = self.lock_path_prefix + lockid
            parts = path.rsplit('/', 1)
            if len(parts) == 2 and parts[0] not in ensured_paths:
                self.client.ensure_path(parts[0])
                ensured_paths.add(parts[0])
            tr.create(path, data)

        failed = False
        failed_locks = []
        result = tr.commit()
        for i, res in enumerate(result):
            if isinstance(res, ZookeeperError):
                failed = True
            if isinstance(res, NodeExistsError):
                failed_locks.append(locks[i])

        if failed_locks:
            holders = []
            for f in failed_locks:
                # TODO: fetch all holders with 1 transaction request
                holders.append((f, self.client.get(self.lock_path_prefix + f)))
            foreign_holders = [(l, h) for l, h in holders if h[0] != data]
            failed_lock, holder_resp = foreign_holders and foreign_holders[0] or holders[0]
            holder = holder_resp[0]
            holders_ids = list(set([h[0] for _, h in holders]))
            logger.warn('Persistent lock {0} is already set by {1}'.format(failed_lock, holder))
            raise LockAlreadyAcquiredError(
                'Lock for {0} is already acquired by job {1}'.format(failed_lock, holder),
                lock_id=failed_lock, holder_id=holder, holders_ids=holders_ids)
        elif failed:
            logger.error('Failed to set persistent locks {0}, result: {1}'.format(
                locks, result))
            raise LockError

        return True

    def get_children_locks(self, lock_prefix):
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_get_children_locks, lock_prefix)
        except RetryFailedError:
            raise LockError
        return result

    def __inner_get_children_locks(self, lock_prefix):
        full_path = self.lock_path_prefix + lock_prefix
        self.client.ensure_path(os.path.normpath(full_path))
        result = self.client.get_children(full_path)
        return ['{0}{1}'.format(lock_prefix, lock) for lock in result]

    def persistent_locks_release(self, locks, check=''):
        try:
            retry = self._retry.copy()
            result = retry(self.__inner_persistent_locks_release, locks=locks, check=check)
        except RetryFailedError:
            raise LockError
        except KazooException as e:
            logger.error('Failed to remove persistent locks {0}: {1}\n{2}'.format(
                locks, e, traceback.format_exc()))
            raise LockError
        return result

    def __inner_persistent_locks_release(self, locks, check):
        for lockid in locks:
            try:
                if check:
                    data = self.client.get(self.lock_path_prefix + lockid)
                    if data[0] != check:
                        logger.error('Lock {0} has inconsistent data: {1}, '
                            'expected {2}'.format(lockid, data[0], check))
                        raise InconsistentLockError(lock_id=lockid, holder_id=data[0])
                self.client.delete(self.lock_path_prefix + lockid)
            except NoNodeError:
                logger.warn('Persistent lock {0} is already removed'.format(lockid))
        return True
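
A hedged usage sketch of the ZkSyncManager above; the lock ids and job id are placeholders, and LockFailedError comes from the surrounding project rather than from kazoo.

manager = ZkSyncManager(host='127.0.0.1:2181',
                        lock_path_prefix='/mastermind/locks/')

# Short-lived exclusive lock around a critical section.
try:
    with manager.lock('couple_1_2', timeout=3):
        pass  # work performed while holding /mastermind/locks/couple_1_2
except LockFailedError:
    pass  # held by someone else, or acquisition timed out

# Persistent locks are plain nodes that outlive the session; the holder id
# is stored as node data and checked again on release.
manager.persistent_locks_acquire(['group_1', 'group_2'], data='job-42')
try:
    pass  # long-running job owning both groups
finally:
    manager.persistent_locks_release(['group_1', 'group_2'], check='job-42')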
Example #49
0
    def __init__(self, hosts='127.0.0.1:2181',
                 timeout=10.0, client_id=None, handler=None, default_acl=None,
                 auth_data=None, read_only=None, randomize_hosts=True,
                 retry=None, logger=None, **kwargs):
        """Create a :class:`KazooClient` instance. All time arguments
        are in seconds.

        :param hosts: Comma-separated list of hosts to connect to
                      (e.g. 127.0.0.1:2181,127.0.0.1:2182).
        :param timeout: The longest to wait for a Zookeeper connection.
        :param client_id: A Zookeeper client id, used when
                          re-establishing a prior session connection.
        :param handler: An instance of a class implementing the
                        :class:`~kazoo.interfaces.IHandler` interface
                        for callback handling.
        :param default_acl: A default ACL used on node creation.
        :param auth_data:
            A list of authentication credentials to use for the
            connection. Should be a list of (scheme, credential)
            tuples as :meth:`add_auth` takes.
        :param read_only: Allow connections to read only servers.
        :param randomize_hosts: By default randomize host selection.
        :param retry: The configured retry object to use.

        Retry parameters will be used for connection establishment
        attempts and reconnects.

        Basic Example:

        .. code-block:: python

            zk = KazooClient()
            zk.start()
            children = zk.get_children('/')
            zk.stop()

        As a convenience all recipe classes are available as attributes
        and get automatically bound to the client. For example::

            zk = KazooClient()
            zk.start()
            lock = zk.Lock('/lock_path')

        .. versionadded:: 0.6
            The read_only option. Requires Zookeeper 3.4+

        .. versionadded:: 0.6
            The retry_max_delay option.

        .. versionadded:: 0.6
            The randomize_hosts option.

        .. versionchanged:: 0.8
            Removed the unused watcher argument (was second argument).

        """
        self.logger = logger or log

        # Record the handler strategy used
        self.handler = handler if handler else SequentialThreadingHandler()
        if inspect.isclass(self.handler):
            raise ConfigurationError("Handler must be an instance of a class, "
                                     "not the class: %s" % self.handler)

        self.auth_data = auth_data if auth_data else set([])
        self.default_acl = default_acl
        self.randomize_hosts = randomize_hosts
        self.hosts, chroot = collect_hosts(hosts, randomize_hosts)
        if chroot:
            self.chroot = normpath(chroot)
        else:
            self.chroot = ''

        # Curator like simplified state tracking, and listeners for
        # state transitions
        self._state = KeeperState.CLOSED
        self.state = KazooState.LOST
        self.state_listeners = set()

        self._reset()
        self.read_only = read_only

        if client_id:
            self._session_id = client_id[0]
            self._session_passwd = client_id[1]
        else:
            self._reset_session()

        # ZK uses milliseconds
        self._session_timeout = int(timeout * 1000)

        # We use events like twitter's client to track current and
        # desired state (connected, and whether to shutdown)
        self._live = self.handler.async_result()
        self._live.set(False)
        self._writer_stopped = self.handler.event_object()
        self._stopped = self.handler.event_object()
        self._stopped.set()
        self._writer_stopped.set()

        if retry is not None:
            self.retry = retry
            assert self.handler.sleep_func == self.retry.sleep_func, \
                    'retry handler and event handler must use the same sleep func'
        else:
            retry_keys = dict(_RETRY_COMPAT_DEFAULTS)
            for key in retry_keys:
                try:
                    retry_keys[key] = kwargs.pop(key)
                    warnings.warn('Passing retry configuration param %s to the'
                            ' client directly is deprecated, please pass a'
                            ' configured retry object (using param %s)' % (
                                key, _RETRY_COMPAT_MAPPING[key]),
                            DeprecationWarning, stacklevel=2)
                except KeyError:
                    pass

            retry_keys = {_RETRY_COMPAT_MAPPING[oldname]: value
                          for oldname, value in retry_keys.items()}

            self.retry = KazooRetry(
                sleep_func=self.handler.sleep_func,
                **retry_keys)

        self._connection = ConnectionHandler(
            self, self.retry.copy(),
            logger=self.logger)

        # convenience API
        from kazoo.recipe.barrier import Barrier
        from kazoo.recipe.barrier import DoubleBarrier
        from kazoo.recipe.counter import Counter
        from kazoo.recipe.election import Election
        from kazoo.recipe.lock import Lock
        from kazoo.recipe.lock import Semaphore
        from kazoo.recipe.partitioner import SetPartitioner
        from kazoo.recipe.party import Party
        from kazoo.recipe.party import ShallowParty
        from kazoo.recipe.queue import Queue
        from kazoo.recipe.queue import LockingQueue
        from kazoo.recipe.watchers import ChildrenWatch
        from kazoo.recipe.watchers import DataWatch

        self.Barrier = partial(Barrier, self)
        self.Counter = partial(Counter, self)
        self.DoubleBarrier = partial(DoubleBarrier, self)
        self.ChildrenWatch = partial(ChildrenWatch, self)
        self.DataWatch = partial(DataWatch, self)
        self.Election = partial(Election, self)
        self.Lock = partial(Lock, self)
        self.Party = partial(Party, self)
        self.Queue = partial(Queue, self)
        self.LockingQueue = partial(LockingQueue, self)
        self.SetPartitioner = partial(SetPartitioner, self)
        self.Semaphore = partial(Semaphore, self)
        self.ShallowParty = partial(ShallowParty, self)

        # If we got any unhandled keywords, complain like python would
        if kwargs:
            raise TypeError('__init__() got unexpected keyword arguments: %s'
                            % (kwargs.keys(),))
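
A sketch of the two ways this older constructor accepts retry configuration: the preferred explicit retry object, and the deprecated flat keywords (such as retry_max_delay mentioned above) that are translated into a KazooRetry with a DeprecationWarning. Hosts and values are placeholders.

from kazoo.client import KazooClient
from kazoo.retry import KazooRetry

# Preferred: build the retry policy explicitly and hand it to the client.
retry = KazooRetry(max_tries=-1, delay=0.2, backoff=2, max_delay=60)
zk = KazooClient(hosts='127.0.0.1:2181,127.0.0.1:2182', retry=retry)

# Deprecated: a flat keyword such as retry_max_delay still works, but it
# emits a DeprecationWarning and is folded into a KazooRetry internally.
zk_old = KazooClient(hosts='127.0.0.1:2181', retry_max_delay=60)

zk.start()
zk.stop()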
Example #50
0
class ZookeeperClient(object):

    def __init__(self, module, server_list, logging_fn=None):
        # logging
        logger = logging.getLogger(module)
        logger.setLevel(logging.DEBUG)
        try:
            handler = logging.handlers.RotatingFileHandler(
                LOG_DIR + module + '-zk.log', maxBytes=10*1024*1024, backupCount=5)
        except IOError:
            print "Cannot open log file in %s" %(LOG_DIR)
        else:
            log_format = logging.Formatter('%(asctime)s [%(name)s]: %(message)s',
                                           datefmt='%m/%d/%Y %I:%M:%S %p')
            handler.setFormatter(log_format)
            logger.addHandler(handler)

        if logging_fn:
            self.log = logging_fn
        else:
            self.log = self.syslog

        # KazooRetry to retry keeper CRUD operations
        self._retry = KazooRetry(max_tries=None, max_delay=300,
                                 sleep_func=gevent.sleep)
        self._zk_client = kazoo.client.KazooClient(
                server_list,
                timeout=400,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                logger=logger,
                connection_retry=self._retry,
                command_retry=self._retry)

        self._zk_client.add_listener(self._zk_listener)
        self._logger = logger
        self._election = None
        self._server_list = server_list

        self._conn_state = None
        self._sandesh_connection_info_update(status='INIT', message='')
        self._lost_cb = None

        self.connect()
    # end __init__

    # start
    def connect(self):
        while True:
            try:
                self._zk_client.start()
                break
            except gevent.event.Timeout as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)
            # Zookeeper is also throwing exception due to delay in master election
            except Exception as e:
                # Update connection info
                self._sandesh_connection_info_update(status='DOWN',
                                                     message=str(e))
                gevent.sleep(1)
        # Update connection info
        self._sandesh_connection_info_update(status='UP', message='')

    # end

    def is_connected(self):
        return self._zk_client.state == KazooState.CONNECTED
    # end is_connected

    def syslog(self, msg, *args, **kwargs):
        if not self._logger:
            return
        level = kwargs.get('level', 'info')
        if isinstance(level, int):
            from pysandesh.sandesh_logger import SandeshLogger
            level = SandeshLogger.get_py_logger_level(level)

        log_method = getattr(self._logger, level, self._logger.info)
        log_method(msg)
    # end syslog

    def set_lost_cb(self, lost_cb=None):
        # set a callback to be called when kazoo state is lost
        # set to None for default action
        self._lost_cb = lost_cb
    # end set_lost_cb

    def _zk_listener(self, state):
        if state == KazooState.CONNECTED:
            if self._election:
                self._election.cancel()
            # Update connection info
            self._sandesh_connection_info_update(status='UP', message='')
        elif state == KazooState.LOST:
            # Lost the session with the ZooKeeper server. The best option
            # we have is to exit the process and start all over again.
            self._sandesh_connection_info_update(status='DOWN',
                                      message='Connection to Zookeeper lost')
            if self._lost_cb:
                self._lost_cb()
            else:
                os._exit(2)
        elif state == KazooState.SUSPENDED:
            # Update connection info
            self._sandesh_connection_info_update(status='INIT',
                message = 'Connection to zookeeper lost. Retrying')

    # end

    def _zk_election_callback(self, func, *args, **kwargs):
        func(*args, **kwargs)
        # Exit if running master encounters error or exception
        exit(1)
    # end

    def master_election(self, path, identifier, func, *args, **kwargs):
        while True:
            self._election = self._zk_client.Election(path, identifier)
            self._election.run(self._zk_election_callback, func, *args, **kwargs)
    # end master_election

    def create_node(self, path, value=None):
        try:
            if value is None:
                value = uuid.uuid4()
            retry = self._retry.copy()
            retry(self._zk_client.create, path, str(value), makepath=True)
        except kazoo.exceptions.NodeExistsError:
            current_value = self.read_node(path)
            if current_value == value:
                return True
            raise ResourceExistsError(path, str(current_value), 'zookeeper')
    # end create_node

    def delete_node(self, path, recursive=False):
        try:
            retry = self._retry.copy()
            retry(self._zk_client.delete, path, recursive=recursive)
        except kazoo.exceptions.NoNodeError:
            pass
        except Exception as e:
            raise e
    # end delete_node

    def read_node(self, path, include_timestamp=False):
        try:
            retry = self._retry.copy()
            value = retry(self._zk_client.get, path)
            if include_timestamp:
                return value
            return value[0]
        except Exception:
            return None
    # end read_node

    def get_children(self, path):
        try:
            retry = self._retry.copy()
            return retry(self._zk_client.get_children, path)
        except Exception:
            return []
    # end get_children

    def _sandesh_connection_info_update(self, status, message):
        from pysandesh.connection_info import ConnectionState
        from pysandesh.gen_py.process_info.ttypes import ConnectionStatus
        from pysandesh.gen_py.process_info.ttypes import ConnectionType as ConnType
        from pysandesh.gen_py.sandesh.ttypes import SandeshLevel

        new_conn_state = getattr(ConnectionStatus, status)
        ConnectionState.update(conn_type = ConnType.ZOOKEEPER,
                name = 'Zookeeper', status = new_conn_state,
                message = message,
                server_addrs = self._server_list.split(','))

        if (self._conn_state and self._conn_state != ConnectionStatus.DOWN and
            new_conn_state == ConnectionStatus.DOWN):
            msg = 'Connection to Zookeeper down: %s' %(message)
            self.log(msg, level=SandeshLevel.SYS_ERR)
        if (self._conn_state and self._conn_state != new_conn_state and
            new_conn_state == ConnectionStatus.UP):
            msg = 'Connection to Zookeeper ESTABLISHED'
            self.log(msg, level=SandeshLevel.SYS_NOTICE)

        self._conn_state = new_conn_state
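
A hedged sketch of driving the ZookeeperClient defined above; the module name, server list, and worker function are placeholders, and the class itself assumes the contrail/pysandesh environment it imports from.

import logging

def log_fn(msg, level=None):
    # Stand-in for a sandesh-aware logger; syslog() forwards a 'level' kwarg.
    logging.getLogger('demo').info(msg)

zk = ZookeeperClient('demo-module', '10.0.0.1:2181,10.0.0.2:2181',
                     logging_fn=log_fn)

# Replace the default os._exit(2) on session loss with a graceful callback.
zk.set_lost_cb(lambda: log_fn('ZooKeeper session lost, shutting down'))

zk.create_node('/demo/leader-config', value='initial')
current = zk.read_node('/demo/leader-config')

def do_master_work():
    pass  # runs only while this process holds the election

# Blocks forever: campaigns for leadership and re-runs the loop on exit.
# zk.master_election('/demo/election', 'node-1', do_master_work)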
Example #51
0
class zkClient(object):
    def __init__(self, server_list):
        self._retry = KazooRetry(max_tries=None, max_delay=300,
                                 sleep_func=gevent.sleep)
        self._zk_client = kazoo.client.KazooClient(
                server_list,
                timeout=400,
                handler=kazoo.handlers.gevent.SequentialGeventHandler(),
                connection_retry=self._retry,
                command_retry=self._retry)

        self._zk_client.add_listener(self._zk_listener)
        self._election = None
        self._server_list = server_list

        self._conn_state = None
        self._lost_cb = None

        self.connect()

    def _zk_listener(self, state):
        if state == KazooState.CONNECTED:
            if self._election:
                self._election.cancel()
        elif state == KazooState.LOST:
            if self._lost_cb:
                self._lost_cb()
            else:
                os._exit(2)
        elif state == KazooState.SUSPENDED:
            pass

    def connect(self):
        while True:
            try:
                self._zk_client.start()
                break
            except gevent.event.Timeout as e:
                gevent.sleep(1)
            except Exception as e:
                gevent.sleep(1)

    def is_connected(self):
        return self._zk_client.state == KazooState.CONNECTED

    def master_election(self, path, identifier, func, *args, **kwargs):
        self._election = self._zk_client.Election(path, identifier)
        self._election.run(func, *args, **kwargs)

    def create_node(self, path, value=None):
        try:
            if value is None:
                value = uuid.uuid4()
            retry = self._retry.copy()
            retry(self._zk_client.create, path, str(value), makepath=True)
        except kazoo.exceptions.NodeExistsError:
            current_value = self.read_node(path)
            if current_value == value:
                return True
            raise Exception("create node path %s, value %s" % (path, value))
    # end create_node

    def delete_node(self, path, recursive=False):
        try:
            retry = self._retry.copy()
            retry(self._zk_client.delete, path, recursive=recursive)
        except kazoo.exceptions.NoNodeError:
            pass
        except Exception as e:
            raise e
    # end delete_node

    def read_node(self, path, include_timestamp=False):
        try:
            retry = self._retry.copy()
            value = retry(self._zk_client.get, path)
            if include_timestamp:
                return value
            return value[0]
        except Exception:
            return None
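
A small sketch of the election helper on the zkClient above; the path, identifier, and worker function are placeholders, and the call blocks for the life of the process.

import gevent

def run_as_master():
    # Executes only while this process holds leadership; returning from the
    # function gives leadership up and Election.run() returns.
    while True:
        gevent.sleep(10)

client = zkClient('10.0.0.1:2181,10.0.0.2:2181')
if client.is_connected():
    # Campaign for /demo/election and call run_as_master() when we win.
    client.master_election('/demo/election', 'node-1', run_as_master)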
Example #52
0
class EntityLock(object):
  """ A ZooKeeper-based entity lock that allows test-and-set operations.

  This is based on kazoo's lock recipe, and has been modified to lock multiple
  entity groups. This lock is not re-entrant. Repeated calls after already
  acquired will block.
  """
  _NODE_NAME = '__lock__'

  # Tornado lock which allows tornado to switch to different coroutine
  # if current one is waiting for entity group lock
  _tornado_lock = TornadoLock()

  def __init__(self, client, keys, txid=None):
    """ Create an entity lock.

    Args:
      client: A kazoo client.
      keys: A list of entity Reference objects.
      txid: An integer specifying the transaction ID.
    """
    self.client = client
    self.paths = [zk_group_path(key) for key in keys]

    # The txid is written to the contender nodes for deadlock resolution.
    self.data = str(txid or '')

    self.wake_event = client.handler.event_object()

    # Give the contender nodes a uniquely identifiable prefix in case its
    # existence is in question.
    self.prefix = uuid.uuid4().hex + self._NODE_NAME

    self.create_paths = [path + '/' + self.prefix for path in self.paths]

    self.create_tried = False
    self.is_acquired = False
    self.cancelled = False
    self._retry = KazooRetry(max_tries=None,
                             sleep_func=client.handler.sleep_func)
    self._lock = client.handler.lock_object()

  def _ensure_path(self):
    """ Make sure the ZooKeeper lock paths have been created. """
    for path in self.paths:
      self.client.ensure_path(path)

  def cancel(self):
    """ Cancel a pending lock acquire. """
    self.cancelled = True
    self.wake_event.set()

  @gen.coroutine
  def acquire(self):
    now = ioloop.IOLoop.current().time()
    yield EntityLock._tornado_lock.acquire(now + LOCK_TIMEOUT)
    try:
      locked = self.unsafe_acquire()
      raise gen.Return(locked)
    finally:
      if not self.is_acquired:
        EntityLock._tornado_lock.release()

  def unsafe_acquire(self):
    """ Acquire the lock. By default blocks and waits forever.

    Returns:
      A boolean indicating whether or not the lock was acquired.
    """

    def _acquire_lock():
      """ Acquire a kazoo thread lock. """
      got_it = self._lock.acquire(False)
      if not got_it:
        raise ForceRetryError()
      return True

    retry = self._retry.copy()
    retry.deadline = LOCK_TIMEOUT

    # Prevent other threads from acquiring the lock at the same time.
    locked = self._lock.acquire(False)
    if not locked:
      try:
        retry(_acquire_lock)
      except RetryFailedError:
        return False

    already_acquired = self.is_acquired
    try:
      gotten = False
      try:
        gotten = retry(self._inner_acquire)
      except RetryFailedError:
        if not already_acquired:
          self._best_effort_cleanup()
      except KazooException:
        if not already_acquired:
          self._best_effort_cleanup()
          self.cancelled = False
        raise
      if gotten:
        self.is_acquired = gotten
      if not gotten and not already_acquired:
        self._delete_nodes(self.nodes)
      return gotten
    finally:
      self._lock.release()

  def _watch_session(self, state):
    """ A callback function for handling connection state changes.

    Args:
      state: The new connection state.
    """
    self.wake_event.set()
    return True

  def _resolve_deadlocks(self, children_list):
    """ Check if there are any concurrent cross-group locks.

    Args:
      children_list: A list of current transactions for each group.
    """
    current_txid = int(self.data)
    for index, children in enumerate(children_list):
      our_index = children.index(self.nodes[index])

      # Skip groups where this lock already has the earliest contender.
      if our_index == 0:
        continue

      # Get transaction IDs for earlier contenders.
      for child in children[:our_index - 1]:
        try:
          data, _ = self.client.get(
            self.paths[index] + '/' + child)
        except NoNodeError:
          continue

        # If data is not set, it doesn't belong to a cross-group
        # transaction.
        if not data:
          continue

        child_txid = int(data)
        # As an arbitrary rule, require later transactions to
        # resolve deadlocks.
        if current_txid > child_txid:
          # TODO: Implement a more graceful deadlock detection.
          self.client.retry(self._delete_nodes, self.nodes)
          raise ForceRetryError()

  def _inner_acquire(self):
    """ Create contender node(s) and wait until the lock is acquired. """

    # Make sure the group lock node exists.
    self._ensure_path()

    nodes = [None for _ in self.paths]
    if self.create_tried:
      nodes = self._find_nodes()
    else:
      self.create_tried = True

    for index, node in enumerate(nodes):
      if node is not None:
        continue

      # The entity group lock root may have been deleted, so try a few times.
      try_num = 0
      while True:
        try:
          node = self.client.create(
            self.create_paths[index], self.data, sequence=True)
          break
        except NoNodeError:
          self.client.ensure_path(self.paths[index])
          if try_num > 3:
            raise ForceRetryError()
        try_num += 1

      # Strip off path to node.
      node = node[len(self.paths[index]) + 1:]
      nodes[index] = node

    self.nodes = nodes

    while True:
      self.wake_event.clear()

      # Bail out with an exception if cancellation has been requested.
      if self.cancelled:
        raise CancelledError()

      children_list = self._get_sorted_children()

      predecessors = []
      for index, children in enumerate(children_list):
        try:
          our_index = children.index(nodes[index])
        except ValueError:
          raise ForceRetryError()

        # If the lock for this group hasn't been acquired, get the predecessor.
        if our_index != 0:
          predecessors.append(
            self.paths[index] + "/" + children[our_index - 1])

      if not predecessors:
        return True

      if len(nodes) > 1:
        self._resolve_deadlocks(children_list)

      # Wait for predecessor to be removed.
      # TODO: Listen for all at the same time.
      for index, predecessor in enumerate(predecessors):
        self.client.add_listener(self._watch_session)
        try:
          if self.client.exists(predecessor, self._watch_predecessor):
            self.wake_event.wait(LOCK_TIMEOUT)
            if not self.wake_event.isSet():
              error = 'Failed to acquire lock on {} after {} '\
                'seconds'.format(self.paths, LOCK_TIMEOUT * (index + 1))
              raise LockTimeout(error)
        finally:
          self.client.remove_listener(self._watch_session)

  def _watch_predecessor(self, event):
    """ A callback function for handling contender deletions.

    Args:
      event: A ZooKeeper event.
    """
    self.wake_event.set()

  def _get_sorted_children(self):
    """ Retrieve a list of sorted contenders for each group.

    Returns:
      A list of contenders for each group.
    """
    children = []
    for path in self.paths:
      try:
        children.append(self.client.get_children(path))
      except NoNodeError:
        children.append([])

    # Ignore lock path prefix when sorting contenders.
    lockname = self._NODE_NAME
    for child_list in children:
      child_list.sort(key=lambda c: c[c.find(lockname) + len(lockname):])
    return children

  def _find_nodes(self):
    """ Retrieve a list of paths this lock has created.

    Returns:
      A list of ZooKeeper paths.
    """
    nodes = []
    for path in self.paths:
      try:
        children = self.client.get_children(path)
      except NoNodeError:
        children = []

      node = None
      for child in children:
        if child.startswith(self.prefix):
          node = child
      nodes.append(node)
    return nodes

  def _delete_nodes(self, nodes):
    """ Remove ZooKeeper nodes.

    Args:
      nodes: A list of nodes to delete.
    """
    for index, node in enumerate(nodes):
      if node is None:
        continue
      self.client.delete(self.paths[index] + "/" + node)

  def _best_effort_cleanup(self):
    """ Attempt to delete nodes that this lock has created. """
    try:
      nodes = self._find_nodes()
      self._delete_nodes(nodes)
    except KazooException:
      pass

  def release(self):
    """ Release the lock immediately. """
    try:
      self.client.retry(self._inner_release)

      # Try to clean up the group lock path.
      for path in self.paths:
        try:
          self.client.delete(path)
        except (NotEmptyError, NoNodeError):
          pass
      return
    finally:
      if not self.is_acquired:
        EntityLock._tornado_lock.release()

  def ensure_release_tornado_lock(self):
    """ Ensures that tornado lock (which is global for datastore server)
    is released.
    It MUST BE CALLED any time when lock is acquired
    even if entity group lock in zookeeper left acquired after failure.
    """
    if self.is_acquired:
      EntityLock._tornado_lock.release()

  def _inner_release(self):
    """ Release the lock by removing created nodes. """
    if not self.is_acquired:
      return False

    try:
      self._delete_nodes(self.nodes)
    except NoNodeError:
      pass

    self.is_acquired = False
    self.nodes = [None for _ in self.paths]
    return True

  def __enter__(self):
    self.unsafe_acquire()

  def __exit__(self, exc_type, exc_value, traceback):
    self.release()
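
A hedged sketch of driving the EntityLock above from a Tornado coroutine; the entity Reference objects, zk_group_path, and LOCK_TIMEOUT come from the surrounding project and are assumed here.

from tornado import gen

@gen.coroutine
def apply_mutation(zk_client, entity_keys, txid):
    lock = EntityLock(zk_client, entity_keys, txid)
    acquired = yield lock.acquire()
    if not acquired:
        raise gen.Return(False)
    try:
        pass  # perform the cross-group mutation while the groups are locked
    finally:
        try:
            lock.release()
        finally:
            # No-op when release() already dropped the process-wide Tornado
            # lock, but guarantees it is not leaked if release() failed.
            lock.ensure_release_tornado_lock()
    raise gen.Return(True)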