def put(self, value, priority=100, timeout=None):
    """Add one entry to the queue while holding the lock at
    ``_uniq_lock_path``.

    :param value: Byte string to put into the queue.
    :param priority: Optional integer priority of at most 3 digits.
        Lower values signify higher priority.
    :param timeout: Maximum time in seconds to wait for the lock. If
        None, wait until the lock is released.
    :raises ValueError: if the uniqueness check rejects ``value`` and
        duplicates are not being ignored.
    """
    self._check_put_arguments(value, priority)

    uniq_lock = Lock(self.client, self._uniq_lock_path)
    uniq_lock.acquire(timeout=timeout)
    try:
        if not self._check_uniq(value):
            # Duplicate: either silently skip it or report it.
            if not self._ignore_duplicates:
                raise ValueError(
                    "Duplicate value {value}".format(value=value))
            return

        self._ensure_paths()
        entry_prefix = "{path}/{prefix}-{priority:03d}-".format(
            path=self._entries_path,
            prefix=self.entry,
            priority=priority)
        # sequence=True lets ZooKeeper append the sequence suffix.
        self.client.create(entry_prefix, value, sequence=True)
    finally:
        uniq_lock.release()
def put(self, value, priority=100, timeout=None):
    """Queue a single value, serialised by the lock at
    ``_uniq_lock_path``.

    :param value: Byte string to put into the queue.
    :param priority: Optional integer priority of at most 3 digits.
        Lower values signify higher priority.
    :param timeout: Maximum waiting time in seconds for the lock. If
        None, wait until the lock is released.
    :raises ValueError: if the uniqueness check rejects ``value`` and
        duplicates are not being ignored.
    """
    self._check_put_arguments(value, priority)

    queue_lock = Lock(self.client, self._uniq_lock_path)
    queue_lock.acquire(timeout=timeout)
    try:
        is_new = self._check_uniq(value)
        if is_new:
            self._ensure_paths()
            node_path = "{path}/{prefix}-{priority:03d}-".format(
                path=self._entries_path,
                prefix=self.entry,
                priority=priority)
            self.client.create(node_path, value, sequence=True)
        else:
            if not self._ignore_duplicates:
                message = "Duplicate value {value}".format(value=value)
                raise ValueError(message)
    finally:
        queue_lock.release()
def lockHoldRequest(self, request, blocking=True, timeout=None):
    '''
    Lock a hold request.

    On success the acquired lock object is stored in the `lock`
    attribute of the request.

    :param HoldRequest request: The hold request to lock.
    :param bool blocking: Passed to the lock's acquire() call.
    :param timeout: Passed to the lock's acquire() call.

    :raises: LockException if the request has no ID, if acquiring the
        lock timed out, or if the lock was not obtained.
    '''
    if not request.id:
        raise LockException(
            "Hold request without an ID cannot be locked: %s" % request)

    lock_path = "%s/%s/lock" % (self.HOLD_REQUEST_ROOT, request.id)
    try:
        hold_lock = Lock(self.client, lock_path)
        acquired = hold_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise LockException(
            "Timeout trying to acquire lock %s" % lock_path)

    if acquired:
        request.lock = hold_lock
        return

    # A non-blocking attempt can come back empty-handed when someone
    # else already holds the lock.
    raise LockException("Did not get lock on %s" % lock_path)
def lockNodeRequest(self, request, blocking=True, timeout=None):
    '''
    Lock a node request.

    On success the acquired lock is stored in the `lock` attribute of
    the request object and the request is passed to
    updateNodeRequest().

    :param NodeRequest request: The request to lock.
    :param bool blocking: Whether or not to block on trying to
        acquire the lock
    :param int timeout: When blocking, how long to wait for the lock
        to get acquired. None, the default, waits forever.

    :raises: LockException if acquiring the lock timed out, or if the
        lock was not obtained (including when the lock node could not
        be found).
    '''
    lock_path = "%s/%s" % (self.REQUEST_LOCK_ROOT, request.id)

    acquired = False
    try:
        request_lock = Lock(self.client, lock_path)
        acquired = request_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise LockException(
            "Timeout trying to acquire lock %s" % lock_path)
    except kze.NoNodeError:
        # Treated the same as failing to get the lock, but logged.
        self.log.error("Request not found for locking: %s", request)

    # A non-blocking attempt may fail because someone else holds the
    # lock.
    if not acquired:
        raise LockException("Did not get lock on %s" % lock_path)

    request.lock = request_lock
    self.updateNodeRequest(request)
def lockNode(self, node, blocking=True, timeout=None):
    '''
    Lock a node.

    Intended to be called as soon as a request is fulfilled, with the
    lock held for as long as the node is in use, so that nodepool can
    detect when Zuul has gone offline and reclaim the node.

    On success the acquired lock is stored in the `lock` attribute of
    the node.

    :param Node node: The node which should be locked.
    :param bool blocking: Passed to the lock's acquire() call.
    :param timeout: Passed to the lock's acquire() call.

    :raises: LockException if acquiring the lock timed out or the
        lock was not obtained.
    '''
    path = '%s/%s/lock' % (self.NODE_ROOT, node.id)
    try:
        node_lock = Lock(self.client, path)
        acquired = node_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise LockException("Timeout trying to acquire lock %s" % path)

    if acquired:
        node.lock = node_lock
        return

    # When not blocking, another holder may already have the lock.
    raise LockException("Did not get lock on %s" % path)
def lockNode(self, node, blocking=True, timeout=None):
    '''
    Acquire the ZooKeeper lock for a node.

    This is meant to be called once a request is fulfilled; the lock
    should stay held while the node is in use. Nodepool can use it to
    notice that Zuul has gone offline and that the node should be
    reclaimed.

    The lock object is attached to the node as its `lock` attribute.

    :param Node node: The node which should be locked.
    :param bool blocking: Forwarded to the lock's acquire() call.
    :param timeout: Forwarded to the lock's acquire() call.

    :raises: LockException on timeout or when the lock could not be
        obtained.
    '''
    node_lock_path = '%s/%s/lock' % (self.NODE_ROOT, node.id)

    try:
        candidate = Lock(self.client, node_lock_path)
        got_it = candidate.acquire(blocking, timeout)
    except kze.LockTimeout:
        timeout_msg = "Timeout trying to acquire lock %s" % node_lock_path
        raise LockException(timeout_msg)

    # Without blocking we may simply have lost out to another holder.
    if not got_it:
        failure_msg = "Did not get lock on %s" % node_lock_path
        raise LockException(failure_msg)

    node.lock = candidate
class ZooKeeperLock(object):
    """Thin wrapper that owns a KazooClient and one kazoo ``Lock``.

    The client is created and started in the constructor. Failures are
    not raised; they are recorded in ``init_ret`` (False) and ``err_str``
    and logged, and ``acquire``/``release`` then return None.
    """

    def __init__(self, hosts, id_str, lock_name, logger=None, timeout=1):
        """
        :param hosts: Host string handed to ``KazooClient``.
        :param id_str: Caller-supplied identifier (stored, not otherwise
            used by this class).
        :param lock_name: Name of the lock; the lock lives at
            ``/locks/<lock_name>``.
        :param logger: Logger handed to ``KazooClient``.
        :param timeout: Timeout used both for the client and for
            ``start()``.
        """
        self.hosts = hosts
        self.id_str = id_str
        self.zk_client = None
        self.timeout = timeout
        self.logger = logger
        self.name = lock_name
        self.lock_handle = None
        # Initialise the status fields up front so they can always be
        # read, not only after a failure.
        self.init_ret = True
        self.err_str = ""
        self.create_lock()

    def create_lock(self):
        """Start the client and build the lock handle.

        On failure ``init_ret`` is set to False, ``err_str`` describes the
        problem and ``lock_handle`` stays None.
        """
        try:
            self.zk_client = KazooClient(hosts=self.hosts,
                                         logger=self.logger,
                                         timeout=self.timeout)
            self.zk_client.start(timeout=self.timeout)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            return

        try:
            lock_path = os.path.join("/", "locks", self.name)
            self.lock_handle = Lock(self.zk_client, lock_path)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            return

    def destroy_lock(self):
        """Stop the client, if there is one. Safe to call repeatedly."""
        # self.release()
        if self.zk_client is not None:
            self.zk_client.stop()
            self.zk_client = None

    def acquire(self, blocking=True, timeout=None):
        """Try to acquire the lock.

        :returns: Whatever the underlying ``Lock.acquire`` returns, or
            None when there is no lock handle or acquiring raised.
        """
        if self.lock_handle is None:
            return None

        try:
            return self.lock_handle.acquire(blocking=blocking,
                                            timeout=timeout)
        except Exception as ex:
            # Best effort by design: report the failure, do not raise.
            self.err_str = "Acquire lock failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            return None

    def release(self):
        """Release the lock; returns None when there is no lock handle."""
        if self.lock_handle is None:
            return None
        return self.lock_handle.release()

    def __del__(self):
        self.destroy_lock()
def acquire_name(client, hostname_path):
    """Claim one of the name slots under ROOT and keep it until interrupted.

    A semaphore at LOCK, sized to the number of slots that exist when
    this function starts, limits how many hosts compete. Once through
    the semaphore, every slot is tried with a non-blocking lock. For the
    slot that is obtained, a data watch writes the slot's data to
    ``hostname_path`` whenever it is delivered, and the function then
    sleeps forever while holding the slot lock.

    This function does not return normally; it only exits by exception,
    releasing the slot lock and the semaphore on the way out.

    :param client: Connected kazoo client.
    :param hostname_path: File that receives the slot data.
    """
    name_slots = client.get_children(ROOT, watch=bail_if_slots_change)
    hostname = platform.node()
    semaphore = Semaphore(
        client=client,
        path=LOCK,
        identifier=hostname,
        max_leases=len(name_slots),
    )

    # waiting on the semaphore indefinitely seems to cause things to hang up
    # sometimes. instead, we'll cause ourselves to time out and retry if
    # things are taking a while.
    while True:
        print("waiting for name semaphore")
        try:
            semaphore.acquire(timeout=60)
        except kazoo.exceptions.LockTimeout:
            continue
        else:
            break

    try:
        # OK, we're one of the chosen servers. let's find a name that no one
        # is using.
        print("name semaphore acquired. finding name.")
        while True:
            name_slots = client.get_children(ROOT)

            for slot in name_slots:
                slot_path = os.path.join(ROOT, slot)
                slot_lock = Lock(client, slot_path, hostname)
                if slot_lock.acquire(blocking=False):
                    @client.DataWatch(slot_path)
                    def on_name_change(data, stat):
                        print("got name data %r." % data)
                        with open(hostname_path, "w") as hostname_file:
                            # Same output as the former
                            # ``print >> hostname_file, data``.
                            hostname_file.write("%s\n" % (data,))

                    # just sit around doing nothing for ever
                    try:
                        while True:
                            time.sleep(1)
                    finally:
                        # explicitly releasing the lock decreases delay
                        # until someone else can get this slot.
                        slot_lock.release()
            else:
                # failed to lock anything. likely waiting for a session to
                # expire. just pause for a little while.
                # NOTE(review): read as a for/else from the comment above;
                # confirm against the original layout.
                print("failed to find a name. will try again.")
                time.sleep(1)
    finally:
        semaphore.release()
def put_all(self, values, priority=100, timeout=None):
    """Add several entries to the queue inside one transaction.

    The action only succeeds if all entries were put into the queue.

    :param values: A list of byte strings to put into the queue.
    :param priority: Optional integer priority of at most 3 digits.
        Lower values signify higher priority.
    :param timeout: Maximum time in seconds to wait for the lock. If
        None, wait until the lock is released.
    :raises TypeError: if ``values`` is not a list, ``priority`` is not
        an int, or one of the values is not a byte string.
    :raises ValueError: if ``priority`` is outside 0..999, or a value
        fails the uniqueness check while duplicates are not ignored.
    """
    if not isinstance(values, list):
        raise TypeError("values must be a list of byte strings")
    if not isinstance(priority, int):
        raise TypeError("priority must be an int")
    if not 0 <= priority <= 999:
        raise ValueError("priority must be between 0 and 999")

    uniq_lock = Lock(self.client, self._uniq_lock_path)
    uniq_lock.acquire(timeout=timeout)
    try:
        self._ensure_paths()
        with self.client.transaction() as txn:
            for item in values:
                if not isinstance(item, bytes):
                    raise TypeError("value must be a byte string")

                if not self._check_uniq(item):
                    if self._ignore_duplicates:
                        continue
                    raise ValueError(
                        "Duplicate value {value}".format(value=item))

                entry_prefix = "{path}/{prefix}-{priority:03d}-".format(
                    path=self._entries_path,
                    prefix=self.entry,
                    priority=priority)
                txn.create(entry_prefix, item, sequence=True)
    finally:
        uniq_lock.release()
def put_all(self, values, priority=100, timeout=None):
    """Queue a batch of values atomically.

    All creates are issued through a single client transaction, so the
    action only succeeds if every entry was put into the queue.

    :param values: A list of byte strings to put into the queue.
    :param priority: Optional integer priority of at most 3 digits.
        Lower values signify higher priority.
    :param timeout: Maximum waiting time in seconds for the lock. If
        None, wait until the lock is released.
    :raises TypeError: for a non-list ``values``, a non-int
        ``priority``, or a value that is not a byte string.
    :raises ValueError: for a ``priority`` outside 0..999, or a value
        that fails the uniqueness check while duplicates are not
        ignored.
    """
    values_ok = isinstance(values, list)
    if not values_ok:
        raise TypeError("values must be a list of byte strings")

    priority_ok = isinstance(priority, int)
    if not priority_ok:
        raise TypeError("priority must be an int")
    if priority > 999 or priority < 0:
        raise ValueError("priority must be between 0 and 999")

    batch_lock = Lock(self.client, self._uniq_lock_path)
    batch_lock.acquire(timeout=timeout)
    try:
        self._ensure_paths()
        with self.client.transaction() as batch:
            for payload in values:
                if not isinstance(payload, bytes):
                    raise TypeError("value must be a byte string")

                if self._check_uniq(payload):
                    node_path = "{path}/{prefix}-{priority:03d}-".format(
                        path=self._entries_path,
                        prefix=self.entry,
                        priority=priority)
                    batch.create(node_path, payload, sequence=True)
                    continue

                if not self._ignore_duplicates:
                    message = "Duplicate value {value}".format(value=payload)
                    raise ValueError(message)
    finally:
        batch_lock.release()
def _getImageBuildLock(self, image, blocking=True, timeout=None):
    '''
    Acquire and return the build lock for an image.

    :param image: Image whose build lock path is looked up through
        _imageBuildLockPath().
    :param bool blocking: Passed to the lock's acquire() call.
    :param timeout: Passed to the lock's acquire() call.

    :returns: The acquired lock object.

    :raises: TimeoutException if acquiring the lock timed out.
        ZKLockException if the lock was not obtained.
    '''
    path = self._imageBuildLockPath(image)
    try:
        build_lock = Lock(self.client, path)
        acquired = build_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise npe.TimeoutException(
            "Timeout trying to acquire lock %s" % path)

    if acquired:
        return build_lock

    # A non-blocking attempt can fail because someone else holds the
    # lock.
    raise npe.ZKLockException("Did not get lock on %s" % path)
class ZooKeeperLock(object):
    """Thin wrapper that owns a KazooClient and one kazoo ``Lock``.

    The client is created and started in the constructor. Failures are
    printed rather than raised; ``_init`` records whether setup worked,
    and ``acquire``/``release`` return None when there is no lock handle.
    """

    def __init__(self, hosts, id_str, lock_key, timeout=0.1):
        """
        :param hosts: Host string handed to ``KazooClient``.
        :param id_str: Caller-supplied identifier (stored, not otherwise
            used by this class).
        :param lock_key: Name of the lock; the lock lives at
            ``/locks/<lock_key>``.
        :param timeout: Timeout used both for the client and for
            ``start()``.
        """
        self.hosts = hosts
        self.id_str = id_str
        self.lock_key = lock_key
        self.timeout = timeout
        self.zk_clinet = None
        self.lock_handle = None
        # True when the client started and the lock handle was built.
        self._init = self.create_lock()

    def create_lock(self):
        """Start the client and build the lock handle.

        :returns: True on success, False when either step failed (the
            failure is printed and ``lock_handle`` stays None).
        """
        try:
            self.zk_clinet = KazooClient(hosts=self.hosts,
                                         timeout=self.timeout)
            self.zk_clinet.start(timeout=self.timeout)
        except Exception as e:
            print("Kazoo clinet create fail: %s" % str(e))
            return False

        try:
            lock_path = os.path.join("/", "locks", self.lock_key)
            self.lock_handle = Lock(self.zk_clinet, lock_path)
        except Exception as e:
            print("client lock init fail: %s" % str(e))
            return False

        return True

    def destory_lock(self):
        """Stop the client, if there is one. Safe to call repeatedly."""
        if self.zk_clinet is not None:
            self.zk_clinet.stop()
            self.zk_clinet = None

    # Correctly spelled alias; the original name is kept for callers.
    destroy_lock = destory_lock

    def acquire(self, blocking=True, timeout=None):
        """Try to acquire the lock.

        :returns: Whatever the underlying ``Lock.acquire`` returns, or
            None when there is no lock handle or acquiring raised.
        """
        if self.lock_handle is None:
            return None

        try:
            return self.lock_handle.acquire(blocking=blocking,
                                            timeout=timeout)
        except Exception as e:
            # Best effort by design: report the failure, do not raise.
            print("Acquire lock failed : %s" % str(e))
            return None

    def release(self):
        """Release the lock; returns None when there is no lock handle."""
        if self.lock_handle is None:
            return None
        return self.lock_handle.release()

    def __del__(self):
        self.destory_lock()
def schedule(self, task_id):
    """Run one scheduling pass for a task while holding its lock.

    The task definition is read from ``/<root>/tasks/<task_id>`` as JSON.
    With the task's targets from ``get_targets``:

    * if the share of targets counted for ``'F'`` exceeds the task's
      ``fail_rate`` (default 0), the callback is invoked;
    * if every target is counted for ``'F'``, ``'S'`` or ``'K'``, the
      callback is invoked;
    * otherwise targets are chosen, up to ``parallel`` (default 1) minus
      the number counted for ``'W'``/``'R'``, and passed to ``copy_task``.

    If the lock cannot be obtained within one second the pass is skipped.

    :param task_id: Identifier of the task to schedule.
    :returns: The callback's return value when it was invoked, otherwise
        None.
    """
    node = '/{}/tasks/{}'.format(self.root, task_id)
    lock_node = '{}/lock'.format(node)
    self.zk.ensure_path(lock_node)
    lock = Lock(self.zk, lock_node)
    try:
        if lock.acquire(timeout=1):
            data, _ = self.zk.get(node)
            task = json.loads(data.decode())
            parallel = task.get('parallel', 1)
            fail_rate = task.get('fail_rate', 0)
            targets = self.get_targets(task_id)
            total = len(targets)

            # Guard the division: with no targets there is no failure
            # ratio, and the completion check below handles that case.
            if total and count(targets, ('F', )) / total > fail_rate:
                return self.callback(task_id)
            if count(targets, ('F', 'S', 'K')) == total:
                return self.callback(task_id)

            wait_schedule = choose(
                targets, parallel - count(targets, ('W', 'R')))
            self.copy_task(wait_schedule, task)
    except LockTimeout:
        # Someone else is scheduling this task; skip this pass.
        pass
    finally:
        lock.release()
def lockNodeRequest(self, request, blocking=True, timeout=None):
    '''
    Acquire the ZooKeeper lock for a node request.

    When the lock is obtained it is attached to the request as its
    `lock` attribute, and the request is then handed to
    updateNodeRequest().

    :param NodeRequest request: The request to lock.
    :param bool blocking: Whether or not to block on trying to
        acquire the lock
    :param int timeout: When blocking, how long to wait for the lock
        to get acquired. None, the default, waits forever.

    :raises: LockException on timeout, or when the lock was not
        obtained (including when the lock node could not be found).
    '''
    request_lock_path = "%s/%s" % (self.REQUEST_LOCK_ROOT, request.id)

    got_it = False
    try:
        candidate = Lock(self.client, request_lock_path)
        got_it = candidate.acquire(blocking, timeout)
    except kze.LockTimeout:
        timeout_msg = "Timeout trying to acquire lock %s" % request_lock_path
        raise LockException(timeout_msg)
    except kze.NoNodeError:
        # Logged, then handled like any other failed attempt below.
        self.log.error("Request not found for locking: %s", request)

    if got_it:
        request.lock = candidate
        self.updateNodeRequest(request)
        return

    # Without blocking we may simply have lost out to another holder.
    raise LockException("Did not get lock on %s" % request_lock_path)
def handle_running_target(self, job_id):
    """Inspect every target of a job and finish the job when all are done.

    Each target under ``<root>/jobs/<job_id>/targets`` is examined while
    holding that target's lock (one second acquire timeout). Running
    targets are passed to ``handle_running_task``; every status is
    tallied. When the number of successful plus failed targets equals the
    number of targets, ``job_callback`` is invoked with the success
    status if all succeeded, otherwise with the fail status.

    :param job_id: Identifier of the job whose targets are processed.
    """
    logger.info("handle_running_target start: job_id={}".format(job_id))
    node = '{}/jobs/{}/targets'.format(self.root, job_id)
    # This walks the state of every host under the job; with a large
    # number of hosts, performance needs to be considered.
    targets = self.zk.get_children(node)
    target_success_count = 0
    target_fail_count = 0
    target_init_count = 0
    target_running_count = 0
    for target in targets:
        target_lock_node = '{}/{}/lock'.format(node, target)
        self.zk.ensure_path(target_lock_node)
        target_lock = Lock(self.zk, target_lock_node)
        try:
            if target_lock.acquire(timeout=1):
                logger.info(
                    "Target Lock acquire: job_id={}, target={}".format(
                        job_id, target))
                logger.info(
                    "handle_running_target start: job_id={}, target={}".
                    format(job_id, target))
                path = '{}/{}'.format(node, target)
                target_value, _ = self.zk.get(path)
                target_value = json.loads(target_value.decode())
                # Example of the decoded value:
                # target_value = {
                #     "status" = 0,
                #     "current_task" = "offline",
                #     "next_task" = "stop_service",
                # }
                target_status = target_value['status']
                target_running_task = target_value['current_task']
                # Handle targets that are running.
                if target_status == TargetStatus.running.value:
                    self.handle_running_task(job_id, target,
                                             target_running_task)
                    # Counted here: a second check for the running
                    # status further down the chain could never match.
                    target_running_count += 1
                elif target_status == TargetStatus.success.value:
                    target_success_count += 1
                elif target_status == TargetStatus.fail.value:
                    target_fail_count += 1
                elif target_status == TargetStatus.init.value:
                    target_init_count += 1
                else:
                    logger.error(
                        "handle running target: unexpected target status, target_status={}"
                        .format(target_status))
        except LockTimeout:
            logger.error(
                'Target lock timeout: job_id={}, target={}'.format(
                    job_id, target))
        finally:
            if target_lock.release():
                logger.info(
                    'Target lock release: success, job_id={}, target={}'.
                    format(job_id, target))
            else:
                logger.error(
                    'Target lock release: fail, job_id={}, target={}'.
                    format(job_id, target))

    # Job summary information.
    logger.info(
        "job targets status detail: jobid={}, targets_count={}, target_init_count={}, target_running_count={}, target_success_count={}, target_fail_count={}"
        .format(job_id, len(targets), target_init_count,
                target_running_count, target_success_count,
                target_fail_count))
    if (target_success_count + target_fail_count) == len(targets):
        logger.info(
            "job is finished: jobid={}, targets_count={}, target_success_count={}, target_fail_count={}"
            .format(job_id, len(targets), target_success_count,
                    target_fail_count))
        # Job end point.
        if target_success_count == len(targets):
            self.job_callback(job_id, JobStatus.success.value)
        else:
            self.job_callback(job_id, JobStatus.fail.value)
    else:
        logger.info(
            "job is not finished: jobid={},target_count: {}, job target_success_count: {}"
            .format(job_id, len(targets), target_success_count))
class ZooKeeper(object):
    '''
    Class implementing the ZooKeeper interface.

    This class uses the facade design pattern to keep common interaction
    with the ZooKeeper API simple and consistent for the caller, and
    limits coupling between objects. It allows for more complex
    interactions by providing direct access to the client connection when
    needed (though that is discouraged). It also provides for a convenient
    entry point for testing only ZooKeeper interactions.

    Most API calls reference an image name only, as the path for the znode
    for that image is calculated automatically. And image names are assumed
    to be unique.

    If you will have multiple threads needing this API, each thread should
    instantiate their own ZooKeeper object. It should not be shared.
    '''

    log = logging.getLogger("nodepool.zk.ZooKeeper")

    IMAGE_ROOT = "/nodepool/image"

    def __init__(self, client=None):
        '''
        Initialize the ZooKeeper object.

        :param client: A pre-connected client. Optionally, you may choose
            to use the connect() call.
        '''
        self.client = client
        # The single image lock this object may hold at a time.
        self._current_lock = None

    #========================================================================
    # Private Methods
    #========================================================================

    def _imagePath(self, image):
        return "%s/%s" % (self.IMAGE_ROOT, image)

    def _imageBuildsPath(self, image):
        return "%s/builds" % self._imagePath(image)

    def _imageLockPath(self, image):
        return "%s/lock" % self._imageBuildsPath(image)

    def _imageUploadPath(self, image, build_number, provider):
        return "%s/%s/provider/%s/images" % (self._imageBuildsPath(image),
                                             build_number,
                                             provider)

    def _dictToStr(self, data):
        '''
        Serialize a dict to UTF-8 encoded JSON.

        The result is a byte string because the ZooKeeper client expects
        znode data as bytes.
        '''
        return json.dumps(data).encode('utf8')

    def _strToDict(self, data):
        '''
        Deserialize JSON znode data (byte string or text) to a dict.
        '''
        if isinstance(data, bytes):
            data = data.decode('utf8')
        return json.loads(data)

    def _getImageLock(self, image, blocking=True, timeout=None):
        '''
        Acquire the lock for an image and remember it in _current_lock.

        :raises: TimeoutException if acquiring the lock timed out.
            ZKLockException if the lock was not obtained.
        '''
        # If we don't already have a znode for this image, create it.
        image_lock = self._imageLockPath(image)
        try:
            self.client.ensure_path(self._imagePath(image))
            self._current_lock = Lock(self.client, image_lock)
            have_lock = self._current_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise npe.TimeoutException(
                "Timeout trying to acquire lock %s" % image_lock)

        # If we aren't blocking, it's possible we didn't get the lock
        # because someone else has it.
        if not have_lock:
            raise npe.ZKLockException("Did not get lock on %s" % image_lock)

    def _getImageBuildLock(self, image, blocking=True, timeout=None):
        '''
        This differs from _getImageLock() in that it creates a new build
        znode and returns its name to the caller.
        '''
        self._getImageLock(image, blocking, timeout)

        # Create new znode with new build_number
        build_number = self.getMaxBuildId(image) + 1
        self.client.create(
            self._imageBuildsPath(image) + "/%s" % build_number
        )

        return build_number

    def _connection_listener(self, state):
        '''
        Listener method for Kazoo connection state changes.

        .. warning:: This method must not block.
        '''
        if state == KazooState.LOST:
            self.log.debug("ZooKeeper connection: LOST")
        elif state == KazooState.SUSPENDED:
            self.log.debug("ZooKeeper connection: SUSPENDED")
        else:
            self.log.debug("ZooKeeper connection: CONNECTED")

    #========================================================================
    # Public Methods
    #========================================================================

    def connect(self, host_list, read_only=False):
        '''
        Establish a connection with ZooKeeper cluster.

        Convenience method if a pre-existing ZooKeeper connection is not
        supplied to the ZooKeeper object at instantiation time.

        :param list host_list: A list of dicts (one per server) defining
            the ZooKeeper cluster servers.
        :param bool read_only: If True, establishes a read-only connection.
        '''
        if not self.client:
            hosts = buildZooKeeperHosts(host_list)
            self.client = KazooClient(hosts=hosts, read_only=read_only)
            self.client.add_listener(self._connection_listener)
            self.client.start()

    def disconnect(self):
        '''
        Close the ZooKeeper cluster connection.

        You should call this method if you used connect() to establish a
        cluster connection.
        '''
        if self.client:
            self.client.stop()

    def getMaxBuildId(self, image):
        '''
        Find the highest build number for a given image.

        Image builds are integer znodes, which are children of the 'builds'
        parent znode.

        :param str image: The image name.

        :returns: An int value for the max existing image build number, or
            zero if none exist.

        :raises: ZKException if the image build path is not found.
        '''
        path = self._imageBuildsPath(image)

        if not self.client.exists(path):
            raise npe.ZKException(
                "Image build path not found for image %s" % image
            )

        max_found = 0
        children = self.client.get_children(path)
        if children:
            for child in children:
                # There can be a lock znode that we should ignore
                if child != 'lock':
                    max_found = max(max_found, int(child))
        return max_found

    def getMaxImageUploadId(self, image, build_number, provider):
        '''
        Find the highest image upload number for a given image for a
        provider.

        For a given image build, it may have been uploaded one or more times
        to a provider (with once being the most common case). Each upload is
        given its own znode, which is a integer increased by one for each
        upload. This method gets the highest numbered znode.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.

        :returns: An int value for the max existing image upload number, or
            zero if none exist.

        :raises: ZKException if the image upload path is not found.
        '''
        path = self._imageUploadPath(image, build_number, provider)

        if not self.client.exists(path):
            # Report the image name here, not the provider.
            raise npe.ZKException(
                "Image upload path not found for build %s of image %s" % (
                    build_number, image)
            )

        max_found = 0
        children = self.client.get_children(path)
        if children:
            max_found = max([int(child) for child in children])
        return max_found

    @contextmanager
    def imageLock(self, image, blocking=True, timeout=None):
        '''
        Context manager to use for locking an image.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. This is
        different from imageBuildLock() in that a new build node is NOT
        created and returned.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            yield self._getImageLock(image, blocking, timeout)
        finally:
            # Also runs when acquiring failed, so the object is left
            # without a stale lock reference.
            if self._current_lock:
                self._current_lock.release()
                self._current_lock = None

    @contextmanager
    def imageBuildLock(self, image, blocking=True, timeout=None):
        '''
        Context manager to use for locking new image builds.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. A new
        znode is created with the next highest build number. This build
        number is returned to the caller.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :returns: A integer to use for the new build id.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            yield self._getImageBuildLock(image, blocking, timeout)
        finally:
            if self._current_lock:
                self._current_lock.release()
                self._current_lock = None

    def getBuild(self, image, build_number):
        '''
        Retrieve the image build data.

        :param str image: The image name.
        :param int build_number: The image build number.

        :returns: The dictionary of build data.

        :raises: ZKException if the build znode does not exist.
        '''
        path = self._imageBuildsPath(image) + "/%s" % build_number

        if not self.client.exists(path):
            raise npe.ZKException(
                "Cannot find build data (image: %s, build: %s)" % (
                    image, build_number)
            )

        data, stat = self.client.get(path)
        return self._strToDict(data)

    def storeBuild(self, image, build_number, build_data):
        '''
        Store the image build data.

        The build znode must already exist; its data is replaced in its
        entirety. There is no partial updating. The build data is expected
        to be represented as a dict. This dict may contain any data, as
        appropriate.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param dict build_data: The build data.

        :raises: ZKException if the build znode does not exist (it is
            created with the imageBuildLock() context manager).
        '''
        path = self._imageBuildsPath(image) + "/%s" % build_number

        # The build path won't exist until it's created with the build lock
        if not self.client.exists(path):
            raise npe.ZKException(
                "%s does not exist. Did you lock it?" % path)

        self.client.set(path, self._dictToStr(build_data))

    def getImageUpload(self, image, build_number, provider,
                       upload_number=None):
        '''
        Retrieve the image upload data.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param int upload_number: The image upload number. If this is None,
            the most recent upload data is returned.

        :returns: A dict of upload data.

        :raises: ZKException if the image upload path is not found.
        '''
        if upload_number is None:
            upload_number = self.getMaxImageUploadId(image, build_number,
                                                     provider)

        path = self._imageUploadPath(image, build_number, provider)
        path = path + "/%s" % upload_number

        if not self.client.exists(path):
            raise npe.ZKException(
                "Cannot find upload data "
                "(image: %s, build: %s, provider: %s, upload: %s)" % (
                    image, build_number, provider, upload_number)
            )

        data, stat = self.client.get(path)
        return self._strToDict(data)

    def storeImageUpload(self, image, build_number, provider, image_data):
        '''
        Store the built image's upload data for the given provider.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param dict image_data: The image data we want to store.

        :returns: An int for the new upload id.

        :raises: ZKException if the image builds path does not exist.
        '''
        # We expect the image builds path to already exist.
        build_path = self._imageBuildsPath(image)
        if not self.client.exists(build_path):
            # Report the image name here, not the provider.
            raise npe.ZKException(
                "Cannot find build %s of image %s" % (build_number, image)
            )

        # Generate a path for the upload. This doesn't have to exist yet
        # since we'll create new provider/upload ID znodes automatically.
        path = self._imageUploadPath(image, build_number, provider)

        # We need to create the provider upload path if it doesn't exist
        # before we attempt to get the max image upload ID next.
        self.client.ensure_path(path)

        # Get a new upload ID
        next_id = self.getMaxImageUploadId(image, build_number, provider) + 1

        path = path + "/%s" % next_id
        self.client.create(path, self._dictToStr(image_data))

        return next_id
class ZooKeeper(object):
    '''
    Class implementing the ZooKeeper interface.

    This class uses the facade design pattern to keep common interaction
    with the ZooKeeper API simple and consistent for the caller, and
    limits coupling between objects. It allows for more complex interactions
    by providing direct access to the client connection when needed (though
    that is discouraged). It also provides for a convenient entry point for
    testing only ZooKeeper interactions.

    Most API calls reference an image name only, as the path for the znode
    for that image is calculated automatically. And image names are assumed
    to be unique.

    If you will have multiple threads needing this API, each thread should
    instantiate their own ZooKeeper object. It should not be shared.
    '''

    log = logging.getLogger("nodepool.zk.ZooKeeper")

    IMAGE_ROOT = "/nodepool/image"

    def __init__(self, client=None):
        '''
        Initialize the ZooKeeper object.

        :param client: A pre-connected client. Optionally, you may choose
            to use the connect() call.
        '''
        self.client = client
        # The single lock (if any) currently held through this object.
        self._current_lock = None

    #========================================================================
    # Private Methods
    #========================================================================

    def _imagePath(self, image):
        '''Return the znode path for the given image name.'''
        return "%s/%s" % (self.IMAGE_ROOT, image)

    def _imageBuildsPath(self, image):
        '''Return the parent znode path of all builds of the image.'''
        return "%s/builds" % self._imagePath(image)

    def _imageLockPath(self, image):
        '''Return the znode path used to lock the image.'''
        return "%s/lock" % self._imageBuildsPath(image)

    def _imageUploadPath(self, image, build_number, provider):
        '''
        Return the parent znode path of all uploads of one image build
        to one provider.
        '''
        return "%s/%s/provider/%s/images" % (self._imageBuildsPath(image),
                                             build_number,
                                             provider)

    def _dictToStr(self, data):
        '''
        Serialize a dict into the JSON byte string stored in a znode.

        :param dict data: The data to serialize.

        :returns: A UTF-8 encoded byte string.
        '''
        serialized = json.dumps(data)
        # Kazoo only accepts byte strings as znode values. On Python 2
        # json.dumps() already returns one; on Python 3 it returns text
        # which has to be encoded first.
        if not isinstance(serialized, bytes):
            serialized = serialized.encode('utf8')
        return serialized

    def _strToDict(self, data):
        '''
        Deserialize znode data, as written by _dictToStr(), into a dict.

        :param data: The raw znode value (byte string or text).

        :returns: The decoded data, or an empty dict when the znode holds
            no data at all.
        '''
        # A znode created without a value (such as a build znode that has
        # not had any data stored yet) has nothing to decode.
        if not data:
            return {}
        if isinstance(data, bytes):
            data = data.decode('utf8')
        return json.loads(data)

    def _getImageLock(self, image, blocking=True, timeout=None):
        '''
        Acquire the lock for an image and remember it as the current lock.

        :param str image: Name of the image to lock.
        :param bool blocking: Whether or not to block on trying to
            acquire the lock.
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock.
        '''
        image_lock = self._imageLockPath(image)
        try:
            # If we don't already have a znode for this image, create it.
            self.client.ensure_path(self._imagePath(image))
            lock = Lock(self.client, image_lock)
            have_lock = lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise npe.TimeoutException(
                "Timeout trying to acquire lock %s" % image_lock)

        # If we aren't blocking, it's possible we didn't get the lock
        # because someone else has it.
        if not have_lock:
            raise npe.ZKLockException("Did not get lock on %s" % image_lock)

        # Only record the lock once it is actually ours, so a failed
        # attempt never leaves this object claiming to hold a lock.
        self._current_lock = lock

    def _getImageBuildLock(self, image, blocking=True, timeout=None):
        '''
        This differs from _getImageLock() in that it creates a new build
        znode and returns its name to the caller.
        '''
        self._getImageLock(image, blocking, timeout)

        # Create new znode with new build_number
        build_number = self.getMaxBuildId(image) + 1
        self.client.create(
            self._imageBuildsPath(image) + "/%s" % build_number
        )

        return build_number

    def _connection_listener(self, state):
        '''
        Listener method for Kazoo connection state changes.

        .. warning:: This method must not block.
        '''
        if state == KazooState.LOST:
            self.log.debug("ZooKeeper connection: LOST")
        elif state == KazooState.SUSPENDED:
            self.log.debug("ZooKeeper connection: SUSPENDED")
        else:
            self.log.debug("ZooKeeper connection: CONNECTED")

    #========================================================================
    # Public Methods
    #========================================================================

    def connect(self, host_list, read_only=False):
        '''
        Establish a connection with ZooKeeper cluster.

        Convenience method if a pre-existing ZooKeeper connection is not
        supplied to the ZooKeeper object at instantiation time. Nothing is
        done when a client is already set.

        :param list host_list: A list of dicts (one per server) defining
            the ZooKeeper cluster servers.
        :param bool read_only: If True, establishes a read-only connection.
        '''
        if not self.client:
            hosts = buildZooKeeperHosts(host_list)
            self.client = KazooClient(hosts=hosts, read_only=read_only)
            self.client.add_listener(self._connection_listener)
            self.client.start()

    def disconnect(self):
        '''
        Close the ZooKeeper cluster connection.

        You should call this method if you used connect() to establish a
        cluster connection.
        '''
        if self.client:
            self.client.stop()

    def getMaxBuildId(self, image):
        '''
        Find the highest build number for a given image.

        Image builds are integer znodes, which are children of the 'builds'
        parent znode.

        :param str image: The image name.

        :returns: An int value for the max existing image build number, or
            zero if none exist.

        :raises: ZKException if the image build path is not found.
        '''
        path = self._imageBuildsPath(image)

        # Ask for the children directly instead of checking for existence
        # first, so a znode deleted in between is still reported as a
        # ZKException.
        try:
            children = self.client.get_children(path)
        except kze.NoNodeError:
            raise npe.ZKException(
                "Image build path not found for image %s" % image)

        # There can be a lock znode that we should ignore
        build_ids = [int(child) for child in children if child != 'lock']
        return max([0] + build_ids)

    def getMaxImageUploadId(self, image, build_number, provider):
        '''
        Find the highest image upload number for a given image for a provider.

        For a given image build, it may have been uploaded one or more times
        to a provider (with once being the most common case). Each upload is
        given its own znode, which is a integer increased by one for each
        upload. This method gets the highest numbered znode.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.

        :returns: An int value for the max existing image upload number, or
            zero if none exist.

        :raises: ZKException if the image upload path is not found.
        '''
        path = self._imageUploadPath(image, build_number, provider)

        try:
            children = self.client.get_children(path)
        except kze.NoNodeError:
            raise npe.ZKException(
                "Image upload path not found for build %s of image %s" % (
                    build_number, image)
            )

        upload_ids = [int(child) for child in children]
        return max(upload_ids) if upload_ids else 0

    @contextmanager
    def imageLock(self, image, blocking=True, timeout=None):
        '''
        Context manager to use for locking an image.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. This is
        different from imageBuildLock() in that a new build node is NOT
        created and returned.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        # Checked outside the try block so that the cleanup below never
        # releases a lock that some other caller is still holding.
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            yield self._getImageLock(image, blocking, timeout)
        finally:
            if self._current_lock:
                self._current_lock.release()
                self._current_lock = None

    @contextmanager
    def imageBuildLock(self, image, blocking=True, timeout=None):
        '''
        Context manager to use for locking new image builds.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. A new
        znode is created with the next highest build number. This build
        number is returned to the caller.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :returns: A integer to use for the new build id.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        # Checked outside the try block so that the cleanup below never
        # releases a lock that some other caller is still holding.
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            yield self._getImageBuildLock(image, blocking, timeout)
        finally:
            if self._current_lock:
                self._current_lock.release()
                self._current_lock = None

    def getBuild(self, image, build_number):
        '''
        Retrieve the image build data.

        :param str image: The image name.
        :param int build_number: The image build number.

        :returns: The dictionary of build data. This is empty when the
            build znode exists but no data has been stored in it yet.

        :raises: ZKException if the build znode is not found.
        '''
        path = self._imageBuildsPath(image) + "/%s" % build_number

        # Read directly rather than checking for existence first, so a
        # znode deleted in between is still reported as a ZKException.
        try:
            data, stat = self.client.get(path)
        except kze.NoNodeError:
            raise npe.ZKException(
                "Cannot find build data (image: %s, build: %s)" % (
                    image, build_number)
            )

        return self._strToDict(data)

    def storeBuild(self, image, build_number, build_data):
        '''
        Store the image build data.

        The build data is written to the build znode, replacing in its
        entirety any data already stored there. There is no partial
        updating.

        The build data is expected to be represented as a dict. This dict may
        contain any data, as appropriate.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param dict build_data: The build data.

        :raises: ZKException if the build znode does not exist (it is created
            with the imageBuildLock() context manager).
        '''
        path = self._imageBuildsPath(image) + "/%s" % build_number
        serialized = self._dictToStr(build_data)

        try:
            self.client.set(path, serialized)
        except kze.NoNodeError:
            # The build path won't exist until it's created with the
            # build lock
            raise npe.ZKException(
                "%s does not exist. Did you lock it?" % path)

    def getImageUpload(self, image, build_number, provider,
                       upload_number=None):
        '''
        Retrieve the image upload data.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param int upload_number: The image upload number. If this is None,
            the most recent upload data is returned.

        :returns: A dict of upload data.

        :raises: ZKException if the image upload path is not found.
        '''
        if upload_number is None:
            upload_number = self.getMaxImageUploadId(image, build_number,
                                                     provider)

        path = self._imageUploadPath(image, build_number, provider)
        path = path + "/%s" % upload_number

        # Read directly rather than checking for existence first, so a
        # znode deleted in between is still reported as a ZKException.
        try:
            data, stat = self.client.get(path)
        except kze.NoNodeError:
            raise npe.ZKException(
                "Cannot find upload data "
                "(image: %s, build: %s, provider: %s, upload: %s)" % (
                    image, build_number, provider, upload_number)
            )

        return self._strToDict(data)

    def storeImageUpload(self, image, build_number, provider, image_data):
        '''
        Store the built image's upload data for the given provider.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param dict image_data: The image data we want to store.

        :returns: An int for the new upload id.

        :raises: ZKException if the builds path of the image does not
            exist.
        '''
        # We expect the image builds path to already exist.
        build_path = self._imageBuildsPath(image)
        if not self.client.exists(build_path):
            raise npe.ZKException(
                "Cannot find build %s of image %s" % (build_number, image)
            )

        # Generate a path for the upload. This doesn't have to exist yet
        # since we'll create new provider/upload ID znodes automatically.
        path = self._imageUploadPath(image, build_number, provider)

        # We need to create the provider upload path if it doesn't exist
        # before we attempt to get the max image upload ID next.
        self.client.ensure_path(path)

        # Get a new upload ID
        next_id = self.getMaxImageUploadId(image, build_number, provider) + 1

        path = path + "/%s" % next_id
        self.client.create(path, self._dictToStr(image_data))

        return next_id