예제 #1
0
파일: zk.py 프로젝트: openstack-infra/zuul
    def lockNode(self, node, blocking=True, timeout=None):
        '''
        Acquire the ZooKeeper lock for a node and attach it to the node.

        This should be called as soon as a request is fulfilled and
        the lock held for as long as the node is in-use.  It can be
        used by nodepool to detect if Zuul has gone offline and the
        node should be reclaimed.

        On success the lock object is stored in ``node.lock``.

        :param Node node: The node which should be locked.
        :param bool blocking: Passed straight to the lock's ``acquire``.
        :param timeout: Passed straight to the lock's ``acquire``.

        :raises: LockException if acquiring timed out, or if ``acquire``
            reported that the lock was not obtained.
        '''
        lock_path = '%s/%s/lock' % (self.NODE_ROOT, node.id)

        try:
            node_lock = Lock(self.client, lock_path)
            acquired = node_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise LockException(
                "Timeout trying to acquire lock %s" % lock_path)

        if acquired:
            node.lock = node_lock
            return

        # A non-blocking attempt may come back without the lock because
        # someone else is holding it.
        raise LockException("Did not get lock on %s" % lock_path)
예제 #2
0
파일: queue.py 프로젝트: xpahos/kazoo
    def put(self, value, priority=100, timeout=None):
        """Add a single entry to the queue while holding the uniqueness lock.

        The arguments are validated by ``_check_put_arguments`` before the
        lock is taken. A value that ``_check_uniq`` rejects is either
        skipped silently (when ``_ignore_duplicates`` is set) or reported
        with :exc:`ValueError`.

        :param value: Byte string to put into the queue.
        :param priority:
            An optional priority as an integer with at most 3 digits.
            Lower values signify higher priority.
        :param timeout:
            Maximum waiting time in seconds for the lock. If None, wait
            until the lock is released.
        :raises ValueError: If the value is a duplicate and duplicates
            are not ignored.
        """
        self._check_put_arguments(value, priority)

        uniq_lock = Lock(self.client, self._uniq_lock_path)
        uniq_lock.acquire(timeout=timeout)

        try:
            if not self._check_uniq(value):
                if not self._ignore_duplicates:
                    raise ValueError("Duplicate value {value}".format(
                        value=value))
            else:
                self._ensure_paths()
                # Sequence node: ZooKeeper appends the counter after the
                # trailing dash of this prefix.
                entry_prefix = "{path}/{prefix}-{priority:03d}-".format(
                    path=self._entries_path,
                    prefix=self.entry,
                    priority=priority)
                self.client.create(entry_prefix, value, sequence=True)
        finally:
            uniq_lock.release()
def acquire_name(client, hostname_path):
    """Claim one of the name slots under ROOT and mirror its data to a file.

    The function first takes a lease on a semaphore whose size equals the
    number of children of ROOT, then repeatedly tries to lock one of those
    children without blocking. Once a slot lock is held, a data watch writes
    the slot's data to ``hostname_path`` on every change, and the function
    sleeps indefinitely while holding the lock.

    There is no normal return: the loops below only end when an exception
    propagates, at which point the slot lock (if held) and the semaphore
    lease are released.

    :param client: client object used for ``get_children``, ``DataWatch``
        and for constructing the semaphore and the locks.
    :param hostname_path: path of the file that receives the slot data.
    """
    # The slot count is read once, with `bail_if_slots_change` registered as
    # the watch callback (defined outside this block).
    name_slots = client.get_children(ROOT, watch=bail_if_slots_change)
    hostname = platform.node()

    # One lease per slot that existed at the time of the read above.
    semaphore = Semaphore(
        client=client,
        path=LOCK,
        identifier=hostname,
        max_leases=len(name_slots),
    )

    # waiting on the semaphore indefinitely seems to cause things to hang up
    # sometimes. instead, we'll cause ourselves to time out and retry if things
    # are taking a while.
    while True:
        print "waiting for name semaphore"
        try:
            semaphore.acquire(timeout=60)
        except kazoo.exceptions.LockTimeout:
            continue
        else:
            break

    try:
        # OK, we're one of the chosen servers. let's find a name that no one is
        # using.
        print "name semaphore acquired. finding name."
        while True:
            # Re-read the slots on every pass.
            name_slots = client.get_children(ROOT)

            for slot in name_slots:
                slot_path = os.path.join(ROOT, slot)
                slot_lock = Lock(client, slot_path, hostname)
                if slot_lock.acquire(blocking=False):
                    # Rewrite the hostname file whenever the slot's data
                    # changes.
                    @client.DataWatch(slot_path)
                    def on_name_change(data, stat):
                        print "got name data %r." % data
                        with open(hostname_path, "w") as hostname_file:
                            print >> hostname_file, data

                    # just sit around doing nothing for ever
                    try:
                        while True:
                            time.sleep(1)
                    finally:
                        # explicitly releasing the lock decreases delay until
                        # someone else can get this slot.
                        slot_lock.release()
            else:
                # The loop above contains no `break`, so this branch runs
                # after every complete pass over the slots.
                # failed to lock anything. likely waiting for a session to
                # expire. just pause for a little while.
                print "failed to find a name. will try again."
                time.sleep(1)
    finally:
        # Give the semaphore lease back however the search loop ended.
        semaphore.release()
예제 #4
0
    def _getImageBuildLock(self, image, blocking=True, timeout=None):
        '''
        Acquire and return the build lock for an image.

        The lock path comes from ``_imageBuildLockPath(image)``.

        :param image: The image whose build lock is wanted.
        :param bool blocking: Passed straight to the lock's ``acquire``.
        :param timeout: Passed straight to the lock's ``acquire``.

        :returns: The acquired lock object.

        :raises: TimeoutException if acquiring timed out. ZKLockException
            if ``acquire`` reported that the lock was not obtained.
        '''
        lock_path = self._imageBuildLockPath(image)

        try:
            build_lock = Lock(self.client, lock_path)
            acquired = build_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise npe.TimeoutException(
                "Timeout trying to acquire lock %s" % lock_path)

        if acquired:
            return build_lock

        # A non-blocking attempt may come back without the lock because
        # someone else is holding it.
        raise npe.ZKLockException("Did not get lock on %s" % lock_path)
예제 #5
0
class ZooKeeperLock(object):
    """Wrapper that owns one kazoo client and one lock under ``/locks``.

    Construction never raises: failures while creating the client or the
    lock are printed, and ``_init`` records whether setup succeeded.
    ``acquire`` and ``release`` return None when no lock handle exists.
    """

    def __init__(self, hosts, id_str, lock_key, timeout=0.1):
        """Store the settings and try to create the client and the lock.

        :param hosts: Host string handed to ``KazooClient``.
        :param id_str: Caller-supplied identifier; only stored here.
        :param lock_key: Name of the lock node below ``/locks``.
        :param timeout: Used both as the client timeout and as the
            timeout for starting the client.
        """
        self.hosts = hosts
        self.id_str = id_str
        self.lock_key = lock_key
        self.timeout = timeout
        self.zk_clinet = None
        self.lock_handle = None
        # True only when both the client and the lock handle were created.
        self._init = self.create_lock()

    def create_lock(self):
        """Start the client and build the lock handle.

        :returns: True when both steps succeeded, False otherwise. The
            failure reason is printed rather than raised.
        """
        try:
            self.zk_clinet = KazooClient(hosts=self.hosts,
                                         timeout=self.timeout)
            self.zk_clinet.start(timeout=self.timeout)
        except Exception as e:
            print("Kazoo clinet create fail: %s" % str(e))
            return False
        try:
            lock_path = os.path.join("/", "locks", self.lock_key)
            self.lock_handle = Lock(self.zk_clinet, lock_path)
        except Exception as e:
            print("client lock init fail: %s" % str(e))
            return False
        return True

    def destory_lock(self):
        """Stop the client, if there is one, and forget it."""
        if self.zk_clinet is not None:
            self.zk_clinet.stop()
            self.zk_clinet = None

    def acquire(self, blocking=True, timeout=None):
        """Try to take the lock.

        :param blocking: Passed to the lock handle's ``acquire``.
        :param timeout: Passed to the lock handle's ``acquire``.
        :returns: Whatever the lock handle's ``acquire`` returns, or None
            when there is no lock handle or acquiring raised.
        """
        if self.lock_handle is None:
            return None
        try:
            return self.lock_handle.acquire(blocking=blocking, timeout=timeout)
        except Exception as e:
            # Best effort by design: report the failure and signal it
            # with None instead of propagating.
            print("Acquire lock failed : %s" % str(e))
            return None

    def release(self):
        """Release the lock.

        :returns: The lock handle's ``release`` result, or None when
            there is no lock handle.
        """
        if self.lock_handle is None:
            return None
        return self.lock_handle.release()

    def __del__(self):
        # Make sure the client is stopped when the wrapper is collected.
        self.destory_lock()
예제 #6
0
 def create_lock(self):
     """Start the kazoo client and create the lock handle under ``/locks``.

     Nothing is raised. On failure ``init_ret`` is set to False,
     ``err_str`` describes the problem, and the message is logged through
     the ``logging`` module.
     """
     try:
         self.zk_client = KazooClient(hosts=self.hosts,
                                      logger=self.logger,
                                      timeout=self.timeout)
         self.zk_client.start(timeout=self.timeout)
     except Exception as ex:
         self.init_ret = False
         self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
         logging.error(self.err_str)
         return
     try:
         lock_path = os.path.join("/", "locks", self.name)
         self.lock_handle = Lock(self.zk_client, lock_path)
     except Exception as ex:
         # Same flag as the client-failure branch above; the original
         # wrote to a misspelled attribute here, so the failure was
         # never visible through ``init_ret``.
         self.init_ret = False
         self.err_str = "Create lock failed! Exception: %s" % str(ex)
         logging.error(self.err_str)
         return
예제 #7
0
파일: queue.py 프로젝트: xpahos/kazoo
    def put_all(self, values, priority=100, timeout=None):
        """Add several entries to the queue inside one transaction.

        The action only succeeds if all entries were put into the queue.
        Arguments are validated before the uniqueness lock is taken;
        each value is validated while the transaction is being built.

        :param values: A list of values to put into the queue.
        :param priority:
            An optional priority as an integer with at most 3 digits.
            Lower values signify higher priority.
        :param timeout:
            Maximum waiting time in seconds for the lock. If None, wait
            until the lock is released.
        :raises TypeError: If ``values`` is not a list, ``priority`` is
            not an int, or a value is not a byte string.
        :raises ValueError: If ``priority`` is outside 0..999, or a value
            is a duplicate and duplicates are not ignored.
        """
        if not isinstance(values, list):
            raise TypeError("values must be a list of byte strings")
        if not isinstance(priority, int):
            raise TypeError("priority must be an int")
        if not 0 <= priority <= 999:
            raise ValueError("priority must be between 0 and 999")

        uniq_lock = Lock(self.client, self._uniq_lock_path)
        uniq_lock.acquire(timeout=timeout)

        try:
            self._ensure_paths()

            with self.client.transaction() as txn:
                for item in values:
                    if not isinstance(item, bytes):
                        raise TypeError("value must be a byte string")
                    if not self._check_uniq(item):
                        if self._ignore_duplicates:
                            continue
                        raise ValueError("Duplicate value {value}".format(
                            value=item))
                    txn.create(
                        "{path}/{prefix}-{priority:03d}-".format(
                            path=self._entries_path,
                            prefix=self.entry,
                            priority=priority),
                        item, sequence=True)
        finally:
            uniq_lock.release()
예제 #8
0
    def put(self, value, priority=100, timeout=None):
        """Add a single entry to the queue while holding the uniqueness lock.

        The arguments are validated by ``_check_put_arguments`` before the
        lock is taken. A value that ``_check_uniq`` rejects is either
        skipped silently (when ``_ignore_duplicates`` is set) or reported
        with :exc:`ValueError`.

        :param value: Byte string to put into the queue.
        :param priority:
            An optional priority as an integer with at most 3 digits.
            Lower values signify higher priority.
        :param timeout:
            Maximum waiting time in seconds for the lock. If None, wait
            until the lock is released.
        :raises ValueError: If the value is a duplicate and duplicates
            are not ignored.
        """
        self._check_put_arguments(value, priority)

        uniq_lock = Lock(self.client, self._uniq_lock_path)
        uniq_lock.acquire(timeout=timeout)

        try:
            if not self._check_uniq(value):
                if not self._ignore_duplicates:
                    raise ValueError(
                        "Duplicate value {value}".format(value=value))
            else:
                self._ensure_paths()
                # Sequence node: ZooKeeper appends the counter after the
                # trailing dash of this prefix.
                entry_prefix = "{path}/{prefix}-{priority:03d}-".format(
                    path=self._entries_path,
                    prefix=self.entry,
                    priority=priority)
                self.client.create(entry_prefix, value, sequence=True)
        finally:
            uniq_lock.release()
예제 #9
0
 def schedule(self, task_id):
     """Schedule more targets for a task, or finish it via the callback.

     The task node's lock is taken with a one second timeout; if that
     times out, nothing happens. While the lock is held:

     * if the share of targets counted for status ``'F'`` exceeds the
       task's ``fail_rate`` (default 0), the callback is invoked;
     * if every target is counted for ``'F'``, ``'S'`` or ``'K'``, the
       callback is invoked (this includes a task with no targets);
     * otherwise ``choose`` picks targets and ``copy_task`` is called.

     NOTE(review): the meaning of the status letters and of the
     ``count``/``choose`` helpers is defined outside this block.

     :param task_id: Identifier of the task node below ``/<root>/tasks``.
     :returns: The callback's return value when the task is finished,
         otherwise None.
     """
     node = '/{}/tasks/{}'.format(self.root, task_id)
     lock_node = '{}/lock'.format(node)
     self.zk.ensure_path(lock_node)
     lock = Lock(self.zk, lock_node)
     try:
         if lock.acquire(timeout=1):
             data, _ = self.zk.get(node)
             task = json.loads(data.decode())
             p = task.get('parallel', 1)
             rate = task.get('fail_rate', 0)
             targets = self.get_targets(task_id)
             total = len(targets)
             # Guard the ratio: a task without targets would otherwise
             # raise ZeroDivisionError instead of reaching the
             # "everything is done" check below.
             if total and count(targets, ('F', )) / total > rate:
                 return self.callback(task_id)
             if count(targets, ('F', 'S', 'K')) == total:
                 return self.callback(task_id)
             wait_schedule = choose(targets, p - count(targets, ('W', 'R')))
             self.copy_task(wait_schedule, task)
     except LockTimeout:
         # Could not get the lock in time; skip this round.
         pass
     finally:
         lock.release()
예제 #10
0
    def _getImageLock(self, image, blocking=True, timeout=None):
        '''
        Acquire the lock for an image and remember it on this object.

        The lock object is stored in ``self._current_lock`` before the
        acquire attempt, so it is set even when acquiring fails.

        :param image: The image to lock.
        :param bool blocking: Passed straight to the lock's ``acquire``.
        :param timeout: Passed straight to the lock's ``acquire``.

        :raises: TimeoutException if acquiring timed out. ZKLockException
            if ``acquire`` reported that the lock was not obtained.
        '''
        lock_path = self._imageLockPath(image)

        try:
            # Make sure the image znode exists before locking below it.
            self.client.ensure_path(self._imagePath(image))
            image_lock = Lock(self.client, lock_path)
            self._current_lock = image_lock
            acquired = image_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise npe.TimeoutException(
                "Timeout trying to acquire lock %s" % lock_path)

        if acquired:
            return

        # A non-blocking attempt may come back without the lock because
        # someone else is holding it.
        raise npe.ZKLockException("Did not get lock on %s" % lock_path)
예제 #11
0
파일: zk.py 프로젝트: openstack-infra/zuul
    def lockNodeRequest(self, request, blocking=True, timeout=None):
        '''
        Acquire the lock for a node request.

        On success the lock object is stored in the request's `lock`
        attribute and the request is passed to updateNodeRequest().

        :param NodeRequest request: The request to lock.
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: LockException if acquiring timed out, if the lock was
            not obtained, or if acquiring raised NoNodeError (which is
            also logged as an error).
        '''
        path = "%s/%s" % (self.REQUEST_LOCK_ROOT, request.id)

        # Stays False unless acquire() returns and says otherwise.
        acquired = False
        try:
            request_lock = Lock(self.client, path)
            acquired = request_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise LockException(
                "Timeout trying to acquire lock %s" % path)
        except kze.NoNodeError:
            self.log.error("Request not found for locking: %s", request)

        # A non-blocking attempt may come back without the lock because
        # someone else is holding it.
        if not acquired:
            raise LockException("Did not get lock on %s" % path)

        request.lock = request_lock
        self.updateNodeRequest(request)
예제 #12
0
파일: zk.py 프로젝트: max11max/zuul
    def lockNodeRequest(self, request, blocking=True, timeout=None):
        '''
        Acquire the lock for a node request.

        On success the lock object is stored in the request's `lock`
        attribute and the request is passed to updateNodeRequest().

        :param NodeRequest request: The request to lock.
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: LockException if acquiring timed out, if the lock was
            not obtained, or if acquiring raised NoNodeError (which is
            also logged as an error).
        '''
        path = "%s/%s" % (self.REQUEST_LOCK_ROOT, request.id)

        # Stays False unless acquire() returns and says otherwise.
        acquired = False
        try:
            request_lock = Lock(self.client, path)
            acquired = request_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise LockException(
                "Timeout trying to acquire lock %s" % path)
        except kze.NoNodeError:
            self.log.error("Request not found for locking: %s", request)

        # A non-blocking attempt may come back without the lock because
        # someone else is holding it.
        if not acquired:
            raise LockException("Did not get lock on %s" % path)

        request.lock = request_lock
        self.updateNodeRequest(request)
예제 #13
0
class ZooKeeperLock():
    """Own a kazoo client plus one lock node and write a value to it.

    Construction never raises. After ``__init__`` returns, ``init_ret``
    tells whether setup succeeded and ``err_str`` holds the failure
    description (empty on success).
    """

    def __init__(self, hosts, lock_path, lock_name, lock_value, timeout=1):
        """Store the settings and try to create the client and the lock.

        :param hosts: Host string handed to ``KazooClient``.
        :param lock_path: Middle component of the lock node path.
        :param lock_name: Last component of the lock node path.
        :param lock_value: Data written to the lock node after the lock
            object has been created.
        :param timeout: Used both as the client timeout and as the
            timeout for starting the client.
        """
        self.hosts = hosts
        self.zk_client = None
        self.timeout = timeout
        self.name = lock_name
        self.lock_path = "PolicyCtrlCent/" + lock_path + "/" + lock_name
        self.lock_value = lock_value
        self.lock_handle = None
        # Result of create_lock(); set up front so both attributes exist
        # on the success path as well.
        self.init_ret = True
        self.err_str = ""

        self.create_lock()

    def create_lock(self):
        """Start the client, then create the lock and store its value.

        Failures are recorded in ``init_ret`` / ``err_str`` instead of
        being raised. When the client cannot be set up, the lock step is
        skipped so that ``err_str`` keeps the root cause.
        """
        try:
            self.zk_client = KazooClient(hosts=self.hosts,
                                         timeout=self.timeout)

            # Print every connection state change.
            @self.zk_client.add_listener
            def my_listener(state):
                if state == KazooState.LOST:
                    print("LOST")
                elif state == KazooState.SUSPENDED:
                    print("SUSPENDED")
                else:
                    print("Connected")

            self.zk_client.start(timeout=self.timeout)
            # NOTE(review): add_zk_auth() is not defined in this part of
            # the class; presumably it is provided elsewhere - confirm.
            self.add_zk_auth()

        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            # Without a working client the lock cannot be created, and
            # trying anyway would overwrite the message above.
            return

        try:
            print(self.lock_path)
            self.lock_handle = Lock(self.zk_client, self.lock_path)
            self.zk_client.set(self.lock_path, self.lock_value)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
예제 #14
0
class ZookeeperLock(object):
    """Own a kazoo client and one lock handle under ``/locks``.

    Construction never raises. ``lockHandle`` stays None when setup
    failed, and ``err_str`` then holds the failure description.
    """

    def __init__(self, hosts, name, logger=None, timeout=1):
        """Store the settings and try to create the client and the lock.

        :param hosts: Host string handed to ``KazooClient``, e.g.
            "192.168.56.112:2181,192.168.56.113:2181".
        :param name: Name of the lock node below ``/locks``.
        :param logger: Logger handed to ``KazooClient``.
        :param timeout: Used both as the client timeout and as the
            timeout for starting the client.
        """
        self.hosts = hosts
        self.zkClient = None
        # Defined up front so both attributes exist whatever happens in
        # createLock().
        self.lockHandle = None
        self.err_str = ""
        self.timeout = timeout
        self.logger = logger
        self.name = name
        self.createLock()

    def createLock(self):
        """Start the client and create the lock handle.

        Failures are reported through ``err_str`` (a client failure is
        printed as well) rather than raised. When the client cannot be
        set up, no lock is created.
        """
        try:
            self.zkClient = KazooClient(hosts=self.hosts,
                                        logger=self.logger,
                                        timeout=self.timeout)
            self.zkClient.start(timeout=self.timeout)
        except Exception as ex:
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            print(self.err_str)
            # A lock built on a client that failed to start is unusable.
            return

        try:
            lockPath = os.path.join("/", "locks", self.name)
            self.lockHandle = Lock(self.zkClient, lockPath)
        except Exception as ex:
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
예제 #15
0
    def put_all(self, values, priority=100, timeout=None):
        """Add several entries to the queue inside one transaction.

        The action only succeeds if all entries were put into the queue.
        Arguments are validated before the uniqueness lock is taken;
        each value is validated while the transaction is being built.

        :param values: A list of values to put into the queue.
        :param priority:
            An optional priority as an integer with at most 3 digits.
            Lower values signify higher priority.
        :param timeout:
            Maximum waiting time in seconds for the lock. If None, wait
            until the lock is released.
        :raises TypeError: If ``values`` is not a list, ``priority`` is
            not an int, or a value is not a byte string.
        :raises ValueError: If ``priority`` is outside 0..999, or a value
            is a duplicate and duplicates are not ignored.
        """
        if not isinstance(values, list):
            raise TypeError("values must be a list of byte strings")
        if not isinstance(priority, int):
            raise TypeError("priority must be an int")
        if not 0 <= priority <= 999:
            raise ValueError("priority must be between 0 and 999")

        uniq_lock = Lock(self.client, self._uniq_lock_path)
        uniq_lock.acquire(timeout=timeout)

        try:
            self._ensure_paths()

            with self.client.transaction() as txn:
                for item in values:
                    if not isinstance(item, bytes):
                        raise TypeError("value must be a byte string")
                    if not self._check_uniq(item):
                        if self._ignore_duplicates:
                            continue
                        raise ValueError(
                            "Duplicate value {value}".format(value=item))
                    txn.create(
                        "{path}/{prefix}-{priority:03d}-".format(
                            path=self._entries_path,
                            prefix=self.entry,
                            priority=priority),
                        item,
                        sequence=True)
        finally:
            uniq_lock.release()
예제 #16
0
 def handle_running_target(self, job_id):
     """Walk all targets of a job, drive running ones and finish the job.

     Each target is inspected under its own lock (one second acquire
     timeout). Running targets are handed to ``handle_running_task``;
     the others are tallied by status. When every target has either
     succeeded or failed, ``job_callback`` is invoked with the success
     status if all succeeded and with the fail status otherwise.

     :param job_id: Identifier of the job node below ``<root>/jobs``.
     """
     logger.info("handle_running_target start: job_id={}".format(job_id))
     node = '{}/jobs/{}/targets'.format(self.root, job_id)
     # This walks the status of every host under the job; with a large
     # number of hosts, performance needs to be considered.
     targets = self.zk.get_children(node)
     target_success_count = 0
     target_fail_count = 0
     target_init_count = 0
     target_running_count = 0
     for target in targets:
         target_lock_node = '{}/{}/lock'.format(node, target)
         self.zk.ensure_path(target_lock_node)
         target_lock = Lock(self.zk, target_lock_node)
         try:
             if target_lock.acquire(timeout=1):
                 logger.info(
                     "Target Lock acquire: job_id={}, target={}".format(
                         job_id, target))
                 logger.info(
                     "handle_running_target start: job_id={}, target={}".
                     format(job_id, target))
                 path = '{}/{}'.format(node, target)
                 target_value, _ = self.zk.get(path)
                 target_value = json.loads(target_value.decode())
                 # Shape of the stored value, as sketched by the original
                 # author:
                 #   {"status": 0,
                 #    "current_task": "offline",
                 #    "next_task": "stop_service"}
                 target_status = target_value['status']
                 target_running_task = target_value['current_task']
                 if target_status == TargetStatus.running.value:
                     # Count the running target here. The original had a
                     # second, unreachable ``running`` branch for the
                     # counter, so it always stayed at zero.
                     target_running_count += 1
                     # Handle a running target.
                     self.handle_running_task(job_id, target,
                                              target_running_task)
                 elif target_status == TargetStatus.success.value:
                     target_success_count += 1
                 elif target_status == TargetStatus.fail.value:
                     target_fail_count += 1
                 elif target_status == TargetStatus.init.value:
                     target_init_count += 1
                 else:
                     logger.error(
                         "handle running target: unexpected target status, target_status={}"
                         .format(target_status))
         except LockTimeout:
             logger.error(
                 'Target lock timeout: job_id={}, target={}'.format(
                     job_id, target))
         finally:
             # release() is attempted whether or not acquire succeeded;
             # its result decides which message is logged.
             if target_lock.release():
                 logger.info(
                     'Target lock release: success, job_id={}, target={}'.
                     format(job_id, target))
             else:
                 logger.error(
                     'Target lock release: fail, job_id={}, target={}'.
                     format(job_id, target))
     # Job summary information.
     logger.info(
         "job targets status detail: jobid={}, targets_count={}, target_init_count={}, target_running_count={}, target_success_count={}, target_fail_count={}"
         .format(job_id, len(targets), target_init_count,
                 target_running_count, target_success_count,
                 target_fail_count))
     if (target_success_count + target_fail_count) == len(targets):
         logger.info(
             "job is finished: jobid={}, targets_count={}, target_success_count={}, target_fail_count={}"
             .format(job_id, len(targets), target_success_count,
                     target_fail_count))
         # End point of the job.
         if target_success_count == len(targets):
             self.job_callback(job_id, JobStatus.success.value)
         else:
             self.job_callback(job_id, JobStatus.fail.value)
     else:
         logger.info(
             "job is not finished: jobid={},target_count: {}, job target_success_count: {}"
             .format(job_id, len(targets), target_success_count))
예제 #17
0
class ZooKeeper(object):
    '''
    Class implementing the ZooKeeper interface.

    This class uses the facade design pattern to keep common interaction
    with the ZooKeeper API simple and consistent for the caller, and
    limits coupling between objects. It allows for more complex interactions
    by providing direct access to the client connection when needed (though
    that is discouraged). It also provides for a convenient entry point for
    testing only ZooKeeper interactions.

    Most API calls reference an image name only, as the path for the znode
    for that image is calculated automatically. And image names are assumed
    to be unique.

    If you will have multiple threads needing this API, each thread should
    instantiate their own ZooKeeper object. It should not be shared.
    '''

    log = logging.getLogger("nodepool.zk.ZooKeeper")

    IMAGE_ROOT = "/nodepool/image"

    def __init__(self, client=None):
        '''
        Initialize the ZooKeeper object.

        :param client: A pre-connected client. Optionally, you may choose
            to use the connect() call.
        '''
        self.client = client
        self._current_lock = None

    #========================================================================
    # Private Methods
    #========================================================================

    def _imagePath(self, image):
        return "%s/%s" % (self.IMAGE_ROOT, image)

    def _imageBuildsPath(self, image):
        return "%s/builds" % self._imagePath(image)

    def _imageLockPath(self, image):
        return "%s/lock" % self._imageBuildsPath(image)

    def _imageUploadPath(self, image, build_number, provider):
        return "%s/%s/provider/%s/images" % (self._imageBuildsPath(image),
                                             build_number,
                                             provider)
    def _dictToStr(self, data):
        return json.dumps(data)

    def _strToDict(self, data):
        return json.loads(data)

    def _getImageLock(self, image, blocking=True, timeout=None):
        '''
        Acquire the lock for an image and remember it on this object.

        The lock object is stored in ``self._current_lock`` before the
        acquire attempt, so it is set even when acquiring fails.

        :param str image: Name of the image to lock.
        :param bool blocking: Passed straight to the lock's ``acquire``.
        :param timeout: Passed straight to the lock's ``acquire``.

        :raises: TimeoutException if acquiring timed out. ZKLockException
            if ``acquire`` reported that the lock was not obtained.
        '''
        lock_path = self._imageLockPath(image)

        try:
            # Make sure the image znode exists before locking below it.
            self.client.ensure_path(self._imagePath(image))
            image_lock = Lock(self.client, lock_path)
            self._current_lock = image_lock
            acquired = image_lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise npe.TimeoutException(
                "Timeout trying to acquire lock %s" % lock_path)

        if acquired:
            return

        # A non-blocking attempt may come back without the lock because
        # someone else is holding it.
        raise npe.ZKLockException("Did not get lock on %s" % lock_path)

    def _getImageBuildLock(self, image, blocking=True, timeout=None):
        '''
        Lock an image and create the znode for its next build.

        Unlike _getImageLock(), this also creates a new build znode,
        numbered one above the current maximum, and returns that number.

        :param str image: Name of the image to lock.
        :param bool blocking: Forwarded to _getImageLock().
        :param timeout: Forwarded to _getImageLock().

        :returns: The number of the newly created build znode.
        '''
        self._getImageLock(image, blocking, timeout)

        # The next build number is one past the highest existing one.
        next_build = 1 + self.getMaxBuildId(image)
        build_path = self._imageBuildsPath(image) + "/%s" % next_build
        self.client.create(build_path)

        return next_build

    def _connection_listener(self, state):
        '''
        Log every Kazoo connection state change at debug level.

        .. warning:: This method must not block.

        :param state: The new connection state. Anything other than
            LOST or SUSPENDED is logged as CONNECTED.
        '''
        if state == KazooState.LOST:
            message = "ZooKeeper connection: LOST"
        elif state == KazooState.SUSPENDED:
            message = "ZooKeeper connection: SUSPENDED"
        else:
            message = "ZooKeeper connection: CONNECTED"
        self.log.debug(message)


    #========================================================================
    # Public Methods
    #========================================================================

    def connect(self, host_list, read_only=False):
        '''
        Establish a connection with ZooKeeper cluster.

        Convenience method if a pre-existing ZooKeeper connection is not
        supplied to the ZooKeeper object at instantiation time.

        :param list host_list: A list of dicts (one per server) defining
            the ZooKeeper cluster servers.

        :param bool read_only: If True, establishes a read-only connection.
        '''
        if not self.client:
            hosts = buildZooKeeperHosts(host_list)
            self.client = KazooClient(hosts=hosts, read_only=read_only)
            self.client.add_listener(self._connection_listener)
            self.client.start()

    def disconnect(self):
        '''
        Close the ZooKeeper cluster connection.

        You should call this method if you used connect() to establish a
        cluster connection.
        '''
        if self.client:
            self.client.stop()

    def getMaxBuildId(self, image):
        '''
        Return the highest build number recorded for an image.

        Image builds are integer znodes, which are children of the 'builds'
        parent znode.

        :param str image: The image name.

        :returns: An int value for the max existing image build number, or
            zero if none exist.

        :raises: ZKException if the image build path is not found.
        '''
        builds_path = self._imageBuildsPath(image)

        if not self.client.exists(builds_path):
            raise npe.ZKException(
                "Image build path not found for image %s" % image
            )

        highest = 0
        # `or ()` keeps a falsy children result from being iterated.
        for name in self.client.get_children(builds_path) or ():
            # The lock znode lives next to the builds; it is not a build.
            if name == 'lock':
                continue
            number = int(name)
            if number > highest:
                highest = number
        return highest

    def getMaxImageUploadId(self, image, build_number, provider):
        '''
        Find the highest image upload number for a given image for a provider.

        For a given image build, it may have been uploaded one or more times
        to a provider (with once being the most common case). Each upload is
        given its own znode, which is a integer increased by one for each
        upload. This method gets the highest numbered znode.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.

        :returns: An int value for the max existing image upload number, or
            zero if none exist.

        :raises: ZKException if the image upload path is not found.
        '''
        path = self._imageUploadPath(image, build_number, provider)

        if not self.client.exists(path):
            # Report the image name in the "of image" slot; the original
            # formatted the provider name there.
            raise npe.ZKException(
                "Image upload path not found for build %s of image %s" % (
                    build_number, image)
            )

        max_found = 0
        children = self.client.get_children(path)
        if children:
            max_found = max([int(child) for child in children])
        return max_found

    @contextmanager
    def imageLock(self, image, blocking=True, timeout=None):
        '''
        Context manager that holds the lock of an image.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. Unlike
        imageBuildLock(), no new build node is created; the value bound
        by the ``with`` statement is whatever _getImageLock() returns.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            # Acquire inside the try so that a failed attempt is cleaned
            # up by the finally clause as well.
            result = self._getImageLock(image, blocking, timeout)
            yield result
        finally:
            held = self._current_lock
            if held:
                held.release()
                self._current_lock = None

    @contextmanager
    def imageBuildLock(self, image, blocking=True, timeout=None):
        '''
        Context manager that holds an image lock for a new build.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. A new
        znode is created with the next highest build number, and that
        number is the value bound by the ``with`` statement.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :returns: A integer to use for the new build id.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            # Acquire inside the try so that a failed attempt is cleaned
            # up by the finally clause as well.
            build_number = self._getImageBuildLock(image, blocking, timeout)
            yield build_number
        finally:
            held = self._current_lock
            if held:
                held.release()
                self._current_lock = None

    def getBuild(self, image, build_number):
        '''
        Fetch the stored data for one build of an image.

        :param str image: The image name.
        :param int build_number: The image build number.

        :returns: The dictionary of build data.

        :raises: ZKException if there is no znode for the requested build.
        '''
        build_path = "%s/%s" % (self._imageBuildsPath(image), build_number)

        if self.client.exists(build_path):
            raw, _stat = self.client.get(build_path)
            return self._strToDict(raw)

        raise npe.ZKException(
            "Cannot find build data (image: %s, build: %s)" % (
                image, build_number)
        )

    def storeBuild(self, image, build_number, build_data):
        '''
        Store the data for an existing image build.

        The build znode must already exist; its contents are replaced in
        their entirety. There is no partial updating. The build data is
        expected to be represented as a dict. This dict may contain any
        data, as appropriate.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param dict build_data: The build data.

        :raises: ZKException if the build znode does not exist (it is created
            with the imageBuildLock() context manager).
        '''
        build_path = "%s/%s" % (self._imageBuildsPath(image), build_number)

        # The build path won't exist until it's created with the build lock
        build_exists = self.client.exists(build_path)
        if not build_exists:
            raise npe.ZKException(
                "%s does not exist. Did you lock it?" % build_path)

        serialized = self._dictToStr(build_data)
        self.client.set(build_path, serialized)

    def getImageUpload(self, image, build_number, provider,
                         upload_number=None):
        '''
        Fetch the stored data for one upload of an image build.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param int upload_number: The image upload number. If this is None,
            the most recent upload data is returned.

        :returns: A dict of upload data.

        :raises: ZKException if the image upload path is not found.
        '''
        # No explicit upload requested: fall back to the highest-numbered one.
        if upload_number is None:
            upload_number = self.getMaxImageUploadId(
                image, build_number, provider)

        uploads_root = self._imageUploadPath(image, build_number, provider)
        upload_path = "%s/%s" % (uploads_root, upload_number)

        if self.client.exists(upload_path):
            raw, _stat = self.client.get(upload_path)
            return self._strToDict(raw)

        raise npe.ZKException(
            "Cannot find upload data "
            "(image: %s, build: %s, provider: %s, upload: %s)" % (
                image, build_number, provider, upload_number)
        )

    def storeImageUpload(self, image, build_number, provider, image_data):
        '''
        Store the built image's upload data for the given provider.

        A new upload znode is created whose number is one greater than the
        highest existing upload number for this image/build/provider.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param dict image_data: The image data we want to store.

        :returns: An int for the new upload id.

        :raises: ZKException for an invalid image build.
        '''
        # We expect the image builds path to already exist.
        # NOTE(review): only the image's builds root is checked here, not the
        # znode of the specific build number -- confirm that is intended.
        build_path = self._imageBuildsPath(image)
        if not self.client.exists(build_path):
            # Report the image name; the message previously substituted the
            # provider name into the "image" slot.
            raise npe.ZKException(
                "Cannot find build %s of image %s" % (build_number, image)
            )

        # Generate a path for the upload. This doesn't have to exist yet
        # since we'll create new provider/upload ID znodes automatically.
        path = self._imageUploadPath(image, build_number, provider)

        # We need to create the provider upload path if it doesn't exist
        # before we attempt to get the max image upload ID next.
        self.client.ensure_path(path)

        # Get a new upload ID
        next_id = self.getMaxImageUploadId(image, build_number, provider) + 1

        path = path + "/%s" % next_id
        self.client.create(path, self._dictToStr(image_data))

        return next_id
예제 #18
0
class ZooKeeper(object):
    '''
    Class implementing the ZooKeeper interface.

    This class uses the facade design pattern to keep common interaction
    with the ZooKeeper API simple and consistent for the caller, and
    limits coupling between objects. It allows for more complex interactions
    by providing direct access to the client connection when needed (though
    that is discouraged). It also provides for a convenient entry point for
    testing only ZooKeeper interactions.

    Most API calls reference an image name only, as the path for the znode
    for that image is calculated automatically. And image names are assumed
    to be unique.

    If you will have multiple threads needing this API, each thread should
    instantiate their own ZooKeeper object. It should not be shared.
    '''

    log = logging.getLogger("nodepool.zk.ZooKeeper")

    # Root znode under which every image gets its own subtree.
    IMAGE_ROOT = "/nodepool/image"

    def __init__(self, client=None):
        '''
        Initialize the ZooKeeper object.

        :param client: A pre-connected client. Optionally, you may choose
            to use the connect() call.
        '''
        self.client = client
        # The single image lock held through this object, or None.
        self._current_lock = None

    #========================================================================
    # Private Methods
    #========================================================================

    def _imagePath(self, image):
        '''Return the znode path of an image.'''
        return "%s/%s" % (self.IMAGE_ROOT, image)

    def _imageBuildsPath(self, image):
        '''Return the znode path holding all builds of an image.'''
        return "%s/builds" % self._imagePath(image)

    def _imageLockPath(self, image):
        '''Return the lock znode path of an image (under its builds path).'''
        return "%s/lock" % self._imageBuildsPath(image)

    def _imageUploadPath(self, image, build_number, provider):
        '''Return the znode path holding a build's uploads for a provider.'''
        return "%s/%s/provider/%s/images" % (self._imageBuildsPath(image),
                                             build_number, provider)

    def _dictToStr(self, data):
        '''
        Serialize a dict into the byte string stored in a znode.

        :param dict data: The data to serialize.

        :returns: The JSON encoding of the data, as bytes.
        '''
        # Znode values are byte strings. json.dumps() emits ASCII-only text
        # by default, so the encode is lossless (and leaves a Python 2 str
        # unchanged).
        return json.dumps(data).encode('utf8')

    def _strToDict(self, data):
        '''
        Deserialize znode contents back into a dict.

        :param data: JSON document, either as bytes or as text.

        :returns: The decoded data.
        '''
        if isinstance(data, bytes):
            data = data.decode('utf8')
        return json.loads(data)

    def _getImageLock(self, image, blocking=True, timeout=None):
        '''
        Acquire the lock for an image and remember it as the current lock.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired.

        :raises: TimeoutException if the lock acquisition timed out.
            ZKLockException if the lock was not obtained.
        '''
        # If we don't already have a znode for this image, create it.
        image_lock = self._imageLockPath(image)
        try:
            self.client.ensure_path(self._imagePath(image))
            lock = Lock(self.client, image_lock)
            have_lock = lock.acquire(blocking, timeout)
        except kze.LockTimeout:
            raise npe.TimeoutException("Timeout trying to acquire lock %s" %
                                       image_lock)

        # If we aren't blocking, it's possible we didn't get the lock
        # because someone else has it.
        if not have_lock:
            raise npe.ZKLockException("Did not get lock on %s" % image_lock)

        # Only a lock we really hold is recorded, so a failed attempt never
        # leaves this object claiming that a lock is held.
        self._current_lock = lock

    def _getImageBuildLock(self, image, blocking=True, timeout=None):
        '''
        This differs from _getImageLock() in that it creates a new build
        znode and returns its number to the caller.
        '''
        self._getImageLock(image, blocking, timeout)

        # Create new znode with new build_number
        build_number = self.getMaxBuildId(image) + 1
        self.client.create(self._imageBuildsPath(image) + "/%s" % build_number)

        return build_number

    def _releaseCurrentLock(self):
        '''Release the currently held image lock, if any, and forget it.'''
        if self._current_lock:
            self._current_lock.release()
            self._current_lock = None

    def _connection_listener(self, state):
        '''
        Listener method for Kazoo connection state changes.

        .. warning:: This method must not block.
        '''
        if state == KazooState.LOST:
            self.log.debug("ZooKeeper connection: LOST")
        elif state == KazooState.SUSPENDED:
            self.log.debug("ZooKeeper connection: SUSPENDED")
        else:
            self.log.debug("ZooKeeper connection: CONNECTED")

    #========================================================================
    # Public Methods
    #========================================================================

    def connect(self, host_list, read_only=False):
        '''
        Establish a connection with ZooKeeper cluster.

        Convenience method if a pre-existing ZooKeeper connection is not
        supplied to the ZooKeeper object at instantiation time. Nothing is
        done when this object already has a client.

        :param list host_list: A list of dicts (one per server) defining
            the ZooKeeper cluster servers.

        :param bool read_only: If True, establishes a read-only connection.
        '''
        if not self.client:
            hosts = buildZooKeeperHosts(host_list)
            self.client = KazooClient(hosts=hosts, read_only=read_only)
            self.client.add_listener(self._connection_listener)
            self.client.start()

    def disconnect(self):
        '''
        Close the ZooKeeper cluster connection.

        You should call this method if you used connect() to establish a
        cluster connection.
        '''
        if self.client:
            self.client.stop()

    def getMaxBuildId(self, image):
        '''
        Find the highest build number for a given image.

        Image builds are integer znodes, which are children of the 'builds'
        parent znode.

        :param str image: The image name.

        :returns: An int value for the max existing image build number, or
            zero if none exist.

        :raises: ZKException if the image build path is not found.
        '''
        path = self._imageBuildsPath(image)

        if not self.client.exists(path):
            raise npe.ZKException("Image build path not found for image %s" %
                                  image)

        children = self.client.get_children(path) or []
        # There can be a lock znode that we should ignore. The leading zero
        # is the result when there are no builds at all.
        return max([0] + [int(child) for child in children
                          if child != 'lock'])

    def getMaxImageUploadId(self, image, build_number, provider):
        '''
        Find the highest image upload number for a given image for a provider.

        For a given image build, it may have been uploaded one or more times
        to a provider (with once being the most common case). Each upload is
        given its own znode, which is a integer increased by one for each
        upload. This method gets the highest numbered znode.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.

        :returns: An int value for the max existing image upload number, or
            zero if none exist.

        :raises: ZKException if the image upload path is not found.
        '''
        path = self._imageUploadPath(image, build_number, provider)

        if not self.client.exists(path):
            # Report the image name; the message previously substituted the
            # provider name into the "image" slot.
            raise npe.ZKException(
                "Image upload path not found for build %s of image %s" %
                (build_number, image))

        children = self.client.get_children(path) or []
        # The leading zero is the result when there are no uploads at all.
        return max([0] + [int(child) for child in children])

    @contextmanager
    def imageLock(self, image, blocking=True, timeout=None):
        '''
        Context manager to use for locking an image.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. This is
        different from imageBuildLock() in that a new build node is NOT
        created and returned.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            yield self._getImageLock(image, blocking, timeout)
        finally:
            self._releaseCurrentLock()

    @contextmanager
    def imageBuildLock(self, image, blocking=True, timeout=None):
        '''
        Context manager to use for locking new image builds.

        Obtains a write lock for the specified image. A thread of control
        using this API may have only one image locked at a time. A new
        znode is created with the next highest build number. This build
        number is returned to the caller.

        :param str image: Name of the image to lock
        :param bool blocking: Whether or not to block on trying to
            acquire the lock
        :param int timeout: When blocking, how long to wait for the lock
            to get acquired. None, the default, waits forever.

        :returns: A integer to use for the new build id.

        :raises: TimeoutException if we failed to acquire the lock when
            blocking with a timeout. ZKLockException if we are not blocking
            and could not get the lock, or a lock is already held.
        '''
        if self._current_lock:
            raise npe.ZKLockException("A lock is already held.")

        try:
            # Acquired inside the try: if creating the build znode fails
            # after the lock was obtained, the lock is still released.
            yield self._getImageBuildLock(image, blocking, timeout)
        finally:
            self._releaseCurrentLock()

    def getBuild(self, image, build_number):
        '''
        Retrieve the image build data.

        :param str image: The image name.
        :param int build_number: The image build number.

        :returns: The dictionary of build data.

        :raises: ZKException if the build znode does not exist.
        '''
        path = self._imageBuildsPath(image) + "/%s" % build_number

        if not self.client.exists(path):
            raise npe.ZKException(
                "Cannot find build data (image: %s, build: %s)" %
                (image, build_number))

        data, stat = self.client.get(path)
        return self._strToDict(data)

    def storeBuild(self, image, build_number, build_data):
        '''
        Store the image build data.

        The build znode must already exist; its data is replaced in its
        entirety. There is no partial updating. The build data is expected
        to be represented as a dict. This dict may contain any data, as
        appropriate.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param dict build_data: The build data.

        :raises: ZKException if the build znode does not exist (it is created
            with the imageBuildLock() context manager).
        '''
        path = self._imageBuildsPath(image) + "/%s" % build_number

        # The build path won't exist until it's created with the build lock
        if not self.client.exists(path):
            raise npe.ZKException("%s does not exist. Did you lock it?" % path)

        self.client.set(path, self._dictToStr(build_data))

    def getImageUpload(self,
                       image,
                       build_number,
                       provider,
                       upload_number=None):
        '''
        Retrieve the image upload data.

        :param str image: The image name.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param int upload_number: The image upload number. If this is None,
            the most recent upload data is returned.

        :returns: A dict of upload data.

        :raises: ZKException if the image upload path is not found.
        '''
        if upload_number is None:
            upload_number = self.getMaxImageUploadId(image, build_number,
                                                     provider)

        path = self._imageUploadPath(image, build_number, provider)
        path = path + "/%s" % upload_number

        if not self.client.exists(path):
            raise npe.ZKException(
                "Cannot find upload data "
                "(image: %s, build: %s, provider: %s, upload: %s)" %
                (image, build_number, provider, upload_number))

        data, stat = self.client.get(path)
        return self._strToDict(data)

    def storeImageUpload(self, image, build_number, provider, image_data):
        '''
        Store the built image's upload data for the given provider.

        A new upload znode is created whose number is one greater than the
        highest existing upload number for this image/build/provider.

        :param str image: The image name for which we have data.
        :param int build_number: The image build number.
        :param str provider: The provider name owning the image.
        :param dict image_data: The image data we want to store.

        :returns: An int for the new upload id.

        :raises: ZKException for an invalid image build.
        '''
        # We expect the image builds path to already exist.
        # NOTE(review): only the image's builds root is checked here, not the
        # znode of the specific build number -- confirm that is intended.
        build_path = self._imageBuildsPath(image)
        if not self.client.exists(build_path):
            # Report the image name; the message previously substituted the
            # provider name into the "image" slot.
            raise npe.ZKException("Cannot find build %s of image %s" %
                                  (build_number, image))

        # Generate a path for the upload. This doesn't have to exist yet
        # since we'll create new provider/upload ID znodes automatically.
        path = self._imageUploadPath(image, build_number, provider)

        # We need to create the provider upload path if it doesn't exist
        # before we attempt to get the max image upload ID next.
        self.client.ensure_path(path)

        # Get a new upload ID
        next_id = self.getMaxImageUploadId(image, build_number, provider) + 1

        path = path + "/%s" % next_id
        self.client.create(path, self._dictToStr(image_data))

        return next_id