class ZooKeeperLock():
    """Connect to ZooKeeper and prepare a kazoo ``Lock`` for ``lock_name``.

    The lock path is built with ``os.path.join("/", "locks", lock_name)``.
    Exceptions raised while creating the client or the lock are caught and
    reported through ``init_ret`` / ``err_str`` instead of propagating:

    * ``init_ret`` -- ``True`` after a successful setup, ``False`` otherwise.
    * ``err_str`` -- empty on success, otherwise the logged failure message.
    """

    def __init__(self, hosts, id_str, lock_name, logger=None, timeout=1):
        """Store the settings and immediately try to build the lock.

        :param hosts: ZooKeeper host string handed to ``KazooClient``.
        :param id_str: Caller-supplied identifier; only stored here.
        :param lock_name: Last path component of the lock znode.
        :param logger: Optional logger handed to ``KazooClient``.
        :param timeout: Passed both to the ``KazooClient`` constructor and
            to ``KazooClient.start``.
        """
        self.hosts = hosts
        self.id_str = id_str
        self.zk_client = None
        self.timeout = timeout
        self.logger = logger
        self.name = lock_name
        self.lock_handle = None
        self.create_lock()

    def create_lock(self):
        """Create the client and the lock handle, recording any failure."""
        # Reset the status first so both attributes always exist and a
        # successful (re)run is reported as such.
        self.init_ret = True
        self.err_str = ""

        try:
            self.zk_client = KazooClient(hosts=self.hosts,
                                         logger=self.logger,
                                         timeout=self.timeout)
            self.zk_client.start(timeout=self.timeout)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            # Without a working client there is nothing to build a lock on.
            return

        try:
            lock_path = os.path.join("/", "locks", self.name)
            self.lock_handle = Lock(self.zk_client, lock_path)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            return
def put(self, value, priority=100, timeout=None):
    """Add one entry to the queue while holding the uniqueness lock.

    :param value: Byte string to put into the queue.
    :param priority: An optional priority as an integer with at most
        3 digits. Lower values signify higher priority.
    :param timeout: Maximum waiting time in seconds for the lock. If
        None, wait until the lock is released.
    :raises ValueError: if the value is already queued and duplicates
        are not being ignored.
    """
    self._check_put_arguments(value, priority)

    uniq_lock = Lock(self.client, self._uniq_lock_path)
    uniq_lock.acquire(timeout=timeout)
    try:
        if not self._check_uniq(value):
            # Duplicate: either silently skip it or report it.
            if not self._ignore_duplicates:
                raise ValueError(
                    "Duplicate value {value}".format(value=value))
            return

        self._ensure_paths()
        entry_prefix = "{path}/{prefix}-{priority:03d}-".format(
            path=self._entries_path,
            prefix=self.entry,
            priority=priority)
        self.client.create(entry_prefix, value, sequence=True)
    finally:
        uniq_lock.release()
def lockHoldRequest(self, request, blocking=True, timeout=None):
    '''
    Acquire the lock for a hold request.

    On success the acquired lock object is stored on the request as its
    `lock` attribute.

    :param HoldRequest request: The hold request to lock.
    :param blocking: Passed through to the lock's ``acquire`` call.
    :param timeout: Passed through to the lock's ``acquire`` call.

    :raises: LockException if the request has no ID, if acquiring the
        lock times out, or if the lock was not obtained.
    '''
    if not request.id:
        raise LockException(
            "Hold request without an ID cannot be locked: %s" % request)

    path = "%s/%s/lock" % (self.HOLD_REQUEST_ROOT, request.id)

    try:
        hold_lock = Lock(self.client, path)
        acquired = hold_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise LockException("Timeout trying to acquire lock %s" % path)

    if acquired:
        request.lock = hold_lock
        return

    # A non-blocking attempt can come back empty-handed when someone
    # else already holds the lock.
    raise LockException("Did not get lock on %s" % path)
def lockNode(self, node, blocking=True, timeout=None):
    '''
    Acquire the lock for a node and attach it to the node object.

    This should be called as soon as a request is fulfilled and the lock
    held for as long as the node is in-use. It can be used by nodepool to
    detect if Zuul has gone offline and the node should be reclaimed.

    :param Node node: The node which should be locked.
    :param blocking: Passed through to the lock's ``acquire`` call.
    :param timeout: Passed through to the lock's ``acquire`` call.

    :raises: LockException if acquiring the lock times out or the lock
        was not obtained.
    '''
    lock_path = '%s/%s/lock' % (self.NODE_ROOT, node.id)

    try:
        node_lock = Lock(self.client, lock_path)
        got_it = node_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise LockException(
            "Timeout trying to acquire lock %s" % lock_path)

    if got_it:
        node.lock = node_lock
    else:
        # When not blocking, the lock may simply be held by someone else.
        raise LockException("Did not get lock on %s" % lock_path)
class ZooKeeperLock():
    """Connect to ZooKeeper and prepare a kazoo ``Lock`` at ``lock_path``.

    Exceptions raised while creating the client or the lock are caught and
    reported through ``init_ret`` / ``err_str`` instead of propagating:

    * ``init_ret`` -- ``True`` after a successful setup, ``False`` otherwise.
    * ``err_str`` -- empty on success, otherwise the failure message.
    """

    def __init__(self, hosts, lock_name, lock_path):
        """Store the settings and immediately try to build the lock.

        :param hosts: ZooKeeper host string handed to ``KazooClient``.
        :param lock_name: Name of the lock; only stored here.
        :param lock_path: Znode path handed to ``Lock``.
        """
        self.hosts = hosts
        self.zk_client = None
        self.name = lock_name
        self.lock_path = lock_path
        self.lock_handle = None
        self.create_lock()

    def create_lock(self):
        """Create the client and the lock handle, recording any failure."""
        # Reset the status first so both attributes always exist and a
        # successful (re)run is reported as such.
        self.init_ret = True
        self.err_str = ""

        try:
            self.zk_client = KazooClient(hosts=self.hosts)
            self.zk_client.start()
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(self.err_str)
            return

        try:
            self.lock_handle = Lock(self.zk_client, self.lock_path)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
            CONF.log.exception(self.err_str)
            return
class ZooKeeperLock:
    """Connect to ZooKeeper and prepare a kazoo ``Lock`` at ``lock_path``.

    Exceptions raised while creating the client or the lock are caught and
    reported through ``init_ret`` / ``err_str`` instead of propagating:

    * ``init_ret`` -- ``True`` after a successful setup, ``False`` otherwise.
    * ``err_str`` -- empty on success, otherwise the logged failure message.
    """

    def __init__(self, hosts, lock_path, timeout=1, logger=None):
        """Store the settings and immediately try to build the lock.

        :param hosts: ZooKeeper host string handed to ``KazooClient``.
        :param lock_path: Znode path handed to ``Lock``.
        :param timeout: Passed both to the ``KazooClient`` constructor and
            to ``KazooClient.start``.
        :param logger: Optional logger handed to ``KazooClient``.
        """
        self.hosts = hosts
        self.zk_client = None
        self.timeout = timeout
        self.logger = logger
        self.lock_handle = None
        self.lock_path = lock_path
        self.create_lock()

    def create_lock(self):
        """Create the client and the lock handle, recording any failure."""
        # Reset the status first so both attributes always exist and a
        # successful (re)run is reported as such.
        self.init_ret = True
        self.err_str = ""

        try:
            self.zk_client = KazooClient(hosts=self.hosts,
                                         logger=self.logger,
                                         timeout=self.timeout)
            self.zk_client.start(timeout=self.timeout)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            # Without a working client there is nothing to build a lock on.
            return

        try:
            self.lock_handle = Lock(self.zk_client, self.lock_path)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
            logging.error(self.err_str)
            return
def lockNodeRequest(self, request, blocking=True, timeout=None):
    '''
    Acquire the lock for a node request.

    On success the acquired lock is stored on the request as its `lock`
    attribute and the request is passed to ``updateNodeRequest``.

    :param NodeRequest request: The request to lock.
    :param bool blocking: Whether or not to block on trying to
        acquire the lock
    :param int timeout: When blocking, how long to wait for the lock
        to get acquired. None, the default, waits forever.

    :raises: LockException if acquiring the lock times out, if the
        request node does not exist, or if the lock was not obtained.
    '''
    path = "%s/%s" % (self.REQUEST_LOCK_ROOT, request.id)

    try:
        request_lock = Lock(self.client, path)
        acquired = request_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise LockException("Timeout trying to acquire lock %s" % path)
    except kze.NoNodeError:
        # A missing request is logged and then reported like any other
        # failure to get the lock.
        self.log.error("Request not found for locking: %s", request)
        acquired = False

    if not acquired:
        # When not blocking, the lock may simply be held by someone else.
        raise LockException("Did not get lock on %s" % path)

    request.lock = request_lock
    self.updateNodeRequest(request)
def acquire_name(client, hostname_path):
    """Claim one name slot under ROOT and mirror its data to a file.

    The function first takes a lease on a semaphore sized to the number of
    children of ROOT, then tries to lock each child non-blockingly. Once a
    slot lock is held, a data watch writes the slot's data to
    ``hostname_path`` and the function sleeps in a loop. Neither ``while
    True`` loop in the slot-search phase contains a ``break``, so the
    function only leaves through an exception; the ``finally`` clauses then
    release the slot lock and the semaphore lease.

    NOTE(review): this block uses Python 2 print statements.

    :param client: ZooKeeper client used for all calls below
        (presumably a started KazooClient -- confirm with the caller).
    :param hostname_path: Path of the file the watched slot data is
        written to.
    """
    # The watch callback (defined outside this block) fires when the set of
    # slots under ROOT changes.
    name_slots = client.get_children(ROOT, watch=bail_if_slots_change)
    hostname = platform.node()
    # One lease per existing slot, identified by this machine's host name.
    semaphore = Semaphore(
        client=client,
        path=LOCK,
        identifier=hostname,
        max_leases=len(name_slots),
    )

    # waiting on the semaphore indefinitely seems to cause things to hang up
    # sometimes. instead, we'll cause ourselves to time out and retry if things
    # are taking a while.
    while True:
        print "waiting for name semaphore"
        try:
            semaphore.acquire(timeout=60)
        except kazoo.exceptions.LockTimeout:
            continue
        else:
            break

    try:
        # OK, we're one of the chosen servers. let's find a name that no one is
        # using.
        print "name semaphore acquired. finding name."
        while True:
            # Re-read the slots on every pass instead of reusing the list
            # fetched before the semaphore was acquired.
            name_slots = client.get_children(ROOT)
            for slot in name_slots:
                slot_path = os.path.join(ROOT, slot)
                slot_lock = Lock(client, slot_path, hostname)
                if slot_lock.acquire(blocking=False):
                    # Rewrite the hostname file with the slot's data each
                    # time the watch is invoked.
                    @client.DataWatch(slot_path)
                    def on_name_change(data, stat):
                        print "got name data %r." % data
                        with open(hostname_path, "w") as hostname_file:
                            print >> hostname_file, data

                    # just sit around doing nothing for ever
                    try:
                        while True:
                            time.sleep(1)
                    finally:
                        # explicitly releasing the lock decreases delay until
                        # someone else can get this slot.
                        slot_lock.release()
            else:
                # The for loop has no break, so this runs whenever a pass
                # over the slots completes.
                # failed to lock anything. likely waiting for a session to
                # expire. just pause for a little while.
                print "failed to find a name. will try again."
                time.sleep(1)
    finally:
        semaphore.release()
def test_get_predecessor(self):
    """Without extra lock patterns, the go-zk style node is not picked
    as the predecessor of the Python-style lock node.
    """
    go_node = "_c_8eb60557ba51e0da67eefc47467d3f34-lock-0000000031"
    py_node = "514e5a831836450cb1a56c741e990fd8__lock__0000000032"

    fake_client = mock.MagicMock()
    fake_client.get_children.return_value = [
        "hello",
        go_node,
        "world",
        py_node,
    ]

    lock = Lock(fake_client, "test")
    predecessor = lock._get_predecessor(py_node)
    assert predecessor is None
def test_get_predecessor_go(self):
    """With ``-lock-`` registered as an extra lock pattern, the go-zk
    style node is selected as the predecessor.
    """
    go_node = "_c_8eb60557ba51e0da67eefc47467d3f34-lock-0000000031"
    py_node = "514e5a831836450cb1a56c741e990fd8__lock__0000000032"

    fake_client = mock.MagicMock()
    fake_client.get_children.return_value = [
        "hello",
        go_node,
        "world",
        py_node,
    ]

    lock = Lock(fake_client, "test", extra_lock_patterns=["-lock-"])
    predecessor = lock._get_predecessor(py_node)
    assert predecessor == go_node
def create_lock(self):
    """Start a kazoo client and build the lock handle for ``lock_key``.

    Any exception from either step is printed and swallowed; the method
    then returns without completing the remaining steps.
    """
    # NOTE(review): the attribute name ``zk_clinet`` (sic) is kept as-is
    # because code outside this method may read it.
    try:
        client = KazooClient(hosts=self.hosts, timeout=self.timeout)
        self.zk_clinet = client
        client.start(timeout=self.timeout)
    except Exception as exc:
        print("Kazoo clinet create fail: %s" % str(exc))
        return

    try:
        znode = os.path.join("/", "locks", self.lock_key)
        self.lock_handle = Lock(self.zk_clinet, znode)
    except Exception as exc:
        print("client lock init fail: %s" % str(exc))
def _getImageLock(self, image, blocking=True, timeout=None):
    """Acquire the lock for an image and keep it in ``_current_lock``.

    :param image: Image identifier passed to the path helpers.
    :param blocking: Passed through to the lock's ``acquire`` call.
    :param timeout: Passed through to the lock's ``acquire`` call.

    :raises: npe.TimeoutException if acquiring the lock times out;
        npe.ZKLockException if the lock was not obtained.
    """
    image_lock = self._imageLockPath(image)

    try:
        # Make sure the image znode exists before locking beneath it.
        self.client.ensure_path(self._imagePath(image))
        # NOTE(review): the lock object is stored before it is acquired,
        # so ``_current_lock`` stays set even when acquisition fails.
        self._current_lock = Lock(self.client, image_lock)
        acquired = self._current_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise npe.TimeoutException(
            "Timeout trying to acquire lock %s" % image_lock)

    if acquired:
        return

    # When not blocking, the lock may simply be held by someone else.
    raise npe.ZKLockException("Did not get lock on %s" % image_lock)
def _getImageBuildLock(self, image, blocking=True, timeout=None):
    """Acquire and return the build lock for an image.

    :param image: Image identifier passed to ``_imageBuildLockPath``.
    :param blocking: Passed through to the lock's ``acquire`` call.
    :param timeout: Passed through to the lock's ``acquire`` call.

    :returns: The acquired lock object.
    :raises: npe.TimeoutException if acquiring the lock times out;
        npe.ZKLockException if the lock was not obtained.
    """
    lock_path = self._imageBuildLockPath(image)

    try:
        build_lock = Lock(self.client, lock_path)
        acquired = build_lock.acquire(blocking, timeout)
    except kze.LockTimeout:
        raise npe.TimeoutException(
            "Timeout trying to acquire lock %s" % lock_path)

    if acquired:
        return build_lock

    # When not blocking, the lock may simply be held by someone else.
    raise npe.ZKLockException("Did not get lock on %s" % lock_path)
def create_lock(self):
    """Start a kazoo client and build the lock handle for ``self.name``.

    Exceptions from either step are caught, not propagated. On failure
    ``init_ret`` is set to ``False``, ``err_str`` receives the message, and
    the message is logged with ``logging.error``.
    """
    try:
        self.zk_client = KazooClient(hosts=self.hosts,
                                     logger=self.logger,
                                     timeout=self.timeout)
        self.zk_client.start(timeout=self.timeout)
    except Exception as ex:
        self.init_ret = False
        self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
        logging.error(self.err_str)
        return

    try:
        lock_path = os.path.join("/", "locks", self.name)
        self.lock_handle = Lock(self.zk_client, lock_path)
    except Exception as ex:
        # Was ``self.int_ret`` (typo), which left ``init_ret`` untouched
        # and so hid lock-creation failures from callers.
        self.init_ret = False
        self.err_str = "Create lock failed! Exception: %s" % str(ex)
        logging.error(self.err_str)
        return
def put_all(self, values, priority=100, timeout=None):
    """Add several entries to the queue inside one transaction.

    The action only succeeds if all entries were put into the queue.

    :param values: A list of values to put into the queue.
    :param priority: An optional priority as an integer with at most
        3 digits. Lower values signify higher priority.
    :param timeout: Maximum waiting time in seconds for the lock. If
        None, wait until the lock is released.
    :raises TypeError: if ``values`` is not a list, ``priority`` is not
        an int, or an element is not a byte string.
    :raises ValueError: if ``priority`` is outside 0..999, or an element
        is a duplicate and duplicates are not being ignored.
    """
    if not isinstance(values, list):
        raise TypeError("values must be a list of byte strings")
    if not isinstance(priority, int):
        raise TypeError("priority must be an int")
    if not 0 <= priority <= 999:
        raise ValueError("priority must be between 0 and 999")

    uniq_lock = Lock(self.client, self._uniq_lock_path)
    uniq_lock.acquire(timeout=timeout)
    try:
        self._ensure_paths()
        with self.client.transaction() as txn:
            for item in values:
                if not isinstance(item, bytes):
                    raise TypeError("value must be a byte string")

                if not self._check_uniq(item):
                    # Duplicate: either silently skip it or report it.
                    if self._ignore_duplicates:
                        continue
                    raise ValueError(
                        "Duplicate value {value}".format(value=item))

                entry_prefix = "{path}/{prefix}-{priority:03d}-".format(
                    path=self._entries_path,
                    prefix=self.entry,
                    priority=priority)
                txn.create(entry_prefix, item, sequence=True)
    finally:
        uniq_lock.release()
class ZooKeeperLock():
    """Connect to ZooKeeper, prepare a kazoo ``Lock`` and store a value.

    The lock path is ``"PolicyCtrlCent/" + lock_path + "/" + lock_name``.
    Exceptions raised during setup are caught and reported through
    ``init_ret`` / ``err_str`` instead of propagating:

    * ``init_ret`` -- ``True`` after a successful setup, ``False`` otherwise.
    * ``err_str`` -- empty on success, otherwise the failure message.
    """

    def __init__(self, hosts, lock_path, lock_name, lock_value, timeout=1):
        """Store the settings and immediately try to build the lock.

        :param hosts: ZooKeeper host string handed to ``KazooClient``.
        :param lock_path: Middle component of the lock znode path.
        :param lock_name: Last component of the lock znode path.
        :param lock_value: Data written to the lock znode after the lock
            handle is created.
        :param timeout: Passed both to the ``KazooClient`` constructor and
            to ``KazooClient.start``.
        """
        self.hosts = hosts
        self.zk_client = None
        self.timeout = timeout
        self.name = lock_name
        self.lock_path = "PolicyCtrlCent/" + lock_path + "/" + lock_name
        self.lock_value = lock_value
        self.lock_handle = None
        self.create_lock()

    def create_lock(self):
        """Create the client, the lock handle and write ``lock_value``."""
        # Reset the status first so both attributes always exist and a
        # successful (re)run is reported as such.
        self.init_ret = True
        self.err_str = ""

        try:
            self.zk_client = KazooClient(hosts=self.hosts,
                                         timeout=self.timeout)

            # Print every connection state change reported by the client.
            @self.zk_client.add_listener
            def my_listener(state):
                if state == KazooState.LOST:
                    print("LOST")
                elif state == KazooState.SUSPENDED:
                    print("SUSPENDED")
                else:
                    print("Connected")

            self.zk_client.start(timeout=self.timeout)
            # NOTE(review): add_zk_auth is not defined in the visible part
            # of this class; it must be provided elsewhere.
            self.add_zk_auth()
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            # Stop here: continuing without a usable client would only
            # replace this root-cause message with a follow-on error.
            return

        try:
            print(self.lock_path)
            self.lock_handle = Lock(self.zk_client, self.lock_path)
            self.zk_client.set(self.lock_path, self.lock_value)
        except Exception as ex:
            self.init_ret = False
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
def schedule(self, task_id):
    """Run one scheduling pass for a task while holding its lock.

    The task definition is read as JSON from ``/<root>/tasks/<task_id>``.
    ``self.callback(task_id)`` is invoked, and its result returned, when
    the share of targets in state ``'F'`` exceeds the task's ``fail_rate``,
    or when every target is in one of ``'F'``, ``'S'``, ``'K'``. Otherwise
    more targets are chosen and handed to ``self.copy_task``.

    NOTE(review): the state letters presumably mean failed / succeeded /
    killed / waiting / running -- confirm against ``count``.

    A lock timeout is ignored, so the pass is skipped. The lock's
    ``release`` is always called, whether or not it was acquired.

    :param task_id: Identifier of the task to schedule.
    :returns: The result of ``self.callback`` when the task has ended,
        otherwise ``None``.
    """
    node = '/{}/tasks/{}'.format(self.root, task_id)
    lock_node = '{}/lock'.format(node)
    self.zk.ensure_path(lock_node)
    lock = Lock(self.zk, lock_node)
    try:
        if lock.acquire(timeout=1):
            data, _ = self.zk.get(node)
            task = json.loads(data.decode())
            parallel = task.get('parallel', 1)
            rate = task.get('fail_rate', 0)
            targets = self.get_targets(task_id)
            total = len(targets)

            # Skip the fail-rate check when there are no targets; the
            # division used to raise ZeroDivisionError in that case.
            if total and count(targets, ('F', )) / total > rate:
                return self.callback(task_id)
            if count(targets, ('F', 'S', 'K')) == total:
                return self.callback(task_id)

            # Top up to the configured parallelism, discounting targets
            # already in state 'W' or 'R'.
            wait_schedule = choose(
                targets, parallel - count(targets, ('W', 'R')))
            self.copy_task(wait_schedule, task)
    except LockTimeout:
        pass
    finally:
        lock.release()
class ZookeeperLock(object):
    """Connect to ZooKeeper and prepare a kazoo ``Lock`` for ``name``.

    The lock path is built with ``os.path.join("/", "locks", name)``.
    Exceptions raised during setup are caught, not propagated. After
    construction ``lockHandle`` is either the lock or ``None``, and
    ``err_str`` is empty or holds the failure message.
    """

    def __init__(self, hosts, name, logger=None, timeout=1):
        """Store the settings and immediately try to build the lock.

        :param hosts: ZooKeeper host string handed to ``KazooClient``, e.g.
            "192.168.56.112:2181,192.168.56.113:2181,192.168.56.112:2181".
        :param name: Last path component of the lock znode.
        :param logger: Optional logger handed to ``KazooClient``.
        :param timeout: Passed both to the ``KazooClient`` constructor and
            to ``KazooClient.start``.
        """
        self.hosts = hosts
        self.zkClient = None
        self.timeout = timeout
        self.logger = logger
        self.name = name
        # Defined up front so callers can always read them, even when
        # createLock() fails early.
        self.lockHandle = None
        self.err_str = ""
        self.createLock()

    def createLock(self):
        """Create the client and the lock handle, recording any failure."""
        try:
            self.zkClient = KazooClient(hosts=self.hosts,
                                        logger=self.logger,
                                        timeout=self.timeout)
            self.zkClient.start(timeout=self.timeout)
        except Exception as ex:
            self.err_str = "Create KazooClient failed! Exception: %s" % str(ex)
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(self.err_str)
            # Without a working client there is nothing to build a lock on.
            return

        try:
            lockPath = os.path.join("/", "locks", self.name)
            self.lockHandle = Lock(self.zkClient, lockPath)
        except Exception as ex:
            self.err_str = "Create lock failed! Exception: %s" % str(ex)
def handle_running_target(self, job_id):
    """Inspect every target of a job and finish the job when all are done.

    Each target under ``<root>/jobs/<job_id>/targets`` is locked in turn
    with a one-second timeout. Its JSON value is read and its ``status``
    tallied; running targets are also passed to ``handle_running_task``.
    A target whose lock times out is logged and left out of the tally.

    When the success and fail counts together equal the number of targets,
    ``job_callback`` is called with ``JobStatus.success.value`` if every
    target succeeded, otherwise with ``JobStatus.fail.value``.

    :param job_id: Identifier of the job whose targets are inspected.
    """
    logger.info("handle_running_target start: job_id={}".format(job_id))
    node = '{}/jobs/{}/targets'.format(self.root, job_id)

    # This iterates over the state of every host under the job; with many
    # hosts, performance needs to be considered.
    targets = self.zk.get_children(node)
    target_success_count = 0
    target_fail_count = 0
    target_init_count = 0
    target_running_count = 0

    for target in targets:
        target_lock_node = '{}/{}/lock'.format(node, target)
        self.zk.ensure_path(target_lock_node)
        target_lock = Lock(self.zk, target_lock_node)
        try:
            if target_lock.acquire(timeout=1):
                logger.info(
                    "Target Lock acquire: job_id={}, target={}".format(
                        job_id, target))
                logger.info(
                    "handle_running_target start: job_id={}, target={}".
                    format(job_id, target))
                path = '{}/{}'.format(node, target)
                target_value, _ = self.zk.get(path)
                target_value = json.loads(target_value.decode())
                # Example target_value (from the original note):
                #   {
                #       "status": 0,
                #       "current_task": "offline",
                #       "next_task": "stop_service",
                #   }
                target_status = target_value['status']
                target_running_task = target_value['current_task']

                if target_status == TargetStatus.running.value:
                    # Handle a running target. The count is taken here
                    # because the former second ``elif`` on the same
                    # condition was unreachable, leaving it always 0.
                    target_running_count += 1
                    self.handle_running_task(job_id, target,
                                             target_running_task)
                elif target_status == TargetStatus.success.value:
                    target_success_count += 1
                elif target_status == TargetStatus.fail.value:
                    target_fail_count += 1
                elif target_status == TargetStatus.init.value:
                    target_init_count += 1
                else:
                    logger.error(
                        "handle running target: unexpected target status, target_status={}"
                        .format(target_status))
        except LockTimeout:
            logger.error(
                'Target lock timeout: job_id={}, target={}'.format(
                    job_id, target))
        finally:
            # release() is attempted whether or not the lock was acquired;
            # its return value only selects the log message.
            if target_lock.release():
                logger.info(
                    'Target lock release: success, job_id={}, target={}'.
                    format(job_id, target))
            else:
                logger.error(
                    'Target lock release: fail, job_id={}, target={}'.
                    format(job_id, target))

    # Job summary information.
    logger.info(
        "job targets status detail: jobid={}, targets_count={}, target_init_count={}, target_running_count={}, target_success_count={}, target_fail_count={}"
        .format(job_id, len(targets), target_init_count,
                target_running_count, target_success_count,
                target_fail_count))

    if (target_success_count + target_fail_count) == len(targets):
        logger.info(
            "job is finished: jobid={}, targets_count={}, target_success_count={}, target_fail_count={}"
            .format(job_id, len(targets), target_success_count,
                    target_fail_count))
        # Job end point.
        if target_success_count == len(targets):
            self.job_callback(job_id, JobStatus.success.value)
        else:
            self.job_callback(job_id, JobStatus.fail.value)
    else:
        logger.info(
            "job is not finished: jobid={},target_count: {}, job target_success_count: {}"
            .format(job_id, len(targets), target_success_count))